#!/usr/bin/perl
# ^^^^^^^^ Change this if your perl lives somewhere else, or just pass this
# script as an argument to the Perl interpreter:
# $ perl merge -mXX file1 -sXX file2
# (XX is the bookmark file type: NS, MO or IE.)
#
# See the README for more info on running merge.
#
#Copyright (C) 2000  Jeremy Buchmann
#
#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; either version 2
#of the License, or (at your option) any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program in a file called License.txt; if not, write to
#the Free Software Foundation at:
#
#Free Software Foundation, Inc.
#59 Temple Place - Suite 330
#Boston, MA  02111-1307
#USA
#
# You can get a copy of the GPL at:
# http://www.gnu.org/copyleft/gpl.html
# 
# This is merge...
#

use strict;

my $sort_links = 0;
my $NULL = "";
my $netscape_ID = "ns";
my $ie_ID = "ie";
my $mozilla_ID = "mo";
my $numargs = 6; # NOTE(review): not referenced anywhere else in the visible file.
my $output_type = $netscape_ID;
my $master_filename = "";
my $slave_filename = "";
my $master_type = "";
my $slave_type = "";

# Go through the command line arguments and get the output type, master
# type, master filename, slave type, and slave filename.
#
# Recognized arguments (matched case-insensitively):
#   -s                 request link sorting (only prints an apology for now)
#   -mNS|-mIE|-mMO f   master file f of the given type; also sets output type
#   -sNS|-sIE|-sMO f   slave file f of the given type
# Anything else is silently ignored.  If an option is repeated, the last
# occurrence wins.  Every error path exits with a non-zero status so that
# callers can tell a failed run from a successful one.
#
my %type_for_suffix = (
  "ns" => $netscape_ID,
  "ie" => $ie_ID,
  "mo" => $mozilla_ID,
);
my $i = 0;
my $got_master_file = 0;
my $got_slave_file = 0;
while ($i < scalar(@ARGV))
{
  # Lowercase a copy: the options are case-insensitive, but @ARGV itself
  # (which also holds the filenames) must not be rewritten.
  my $token = lc($ARGV[$i]);
  if ($token eq "-s")
  {
    $sort_links = 1;
    print STDERR "Sorry, link-sorting isn't working yet.\n";
  }
  elsif ($token =~ /\A-([ms])(ns|ie|mo)\z/)
  {
    my $role = $1;
    my $type = $type_for_suffix{$2};
    my $filename = $ARGV[$i + 1];
    if (!defined($filename) || $filename eq $NULL)
    {
      print STDERR "Filename did not follow $token.\n";
      usage();
      exit 1;
    }

    if ($role eq "m")
    {
      # The output is always written in the master's format.
      $master_type = $output_type = $type;
      $master_filename = $filename;
      $got_master_file = 1;
    }
    else
    {
      $slave_type = $type;
      $slave_filename = $filename;
      $got_slave_file = 1;
    }

    # The filename belongs to this option; skip it so that it is never
    # mistaken for an option itself.
    $i++;
  }

  $i++;
}

# Make sure we got a master and a slave file.
#
if (!$got_master_file)
{
  print STDERR "Didn't get a master file.\n";
  usage();
  exit 1;
}
elsif (!$got_slave_file)
{
  print STDERR "Didn't get a slave file.\n";
  usage();
  exit 1;
}

# Open the files and read them into arrays, one element per line.
#
# Three-argument open is used so that the filename is taken literally:
# with the two-argument form, leading/trailing whitespace and special
# characters in the name are interpreted by open() instead of being part of
# the name.  The handles are lexical, so they cannot clash with any other
# bareword handle.
#
open(my $master_fh, '<', $master_filename) or die "Could not open master $master_filename: $!";
my @master = <$master_fh>;
close($master_fh);
open(my $slave_fh, '<', $slave_filename) or die "Could not open slave $slave_filename: $!";
my @slave = <$slave_fh>;
close($slave_fh);

# Split the master file at the first line containing "<DL>": everything
# before that line is header material that is copied to the output as is.
#
# $start_line ends up as the index of that first "<DL>" line, or as the
# number of lines in the file when no line contains "<DL>".
#
my $start_line = 0;
until ($start_line >= scalar(@master) || $master[$start_line] =~ /<DL>/)
{
  $start_line++;
}
# 0 .. -1 is an empty range, so this is an empty list when the very first
# line already contains "<DL>".
my @master_headers = @master[0 .. $start_line - 1];
#
# Now we have the line number where the bookmarks really start...
#
# Parse the links in the files.  To add support for another file format, we'd
# just need a new or modified parseXXLinks() function and an entry in the
# table below.
#
# The master is parsed from $start_line (its header lines are skipped); the
# slave is parsed from its first line.
#
my %parser_for = (
  $netscape_ID => \&parseNSLinks,
  $mozilla_ID  => \&parseMOLinks,
  $ie_ID       => \&parseIELinks,
);
my @master_links;
my @slave_links;

my $master_parser = $parser_for{$master_type};
if (!defined($master_parser))
{
  print STDERR "Something weird happened...bailing out.\n";
  exit 1; # Non-zero: this is a failure, not a normal exit.
}
@master_links = $master_parser->($start_line, \@master);

my $slave_parser = $parser_for{$slave_type};
if (!defined($slave_parser))
{
  print STDERR "Something weird happened...bailing out.\n";
  exit 1;
}
@slave_links = $slave_parser->(0, \@slave);

# Merge the two trees.  The new tree is @master_links.
#
merge(\@master_links, \@slave_links);

# Pick the function that prints the tree in the requested format.  This is
# done before anything is written so that an unknown output type does not
# leave a half-written file on standard output.
#
my %printer_for = (
  $netscape_ID => \&printNSTree,
  $mozilla_ID  => \&printMOTree,
  $ie_ID       => \&printIETree,
);
my $print_tree = $printer_for{$output_type};
if (!defined($print_tree))
{
  print STDERR "Couldn't determine output type...exiting.\n";
  exit 1;
}

# Print the headers.  The lines are passed to print as a list: interpolating
# the array into a string would join the lines with a space and so put a
# stray blank in front of every header line but the first.
#
print @master_headers, "\n";

# Print the tree.
#
print "<DL><p>\n";
$print_tree->(\@master_links);
print "</DL><p>\n";

exit;

### ------------------------------------------------------------------------ ###
### Functions
### ------------------------------------------------------------------------ ###

# The parseXXLinks() functions turn the lines of a bookmark file into our
# own internal format: a tree of "thingy" hashes held in nested arrays.
# Every thingy has a {type}:
#   1 - link:      {payload} is the URL and {name} the link text; {adddate},
#                  {lastvisit} and {lastmodified} are set when the matching
#                  attribute is present on the line.
#   2 - folder:    {name} is the heading text, {payload} is a reference to
#                  the array holding the folder's children; {adddate} is set
#                  when the heading has an ADD_DATE attribute.
#   3 - separator: no other fields.
#
# All three functions take the same arguments:
#   first  - index of the line at which parsing starts.
#   second - reference to the array that holds the lines of the bookmark
#            file (the array is only read, never modified).
# and return the list of top-level thingies.
#
# They are declared without a prototype: they take arguments, and an empty
# "()" prototype would turn any call compiled after the definition into a
# compile-time error.

# This function parses Netscape bookmark files.
#
sub parseNSLinks {
  my ($start, $ref) = @_;
  return _parseBookmarkLines($start, $ref, 0);
}

# This function parses Mozilla bookmark files.  In addition to the common
# fields it records {id} for folders (ID attribute) and {lastcharset} for
# links (LAST_CHARSET attribute) when they are present.
#
sub parseMOLinks {
  my ($start, $ref) = @_;
  return _parseBookmarkLines($start, $ref, 1);
}

# This function parses IE bookmark files.
#
sub parseIELinks {
  my ($start, $ref) = @_;
  return _parseBookmarkLines($start, $ref, 0);
}

# Shared line-by-line parser behind the parseXXLinks() functions.
# The third argument is true when the Mozilla-only fields ({id} on folders,
# {lastcharset} on links) should be recorded as well.
#
sub _parseBookmarkLines {
  my ($start, $lines_ref, $want_mozilla_fields) = @_;
  my @returned_links = (); # The array we return; it is the root folder.

  # The stack of references to the hierarchy of folders we are currently
  # inside of.  The root folder is always at the bottom.
  my @folder_stack = (\@returned_links);

  for my $line_num ($start .. $#{$lines_ref})
  {
    my $line = $lines_ref->[$line_num];
    my $current_folder_ref = $folder_stack[-1];

    # A folder heading.  The attribute part is made of quoted strings and
    # other characters, so a ">" inside a quoted value does not end the tag.
    # No attribute is required: a heading that is not recognized would send
    # its children to the parent folder and let its </DL> close the parent.
    if ($line =~ m{<DT><H3((?:"[^"]*"|[^">])*)>(.*?)</H3>})
    {
      my ($attributes, $name) = ($1, $2);
      my @new_folder = ();
      my %thingy = (type => 2, name => $name, payload => \@new_folder);
      if ($attributes =~ /\bADD_DATE="(.*?)"/)
      {
        $thingy{adddate} = $1;
      }
      if ($want_mozilla_fields && $attributes =~ /\bID="(.*?)"/)
      {
        $thingy{id} = $1;
      }
      push @$current_folder_ref, \%thingy;
      push @folder_stack, \@new_folder;
    }
    # The end of a folder.  The root is never popped, so entries that follow
    # a surplus </DL> still end up in the tree instead of being dropped.
    elsif ($line =~ m{</DL>})
    {
      pop(@folder_stack) if scalar(@folder_stack) > 1;
    }
    # A link.
    elsif ($line =~ m{<DT><A HREF="(.*?)".*?>(.*?)</A>})
    {
      my %thingy = (type => 1, payload => $1, name => $2);

      $thingy{adddate} = $1 if $line =~ /ADD_DATE="(\d*?)"/;
      $thingy{lastvisit} = $1 if $line =~ /LAST_VISIT="(\d*?)"/;
      $thingy{lastmodified} = $1 if $line =~ /LAST_MODIFIED="(\d*?)"/;
      if ($want_mozilla_fields && $line =~ /LAST_CHARSET="(.*?)"/)
      {
        $thingy{lastcharset} = $1;
      }

      push @$current_folder_ref, \%thingy;
    }
    # A separator.
    elsif ($line =~ /<HR>/)
    {
      my %thingy = (type => 3);
      push @$current_folder_ref, \%thingy;
    }
  }

  return @returned_links;
}


# This is the function that does the actual merging of the trees.
#
# Arguments:
#   first  - reference to the master tree (an array of thingies); it is
#            modified in place.
#   second - reference to the slave tree; it is only read.
#
# For every slave thingy at this level:
#   - a link (type 1) is appended to the master unless the master already
#     has a link at this level with an equal URL (see URL_equal());
#   - a folder (type 2) is merged recursively into the first master folder
#     of the same name at this level, or appended whole if there is none;
#   - a separator (type 3) is not carried over.
#
# Declared without a prototype: the function takes two arguments, and an
# empty "()" prototype would turn any call compiled after the definition
# into a compile-time error.
#
sub merge {
  my ($master_ref, $slave_ref) = @_;

  foreach my $slave_thingy (@$slave_ref)
  {
    my $slave_type = $slave_thingy->{type};
    my $found = 0;

    if ($slave_type == 1)
    {
      foreach my $master_thingy (@$master_ref)
      {
        # Only links carry a URL in {payload}; a folder's payload is an
        # array reference and a separator has none.
        next unless $master_thingy->{type} == 1;
        if (URL_equal($slave_thingy->{payload}, $master_thingy->{payload}))
        {
          $found = 1;
          last;
        }
      }
      push @$master_ref, $slave_thingy unless $found;
    }
    elsif ($slave_type == 2)
    {
      foreach my $master_thingy (@$master_ref)
      {
        if ($master_thingy->{type} == 2 && $slave_thingy->{name} eq $master_thingy->{name})
        {
          merge($master_thingy->{payload}, $slave_thingy->{payload});
          $found = 1;
          last;
        }
      }
      push @$master_ref, $slave_thingy unless $found;
    }
    # Type 3 (separator): nothing to do.
  }

  return;
}

# This function tries to figure out if two URLs are "equal", meaning, they
# point to the same page.
#
# Arguments: the two URLs, as strings.
# Returns 1 when the strings are identical or differ only by one trailing
# slash (so that [http://foo.com] and [http://foo.com/] are not considered
# different), and 0 otherwise.
#
# Declared without a prototype: the function takes two arguments, and an
# empty "()" prototype would turn any call compiled after the definition
# into a compile-time error.
#
sub URL_equal {
  my ($link1, $link2) = @_;

  return 1 if $link1 eq $link2;     # An easy case...
  return 1 if $link1 eq "$link2/";  # Only $link1 has the trailing slash.
  return 1 if $link2 eq "$link1/";  # Only $link2 has the trailing slash.
  return 0;
}

# This function prints the tree in Netscape's format.
#
# Argument: reference to an array of thingies (see the type numbers below).
# The output goes to the currently selected filehandle.  Fields that a
# thingy does not have are printed as empty attribute values.
#
# Declared without a prototype: the function takes an argument, and an empty
# "()" prototype would turn any call compiled after the definition into a
# compile-time error.
#
sub printNSTree {
  my ($tree_ref) = @_;

  foreach my $thingy (@$tree_ref)
  {
    my $type = $thingy->{type};
    if ($type == 1)
    {
      # A link.
      print qq{<DT><A HREF="$thingy->{payload}" ADD_DATE="$thingy->{adddate}" LAST_VISIT="$thingy->{lastvisit}" LAST_MODIFIED="$thingy->{lastmodified}">$thingy->{name}</A>\n};
    }
    elsif ($type == 2)
    {
      # A folder: heading first, then its children in a nested list.
      print qq{<DT><H3 FOLDED ADD_DATE="$thingy->{adddate}">$thingy->{name}</H3>\n};
      print "<DL><p>\n";
      printNSTree($thingy->{payload});
      print "</DL><p>\n";
    }
    elsif ($type == 3)
    {
      # A separator.
      print "<HR>\n";
    }
  }

  return;
}

# This function prints the tree in Mozilla's format.
#
# Argument: reference to an array of thingies (see the type numbers below).
# The output goes to the currently selected filehandle.  Fields that a
# thingy does not have are printed as empty attribute values.
#
# Declared without a prototype: the function takes an argument, and an empty
# "()" prototype would turn any call compiled after the definition into a
# compile-time error.
#
sub printMOTree {
  my ($tree_ref) = @_;

  foreach my $thingy (@$tree_ref)
  {
    my $type = $thingy->{type};
    if ($type == 1)
    {
      # A link.
      print qq{<DT><A HREF="$thingy->{payload}" ADD_DATE="$thingy->{adddate}" LAST_VISIT="$thingy->{lastvisit}" LAST_MODIFIED="$thingy->{lastmodified}" LAST_CHARSET="$thingy->{lastcharset}">$thingy->{name}</A>\n};
    }
    elsif ($type == 2)
    {
      # A folder: heading first, then its children in a nested list.
      print qq{<DT><H3 FOLDED ADD_DATE="$thingy->{adddate}" ID="$thingy->{id}">$thingy->{name}</H3>\n};
      print "<DL><p>\n";
      printMOTree($thingy->{payload});
      print "</DL><p>\n";
    }
    elsif ($type == 3)
    {
      # A separator.
      print "<HR>\n";
    }
  }

  return;
}

# This function prints the tree in IE's format.
#
# Argument: reference to an array of thingies (see the type numbers below).
# The output goes to the currently selected filehandle.  Fields that a
# thingy does not have are printed as empty attribute values.
#
# Declared without a prototype: the function takes an argument, and an empty
# "()" prototype would turn any call compiled after the definition into a
# compile-time error.
#
sub printIETree {
  my ($tree_ref) = @_;

  foreach my $thingy (@$tree_ref)
  {
    my $type = $thingy->{type};
    if ($type == 1)
    {
      # A link.
      print qq{<DT><A HREF="$thingy->{payload}" ADD_DATE="$thingy->{adddate}" LAST_MODIFIED="$thingy->{lastmodified}" LAST_VISIT="$thingy->{lastvisit}" OBJECT_TYPE="LINK">$thingy->{name}</A>\n};
    }
    elsif ($type == 2)
    {
      # A folder: heading first, then its children in a nested list.
      print qq{<DT><H3 FOLDED ADD_DATE="$thingy->{adddate}" OBJECT_TYPE="LINK">$thingy->{name}</H3>\n};
      print "<DL><p>\n";
      printIETree($thingy->{payload});
      print "</DL><p>\n";
    }
    elsif ($type == 3)
    {
      # A separator is written as a link whose URL and text are both "-".
      print qq{<DT><A HREF="-">-</A>\n};
    }
  }

  return;
}


# This removes the first element of an array.
#
# Argument: reference to the array.  The array itself is not modified.
# Returns a new list holding every element but the first; the list is empty
# when the array has fewer than two elements.
#
# Declared without a prototype: the function takes an argument, and an empty
# "()" prototype would turn any call compiled after the definition into a
# compile-time error.
#
sub removeFirstElem {
  my ($ref) = @_;
  my @rest = @$ref; # Work on a copy so the caller's array stays intact.
  return () if scalar(@rest) < 2;
  shift @rest;
  return @rest;
}

# This prints the usage message to STDERR.  It takes no arguments and does
# not exit; callers decide what happens next.
#
sub usage {
  print STDERR
    "\nUsage: $0 -m[NS,MO,IE] master-file -s[NS,MO,IE] slave-file\n\n",
    "$0 can read and write Netscape 4.x, Mozilla/Netscape 6.x or IE bookmarks.\n",
    "You must specify what type each of the input files are.  The output type\n",
    "is the same type as the master bookmark file.  For example:\n\n",
    # The slave file is introduced with -s, not with a second -m.
    " > $0 -mIE favorites.html -sNS bookmarks.html\n\n",
    "would read a master IE bookmark file named favorites.html, a slave Netscape\n",
    "bookmark file named bookmarks.html, and would print the output in IE format.\n";
  return;
}
