#! /usr/bin/perl -w
use strict;

# Special (non-url) key for 'links' hashes; maps to an array reference holding all
# urls IN THE ORDER THEY WERE READ. This is the most sensible order to display the
# diffs ('keys %hash' would return the urls in random order instead).
my $ORDERED_LINKS = '@@ ordered links @@';

# Switches selecting which report sections to print, then the two files to compare.
my ( $show_added, $show_removed, $show_preserved,
     $show_duplicates, $show_locations, $old_file, $new_file ) = process_args();

my %old_links = read_links($old_file);
my %new_links = read_links($new_file);

# The following lists will hold urls (keys) in display order. Each url is listed at
# most once: output() prints every record stored under a url, so a repeated key would
# repeat all of that url's rows.
my @added = ();
my @removed = ();
my @preserved = ();
my @duplicates = ();

# Occurrence counts per url, used to classify each url only on its first occurrence
# and to spot duplicates in the new file.
my %old_counts = ();
my %new_counts = ();

# Walk the old urls in file order (via the special $ORDERED_LINKS entry, which is
# absent when the file held no links at all).
foreach my $url ( @{ $old_links{$ORDERED_LINKS} || [] } ) {
    next if $old_counts{$url}++;    # already classified
    if ( exists $new_links{$url} ) { push( @preserved, $url ); }
    else                           { push( @removed, $url ); }
}

# Walk the new urls in file order, again via the special $ORDERED_LINKS entry.
foreach my $url ( @{ $new_links{$ORDERED_LINKS} || [] } ) {
    my $occurrences = ++$new_counts{$url};
    # A url becomes a duplicate on its second occurrence; later ones add nothing.
    push( @duplicates, $url ) if $occurrences == 2;
    # Only the first occurrence may report the url as added.
    push( @added, $url ) if $occurrences == 1 && !exists $old_links{$url};
}

# The file names come straight from the command line, so escape the characters that
# are special in HTML text before embedding them in the page.
my ( $old_name, $new_name ) = map {
    my $text = $_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text;
} ( $old_file, $new_file );

# Output the introductory HTML ...
print <<"END";
<html>
  <head>
    <title>Bookmark Diff</title>
    <style><!--
      body, td    { font-size: small; font-family: arial, helvetica, sans-serif; }
      div         { margin: 20px 0; padding: 5px 20px; }
      h3          { margin: 0px 5px 5px -15px; padding: 0px; }
      a           { text-decoration: none; }
      a:hover     { text-decoration: underline; }
      .path       { font-size: x-small; color: gray; xpadding-left: 20px }
      .added      { background: #eeffee; border: 1px dashed gray; }
      .removed    { background: #ffeeee; border: 1px dashed gray; }
      .moved      { background: #ffffee; border: 1px dashed gray; }
      .preserved  { background: #ffffee; border: 1px dashed gray; }
      .duplicated { background: #ffffee; border: 1px dashed gray; }
    --></style>
  </head>
  <body>
    Original File: <b>$old_name</b><br>
    Modified File: <b>$new_name</b><br>
END

# ... the requested diffs (each section only if its switch is set), ...
output( "added",      "Added:",      \%new_links, \@added )       if $show_added;
output( "removed",    "Removed:",    \%old_links, \@removed )     if $show_removed;
output( "preserved",  "Preserved:",  \%new_links, \@preserved )   if $show_preserved;
output( "duplicated", "Duplicated:", \%new_links, \@duplicates )  if $show_duplicates;

# ... and the closing HTML, newline-terminated so the output ends with a complete line.
print "  </body>\n</html>\n";


# Prints one titled section of the report as an HTML table on STDOUT.
#   $class   - CSS class put on the enclosing <div>
#   $caption - heading text shown above the table
#   $hash    - reference to a links hash: url => array ref of { label, path } records
#   $keys    - reference to an array of the urls to print, in display order
# Every record stored under a url gets its own row. The record's path (its elements
# joined with "/") is printed in a second column only when $show_locations is set.
# Labels and paths are printed without further escaping.
sub output {
    my ( $class, $caption, $hash, $keys ) = @_;
    print "    <div class='$class'><h3>$caption</h3>\n" .
          "      <table cellspacing='0' cellpadding='0' width='100%'>\n";
    foreach my $url ( @{$keys} ) {
        # The href is double-quoted, so an apostrophe in the url is harmless; only a
        # literal double quote could end the attribute early, hence the escape.
        ( my $href = $url ) =~ s/"/&quot;/g;
        foreach my $entry ( @{ $hash->{$url} } ) {
            my $label = $entry->{label};
            my $path  = join( "/", @{ $entry->{path} } );
            print "        <tr><td width='50%'><a href=\"$href\">$label</a></td>";
            print "<td width='50%' class='path'>$path/</td>" if $show_locations;
            print "        </tr>\n";
        }
    }
    print "      </table>\n    </div>\n";
}


# Processes the command-line arguments (@ARGV): filenames and, optionally, flags.
# Any argument starting with "-" is a bundle of flag letters:
#   a - added      r - removed      p - preserved      d - duplicates
#   l - locations
# Other letters are ignored. When none of a, r, p, d is given, the defaults a, r and
# d are switched on. Every other argument is collected as a filename.
# Returns the list ( a, r, p, d, l, @files ): five 0/1 flags followed by the
# filenames in command-line order.
# Dies with a usage message when fewer than two filenames were given.
sub process_args {
    my @files = ();
    my %flag  = map { $_ => 0 } qw( a r p d l );

    foreach my $arg (@ARGV) {
        if ( $arg =~ /^-(.*)/ ) {
            my $letters = $1;
            foreach my $letter ( keys %flag ) {
                $flag{$letter} = 1 if index( $letters, $letter ) >= 0;
            }
        }
        else {
            push( @files, $arg );
        }
    }

    # 'l' only adds a column; it does not count as choosing a section.
    unless ( $flag{a} || $flag{r} || $flag{p} || $flag{d} ) {
        @flag{qw( a r d )} = ( 1, 1, 1 );
    }

    # Fail here with a clear message rather than later on an undefined filename.
    die "Usage: $0 [-arpdl] OLD_FILE NEW_FILE\n" if @files < 2;

    return ( @flag{qw( a r p d l )}, @files );
}


# Reads the links from the file $filename, scanning it line by line.
# The returned hash maps url => list of { label, path } records, except for the
# special $ORDERED_LINKS entry, which is an array reference holding all urls in the
# order they were read (needed because 'keys %hash' returns the urls in random order).
# 'path' is a copy of the folder stack at the point the link was found: the stack
# starts as ( "" ), grows by the heading text on each <dt><h3> line and shrinks by one
# on each line containing </dl>. At most one link is recorded per line.
# Dies if the file cannot be opened.
sub read_links {
    my ( $filename ) = @_;
    my @path = ( "" );
    my %hash = ();
    # Three-argument open: the name is always treated as a plain file to read, never
    # as a mode prefix or a command pipe.
    open( my $fh, '<', $filename ) or die "Can't open $filename: $!";
    while ( my $line = <$fh> ) {
        if ( $line =~ /<a.*?\shref=\"(.*?)\".*?>(.*?)<\/a>/i ) {
            # Copy the captures at once so later matches cannot clobber them.
            my ( $url, $label ) = ( $1, $2 );
            push( @{ $hash{$ORDERED_LINKS} }, $url );
            push( @{ $hash{$url} }, { path => [@path], label => $label } );
        }
        if ( $line =~ /<dt><h3.*?>(.*?)<\/h3>/i )  { push( @path, $1 ); }
        if ( $line =~ /<\/dl>/i )                  { pop(@path); }
    }
    close($fh);
    return %hash;
}

# Play nicely with scripts and the shell: report success explicitly.
exit 0;