#!/usr/bin/perl -w
use strict;

# Special (non-url) key for 'links' hashes; maps to an array holding all keys IN
# THE ORDER THEY WERE ADDED. This is the most sensible order to display the diffs.
my $ORDERED_LINKS = '@@ ordered links @@';

my ( $show_added, $show_removed, $show_preserved, $show_duplicates,
     $show_locations, $old_file, $new_file ) = process_args();

my %old_links = read_links($old_file);
my %new_links = read_links($new_file);

# The following lists will hold urls (keys) in display order.
my @added      = ();
my @removed    = ();
my @preserved  = ();
my @duplicates = ();

# Must count occurrences of each link, so we only add duplicate urls once.
my %new_counts = ();

# Don't iterate over 'keys %old_links', as that'll return the urls in random
# order; instead, get the ordered array via the special (non-url) hash key,
# $ORDERED_LINKS.
foreach ( @{ $old_links{$ORDERED_LINKS} } ) {
    if (exists $new_links{$_}) {
        push(@preserved, $_);
    } else {
        push(@removed, $_);
    }
}

# Likewise, walk the new links in the order they were added.
foreach ( @{ $new_links{$ORDERED_LINKS} } ) {
    # Keep a count so we can spot duplicates (and only include them once).
    $new_counts{$_} = 0 unless exists $new_counts{$_};
    $new_counts{$_}++;
    push(@duplicates, $_) if ($new_counts{$_} == 2);
    push(@added, $_) unless exists $old_links{$_};
}

# Output the introductory HTML ...
print <<END;
<html>
<head>
<title>Bookmark Diff</title>
</head>
<body>
Original File: $old_file<br>
Modified File: $new_file<br>
END

# ... the requested diffs, ...
output( "added",      "Added:",      \%new_links, \@added )      if $show_added;
output( "removed",    "Removed:",    \%old_links, \@removed )    if $show_removed;
output( "preserved",  "Preserved:",  \%new_links, \@preserved )  if $show_preserved;
output( "duplicated", "Duplicated:", \%new_links, \@duplicates ) if $show_duplicates;

# ... and the closing HTML.
print "</body>\n</html>\n";

# Prints one diff section: a captioned table with one row per link entry.
# The '$class' attribute lets a stylesheet colour each section differently.
sub output {
    my ( $class, $caption, $hash, $keys ) = @_;
    print "<p class=\"$class\">\n$caption\n</p>\n" .
          "<table>\n";
    foreach (@{$keys}) {
        my $url = $_;
        foreach (@{ $hash->{$url} }) {
            my %entry = %{ $_ };
            my ( $label, $path ) =
                ( $entry{label}, join("/", @{ $entry{path} }) );
            print "<tr><td><a href=\"$url\">$label</a></td>";
            print "<td>$path/</td>" if $show_locations;
            print "</tr>\n";
        }
    }
    print "</table>\n";
}
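# For illustration only (the url, label and folder below are hypothetical, and
# the exact markup follows the reconstruction above): a call such as
#
#   output( "added", "Added:", \%new_links, \@added );
#
# would emit roughly:
#
#   <p class="added">
#   Added:
#   </p>
#   <table>
#   <tr><td><a href="http://example.com/">Example</a></td><td>/Tech/</td></tr>
#   </table>
#
# The second cell appears only when the -l flag ($show_locations) is set.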
# Processes the commandline args: filenames and (optionally) flags (a, r, d, p, l).
sub process_args {
    my @files = ();
    my ( $a, $r, $p, $d, $l ) = ( 0, 0, 0, 0, 0 );
    my $use_defaults = 1;
    foreach (@ARGV) {
        if ( /^-(.*)/ ) {
            my $flags = $1;
            $flags =~ /a/ && (($a = 1), $use_defaults = 0);
            $flags =~ /r/ && (($r = 1), $use_defaults = 0);
            $flags =~ /p/ && (($p = 1), $use_defaults = 0);
            $flags =~ /d/ && (($d = 1), $use_defaults = 0);
            $flags =~ /l/ && ($l = 1);
        } else {
            push(@files, $_);
        }
    }
    # Two bookmark files are required; bail out with a usage line otherwise.
    die "usage: $0 [-ardp] [-l] old-file new-file\n" unless @files == 2;
    # With no display flags given, default to added, removed and duplicated.
    if ($use_defaults) {
        $a = $r = $d = 1;
    }
    return ( $a, $r, $p, $d, $l, @files );
}

# The returned hash maps url => list of (label, path) records, except for the
# special $ORDERED_LINKS entry, which is an array reference holding all urls in
# the order they were read (needed because 'keys %hash' returns the urls in
# random order).
sub read_links {
    my ( $filename ) = @_;
    my @path = ( "" );
    my %hash = ();
    open( FILE, '<', $filename ) or die "Can't open $filename: $!";
    while ( <FILE> ) {
        if ( /<a href="(.*?)".*?>(.*?)<\/a>/i ) {
            push(@{ $hash{$ORDERED_LINKS} }, $1);
            push(@{ $hash{$1} }, { path => [@path], label => $2 });
        }
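        # These patterns target the Netscape/Mozilla bookmark-file format,
        # which nests folders as <DL> lists. A trimmed sketch of the input
        # (attribute values are hypothetical):
        #
        #   <DT><H3 ADD_DATE="...">Tech</H3>
        #   <DL><p>
        #       <DT><A HREF="http://example.com/" ADD_DATE="...">Example</A>
        #   </DL><p>
        #
        # Each <h3> opens a folder (pushed onto @path); each closing </dl>
        # leaves one (popped below).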
        if ( /<h3.*?>(.*?)<\/h3>/i ) {
            push(@path, $1);
        }
        if ( /<\/dl>/i ) {
            pop(@path);
        }
    }
    close( FILE );
    %hash;
}

# Play nicely with scripts and the shell.
exit 0;
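# A typical invocation (the script name and filenames here are hypothetical):
#
#   perl bkdiff.pl -arl bookmarks-old.html bookmarks-new.html > diff.html
#
# With no a/r/p/d flags at all, the defaults in process_args() kick in and
# the report shows the added, removed and duplicated links; add -l to include
# each link's folder location in the output.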