#!/usr/bin/perl

#
# bookmarks.pl version 1E-23
#
# by Tom Rathborne - tomr@aceldama.com - http://www.aceldama.com/~tomr/
#
# I'm too lazy to stick a license message in here. Do what you want with it.
#
# Usage: bookmarks.pl ~/.netscape/bookmarks.html > processed-bookmarks.html
#
# This script transforms a Netscape Navigator 4 bookmarks file into a
# somewhat more valid HTML file, with all the bookmark metadata stripped
# out. It doesn't handle descriptions. It does what I want, on my
# bookmarks file, so please don't gripe if it doesn't work for you.
#
# You can see the output of this script on my bookmarks file at:
#     http://www.aceldama.com/~tomr/resources.html
#
# To make a named-anchor table-of-contents, make your top-level headings
# end in ' - xxx' where 'xxx' is the name you would like to assign to
# that heading. It requires that xxx =~ /[a-z]{3}/. By specifying a
# three-letter mnemonic (rather than a number or something based on the
# name of the category), you can ensure that people's bookmarks to your
# named anchors do not become stale.
#
# If you want it to stop partway through your bookmarks file, you *must*
# have a *top-level* bookmark named '- fin -' to say where. Otherwise,
# it'll do your entire bookmarks file.
#

### You'll probably want to customize these things:

$html_head = _slurp_file('~/www/_dev_/resources/head.html');  # what to print first
$html_tail = _slurp_file('~/www/_dev_/resources/tail.html');  # what to print last
# Dates are in YYYY.MM.DD format because it makes the most sense to computers.
$last_check = '1997.06.03'; # when all bookmarks were supposedly last verified
$hr = '<hr width="100%" size="2">'; # What to print as a separator

### End of customizations ####################################################

# Read a whole file into a string without shelling out to cat(1).
#   $path - name of the file; a leading '~' is expanded to a home directory
# Returns the file's contents. If the file cannot be opened a warning is
# printed and '' is returned, so a missing head/tail fragment simply yields
# no output for that part of the page.
sub _slurp_file {
    my ($path) = @_;

    # The shell used to expand '~' for cat; glob() does the same job here.
    my $file = $path;
    ($file) = glob($path) if $path =~ /^~/;

    my $fh;
    unless (defined $file and open($fh, '<', $file)) {
        warn "bookmarks.pl: cannot read $path: $!\n";
        return '';
    }

    local $/;               # undefined record separator: read it all at once
    my $text = <$fh>;
    close($fh);

    return defined $text ? $text : '';
}

# *Today* is the last update date, obviously.
# Only the day, month and year are needed, so slice just those three fields
# out of localtime() instead of naming all nine.
my ($mday, $mon, $year) = (localtime(time))[3, 4, 5];
# localtime() counts years from 1900 and months from 0.
$last_update = sprintf("%04d.%02d.%02d", $year + 1900, $mon + 1, $mday);

$bookmarks = ''; # The processed output
$toc       = ''; # Contents entries for the named top-level headings
%url       = (); # Unique count of bookmarks

# Rewrite the markup of one raw line from the Netscape bookmarks file.
#   $line - the line as read, indentation and trailing newline intact
# Returns the line with <DL>/<DT> turned into <ul>/<li>, the Netscape
# metadata attributes and the <H3> tags removed, '®' and ' & ' replaced by
# entities, and the first <A HREF / A> pair lower-cased. Indentation is left
# alone so the caller can still recognise top-level entries by their
# four-space indent.
sub _convert_line {
    my ($line) = @_;

    for ($line) { # alias $_ to $line so the substitutions stay short
        # fix up bookmark <dl> into a <ul>, etc.
        s/^<DL><p>//;
        s/^<\/DL><p>/<\/ul>/;
        s/DL>/ul>/;
        s/DT>/li>/;
        s/<p>//;

        # strip the bookmark metadata
        s/ ALIASOF="[0-9]+"//;
        s/ ALIASID="[0-9]+"//;
        s/ ADD_DATE="[0-9]+"//;
        s/ LAST_VISIT="[0-9]+"//;
        s/ LAST_MODIFIED="[0-9]+"//;
        s/ FOLDED//;
        s/ NEWITEMHEADER//;

        # entities
        s/®/\&reg;/g;
        s/ & / \&amp; /g;

        # folder headings lose their <H3> wrapper; links get lower-case tags
        s/<H3>//;
        s/<\/H3>//;
        s/<A HREF/<a href/;
        s/A>/a>/;
    }

    return $line;
}

# Recognise a converted top-level heading of the form '<li>Title - xxx'.
#   $line - a line already passed through _convert_line()
# In list context returns (title, anchor) for a heading and the empty list
# otherwise. The pattern is anchored at the end of the line: a top-level
# bookmark whose title merely contains ' - abc...' ends in '</a>' and must
# not be turned into a heading.
sub _heading_parts {
    my ($line) = @_;
    return $line =~ /^    <li>(.*) - ([a-z]{3})\s*$/;
}

SKIPHEAD: while (<>) { last SKIPHEAD if(m/^$/); } # Skip to first blank line

PROCESS: while (my $line = <>) {
    $line = _convert_line($line);

    if (my ($title, $anchor) = _heading_parts($line)) {
        # Named heading: separator, anchor, and a matching contents entry.
        # The trailing newline keeps the following <ul> on its own line.
        $line = "$hr\n<b><a name=\"$anchor\">$title</a></b>\n";
        $toc .= "<li><a href=\"#$anchor\">$title</a>\n";
    }
    $line =~ s/^(    )+//; # drop the indentation

    if ($line =~ /- fin -/) { # The '- fin -' top-level bookmark is the bailout
        $bookmarks .= "</ul>\n";
        last PROCESS;
    }

    $url{$1} = 1 if $line =~ /href="([^"]+)"/; # Update unique URL list

    # add this bookmark to output; separators and descriptions are dropped
    $bookmarks .= $line unless ($line =~ /<HR>/ or $line =~ /<DD>/);
}

# Number of distinct URLs gathered in %url.
$urls = keys %url;

# Everything is ready: emit the page from top to bottom.

# Centered status banner: URL count, update date and check date, followed
# by one blank line.
my @banner_lines = (
    '<div align="center">',
    "<b>$urls unique URLs</b> &middot;",
    "<b>Last&nbsp;Update:&nbsp;$last_update</b> &middot;",
    "<b>Last&nbsp;Check:&nbsp;$last_check</b>",
    '</div>',
    '',
);

print $html_head;
print join("\n", @banner_lines) . "\n";

# The contents list is printed only when at least one entry was recorded.
if ($toc ne '') {
    print $hr . "\n\n<h2>Contents</h2>\n<ul>\n" . $toc . "</ul>\n";
}

# Processed bookmarks first, then the closing fragment.
print $_ for ($bookmarks, $html_tail);

# The end.
