#!/usr/bin/perl -s
#
# troff2html - convert troff text to HTML
#              "-mm" macros supported, "-ms" in development
#
# Copyright © 1993  Oscar Nierstrasz
# Copyright © 1994  Jon Crowcroft
# Copyright © 1994-1995  Daniel Quinlan
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# Requirements to use `troff2html'
#
# 1. Perl 5
#
# 2. <A HREF="http://iamwww.unibe.ch/~scg/Src/">html.pl</A>
#	to extract, normalize and hypertextify URLs in HTML files.
#
# 3. groff, ghostscript, giftrans, and the PBM utilities
#	for `pic' and `tbl' conversion to work.  giftrans can be
#	commented out if you don't require tables and figures to be
#	transparent.
#
# Acknowledgments: originally based on `mm2html' 1.3 from Jon
# Crowcroft <jon@cs.ucl.ac.uk>, but almost completely rewritten since
# then.  Ideas from troff2html by John Troyer <troyer@cgl.ucsf.edu>.
#
# Things to do
#
# 1. `eqn'
# 2. do references really work?
# 3. .B, .I, .R
# 4. fix lists in &ms2html, finish basic "ms" support
# 5. HTML 3.0 tables
# 6. if and ie/el processing - must be done in second pass in order
#    to test registers
#
# Good Habits
#
# 1. Use LB to start a list instead of VL whenever possible.
# 2. If you want a "no-fill" region in fixed width font, use
#    ".nf" and ".fi" in addition to the font change.
# 3. Always match any double quotes in macro arguments. (I think you
#    can now get away with unmatched double quotes.)

# location of perl scripts
unshift(@INC, $ENV{'HOME'} . "/lib/perl");

# required perl scripts
require "button.pl";
require "html.pl";
require "newgetopt.pl";

# constants
$version = "v1.4";
$url = 'http://www.pathname.com/~quinlan/troff2html/index.html';
$usage = 'usage: troff2html [options] [files...]
        -mm           translate -mm macros
        -ms           translate -ms macros
        --single      create a single body page only
        --plain       use plain text instead of button to navigate
        --soelim="x"  use program x as a filter for .so requests
        --latin1      use Latin-1 characters as available
';

# Do not alter the order of the first four pairs.
%FontStart = (
	      'R'  => '',	# (1) Times Roman
	      'I'  => '<I>',	# (2) Times Italic
	      'B'  => '<B>',	# (3) Times Bold
	      'BI' => '<B><I>', # (4) Times Bold Italic
	      'C'  => '<TT>',	# Courier
	      'CW' => '<TT>',	# constant width
	      'U'  => ''	# unknown font
	      );

%FontEnd = (
	    'R'  => '',		# (1) Times Roman
	    'I'  => '</I>',	# (2) Times Italic
	    'B'  => '</B>',	# (3) Times Bold
	    'BI' => '</I></B>',	# (4) Times Bold Italic
	    'C'  => '</TT>',	# Courier
	    'CW' => '</TT>',	# constant width
	    'U'  => ''		# unknown font
	    );

# defaults
$soelim = "gsoelim";		# external soelim program
$opt_mm = 1;

# name of this script
$prog = $0;
$prog =~ s@.*/@@;

# date
chop($date = `date +"%d %B %Y"`);

# user, fullname
$user = $ENV{'USER'} || $ENV{'LOGNAME'} || (getpwuid($<))[0];
$fullname = (getpwnam($user))[6];
$fullname =~ s/.*-\s*(.*)\(.*//;
$fullname =~ s/,.*//;

# signature
$signature = "<I>Translated by <A HREF=\"$url\">$prog</A> $version on $date by $fullname<BR></I>\n";

&handle_arguments;

foreach $filename (@ARGV) {
    # run input through `soelim'
    if (!open(IN, "$soelim $filename |")) {
	die("$prog: can't create \`$filename': $!\n");
    }

    # first we delete comments, remove ignored blocks
    $tempname = $filename.$$;
    if (!open(PASS1, ">$tempname")) {
	die("$prog: can't create \`$tempname': $!\n");
    }
    $ignore_mode = 0;
    while (<IN>) {
	if (/^\.ig\s*/) {
	    $ignore_mode = 1;
	}
	elsif ($ignore_mode && /^\.\./) {
	    $ignore_mode = 0;
	}
	else {
	    print PASS1 unless (/^\.\\"/ || $ignore_mode);
	}
    }
    close(IN);
    close(PASS1);

    # initialize file-wide variables
    $toc = "";			# Table of Contents
    $refs = "";			# references
    $NR{'Hu'} = 3;		# default unnumbered heading level
    $TL = 0;			# Table of Contents level

    if (!open(FILE, $tempname)) {
	die("$prog: can't create \`$tempname': $!\n");
    }
    ($BASE = $filename) =~ s/\.mm$//;	# drop the .mm suffix
    $TOC = $BASE . "-toc.html";		# title page filename
    $CURR = $PREV_LINK = $TOC;		# current and previous pages
    &newpage($TOC); $inbody = 0;

    if ($single) {
	$TOTOC = "";
    }
    else {
	$TOTOC = $TOC;
    }

    # some useful strings:
    $REFS = $BASE . "-refs.html";
    $totoc = "<I>To <A HREF=\"$TOTOC#TOC\">Table of Contents</A></I><P>\n\n";
    $torefs = "<I>To <A HREF=\"$REFS\">References</A></I><P>\n\n";

    # the record separator is a newline followed by a dot
    $/ = "\n\.";

    # translate:
    while(<FILE>) {
	&handle_record;
    }

    # gracefully close the last body page:
    &lastbody;

    # put the collected table of contents at the end
    # of the title page:
    while ($TL > 0) { $toc .= "</UL>\n"; $TL--; }
    open(TOC, ">>$TOC");
    print TOC "<H1><A NAME=\"TOC\">Table of Contents</H1>\n$toc\n";
    if ($refs =~ /./) { print TOC $torefs; }
    print TOC $signature;
    close(TOC);

    # if there are references, print them out:
    if ($refs =~ /./) {
	&newpage($REFS);
	&printtitle("References");
	print "<H2>References</H2>\n";
	print "<OL>\n$refs\n</OL>\n\n";
	&up; print "<P>\n";
	print $signature;
	close(STDOUT);
    }

    # remove temporary file
    unlink ($tempname);
}

sub handle_arguments {
    if ($#ARGV < 0) {
	die ($usage);
    }
    &NGetOpt("single", "plain", "soelim=s", "latin1", "mm", "ms");

    if ($opt_soelim) {
	$soelim = $opt_soelim;
    }
}

sub handle_record {
    s/^\.//;			# delete initial "." in first record
    s/\n\.$//;			# delete record separator
    s/[ \t]+\n/\n/g;		# delete trailing spaces and tabs
    0 while s/\n\\\./\n./g;	# unescape leading dots

    # split input record
    $last_cmd = $cmd if $cmd =~ /[A-Z][A-Z]/;
    ($cmd, $args, $text) = /^(\S+)[ \t]*(.*)\n?((.|\n)*)$/;

    # we cannot just split $args, since "foo bar" is one argument
    @args = &troffwords($args);

    if ($opt_mm) {
	&mm2html;
    }
    elsif ($opt_ms) {
	&ms2html;
    }
    else {
	&troff2html;
    }
}

sub mm2html {
    local($_) = $cmd;

    # An attempt has been made to put these macros in order of
    # frequency, with some consideration for parsing time.

    # ??? - expand references into HTML links:
    s/\[RF:(\d*)\]/<A HREF="${REFS}#RF:$1">[$1]<\/A>/g;

    # paragraph
    /^P$/ && do {
	print "<P>\n";
	print &format($text), "\n";
	return;
    };

    # space
    # XXX
    /^SP$/ && do {
	local $i;

	if ($args[0] !~ /^\d+$/) {
	    $args[0] = 1;
	}
	for (; $args[0] > 0; $args[0]--) {
	    print "<BR>\n";
	}
	print &format($text), "\n";
	return;
    };

    # list item
    /^LI$/ && do {
	print "<LI>";
	# if the mark is not a number, print it after the <LI>
	if ($args[0] && $args[0] !~ /^[[(<{]?[^0-9][])>}.]?$/) {
	    print " ", &format($args[0]), " ";
	}
	print &format($text), "\n";
	return;
    };

    # end list
    /^LE$/ && do {
	&poplist_mm;
	print &format($text), "\n";
	return;
    };

    # begin list
    /^(BL|LB|ML|VL|AL|DL)$/ && do {
	&newlist_mm($1);
	return;
    };

    # bottom block -- Note that we only print a bottom block once.
    # This could be fixed to work correctly, but it would be more
    # trouble than it is worth.
    /^BS$/ && do {
	# Only insert <HR> if there is following text.  Otherwise, it
	# really means that we are trying to reset the bottom block.
	if ($text =~ /\S/) {
	    print "<HR>\n";
	}
	print &format($text), "\n";
	return;
    };

    # footnote -- This should be fixed to create HTML 3.0 footnotes.
    /^FS$/ && do {
	print " (<I>", &format($text), "</I>) ";
	return;
    };

    # displays
    /^DS$/ && do {
	print &format($text), "\n";
	return;
    };

    # numbered header
    /^H$/ && do {
	local $hlevel, $htext;

	if ($#args < 1) {
	    die("$prog: `.H' must have at least two arguments\n");
	}

	$hlevel = $args[0];
	$htext = $args[1];

	# increment this HeaderLevel
	$HeaderLevel[$hlevel - 1]++;

	# zero any HeaderLevel elements greater than this one
	for $i (($hlevel) .. $#HeaderLevel) {
	    $HeaderLevel[$i] = 0;
	}

	# set up ID
	$HeaderID = "$HeaderLevel[0]";
	for $i (1 .. ($hlevel - 1)) {
	    $HeaderID .= "." . $HeaderLevel[$i];
	}

	while ($TL < $hlevel) {
	    $toc .= "<UL>\n";
	    $TL++;
	}
	while ($TL > $hlevel) {
	    $toc .= "</UL>\n";
	    $TL--;
	}

	$name = "$HeaderID";	# unique anchor name
	$num = "$HeaderID ";
	$pic = 0;
	$eqn = 0;
	$tbl = 0;
	# start a new page unless -b option was selected:
	if (!$opt_single) {
	    $NEXT_LINK = "$BASE-$name.html";
	    &popall;
	    &newbody($NEXT_LINK, $htext);
	    &printtitle("${num}$htext");
	}
	$inbody = 1;
	print "<H$hlevel><A NAME=\"$name\">${num}$htext</H$hlevel>\n";
	if ($b) {
	    print $totoc;
	}
	$toc .= "<LI><A HREF=\"${CURR}\">${num}$htext</A>\n";
	print &format($text), "\n";
	return;
    };

    # unnumbered header
    /^HU$/ && do {
	print "<H$NR{'Hu'}>", &format($args[0]), "</H$NR{'Hu'}>\n\n";
	print &format($text), "\n";
	return;
    };

    /^(RF:\d+)$/ && do {
	$refs .= "\n<LI><A NAME=\"$1\">$text</A>\n\n";
	return;
    };

    /^UR$/ && do {
	$refs .= "\n<DT>$text</A>\n\n";
	return;
    };

    /^SK$/ && do {
	if ($last_cmd !~ /^(SK)$/) {
	    print "<HR>\n";
	}
	print &format($text), "\n";
	return;
    };

    /^TS$/ && do {
	$tbl++;
	$basename = "fsstnd.$HeaderID.$tbl";

	# only expand strings in body of table
	$_ = $text;
	&ds2html;

	# run groff
	open(OUTPUT, "|groff -t -Tps -rN4 > $basename.ps");
	print OUTPUT ".TS\n", $_, "\n.TE\n";
	close(OUTPUT);

	# convert PostScript to GIF
	system "gs -r120 -q -sDEVICE=pbmraw -sOutputFile=$basename.pbm - < $basename.ps > /dev/null 2>&1";
	system "pnmcrop -white $basename.pbm | pnmmargin -white 10 | ppmtogif > $basename.in.gif";
	system "giftrans -t #ffffff -o $basename.tbl.gif $basename.in.gif";
	unlink "$basename.pbm", "$basename.ps", "$basename.in.gif";
	print "<BR><IMG SRC=\"$basename.tbl.gif\">\n<P ALIGN=CENTER>\n<STRONG>Table $HeaderID.$tbl</STRONG>\n<P>\n";
	return;
    };

    /^PS$/ && do {
	$pic++;
	$basename = "fsstnd.$HeaderID.$pic";

	# only expand strings in body of picture
	$_ = $text;
	&ds2html;

	# run groff
	open(OUTPUT, "|groff -p -Tps -rN4 > $basename.ps");
	print OUTPUT ".PS\n", $_, "\n.PE\n";
	close(OUTPUT);

	# convert PostScript to GIF
	system "gs -r120 -q -sDEVICE=pbmraw -sOutputFile=$basename.pbm - < $basename.ps > /dev/null 2>&1";
	system "pnmcrop -white $basename.pbm | pnmmargin -white 10 | pnmmargin -black 1 | ppmtogif > $basename.in.gif";
	system "giftrans -t #ffffff -o $basename.pic.gif $basename.in.gif";
	unlink "$basename.pbm", "$basename.ps", "$basename.in.gif";
	print "<BR><IMG SRC=\"$basename.pic.gif\">\n<P ALIGN=CENTER>\n<STRONG>Figure $HeaderID.$pic</STRONG>\n<P>\n";
	return;
    };

    /^TL$/ && do {
	$text =~ s/(.*) *\\\(em.*/$1/;	# chop after emdash
	$text =~ s/<\S+>//g;		# delete any HTML commands
	$title = $text;
	$title =~ s/^\s+//;		# delete any leading whitespace
	$title =~ s/\s+$//;		# delete any trailing whitespace
	&printtitle("Title Page");
	return;
    };

    # author
    /^AU$/ && do {
	print "<STRONG>", &format($text), "</STRONG>\n<P>\n\n";
	return;
    };

    # abstract start
    /^AS$/ && do {
	print "<STRONG>Abstract</STRONG>\n<P>\n", &format($text), "\n<P>\n";
	return;
    };

    # ignored mm macros
    # AE - abstract end
    # AF - alternate format for 1st page
    # EN - equation end
    # DE - display end
    # FE - footnote end
    # BE - block end
    # MT - memorandum type and title
    # PE - picture end
    # PF - page footer
    # PH - page header
    # S  - point size & vertical spacing
    # SA - right margin justification
    # TC - table of contents
    # TE - table end
    # TM - number a technical memorandum
    /^(AE|AF|EN|DE|FE|BE|MT|PE|PF|PH|S|SA|TC|TE|TM)$/ && do {
	print &format($text), "\n";
	return;
    };

    # ignored groff mm macros
    # COVEND - cover end
    # COVER - cover start
    # PGFORM - page form
    # PGNH - no page header
    /^(COVEND|COVER|PGFORM|PGNH)$/ && do {
	print &format($text), "\n";
	return;
    };

    # unknown mm macros
    /^([A-Z]+)$/ && do {
	warn("$prog: unknown mm macro, \"$cmd\"\n");
	print &format($text), "\n";
	return;
    };

    # troff requests
    &troff2html;
}

sub ms2html {
    # expand references into HTML links:
    s/\[RF:(\d*)\]/<A HREF="${REFS}#RF:$1">[$1]<\/A>/g;

    &popall unless
	/^[LI][PL]/ || /^N[SN]$/ || /^BU[1234]/;

    /^TL$/ && do {
	$text =~ s/(.*) *\\\(em.*/$1/;	# chop after emdash
	$text =~ s/<\S+>//g;		# delete any HTML commands
	$title = $text;
	$title =~ s/^\s+//;		# delete any leading whitespace
	$title =~ s/\s+$//;		# delete any trailing whitespace
	&printtitle("Title Page");
    };

    # ??? - from ms2html.
    /^ST$/ && do {
	print switch_font('B'), &format($text), switch_font('P');
	return;
    };

    # author's institution
    /^AI$/ && do {
	print switch_font('I'), &format($text), switch_font('P'), "<P>\n";
	return;
    };

    # author's name
    /^AU$/ && do {
	print switch_font('B'), &format($text), switch_font('P'), "<P>\n";
	return;
    };

    # abstract begin
    /^AB$/ && do {
	print "<STRONG>Abstract</STRONG>\n<P>\n", &format($text), "\n<P>\n";
	return;
    };

    # indented paragraph
    /^PP$/ && do {
	print "<P>\n";
	print &format($text), "\n";
	return;
    };

    # ??? - from ms2html.
    /^BH$/ && do {
	print "<DL><DT>$text</DL>\n\n";
	return;
    };

    # ??? - from ms2html.
    /^BC$/ && do {
	print "<DL><DD>$text</DL>\n\n";
	return;
    };

    # footnote -- This should be fixed to create HTML 3.0 footnotes.
    /^FS$/ && do {
	print " (<I>", &format($text), "</I>) ";
	return;
    };

#    # from ms2html
#    /^FS$/ && do {
#	print "<DL><DD><I>$text</I></DL>\n\n";
#	return;
#    };

    # quoted paragraph
    /^QP$/ && do {
	print "<BLOCKQUOTE>\n", &format($text), "\n</BLOCKQUOTE>\n";
	return;
    };

    # displays
    /^DS$/ && do {
	print &format($text), "\n";
	return;
    };

#XXX    # don't distinguish LP, LL & IP for nesting purposes:
    /^LP$/ && do {
	&listitem("LP"); print "<DT>$text\n\n";
	return;
    };

    /^LL$/ && do {
	&listitem("LP");
	print "<DT><B>$text</B>\n\n";
	return;
    };

    /^IP$/ && do {
	&listitem("LP");
	print "<DD>$text\n\n";
	return;
    };

    (/^N[SN]$/ || /^BU[1234]$/)
	&& do { &listitem($_); print "<LI>$text\n\n"; return; };

    /^MD$/ && do {
	$md++;
	print "<DL><DT><B>Definition $md</B>\n<DD>$text</DL>\n\n";
	return;
    };

    /^MT$/ && do {
	$mt++;
	print "<DL><DT><B>Theorem $md</B>\n<DD>$text</DL>\n\n";
	return;
    };

    /^ML$/ && do {
	$ml++;
	print "<DL><DT><B>Lemma $md</B>\n<DD>$text</DL>\n\n";
	return;
    };

    /^MP$/ && do {
	$mp++;
	print "<DL><DT><B>Proposition $md</B>\n<DD>$text</DL>\n\n";
	return;
    };

    /^PR$/ && do {
	print "<DL><DT><B>Proof</B>\n<DD>$text</DL>\n\n";
	return;
    };

    if (/^([NS])H(\d)$/) {
	# skip if this is the reference section:
	if (($text eq "References") || ($text eq "Bibliography"))
	{ return; };
	$stype = $1;	# numbered or unnumbered sections
	$H = $2;	# the header level

	if ($H == 1) {
	    if ($stype =~ /N/) { $n1++; $n2 = $n3 = $n4 = 0; $id = "$n1"; }
	    else { $s1++; $s2 = $s3 = $s4 = 0; $id = "$s1"; }
	}
	elsif ($H == 2) {
	    if ($stype =~ /N/) { $n2++; $n3 = $n4 = 0; $id = "$n1.$n2"; }
	    else { $s2++; $s3 = $s4 = 0; $id = "$s1.$s2"; }
	}
	elsif ($H == 3) {
	    if ($stype =~ /N/) { $n3++; $n4 = 0; $id = "$n1.$n2.$n3"; }
	    else { $s3++; $s4 = 0; $id = "$s1.$s2.$s3"; }
	}
	elsif ($H == 4) {
	    if ($stype =~ /N/) { $n4++; $id = "$n1.$n2.$n3.$n4"; }
	    else { $s4++; $id = "$s1.$s2.$s3.$s4"; }
	}

	while ($TL < $H) { $toc .= "<UL>\n"; $TL++; }
	while ($TL > $H) { $toc .= "</UL>\n"; $TL--; }

	$name = "${stype}-$id";		# unique anchor name
	if ($stype =~ /N/) { $num = "$id "; }
	else { $num = ""; }
	# start a new page unless -b option was selected:
	if (!$b) {
	    $NEXT = "$BASE-$name.html" ;
	    &popall;
	    &newbody($NEXT);
	    &printtitle("${num}$text");
	}
	$inbody = 1;
	print "<H$H><A NAME=\"$name\">${num}$text</H$H>\n\n";
	if ($b) { print $totoc; }
	$toc .= "<LI><A HREF=\"${CURR}#$name\">${num}$text</A>\n";
	return;
    }

    /^(RF:\d+)$/ && do {
	$refs .= "\n<LI><A NAME=\"$1\">$text</A>\n\n";
	return;
    };

    /^UR$/ && do {
	$refs .= "\n<DT>$text</A>\n\n";
	return;
    };

    # ???
    /^\\"$/ && do {		# "
        return;
    };

    # these are ignored:
    (/^AE$/ || /^FE$/ || /^DE$/) && do {
	return;
    };

    # unknown ms macros
    /^([A-Z]+)$/ && do {
	warn("$prog: unknown ms macro, \"$cmd\"\n");
	print &format($text), "\n";
	return;
    };

    # troff requests
    &troff2html;
}

sub switch_font {
    local($font) = @_ if @_;
    local($tags) = "";

    # If font is numeric, convert to letter format
    if ($font =~ /^\d+$/) {
	if ($font <= 4) {
	    $font = ("R", "I", "B", "BI")[$font - 1];
	}
	else {
	    die("$prog: invalid numeric font value, `$font'\n");
	}
    }

    # normal font
    if ($font ne "P") {
	if ($#FontStack >= 0) {
	    # end last font
	    $tags .= $FontEnd{$FontStack[0]};
	}
	if (defined($FontStart{$font})) {
	    # start new font, push new font on stack
	    $tags .= $FontStart{$font};
	    push(@FontStack, $font);
	}
	else {
	    # push unknown font on stack
	    push(@FontStack, 'U');	# unknown
	    warn("$prog: unknown font `", $font, "'\n");
	}
    }
    # previous font
    elsif ($#FontStack >= 0) {
	# pop current font off stack, end current font
	$tags .= $FontEnd{pop(@FontStack)};
	if ($#FontStack >= 0) {
	    # if there was a previous font, start previous font
	    $tags .= $FontStart{$FontStack[0]};
	}
    }

    return $tags;
}

sub troff2html {
    local $end, $start;

    # troff font changes
    /^ft$/ && do {
	print switch_font($args[0]), &format($text);
	return;
    };

    # troff "no fill"
    /^nf$/ && do {
	print "<PRE>\n", &format($text), "\n";
	return;
    };

    # troff "fill mode"
    /^fi$/ && do {
	print "</PRE>\n", &format($text), "\n";
	return;
    };

    # number registers
    /^nr$/ && do {
	$NR{$args[0]} = $args[1];
	return;
    };

    # set string definitions
    /^ds$/ && do {
	local($name);

	$name = shift(@args);
	$_ = join(' ', @args);	# want every remaining argument
	&troffize;		# convert only troff escapes
	s/\n$//;		# don't want trailing newline
	$DS{$name} = $_;
	print &format($text), "\n";
	return;
    };

    # troff spaces
    /^sp$/ && do {
	print "<P>\n", &format($text), "\n";
	return;
    };

    # troff line breaks
    /^br$/ && do {
	# only insert a break if we need it
	if ($opt_mm) {
	    if ($last_cmd !~ /^(HU?|P|SP|SK)$/) {
		print "<BR>\n";
	    }
	}
	else {
	    print "<BR>\n";
	}
	print &format($text), "\n";
	return;
    };

    /^el/ && do {
	if ($text =~ s/\\\}//) {
	    print &format($text), "\n";
	}
	return;
    };

    # ignored troff requests
    # lf - change line number
    # nh - no hyphenation
    # af - assign format to register
    /^(lf|nh|af)$/ && do {
	print &format($text), "\n";
	return;
    };

    # empty command!?
    /^$/ && do {
	warn("$prog: empty command\n");
	print &format($text), "\n";
	return;
    };

    # unknown troff commands
    (/^[a-z]*$/) && do {
	$text =~ s/\\\}//g;	# for .ie blocks
	warn("$prog: unknown troff command, \"$cmd\"\n");
	print &format($text), "\n";
	return;
    };

    warn("$prog: unrecognized command, \"$cmd\"\n");
    print &format($text), "\n";
}

# Close the current body page and open a new one.
sub newbody {
    local($NEXT_LINK, $NEXT_TEXT) = @_;
    $NEXT_TEXT =~ s/^((\S+ ){1,3})(.*)/$1/;
    &popall;
    if ($inbody) {
	print "<HR>\n" unless $last_cmd eq "SK";
	&previous;
	&next;
	&up;
	print "<BR>\n<B>Previous:</B> <A HREF=\"$PREV_LINK\">$PREV_TEXT</A>\n";
	print "<B>Next:</B> <A HREF=\"$NEXT_LINK\">$NEXT_TEXT</A>\n";
	print "<B>Up:</B> <A HREF=\"$TOC#TOC\">Table of Contents</A>\n";
	print "<BR><HR>\n";
	print $signature;
    }
    close(STDOUT);
    $PREV_LINK = $CURR;
    $CURR = $NEXT_LINK;
    $PREV_TEXT = $CURR_TEXT;
    $CURR_TEXT = $NEXT_TEXT;
    &newpage($CURR);
}

# terminate the last body page:
sub lastbody {
    local($NEXT_LINK);
    &popall;
    if ($opt_mm) {
	print "<BR>";
	if ($last_cmd !~ /^SK$/) {
	    print "<HR>"
	}
	print "\n";
    }
    else {
	print "<BR><HR>\n";
    }

    &previous;
    # pointer to next only if references exist:
    if ($refs =~ /./) {
	$NEXT_LINK = $REFS; &next;
    };
    &up;

    print "<BR>\n<B>Previous:</B> <A HREF=\"$PREV_LINK\">$PREV_TEXT</A>\n";
    # pointer to next only if references exist:
    if ($refs =~ /./) {
	print "<B>Next:</B> <A HREF=\"$REFS\">References</A>\n";
    };
    print "<B>Up:</B> <A HREF=\"$TOC#TOC\">Table of Contents</A>\n";
    print "<BR><HR>\n";

    # clean up:
    print $signature;
    close(STDOUT);
}

# open a new page
sub newpage {
    local($PAGE) = @_;
    open(STDOUT, ">$PAGE") || die "fatal error: Can't create $PAGE";
    print STDERR "Created $PAGE\n";
}

# start a new list
sub newlist_mm {
    local($type) = @_;		# type of list

    if ($type eq "BL")    { print "\n<UL>\n"; }
    elsif ($type eq "DL") { print "\n<UL>\n"; }
    elsif ($type eq "ML") { print "\n<UL>\n"; }
    elsif ($type eq "VL") { print "\n<UL>\n"; }
    elsif ($type eq "AL") { print "\n<OL>\n"; }
    elsif ($type eq "LB") { print "\n<OL>\n"; }
    # this should never happen
    else {
	die("$prog: (newlist_mm) unknown list type `$type'\n");
    }

    push(@lstack,$type);
}

# pop the current list
sub poplist_mm {
    local($type);

    $type = pop(@lstack);
    if ($type eq "BL")    { print "</UL>\n"; }
    elsif ($type eq "LB") { print "</OL>\n"; }
    elsif ($type eq "ML") { print "</UL>\n"; }
    elsif ($type eq "VL") { print "</UL>\n"; }
    elsif ($type eq "AL") { print "</OL>\n"; }
    elsif ($type eq "DL") { print "</UL>\n"; }
    # this should never happen
    else {
	die("$prog: (poplist_mm) unknown list type `$type'\n");
    }
}

# pop out of all remaining lists
sub popall {
    while ($#lstack > 0) {
	if ($opt_mm) {
	    &poplist_mm;
	}
    }
}

# yep, you guessed it!
sub printtitle {
    local($name) = @_;

    print "<TITLE>$title - $name</TITLE>\n\n" ;
    print "<H1>$title</H1>\n\n";
}

# standard buttons
sub up {
    &button("up_motif","$TOC#TOC");
}

sub previous {
    &button("previous_motif","$PREV_LINK");
}

sub next {
    &button("next_motif","$NEXT_LINK");
}

# This function formats regions of troff text
sub format {
    local($_) = join('',@_);

    &troffize;		# convert standard troff sequences
    &ds2html;		# convert string definitions
    &html'href;		#' change URLs into hypertext links

    0 while s/\n\s*$/\n<P>/g;
    0 while s/\n\s*\n/\n<P>\n/g;
    0 while s/\n$/\n<P>\n/g;

    $_;
}

# This should be called on any text from the document, except for
# titles, and before any other HTML commands have been inserted into
# the text.
sub htmlescapes {
    s/&/&amp;/g;
    s/</&lt;/g;
    s/>/&gt;/g;
}

# Convert standard troff sequences into HTML.
sub troffize {
    local $end, $begin;

    # want to convert troff special characters before we mangle any
    # other elements into HTML format.
    &troffescapes;

    # HTML escapes
    &htmlescapes;

    # convert dead-key accents
    s/\\AE/\&AElig;/g;
    s/\\'([AEIOUYaeiouy])/\&$1acute;/g;
    s/\\[<^]([AEIOUaeiou])/\&$1circ;/g;
    s/\\`([AEIOUaeiou])/\&$1grave;/g;
    s/\\o([Aa])/\&$1ring;/g;
    s/\\~([ANOano])/\&$1tilde;/g;
    s/\\[:"]([AEIOUYaeiouy])/\&$1uml;/g;
    s/\\,([Cc])/\&$1cedil;/g;
    s/\\\/([Oo])/\&$1slash;/g;
    s/\\ss/\&szlig;/g;		# '

    # escape sequences
    s/\\%(\S+)/$1/g;		# hyphenation indication
    s/\\".*//g; 		# end of line comment "

    # font changes
    while (/\\f(\w|\(\w\w|\[\w+\])/) {
	local $font = $1;

	$font =~ s/[([]?(.*)\]?/$1/;
	s/\\f(\w|\(\w\w|\[\w+\])/&switch_font($font)/e;
    }

    # size changes
    s/\\s-[0-9]([\w\s\\]*)\\s0/$1/g;                    # ignore small text
    s/\\s\+[1-9]([\w\s\\]*)\\s0/<STRONG>$1<\/STRONG>/g; # large
    s/\\s\+[1-9]/<STRONG>/g;
    s/\\s0/<\/STRONG>/g;
}

# Convert troff escape sequences into HTML.  Operates on $_.
#
# This should *only* be called from troffize
sub troffescapes {
    # standard escapes
    s/\\\(em/--/g;		# 3/4 em dash
    s/\\\(hy/-/g;		# hyphen
    s/\\-/-/g;			# minus in current font
    # bullet
    if ($opt_latin1 eq '') {
	s/\\\(bu/·/g;
    }
    else {
	s/\\\(bu/o/g;
    }
    # square
    while (s/\\\(sq/[]/) {
	warn("$prog: using `[]' for `\\(sq'\n");
    }
    s/\\\(ru/_/g;		# rule
    s/\\\(14/&frac14;/g;	# 1/4
    s/\\\(12/&frac12;/g;	# 1/2
    s/\\\(34/&frac34;/g;	# 3/4
    s/\\\(fi/fi/g;		# fi ligature
    s/\\\(fl/fl/g;		# fl ligature
    s/\\\(Fi/ffi/g;		# ffi ligature
    s/\\\(Fl/ffl/g;		# ffl ligature
    # degree
    if ($opt_latin1 eq '') {
	s/\\\(de/°/g;
    }
    else {
	s/\\\(de/o/g;
    }
    # dagger
    while (s/\\\(dg/+/) {
	warn("$prog: using `+' for `\\(dg'\n");
    }
    s/\\\(fm/'/g; #'		# foot mark
    # cent sign
    if ($opt_latin1 eq '') {
	s/\\\(ct/¢/g;
    }
    else {
	s/\\\(ct/c/g;
    }
    s/\\\(rg/(R)/g;		# registered
    # copyright
    if ($opt_latin1 eq '') {
	s/\\\(co/©/g;
    }
    else {
	s/\\\(co/(C)/g;
    }

    # miscellaneous characters
    # section symbol
    if ($opt_latin1 eq '') {
	s/\\\(sc/§/g;
    }
    else {
	s/\\\(sc/S/g;
    }
    s/\\\(aa/'/g;		#' acute accent
    s/\\\(ga/`/g;		#` grave accent
    s/\\\(ul/_/g;		# underrule
    s/\\\(->/-&gt;/g;		# right arrow
    s/\\\(<-/&lt;-/g;		# left arrow
    s/\\\(ua/^/g;		# up arrow
    s/\\\(da/v/g;		# down arrow
    s/\\\(br/|/g;		# box rule
    # double dagger
    while (s/\\\(dd/*/) {
	warn("$prog: using `*' for \"\\(dd\"\n");
    }
    s/\\\(rh/=&gt;/g;		# right hand
    s/\\\(lh/&lt;=/g;		# left hand
    s/\\\(ci/O/g;		# circle
    s/\\\(vs/ /g;		# visual space indicator

    # mathematics symbols
    s/\\\(pl/+/g;		# math plus
    s/\\\(mi/-/g;		# math minus
    s/\\\(eq/=/g;		# math equals
    s/\\\(\*\*/*/g;		# math star
    s/\\\(sl/\//g;		# slash (matching backslash)
    s/\\\(sr/\\\//g;		# square root
    s/\\\(rn//g;		# root en extender
    s/\\\(>=/&gt;=/g;		# greater than or equal
    s/\\\(<=/&lt;=/g;		# less than or equal
    s/\\\(==/==/g;		# identically equal
    s/\\\(~~/~~/g;		# approximately equal
    s/\\\(~=/~=/g;		# ?
    s/\\\(ap/~/g;		# approximates
    s/\\\(!=/!=/g;		# not equal
    s/\\\(mu/x/g;		# multiply
    s/\\\(di/\//g;		# divide
    s/\\\(\+-/+-/g;		# plus-minus
    s/\\\(cu/U/g;		# cup (union)
    s/\\\(ca/A/g;		# cup (intersection)
    s/\\\(sb/(=/g;		# subset of
    s/\\\(sp/=)/g;		# superset of
    s/\\\(ib/(_/g;		# improper subset
    s/\\\(ip/_)/g;		# improper superset
    s/\\\(if/oo/g;		# infinity
    s/\\\(pd/a/g;		# partial derivative
    s/\\\(gr/V/g;		# gradient
    s/\\\(no/~/g;		# not
    s/\\\(is/I/g;		# integral sign
    s/\\\(pt/oc/g;		# proportional to
    s/\\\(es/{}/g;		# empty set
    s/\\\(mo/E/g;		# member of
    s/\\\(or/|/g;		# or

    # greek charactters
    if ($opt_latin1 eq '') {
	s/\\\(\*b/ß/g;
	s/\\\(\*m/µ/g;
    }
    else {
	s/\\\(\*b/beta/g;
	s/\\\(\*m/micro/g;
    }
    s/\\\(\*a/alpha/g;
    # beta
    s/\\\(\*g/gamma/g;
    s/\\\(\*d/delta/g;
    s/\\\(\*e/epsilon/g;
    s/\\\(\*z/zeta/g;
    s/\\\(\*y/eta/g;
    s/\\\(\*h/theta/g;
    s/\\\(\*i/iota/g;
    s/\\\(\*k/kappa/g;
    s/\\\(\*l/lambda/g;
    # mu
    s/\\\(\*n/nu/g;
    s/\\\(\*c/xi/g;
    s/\\\(\*o/omicron/g;
    s/\\\(\*p/pi/g;
    s/\\\(\*r/rho/g;
    s/\\\(\*s/sigma/g;
    s/\\\(ts/sigma/g;	# terminal sigma
    s/\\\(\*t/tau/g;
    s/\\\(\*u/upsilon/g;
    s/\\\(\*f/phi/g;
    s/\\\(\*x/chi/g;
    s/\\\(\*q/psi/g;
    s/\\\(\*w/omega/g;
    s/\\\(\*A/ALPHA/g;
    s/\\\(\*B/BETA/g;
    s/\\\(\*G/GAMMA/g;
    s/\\\(\*D/DELTA/g;
    s/\\\(\*E/EPSILON/g;
    s/\\\(\*Z/ZETA/g;
    s/\\\(\*Y/ETA/g;
    s/\\\(\*H/THETA/g;
    s/\\\(\*I/IOTA/g;
    s/\\\(\*K/KAPPA/g;
    s/\\\(\*L/LAMBDA/g;
    s/\\\(\*M/MU/g;
    s/\\\(\*N/NU/g;
    s/\\\(\*C/XI/g;
    s/\\\(\*O/OMICRON/g;
    s/\\\(\*P/PI/g;
    s/\\\(\*R/RHO/g;
    s/\\\(\*S/SIGMA/g;
    s/\\\(\*T/TAU/g;
    s/\\\(\*U/UPSILON/g;
    s/\\\(\*F/PHI/g;
    s/\\\(\*X/CHI/g;
    s/\\\(\*Q/PSI/g;
    s/\\\(\*W/OMEGA/g;

    # bracket-building symbols
    s/\\\(lt/\{/g;
    s/\\\(lb/\{/g;
    s/\\\(rt/\}/g;
    s/\\\(rb/\}/g;
    s/\\\(lk/\{/g;
    s/\\\(rk/\}/g;
    s/\\\(bv/\|/g;
    s/\\\(lf/\[/g;
    s/\\\(rf/\]/g;
    s/\\\(lc/\[/g;
    s/\\\(rc/\]/g;

    # delete characters that we cannot print
    warn("$prog: deleting non-ASCII character `$1'\n") if s/(\\\(\S\S)//;
}

# Expand strings definitions.  Operates on $_ and changes string
# definition escapes into the appropriate character sequence.
sub ds2html {
    # \*x
    while (s/\\\*(\w)/$DS{$1}/) {
	warn("$prog: warning: `$1' not defined\n") unless $DS{$1};
    }
    # \*(xx for a two character name
    while (s/\\\*\((\w\w)/$DS{$1}/) {
	warn("$prog: warning: `$1' not defined\n") unless $DS{$1};
    }
    # \*[xxx] for a name of arbitrary length (groff extension)
    while (s/\\\*[(\w+)]/$DS{$1}/) {
	warn("$prog: warning: `$1' not defined\n") unless $DS{$1};
    }
}

# Based on `shellwords.pl' from the Perl 5 distribution.
#
#   @words = &troffwords($line);
#   @words = &troffwords(@lines);
#   @words = &troffwords;		# defaults to $_ (and clobbers it)
sub troffwords {
    local($_) = join('', @_) if @_;
    local(@words,$snippet,$field);

    s/^\s+//;
    /\001/ && die("$prog: bad characters in troffword: $_\n");
    while ($_ ne '') {
	$field = '';
	s/\\ /\001/g;		# escape all `\ ' to \001
	for (;;) {
	    if (s/^"([^"]*)"//) {
		$snippet = $1;
	    }
	    elsif (s/^"(.*)//) {
		$snippet = $1;
	    }
	    elsif (s/^([^\s"]+)//) {
		$snippet = $1;
	    }
	    else {
		s/^\s+//;
		last;
	    }
	    $snippet =~ s/\001/ /g; # replace \001 with a space
	    $field .= $snippet;
	}
	push(@words, $field);
    }
    @words;
}

__END__
