#!/usr/bin/perl -s # # troff2html - convert troff text to HTML # "-mm" macros supported, "-ms" in development # # Copyright © 1993 Oscar Nierstrasz # Copyright © 1994 Jon Crowcroft # Copyright © 1994-1995 Daniel Quinlan # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # # Requirements to use `troff2html' # # 1. Perl 5 # # 2. html.pl # to extract, normalize and hypertextify URLs in HTML files. # # 3. groff, ghostscript, giftrans, and the PBM utilities # for `pic' and `tbl' conversion to work. giftrans can be # commented out if you don't require tables and figures to be # transparent. # # Acknowledgments: originally based on `mm2html' 1.3 from Jon # Crowcroft , but almost completely rewritten since # then. Ideas from troff2html by John Troyer . # # Things to do # # 1. `eqn' # 2. do references really work? # 3. .B, .I, .R # 4. fix lists in &ms2html, finish basic "ms" support # 5. HTML 3.0 tables # 6. if and ie/el processing - must be done in second pass in order # to test registers # # Good Habits # # 1. Use LB to start a list instead of VL whenever possible. # 2. If you want a "no-fill" region in fixed width font, use # ".nf" and ".fi" in addition to the font change. # 3. Always match any double quotes in macro arguments. (I think you # can now get away with unmatched double quotes.) # location of perl scripts unshift(@INC, $ENV{'HOME'} . "/lib/perl"); # required perl scripts require "button.pl"; require "html.pl"; require "newgetopt.pl"; # constants $version = "v1.4"; $url = 'http://www.pathname.com/~quinlan/troff2html/index.html'; $usage = 'usage: troff2html [options] [files...] -mm translate -mm macros -ms translate -ms macros --single create a single body page only --plain use plain text instead of button to navigate --soelim="x" use program x as a filter for .so requests --latin1 use Latin-1 characters as available '; # Do not alter the order of the first four pairs. %FontStart = ( 'R' => '', # (1) Times Roman 'I' => '', # (2) Times Italic 'B' => '', # (3) Times Bold 'BI' => '', # (4) Times Bold Italic 'C' => '', # Courier 'CW' => '', # constant width 'U' => '' # unknown font ); %FontEnd = ( 'R' => '', # (1) Times Roman 'I' => '', # (2) Times Italic 'B' => '', # (3) Times Bold 'BI' => '', # (4) Times Bold Italic 'C' => '', # Courier 'CW' => '', # constant width 'U' => '' # unknown font ); # defaults $soelim = "gsoelim"; # external soelim program $opt_mm = 1; # name of this script $prog = $0; $prog =~ s@.*/@@; # date chop($date = `date +"%d %B %Y"`); # user, fullname $user = $ENV{'USER'} || $ENV{'LOGNAME'} || (getpwuid($<))[0]; $fullname = (getpwnam($user))[6]; $fullname =~ s/.*-\s*(.*)\(.*//; $fullname =~ s/,.*//; # signature $signature = "Translated by $prog $version on $date by $fullname
\n"; &handle_arguments; foreach $filename (@ARGV) { # run input through `soelim' if (!open(IN, "$soelim $filename |")) { die("$prog: can't create \`$filename': $!\n"); } # first we delete comments, remove ignored blocks $tempname = $filename.$$; if (!open(PASS1, ">$tempname")) { die("$prog: can't create \`$tempname': $!\n"); } $ignore_mode = 0; while () { if (/^\.ig\s*/) { $ignore_mode = 1; } elsif ($ignore_mode && /^\.\./) { $ignore_mode = 0; } else { print PASS1 unless (/^\.\\"/ || $ignore_mode); } } close(IN); close(PASS1); # initialize file-wide variables $toc = ""; # Table of Contents $refs = ""; # references $NR{'Hu'} = 3; # default unnumbered heading level $TL = 0; # Table of Contents level if (!open(FILE, $tempname)) { die("$prog: can't create \`$tempname': $!\n"); } ($BASE = $filename) =~ s/\.mm$//; # drop the .mm suffix $TOC = $BASE . "-toc.html"; # title page filename $CURR = $PREV_LINK = $TOC; # current and previous pages &newpage($TOC); $inbody = 0; if ($single) { $TOTOC = ""; } else { $TOTOC = $TOC; } # some useful strings: $REFS = $BASE . "-refs.html"; $totoc = "To Table of Contents

\n\n"; $torefs = "To References

\n\n"; # the record separator is a newline followed by a dot $/ = "\n\."; # translate: while() { &handle_record; } # gracefully close the last body page: &lastbody; # put the collected table of contents at the end # of the title page: while ($TL > 0) { $toc .= "\n"; $TL--; } open(TOC, ">>$TOC"); print TOC "

\n$toc\n"; if ($refs =~ /./) { print TOC $torefs; } print TOC $signature; close(TOC); # if there are references, print them out: if ($refs =~ /./) { &newpage($REFS); &printtitle("References"); print "

References

\n"; print "

\n$refs\n\n\n"; &up; print "

\n"; print $signature; close(STDOUT); } # remove temporary file unlink ($tempname); } sub handle_arguments { if ($#ARGV < 0) { die ($usage); } &NGetOpt("single", "plain", "soelim=s", "latin1", "mm", "ms"); if ($opt_soelim) { $soelim = $opt_soelim; } } sub handle_record { s/^\.//; # delete initial "." in first record s/\n\.$//; # delete record separator s/[ \t]+\n/\n/g; # delete trailing spaces and tabs 0 while s/\n\\\./\n./g; # unescape leading dots # split input record $last_cmd = $cmd if $cmd =~ /[A-Z][A-Z]/; ($cmd, $args, $text) = /^(\S+)[ \t]*(.*)\n?((.|\n)*)$/; # we cannot just split $args, since "foo bar" is one argument @args = &troffwords($args); if ($opt_mm) { &mm2html; } elsif ($opt_ms) { &ms2html; } else { &troff2html; } } sub mm2html { local($_) = $cmd; # An attempt has been made to put these macros in order of # frequency, with some consideration for parsing time. # ??? - expand references into HTML links: s/\[RF:(\d*)\]/[$1]<\/A>/g; # paragraph /^P$/ && do { print "

\n"; print &format($text), "\n"; return; }; # space # XXX /^SP$/ && do { local $i; if ($args[0] !~ /^\d+$/) { $args[0] = 1; } for (; $args[0] > 0; $args[0]--) { print "
\n"; } print &format($text), "\n"; return; }; # list item /^LI$/ && do { print "

"; # if the mark is not a number, print it after the

if ($args[0] && $args[0] !~ /^[[(<{]?[^0-9][])>}.]?$/) { print " ", &format($args[0]), " "; } print &format($text), "\n"; return; }; # end list /^LE$/ && do { &poplist_mm; print &format($text), "\n"; return; }; # begin list /^(BL|LB|ML|VL|AL|DL)$/ && do { &newlist_mm($1); return; }; # bottom block -- Note that we only print a bottom block once. # This could be fixed to work correctly, but it would be more # trouble than it is worth. /^BS$/ && do { # Only insert

if there is following text. Otherwise, it # really means that we are trying to reset the bottom block. if ($text =~ /\S/) { print "

\n"; } print &format($text), "\n"; return; }; # footnote -- This should be fixed to create HTML 3.0 footnotes. /^FS$/ && do { print " (", &format($text), ") "; return; }; # displays /^DS$/ && do { print &format($text), "\n"; return; }; # numbered header /^H$/ && do { local $hlevel, $htext; if ($#args < 1) { die("$prog: `.H' must have at least two arguments\n"); } $hlevel = $args[0]; $htext = $args[1]; # increment this HeaderLevel $HeaderLevel[$hlevel - 1]++; # zero any HeaderLevel elements greater than this one for $i (($hlevel) .. $#HeaderLevel) { $HeaderLevel[$i] = 0; } # set up ID $HeaderID = "$HeaderLevel[0]"; for $i (1 .. ($hlevel - 1)) { $HeaderID .= "." . $HeaderLevel[$i]; } while ($TL < $hlevel) { $toc .= "

\n"; $TL++; } while ($TL > $hlevel) { $toc .= "\n"; $TL--; } $name = "$HeaderID"; # unique anchor name $num = "$HeaderID "; $pic = 0; $eqn = 0; $tbl = 0; # start a new page unless -b option was selected: if (!$opt_single) { $NEXT_LINK = "$BASE-$name.html"; &popall; &newbody($NEXT_LINK, $htext); &printtitle("${num}$htext"); } $inbody = 1; print "${num}$htext\n"; if ($b) { print $totoc; } $toc .= "

${num}$htext\n"; print &format($text), "\n"; return; }; # unnumbered header /^HU$/ && do { print "", &format($args[0]), "\n\n"; print &format($text), "\n"; return; }; /^(RF:\d+)$/ && do { $refs .= "\n

$text\n\n"; return; }; /^UR$/ && do { $refs .= "\n

$text\n\n"; return; }; /^SK$/ && do { if ($last_cmd !~ /^(SK)$/) { print "

\n"; } print &format($text), "\n"; return; }; /^TS$/ && do { $tbl++; $basename = "fsstnd.$HeaderID.$tbl"; # only expand strings in body of table $_ = $text; &ds2html; # run groff open(OUTPUT, "|groff -t -Tps -rN4 > $basename.ps"); print OUTPUT ".TS\n", $_, "\n.TE\n"; close(OUTPUT); # convert PostScript to GIF system "gs -r120 -q -sDEVICE=pbmraw -sOutputFile=$basename.pbm - < $basename.ps > /dev/null 2>&1"; system "pnmcrop -white $basename.pbm | pnmmargin -white 10 | ppmtogif > $basename.in.gif"; system "giftrans -t #ffffff -o $basename.tbl.gif $basename.in.gif"; unlink "$basename.pbm", "$basename.ps", "$basename.in.gif"; print "

\nTable $HeaderID.$tbl\n

\n"; return; }; /^PS$/ && do { $pic++; $basename = "fsstnd.$HeaderID.$pic"; # only expand strings in body of picture $_ = $text; &ds2html; # run groff open(OUTPUT, "|groff -p -Tps -rN4 > $basename.ps"); print OUTPUT ".PS\n", $_, "\n.PE\n"; close(OUTPUT); # convert PostScript to GIF system "gs -r120 -q -sDEVICE=pbmraw -sOutputFile=$basename.pbm - < $basename.ps > /dev/null 2>&1"; system "pnmcrop -white $basename.pbm | pnmmargin -white 10 | pnmmargin -black 1 | ppmtogif > $basename.in.gif"; system "giftrans -t #ffffff -o $basename.pic.gif $basename.in.gif"; unlink "$basename.pbm", "$basename.ps", "$basename.in.gif"; print "
\n

\nFigure $HeaderID.$pic\n

\n"; return; }; /^TL$/ && do { $text =~ s/(.*) *\\\(em.*/$1/; # chop after emdash $text =~ s/<\S+>//g; # delete any HTML commands $title = $text; $title =~ s/^\s+//; # delete any leading whitespace $title =~ s/\s+$//; # delete any trailing whitespace &printtitle("Title Page"); return; }; # author /^AU$/ && do { print "", &format($text), "\n

\n\n"; return; }; # abstract start /^AS$/ && do { print "Abstract\n

\n", &format($text), "\n

\n"; return; }; # ignored mm macros # AE - abstract end # AF - alternate format for 1st page # EN - equation end # DE - display end # FE - footnote end # BE - block end # MT - memorandum type and title # PE - picture end # PF - page footer # PH - page header # S - point size & vertical spacing # SA - right margin justification # TC - table of contents # TE - table end # TM - number a technical memorandum /^(AE|AF|EN|DE|FE|BE|MT|PE|PF|PH|S|SA|TC|TE|TM)$/ && do { print &format($text), "\n"; return; }; # ignored groff mm macros # COVEND - cover end # COVER - cover start # PGFORM - page form # PGNH - no page header /^(COVEND|COVER|PGFORM|PGNH)$/ && do { print &format($text), "\n"; return; }; # unknown mm macros /^([A-Z]+)$/ && do { warn("$prog: unknown mm macro, \"$cmd\"\n"); print &format($text), "\n"; return; }; # troff requests &troff2html; } sub ms2html { # expand references into HTML links: s/\[RF:(\d*)\]/[$1]<\/A>/g; &popall unless /^[LI][PL]/ || /^N[SN]$/ || /^BU[1234]/; /^TL$/ && do { $text =~ s/(.*) *\\\(em.*/$1/; # chop after emdash $text =~ s/<\S+>//g; # delete any HTML commands $title = $text; $title =~ s/^\s+//; # delete any leading whitespace $title =~ s/\s+$//; # delete any trailing whitespace &printtitle("Title Page"); }; # ??? - from ms2html. /^ST$/ && do { print switch_font('B'), &format($text), switch_font('P'); return; }; # author's institution /^AI$/ && do { print switch_font('I'), &format($text), switch_font('P'), "

\n"; return; }; # author's name /^AU$/ && do { print switch_font('B'), &format($text), switch_font('P'), "

\n"; return; }; # abstract begin /^AB$/ && do { print "Abstract\n

\n", &format($text), "\n

\n"; return; }; # indented paragraph /^PP$/ && do { print "

\n"; print &format($text), "\n"; return; }; # ??? - from ms2html. /^BH$/ && do { print "

$text

\n\n"; return; }; # ??? - from ms2html. /^BC$/ && do { print "

$text

\n\n"; return; }; # footnote -- This should be fixed to create HTML 3.0 footnotes. /^FS$/ && do { print " (", &format($text), ") "; return; }; # # from ms2html # /^FS$/ && do { # print "

$text

\n\n"; # return; # }; # quoted paragraph /^QP$/ && do { print "

\n", &format($text), "\n

\n"; return; }; # displays /^DS$/ && do { print &format($text), "\n"; return; }; #XXX # don't distinguish LP, LL & IP for nesting purposes: /^LP$/ && do { &listitem("LP"); print "

$text\n\n"; return; }; /^LL$/ && do { &listitem("LP"); print "

$text\n\n"; return; }; /^IP$/ && do { &listitem("LP"); print "

$text\n\n"; return; }; (/^N[SN]$/ || /^BU[1234]$/) && do { &listitem($_); print "

$text\n\n"; return; }; /^MD$/ && do { $md++; print "

Definition $md\n: $text

\n\n"; return; }; /^MT$/ && do { $mt++; print "

Theorem $md\n: $text

\n\n"; return; }; /^ML$/ && do { $ml++; print "

Lemma $md\n: $text

\n\n"; return; }; /^MP$/ && do { $mp++; print "

Proposition $md\n: $text

\n\n"; return; }; /^PR$/ && do { print "

Proof\n: $text

\n\n"; return; }; if (/^([NS])H(\d)$/) { # skip if this is the reference section: if (($text eq "References") || ($text eq "Bibliography")) { return; }; $stype = $1; # numbered or unnumbered sections $H = $2; # the header level if ($H == 1) { if ($stype =~ /N/) { $n1++; $n2 = $n3 = $n4 = 0; $id = "$n1"; } else { $s1++; $s2 = $s3 = $s4 = 0; $id = "$s1"; } } elsif ($H == 2) { if ($stype =~ /N/) { $n2++; $n3 = $n4 = 0; $id = "$n1.$n2"; } else { $s2++; $s3 = $s4 = 0; $id = "$s1.$s2"; } } elsif ($H == 3) { if ($stype =~ /N/) { $n3++; $n4 = 0; $id = "$n1.$n2.$n3"; } else { $s3++; $s4 = 0; $id = "$s1.$s2.$s3"; } } elsif ($H == 4) { if ($stype =~ /N/) { $n4++; $id = "$n1.$n2.$n3.$n4"; } else { $s4++; $id = "$s1.$s2.$s3.$s4"; } } while ($TL < $H) { $toc .= "

\n"; $TL++; } while ($TL > $H) { $toc .= "\n"; $TL--; } $name = "${stype}-$id"; # unique anchor name if ($stype =~ /N/) { $num = "$id "; } else { $num = ""; } # start a new page unless -b option was selected: if (!$b) { $NEXT = "$BASE-$name.html" ; &popall; &newbody($NEXT); &printtitle("${num}$text"); } $inbody = 1; print "${num}$text\n\n"; if ($b) { print $totoc; } $toc .= "

${num}$text\n"; return; } /^(RF:\d+)$/ && do { $refs .= "\n

$text\n\n"; return; }; /^UR$/ && do { $refs .= "\n

$text\n\n"; return; }; # ??? /^\\"$/ && do { # " return; }; # these are ignored: (/^AE$/ || /^FE$/ || /^DE$/) && do { return; }; # unknown ms macros /^([A-Z]+)$/ && do { warn("$prog: unknown ms macro, \"$cmd\"\n"); print &format($text), "\n"; return; }; # troff requests &troff2html; } sub switch_font { local($font) = @_ if @_; local($tags) = ""; # If font is numeric, convert to letter format if ($font =~ /^\d+$/) { if ($font <= 4) { $font = ("R", "I", "B", "BI")[$font - 1]; } else { die("$prog: invalid numeric font value, `$font'\n"); } } # normal font if ($font ne "P") { if ($#FontStack >= 0) { # end last font $tags .= $FontEnd{$FontStack[0]}; } if (defined($FontStart{$font})) { # start new font, push new font on stack $tags .= $FontStart{$font}; push(@FontStack, $font); } else { # push unknown font on stack push(@FontStack, 'U'); # unknown warn("$prog: unknown font `", $font, "'\n"); } } # previous font elsif ($#FontStack >= 0) { # pop current font off stack, end current font $tags .= $FontEnd{pop(@FontStack)}; if ($#FontStack >= 0) { # if there was a previous font, start previous font $tags .= $FontStart{$FontStack[0]}; } } return $tags; } sub troff2html { local $end, $start; # troff font changes /^ft$/ && do { print switch_font($args[0]), &format($text); return; }; # troff "no fill" /^nf$/ && do { print "

\n", &format($text), "\n";
	return;
    };

    # troff "fill mode"
    /^fi$/ && do {
	print "

\n", &format($text), "\n"; return; }; # number registers /^nr$/ && do { $NR{$args[0]} = $args[1]; return; }; # set string definitions /^ds$/ && do { local($name); $name = shift(@args); $_ = join(' ', @args); # want every remaining argument &troffize; # convert only troff escapes s/\n$//; # don't want trailing newline $DS{$name} = $_; print &format($text), "\n"; return; }; # troff spaces /^sp$/ && do { print "

\n", &format($text), "\n"; return; }; # troff line breaks /^br$/ && do { # only insert a break if we need it if ($opt_mm) { if ($last_cmd !~ /^(HU?|P|SP|SK)$/) { print "
\n"; } } else { print "
\n"; } print &format($text), "\n"; return; }; /^el/ && do { if ($text =~ s/\\\}//) { print &format($text), "\n"; } return; }; # ignored troff requests # lf - change line number # nh - no hyphenation # af - assign format to register /^(lf|nh|af)$/ && do { print &format($text), "\n"; return; }; # empty command!? /^$/ && do { warn("$prog: empty command\n"); print &format($text), "\n"; return; }; # unknown troff commands (/^[a-z]*$/) && do { $text =~ s/\\\}//g; # for .ie blocks warn("$prog: unknown troff command, \"$cmd\"\n"); print &format($text), "\n"; return; }; warn("$prog: unrecognized command, \"$cmd\"\n"); print &format($text), "\n"; } # Close the current body page and open a new one. sub newbody { local($NEXT_LINK, $NEXT_TEXT) = @_; $NEXT_TEXT =~ s/^((\S+ ){1,3})(.*)/$1/; &popall; if ($inbody) { print "

\n" unless $last_cmd eq "SK"; &previous; &next; &up; print "
\nPrevious: $PREV_TEXT\n"; print "Next: $NEXT_TEXT\n"; print "Up: Table of Contents\n"; print "

\n"; print $signature; } close(STDOUT); $PREV_LINK = $CURR; $CURR = $NEXT_LINK; $PREV_TEXT = $CURR_TEXT; $CURR_TEXT = $NEXT_TEXT; &newpage($CURR); } # terminate the last body page: sub lastbody { local($NEXT_LINK); &popall; if ($opt_mm) { print "
"; if ($last_cmd !~ /^SK$/) { print "

" } print "\n"; } else { print "

\n"; } &previous; # pointer to next only if references exist: if ($refs =~ /./) { $NEXT_LINK = $REFS; &next; }; &up; print "
\nPrevious: $PREV_TEXT\n"; # pointer to next only if references exist: if ($refs =~ /./) { print "Next: References\n"; }; print "Up: Table of Contents\n"; print "

\n"; # clean up: print $signature; close(STDOUT); } # open a new page sub newpage { local($PAGE) = @_; open(STDOUT, ">$PAGE") || die "fatal error: Can't create $PAGE"; print STDERR "Created $PAGE\n"; } # start a new list sub newlist_mm { local($type) = @_; # type of list if ($type eq "BL") { print "\n

\n"; } # this should never happen else { die("$prog: (newlist_mm) unknown list type `$type'\n"); } push(@lstack,$type); } # pop the current list sub poplist_mm { local($type); $type = pop(@lstack); if ($type eq "BL") { print "

\n"; } # this should never happen else { die("$prog: (poplist_mm) unknown list type `$type'\n"); } } # pop out of all remaining lists sub popall { while ($#lstack > 0) { if ($opt_mm) { &poplist_mm; } } } # yep, you guessed it! sub printtitle { local($name) = @_; print "$title - $name\n\n" ; print "

$title

\n\n"; } # standard buttons sub up { &button("up_motif","$TOC#TOC"); } sub previous { &button("previous_motif","$PREV_LINK"); } sub next { &button("next_motif","$NEXT_LINK"); } # This function formats regions of troff text sub format { local($_) = join('',@_); &troffize; # convert standard troff sequences &ds2html; # convert string definitions &html'href; #' change URLs into hypertext links 0 while s/\n\s*$/\n

/g; 0 while s/\n\s*\n/\n

\n/g; 0 while s/\n$/\n

\n/g; $_; } # This should be called on any text from the document, except for # titles, and before any other HTML commands have been inserted into # the text. sub htmlescapes { s/&/&/g; s//>/g; } # Convert standard troff sequences into HTML. sub troffize { local $end, $begin; # want to convert troff special characters before we mangle any # other elements into HTML format. &troffescapes; # HTML escapes &htmlescapes; # convert dead-key accents s/\\AE/\Æ/g; s/\\'([AEIOUYaeiouy])/\&$1acute;/g; s/\\[<^]([AEIOUaeiou])/\&$1circ;/g; s/\\`([AEIOUaeiou])/\&$1grave;/g; s/\\o([Aa])/\&$1ring;/g; s/\\~([ANOano])/\&$1tilde;/g; s/\\[:"]([AEIOUYaeiouy])/\&$1uml;/g; s/\\,([Cc])/\&$1cedil;/g; s/\\\/([Oo])/\&$1slash;/g; s/\\ss/\ß/g; # ' # escape sequences s/\\%(\S+)/$1/g; # hyphenation indication s/\\".*//g; # end of line comment " # font changes while (/\\f(\w|\(\w\w|\[\w+\])/) { local $font = $1; $font =~ s/[([]?(.*)\]?/$1/; s/\\f(\w|\(\w\w|\[\w+\])/&switch_font($font)/e; } # size changes s/\\s-[0-9]([\w\s\\]*)\\s0/$1/g; # ignore small text s/\\s\+[1-9]([\w\s\\]*)\\s0/$1<\/STRONG>/g; # large s/\\s\+[1-9]//g; s/\\s0/<\/STRONG>/g; } # Convert troff escape sequences into HTML. Operates on $_. # # This should *only* be called from troffize sub troffescapes { # standard escapes s/\\\(em/--/g; # 3/4 em dash s/\\\(hy/-/g; # hyphen s/\\-/-/g; # minus in current font # bullet if ($opt_latin1 eq '') { s/\\\(bu/·/g; } else { s/\\\(bu/o/g; } # square while (s/\\\(sq/[]/) { warn("$prog: using `[]' for `\\(sq'\n"); } s/\\\(ru/_/g; # rule s/\\\(14/¼/g; # 1/4 s/\\\(12/½/g; # 1/2 s/\\\(34/¾/g; # 3/4 s/\\\(fi/fi/g; # fi ligature s/\\\(fl/fl/g; # fl ligature s/\\\(Fi/ffi/g; # ffi ligature s/\\\(Fl/ffl/g; # ffl ligature # degree if ($opt_latin1 eq '') { s/\\\(de/°/g; } else { s/\\\(de/o/g; } # dagger while (s/\\\(dg/+/) { warn("$prog: using `+' for `\\(dg'\n"); } s/\\\(fm/'/g; #' # foot mark # cent sign if ($opt_latin1 eq '') { s/\\\(ct/¢/g; } else { s/\\\(ct/c/g; } s/\\\(rg/(R)/g; # registered # copyright if ($opt_latin1 eq '') { s/\\\(co/©/g; } else { s/\\\(co/(C)/g; } # miscellaneous characters # section symbol if ($opt_latin1 eq '') { s/\\\(sc/§/g; } else { s/\\\(sc/S/g; } s/\\\(aa/'/g; #' acute accent s/\\\(ga/`/g; #` grave accent s/\\\(ul/_/g; # underrule s/\\\(->/->/g; # right arrow s/\\\(<-/<-/g; # left arrow s/\\\(ua/^/g; # up arrow s/\\\(da/v/g; # down arrow s/\\\(br/|/g; # box rule # double dagger while (s/\\\(dd/*/) { warn("$prog: using `*' for \"\\(dd\"\n"); } s/\\\(rh/=>/g; # right hand s/\\\(lh/<=/g; # left hand s/\\\(ci/O/g; # circle s/\\\(vs/ /g; # visual space indicator # mathematics symbols s/\\\(pl/+/g; # math plus s/\\\(mi/-/g; # math minus s/\\\(eq/=/g; # math equals s/\\\(\*\*/*/g; # math star s/\\\(sl/\//g; # slash (matching backslash) s/\\\(sr/\\\//g; # square root s/\\\(rn//g; # root en extender s/\\\(>=/>=/g; # greater than or equal s/\\\(<=/<=/g; # less than or equal s/\\\(==/==/g; # identically equal s/\\\(~~/~~/g; # approximately equal s/\\\(~=/~=/g; # ? s/\\\(ap/~/g; # approximates s/\\\(!=/!=/g; # not equal s/\\\(mu/x/g; # multiply s/\\\(di/\//g; # divide s/\\\(\+-/+-/g; # plus-minus s/\\\(cu/U/g; # cup (union) s/\\\(ca/A/g; # cup (intersection) s/\\\(sb/(=/g; # subset of s/\\\(sp/=)/g; # superset of s/\\\(ib/(_/g; # improper subset s/\\\(ip/_)/g; # improper superset s/\\\(if/oo/g; # infinity s/\\\(pd/a/g; # partial derivative s/\\\(gr/V/g; # gradient s/\\\(no/~/g; # not s/\\\(is/I/g; # integral sign s/\\\(pt/oc/g; # proportional to s/\\\(es/{}/g; # empty set s/\\\(mo/E/g; # member of s/\\\(or/|/g; # or # greek charactters if ($opt_latin1 eq '') { s/\\\(\*b/ß/g; s/\\\(\*m/µ/g; } else { s/\\\(\*b/beta/g; s/\\\(\*m/micro/g; } s/\\\(\*a/alpha/g; # beta s/\\\(\*g/gamma/g; s/\\\(\*d/delta/g; s/\\\(\*e/epsilon/g; s/\\\(\*z/zeta/g; s/\\\(\*y/eta/g; s/\\\(\*h/theta/g; s/\\\(\*i/iota/g; s/\\\(\*k/kappa/g; s/\\\(\*l/lambda/g; # mu s/\\\(\*n/nu/g; s/\\\(\*c/xi/g; s/\\\(\*o/omicron/g; s/\\\(\*p/pi/g; s/\\\(\*r/rho/g; s/\\\(\*s/sigma/g; s/\\\(ts/sigma/g; # terminal sigma s/\\\(\*t/tau/g; s/\\\(\*u/upsilon/g; s/\\\(\*f/phi/g; s/\\\(\*x/chi/g; s/\\\(\*q/psi/g; s/\\\(\*w/omega/g; s/\\\(\*A/ALPHA/g; s/\\\(\*B/BETA/g; s/\\\(\*G/GAMMA/g; s/\\\(\*D/DELTA/g; s/\\\(\*E/EPSILON/g; s/\\\(\*Z/ZETA/g; s/\\\(\*Y/ETA/g; s/\\\(\*H/THETA/g; s/\\\(\*I/IOTA/g; s/\\\(\*K/KAPPA/g; s/\\\(\*L/LAMBDA/g; s/\\\(\*M/MU/g; s/\\\(\*N/NU/g; s/\\\(\*C/XI/g; s/\\\(\*O/OMICRON/g; s/\\\(\*P/PI/g; s/\\\(\*R/RHO/g; s/\\\(\*S/SIGMA/g; s/\\\(\*T/TAU/g; s/\\\(\*U/UPSILON/g; s/\\\(\*F/PHI/g; s/\\\(\*X/CHI/g; s/\\\(\*Q/PSI/g; s/\\\(\*W/OMEGA/g; # bracket-building symbols s/\\\(lt/\{/g; s/\\\(lb/\{/g; s/\\\(rt/\}/g; s/\\\(rb/\}/g; s/\\\(lk/\{/g; s/\\\(rk/\}/g; s/\\\(bv/\|/g; s/\\\(lf/\[/g; s/\\\(rf/\]/g; s/\\\(lc/\[/g; s/\\\(rc/\]/g; # delete characters that we cannot print warn("$prog: deleting non-ASCII character `$1'\n") if s/(\\\(\S\S)//; } # Expand strings definitions. Operates on $_ and changes string # definition escapes into the appropriate character sequence. sub ds2html { # \*x while (s/\\\*(\w)/$DS{$1}/) { warn("$prog: warning: `$1' not defined\n") unless $DS{$1}; } # \*(xx for a two character name while (s/\\\*\((\w\w)/$DS{$1}/) { warn("$prog: warning: `$1' not defined\n") unless $DS{$1}; } # \*[xxx] for a name of arbitrary length (groff extension) while (s/\\\*[(\w+)]/$DS{$1}/) { warn("$prog: warning: `$1' not defined\n") unless $DS{$1}; } } # Based on `shellwords.pl' from the Perl 5 distribution. # # @words = &troffwords($line); # @words = &troffwords(@lines); # @words = &troffwords; # defaults to $_ (and clobbers it) sub troffwords { local($_) = join('', @_) if @_; local(@words,$snippet,$field); s/^\s+//; /\001/ && die("$prog: bad characters in troffword: $_\n"); while ($_ ne '') { $field = ''; s/\\ /\001/g; # escape all `\ ' to \001 for (;;) { if (s/^"([^"]*)"//) { $snippet = $1; } elsif (s/^"(.*)//) { $snippet = $1; } elsif (s/^([^\s"]+)//) { $snippet = $1; } else { s/^\s+//; last; } $snippet =~ s/\001/ /g; # replace \001 with a space $field .= $snippet; } push(@words, $field); } @words; } __END__