#!/usr/bin/perl 

# This is a tool which converts the HTML FAQ documentation into a format
# suitable for embedding in a manual page.

use strict;
use warnings;

# faq_html_to_roff(@html_lines)
#
# Converts the lines of the HTML FAQ (each element is one input line,
# normally still carrying its trailing newline) in to man page source and
# returns the converted document as a single string.  The arguments are
# copied, so the caller's data is never modified.
sub faq_html_to_roff {
  my @html_lines = @_;

  # The converted document, built up one line at a time
  my $roff = '';
  # True while we are between a <h2> and its matching </h2>
  my $in_h2 = 0;

  for my $line (@html_lines) {

    # Get rid of leading whitespace
    $line =~ s/^[ \t]+//;

    # <H1> becomes an all-upper-case section heading (the whole line,
    # tags included, is upper-cased; the tag matches are case-insensitive)
    $line =~ tr/a-z/A-Z/ if $line =~ /<h1>/i;
    $line =~ s{<h1>}{.SH "}i;
    $line =~ s{</h1>}{"}i;

    # <H2>s become sub-section headings.  We do the conversion this way
    # because SS must be on a single line in the man page (Disabled because
    # sub section headings look ugly when the text of the sub-heading is
    # wrapped)
    #if(s;<h2>;.SS ";i) {$inq = 1}
    #if(s;</h2>;"\n\n;i) {$inq = 0}
    #if($inq) { s/\n/ /; $lines--; }

    # <H2>s become boldface text which is three columns to the left
    # We convert this way because bold text is ideally on a single line in
    # the man page source
    if ($line =~ s{<h2>}{.B "}i) {
      $in_h2 = 1;
      # The indent change has to come out before the .B line itself
      $roff .= "\n.in -3\n";
    }
    if ($line =~ s{</h2>}{"\n.PP\n}i) {
      $in_h2 = 0;
    }
    # A heading which is still open at the end of this input line is joined
    # with the next line, so that the .B argument stays on one line
    $line =~ s/\n/ / if $in_h2;

    # <UL> and <OL> become the beginning of a list (the an macros do not
    # have an explicit "let's begin a list" macro, so these are simply
    # zapped)
    $line =~ s{<[ou]l>}{}i;

    # <LI> elements become bullets
    # If you want to have real bullets in troff, use the following
    #s;<li>;\n.TP 2\n.if t \\\(bu\n.if n \*\n;ig;
    # (It is a shame that groff's "bullet" on a UTF-8 terminal is not
    # \N'8226'--this is a bug in groff, since the UTF-8 font definition
    # defines a bullet as 8226 [0x2022])
    # However, I feel that people will almost never run a man page
    # through troff, but only through nroff to make an ASCII text page
    # It's better to have simpler *roff source
    $line =~ s{<li>}{.TP 2\n*\n}ig;

    # </OL> and </UL> become the end of a list
    $line =~ s{</[ou]l>}{.PP}i;

    # <pre> becomes .nf
    $line =~ s{<pre>}{.nf}i;
    # </pre> becomes .fi
    $line =~ s{</pre>}{.fi}i;

    # Get rid of all other tags
    $line =~ s{<[^>]*>}{}g;

    # Since nroff likes to break the word "list-subscribe@maradns.org" where
    # the hyphen is in the above "word", we add a troff command (\%)
    # explicitly asking her not to do this before this word.  Every
    # occurrence on the line is protected, not just the first one.
    $line =~ s{(list-subscribe\@maradns\.org)}{\\%$1}g;

    $roff .= $line;
  }

  # Get rid of multiple subsequent newlines, since these mess up nroff
  # formatting
  $roff =~ s/\n\n+/\n\n/g;
  # Get rid of newlines before a .TP element, since these mess up troff
  # output
  $roff =~ s/\n\s*(\n\.TP)/$1/g;
  # Get rid of newlines after a .PP element, since these mess up troff
  # output
  $roff =~ s/(\.PP\s*\n)\s*\n/$1/g;

  return $roff;
}

# Convert the FAQ given in the files named on the command line (or on
# standard input when there are none)
my $faq = faq_html_to_roff(<>);

# Count the number of lines in the converted document (tr with an empty
# replacement list only counts the newlines; it does not change $faq)
my $lines = ($faq =~ tr/\n//);

# We change the line length (.ll) because, in the current FAQ, the way
# "list-subscribe@maradns.org" wraps to the next line is ugly
print ".\\\" Beginning of FAQ ($lines lines long)\n.ll +2\n";
print $faq;
print ".ll -2\n.\\\" End of FAQ ($lines lines long)\n";
