#!/usr/bin/perl

#   Vpl2vpl: a program to generate accented virtual fonts for TeX
#   Copyright (C) 1997 John D. Smith

#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.

#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.

#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

$version = 0.22;
#------------------------------------------------------------------------#
$description =
"Syntax: vpl2vpl -d definition-file [-s shrink-factor]
           [-c candrabindu-adjustment] [-b] vpl-file

Vpl2vpl creates new TeX virtual fonts based on existing virtual
fonts (\"input fonts\"). A successful run will read a vpl (Virtual
Property List) file and a definition file, and will generate a new
vpl file on standard output. The input font is assumed to adhere to
the standard TeX encoding for text fonts unless it was created with
the program afm2tfm, in which case it is assumed to conform to the
encoding specified in the file dvips.enc. In either case, the name
of the input font is assumed to be the name of the input file
without its .vpl extension: it must conform to normal TeX
conventions for naming fonts, as vpl2vpl attempts to draw
conclusions from it about the kind of font it is dealing with.

In the case of a non-virtual font (such as the Computer Modern
fonts), use a pl (Property List) file, created with the standard
utility tftopl, as the input file in place of a vpl file; vpl2vpl
will make the necessary changes on the fly.

A typical complete sequence of commands to create a new virtual
font might therefore be
     afm2tfm Times-Roman.afm -t dvips.enc -v ptmr rptmr
     vpl2vpl -d ISO-Latin1.def ptmr.vpl >ptmr_isol1.vpl
     vptovf ptmr_isol1.vpl ptmr_isol1.vf ptmr_isol1.tfm
for a PostScript font, or
     tftopl cmr10.tfm cmr10.pl
     vpl2vpl -d ISO-Latin1.def cmr10.pl >cmr10_isol1.vpl
     vptovf cmr10_isol1.vpl cmr10_isol1.vf cmr10_isol1.tfm
for a Computer Modern font.

In order to keep the whole upper half of the character set free for
the requirements of the encoding specified in the definition file,
certain modifications are made to input fonts following the
dvips.enc encoding to bring them into greater conformity with the
TeX norm. In particular, the characters dotaccent and hungarumlaut
are placed in the positions assigned by TeX (\"5F, \"7D), not those
enforced by dvips.enc (\"C7, \"CD). The f-ligatures, double quotes
and dashes are also moved from the upper half of the character set
to their normal TeX positions. As a result, the following characters
are not found in the lower half of the character set: quotesingle,
quotedbl, backslash, underscore, braceleft, bar, braceright. These
characters can, however, be assigned positions in the output font if
they are needed. (Indeed, they could all be explicitly restored to
their dvips.enc positions if this were desired.)

Options:

  -d should refer to a font definition file. This file (which could
     usefully be named, e.g., \"French.def\") should consist of
     lines of character definitions, in the form
               \"number\"   \"character\"
     or
               \"number\"   \"character\"   \"accent\"
     Here \"number\" represents the character's position in the new
     encoding and may be expressed in decimal, octal or hex;
     \"character\" names the character (e.g. \"comma\", \"eight\",
     \"A\") or consists of the word \".notdef\" (indicating that
     the specified number's \"slot\" in the new encoding is to be
     empty); and \"accent\" optionally names an accent to be placed
     on the character. In addition to the standard accents available
     in PostScript fonts, \"underbar\" and \"underdot\" are also
     available, as are \"under\" versions of all the normal
     superscript accents (\"underdieresis\", \"underring\", etc.).
     The Indian accent \"candrabindu\" may also be specified: it
     is formed by overprinting a breve with a dotaccent. Finally,
     \"overdot\" may be used as a synonym for \"dotaccent\".

     If the character named in the \"accent\" position is not in fact
     a valid accent character, the program interprets the definition
     as a request for a digraph formed from the \"character\" and the
     \"accent\". A digraph consisting of, say, \"k\" and \"h\" will be
     indistinguishable from the letters \"k\" and \"h\" printed
     consecutively, but the digraph \"kh\" can itself receive accents
     like any other character: see next paragraph.

     A new character (such as \"amacron\" or \"kh\") may be freely
     used in the \"character\" position of a further definition (such
     as \"amacron breve\" or \"kh underbar\"). There is no constraint
     on the ordering of definitions within a definition file. The
     definition of \"a macron\" does not have to precede that of
     \"amacron breve\": requests for \"impossible\" characters are
     deferred until their constituents have had a chance to come into
     being.

     \"Slots\" for which no new definition is given retain the
     definition they have in the input font.

     The definition file may also contain blank lines and comments
     (introduced by \"\#\").

  -s may optionally give the factor, expressed as a per-thousand
     value, by which normally superscript accents (such as dieresis,
     ring) should be shrunk when they are used as subscript accents
     (such as underdieresis, underring). Values of around 800 may be
     found useful.

  -c may optionally give two comma-separated numerical values to
     adjust the x and y coordinates of the dotaccent placed within a
     breve to form the candrabindu accent. A coordinate scheme using
     \"DESIGNUNITS R 1000\" is assumed.

  -b may optionally be specified to block the use of predefined
     accented characters, forcing vpl2vpl to define its own
     versions. This may be useful to secure a consistent appearance
     in cases where a font designer does not share vpl2vpl's views
     on where accents should be placed.

  -h prints this help.
";
#------------------------------------------------------------------------#

########################
# Packages and constants
########################
#
use File::Basename;
use Getopt::Std;
$cmdline = basename($0) . " " . join " ", @ARGV;
getopts('d:s:c:bh');
if  ($opt_h or !$opt_d or $#ARGV != 0) {
   print STDERR $description;
   exit 1;
}

$filename = $ARGV[0];
($fontname = $filename) =~ s/\..*$//;
($encname = basename($opt_d)) =~ s/\..*$//;
$vtitle = "(VTITLE Font $fontname modified for $encname encoding by vpl2vpl";
$vtitle .= " v. $version" if $version;
$vtitle .= ")\n(COMMENT Command line: $cmdline)";

#
# Flags for bold and small caps. These are probably a bit iffy, but
# there's not much that can be done about it.
#
if ($fontname =~ /(^p.*b[oi]?[c]?$|^[^p].*bx[a-z]*[0-9]+$)/) { $bold = 1 }
if ($fontname =~ /(^p.*c$|^[^p].*csc[a-z]*[0-9]+$)/)         { $scaps = 1 } 
if ($opt_s) { $shrink = $opt_s / 1000 } else { $shrink = 1 }

#
# Array to convert from number to vpl representation
#
foreach $i (0 .. 255) {
   $nv[$i] = (chr($i) =~ /[0-9A-Za-z]/ ? "C " . chr $i : sprintf("O %lo", $i));
}

#
# Now the encoding vectors.
#
@TeXenc = (
   "Gamma",          "Delta",          "Theta",          "Lambda",
   "Xi",             "Pi",             "Sigma",          "Upsilon",
   "Phi",            "Psi",            "Omega",          "ff",
   "fi",             "fl",             "ffi",            "ffl",
   "dotlessi",       "dotlessj",       "grave",          "acute",
   "caron",          "breve",          "macron",         "ring",
   "cedilla",        "germandbls",     "ae",             "oe",
   "oslash",         "AE",             "OE",             "Oslash",
   "space",          "exclam",         "quotedblright",  "numbersign",
   "dollar",         "percent",        "ampersand",      "quoteright",
   "parenleft",      "parenright",     "asterisk",       "plus",
   "comma",          "hyphen",         "period",         "slash",
   "zero",           "one",            "two",            "three",
   "four",           "five",           "six",            "seven",
   "eight",          "nine",           "colon",          "semicolon",
   "exclamdown",     "equal",          "questiondown",   "question",
   "at",             "A",              "B",              "C",
   "D",              "E",              "F",              "G",
   "H",              "I",              "J",              "K",
   "L",              "M",              "N",              "O",
   "P",              "Q",              "R",              "S",
   "T",              "U",              "V",              "W",
   "X",              "Y",              "Z",              "bracketleft",
   "quotedblleft",   "bracketright",   "circumflex",     "dotaccent",
   "quoteleft",      "a",              "b",              "c",
   "d",              "e",              "f",              "g",
   "h",              "i",              "j",              "k",
   "l",              "m",              "n",              "o",
   "p",              "q",              "r",              "s",
   "t",              "u",              "v",              "w",
   "x",              "y",              "z",              "endash",
   "emdash",         "hungarumlaut",   "tilde",          "dieresis"
);

@dvipsenc = (
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        "quotesingle",    "exclamdown",     "questiondown",
   "dotlessi",       "dotlessj",       "grave",          "acute",
   "caron",          "breve",          "macron",         "ring",
   "cedilla",        "germandbls",     "ae",             "oe",
   "oslash",         "AE",             "OE",             "Oslash",
   "space",          "exclam",         "quotedbl",       "numbersign",
   "dollar",         "percent",        "ampersand",      "quoteright",
   "parenleft",      "parenright",     "asterisk",       "plus",
   "comma",          "hyphen",         "period",         "slash",
   "zero",           "one",            "two",            "three",
   "four",           "five",           "six",            "seven",
   "eight",          "nine",           "colon",          "semicolon",
   "less",           "equal",          "greater",        "question",
   "at",             "A",              "B",              "C",
   "D",              "E",              "F",              "G",
   "H",              "I",              "J",              "K",
   "L",              "M",              "N",              "O",
   "P",              "Q",              "R",              "S",
   "T",              "U",              "V",              "W",
   "X",              "Y",              "Z",              "bracketleft",
   "backslash",      "bracketright",   "circumflex",     "underscore",
   "quoteleft",      "a",              "b",              "c",
   "d",              "e",              "f",              "g",
   "h",              "i",              "j",              "k",
   "l",              "m",              "n",              "o",
   "p",              "q",              "r",              "s",
   "t",              "u",              "v",              "w",
   "x",              "y",              "z",              "braceleft",
   "bar",            "braceright",     "tilde",          "dieresis",
   "asciicircum",    "asciitilde",     "Ccedilla",       "Iacute",
   "Icircumflex",    "atilde",         "edieresis",      "egrave",
   "scaron",         "zcaron",         "Eth",            "ff",
   "ffi",            "ffl",            ".notdef",        ".notdef",
   ".notdef",        ".notdef",        "Scaron",         ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   "Ydieresis",      ".notdef",        "Zcaron",         ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        "cent",           "sterling",
   "fraction",       "yen",            "florin",         "section",
   "currency",       "copyright",      "quotedblleft",   "guillemotleft",
   "guilsinglleft",  "guilsinglright", "fi",             "fl",
   "degree",         "endash",         "dagger",         "daggerdbl",
   "periodcentered", ".notdef",        "paragraph",      "bullet",
   "quotesinglbase", "quotedblbase",   "quotedblright",  "guillemotright",
   "ellipsis",       "perthousand",    ".notdef",        ".notdef",
   "Agrave",         "Aacute",         "Acircumflex",    "Atilde",
   "Adieresis",      "Aring",          ".notdef",        "dotaccent",
   "Egrave",         "Eacute",         "Ecircumflex",    "Edieresis",
   "Igrave",         "hungarumlaut",   "ogonek",         "Idieresis",
   "emdash",         "Ntilde",         "Ograve",         "Oacute",
   "Ocircumflex",    "Otilde",         "Odieresis",      ".notdef",
   ".notdef",        "Ugrave",         "Uacute",         "Ucircumflex",
   "Udieresis",      "Yacute",         "Thorn",          ".notdef",
   "agrave",         "aacute",         "acircumflex",    "ordfeminine",
   "adieresis",      "aring",          ".notdef",        "ccedilla",
   "Lslash",         "eacute",         "ecircumflex",    "ordmasculine",
   "igrave",         "iacute",         "icircumflex",    "idieresis",
   ".notdef",        "ntilde",         "ograve",         "oacute",
   "ocircumflex",    "otilde",         "odieresis",      ".notdef",
   "lslash",         "ugrave",         "uacute",         "ucircumflex",
   "udieresis",      "yacute",         "thorn",          "ydieresis"
);

###############
# Read DEF file
###############
#
open DEF, $opt_d or die "Cannot open $opt_d: $!\n";
while (<DEF>) {
   next if (/^\s*$/ || /^\#/);
   s/\s*(\#.*)?$//;
   push @deflines, $_;
}
close DEF;

###############
# Read VPL file
###############
#
# File header
#
$vplhead = <> or exit 1;
unless ($vplhead =~ /^\((VTITLE|FAMILY) /) {
   die "$filename is not a vpl file: giving up\n"
}
do {
   $_ = <>;
   $vplhead .= $_;
} until ($_ =~ /^\(LIGTABLE$/ or eof);
if (eof) {
   die "$filename does not seem to be a text font (no LIGTABLE): giving up\n";
}
if ($vplhead =~ s/\A\(VTITLE(.*)$/$vtitle\n(COMMENT Old vtitle:$1/m) {
   $vplhead =~ s/\n\(COMMENT Please edit that VTITLE .*\)$//m;
   @enc = @dvipsenc;
   $dvips = 1;
}
else {
   $vplhead =~ s/\A/$vtitle\n/m;
   @enc = @TeXenc;
   $dvips = 0;
}
if ($vplhead =~ /^\(CODINGSCHEME TEX MATH SYMBOLS/m) {
   die "$filename is a TeX math font: giving up\n";
}
unless ($vplhead =~ s/^(\(CODINGSCHEME .*\+\s?)(\S+)\)$/$1$encname)/m) {
   $vplhead =~ s/^(\(CODINGSCHEME .*)\)$/$1 + $encname)/m;
}
if ($vplhead =~ /^\(DESIGNUNITS R (.+)\)/m)    { $scale = $1 }
else {$scale = 1 }
if ($vplhead =~ /^   \(SLANT R (.+)\)/m)       { $slant = $1 }
if ($vplhead =~ /^   \(XHEIGHT [DR] (.+)\)/m)  { $xheight = $1 }
unless ($vplhead =~ /^\(MAPFONT /m) {
   if ($vplhead =~ /^\(DESIGNSIZE R (.*)\)$/m) { $dsize = $1  }
   $mapfont = "\n(MAPFONT D 0\n   (FONTNAME $fontname)\n   (FONTDSIZE R $dsize)\n   )";
   $vplhead =~ s/\n\(LIGTABLE\Z/$mapfont$&/m;
}
if ($opt_s) {
   $vplhead =~ s[^(\(MAPFONT D )0(.*?)(   \))]
                [$&\n${1}1$2   (FONTAT R ${ \($shrink * $scale) })\n$3]ms;
}

#
# Ligatures and kerns
#
do {
   $_ = <>;
   s/ \(comment .*$//i;
   $ligs .= $_;
} until $_ =~ /^   \)/;
#
# Now build a hash to convert from vpl representation to char name
# and use it to make ligtable readable
#
foreach $i (0 .. 255) { $vc{$nv[$i]} = $enc[$i] }
$ligs =~ s/^(   \((?:LABEL|KRN) )(\S+ \S+)(.*\))$/$1$vc{$2}$3/gm;
$ligs =~ s/^(   \(LIG )(\S+ \S+) (\S+ \S+)\)$/$1$vc{$2} $vc{$3})/gm;

#
# Character definitions: store "encoded" defs in @chars, store *all*
# defs in %allchars
#
$_ = <>;
do {
   if (/^\(CHARACTER/) {
      $character = $_;
      do {
	 $_ = <>;
	 $character .= $_;
      } until $_ =~ /^   \)/;
      storeinfo($character);
      $_ = <>;
   }
} until eof;
foreach $i (0 .. $#chars) {
   if ($chars[$i] and $enc[$i] ne ".notdef") { 
      $allchars{$enc[$i]} = $chars[$i];
   }
}

##################
# Set up constants
##################

$subacc = "(cedilla|ogonek|commaaccent)";
$supacc = "(grave|acute|circumflex|tilde|macron|breve|dotaccent|overdot|dieresis|ring|hungarumlaut|caron|candrabindu)";
$underacc = "(underdot|under$supacc)";
$accents = "($subacc|$supacc|$underacc|underbar)";
$underadp = 0.230;				# depth of "under" accs
$underddp = 0.213;				# depth of underdot
if ($bold) { $thk = 0.072 } else { $thk = 0.052 }	# thickness and
$underbdp = 0.082 + $thk;				# depth of underbar
$capheight = $allchars{"X"}{ht};
$accheight = $allchars{"macron"}{ht};
$accdepth = $accheight - $thk * $scale;	# probable approx. "depth" of macron
$v1 = $accheight - $xheight;		# vertical offset for double accents
$v2 = $capheight - $xheight;		# vertical offset for accented caps etc
if ($scaps)  {						# accented small caps
    $scoffset = $allchars{"x"}{ht} - $xheight;
    $v1 += $scoffset;
}
if ($opt_c) {						# candrabindu
   ($cbx, $cby) = $opt_c =~ /^(.*),(.*)$/;
   $cbx += ($allchars{"breve"}{wd} - $allchars{"dotaccent"}{wd}) / 2;
   $cbx /= 1000;
   $cby /= 1000;
}

######################
# Build the characters
######################
#
# First normalise dvips.enc encoding quirks
#
if ($dvips) {
   chmove("fi", 014);
   chmove("fl", 015);
   chmove("quotedblright", 042);
   chmove("quotedblleft", 0134);
   chmove("dotaccent", 0137);
   chmove("endash", 0173);
   chmove("emdash", 0174);
   chmove("hungarumlaut", 0175);
}
#
# Now build a list of definitions supplied by user
#
for (@deflines) {
   if (/^\s*(\d+|0[0-7]+|0x[0-9a-fA-F]+)\s+([a-zA-Z]+?|\.notdef)(?:\s+([a-zA-Z]+))?$/) {
      ($num, $char, $acc)  = ($1, $2, $3);
      $num = oct $num if $num =~ /^0/;
      if ($num > 255) { die "Bad definition (number out of range): $_\n" }
      $def = {};
      $def->{qdef}  = $_;
      $def->{num}   = $num;
      $def->{char}  = $char;
      $def->{acc}   = $acc;
      $def->{nchar} = $char . $acc;
      push @nchars, $def->{nchar};
      push @defs,   $def;
   }
   else { die "Bad definition: $_\n" }
}
#
# Work through the list
#
while (@defs) {
   $def  = shift @defs;
   $qdef  = $def->{qdef};
   $num   = $def->{num};
   $char  = $def->{char};
   $acc   = $def->{acc};
   $nchar = $def->{nchar};
   #
   # If we can't handle $char/$acc yet, but believe we will be able
   # to later, send the definition to the back of the queue. In case
   # it later turns out we were wrong, allow only five loops before
   # giving up.
   #
   if (!$allchars{$char} and $char ne ".notdef") {
      if (grep /^$char$/, @nchars) {
	 unless (++$def->{requeue} > 5) {
	    push @defs, $def;
	    next;
	 }
      }
      else { die "Bad definition (no such character): $qdef\n" }
   }
   if ($acc and !$allchars{$acc} and $acc !~ /^$accents$/) {
      if (grep /^$acc$/, @nchars) {
	 unless (++$def->{requeue} > 5) {
	    push @defs, $def;
	    next;
	 }
      }
      else { die "Bad definition (no such accent): $qdef\n" }
   }
   #
   # Remove any existing claims on $num
   #
   @{ $allchars{$chars[$num]{id}}{num} } = grep !/$num/,
     @{ $allchars{$chars[$num]{id}}{num} };
   #
   # First deal with .notdef
   #
   if ($nchar eq ".notdef") {
      undef $chars[$num];
   }
   #
   # Next look among existing chars (unless blocked by -b)
   #
   elsif (!($acc and $opt_b) and $allchars{$nchar}) { 
      push( @{ $allchars{$nchar}{num} }, $num);
      $chars[$num] = $allchars{$nchar};
   }
   #
   # If it can't be built from sub-elements, issue a warning and move on
   #
   elsif (!$acc) {
      warn "No such character - ignoring definition: $qdef\n";
      undef $chars[$num];
   }
   #
   # Now build the char
   #
   else {
      #
      # First get rid of predefined/duplicated ligtable statements
      # and character definitions; also synonyms
      #
      $ligs =~ s/\n   \((LABEL|KRN|LIG) ($nchar .*|.*$nchar)\)$//gm;
      $allchars{$nchar} = ();
      if ($acc eq "overdot") {
         $nchar2 = $char . "dotaccent";
	 $ligs =~ s/\n   \((LABEL|KRN|LIG) ($nchar2 .*|.*$nchar2)\)$//gm;
	 delete $allchars{$nchar2};
      }
      #
      # Go!
      #
      if ($acc =~ /^$subacc$/) {
	 subacc($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
      elsif ($acc =~ /^$supacc$/) {
	 supacc($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
      elsif ($acc =~ /^$underacc$/) {
	 underacc($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
      elsif ($acc =~ /^underbar$/) {
	 underb($num, $char, $nchar);
	 fixkerns($char, $acc);
      }
      else {
	 digraph($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
   }
}

###################
# Sort out ligtable
###################
#
# Convert to vpl representation, eliminating statements invoking
# "unencoded" characters
#
@liglist = split /\n/, $ligs;
$ligs = "";
foreach (@liglist) {
   if (/^(   \(LIG \S+ )(\S+)\)$/) {
      if ($n = ${ $allchars{$2}{num} }[0]) {
	 s/^(   \(LIG \S+ )(\S+)\)$/$1$nv[$n])/;
      }
      else { next }
   }
   if (/^(   \((?:LABEL|LIG|KRN) )([^ )]+)(.*)$/) {
      ($one, $two, $three) = ($1, $2, $3);
      foreach $n (@{ $allchars{$two}{num} }) {
	 $ligs .= "$one$nv[$n]$three\n";
      }
   }
   else { $ligs .= "$_\n" }
}
#
# Eliminate sequences orphaned by elimination of a LABEL
#
@liglist = split /   \(STOP\)\n/, $ligs;
$ligs = "";
foreach (@liglist) {
   if (/^   \(LABEL /m) { $ligs .= "$_   (STOP)\n" }
   elsif (/^   \)$/m) { $ligs .= $_ }
}
#
# Eliminate empty statements
#
$ligs =~ s/(^   \(LABEL .*\)\n)+   \(STOP\)\n//gm;

####################
# Output the results
####################
#
print $vplhead, $ligs;
foreach $i (0 .. 255) { if (defined $chars[$i]{id}) { printchar($i) } }

#####################
# End of main program
#####################

sub storeinfo {
   #
   # Extract info from a character definition and store it in @chars
   #
   my $char = shift;
   my $num;
   if ($char =~ /\A\(CHARACTER O ([0-7]+)/m) { $num = oct $1 }
   elsif ($char =~ /\A\(CHARACTER C (.)/m) { $num = ord $1 }
   $chars[$num]{id} = $enc[$num];
   push( @{ $chars[$num]{num} }, $num);
   if ($char =~ /^   \(CHARWD R (.*?)\)$/m) { $chars[$num]{wd} = $1 }
   if ($char =~ /^   \(CHARHT R (.*?)\)$/m) { $chars[$num]{ht} = $1 }
   if ($char =~ /^   \(CHARDP R (.*?)\)$/m) { $chars[$num]{dp} = $1 }
   if ($char =~ /^   \(CHARIC R (.*?)\)$/m) { $chars[$num]{ic} = $1 }
   if ($char =~ /^   \(MAP\n((.|\n)*)^      \)/m) { $chars[$num]{map} = $1 }
   else { $chars[$num]{map} = "      (SETCHAR $nv[$num])\n" }
}

sub printchar {
   #
   # Extract info from @chars and build it into a character definition
   #
   my $num = shift;
   print "(CHARACTER ";
   if (chr($num) =~ /[0-9A-Za-z]/) { print "C " . chr($num) }
   else {
      printf "O %lo", $num;
      print " (COMMENT " . $chars[$num]{id} . ")";
   }
   print "\n";
   print "   (CHARWD R " . $chars[$num]{wd} . ")\n" if $chars[$num]{wd};
   print "   (CHARHT R " . $chars[$num]{ht} . ")\n" if $chars[$num]{ht};
   print "   (CHARDP R " . $chars[$num]{dp} . ")\n" if $chars[$num]{dp};
   print "   (CHARIC R " . $chars[$num]{ic} . ")\n" if $chars[$num]{ic};
   print "   (MAP\n";
   print $chars[$num]{map};
   print "      )\n";
   print "   )\n";
}

sub chmove {
   #
   # Move a character
   #
   my ($char, $num) = @_;
   my $i;
   foreach $i (@{ $allchars{$char}{num} }) { undef $chars[$i] }
   @{ $allchars{$char}{num} } = ();
   @{ $allchars{$chars[$num]{id}}{num} } = grep !/$num/,
     @{ $allchars{$chars[$num]{id}}{num} };
   push( @{ $allchars{$char}{num} }, $num);
   $chars[$num] = $allchars{$char};
}

sub max {
   #
   # Return greater of two values
   #
   my ($a, $b) = @_;
   return $a > $b ? $a : $b;
}

sub subacc {
   #
   # Subscript accents
   #
   my ($num, $char, $acc, $id) = @_;
   my ($h, $s1, $s2, $s3);
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$char}{ht};
   $allchars{$id}{dp} = $allchars{$acc}{dp};
   $allchars{$id}{ic} = $allchars{$char}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   $h = sprintf("%.3f", ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2);
   if ($h > 0) { $s2 = "      (MOVERIGHT R $h) " }
   elsif ($h < 0) {
      $h = -$h;
      $s2 = "      (MOVELEFT R $h) ";
   }
   else { $s2 = "      " }
   $s3 = $allchars{$acc}{map};
   $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   $allchars{$id}{map} = "      $s1\n$s2$s3\n";
   $chars[$num] = $allchars{$id};
}

sub supacc {
   #
   # Superscript accents
   #
   my ($num, $char, $acc, $id) = @_;
   my ($cb, $h, $hadj, $tallchar, $ic, $s1, $s2, $s3);
   if ($char eq "i" and $allchars{"dotlessi"}) { $char = "dotlessi" }
   if ($char eq "j" and $allchars{"dotlessj"}) { $char = "dotlessj" }
   if ($acc eq "overdot") { $acc = "dotaccent" }
   if ($acc eq "candrabindu") {
      $acc = "breve";
      ($cb = $allchars{"dotaccent"}{map}) =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   }
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$acc}{ht};
   $allchars{$id}{dp} = $allchars{$char}{dp};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   if ($scaps and $char =~ /^[a-z]/) {			# accented small caps
      $tallchar = 1;
      if ($char =~ /$supacc$/
	   and $char !~ /under$supacc$/) {		# double accs
	 $s2 = "      (MOVEUP R $v1)";
	 $allchars{$id}{ht} += $v1;
	 $hadj = sprintf("%.3f", $v1 * $slant);
	 $ic = $allchars{$char}{ic};
      }
      else {						# single accs
	 $s2 = "      (MOVEUP R $scoffset)";
	 $allchars{$id}{ht} += $scoffset;
	 $hadj = sprintf("%.3f", $scoffset * $slant);
	 $ic = $allchars{$acc}{ic} + $hadj;
      }
   }
   elsif ($allchars{$char}{ht} >= ($accheight + $v2)) {	# double accs
      $tallchar = 1;					# on caps etc.
      $s2 = "      (MOVEUP R ${ \($v1 + $v2) })";
      $allchars{$id}{ht} += ($v1 + $v2);
      $hadj = sprintf("%.3f", ($v1 + $v2) * $slant);
      $ic = $allchars{$char}{ic};
   }
   elsif ($allchars{$char}{ht} > 1.15 * $xheight) {
      $tallchar = 1;
      if ($char =~ /$supacc$/
	   and $char !~ /under$supacc$/) {		# double accs
	 $s2 = "      (MOVEUP R $v1)";
	 $allchars{$id}{ht} += $v1;
	 $hadj = sprintf("%.3f", $v1 * $slant);
	 $ic = $allchars{$char}{ic};
      }
      else {						# caps etc.
	 $s2 = "      (MOVEUP R $v2)";
	 $allchars{$id}{ht} += $v2;
	 $hadj = sprintf("%.3f", $v2 * $slant);
	 $ic = $allchars{$char}{ic};
      }
   }
   else {						# single accs
      $s2 = "     ";
      $ic = $allchars{$acc}{ic};
   }
   $h = sprintf("%.3f", ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2);
   unless ($tallchar) { $ic -= $h }
   $allchars{$id}{ic} = $ic unless $ic < 0;
   $h += $hadj;
   if ($h > 0) { $s2 .= " (MOVERIGHT R $h) " }
   elsif ($h < 0)  {
      $h = -$h;
      $s2 .= " (MOVELEFT R $h) ";
   }
   else { $s2 .= " " }
   $s3 = $allchars{$acc}{map};
   $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   if ($cb) {						# candrabindu
      $cb = $s2 . $cb;
      if ($cbx) {
	 unless (($cb =~ s/(MOVERIGHT R )([0-9.]+)/$1 . ($2 + $cbx * $scale)/e)
	   or ($cb =~ s/(MOVELEFT R )([0-9.]+)/$1 . ($2 - $cbx * $scale)/e)) {
	      $cb =~ s/^( +)/"$1(MOVERIGHT R " . ($cbx * $scale) . ") "/e;
	   }
      }
      if ($cby) {
	 unless ($cb =~ s/(MOVEUP R )([0-9.]+)/$1 . ($2 + $cby * $scale)/e) {
	    $cb =~ s/^( +)/"$1(MOVEUP R " . ($cby * $scale) . ") "/e;
	 }
      }
      $cb .= "\n";
      $s1 = "(PUSH) " . $s1;
      $s3 .= " (POP)";
   }
   $allchars{$id}{map} = "      $s1\n$s2$s3\n$cb";
   $chars[$num] = $allchars{$id};
}

sub underacc {
   #
   # Dropped accents
   #
   my ($num, $char, $acc, $id) = @_;
   my ($h, $v, $s1, $s2, $s3);
   $acc =~ s/^under//;
   if ($acc eq "dot") { $acc = "period" }
   if ($acc eq "candrabindu") { die "Bad definition (no such accent): $qdef\n" }
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$char}{ht};
   $allchars{$id}{ic} = $allchars{$char}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   if ($acc =~ /^$supacc$/) { 
      $v = $allchars{$id}{dp} = $underadp * $scale * $shrink;
      $v += ($accdepth * $shrink);
   }
   else {
      $v = $allchars{$id}{dp} = $underddp * $scale + $allchars{$acc}{dp};
   }
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   if ($acc =~ /^$supacc$/) {
      $h = ($allchars{$char}{wd} - ($allchars{$acc}{wd} * $shrink)) / 2 - $v * $slant;
      if ($opt_s) { $s2 = "      (SELECTFONT D 1)\n" }
   }
   else {
      $h = ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2 - $v * $slant;
   }
   $h = sprintf("%.3f", $h);
   if ($h > 0) {
      $s2 .= "      (MOVEDOWN R $v) (MOVERIGHT R $h) ";
   }
   elsif ($h < 0) {
      $h = -$h;
      $s2 .= "      (MOVEDOWN R $v) (MOVELEFT R $h) ";
   }
   else  { $s2 = "      (MOVEDOWN R $v) " }
   $s3 = $allchars{$acc}{map};
   $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   if ($opt_s and $acc =~ /^$supacc$/) { $s3 .= "\n      (SELECTFONT D 0)" }
   $allchars{$id}{map} = "      $s1\n$s2$s3\n";
   $chars[$num] = $allchars{$id};
}

sub underb {
   #
   # Underbar
   #
   my ($num, $char, $id) = @_;
   my ($h, $w, $dp, $s1, $s2, $s3);
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$char}{ht};
   $allchars{$id}{dp} = $dp = $underbdp * $scale;
   $allchars{$id}{ic} = $allchars{$char}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   $h = sprintf("%.3f", ($allchars{$id}{wd} / 10 - $dp * $slant));
   $w = sprintf("%.3f", ($allchars{$id}{wd} * 8 / 10));
   if ($h > 0) {
      $s2 = "      (MOVEDOWN R $dp) (MOVERIGHT R $h) ";
   }
   elsif ($h < 0) {
      $h = -$h;
      $s2 = "      (MOVEDOWN R $dp) (MOVELEFT R $h) ";
   }
   else { $s2 .= "      (MOVEDOWN R $dp) " }
   $s3 = "(SETRULE R ${ \($thk * $scale) } R $w)";
   $allchars{$id}{map} = "      $s1\n$s2$s3\n";
   $chars[$num] = $allchars{$id};
}

sub digraph {
   #
   # Make a new character consisting of two existing characters
   #
   my ($num, $char, $acc, $id) = @_;
   my ($one, $two, $kern, $s1, $s2);
   if ($ligs =~ /^   \(LABEL $char\)\n(.*?)\n   \(KRN $acc R (-?[0-9.]+)\)\n/ms) {
      ($one, $two) = ($1, $2);
   }
   $kern = $two if $one !~ /^   \(STOP\)$/m;
   $allchars{$id}{wd} = $allchars{$char}{wd} + $allchars{$acc}{wd};
   $allchars{$id}{wd} += $kern;
   $allchars{$id}{ht} = max($allchars{$char}{ht}, $allchars{$acc}{ht});
   $allchars{$id}{dp} = max($allchars{$char}{dp}, $allchars{$acc}{dp});
   $allchars{$id}{ic} = $allchars{$acc}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   chomp ($s2 = $allchars{$acc}{map});
   $s1 =~ s/(\(SETCHAR .*?\))/$1\n$s2/;
   if ($kern) {
      if ($kern < 0) {
	 $kern = -$kern;
	 $s1 =~ s/(\(SETCHAR .*?\))/$1 (MOVELEFT R $kern)/;
      }
      else { $s1 =~ s/(\(SETCHAR .*?\))/$1 (MOVERIGHT R $kern)/ }
   }
   $allchars{$id}{map} = $s1;
   $chars[$num] = $allchars{$id};
}

sub fixkerns {
   #
   # Generalise the kerning info contained in the vpl file by applying
   # it to new accented chars. Do not kern lower-case chars bearing
   # superscript accents with capitals, quotes or a preceding "f".
   #
   my ($char, $acc) = @_;
   my ($olabel, $nlabel, @liglist, $lchar, $rchar);
   if ($acc =~ /^$accents$/) { $lchar = $rchar = $char }
   else {
      $lchar = $char;
      $rchar = $acc;
   }
   unless ($char =~ /^[a-z]/ and $acc =~ /^$supacc$/) {
      $ligs =~ s[(\n   \(LABEL )$rchar\)(?!\n   \(LIG.*$)]
		[$&$1$char$acc)]gm
                unless $ligs =~ /\n   \(LABEL $char$acc\)$/m;
      $ligs =~ s[(\n   \(LABEL )$rchar\)(\n   \(LIG.*$)+(?!\n   \(STOP\))]
		[$&$1$char$acc)]gm
                unless $ligs =~ /\n   \(LABEL $char$acc\)$/m;
      $ligs =~ s[(\n   \(KRN )$lchar( .*)$]
		[$&$1$char$acc$2]gm;
   }
   else {
      if ($ligs =~ /\n   \(LABEL $char\).*?\(STOP\)/s) {
	 $nlabel = $olabel = $&;
	 $nlabel =~ s/(\n   \(LABEL $char)\)/$1$acc)/
	   unless $ligs =~ /\n   \(LABEL $char$acc\)/m;
         $nlabel =~ s/\n   \(LIG .*\)$//gm;
         $nlabel =~ s/\n   \(LABEL (?!$char$acc).*\)$//gm;
	 $nlabel =~ s/\n   \(KRN ([A-Z]|quote).*\)$//gm;
	 $ligs =~ s/(\n   \(LABEL )$char\).*?\(STOP\)/$olabel$nlabel/s;
      }
      @liglist = split /\n   \(STOP\)/, $ligs;
      foreach (@liglist) {
	 unless (/\n   \(LABEL ([A-Zf]|quote).*\)$/m) {
	    s/(\n   \(KRN $char)( .*\))$/$&$1$acc$2/gm;
	 }
      }
      $ligs = join("\n   (STOP)", @liglist);
   }
}
