#!/bin/sh -e

#  debiandoc-normalize   - normalize debiandoc sgml file
#
#  Copyright (C) 2002  Philippe Batailler <pbatailler@teaser.fr>
#  Copyright (C) 2002  Osamu Aoki <osamu@aokiconsulting.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or (at
#  your option) any later version.
#
#  This program is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the
#  Free Software Foundation, Inc.,
#  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#

# Name this script was invoked under, with any directory part stripped.
progname=${0##*/}

version=0.6

# Stand-in token for '%' characters that must not be rewritten.
dummypercent="DuMmyPerCeNtIZheReDuMmY"

# Note on the sed patterns used later in this script: blanks are matched
# with a bracket expression holding a space and a tab, and because sed
# regular expressions have no '+', "one or more" is spelled as the item
# followed by the same item with '*'.

##----------------------------------------------------------------
## get command line options

# Switches start out disabled.
opt_e=false
opt_h=false
opt_k=false

# Positional arguments: file name first, then an optional encoding.
infile=
encoding=
encoding0="iso-8859-1"

# Report a command-line usage problem ($1) on stderr and exit with status 1.
usage_error () {
    echo "$progname:error: $1
Try \`$progname --help' for more information." >&2
    exit 1
}

# Walk over the arguments without consuming them.
for arg in "$@"; do
    case $arg in
        -h | --help )     opt_h=true ;;
        -e | --encoding ) opt_e=true ;;
        -k | --keep )     opt_k=true ;;
        -t | --trace )    set -x ;;
        -v | --version )  echo "$progname: version $version"; exit 0 ;;
        -* )              usage_error "invalid command-line option: $arg" ;;
        * )
            # A third positional argument is one too many.
            test -z "$encoding" || usage_error "too many arguments."
            if test -n "$infile"; then
                encoding=$arg
            else
                infile=$arg
            fi
            ;;
    esac
done


##------------------------------------------------------------
## Display help message

# Write the usage summary to stdout.
show_help () {
    cat <<END
$progname version $version

Usage: $progname [options] filename [encoding]
Options: -h   print this help message
         -e   set encoding for <?xml ... ,  default $encoding0
         -k   keep intermediate files
         -t   trace
         -v   version number
Description:
 Prepare SGML file for the conversion to XML
 Fix % related syntax and capitalize reserved words

 Copyright (C) 2002  Philippe Batailler <pbatailler@teaser.fr>
 Copyright (C) 2002  Osamu Aoki <osamu@aokiconsulting.com>
END
}

# A help request is answered before any argument validation takes place.
if $opt_h; then
    show_help
    exit 0
fi

# The file name is mandatory.
test -n "$infile" || {
    echo "$progname:error: too few arguments.
Try \`$progname --help' for more information." >&2
    exit 1
}

# Fall back to the default encoding when none was supplied.
: "${encoding:=$encoding0}"

##-----------------------------------------------------------------
# Blank-matching bracket expression for the sed scripts below: a space or a
# tab.  The tab is produced by printf so that no invisible character has to
# be embedded in the patterns.  sed regular expressions have no '+', so
# "one or more blanks" is written "${ws}${ws}*".
tab=$(printf '\t')
ws="[ ${tab}]"

# State consulted by cleanup().
original_moved=false   # set once $infile has been renamed to $infile.mod0
finished=false         # set once the new $infile has been written completely

# Exit handler.  It runs on every way out of the script from here on: the
# normal end, a command failing under -e, or one of the trapped signals.
# While the file is being processed its original content exists only as
# $infile.mod0, so when the run stops before the new file is complete the
# original is moved back instead of being deleted with the other
# intermediate files.  Intermediate files are removed unless -k was given.
cleanup () {
    status=$?
    trap - 0               # do not run this handler a second time
    if $original_moved && ! $finished; then
        mv -f -- "$infile.mod0" "$infile" || status=1
    fi
    $opt_k || rm -f -- "$infile".mod? >/dev/null 2>&1 || :
    exit "$status"
}
trap cleanup 0
# Turn HUP, INT and TERM into an ordinary exit with status 1 so that
# cleanup() runs for them as well.
trap 'exit 1' 1 2 15

##-----------------------------------------------------------------
# Keep original
mv -f -- "$infile" "$infile.mod0"
original_moved=true

# Upper case for reserved words, no tab but space
sed \
    -e "s/^${ws}*<!doctype${ws}/<!DOCTYPE /" \
    -e "s/^${ws}*<!\(.*\)public${ws}/<!\1 PUBLIC /" \
    -e "s/^${ws}*<!${ws}*system${ws}/<! SYSTEM /" \
    -e "s/^${ws}*<!\(.*\)${ws}system${ws}/<!\1 SYSTEM /" \
    -e "s/^${ws}*<!entity${ws}/<!ENTITY /g" \
    -e "s/^${ws}*<\(.*\)${ws}entity${ws}/<\1 ENTITY /g" \
    <"$infile.mod0" >"$infile.mod1"

# Normalize sgml for external reference and protect %:
# the '%' of "<![ %name;" and of "<!ENTITY % " is swapped for the
# $dummypercent token so that the next pass leaves it alone.
sed -e "s/<!\[${ws}*%\([^ ][^ ;]*\);*${ws}*/<!\[ ${dummypercent}\1; /g" \
    -e "s/<!ENTITY${ws}${ws}*%${ws}${ws}*/<!ENTITY ${dummypercent} /g" \
    <"$infile.mod1" >"$infile.mod2"

# Replace % with its ASCII 37 character reference to avoid problems.
# But do not do that in the header <!DOCTYPE .... ]>
# End of header should be 1 line only containing ]>
# (-n suppresses automatic printing: the first script rewrites and prints
# the lines outside the header, the second prints the header unchanged.)
sed -n \
    -e '/^<!DOCTYPE/,/]>$/!{s/%/\&#37;/g;p;}' \
    -e '/^<!DOCTYPE/,/]>$/p' \
    <"$infile.mod2" >"$infile.mod3"

# recover % where needed
sed -e "s/${dummypercent}/%/g" <"$infile.mod3" >"$infile.mod4"

# insert encoding
## do not add encoding in .sgml files, only in .ent files.

if $opt_e         ## Do we add encoding ?
then
    # Drop any existing declaration line, then insert a fresh one before
    # the first line.
    sed -e '/<?xml version="1.0" encoding="/d' "$infile.mod4" |
    sed -e '1i\
<?xml version="1.0" encoding="'"$encoding"'"?>' >"$infile"
else
    cp -- "$infile.mod4" "$infile"
fi

# From here on the new file is complete and must not be replaced by the
# saved original.
finished=true

##--------------------------------------------------------------
## removing intermediate files
## (done by cleanup(), which runs when the script exits; skipped with -k)
