# manpages -- lintian check script -*- perl -*-

# Copyright (C) 1998 Christian Schwarz
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package Lintian::manpages;
use strict;
use Dep;
use Tags;
use Util;

# Set to true if the man program supports --warnings based on its version
# number.  This is probed if this variable is undefined and set to 0 or 1.
our $MAN_WARNINGS;

sub run {

my $pkg = shift;
my $type = shift;

use File::Basename;

my %file_info;
my %binary;
my %link;
# my %sect_by_binary;
# my %sect_by_manpage;
my %manpage;

# Read file info...
open(IN, '<', "file-info")
    or fail("cannot find file-info for $type package $pkg");
while (<IN>) {
    chop;

    m/^(.*?):\s+(.*)$/o or fail("an error in the file pkg is preventing lintian from checking this package: $_");
    my ($file,$info) = ($1,$2);

    next unless $file =~ m/man/o;
    $file =~ s,^(\./)?,,;

    $file_info{$file} = $info;
}
close(IN);

# Read package contents...
open(IN, '<', "index") or fail("cannot open index file index: $!");
while (<IN>) {
    chop;

    my ($perm,$owner,$size,$date,$time,$file) = split(' ', $_, 6);
    my $link;

    $file =~ s,^(\./),,;
    $file =~ s/ link to .*//;

    if ($perm =~ m/^l/) {
	($file, $link) = split(' -> ', $file);
    }

    my ($fname,$path,$suffix) = fileparse($file);

    # Binary that wants a manual page?
    #
    # It's tempting to check the section of the man page depending on the
    # location of the binary, but there are too many mismatches between
    # bin/sbin and 1/8 that it's not clear it's the right thing to do.
    if (($perm =~ m,^[\-l],o) and
    	(($path =~ m,^bin/$,o) or
	 ($path =~ m,^sbin/$,o) or
	 ($path =~ m,^usr/bin/$,o) or
	 ($path =~ m,^usr/bin/X11/$,o) or
	 ($path =~ m,^usr/bin/mh/$,o) or
	 ($path =~ m,^usr/sbin/$,o) or
	 ($path =~ m,^usr/games/$,o) or
	 ($path =~ m,^usr/X11R6/bin/$,o) )) {

	my $bin = $fname;
	
	$binary{$bin} = $file;
	$link{$bin} = $link if $link;

    	next;
    }

    if (($path =~ m,usr/(share|X11R6)/man/$,) and ($fname ne "")) {
	tag "manpage-in-wrong-directory", "$file";
    	next;
    }

    # manual page?
    next unless ($perm =~ m,^[\-l],o) and
	(($path =~ m,^usr/man(/\S+),o)
	 or ($path =~ m,^usr/X11R6/man(/\S+),o)
	 or ($path =~ m,^usr/share/man(/\S+),o) );

    my $t = $1;
    if (not $t =~ m,^.*man(\d)/$,o) {
	tag "manpage-in-wrong-directory", "$file";
    	next;
    }
    my ($section,$name) = ($1,$fname);
    my $lang = "";
       $lang = $1 if $t =~ m,^/([^/]+)/man\d/$,o;

    my @pieces = split(/\./, $name);
    my $ext = pop @pieces;
    if ($ext ne 'gz') {
        push @pieces, $ext;
	tag "manpage-not-compressed", "$file";
    } elsif ($perm =~ m,^-,o) { # so it's .gz... files first; links later
	my $info = $file_info{$file};
	if ($info !~ m/gzip compressed data/o) {
	    tag "manpage-not-compressed-with-gzip", "$file";
	} elsif ($info !~ m/max compression/o) {
	    tag "manpage-not-compressed-with-max-compression", "$file";
	}
    }
    my $fn_section = pop @pieces;
    my $section_num = $fn_section;
    if (scalar @pieces && $section_num =~ s/^(\d).*$/$1/) {
	my $bin = join(".", @pieces);
	       $manpage{$bin} = [] unless $manpage{$bin};
	push @{$manpage{$bin}}, { file => $file, lang => $lang };

	# number of directory and manpage extension equal?
	if ($section_num != $section) {
	    tag "manpage-in-wrong-directory", "$file";
	}
    } else {
	tag "manpage-has-wrong-extension", "$file";
    }

    # special check for manual pages for X11 games
    if ($path =~ m,^usr/X11R6/man/man6/,o) {
	tag "x11-games-should-be-in-usr-games", "$file";
    }

    # check symbolic links to other manual pages
    if ($perm =~ m,^l,o) {
	if ($link =~ m,(^|/)undocumented,o) {
	    if ($path =~ m,^usr/share/man,o) {
		# undocumented link in /usr/share/man -- possibilities
                #    undocumented... (if in the appropriate section)
		#    ../man?/undocumented...
		#    ../../man/man?/undocumented...
		#    ../../../share/man/man?/undocumented...
		#    ../../../../usr/share/man/man?/undocumented...
                if ((($link =~ m,^undocumented\.([237])\.gz,o) and
                    ($path =~ m,^usr/share/man/man$1,)) or
                    ($link =~ m,^\.\./man[237]/undocumented\.[237]\.gz$,o) or
                    ($link =~ m,^\.\./\.\./man/man[237]/undocumented\.[237]\.gz$,o) or
                    ($link =~ m,^\.\./\.\./\.\./share/man/man[237]/undocumented\.[237]\.gz$,o) or
                    ($link =~ m,^\.\./\.\./\.\./\.\./usr/share/man/man[237]/undocumented\.[237]\.gz$,o)) {
		    tag "link-to-undocumented-manpage", "$file";
                } else {
		    tag "bad-link-to-undocumented-manpage", "$file";
		}
	    } else {
		# undocumented link in /usr/X11R6/man -- possibilities:
		#    ../../../share/man/man?/undocumented...
		#    ../../../../usr/share/man/man?/undocumented...
		if (($link =~ m,^\.\./\.\./\.\./share/man/man[237]/undocumented\.[237]\.gz$,o) or
		    ($link =~ m,^\.\./\.\./\.\./\.\./usr/share/man/man[237]/undocumented\.[237]\.gz$,o)) {
		    tag "link-to-undocumented-manpage", "$file";
		} else {
		    tag "bad-link-to-undocumented-manpage", "$file";
		}
	    }
	}
    } else { # not a symlink
	open (MANFILE, '-|', "zcat unpacked/\Q$file\E 2>/dev/null")
	    or fail("cannot open $file: $!");
	my @manfile = ();
	while (<MANFILE>) { push @manfile, $_; }
	close MANFILE;
	# Is it a .so link?
	if ($size < 256) {
	    my ($i, $first) = (0, "");
	    do {
		$first = $manfile[$i++] || ""; 
	    } while ($first =~ /^\.\\"/ && $manfile[$i]); #");

	    unless ($first) {
		tag "empty-manual-page", "$file";
	    } elsif ($first =~ /^\.so\s+(.+)?$/) {
		my $dest = $1;
		if ($dest =~ m,^([^/]+)/(.+)$,) {
		    my ($manxorlang, $rest) = ($1, $2);
		    if ($manxorlang !~ /^man\d+$/) {
			# then it's likely a language subdir, so let's run
			# the other component through the same check
			if ($rest =~ m,^([^/]+)/(.+)$,) {
			    my ($lang, $rest) = ($1, $2);
			    if ($rest !~ m,^[^/]+\.\d(?:\S+)?(?:\.gz)?$,) {
				tag "bad-so-link-within-manual-page", "$file";
			    }
			} else {
			    tag "bad-so-link-within-manual-page", "$file";
			}
		    }
		} else {
		    tag "bad-so-link-within-manual-page", "$file";
		}
		next;
	    }
	}

	# If it's not a .so link, use lexgrog to find out if the man page
	# parses correctly and make sure the short description is reasonable.
	#
	# This check is currently not applied to pages in language-specific
	# hierarchies, because those pages are not currently scanned by
	# mandb (bug #29448), and because lexgrog can't handle pages in all
	# languages at the moment, leading to huge numbers of false
	# negatives. When man-db is fixed, this limitation should be
	# removed.
	if ($path =~ m,/man/man\d/,) {
	    my $pid = open LEXGROG, '-|';
	    if (not defined $pid) {
		fail("cannot run lexgrog: $!");
	    } elsif ($pid == 0) {
		my %newenv = (LANG => 'C', PATH => $ENV{PATH});
		undef %ENV;
		%ENV = %newenv;
		exec "lexgrog unpacked/\Q$file\E 2>&1"
		    or fail("cannot run lexgrog: $!");
	    }
	    my $desc = <LEXGROG>;
	    $desc =~ s/^[^:]+: \"(.*)\"$/$1/;
	    if ($desc =~ /(\S+)\s+-\s+manual page for \1/i) {
		tag "manpage-has-useless-whatis-entry", "$file";
	    } elsif ($desc =~ /(\S+)\s+-\s+programs? to do something/i) {
		tag "manpage-is-dh_make-template", "$file";
	    }
	    1 while <LEXGROG>;
	    close LEXGROG;
	    tag "manpage-has-bad-whatis-entry", "$file" if $? != 0;
	}

	# If we've not probed yet, determine if man supports --warnings.
	# This can be removed once man 2.5.1 makes it to testing.
	unless (defined $MAN_WARNINGS) {
	    my $version = `man -V 2>&1`;
	    if ($? == 0 && $version =~ / (\d+\.[\d.]+)(,|\Z)/) {
		$MAN_WARNINGS = Dep::versions_gte($1, '2.5.1');
	    } else {
		$MAN_WARNINGS = 0;
	    }
	}

	# If it's not a .so link, run it through "man" to check for errors.
	# If it is in a directory with the standard man layout, cd to the
	# parent directory before running man so that .so directives are
	# processed properly.  (Yes, there are man pages that include other
	# pages with .so but aren't simple links; rbash, for instance.)
	my $cmd;
	my $warnings = $MAN_WARNINGS ? ' --warnings' : '';
	if ($file =~ m,^(.*)/(man\d/.*)$,) {
	    $cmd = "cd unpacked/\Q$1\E && man$warnings -l \Q$2\E";
	} else {
	    $cmd = "man$warnings -l unpacked/\Q$file\E";
	}
	my $pid = open MANERRS, '-|';
	if (not defined $pid) {
	    fail("cannot run man -l: $!");
	} elsif ($pid == 0) {
	    my %newenv = (LANG => 'C', PATH => $ENV{PATH});
	    undef %ENV;
	    %ENV = %newenv;
	    exec "($cmd >/dev/null) 2>&1"
		or fail("cannot run man -l: $!");
	}
	while (<MANERRS>) {
	    # ignore progress information from man
	    next if /^Reformatting/;
	    next if /^\s*$/;
	    # ignore errors from gzip, will be dealt with at other places
	    next if /^(man|gzip)/;
	    # ignore 8bit character errors on localized man pages
	    if ($lang ne "") {
		next if /warning: can\'t find numbered character/;
		next if /warning \[.*\]: cannot adjust line/;
		next if /warning \[.*\]: can\'t break line/;
	    }
	    # ignore common undefined macros from pod2man << Perl 5.10
	    next if /warning: \`(Tr|IX)\' not defined/;
	    chomp;
	    s/^[^:]+://o;
	    tag "manpage-has-errors-from-man", "$file", "$_";
	    last;
	}
	close(MANERRS);
	# Now we search through the whole man page for some common errors
	my $lc = 0;
	my $hc = 0;
	foreach my $line (@manfile) {
	    $lc++;
	    chomp $line;
	    next if $line =~ /^\.\\\"/o; # comments .\"
	    if ($line =~ /^\.TH\s/) { # header
		require Text::ParseWords;
		my ($th_command, $th_title, $th_section, $th_date ) =
		    Text::ParseWords::parse_line( '\s+', 0, $line);
		if ($th_section && (lc($fn_section) ne lc($th_section))) {
		    tag "manpage-section-mismatch", "$file:$lc $fn_section != $th_section";
		}
	    }
	    # Catch hyphens used as minus signs by looking for ones at the
	    # beginning of a word, but don't generate false positives on \s-1
	    # (small font), \*(-- (pod2man long dash), or things like \h'-1'.
	    if ($line =~ /^(
			    ([^\.].*)?
			    [\s\'\"\`\(\[]
			    (?<! \\s | \*\( | \(- | \w\' )
			   )?
			   (--?\w+)/ox) {
		$hc++;
		tag "hyphen-used-as-minus-sign", "$file:$lc" if $hc <= 10 or $ENV{'LINTIAN_DEBUG'};
	    }
	    if (($line =~ m,(/usr/(dict|doc|etc|info|man|adm|preserve)/),o)
		|| ($line =~ m,(/var/(adm|catman|named|nis|preserve)/),o)) {
		# FSSTND dirs in man pages
		# regexes taken from checks/files
		tag "FSSTND-dir-in-manual-page", "$file:$lc $1";
	    }
	}
	tag "hyphen-used-as-minus-sign", $file, ($hc-10), "more occurrences not shown" if $hc > 10 and ! $ENV{'LINTIAN_DEBUG'};
    }
}
close(IN);

for my $f (sort keys %binary) {
    if (exists $manpage{$f}) {
	# X11 binary?  This shouldn't happen any more; these are no longer
	# allowed.
	if ($binary{$f} =~ m,usr/X11R6, or
	     ($link{$f} && $link{$f} =~ m,(\.\.|usr)/X11R6,)) {
	    # yes. manpage in X11 too?
	    for my $manp_info (@{$manpage{$f}}) {
		if ($manp_info->{file} =~ m/X11R6/) {
		    # ok.
		} else {
		    tag "manpage-for-x11-binary-in-wrong-directory", "$binary{$f} $manp_info->{file}";
		}
	    }
	} else {
	    for my $manp_info (@{$manpage{$f}}) {
		# no. manpage in X11?
		if ($manp_info->{file} =~ m/X11R6/) {
		    tag "manpage-for-non-x11-binary-in-wrong-directory", "$binary{$f} $manp_info->{file}";
		} else {
		    # ok.
		}
	    }
	}

	if (not grep { $_->{lang} eq "" } @{$manpage{$f}}) {
	    tag "binary-without-english-manpage", "$binary{$f}";
	}
    } else {
	tag "binary-without-manpage", "$binary{$f}";
    }
}

}

1;

# Local Variables:
# indent-tabs-mode: t
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 ts=8
