#! /usr/bin/perl -w
#
# Copyright (C) Colin Watson 2003, 2004, 2005.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

use strict;

use File::Path;
use Getopt::Long;

use constant VERSION => '0.11';

use constant USER_CONFIG => "$ENV{HOME}/.madison-lite";
use constant SYSTEM_CONFIG => '/etc/madison-lite';

use constant CACHE_FORMAT => 2;

my $configfile;
my $simplemirror;
my $caching = 1;
my $update_cache = 0;
my $source_and_binary = 0;
my $regex = 0;
my ($architectures, %architectures);
my ($components, %components);
my ($suites, @suites);

my %config;
my (@packages, %packages);


############################################################################
# Utility functions
############################################################################

# Print a usage message to FILEHANDLE and exit with status EXITCODE.
sub usage (*$) {
    my ($filehandle, $exitcode) = @_;
    print $filehandle <<EOF;
Usage: madison-lite [options] package [...]

Inspect a Debian package archive and display the versions of the given
packages found in each suite.

  --config-file FILE         read configuration from FILE
  --mirror DIRECTORY         use DIRECTORY as top level of Debian mirror
  --nocache                  don't cache parsed Packages and Sources files

  -a, --architecture ARCH    only show info for ARCH(es)
  -c, --component COMPONENT  only show info for COMPONENT(s)
  -h, --help                 show this help and exit
  -r, --regex                treat PACKAGE as a regular expression
  -s, --suite SUITE          only show info for SUITE(s)
  -S, --source-and-binary    show info for binary children of source packages

ARCH, COMPONENT, and SUITE can be comma- or space-separated lists.
EOF
    exit $exitcode;
}

# Print version information to standard output and exit with zero status.
sub showversion () {
    print 'madison-lite version ', VERSION, "\n";
    exit 0;
}

# Read configuration file FILENAME into %config. Return true if the file
# was opened successfully, otherwise false.
sub read_config ($) {
    my $filename = shift;
    local *CONFIG;
    return 0 unless open CONFIG, "< $filename";
    local $_;
    while (<CONFIG>) {
	chomp;
	next if /^#/;
	if (/^mirror\s+(.*)/) {
	    $config{mirror} = $1;
	} elsif (/^suite\s+(.*?) # name
			\s+(.*?) # directory
			((?: \s+(.*?) )*) # optional components
		  $/x) {
	    my ($name, $directory, $components) = ($1, $2, $3);
	    $config{suites}{$name} = $directory;
	    push @{$config{suiteorder}}, $name;
	    $components =~ s/^\s+//;
	    $config{suitecomponents}{$name} = [split ' ', $components];
	} else {
	    print STDERR "$0: $filename:$.: unrecognized directive '$_'\n";
	}
    }
    return 1;
}

# Compare two architecture names. Normal comparison except that 'source'
# always compares earlier.
sub archcmp ($$)
{
    if ($_[0] eq 'source') {
	if ($_[1] eq 'source') {
	    return 0;
	} else {
	    return -1;
	}
    } else {
	if ($_[1] eq 'source') {
	    return 1;
	} else {
	    return $_[0] cmp $_[1];
	}
    }
}

# Parse a Debian version number into its component parts.
sub parseversion ($)
{
    my $ver = shift;
    my %verhash;
    if ($ver =~ /:/)
    {
	$ver =~ /^(\d+):(.+)/ or die "bad version number '$ver'";
	$verhash{epoch} = $1;
	$ver = $2;
    }
    else
    {
	$verhash{epoch} = 0;
    }
    if ($ver =~ /(.+)-(.+)$/)
    {
	$verhash{version} = $1;
	$verhash{revision} = $2;
    }
    else
    {
	$verhash{version} = $ver;
	$verhash{revision} = 0;
    }
    return %verhash;
}

# Compare upstream-version or Debian-revision components of a Debian version
# number.
sub verrevcmp ($$)
{
    my ($val, $ref) = @_;
    for (;;)
    {
	$val =~ s/^(\D*)//;
	my $alphaval = $1;
	$ref =~ s/^(\D*)//;
	my $alpharef = $1;
	if (length $alphaval or length $alpharef)
	{
	    my @avsplit = split //, $alphaval;
	    my @arsplit = split //, $alpharef;
	    my ($av, $ar) = (0, 0);
	    while ($av < @avsplit and $ar < @arsplit)
	    {
		my ($v, $r) = (ord $avsplit[$av], ord $arsplit[$ar]);
		$v += 256 unless chr($v) =~ /[A-Za-z]/;
		$r += 256 unless chr($r) =~ /[A-Za-z]/;
		return $v <=> $r if $v != $r;
		$av++;
		$ar++;
	    }
	    return 1 if $av < @avsplit;
	    return -1 if $ar < @arsplit;
	}

	return 0 unless length $val and length $ref;

	$val =~ s/^(\d*)//;
	my $numval = $1;
	$ref =~ s/^(\d*)//;
	my $numref = $1;
	return $numval <=> $numref if $numval != $numref;
    }
}

# Compare the two arguments as dpkg-style version numbers. Returns -1 if the
# first argument represents a lower version number than the second, 1 if the
# first argument represents a higher version number than the second, and 0
# if the two arguments represent equal version numbers.
sub vercmp ($$)
{
    my %version = parseversion $_[0];
    my %refversion = parseversion $_[1];
    return 1 if $version{epoch} > $refversion{epoch};
    return -1 if $version{epoch} < $refversion{epoch};
    my $r = verrevcmp $version{version}, $refversion{version};
    return $r if $r;
    return verrevcmp $version{revision}, $refversion{revision};
}

# Find the first of FILENAME, FILENAME.gz, or FILENAME.bz2 that exists.
sub find_list_file ($) {
    my $filename = shift;
    if (-f $filename) {
	return $filename;
    } elsif (-f "$filename.gz") {
	return "$filename.gz";
    } elsif (-f "$filename.bz2") {
	return "$filename.bz2";
    } else {
	return undef;
    }
}

# Open a Packages or Sources file FILENAME, decompressing it if necessary.
# Return a filehandle associated with that (uncompressed) file, or undef if
# it could not be opened successfully.
sub open_list_file ($) {
    my $filename = shift;
    return undef unless defined $filename;
    my $fh;
    if ($filename =~ /\.gz$/) {
	open my $fh, "zcat \Q$filename\E |" or return undef;
	return $fh;
    } elsif ($filename =~ /\.bz2$/) {
	open my $fh, "bzcat \Q$filename\E |" or return undef;
	return $fh;
    } else {
	open my $fh, "< $filename" or return undef;
	return $fh;
    }
}

# Print a warning about caching being disabled, unless it has been printed
# before.
{
    my $cache_warning_printed = 0;
    sub caching_disabled ($) {
	return if $cache_warning_printed;
	my $why = shift;
	print STDERR "$0: caching disabled because $why\n";
	$cache_warning_printed = 1;
    }
}

# Encode FILENAME into a cache filename.
sub cache_filename ($) {
    my $filename = shift;
    my $cache_dir = USER_CONFIG . '/cache';

    eval { require Digest::MD5; import Digest::MD5 qw(md5_hex); };
    if ($@) {
	caching_disabled 'Digest::MD5 cannot be loaded';
	return undef;
    }

    eval { mkpath ($cache_dir); };
    die "$0: can't create cache directory '$cache_dir': $@" if $@;

    return "$cache_dir/" . md5_hex ($filename);
}

# Print the cache format to FILEHANDLE.
sub cache_print_format (*) {
    my $filehandle = shift;
    print $filehandle 'Format: ', CACHE_FORMAT, "\n";
}

# Check the cache format in FILEHANDLE. Return true if it's OK, otherwise
# false.
sub cache_check_format (*) {
    my $filehandle = shift;
    my $line = <$filehandle>;
    return 0 unless defined $line;
    chomp $line;
    if ($line eq ('Format: ' . CACHE_FORMAT)) {
	return 1;
    } else {
	return 0;
    }
}

# Convert a list file FILENAME into cached form. The package cache contains:
#   <package> <version>
# The source cache contains:
#   <source> <binaries> (space-separated)
# Return true if a cached form is now available, otherwise false.
sub cache_list_file ($$$) {
    my ($filename, $what, $is_packages) = @_;
    my $real_filename = find_list_file $filename;
    unless (defined $real_filename) {
	warn "$0: can't find $what\n";
	return 0;
    }
    my $listtime = (stat $real_filename)[9];
    my $cache_filename = cache_filename $filename;
    return 0 unless defined $cache_filename;

    # Already cached?
    if (not $update_cache and
	(-f "$cache_filename.pkg" and
	 (stat "$cache_filename.pkg")[9] >= $listtime) and
	(-f "$cache_filename.src" and
	 (stat "$cache_filename.src")[9] >= $listtime)) {
	local (*PCACHE, *SCACHE);
	if ((open PCACHE, "< $cache_filename.pkg") and
	    (open SCACHE, "< $cache_filename.src") and
	    cache_check_format (*PCACHE) and
	    cache_check_format (*SCACHE)) {
	    return 1;
	}
    }

    my $fh = open_list_file $real_filename;

    unless (open PCACHE, "> $cache_filename.pkg") {
	caching_disabled "'$cache_filename.pkg' cannot be opened: $!";
	return 0;
    }
    cache_print_format *PCACHE;
    print PCACHE "Original: $filename\n";

    my %sources;

    local $/ = ''; # paragraph mode
    local $_;
    while (<$fh>) {
	if (/^Package:\s+(.*)/m) {
	    my $package = $1;
	    next if $package =~ /\s/;
	    if (/^Version:\s+(.*)/m) {
		my $version = $1;
		if ($is_packages and /^Architecture: all$/m) {
		    print PCACHE "$package $version all\n";
		} else {
		    print PCACHE "$package $version\n";
		}
	    }
	    if (/^Source:\s+(.*)/m) {
		# Packages file
		push @{$sources{$1}}, $package;
	    }
	    # Don't bother with Binary: entries in Sources files. There
	    # should always be corresponding Package: and Source: pairs in
	    # Packages, and if there aren't we won't be able to do anything
	    # useful with the source-to-binary mapping anyway.
	}
    }

    close PCACHE;

    unless (open SCACHE, "> $cache_filename.src") {
	caching_disabled "'$cache_filename.src' cannot be opened: $!";
	return 0;
    }
    cache_print_format *SCACHE;
    print SCACHE "Original: $filename\n";
    for my $source (sort keys %sources) {
	print SCACHE "$source ", (join ' ', @{$sources{$source}}), "\n";
    }
    close SCACHE;

    return 1;
    # $fh is auto-closed
}

# Search a list file for %packages, given a FILEHANDLE and a precompiled
# regex FIELD matching the desired field names.
sub search_list_file ($$$$) {
    my ($fh, $field, $is_packages, $arch) = @_;
    my @results;

    # Precompile search pattern.
    my $packlist;
    if ($regex) {
	$packlist = join '|', map "(?:$_)", keys %packages;
    } else {
	$packlist = join '|', map "\Q$_\E\$", keys %packages;
    }
    my $search = qr/^$field:\s+(?:$packlist)/;

    local $/ = ''; # paragraph mode
    local $_;
    while (<$fh>) {
	if (/$search/m) {
	    next unless /^Package: (.*)/m; # might have been Package|Source
	    my $foundpackage = $1;
	    next unless /^Version: (.*)/m;
	    my $foundversion = $1;
	    my $foundsource;
	    # If the source isn't in our list of packages to search for,
	    # then it doesn't matter for sorting purposes, so just pretend
	    # it's $foundpackage.
	    if (/^Source: (.*)/m and exists $packages{$1}) {
		$foundsource = $1;
	    } else {
		$foundsource = $foundpackage;
	    }
	    if ($is_packages and /^Architecture: all/m) {
		push @results, [$foundsource, $foundpackage, $foundversion, 'all'];
	    } else {
		push @results, [$foundsource, $foundpackage, $foundversion, $arch];
	    }
	}
    }

    return @results;
}

# Search the cache file corresponding to FILENAME for %packages.
sub search_cache ($$) {
    my ($filename, $arch) = @_;
    my $cache_filename = cache_filename $filename;
    return () unless defined $cache_filename;
    my @results;

    my %allpackages = %packages;
    my $pkglist;
    my $match;

    if ($regex) {
	$pkglist = join '|', map "(?:$_)", keys %packages;
    } else {
	$pkglist = join '|', map "\Q$_\E\$", keys %packages;
    }
    $match = qr/^(?:$pkglist)/;

    if ($source_and_binary and open SCACHE, "< $cache_filename.src") {
	# Look for source cache entries, indicating additional packages we
	# need to find.
	local $_;
	while (<SCACHE>) {
	    next if /^\S+: /;
	    my ($key, @values) = split;
	    if ($key =~ /$match/) {
		$allpackages{$_} = $key foreach @values;
	    }
	}
	close SCACHE;
    }

    if ($regex) {
	$pkglist = join '|', map "(?:$_)", keys %allpackages;
    } else {
	$pkglist = join '|', map "\Q$_\E\$", keys %allpackages;
    }
    $match = qr/^(?:$pkglist)/;

    open PCACHE, "< $cache_filename.pkg" or return ();
    local $_;
    while (<PCACHE>) {
	next if /^\S+: /;
	my ($key, $value, $is_all) = split;
	if ($key =~ /$match/) {
	    if (defined $is_all and $is_all eq 'all') {
		push @results, [$allpackages{$key}, $key, $value, 'all'];
	    } else {
		push @results, [$allpackages{$key}, $key, $value, $arch];
	    }
	}
    }
    close PCACHE;

    return @results;
}

# Search the Packages file in a directory, if any, for %packages.
sub search_packages ($$) {
    my ($dir, $arch) = @_;

    if ($caching and
	cache_list_file "$dir/Packages", "Packages list file in '$dir'", 1) {
	return search_cache "$dir/Packages", $arch;
    } else {
	my $fh = open_list_file (find_list_file "$dir/Packages");
	unless (defined $fh) {
	    warn "$0: can't find Packages list file in '$dir'\n";
	    return;
	}

	my $field;
	if ($source_and_binary) {
	    $field = qr/(?:Package|Source)/;
	} else {
	    $field = qr/Package/;
	}

	return search_list_file $fh, $field, 1, $arch;
	# $fh is auto-closed
    }
}

# Search the Sources file in a directory, if any, for %packages.
sub search_sources ($$) {
    my ($dir, $arch) = @_;

    if ($caching and
	cache_list_file "$dir/Sources", "Sources list file in '$dir'", 0) {
	return search_cache "$dir/Sources", $arch;
    } else {
	my $fh = open_list_file (find_list_file "$dir/Sources");
	unless (defined $fh) {
	    warn "$0: can't find Sources list file in '$dir'\n";
	    return;
	}

	my $field = qr/Package/;

	return search_list_file $fh, $field, 0, $arch;
	# $fh is auto-closed
    }
}


############################################################################
# Read configuration
############################################################################

Getopt::Long::Configure qw(no_ignore_case);
my $optresult = GetOptions (
    'help|h|?' => sub { usage *STDOUT, 0 },
    'version' => \&showversion,
    'config-file=s' => \$configfile,
    'mirror=s' => \$simplemirror,
    'cache!' => \$caching,
    'update!' => \$update_cache,
    'source-and-binary|S' => \$source_and_binary,
    'regex|r' => \$regex,
    'architecture|a=s' => \$architectures,
    'component|c=s' => \$components,
    'suite|s=s' => \$suites,
);

if (!$optresult) {
    usage *STDERR, 1;
} elsif (!@ARGV) {
    usage *STDERR, 1;
}

if ($configfile) {
    unless (read_config $configfile) {
	print STDERR "$0: can't find configuration file '$configfile'\n";
    }
} else {
    unless (read_config (USER_CONFIG . '/config')) {
	read_config (SYSTEM_CONFIG . '/config');
    }
}

$config{mirror} = $simplemirror if defined $simplemirror;

# Apply default configuration if necessary.
unless (exists $config{mirror}) {
    $config{mirror} = '.';
}
unless (exists $config{suites}) {
    opendir MIRROR, "$config{mirror}/dists"
	or die "$0: can't open mirror directory '$config{mirror}/dists'\n";
    my @dirents = sort grep { !/^\.\.?$/ } readdir MIRROR;
    for my $dirent (@dirents) {
	# Ignore symlinks to other suites in the same directory (e.g.
	# unstable -> sid).
	if (-l "$config{mirror}/dists/$dirent" and
	    (readlink "$config{mirror}/dists/$dirent") !~ m[/]) {
	    next;
	}
	if (-d "$config{mirror}/dists/$dirent") {
	    $config{suites}{$dirent} = "dists/$dirent";
	    push @{$config{suiteorder}}, $dirent;
	    $config{suitecomponents}{$dirent} = [];
	}
    }
    closedir MIRROR;
    die "$0: no suites found in $config{mirror}/dists\n"
	unless exists $config{suites};
}


############################################################################
# Main search loop
############################################################################

@packages = @ARGV;
%packages = map { $_ => $_ } @packages;

%architectures = map { $_ => 1 } split /[, ]+/, $architectures
    if defined $architectures;
%components = map { $_ => 1 } split /[ ,]+/, $components
    if defined $components;

# Find the list of suites we're looking at.
my @allsuites;
if (defined $suites) {
    @suites = split /[, ]+/, $suites if defined $suites;
    for my $cursuite (@suites) {
	die "$0: unknown suite '$cursuite'\n"
	    unless exists $config{suites}{$cursuite};
    }
} else {
    @suites = @{$config{suiteorder}};
}

# Compare two suite names in configured suite order.
sub suitecmp ($$)
{
    for my $suite (@suites) {
	if ($_[0] eq $suite) {
	    if ($_[1] eq $suite) {
		return 0;
	    } else {
		return -1;
	    }
	} elsif ($_[1] eq $suite) {
	    return 1;
	}
    }
    return $_[0] cmp $_[1];
}

# Search through all Packages and Sources files for %packages.
my %results;
for my $cursuite (@suites) {
    my $cursuitedir = $config{suites}{$cursuite};
    $cursuitedir = "$config{mirror}/$cursuitedir" if $cursuitedir !~ m[^/];
    # e.g. /debian/dists/stable

    # Find the list of components we're looking at; might be listed
    # explicitly in the configuration file and/or on the command line.
    my @components = @{$config{suitecomponents}{$cursuite}};
    unless (@components) {
	unless (opendir SUITE, $cursuitedir) {
	    warn "$0: can't open suite directory '$cursuitedir'\n";
	    next;
	}
	my @dirents = sort grep { !/^\.\.?$/ } readdir SUITE;
	for my $dirent (@dirents) {
	    push @components, $dirent if -d "$cursuitedir/$dirent";
	}
	closedir SUITE;
    }
    @components = grep { $components{$_} } @components if %components;

    for my $curcomp (@components) {
	next if $curcomp =~ /^\.\.?$/;
	# e.g. /debian/dists/stable/main
	my $curcompdir = "$cursuitedir/$curcomp";

	unless (opendir COMPONENT, "$curcompdir") {
	    warn "$0: can't open component directory '$curcompdir'\n";
	    next;
	}
	while (my $curarch = readdir COMPONENT) {
	    my @archresults;
	    my $curarchdir = "$curcompdir/$curarch";
	    if ($curarch =~ /^binary-(.*)$/) {
		# e.g. /debian/dists/stable/main/binary-i386
		$curarch = $1;
		next if defined $architectures and
			not $architectures{$curarch};
		@archresults = search_packages $curarchdir, $curarch;
	    } elsif ($curarch eq 'source') {
		# e.g. /debian/dists/stable/main/source
		next if defined $architectures and
			not $architectures{'source'};
		@archresults = search_sources $curarchdir, $curarch;
	    } else {
		next;
	    }

	    for my $result (@archresults) {
		my ($ressource, $respackage, $resversion, $resarch) = @$result;
		$results{$ressource}{$respackage}{$resversion}{$cursuite}{$curcomp}{$resarch} = 1;
	    }
	}
	closedir COMPONENT;
    }
}

# Print out the results.
for my $package (@packages) {
    next unless exists $results{$package};
    for my $binpkg (sort keys %{$results{$package}}) {
	for my $version (sort vercmp keys %{$results{$package}{$binpkg}}) {
	    for my $suite (sort suitecmp keys %{$results{$package}{$binpkg}{$version}}) {
		for my $comp (sort keys %{$results{$package}{$binpkg}{$version}{$suite}}) {
		    my $dispsuite = $suite;
		    if ($comp ne 'main') {
			$dispsuite = "$suite/$comp";
		    }
		    printf "%10s | %10s | %13s | %s\n",
			$binpkg, $version, $dispsuite,
			(join ', ', sort archcmp keys %{$results{$package}{$binpkg}{$version}{$suite}{$comp}});
		}
	    }
	}
    }
}
