#!/usr/bin/perl -w

use strict;
use warnings;

# debtags-get - Keep debtags source data up to date
#
# Copyright (C) 2006  Enrico Zini <enrico@debian.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

our $CONFIG='/etc/debtags/sources.list';
our $CONFIGDIR='/etc/debtags/sources.list.d';
our $OUTPUTDIR='/var/lib/debtags';

sub readConfig ($);
sub readConfigs ();
sub fetcher_apt ($$);
sub fetcher_wget ($$);
sub fetcher_copy ($$);

my $VERBOSE=undef;
my $LOCAL=undef;

sub error (@)
{
	print STDERR @_, "\n";
	exit 1;
}

sub verbose (@)
{
	print STDOUT @_, "\n" if $VERBOSE;
}

if (@ARGV and $ARGV[0] eq '--verbose')
{
	$VERBOSE=1;
	shift @ARGV;
}
if (@ARGV and $ARGV[0] eq '--local')
{
	$LOCAL=1;
	shift @ARGV;
}

# TODO: allow to use more than one config file
# TODO: allow to override $OUTPUTDIR with parameters read from commandline

if (@ARGV and $ARGV[0] eq 'islocal')
{
	my $res = 0;
	# Read all the configuration first, so we can warn of syntax errors
	for my $task (readConfigs())
	{
		my ($islocal, $func, $arg1, $arg2) = @$task;
		$res = 1 if not $islocal;
	}
	exit $res;
} elsif (@ARGV and $ARGV[0] eq 'dump') {
	my $res = 0;
	# Read all the configuration first, so we can warn of syntax errors
	for my $task (readConfigs())
	{
		print join(', ', map { defined($_) ? $_ : '(undef)' } @$task), "\n";
	}
	exit $res;
} else {
	if (not -w $OUTPUTDIR)
	{
		if (not -e $OUTPUTDIR)
		{
			error "Output directory $OUTPUTDIR does not exist";
		} else {
			error "I do not have permission to write to $OUTPUTDIR";
		}
	}

	# Read all the configuration first, so we can warn of syntax errors
	my @sources = readConfigs();

	# Delete old sources (this will also get rid of sources removed from
	# sources.list)
	system "rm -f '$OUTPUTDIR/debtags-fetch-'*";

	for my $task (@sources)
	{
		my ($islocal, $func, $arg1, $arg2) = @$task;
		# Skip nonlocal sources if so instructed
		next if $LOCAL and not $islocal;
		&$func($arg1, $arg2);
	}
}

exit 0;

sub readConfigs ()
{
	my @res = readConfig($CONFIG);
	if (-d $CONFIGDIR)
	{
		opendir(DIR, $CONFIGDIR) or die "Cannot access directory $CONFIGDIR: $!";
		while (my $name = readdir(DIR))
		{
			next if $name =~ /^\.|~$/;
			my $file = $CONFIGDIR."/".$name;
			next if not -f $file;
			push @res, readConfig($file);
		}
		closedir(DIR);
	}

	return @res;
}

sub readConfig ($)
{
	my $config = shift;
	my @res;

	open IN, $config or error "Cannot open $config: $!";

	while (<IN>)
	{
		# Skip empty lines and comments
		next if /^\s*(#|$)/;

		# We are only interested in 'tags' lines
		next if not /^\s*tags\s+(.+?)\s*$/;

		my $line = $1;
		if ($line eq 'apt://')
		{
			push @res, [ 1, \&fetcher_apt, undef, "$OUTPUTDIR/debtags-fetch-apt" ];
		}
		elsif ($line =~ /^(?:http|ftp):\/\//)
		{
			my $mangle = $line;
			$mangle =~ s/[^A-Za-z0-9._-]/-/g;
			push @res, [ undef, \&fetcher_wget, $line, "$OUTPUTDIR/debtags-fetch-$mangle" ];
		}
		elsif ($line =~ /^file:(.+)/)
		{
			my $name = $1;
			my $mangle = $name;
			$mangle =~ s/[^A-Za-z0-9._-]/-/g;
			# Delete extra leading slashes
			$name =~ s/^\/*(\/.+)/$1/;
			push @res, [ 1, \&fetcher_copy, $name, "$OUTPUTDIR/debtags-fetch-$mangle" ];
		}
	}

	close IN;

	return @res;
}

sub checked_copy ($$)
{
	my ($src, $dst) = @_;
	system("cp", $src, $dst) and error "Cannot copy $src to $dst";
}

sub finalize_file ($)
{
	my ($file) = @_;
	rename "$file.tmp", "$file" or error "Cannot rename $file.tmp to $file: $!";
}

sub apt_normalise ($)
{
	my @res;
	# Split on 
	for my $tag (split(/,\s+/, $_[0]))
	{
		if ($tag =~ /^(.+){(.+)}$/)
		{
			my $prefix = $1;
			for my $suffix (split(',', $2))
			{
				push @res, $prefix.$suffix;
			}
		} else {
			push @res, $tag;
		}
	}
	return @res;
}

sub fetcher_apt ($$)
{
	my ($dummy, $target) = @_;

	verbose("Fetching data from apt...");

	open OUT, '>', "$target.tag.tmp" or error "Cannot write to $target: $!";
	for my $file (glob("/var/lib/apt/lists/*_Packages"))
	{
		open IN, $file or error "Cannot read $file: $!";

		my $pkg;
		while (<IN>)
		{
			if (/^Package:\s*(\S+)/)
			{
				$pkg = $1;
			}
			elsif (/^Tag:\s*(.+?)\s*$/)
			{
				if (not defined $pkg)
				{
					error "Error in apt-cache dumpavail output, line $.: a Tag: line appeared before any Package: line";
				}
				# Split and join the tags, to normalize the spaces inbetween
				print OUT "$pkg: ", join(', ', apt_normalise($1)), "\n";
			}
		}

		close IN;
	}
	close OUT;

	checked_copy("/usr/share/debtags/vocabulary", "$target.voc.tmp");

	finalize_file "$target.tag";
	finalize_file "$target.voc";
}

sub fetcher_wget ($$)
{
	my ($uri, $target) = @_;

	verbose("Fetching data from $uri via wget...");

	system "wget", "-O", "$target.voc.gz.tmp", "$uri/vocabulary.gz" and error "Failed downloading $uri/vocabulary.gz";
	system "wget", "-O", "$target.tag.gz.tmp", "$uri/tags-current.gz" and error "Failed downloading $uri/tags-current.gz";

	finalize_file "$target.tag.gz";
	finalize_file "$target.voc.gz";
}

sub fetcher_copy ($$)
{
	my ($base, $target) = @_;

	verbose("Fetching data from directory $base...");

	my $voc;
	if (-r "$base/vocabulary.gz")
	{
		$voc = "$target.voc.gz";
		checked_copy("$base/vocabulary.gz", "$voc.tmp");
	} elsif (-r "$base/vocabulary") {
		$voc = "$target.voc";
		checked_copy("$base/vocabulary", "$voc.tmp");
	} else {
		error "Cannot find $base/vocabulary.gz or $base/vocabulary";
	}

	my $tag;
	if (-r "$base/tags-current.gz")
	{
		$tag = "$target.tag.gz";
		checked_copy("$base/tags-current.gz", "$tag.tmp");
	} elsif (-r "$base/tags-current") {
		$tag = "$target.tag";
		checked_copy("$base/tags-current", "$tag.tmp");
	} else {
		error "Cannot find $base/tags-current.gz or $base/tags-current";
	}

	finalize_file "$tag";
	finalize_file "$voc";
}

# vim:set ts=4 sw=4:
