#!/usr/bin/perl -w

=head1 NAME

rinse - RPM Installation Entity.

=head1 SYNOPSIS

  rinse [options]

  Help Options:
   --help     Show help information.
   --manual   Read the manual for this script.
   --version  Show the version information and exit.

  Mandatory Options:
   --directory    The directory to install the distribution within.
   --distribution The distribution to install.

  Misc Options:
   --arch                Specify the architecture to install.
   --cache               Should we use a local cache?  (Default is 1)
   --cache-dir           Specify the directory we should use for the cache.
   --clean-cache         Clean our cache of .rpm files.
   --list-distributions  Show installable distributions.
   --print-uris          Only show the RPMs which should be downloaded.

=cut


=head1 OPTIONS

=over 8

=item B<--arch>
Specify the architecture to install.  Valid choices are 'amd64' and 'i386' only.

=item B<--cache>
Specify whether to cache packages (1) or not (0).

=item B<--cache-dir>
Specify the directory we should use for the cache.

=item B<--clean-cache>
Remove all cached .rpm files.

=item B<--directory>
Specify the directory into which the distribution should be installed.

=item B<--distribution>
Specify the distribution to be installed.

=item B<--help>
Show help information.

=item B<--list-distributions>
Show the distributions which are installable.

=item B<--manual>
Read the manual for this script.

=item B<--print-uris>
Only show the files we would download, don't actually do so.

=item B<--version>
Show the version number and exit.

=back

=cut


=head1 DESCRIPTION

  rinse is a simple script which is designed to be able to install
 a minimal working installation of an RPM-based distribution into
 a directory.

  The tool is analagous to the standard Debian GNU/Linux debootstrap
 utility.

=cut


=head1 USAGE

  To use this script you will need to be root.  This is required
 to mount /proc, run chroot, and more.

  Basic usage is as simple as:

=for example begin

   rinse --distribution fedora-core-6 --directory /tmp/test

=for example end


  This will download the required RPM files and unpack them into
 a minimal installation of Fedora Core 6.

  To see which RPM files would be downloaded, without actually
 performing an installation or downloading anything, then you
 may run the following:


=for example beging

   rinse --distribution fedora-core-6 --print-uris

=for example end

=cut


=head1 TODO

  Short of supporting more distributions or architectures there aren't
 really any outstanding issues.

=cut


=head1 AUTHOR

 Steve
 --
 http://www.steve.org.uk/

 $Id: rinse,v 1.31 2007-08-11 22:39:22 steve Exp $

=cut


=head1 LICENSE

Copyright (c) 2007 by Steve Kemp.  All rights reserved.

This module is free software;
you can redistribute it and/or modify it under
the same terms as Perl itself.
The LICENSE file contains the full text of the license.

=cut



#
#  Good practise
#
use strict;
use warnings;

#
#  Standard Perl modules we require
#
use English;
use File::Copy;
use File::Path;
use File::Find;
use Getopt::Long;
use Pod::Usage;
use LWP::UserAgent;




#
# Release number.
#
my $RELEASE = '0.8';


#
#  Our confiuration options.
#
my %CONFIG;

#
#  Default options.
#
$CONFIG{'arch'}      = 'i386';
$CONFIG{'cache'}     = 1;
$CONFIG{'cache-dir'} = "/var/cache/rinse/";


#
#  Find the size of our terminal
#
( $CONFIG{'width'}, $CONFIG{'height'} ) = getTerminalSize();


#
#  Make sure the host is setup correctly, and that all required
# tools are available.
#
testSetup();


#
#  Parse our arguments
#
parseCommandLineArguments();


#
#  Handle special case first.
#
if ( $CONFIG{'list-distributions'} )
{
    listDistributions();
    exit;
}
if ( $CONFIG{'clean-cache'} )
{
    cleanCache();
    exit;
}


#
#  Sanity check our arguments
#
sanityCheckArguments();


#
#  Ensure we're started by root at this point.  This is required
# to make sure we mount /proc, etc.
#
testRootUser() unless( $CONFIG{'print-uris'} );


#
#  Make sure the directory we're installing into exists.
#
if ( ( !$CONFIG{'print-uris'} ) && ( ! -d $CONFIG{'directory'} ) )
{
    #
    # Make the directory, including all required parent(s) directories.
    #
    mkpath( $CONFIG{'directory'}, 0, 0755 );
}


#
#  Find the list of packages to download
#
my @packages = getDistributionPackageList( $CONFIG{'distribution'} );


#
#  Find the mirror, if not specified already.
#
if ( ! $CONFIG{'mirror'} )
{
  $CONFIG{'mirror'} = getDistributionMirror( $CONFIG{'distribution'},
                                             $CONFIG{'arch'} );
}


#
#
#  Download the packages into the specified directory
#
downloadPackagesToDirectory( $CONFIG{'directory'}, $CONFIG{'mirror'}, @packages );


#
#  If we're only printing then exit here.
#
exit if ( $CONFIG{'print-uris'} );


#
#  Unpack the packages
#
unpackPackages( $CONFIG{'directory'} );


#
#  Now run the post-installation customization.
#
postInstallationCustomization( $CONFIG{'distribution'}, $CONFIG{'directory'} );


#
#  All done
#
print "Installation complete.\n";
exit;







=begin doc

  This routine is designed to test that the host system we're running
 upon has all the required binaries present.

  If any are missing then we'll abort.

=end doc

=cut

sub testSetup
{

    my @required = qw/ rpm rpm2cpio wget /;

    foreach my $file ( @required )
    {
        if ( ( ! -x "/bin/$file" ) &&
             ( ! -x "/usr/bin/$file" ) )
        {
            print "The following (required) binary appears to be missing:\n";
            print "\t" . $file . "\n";
            print "Aborting...\n";
            exit;
        }
    }
}



=begin doc

  Make sure this script is being run by a user with UID 0.

=end doc

=cut

sub testRootUser
{
    if ( $EFFECTIVE_USER_ID != 0 )
    {
        print<<E_O_ROOT;

  In order to use this script you must be running with root privileges.

  This is necessary to mount /proc inside the new install and run
  chroot, etc.

E_O_ROOT
        exit;
    }
}



=begin doc

  Parse our command line arguments.

=end doc

=cut

sub parseCommandLineArguments
{
    my $HELP    = 0;
    my $MANUAL  = 0;
    my $VERSION = 0;

    #
    #  Parse options.
    #
    GetOptions(
               # Main options
               "directory=s",        \$CONFIG{'directory'},
               "distribution=s",     \$CONFIG{'distribution'},

               # Misc options.
               "arch=s",             \$CONFIG{'arch'},
               "cache=s",            \$CONFIG{'cache'},
               "cache-dir=s",        \$CONFIG{'cache-dir'},
               "clean-cache",        \$CONFIG{'clean-cache'},
               "list-distributions", \$CONFIG{'list-distributions'},
               "print-uris",         \$CONFIG{'print-uris'},

               # Help options
               "help",         \$HELP,
               "manual",       \$MANUAL,
               "verbose",      \$CONFIG{'verbose'},
               "version",      \$VERSION

              );

    pod2usage(1) if $HELP;
    pod2usage(-verbose => 2 ) if $MANUAL;


    if ( $VERSION )
    {
        my $REVISION      = '$Revision: 1.31 $';
        if ( $REVISION =~ /1.([0-9.]+) / )
        {
            $REVISION = $1;
        }

        print( "rinse release $RELEASE - CVS: $REVISION\n" );
        exit;
    }
}



=begin doc

  Test that our arguments are sane and sensible.

  Mostly this just means ensuring that mandatory options are present.

=end doc

=cut

sub sanityCheckArguments
{
    #
    #  Distribution is mandatory
    #
    if ( ! $CONFIG{'distribution'} )
    {
        print <<EOF;

  The name of the distribution to install is mandatory.

  To see all supported distributions run:

$0 --list-distributions

EOF
        exit;
    }


    #
    #  Installation root is mandatory *unless* we're just printing
    # the URLs we'd download
    #
    if ( ( ! $CONFIG{'directory'} ) &&
         ( ! $CONFIG{'print-uris'} ) )
    {
        print <<EOF;

  The directory to install into is mandatory.  Please specify one with
 --directory

EOF
        exit;
    }

    if ( $CONFIG{'arch'} )
    {
        if ( ( $CONFIG{'arch'} ne "i386" ) &&
             ( $CONFIG{'arch'} ne "amd64" ) )
        {
            print <<EOARCH;

  Only two architectures are supported:

   i386
   amd64

EOARCH
            exit;
        }
    }
}



=begin doc

  Show the distributions we are capable of installing.

=end doc

=cut

sub listDistributions
{
    my @avail;

    #
    #  An installable distribution requires both:
    #
    #  1.  A package/configuration file.
    #
    #  2.  A scripts directory.  (Even if empty!)
    #
    foreach my $file ( glob( "/etc/rinse/*.packages" ) )
    {
        #
        #  Get the name - so that we can test for the directory.
        #
        if ( $file =~ /^(.*)\/(.*)\.packages$/ )
        {
            push @avail, $2 if ( -d "/usr/lib/rinse/" . $2 );
        }
    }

    if ( @avail )
    {
        print "The following distributions are available:\n";
        foreach my $a ( @avail )
        {
            print "\t$a\n";
        }
    }
}



=begin doc

  Clean our cache of .rpm files.

=end doc

=cut

sub cleanCache
{
    my $dir = $CONFIG{'cache-dir'};

    #
    #  Nested function to remove .rpm files.
    #
    sub removePackages
    {
        my $file = $File::Find::name;
        if ( $file =~ /\.rpm$/ )
        {
            $CONFIG{'verbose'} && print "Removing: $file\n";
            unlink( $file );
        }
    }

    #
    #  Call our function.
    #
    find( { wanted => \&removePackages, no_chdir => 1 }, $dir );

}



=begin doc

  Return the list of packagse which are required for a basic
 installation of the specified distribution.

  These packages are located in the configuration file in /etc/rinse.

=end doc

=cut

sub getDistributionPackageList
{
    my( $distribution ) = (@_);

    my $file = "/etc/rinse/$distribution.packages";

    if ( ! -e $file )
    {
        print <<EOF;

  The package list for the distribution $distribution was not found.

  We expected to find:

    $file

  Aborting.

EOF
        exit;
    }

    #
    #  Read the file, skipping blank lines and comments
    #
    my @packages;


    open( FILE, "<", $file ) or die "Failed to open $file - $!";
    foreach my $line ( <FILE> )
    {
        next if ( !$line );
        chomp( $line );
        next if ( $line =~ /^#/ );
        next if ( !length( $line ) );

        push( @packages, $line );
    }
    close( FILE );

    #
    #  Return the list in a sorted fashion.
    #
    return( sort {lc($a) cmp lc($b) } @packages );
}



=begin doc

  Find the mirror which should be used for the specified distribution.

=end doc

=cut

sub getDistributionMirror
{
    my( $dist, $arch ) = (@_);

    my $file = "/etc/rinse/rinse.conf";

    if ( ! -e $file )
    {
        print <<EOF;

  The configuration file was not found.

  We expected to find:

    $file

  Aborting.

EOF
        exit;
    }

    open( INPUT, "<", $file ) or die "Failed to open $file - $!";

    #
    #  Are we in the block of the named distribution?
    #
    my $indist = 0;

    #
    #  Configuration values we've read.
    #
    my %options;

    foreach my $line ( <INPUT> )
    {
        next if ( !$line || !length($line ) );
        next if $line =~ /^#/;
        chomp( $line );

        if ( $line =~ /^\[([^]]+)\]/ )
        {
            if ( lc($1) eq lc($dist) )
            {
                $indist = 1;
            }
            else
            {
                $indist = 0;
            }
        }
        elsif ( ( $line =~ /([^=]+)=([^\n]+)/ ) && $indist )
        {
            my $key = $1;
            my $val = $2;

            # Strip leading and trailing whitespace.
            $key =~ s/^\s+//;
            $key =~ s/\s+$//;
            $val =~ s/^\s+//;
            $val =~ s/\s+$//;

            $options{$key} = $val;
        }
    }
    close( INPUT );

    #
    #  Did we find it?
    #
    my $key = "mirror." . $arch;
    return( $options{$key} )     if ( $options{$key} );
    return( $options{'mirror'} ) if ( $options{'mirror'} );

    #
    #  Error if we didn't.
    #
    print <<EOF;

  We failed to find a distribution mirror for $dist ($arch)
 in the file: $file

  Aborting
EOF
    exit;
}



=begin doc

  Attempt to download each of the named packages from the specified
 mirror, and place them in the given directory.

  Use the cache unless we're not supposed to.

=end doc

=cut

sub downloadPackagesToDirectory
{
    my( $dir, $mirror, @packages ) = ( @_ );

    #
    #  Cache directory.
    #
    my $cache = "$CONFIG{'cache-dir'}/$CONFIG{'distribution'}.$CONFIG{'arch'}/";

    #
    #  Unless we've been told not to then copy packages from
    # the cache.
    #
    if ( ( $CONFIG{'cache'} ) && !$CONFIG{'print-uris'} )
    {
        #
        #  Make sure we have a cache directory.
        #
        if ( -d $cache )
        {
            $CONFIG{'verbose'} && print "Copying files from cache directory: $cache\n";
            copyPackageFiles( $cache, $dir );
        }
    }

    #
    #  Find the links available for download on our mirror.
    #
    my %links = findMirrorContents( $mirror );


    #
    #  Count of links, and the currently active download.
    # Used purely for the status updates..
    #
    my $count = 0;
    my $total = $#packages;

    #
    #  Process each package we're supposed to fetch.
    #
    foreach my $package ( @packages )
    {
        my $found = 0;

        $CONFIG{'verbose'} && print "-Download $package\n";

        #
        # Find the candidate package to download from our list of links.
        #
        foreach my $key ( keys %links )
        {
            #
            #  If we have haven't found the package yet, and the name
            # of the link is *longer* than the name of the package we're
            # looking for.
            #
            #  (ie. to cope with -$ver.$arch.$rpm).
            #
            if ( ( ! $found ) &&
                 ( length( $key ) > length( $package ) ) )
            {
                # get the substring of the link we've got
                my $pre = substr( $key, 0, length( $package ) );
                my $post= substr( $key, length( $package ), 2 );

                if ( ( lc( $pre ) eq lc( $package ) ) &&
                     $post =~ /-[0-9]/ )
                {
                    $found += 1;

                    if ( $CONFIG{'print-uris'} )
                    {
                        print $mirror . "/" . $key . "\n";
                    }
                    else
                    {
                        #
                        #  Print message and padding.
                        #
                        my $msg =  "\r[$count:$total] Downloading: $key ..";
                        while( length( $msg ) < ( $CONFIG{'width'} -1) )
                        {
                            $msg .= " ";
                        }
                        print $msg;

                        # download - unless already present.
                        if ( ! -e "$dir/$key" )
                        {
                            system( "wget --quiet -O $dir/$key $mirror/$key" );
                        }
                    }
                }
            }

        }

        if ( !$found )
        {
            print "[Harmless] Failed to find download link for $package\n";
        }

        $CONFIG{'verbose'} && print "+Download $package\n";

        $count += 1;
    }

    # newline.
    print "\r";
    print " " x ($CONFIG{'width'} - 1 );
    print "\n";

    #
    #  Now update the cache.
    #
    if ( ( $CONFIG{'cache'} ) && !$CONFIG{'print-uris'} )
    {
        $CONFIG{'verbose'} && print "Copying files to cache directory: $cache\n";

        #
        #  Make sure we have a cache directory.
        #
        mkpath( $cache, 0, 0755 ) if ( ! -d $cache );

        copyPackageFiles( $dir, $cache );
    }
}



=begin doc

  Find the links which are contained in the given HTML
 page.

=end doc

=cut

sub findMirrorContents
{
    my ( $mirror ) = ( @_ );

    #
    #  Download
    #
    my $index = downloadURL( $mirror );

    #
    #  Now we want to store all the links we've found.
    #
    my %links;

    #
    # Parse the HTML.
    #
    foreach my $line ( split( /\n/, $index ) )
    {
        #
        #  Look for contents of the form:
        #
        #     href="...">
        #
        while ( $line =~ /href=\"([^\"]+)\">(.*)/i )
        {
            my $link = $1;
            $line    = $2;

            # strip any path from the link.
            $link = $2 if ( $link =~ /^(.*)\/(.*)$/ );

            # ignore any non-RPM links.
            next if ( $link !~ /\.rpm$/i );

            #  Decode entities.  eg. libstd++
            $link = uri_unescape( $link );

            # store
            $links{$link} = 1;
        }
    }

    #
    #  Now we need to do something sneaky.
    #
    #  If we're looking at installing i386, or amd64,
    # then we need to *only* return those things.
    #
    my $i386 = undef;
    $i386 = 1 if ( $CONFIG{'arch'} =~ /i386/ );
    $i386 = 0 if ( $CONFIG{'arch'} =~ /amd64/ );

    foreach my $key ( sort keys %links )
    {
        # i386/i486/i586/i686 packages when we're dealing with amd64 installs.
        if ( ( $key =~ /\.i[3456]86\./ ) && !$i386 )
        {
            delete( $links{$key} );
        }

        # amd64 packages when we're dealing with i386 installs.
        if ( $key =~ /\.x86_64\./ && ($i386) )
        {
            delete( $links{$key} );
        }
    }

    return( %links );
}



=begin doc

  Download the contents of an URL and return it.

=end doc

=cut

sub downloadURL
{
    my( $URL ) = ( @_ );

    #
    #  Create the helper.
    #
    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;

    #
    #  Fetch the URI
    #
    my $response = $ua->get( $URL );

    #
    #  If it worked then return it
    #
    if ($response->is_success)
    {
        return( $response->content );
    }
    else
    {
        print "Failed to fetch : $URL\n";
        print  "\t" . $response->status_line . "\n\n";
        exit;
    }
}



=begin doc

  Unpack each of the RPM files which are contained in the given
 directory.


=end doc

=cut

sub unpackPackages
{
    my( $dir ) = ( @_ );

    #
    #  Get a sorted list of the RPMs
    #
    my @rpms = glob( $dir . "/*.rpm" );
    @rpms    = sort {lc($a) cmp lc($b) } @rpms;

    #
    #  For each RPM file: convert to .tgz
    #
    foreach my $file ( @rpms )
    {
        $CONFIG{'verbose'} && print "-extract $file\n";

        #
        #  Show status
        #
        my $name = $file;
        if ( $name =~ /(.*)\/(.*)/ )
        {
            $name = $2;
        }

        #
        #  Show status output.
        #
        my $msg =  "\rExtracting .rpm file : $name";
        while( length( $msg ) < ( $CONFIG{'width'} -1) )
        {
            $msg .= " ";
        }
        print $msg;

        #
        #  Run the unpacking command.
        #
        my $cmd = "rpm2cpio $file | (cd $CONFIG{'directory'} ; cpio --extract --make-directories --no-absolute-filenames --preserve-modification-time) 2>/dev/null >/dev/null";
        system( $cmd );

        $CONFIG{'verbose'} && print "+extract $file\n";
    }
    print "\r";
    print " " x $CONFIG{'width'};
    print "\n";

}



=begin doc

  Run the post-installation customization scripts for the given
 distribution.

=end doc

=cut

sub postInstallationCustomization
{
    my( $distribution, $prefix ) = (@_);

    my $scriptDir = "/usr/lib/rinse/$distribution/";

    #
    #  Setup environment for the post-install scripts.
    #
    $ENV{'ARCH'}      = $CONFIG{'arch'};
    $ENV{'mirror'}    = $CONFIG{'mirror'};
    $ENV{'dist'}      = $CONFIG{'distribution'};
    $ENV{'directory'} = $CONFIG{'directory'};


    foreach my $file ( sort( glob( $scriptDir . "/*" ) ) )
    {
        $CONFIG{'verbose'} && print "-script $file\n";

        #
        #  Report on progress all the time.
        #
        my $name = $file;
        if ( $name =~ /(.*)\/(.*)/ )
        {
            $name = $2;
        }
        print "Running post-install script $name:\n";
        my $cmd = "$file $prefix";
        system( $cmd );

        $CONFIG{'verbose'} && print "+script $file\n";
    }
}



=begin doc

  Copy a collection of RPM files from one directory to another.

=end doc

=cut

sub copyPackageFiles
{
    my( $src, $dest ) = (@_);

    $CONFIG{'verbose'} && print "- archiving from $src - $dest\n";

    foreach my $file ( sort( glob( $src . "/*.rpm" ) ) )
    {
        # strip path.
        if ( $file =~ /^(.*)\/(.*)$/ )
        {
            $file = $2;
        }

        #
        # if the file isn't already present in the destination then
        # copy it there.
        #
        if ( ! -e $dest . "/" . $file )
        {
            copy( $src  . "/" . $file,
                  $dest . "/" . $file );
        }
    }

    $CONFIG{'verbose'} && print "+ archiving from $src - $dest\n";
}




=begin doc

  Find and return the width of the current terminal.  This makes
 use of the optional Term::Size module.  If it isn't installed then
 we fall back to the standard size of 80x25.

=end doc

=cut


sub getTerminalSize
{
    my $testModule  = "use Term::Size;";

    my $width  = 80;
    my $height = 25;

    #
    #  Test loading the size module.  If this fails
    # then we will use the defaults sizes.
    #
    eval( $testModule );
    if ( $@ )
    {
    }
    else
    {
        #
        # Term::Size is available, so use it to find
        # the current terminal size.
        #
        ($width, $height ) = Term::Size::chars();
    }

    return( $width, $height );
}



=begin doc


  Taken from the URI::Escape module, which contains the following
 copyright:

    Copyright 1995-2004 Gisle Aas.

    This program is free software; you can redistribute it and/or modify
    it under the same terms as Perl itself.

=end doc

=cut

sub uri_unescape
{
    # Note from RFC1630:  "Sequences which start with a percent sign
    # but are not followed by two hexadecimal characters are reserved
    # for future extension"
    my $str = shift;
    if (@_ && wantarray)
    {
        # not executed for the common case of a single argument
        my @str = ($str, @_);  # need to copy
        foreach (@str)
        {
            s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
        }
        return @str;
    }
    $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
    $str;
}
