#!/usr/bin/env python2.4
# (c) Zygmunt Krynicki 2005,
# Licensed under GPL, see COPYING for the whole text

import os
import os.path
import sys
import re
import logging
import DebPackage
import urllib
import apt
import apt_pkg

from string import Template

class PackageInfo(object):
    """
    Record describing one scanned package.

    name, arch and repo are stored as given; executables may be any iterable
    of paths and is stored as a set, so duplicate paths collapse.
    """
    def __init__(self, name, arch, repo, executables):
        self.name = name
        self.arch = arch
        self.repo = repo
        self.executables = set(executables)
    def __str__(self):
        """
        Return the record as "arch|repo|name|exe1,exe2,...".
        The executables are sorted, because a set has no defined order and
        the output would otherwise differ from run to run.
        """
        return "%s|%s|%s|%s" % (self.arch, self.repo, self.name, ",".join(sorted(self.executables)))
    def display(self):
        """
        Print a human readable, two line description to standard output.
        """
        # a single parenthesised argument prints the same text with the print
        # statement as it would with a print function
        print("Package: %s for arch %s in repo %s" % (self.name, self.arch, self.repo))
        print("Executables:  %s" % (self.executables,))

def strip_comment(line):
    """
    Return line with everything from the first '#' onwards removed.
    A line without '#' is returned unchanged. Quoting is not taken into
    account, so a '#' inside a quoted string is cut as well.
    """
    # splitting once on '#' leaves the text in front of it as the first item
    return line.split('#', 1)[0]

def get_alternatives(package):
    """
    Attempts to extract alternatives registered by a package.

    The "postinst" control file of the package is read line by line
    (backslash-newline continuations are joined first, comments removed).
    Three kinds of lines are recognised:
      * variable=value          -> remembered for later $variable expansion
      * for variable in list    -> the list is remembered, later arguments
                                   are expanded once for every list value
      * update-alternatives ... -> every --install / --slave group is
                                   turned into a (link, path) pair

    package must provide getControlFile(name) and a filename attribute
    (the latter is used in log messages only).

    This is a generator. It yields (link, path) tuples, both made to start
    with '/'. Pairs that still contain a '$' after expansion are logged and
    skipped. Nothing is yielded when the package has no postinst.

    NOTE: This code is a big HACK since we don't really understand or execute
    postinst shell scripts here. Some alternatives are not retrieved.
    The end of a for loop is not tracked: once a loop header was seen its
    values stay in effect for the rest of the script.
    """
    contents = package.getControlFile("postinst")
    shell_vars = dict()
    values_of_i = None
    name_of_i = None
    def expand_values(*text_list):
        # yields one tuple of expanded texts, or one per loop value when a
        # for loop header was seen before
        logging.debug("About to perform substitution on %s using %s and %s (for i-list)" % (text_list, shell_vars, values_of_i))
        # the variables are handed over as a mapping rather than as keyword
        # arguments, so a shell variable called "self" cannot clash with
        # the parameter of safe_substitute
        if values_of_i is None:
            yield tuple([Template(text).safe_substitute(shell_vars) for text in text_list])
        else:
            for i in values_of_i:
                shell_vars[name_of_i] = i
                yield tuple([Template(text).safe_substitute(shell_vars) for text in text_list])
    def one_element_tuple_to_value(it):
        # unwraps 1-tuples so that expanding a single text gives plain strings
        for item in it:
            if len(item) == 1:
                yield item[0]
            else:
                yield item
    def nice_expand(*text_list):
        # a set, so identical expansions are reported only once
        return set(one_element_tuple_to_value(expand_values(*text_list)))
    if contents is not None:
        for line in contents.replace("\\\n","").splitlines():
            line = strip_comment(line).strip()
            if line == "":
                continue
            words = line.split()
            # this ugly hack is capable of detecting both
            # variable=value as well as variable="list of values"
            if '=' in words[0] and "==" not in words[0]:
                name = words[0][:words[0].index('=')]
                value = line[line.index('=')+1:].strip('\'"')
                shell_vars[name] = value
                logging.debug("Based on line: %s" % line)
                logging.debug("Variable binding established: %s=%s" % (name, value))
            # this ugly hack is capable of detecting
            # for something in a list of values; do ...
            elif len(words) > 3 and words[0] == 'for' and words[2] == 'in' and ';' in line:
                name_of_i = words[1]
                values_of_i = " ".join(nice_expand (line[len('for %s in' % name_of_i):line.index(';')])).split()
                logging.debug("Based on line: %s" % line)
                logging.debug("Iterable variable binding established %s=%s" % (name_of_i, values_of_i))
            # this ugly hack is capable of detecting
            # for something in a list of values\n
            elif len(words) > 3 and words[0] == 'for' and words[2] == 'in' and ';' not in line:
                name_of_i = words[1]
                values_of_i = " ".join(nice_expand(" ".join(words[3:]))).split()
                logging.debug("Based on line: %s" % line)
                logging.debug("Iterable variable binding established %s=%s" % (name_of_i, values_of_i))
            else:
                if words[0] == "update-alternatives":
                    words = words[1:]
                    while len(words) > 0:
                        if words[0] in ("--install", "--slave"):
                            # both options are followed by "link name path"
                            # (--install by a priority as well); without the
                            # path there is nothing to report, and indexing
                            # would raise IndexError and abort the whole scan
                            if len(words) < 4:
                                logging.error("%s: aborting analysis of update-alternatives, too few arguments after '%s', line: %s" % (
                                    package.filename, words[0], line))
                                break
                            name = words[1]
                            target = words[3]
                            if words[0] == "--install":
                                words = words[5:]
                            else:
                                words = words[4:]
                        # all remaining options are ignored
                        elif words[0] in ("--set", "--remove"):
                            words = words[3:]
                            continue
                        elif words[0] in ("--verbose", "--quiet", "--test", "--help", "--all"):
                            words = words[1:]
                            continue
                        elif words[0] in ("--altdir", "--admindir", "--list", "--display", "--auto", "--config", "--remove-all"):
                            words = words[2:]
                            continue
                        # sometimes || true or redirection follows the last argument
                        elif words[0] in (";", "||") or words[0].startswith(">"):
                            break
                        else:
                            logging.error("%s: aborting analisys of update-alternatives on argument '%s', line: %s" % (
                                package.filename, words[0], line))
                            break
                        # only reached for --install and --slave
                        # sometimes pathnames are quoted
                        name = name.strip('\'"')
                        target = target.strip('\'"')
                        for new_name, new_target in nice_expand(name, target):
                            if not new_name.startswith('/'):
                                new_name = "/" + new_name
                            if not new_target.startswith('/'):
                                new_target = "/" + new_target
                            # a '$' left over means a variable we could not resolve
                            if '$' in new_name or '$' in new_target:
                                logging.error("%s: FIXME alternatives need manual analisys: %s -> %s" % (
                                    package.filename, new_name, new_target))
                            else:
                                yield new_name, new_target

def get_symlinks(package):
    """
    Return a list of (link, target) tuples, one for each item of the
    package that is a DebPackage.SymbolicLinkInfo. The link gets a leading
    '/', the target is taken over as stored in the item.
    """
    links = []
    for entry in package.items:
        if isinstance(entry, DebPackage.SymbolicLinkInfo):
            links.append(("/" + entry.name, entry.target))
    return links

def get_executables(package):
    """
    Return a list with the path (with a leading '/') of every item of the
    package that is a DebPackage.FileInfo reporting itself as executable,
    except for files named like shared libraries.
    """
    found = []
    for entry in package.items:
        if not isinstance(entry, DebPackage.FileInfo):
            continue
        if not entry.is_executable():
            continue
        # lib*.so* are usually +x so we'll exclude them
        if re.match("^lib.*\\.so.*", os.path.basename(entry.name)):
            continue
        found.append("/" + entry.name)
    return found

def get_symlinks_to_executables(symlinks, executables):
    """
    Lazily produce the name of every (name, target) pair in symlinks whose
    target is contained in executables. The membership test is made while
    iterating, so changes to executables made in between are seen.
    """
    return (link for link, destination in symlinks if destination in executables)

def normalize_symlinks(symlinks):
    """
    Generator turning every (name, target) pair into (name, absolute target).
    A target not starting with '/' is taken relative to the directory of
    name; every target is then cleaned up with os.path.normpath.
    """
    for link_path, destination in symlinks:
        is_absolute = destination.startswith("/")
        if not is_absolute:
            base_dir = os.path.dirname(link_path)
            destination = os.path.join(base_dir, destination)
        yield link_path, os.path.normpath(destination)

def get_repo(package):
    """
    Return the path component that follows "pool" in package.filename.
    When the path has no "pool" component an error is logged and None is
    returned.
    """
    components = package.filename.split(os.path.sep)
    if "pool" not in components:
        logging.error("Unable to determine repository of package %s" % package.filename)
        return None
    return components[components.index("pool") + 1]

def analyze_package(filename):
    """
    Load the package stored in filename and work out which commands it
    provides.

    Candidates are the executable files of the package plus every symlink
    or alternative that leads, directly or through other symlinks, to one
    of them. Only paths starting with /usr/bin, /usr/sbin, /bin, /sbin or
    /usr/games are kept.

    Returns a PackageInfo, or None when the package cannot be loaded (the
    error is logged) or when no path is left after filtering.
    """
    try:
        package = DebPackage.load(filename)
    except Exception:
        ex = sys.exc_info()[1]
        logging.error("Unable to process package %s: %s" % (filename, ex))
        return None
    logging.info("PACKAGE: %s" % package.filename)
    repo = get_repo(package)
    executables = get_executables(package)
    logging.debug("EXECUTABLES:")
    for executable in executables:
        logging.debug("\t%s" % executable)
    symlinks = list()
    symlinks.extend(get_symlinks(package))
    symlinks.extend(get_alternatives(package))
    logging.debug("SYMLINKS:")
    for name, target in symlinks:
        logging.debug("\t%s -> %s" % (name, target))
    symlinks = list(normalize_symlinks(symlinks))
    logging.debug("NORMALIZED SYMLINKS:")
    for name, target in symlinks:
        logging.debug("\t%s -> %s" % (name, target))
    # Add the symlinks that lead to an executable. A symlink may point at
    # another symlink, so repeat until a pass finds nothing new. (Extending
    # the list with a generator that reads the very same list resolved
    # such chains only when the symlinks happened to be in a lucky order.)
    known = set(executables)
    while True:
        fresh = set(get_symlinks_to_executables(symlinks, known)) - known
        if not fresh:
            break
        known.update(fresh)
        executables.extend(sorted(fresh))
    logging.debug("FINAL LIST OF EXECUTABLES:")
    for executable in executables:
        logging.debug("\t%s" % executable)
    command_dirs = ('/usr/bin', '/usr/sbin', '/bin', '/sbin', '/usr/games')
    def in_command_dir(path):
        # plain prefix test against each of the directories above
        for prefix in command_dirs:
            if path.startswith(prefix):
                return True
        return False
    executables = [item for item in executables if in_command_dir(item)]
    if not executables:
        return None
    return PackageInfo(package.name, package.arch, repo, executables)

def verify_package(filename):
    """
    check if the given pkg is our arch/our distribution

    filename is the bare file name of a package, expected to look like
    <name>_<version>_<arch>.deb with the version possibly URL-quoted.
    The module globals arch (architecture being scanned) and cache (apt
    cache opened for it) are read.

    Returns True when the package architecture is "all" or equal to arch
    and its version equals the candidate version the cache reports for the
    package name (compared without epoch). Returns False otherwise, also
    for a file name that does not have the expected form.
    """
    m = re.match(r"(.*)_(.*)_(.*)\.deb$", filename)
    if m is None:
        # without this check a stray file such as "foo.deb" would raise
        # AttributeError below and abort the whole scan
        logging.debug("Skipping '%s' because its name is not of the form name_version_arch.deb" % filename)
        return False
    pkgname = m.group(1)
    ver = m.group(2)
    # fix the quoting
    ver = urllib.unquote(ver)
    pkgarch = m.group(3)
    # check arch
    if pkgarch != "all" and arch != pkgarch:
        return False
    # check distro
    # "xxx" stands for "package unknown to the cache"
    candVer = "xxx"
    if cache.has_key(pkgname):
        candVer = cache[pkgname].candidateVersion
        # strip the epoch; split only once, a version that has an epoch
        # may contain further colons
        if candVer and ":" in candVer:
            candVer = candVer.split(":", 1)[1]
    if candVer != ver:
        logging.debug("Skipping because '%s' (%s) it's not in our distro release" % (ver, pkgname))
        return False
    return True

def dir_walk(output, dirname, names):
    """
    Visitor in the form os.path.walk calls it: output is the argument
    passed through by the walk, dirname the directory being visited and
    names the entries found in it.

    Every entry ending in ".deb" that passes verify_package is analyzed;
    each resulting package record is printed to output as one line.
    """
    for entry in names:
        if not entry.endswith(".deb"):
            continue
        if not verify_package(entry):
            continue
        info = analyze_package(os.path.join(dirname, entry))
        if info is not None:
            print >> output, info

def scan(dir_path, output):
    """
    Walk the directory tree below dir_path with os.path.walk, using
    dir_walk as the visitor. output is handed to dir_walk unchanged as its
    first argument.
    """
    os.path.walk(dir_path, dir_walk, output)
    
if __name__ == "__main__":
    # Scans the pool once for every architecture and writes one record per
    # package with commands to scan.data; the log goes to scan.log.
    # arch and cache are bound here at module level on purpose:
    # verify_package() reads both of them as globals.

    # init logging
    logging.basicConfig(level=logging.DEBUG,
            filename="scan.log",
            format='%(levelname)s %(message)s',
            filemode='w')
    # the directory to scan may be given as the only command line argument,
    # handy smaller targets for debugging are for instance:
    #   mirror/archive.ubuntu.com/pool/main/a
    #   mirror/archive.ubuntu.com/pool/universe/c/command-not-found
    #   mirror/archive.ubuntu.com/pool/main/g/gksu
    #   mirror/archive.ubuntu.com/pool/main/v
    if len(sys.argv) == 2:
        pool_dir = sys.argv[1]
    else:
        pool_dir = "mirror/archive.ubuntu.com/pool"
    # init output
    filename = "scan.data"
    out = open(filename, "w")
    try:
        for arch in ["i386","amd64","powerpc"]:
            # init arch specific cache
            apt_pkg.Config.Set("APT::Architecture",arch)
            apt_pkg.Config.Set("Dir::state","./apt/")
            apt_pkg.Config.Set("Dir::Etc","./apt")
            cache = apt.Cache(apt.progress.OpProgress())
            prog = apt.progress.FetchProgress()
            cache.update(prog)
            cache.open(apt.progress.OpProgress())
            # now do the scan for this arch
            scan(pool_dir, out)
    finally:
        # close explicitly so that the records written so far reach the
        # disk even when a scan dies with an exception
        out.close()

