#!/usr/bin/ruby

# pkg-ruby-get-sources - downloads sources of Debian ruby extra packages
# Copyright (C) 2005 Antonio S. de A. Terceiro <asaterceiro@inf.ufrgs.br>

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

# TODO: check for already downloaded files (and their size, maybe)
# TODO: delete temporary bzipped archives after converting to gzipped

require 'getoptlong'
require 'net/http'
require 'open-uri'
require 'rexml/document'
require 'shellwords'
require 'uri'
require 'yaml'

# Default settings. --sources replaces $sources_file, --target-directory
# replaces $target_directory, and --verbose / --list-available switch the two
# flags on.

# sources list read from the local filesystem
$sources_file = '/usr/share/ruby-pkg-tools/pkg-ruby-extras.sources'
# sources list fetched over HTTP
$sources_url = 'http://pkg-ruby-extras.alioth.debian.org/pkg-ruby-extras.sources'
# watch file of the package in the current directory
$watch_file = "debian/watch"
# where downloaded tarballs are written
$target_directory = '../tarballs'

$verbose = $list_available = false

# Writes an informational line to standard output, tagged with "I: ".
def info_msg(text)
  $stdout.puts("I: #{text}")
end

# Writes an error line, tagged with "E: ", to standard error so that
# diagnostics never get mixed into the package listing or the progress
# display printed on standard output.
def error_msg(text)
  $stderr.puts("E: #{text}")
end

# Options understood by this script: long name, optional short alias, and
# whether the option takes a value.
option_table = [
  ['--sources',          '-s', GetoptLong::REQUIRED_ARGUMENT],
  ['--target-directory', '-t', GetoptLong::REQUIRED_ARGUMENT],
  ['--verbose',          '-v', GetoptLong::NO_ARGUMENT],
  ['--list-available',   '-l', GetoptLong::NO_ARGUMENT],
  ['--help',             '-h', GetoptLong::NO_ARGUMENT],
  ['--version',                GetoptLong::NO_ARGUMENT]
]
opts = GetoptLong.new(*option_table)

# One-line description per option, in the order --help prints them.
help = {}
help['--sources'] = "indicates the sources file (defaults to #{$sources_file})"
help['--target-directory'] = "indicates where to put downloaded tarballs (defaults to #{$target_directory})"
help['--verbose'] = 'display verbose information while running'
help['--list-available'] = 'does not download any file; only list available packages and versions'
help['--help'] = 'shows this help message'
help['--version'] = 'shows version information and exit'

# Upstream archive extensions this script accepts, mapped to the extension the
# downloaded file is saved with. A nil value keeps the upstream extension.
$extensions = {}
# dpkg-source wants .tar.gz rather than .tgz, so that one gets renamed
$extensions['tgz'] = 'tar.gz'
# these archive types are saved under their own extension
%w[tar.gz tar.bz2].each { |kept| $extensions[kept] = nil }

# Walk over the options given on the command line and update the global
# settings accordingly. --help and --version print their text and stop the
# script; any error raised while parsing ends the script with status 1.
begin
  opts.each do |name, value|
    case name
    when '--verbose' then $verbose = true
    when '--sources' then $sources_file = value
    when '--target-directory' then $target_directory = value
    when '--list-available' then $list_available = true
    when '--help'
      puts "Usage: #{$PROGRAM_NAME} [options]"
      puts "Options:"
      help.each do |optname, helpmsg|
        puts(format("  %-20s %s\n", optname, helpmsg))
      end
      exit
    when '--version'
      puts "#{$PROGRAM_NAME}, version #{0.1}"
      exit
    end
  end
rescue
  exit 1
end

# Report the effective settings when running verbosely.
if $verbose
  info_msg("Running in verbose mode")
  info_msg("Reading sources from #{$sources_url}, #{$sources_file}")
  info_msg("Downloading files to #{$target_directory}")
end

# Only a real download needs somewhere to write to, so the target directory
# is not checked when we are merely listing packages.
unless $list_available
  unless File.directory?($target_directory) && File.writable?($target_directory)
    error_msg("#{$target_directory} should be a valid writable directory")
    exit 1
  end
end

# Guesses the package name and upstream version from the first line of
# debian/changelog, e.g. "liblocale-ruby (0.1-2) unstable; urgency=low"
# yields ["liblocale-ruby", "0.1"].
#
# The Debian revision is whatever follows the last hyphen of the version, so
# an upstream version may itself contain hyphens ("1.0-rc1-3" => "1.0-rc1");
# a version without any hyphen is returned unchanged.
#
# Returns [package, version]. Ends the script with status 2 when
# debian/changelog is unreadable or its first line cannot be parsed.
def guess_package
  info_msg("Trying to guess package name and version from debian/changelog ... ") if $verbose
  unless File.readable?("debian/changelog")
    error_msg("Couldn't read debian/changelog")
    exit 2
  end

  # only the first line matters, so there is no need to read the whole file;
  # to_s covers an empty changelog, for which gets returns nil
  changelogline = File.open("debian/changelog") { |changelog| changelog.gets }.to_s
  matches = /\A(\S+)\s*\(([^)\s]+)\)/.match(changelogline)
  unless matches
    error_msg("Couldn't parse package name and version from debian/changelog")
    exit 2
  end

  package = matches[1]
  # drop the Debian revision (the part after the last hyphen), if there is one
  version = matches[2].sub(/-[^-]*\z/, '')
  info_msg("Package: #{package}") if $verbose
  info_msg("Version: #{version}") if $verbose

  [package, version]
end

# Reads the YAML object stored in a sources file.
#
# source_file is either a local path or an http(s)/ftp URL. URLs are fetched
# through open-uri's URI.open and everything else through File.open; plain
# Kernel#open is avoided because it no longer opens URLs on Ruby 3.0 and
# later, and because it would run a "|command" string as a command.
#
# Returns whatever the YAML document parses to, or false when the source
# cannot be read or parsed. The reason for a failure is only printed in
# verbose mode, since callers try several sources in turn.
def get_available_sources(source_file)
  info_msg("Loading available sources from #{source_file} ... ") if $verbose

  remote = source_file.to_s =~ %r{\A(?:https?|ftp)://}i
  opener = remote ? URI.method(:open) : File.method(:open)
  begin
    # NOTE(review): YAML::load is applied to data that may come from the
    # network; with older YAML libraries this can build arbitrary objects.
    # Consider YAML.safe_load once the sources format is confirmed to use
    # plain types only.
    opener.call(source_file) { |io| YAML::load(io) }
  rescue StandardError => exception
    error_msg("#{exception}") if $verbose
    false
  end
end

# Prints one "<package>-<version>" line for every version of every package
# found in the given sources object.
def list_packages(sources)
  sources.each do |name, releases|
    releases.each do |release, _url|
      puts format('%s-%s', name, release)
    end
  end
end

# Follows HTTP redirections starting at uri, up to <limit> levels deep.
#
# uri   - absolute URL, as a String
# limit - how many requests may still be made
#
# Returns the URL (String) that finally answered with a success status, or
# false when the request itself could not be made (the reason is printed).
# Raises Net::HTTPFatalError when the limit is exhausted, when a redirection
# carries no usable Location header, or when the server answers with
# anything other than success or redirection.
def follow(uri, limit = 10)
  # The second argument of Net::HTTPFatalError is the response the error is
  # about; there is none when we give up before making a request.
  raise Net::HTTPFatalError.new('Too many redirections', nil) if limit <= 0

  begin
    response = Net::HTTP.get_response(URI.parse(uri))
  rescue SocketError, SystemCallError, Timeout::Error, URI::InvalidURIError => error
    error_msg("#{error}")
    return false
  end

  case response
  when Net::HTTPSuccess
    uri
  when Net::HTTPRedirection
    location = response['location']
    raise Net::HTTPFatalError.new('Redirection without a Location header', response) unless location

    # Location may be relative to the URL that was just requested
    begin
      target = URI.join(uri, location).to_s
    rescue URI::Error
      raise Net::HTTPFatalError.new("Invalid redirection target: #{location}", response)
    end
    info_msg("Redirecting: #{target}") if $verbose
    follow(target, limit - 1)
  else
    raise Net::HTTPFatalError.new(response.message, response)
  end
end

# Returns the extension a file named source is saved with, according to
# $extensions, or nil when source does not end in a known archive extension.
def archive_extension_for(source)
  ext = nil
  $extensions.each do |upstream, local|
    # match the real suffix only, not the same letters elsewhere in the name
    next unless source.end_with?(".#{upstream}")

    ext = local || upstream
    info_msg("Known archive format: #{source}") if $verbose
  end
  ext
end

# Streams the resource at uri (a parsed HTTP or HTTPS URI) into filename,
# redrawing a progress line tagged with label for every chunk received.
# Returns true when the server answered with a success status and the file
# was written; prints the reason and returns false otherwise.
def fetch_to_file(uri, filename, label)
  saved = false
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    # request_uri keeps the query string, which uri.path would drop
    http.request_get(uri.request_uri) do |res|
      unless res.is_a?(Net::HTTPSuccess)
        error_msg("HTTP Error: #{res.code} #{res.message}")
        next
      end

      total = res.content_length
      current = 0
      # binary mode: the archive must be written byte for byte
      File.open(filename, 'wb') do |file|
        res.read_body do |part|
          current += part.bytesize
          # the server may not announce a length; show a byte count then
          progress = total && total > 0 ? format('%3d%%', (current * 100) / total) : "#{current} bytes"
          # the label is passed as an argument so a '%' in it is printed as is
          print(format("\r[%s] %s", progress, label))
          $stdout.flush
          file.write(part)
        end
        puts
      end
      saved = true
    end
  end
  saved
rescue SocketError, SystemCallError, Timeout::Error => error
  error_msg("#{error}")
  false
end

# Downloads the upstream tarball of a package from a specified URL.
#
# package, version - used to name the file <package>_<version>.orig.<ext>
#                    inside $target_directory
# tarball_url      - where to fetch the archive from; redirections are
#                    followed first. The string passed in is not modified.
#
# Only archives whose name ends in one of the $extensions keys are accepted.
# A .tar.bz2 archive is additionally recompressed into a .tar.gz next to it.
#
# Returns true on success; prints the reason and returns false on failure.
def download(package, version, tarball_url)
  # remove any leading or trailing whitespace (including \n) which would
  # otherwise ruin the progress display; work on a copy so the caller's
  # string (which may be frozen) is left alone
  tarball_url = tarball_url.strip

  begin
    tarball_url = follow(tarball_url)
  rescue Net::HTTPFatalError => error
    error_msg("HTTP Error: #{error}")
    return false
  end
  return false unless tarball_url

  uri = URI.parse(tarball_url)
  info_msg("URL: #{tarball_url}") if $verbose

  # last path component; empty when the path has none
  source = uri.path.to_s.split('/').pop.to_s
  ext = archive_extension_for(source)
  unless ext
    error_msg("Unknown archive type: #{uri.path}")
    return false
  end

  basename = "#{package}_#{version}.orig."
  filename = File.join($target_directory, basename + ext)
  return false unless fetch_to_file(uri, filename, tarball_url)

  info_msg("Saved #{filename}") if $verbose

  if ext == 'tar.bz2'
    orig_tarball = File.join($target_directory, basename + 'tar.gz')
    info_msg("Converting #{filename} to #{orig_tarball}") if $verbose
    # both paths are escaped because they end up on a shell command line
    command = "bzcat #{Shellwords.escape(filename)} | gzip -c > #{Shellwords.escape(orig_tarball)}"
    unless system(command)
      error_msg("Could not convert #{filename} to #{orig_tarball}")
      return false
    end
  elsif $verbose
    info_msg("Nothing to do for archive type: #{ext}")
  end

  true
end

package, version = guess_package
tarball = nil

# First strategy: derive a tarball URL from each URL pattern found in the
# watch file, if the package has one, and stop at the first successful
# download. Nothing is attempted when we are only listing packages. URLs
# tried here are deliberately not kept in tarball: one that failed to
# download must not be reported as found later on.
unless $list_available
  begin
    File.readlines($watch_file).each do |line|
      next unless line =~ /(http|ftp):\/\/\S+/

      # turn the pattern into a URL: drop the backslashes and put the wanted
      # version where the parenthesised group is (block form, so the version
      # is inserted literally)
      candidate = Regexp.last_match(0).gsub(/\\/, '').gsub(/\(.*\)/) { version }
      info_msg("Found source tarball for package #{package}-#{version}: #{candidate}") if $verbose
      exit 0 if download(package, version, candidate)
    end
    info_msg("Found no source tarball by looking at the watch file, continuing") if $verbose
  rescue Errno::ENOENT
    info_msg("No watch file for this package") if $verbose
  rescue StandardError => error
    # any other failure is not fatal either; the next strategies still run
    info_msg("The watch file strategy failed: #{error}") if $verbose
  end
end

# Second strategy: run uscan --dehs and take the upstream URL from its XML
# output, provided it is for the version we are looking for. Skipped when we
# are only listing packages, since that mode must not download anything.
if !$list_available && File.exist?($watch_file)
  begin
    info_msg("Running uscan --dehs and parsing the output ...") if $verbose
    # block form closes the pipe even when reading or parsing raises
    dehs = IO.popen('uscan --dehs') { |io| REXML::Document.new(io.read) }
    upstream_version = dehs.elements['/dehs/upstream-version'].text
    if upstream_version == version
      upstream_url = dehs.elements['/dehs/upstream-url'].text
      info_msg("Found source tarball for package #{package}-#{version}: #{upstream_url}") if $verbose
      exit 0 if download(package, version, upstream_url)
    else
      info_msg("uscan --dehs only gave us a tarball for v. #{upstream_version}, while we were looking for #{version}") if $verbose
    end
  rescue StandardError => error
    # e.g. uscan is missing or its output lacks the expected elements
    info_msg("The uscan --dehs strategy failed: #{error}") if $verbose
  end
end

# Last strategy: look the package up in the sources lists, the local file
# first and the URL second. In listing mode every readable list is printed
# instead; otherwise the first list that knows the wanted version wins.
[$sources_file, $sources_url].each do |file|
  sources = get_available_sources(file)
  next unless sources

  if $list_available
    list_packages(sources)
    next
  end

  unless sources[package]
    error_msg("Package #{package} is not available (in #{file}).")
    next
  end

  if sources[package][version]
    tarball = sources[package][version]
    break
  end

  error_msg("#{package}'s version #{version} is not available (in #{file}).")
end

# don't go beyond here if all we wanted to do was list available packages
exit if $list_available

unless tarball
  error_msg("No source tarball found for package #{package}-#{version}")
  exit 2
end
info_msg("Found source tarball for package #{package}-#{version}: #{tarball}") if $verbose

# actually download the package; a failed download must be visible in the
# exit status instead of looking like success
exit 1 unless download(package, version, tarball)

# vi: sts=2 sw=2 ts=2 et

