require "web/css/scan"
require "uri"
require "stringio"

module Web
  # Collects the outgoing links of an HTML document tree (and of CSS source),
  # resolving every reference against a base URI.
  #
  # Links are stored as [type, uri] pairs. type is one of :a, :area, :frame,
  # :iframe, :img, :stylesheet or :css; uri is the resolved URI object.
  # Pairs accumulate across successive calls to #parse and #parse_css.
  class LinkExtor
    include Enumerable

    # Matches the argument of a CSS url(...) token.
    CSS_URL = /url\((.+?)\)/

    # Matches a value wrapped in a pair of identical quotes.
    QUOTED = /\A(["'])(.*)\1\z/m

    # The URI that relative references are resolved against. It is replaced
    # whenever #parse meets a <base href="..."> element.
    attr_accessor :base

    # base:: a URI object, or a string accepted by URI.parse.
    #
    # Raises URI::InvalidURIError when a string base cannot be parsed.
    def initialize(base)
      @base = base.is_a?(URI) ? base : URI.parse(base)
      @links = []
    end

    # Yields type, uri for every link collected so far, in discovery order.
    # Without a block an Enumerator is returned instead.
    def each
      return to_enum(:each) unless block_given?

      @links.each { |type, uri| yield type, uri }
    end

    # Walks every element of doc and records the links it finds.
    #
    # doc:: any object responding to traverse_all_element, which must yield
    #       elements responding to name and get_attr(attribute_name).
    #
    # Elements lacking the relevant attribute, and references that are not
    # valid URIs, are skipped silently.
    def parse(doc)
      doc.traverse_all_element do |elem|
        case elem.name
        when 'base'   then rebase(elem.get_attr('href'))
        when 'a'      then add_link(:a, elem.get_attr('href'))
        when 'area'   then add_link(:area, elem.get_attr('href'))
        when 'frame'  then add_link(:frame, elem.get_attr('src'))
        when 'iframe' then add_link(:iframe, elem.get_attr('src'))
        when 'img'    then add_link(:img, elem.get_attr('src'))
        when 'link'   then add_stylesheet(elem)
        end
      end
    end # parse

    # Scans CSS source text and records every url(...) reference as a :css
    # link. Surrounding whitespace and quotes are removed from the reference
    # before it is resolved; empty and invalid references are skipped.
    #
    # src:: the CSS source as a String.
    def parse_css(src)
      scanner = Web::Agent::CSS::Scanner.new(StringIO.new(src))
      while (token = scanner.token)
        type, value = token
        next unless type == :URI

        found = CSS_URL.match(value)
        next unless found

        ref = found[1].strip.sub(QUOTED, '\2')
        add_link(:css, ref) unless ref.empty?
      end
    end # parse_css

    private

    # Resolves ref against the current base and stores it under type.
    # A missing or malformed reference is ignored.
    def add_link(type, ref)
      return unless ref

      @links << [type, @base + ref]
    rescue URI::InvalidURIError
      # Best effort: one bad reference must not abort the extraction.
    end

    # Applies a <base href> value; a missing or malformed one leaves the
    # current base untouched.
    def rebase(href)
      return unless href

      @base = @base + URI.parse(href)
    rescue URI::InvalidURIError
      # Keep resolving against the previous base.
    end

    # Records the href of a <link> element whose rel names a stylesheet.
    # rel is a whitespace-separated, case-insensitive token list, so values
    # such as "alternate stylesheet" qualify too.
    def add_stylesheet(elem)
      rel = elem.get_attr('rel')
      return unless rel
      return unless rel.downcase.split.include?('stylesheet')

      add_link(:stylesheet, elem.get_attr('href'))
    end
  end # LinkExtor
end # Web

# Demo, run only when this file is executed directly: loads the page named by
# the first command-line argument (or a default URL) through Web::Agent and
# prints each extracted link as "type:uri".
if $PROGRAM_NAME == __FILE__
  require "web/agent"
  require "htree"

  target = ARGV.shift || 'http://www.ruby-lang.org/ja/'

  agent = Web::Agent.new
  agent.setup
  agent.get(target)

  # Resolve links against the agent's URI after the request.
  extractor = Web::LinkExtor.new(agent.uri)
  extractor.parse(HTree.parse(agent.rsp.body))
  extractor.each do |type, uri|
    puts "#{type}:#{uri}"
  end
end
