# dictformat.rb:
# $Id: dictformat.rb,v 1.4 2005/03/07 07:51:33 komatsu Exp $
#
# Copyright (C) 2003 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.

# Base class for dictionary converters: loads words from an external source
# file and/or an already generated dictionary, merges duplicate entries, and
# writes the result as tab-separated lines sorted by #sort_result.
#
# A word is an array of the form [pron, pos, literal, freq, *attr].  A pos of
# nil means "part of speech unknown"; it is written to disk as @pos_unknown.
#
# @dict is a pair of hashes:
#   @dict[0]  "pron\tpos\tliteral" => [pron, pos, literal, freq, *attr]
#   @dict[1]  "pron\tliteral"      => first non-nil pos registered for the pair
#
# NOTE: the loaders and #save_dict rely on each_with_pbar / sort_with_pbar,
# which are not defined in this file and must be provided elsewhere.
class DictFormat
  attr_accessor :dict

  # is_interactive: when true, progress-bar labels are passed to the
  # *_with_pbar helpers; when false, nil is passed instead.
  def initialize(is_interactive = true)
    @is_interactive = is_interactive
    @dict = [{}, {}]
    @pos_unknown = "θ"
  end

  # Reads +filename+ line by line, converts each line with #parse and
  # registers every word it yields.
  def load_external_dict(filename)
    pbar_label = @is_interactive ? "INPUT_FILE LODING" : nil
    # Block form so the file handle is closed even if parsing raises.
    File.open(filename, "r") do |io|
      io.each_with_pbar(pbar_label) do |line|
        items = parse(line.chomp)
        next unless items

        items.each do |pron, pos, literal, freq, *attr|
          set_word(pron, pos, literal, freq, *attr)
        end
      end
    end
  end

  # Reads a dictionary in the format produced by #save_dict.  Does nothing
  # when +filename+ does not exist.  Lines lacking a pron or a literal (for
  # example blank lines) are ignored by #set_word.
  def load_existent_dict(filename)
    return unless File.exist?(filename)

    pbar_label = @is_interactive ? "OUTPUT_FILE LODING" : nil
    File.open(filename, "r") do |io|
      io.each_with_pbar(pbar_label) do |line|
        pron, pos, literal, *rest = line.chomp.split(/\t/)
        # The on-disk marker for an unknown pos is represented as nil in memory.
        pos = nil if pos == @pos_unknown
        set_word(pron, pos, literal, *rest)
      end
    end
  end

  # Writes every registered word to +filename+, one tab-separated line per
  # word, in the order defined by #sort_result.
  def save_dict(filename)
    pbar_label = @is_interactive ? "OUTPUT_FILE SAVING" : nil
    File.open(filename, "w") do |io|
      sort_result(@dict[0].values).each_with_pbar(pbar_label) do |data|
        io.puts(dict_format(*data))
      end
    end
  end

  private

  ## This method should be overridden.
  # Converts one input line into a list of words.  The default implementation
  # treats the line as a single tab-separated word and returns [] for lines
  # without a tab.  The argument is not modified.
  def parse(line)
    fields = line.chomp
    return [] if fields !~ /\t/

    [fields.split(/\t/)]
  end

  # Sorts words with #compare_words.
  def sort_result(results)
    pbar_label = @is_interactive ? "WORDS SORTING" : nil
    results.sort_with_pbar(pbar_label) do |result1, result2|
      compare_words(result1, result2)
    end
  end

  # Comparison used by #sort_result.  Returns -1, 0 or 1.
  # Order: higher freq first, then shorter pron, then a known pos before an
  # unknown one, then (for equal pos) literal in ascending order.  Two words
  # with different known pos values compare as equal.
  def compare_words(result1, result2)
    pron1, pos1, literal1, freq1 = result1
    pron2, pos2, literal2, freq2 = result2

    return freq2 <=> freq1 if freq1 != freq2
    return pron1.length <=> pron2.length if pron1.length != pron2.length

    # Unknown pos is stored as nil but may also appear as the marker string;
    # normalize so that both spellings are handled the same way.
    pos1 = nil if pos1 == @pos_unknown
    pos2 = nil if pos2 == @pos_unknown

    return literal1 <=> literal2 if pos1 == pos2
    return 1 if pos1.nil?
    return -1 if pos2.nil?

    0
  end

  # Formats a word as one output line; a nil pos is written as @pos_unknown.
  def dict_format(pron, pos, literal, freq, *attr)
    # join flattens attr; strip drops the trailing tab left by an empty attr.
    [pron, (pos || @pos_unknown), literal, freq, attr].join("\t").strip
  end

  # Registers a word given as (pron, pos, literal, freq, *attr), merging it
  # with an already registered entry when #get_word finds one.
  # Returns false (and stores nothing) when pron or literal is nil or empty,
  # true otherwise.
  def set_word(*word)
    pron, pos, literal, freq, *attr = word
    return false if pron.nil? || pron.empty? || literal.nil? || literal.empty?

    freq = freq.to_i

    existent_word = get_word(pron, pos, literal)
    if existent_word
      pron, pos, literal, freq, *attr = merge_words(existent_word, word)
    end

    @dict[0][label(pron, pos, literal)] = [pron, pos, literal, freq, *attr]
    # Remember the first non-nil pos seen for this pron/literal pair.
    @dict[1][label(pron, literal)] ||= pos
    true
  end

  # Combines +new_word+ with +existent_word+ and returns the merged word.
  # - pos: the new pos wins; the existing one is used only when the new is nil.
  # - attr: existing attributes followed by new ones, duplicates removed.
  # - freq: the larger of the two when the pos values agree or the existing
  #   pos is unknown; otherwise the new word's freq is kept.
  # Side effect: when the new word supplies a pos for an entry that had none,
  # the pos-less entry is removed from @dict[0].
  def merge_words(existent_word, new_word)
    pron, pos, literal, freq, *attr = new_word
    _pron2, pos2, _literal2, freq2, *attr2 = existent_word

    freq  = freq.to_i
    freq2 = freq2.to_i

    @dict[0].delete(label(pron, nil, literal)) if pos && pos2.nil?
    pos ||= pos2

    attr = (attr2 + attr).uniq
    freq = [freq, freq2].max if pos == pos2 || pos2.nil?

    [pron, pos, literal, freq, *attr]
  end

  # Looks up a registered word: first by the exact pron/pos/literal key, then
  # by the pos recorded in @dict[1] for the pron/literal pair.  Returns nil
  # when neither lookup finds an entry.
  def get_word(pron, pos, literal)
    @dict[0][label(pron, pos, literal)] ||
      @dict[0][label(pron, @dict[1][label(pron, literal)], literal)]
  end

  # Builds a hash key by joining the fields with tabs (nil becomes "").
  def label(*data)
    data.join("\t")
  end
end

    

