require 'strscan'

# CKDefinition parses a .ckd (CGIKit Definition) file.
# CKComponent stores the result of the parse and uses it for binding.
#
# The format of a CKD file is very different from Ruby or HTML.
# Here, the format is described in a pseudo-BNF notation like the one used in Ruby's documentation.
#
#   STMTS           : (STMT)*
#   STMT            : ELEMENT_NAME ':' ELEMENT_TYPE '{' ATTRS '}'
#   ATTRS           : (ATTRIBUTE '=' VALUE TERM)*
#   ATTRIBUTE       : ruby_method_name
#   VALUE           : METHODS 
#                   | LITERAL 
#   METHODS         : ruby_method_name('.'ruby_method_name)*
#                   | METHODS BLITERAL
#   LITERAL         : 'true'
#                   | 'false'
#                   | DIGIT_SEQUENCE
#                   | STRING
#   BLITERAL        : ARRAY
#                   | HASH
#   ARRAY           : '[' DIGIT_SEQUENCE ']'
#   HASH            : '[' LITERAL ']'
#   DIGIT_SEQUENCE  : DIGIT+
#   DIGIT           : [0-9]
#   STRING          : ''' any_character* '''
#                   | '"' any_character* '"'
#   ELEMENT_NAME    : (LETTER | DIGIT | '_')+
#   ELEMENT_TYPE    : ruby_class_name
#   LETTER          : [A-Za-z]
#   TERM            : ';'
#                   | '\n'
#
# ruby_method_name means a Ruby method name: for example, 'foo', 'to_s' and 'is_a?'.
# Likewise, ruby_class_name means a Ruby class name: for instance, 'CKString' and 'CKComponent'.
#
# CKDefinition treats everything from '#' to the end of the line as a comment.
# In the following example, '# foo bar comment' is not interpreted by CKDefinition.
#
#   foo : CKString {
#     # foo bar comment
#     value = "foo";
#   }
#
class CKDefinition
	class CKDParseError < CKError ; end #:nodoc:

	class << self
		# Reads the CKD file named +filename+ and parses its contents.
		#
		# Returns whatever parse returns for the file's text; +filename+ is
		# also passed to parse, which uses it as the prefix of error messages.
		def parse_ckd_file(filename)
			# Object#untaint was removed in Ruby 3.2, so only call it on
			# interpreters that still provide it.
			filename.untaint if filename.respond_to?(:untaint)

			parse(File.read(filename), filename)
		end

		# Parses CKD source text and returns the element definitions.
		#
		# +string+ is the CKD source. +filename+ is only used as the prefix of
		# error messages ("filename:line: ").
		#
		# The result is a Hash that maps each element name to a Hash of its
		# attributes. Attribute values are stored as source text: string
		# literals are re-quoted with double quotes, everything else is kept
		# as written. Each element Hash also holds 'oid' (the element name)
		# and 'element' (the element type).
		#
		# Raises CKDParseError when a value is malformed or not terminated by
		# ";" or a line break, when an element name is defined twice, when the
		# input matches no rule, or when the last element is never closed.
		def parse(string, filename = '')
			scanner = StringScanner.new(string)

			definitions     = {}
			in_attrs        = false
			line_num        = 1
			current_element = nil
			element_name    = nil
			term            = "(;|\n)"

			# Builds the "filename:line: " prefix of an error message.
			pos_proc = proc do
				"#{filename}:#{line_num}: "
			end

			# Adds the line breaks swallowed by the most recent match to
			# line_num. Several rules below consume a "\n" terminator (or
			# whitespace containing line breaks) as part of a larger match;
			# without this, the reported line numbers drift.
			count_lines = proc do
				line_num += scanner.matched.count("\n")
			end

			while scanner.rest?
				# line break
				if scanner.skip(/(\r)?\n/) then
					line_num += 1
					next
				end

				# skip comment or space
				if scanner.skip(/(?:#.*$|[ \r\f\t]+)/) then
					next
				end

				if in_attrs then
					# attribute: "name = value TERM"
					if scanner.skip(/([a-zA-Z0-9_]+[\!\?]?)[ \r\f\t]*=[ \r\f\t]*/)
						key = scanner[1]

						if scanner.skip(/^([^"'][^"';\n]*)\[/) then
							# array and hash literal: object[index]
							count_lines.call
							object = scanner[1]

							if quote = scanner.check(/["']/) then
								# string literal used as the index
								unless scanner.skip(/#{quote}(.*?)#{quote}\]/)
									raise CKDParseError, \
										pos_proc.call << scanner.peek(60).inspect << \
										"\nString literal error. Or, \";\" doesn't exist."
								end
								list = "\"#{scanner[1]}\""
							elsif scanner.skip(/([a-zA-Z0-9_\.\!\^?]+)\][ \r\t\f]*#{term}/)
								count_lines.call
								list = scanner[1]
							else
								raise CKDParseError, pos_proc.call << \
									scanner.peek(60).inspect << \
									"\nArray or Hash literal error. Or, \";\" doesn't exist.\n"
							end

							value = "#{object}[#{list}]"
						elsif quote = scanner.check(/["']/) then
							# string literal (may span several lines)
							unless scanner.skip(/#{quote}(.*?)#{quote}[ \r\f\t]*#{term}/m)
								raise CKDParseError, \
									pos_proc.call << scanner.peek(60).inspect << \
									"\nString literal error. Or, \";\" doesn't exist."
							end
							count_lines.call

							value = "\"#{scanner[1]}\""
						elsif scanner.skip(/([a-zA-Z0-9_\.\!\^?]+)[ \r\t\f]*#{term}/)
							# others (method chain, number, true/false)
							#
							# This regexp is not accurate.
							count_lines.call
							value = scanner[1]
						else
							raise CKDParseError, pos_proc.call << \
								scanner.peek(60).inspect << \
								"\nMethod name error. Or, \";\" doesn't exist.\n"
						end

						current_element[key] = value
						next
					end

					# end of definition
					if scanner.skip(/\s*\}/)
						count_lines.call
						definitions[element_name] = current_element
						current_element = nil
						in_attrs = false
						next
					end

					# skips space and separator.
					#
					# (ex.)
					# foo : CKString{;}
					if scanner.skip(/[ \r\t\f]*#{term}/)
						count_lines.call
						next
					end
				else
					# extracts name of the element and its class
					# class's regexp is not precise.
					if scanner.skip(/([a-zA-Z0-9_]+)\s*:\s*([a-zA-Z0-9_]+)\s*\{/)
						element_name = scanner[1]
						element_type = scanner[2]

						# Checked before counting the header's own line
						# breaks, so the error names the line the header
						# starts on.
						if definitions.key? element_name then
							raise CKDParseError, pos_proc.call << \
								scanner.peek(60).inspect << \
								"\n'#{element_name}' definition is already existed.\n"
						end

						count_lines.call
						current_element = {
							'oid'     => element_name,
							'element' => element_type
						}
						in_attrs = true
						next
					end
				end

				# Nothing above consumed any input: report what follows
				# together with the definitions parsed so far.
				def_str = _pretty_print(definitions)
				raise CKDParseError, pos_proc.call << scanner.peek(60).inspect << \
					"\nNot match any rule.\n\n" << def_str << "\n"
			end

			# Now, there is no data to be parsed.
			if in_attrs
				raise CKDParseError, pos_proc.call << \
					"The last element is not enclosed.\n\n"
			end

			definitions
		end

		# Writes the text built by _pretty_print for +defs+ to standard output.
		def pretty_print(defs)
			text = _pretty_print(defs)
			$stdout.print(text)
		end

		private
		# Formats +defs+ (element name => attribute Hash) as text.
		#
		# Elements are listed in sorted name order. Each one is written as
		# "name:" followed by one indented "key: value.inspect" line per
		# attribute (keys sorted) and a blank line. Returns the String.
		def _pretty_print(defs)
			indent = '		'

			defs.keys.sort.each_with_object('') do |name, out|
				out << name << ":\n"

				attrs = defs[name]
				attrs.keys.sort.each do |key|
					out << indent << key << ': ' << attrs[key].inspect << "\n"
				end

				out << "\n"
			end
		end
	end
end


# CKHTMLParser parses a template of a component.
# The template is HTML, except one fact that it has
# "<cgikit>" tag. You can use two types of formats as CGIKit tag,
# "<cgikit>...</cgikit>" and "<cgikit />". The difference of the
# formats is whether the tag's body is empty or not. So, if you write
# "<cgikit></cgikit>", it has the same meaning as "<cgikit />".
#
# The CGIKit tag has only one attribute, "name". The "name" attribute is
# the name of a CGIKit element and is composed of characters in "[a-zA-Z0-9_]".
# You can enclose the value of the "name" attribute in double quotation marks.
# The value is case-sensitive.
#
# For instance, these have the same meaning.
#
#   <cgikit name=foo></cgikit>
#   <cgikit name=foo />
#   <cgikit name="foo"></cgikit>
#   <cgikit name="foo" />
#
# On the other hand, these have different meanings.
#   <cgikit name=Foo />
#   <cgikit name="foo" />
#   <cgikit name=FOO></cgikit>
#
# HTML comments are interpreted by CKHTMLParser because they are
# needed to include things like CSS and JavaScript.
# If you want to comment out some parts of a template, use "<!--- ... --->".
# The format is like an HTML comment, but with an additional hyphen.
#
#   <html>
#   <head>
#   </head>
#   <body>
#   <!-- HTML comment. This is interpreted. -->
#   <!--- CGIKit comment. This is not interpreted.
#     <!-- HTML comment. Because this is in CGIKit comment,
#          this is not also interpreted. -->
#   --->
#   </body>
#   </html>
#
# As the example shows, the first HTML comment
# is interpreted and the second HTML comment is not interpreted.
#
# CGIKit comments cannot be nested. So, the next example
# is not allowed.
#
#   <!--- foo bar
#    <!--- foo bar --->
#   --->
class CKHTMLParser

	class CKHTMLParseError < CKError; end #:nodoc:
	
	attr_accessor :application, :html_string, :parent, :repetitions, \
	              :repetition_index

	# Sets up a parser for the template text +string+.
	#
	# +parent+ is kept as the owner of the parser and must respond to
	# +application+; its value is stored as the parser's application.
	# A StringScanner over +string+ is created for each_block_string.
	def initialize( parent, string )
		@application = parent.application
		@scanner     = StringScanner.new(string, false)
		@parent, @html_string = parent, string
	end

	# Renders the template and returns the resulting String.
	#
	# Walks the blocks produced by each_block_string: plain text is copied
	# as is, each cgikit tag is replaced by the +to_s+ of the element built
	# for it, and CGIKit comments are dropped. Any other block kind raises
	# CKHTMLParseError.
	def parse
		output = ''

		each_block_string do |chunk, type|
			if type == :normal
				output << chunk
			elsif type == :cgikit_tag
				name, body = _extract_name_body(chunk)
				rendered = _create_element(name, body).to_s
				output << rendered if rendered
			elsif type != :comment
				raise CKHTMLParseError, "BUGS: CKHTMLParser#parse unknown token"
			end
		end

		output
	end

	private
	# Builds, runs and returns the element named +name+.
	#
	# The element's definition is looked up in parent.definitions; its
	# 'element' entry, the application, the parent, +name+ and +body+ are
	# handed to CKElement.instance. When @repetitions is set, it and
	# @repetition_index are copied to the new object before it is run.
	#
	# Raises CKHTMLParseError when +name+ has no definition.
	def _create_element( name, body )
		definition = parent.definitions[ name ]
		if definition.nil?
			raise CKHTMLParseError,
			      "Can't create element: #{name} of #{parent.name_with_class}"
		end

		created = CKElement.instance( definition[ 'element' ], @application,
		                              @parent, name, body )
		if @repetitions
			created.repetitions      = @repetitions
			created.repetition_index = @repetition_index
		end

		created.run
		created
	end

	public
	# Splits the remaining template text into blocks and yields each one
	# as (string, kind), where kind is one of:
	#
	# :comment    :: a CGIKit comment ("<!--- ... --->")
	# :cgikit_tag :: a complete cgikit tag, either "<cgikit ... />" or an
	#                outermost "<cgikit ...> ... </cgikit>" including
	#                everything nested inside it
	# :normal     :: any other text
	#
	# Raises CKHTMLParseError when there are more end tags than start
	# tags, when a start tag is never closed, or when nothing matches.
	def each_block_string
		tag_hint   = /<(?:\!|c|\/c)/i
		ck_comment = /<\!---[^-](.*?)[^-]--->/mi
		empty_tag  = /<cgikit\s+[^>]*?\/>/i
		open_tag   = /<cgikit\s+[^>]*?>/mi
		close_tag  = /<\/cgikit\s*>/i
		plain_text = /(?:.+?(?=<(?:\!|c|\/c))|.+)/mi

		depth   = 0    # number of currently open <cgikit ...> tags
		pending = ''   # text collected for the outermost open tag

		until @scanner.eos?
			# The specific tag patterns are only tried when the text at
			# the scan position could start a comment or a cgikit tag.
			tag_like = @scanner.match?(tag_hint)

			if tag_like && (chunk = @scanner.scan(ck_comment))
				yield chunk, :comment
			elsif tag_like && (chunk = @scanner.scan(empty_tag))
				if depth > 0
					pending << chunk
				else
					yield chunk, :cgikit_tag
				end
			elsif tag_like && (chunk = @scanner.scan(open_tag))
				pending << chunk
				depth += 1
			elsif tag_like && (chunk = @scanner.scan(close_tag))
				pending << chunk
				depth -= 1

				if depth == 0
					yield pending, :cgikit_tag
					# start a fresh buffer; the yielded string is left intact
					pending = ''
				elsif depth < 0
					raise CKHTMLParseError, \
						"too many end-tags(</cgikit>) in #{@parent.template_file}."
				end
			elsif (chunk = @scanner.scan(plain_text))
				if depth > 0
					pending << chunk
				else
					yield chunk, :normal
				end
			else
				raise CKHTMLParseError, "not match any rule in #{@parent.template_file}."
			end
		end

		unless depth == 0
			raise CKHTMLParseError, \
				"a cgikit tag is not closed in #{@parent.template_file}."
		end
	end

	private
	# Returns the text between the first pair of matching single or double
	# quotes in +name+, or +name+ itself when it holds no such pair.
	def _extract_name(name)
		quoted = /(['"])(.*?)\1/.match(name)
		quoted ? quoted[2] : name
	end

	# Splits a cgikit tag into the element name and the tag body.
	#
	# +str+ is either "<cgikit name=...> body </cgikit>" or the empty form
	# "<cgikit name=... />". Returns [name, body]; body is '' for the
	# empty form. Quotes around the name are removed by _extract_name.
	#
	# Raises CKHTMLParseError when +str+ has neither form.
	def _extract_name_body(str)
		# The end tag may contain whitespace before ">" ("</cgikit >"),
		# exactly as each_block_string accepts it.
		paired = /\A<cgikit\s+name\s*=\s*(.*?)\s*>(.*)<\/cgikit\s*>/im.match(str)
		return _extract_name(paired[1]), paired[2] if paired

		empty = /\A<cgikit\s+name\s*=\s*(.*?)\s*\/>/im.match(str)
		return _extract_name(empty[1]), '' if empty

		raise CKHTMLParseError, "can't extract name and body: " << str.inspect
	end
end


