# ############################################################
#
#  This file define character category and word pattern.
#
#  Each line contain following information.
#
#   for Category:
#	1. 'Category'
#	3. ID (Single Charater) for Each Category
#	3. List of Characters (<Character Set Name>:<Character Offset>)
#
#   for Word:
#	1. 'Word'
#	2. Regular Expression for word.
#
# ############################################################

# Category 'a': the single-byte Latin letters A-Z (0x41-0x5a) and a-z
# (0x61-0x7a), listed for both the ASCII and JISX0201.1976-R character sets.
Category	a				# ALPHABET
		ASCII:0x41-0x5a
		ASCII:0x61-0x7a
		JISX0201.1976-R:0x41-0x5a
		JISX0201.1976-R:0x61-0x7a
# Category 'c': the same A-Z / a-z ranges as category 'a', plus the whole
# 0xa1-0xff upper half of ISO8859-1, -2, -3, -4 and -9.
# NOTE(review): the 0xa1-0xff ranges are taken wholesale, so they also cover
# the non-letter symbols those character sets place there -- confirm intended.
Category	c				# LATIN
		ASCII:0x41-0x5a
		ASCII:0x61-0x7a
		JISX0201.1976-R:0x41-0x5a
		JISX0201.1976-R:0x61-0x7a
		ISO8859-1:0xa1-0xff
		ISO8859-2:0xa1-0xff
		ISO8859-3:0xa1-0xff
		ISO8859-4:0xa1-0xff
		ISO8859-9:0xa1-0xff
# Category 'g': the 0xc1-0xfe range of ISO8859-7, which starts at capital
# alpha (0xc1).
Category	g				# GREEK
		ISO8859-7:0xc1-0xfe
# Category 'k': single-byte (JIS X 0201) katakana, 0xa6-0xdf.  The range
# includes the prolonged sound mark (0xb0) and the voiced / semi-voiced sound
# marks (0xde, 0xdf); the punctuation at 0xa1-0xa5 is left out.
Category	k				# JAPANESE KATAKANA
		JISX0201.1976-K:0xa6-0xdf
# Category 'n': the single-byte digits 0-9 (0x30-0x39), listed for both the
# ASCII and JISX0201.1976-R character sets.
Category	n				# NUMERIC
		ASCII:0x30-0x39
		JISX0201.1976-R:0x30-0x39
# Category 'r': the whole 0xa1-0xff upper half of ISO8859-5.
# NOTE(review): that range also contains a few non-letters (soft hyphen 0xad,
# numero sign 0xf0, section sign 0xfd) -- confirm this is intended.
Category	r				# CYRILLIC
		ISO8859-5:0xa1-0xff
# Category 's': ASCII horizontal tab (0x09), line feed (0x0a), carriage
# return (0x0d) and space (0x20).  Entries on one line are comma-separated.
Category	s				# WHITE SPACE
		ASCII:0x09,ASCII:0x0a
		ASCII:0x0d,ASCII:0x20
# Category 'w': single-byte digits 0-9 (0x30-0x39) and letters A-Z / a-z
# (0x41-0x5a, 0x61-0x7a), listed for both ASCII and JISX0201.1976-R.
Category	w				# ALPHABET/NUMERIC
		ASCII:0x30-0x39
		ASCII:0x41-0x5a
		ASCII:0x61-0x7a
		JISX0201.1976-R:0x30-0x39
		JISX0201.1976-R:0x41-0x5a
		JISX0201.1976-R:0x61-0x7a
# Category 'A': the double-byte (JIS X 0208 row 3) Latin letters, capitals at
# 0x2341-0x235a and small letters at 0x2361-0x237a.
Category	A				# JAPANESE 2BYTE ALPHABET
		JISX0208.1983:0x2341-0x235a
		JISX0208.1983:0x2361-0x237a
# Category 'G': the double-byte (JIS X 0208 row 6) Greek letters, capitals at
# 0x2621-0x2638 and small letters at 0x2641-0x2658.
Category	G				# JAPANESE 2BYTE GREEK
		JISX0208.1983:0x2621-0x2638
		JISX0208.1983:0x2641-0x2658
# Category 'H': JIS X 0208 hiragana (row 4, 0x2421-0x2473) plus the row-1
# marks that attach to or extend kana in running text:
#   0x212b voiced sound mark, 0x212c semi-voiced sound mark,
#   0x2135 / 0x2136 hiragana iteration marks, 0x213c prolonged sound mark.
# The voicing marks used to be listed as 0x212c / 0x212d, which omitted the
# voiced sound mark (0x212b) and included the acute accent (0x212d) instead.
Category	H				# JAPANESE 2BYTE HIRAGANA
		JISX0208.1983:0x2421-0x2473
		JISX0208.1983:0x212b
		JISX0208.1983:0x212c
		JISX0208.1983:0x2135
		JISX0208.1983:0x2136
		JISX0208.1983:0x213c
# Category 'J': JIS X 0208 kanji -- level 1 at 0x3021-0x4f53 and level 2 at
# 0x5021-0x7424.  Both ends of every range carry the 0x prefix; the level 2
# end offset was previously written as a bare "7424".
Category	J				# JAPANESE 2BYTE KANJI
		JISX0208.1983:0x3021-0x4f53
		JISX0208.1983:0x5021-0x7424
# Category 'K': JIS X 0208 katakana (row 5, 0x2521-0x2576) plus the row-1
# marks that attach to or extend kana in running text:
#   0x212b voiced sound mark, 0x212c semi-voiced sound mark,
#   0x2133 / 0x2134 katakana iteration marks, 0x213c prolonged sound mark.
# The voicing marks used to be listed as 0x212c / 0x212d, which omitted the
# voiced sound mark (0x212b) and included the acute accent (0x212d) instead.
Category	K				# JAPANESE 2BYTE KATAKANA
		JISX0208.1983:0x2521-0x2576
		JISX0208.1983:0x212b
		JISX0208.1983:0x212c
		JISX0208.1983:0x2133
		JISX0208.1983:0x2134
		JISX0208.1983:0x213c
# Category 'N': the double-byte (JIS X 0208 row 3) digits 0-9, 0x2330-0x2339.
Category	N				# JAPANESE 2BYTE NUMERIC
		JISX0208.1983:0x2330-0x2339
# Category 'R': the double-byte (JIS X 0208 row 7) Cyrillic letters, capitals
# at 0x2721-0x2741 and small letters at 0x2751-0x2771.
Category	R				# JAPANESE 2BYTE CYRILLIC
		JISX0208.1983:0x2721-0x2741
		JISX0208.1983:0x2751-0x2771
# Category 'S': the double-byte (ideographic) space, JIS X 0208 0x2121.
# The offset carries the 0x prefix like every other entry in this file; it
# was previously written as a bare "2121".
Category	S				# JAPANESE 2BYTE SPACE
		JISX0208.1983:0x2121
# Category 'W': the double-byte (JIS X 0208 row 3) letters A-Z / a-z
# (0x2341-0x235a, 0x2361-0x237a) and digits 0-9 (0x2330-0x2339).
# Continuation lines are indented with two tabs, like the rest of the file
# (the first one previously had a stray leading space).
Category	W				# 2BYTE ALPHABET/NUMERIC
		JISX0208.1983:0x2341-0x235a
		JISX0208.1983:0x2361-0x237a
		JISX0208.1983:0x2330-0x2339

# ############################################################

# Word pattern: an alternation of one-or-more runs, one alternative per
# category ID -- w, c, g, r, k, W, J, H, K, G, R, tried in that order.
# NOTE(review): "\cX" presumably means "a character of category X" -- confirm
# against the parser.  Category IDs a, n, s, A, N and S do not appear here.
Word		(\cw+|\cc+|\cg+|\cr+|\ck+|\cW+|\cJ+|\cH+|\cK+|\cG+|\cR+)
