#!/usr/bin/perl -w -C

=pod

=head1 NAME

tv_grab_jp - Grab TV listings for Japan.

=head1 SYNOPSIS

tv_grab_jp --help

tv_grab_jp [--config-file FILE] --configure [--gui OPTION]

tv_grab_jp [--config-file FILE] [--output FILE] [--days N]
           [--offset N] [--enable-readstr] [--no-category-code]
           [--quiet]

tv_grab_jp --list-channels

=head1 DESCRIPTION

Output TV listings for several channels available in Japan.
The grabber relies on parsing HTML so it might stop working at any
time.

First run B<tv_grab_jp --configure> to choose, which channels you want
to download. Then running B<tv_grab_jp> with no arguments will output
listings in XML format to standard output.

tv_grab_jp always grab 7 days of listings.

B<--configure> Prompt for which channels,
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_jp.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--output FILE> Write to FILE rather than standard output.

B<--days N> Grab N days.  The default and the maximum is 7.

B<--offset N> Start N days in the future.  The default is to start
from today.

B<--enable-readstr> Add Hiragana(read strings) for program title.

B<--no-category-code> Suppress category code. English translated
category names will be added to the listings without this option.

B<--quiet> Suppress the progress messages normally written to standard
error.

B<--list-channels> Write output giving <channel> elements for every
channel available, but no programmes.

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHOR

Takeru Komoriya<komoriya@paken.org>
Based on tv_grab_fi by Matti Airas and tv_grab_sn by Stefan G:orling.

=head1 BUGS

The data source may not suit recommended XMLTV DTD format.

=cut

######################################################################
# initializations

use strict;
use XMLTV::Version '$Id: tv_grab_jp,v 1.12 2006/04/12 08:19:16 fgouget Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig cache/;
use XMLTV::Description 'Japan';
use Getopt::Long;
use Date::Manip;
use HTML::TreeBuilder;
use HTML::Entities; # parse entities
use IO::File;

use XMLTV;
use XMLTV::Memoize;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::Config_file;
use XMLTV::Mode;
use XMLTV::Get_nice;

use Text::Kakasi;
use utf8;
use Encode qw(from_to);
use Encode::JP;

# Todo: perhaps we should internationalize messages and docs?
use XMLTV::Usage <<END
$0: get Japanese television listings in XMLTV format
To configure: $0 --configure [--config-file FILE] [--gui OPTION]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
        [--offset N] [--enable-readstr] [--no-category-code]
        [--quiet]
To list channels: $0 --list-channels
END
  ;

# Attributes of the root element in output.
my $HEAD = { 'source-info-url'     => 'http://www.ontvjapan.com/',
	     'source-data-url'     => "http://www.ontvjapan.com/program/",
	     'generator-info-name' => 'XMLTV',
	     'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
	   };

# The timezone in Japan.
my $TZ="+0900";

# language for XML output
my $XMLLANG = "ja_JP";

# message language
my $lang = $ENV{'LANG'};
$lang = '' if not defined $lang;

# base URL of source data
my $urlbase = "http://www.ontvjapan.com";

# xmltv channel id extension
my $channel_ext = ".ontvjapan.com";

# Make sure Encode supports the encodings we want.
my @encs = Encode->encodings();
my $wanted = 'euc-jp';
if (not grep { $_ eq $wanted } @encs) {
    die "did not see $wanted in list of encodings supported by Encode: @encs\n";
}

# Global channel data.
our @ch_all;

# category translation data
# (movie,series,sports,tvshow)
my %categories = ('ドラマ'         => 'Drama',
		  '映画'           => 'Movies',
		  'スポーツ'       => 'Sports',
		  '演劇'           => 'Drama',
		  '音楽'           => 'Music',
		  'バラエティ'      => 'Talk',
		  'ニュース・報道'  => 'News',
		  '趣味・実用'      => 'Home/How-to',
		  'ドキュメンタリー・教養' => 'Documentary',
		  'アニメ・特撮'    => 'Kids',
		  'キッズ'         => 'Kids',
		  '教育'           => 'Educational',
		  '情報'           => 'Home/How-to',
		  'その他'         => 'Etc');


######################################################################
# get options

XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
my ($opt_days, $opt_offset, $opt_help, $opt_output,
    $opt_configure, $opt_config_file, $opt_quiet, $opt_gui,
    $opt_readstr, $opt_no_cat_code, $opt_list_channels);
$opt_days        = 7; # default
$opt_offset      = 0; # default
$opt_readstr     = 0; # default
$opt_no_cat_code = 0; # default
$opt_quiet       = 0; # default
GetOptions('help'             => \$opt_help,
	   'configure'        => \$opt_configure,
	   'config-file=s'    => \$opt_config_file,
	   'gui:s'            => \$opt_gui,
	   'output=s'         => \$opt_output,
	   'quiet'            => \$opt_quiet,
	   'enable-readstr'   => \$opt_readstr,
	   'no-category-code' => \$opt_no_cat_code,
	   'list-channels'    => \$opt_list_channels,
	   'days=i'           => \$opt_days,
	   'offset=i'         => \$opt_offset,
	  )
  or usage(0);
die 'number of days must not be negative'
    if (defined $opt_days && $opt_days < 0);
die 'number of offset must not be negative'
    if (defined $opt_offset && $opt_offset < 0);
usage(1) if $opt_help;

XMLTV::Ask::init($opt_gui);
my $mode = XMLTV::Mode::mode('grab', # default
			     $opt_configure => 'configure',
			     $opt_list_channels => 'list-channels',
			    );

# File that stores which channels to download.
my $config_file;
my @config_lines; # used only in grab mode
if ($mode eq 'configure') {
    $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_jp', $opt_quiet);
    XMLTV::Config_file::check_no_overwrite($config_file);
}
elsif ($mode eq 'grab') {
    $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_jp', $opt_quiet);
    @config_lines = XMLTV::Config_file::read_lines($config_file);
}

######################################################################
# write configuration

if ($mode eq 'configure') {
    my $msg;
    open CONF, ">:utf8", "$config_file" or die "cannot write to $config_file: $!";

    # Get region information
    my $barmsg = select_lang('地域情報を取得中',
			     'getting regions');
    my $bar = new XMLTV::ProgressBar($barmsg, 1);
    my %regions = get_regions();
    update $bar;
    $bar->finish();

    # Ask region
    my %ask_regions;
    my @r;
    my $regionid = '0002';
    my $req_region;
    Text::Kakasi::getopt_argv('kakasi', '-ieuc', '-Ja', '-Ha', '-Ka');
    foreach (sort keys %regions) {
	my $region = $regions{$_};
	if ($lang ne 'ja_JP.UTF-8') {
	      utf8::encode($region);
	      from_to($region, "utf8", "euc-jp"); # convert to EUC
	      if ($lang ne 'ja_JP.eucJP') {
		  $region=Text::Kakasi::do_kakasi($region); # convert to Romaji
	      }
	}
	$ask_regions{$region} = $_;
	push @r, $region;
	if ($_ eq $regionid) {
	    $req_region = $region;
	}
    }
    Text::Kakasi::close_kanwadict();

    my $want_to_exit = 0;
    while (!$want_to_exit) {
	$msg = select_lang("\n地域を指定してください: ",
			   "\nSelect your region: ");
	$req_region = ask_choice($msg, $req_region, @r);
	$regionid = $ask_regions{$req_region};

	if ($regionid eq 'CATV') {
	    $msg = select_lang("README.CATV.jaを参照して4桁のID番号を入力してください: ",
			       "See README.CATV and input region ID: ");
	    $regionid = ask($msg);
	    if (length($regionid) != 4) {
		$msg = select_lang("IDが間違っています．\n",
				   "Invalid region ID.\n");
		die $msg;
	    }
	}
	print CONF "region $regionid\n";

	$barmsg = select_lang('チャンネル情報を取得中',
			      'getting channels');
	$bar = new XMLTV::ProgressBar($barmsg, 1);
	my @channels = get_channels($regionid);
	update $bar;
	$bar->finish();

	# Ask about each channel.
	Text::Kakasi::getopt_argv('kakasi', '-ieuc', '-Ja', '-Ha', '-Ka', '-Ea');
	my @qs;
	foreach (@channels) {
	    my $name = $_->{station};
	    my $callsign = $_->{callsign};
	    if (($lang ne 'ja_JP.UTF-8') and ($lang ne 'ja_JP.eucJP')) {
		utf8::encode($name);
		from_to($name, "utf8", "euc-jp"); # convert to EUC
		$name = Text::Kakasi::do_kakasi($name); # convert to Romaji
	    }
	    my $msg = select_lang("「$name($callsign)」を追加しますか？ ",
				  "add $name($callsign)? ");
	    push @qs, $msg;
	}
	Text::Kakasi::close_kanwadict();

	my @want = ask_many_boolean(1, @qs);
	foreach (@channels) {
	    my $w = shift @want;
	    my $id = $_->{id};
	    my $name = $_->{station};
	    my $callsign = $_->{callsign};
	    warn("cannot read input, stopping channel questions"), last
		if not defined $w;

	    # Print a config line, but comment it out if channel not wanted.
	    print CONF '#' if not $w;
	    print CONF "channel $id\n";
	}

	$msg = select_lang("\n他の地域を追加しますか？ (y/n): ",
			   "\nAdd another region ID?(y/n): ");
	$want_to_exit = !ask_boolean($msg, 0);
    }

    close CONF or warn "cannot close $config_file: $!";
    $msg = select_lang("設定完了.\n", "Finished\n");
    say( $msg );

    exit();
}

# Not configuration, we must be writing something, either full
# listings or just channels.
#
die if $mode ne 'grab' and $mode ne 'list-channels';

# Options to be used for XMLTV::Writer.
my %w_args;
if (defined $opt_output) {
    my $fh = new IO::File(">$opt_output");
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
$w_args{encoding} = 'UTF-8';
my $writer = new XMLTV::Writer(%w_args);
$writer->start($HEAD);

######################################################################
# Channel listings
if ($mode eq 'list-channels') {
    # Get region information
    my $barmsg = select_lang('地域情報を取得中',
			     'getting regions');
    my $bar = new XMLTV::ProgressBar($barmsg, 1);
    my %regions = get_regions();
    update $bar;
    $bar->finish();

    # get channels for all region
    my @ch_regions = grep { $_ ne 'CATV' } sort keys %regions;
    $barmsg = select_lang('チャンネル情報を取得中',
			  'getting channels');
    $bar = new XMLTV::ProgressBar($barmsg, scalar keys %regions);
    foreach (@ch_regions) {
	my $regionid = $_;
	my @channels = get_channels($regionid);
	foreach my $ch (@channels) {
	    my $id = $ch->{'id'};
	    my $exist = 0;
	    foreach (@ch_all) {
		$exist = 1 if ($id eq $_->{'id'});
	    }
	    if (not $exist) {
		my $name = $ch->{'station'};
		my $callsign = $ch->{'callsign'};
		utf8::encode($name);
		push @ch_all, { 'id' => $id,
				'station' => $name,
				'callsign' => $callsign };
	    }
	}
	update $bar;
    }
    $bar->finish();

    # Write channels mode.
    foreach (@ch_all) {
	write_channel($_);
    }
    $writer->end();
    exit();
}

# We are producing full listings.
die if $mode ne 'grab';

######################################################################
# begin main program

# Read channels from configuration, push them to @ch_all
my @regions;
my $regionid = '';
my %ch_conf;
my $line_num = 1;
my $channels = 0;
foreach (@config_lines) {
    ++ $line_num;
    next if not defined;
    if (/^region:?\s+(\S+)/) {
	push @regions, $1;
	$regionid = $1;
    }
    elsif (/^channel:?\s+(\S+)/ && $regionid) {
	push @{$ch_conf{$regionid}}, $1;
	++ $channels;
    }
    else {
	warn "$config_file:$line_num: bad line\n";
    }
}

my $diemsg = select_lang("チャンネルが設定されていません．\n"
			 . "まず --configure オプションをつけて設定をしてください．\n",
			 "Channels are not configured. Run with --configure first.\n");
die $diemsg if ($channels == 0);

my $barmsg = select_lang('チャンネル情報を取得中',
			 'getting channels');
my $bar = new XMLTV::ProgressBar($barmsg, scalar @regions)
    if not $opt_quiet;
foreach (@regions) {
    $regionid = $_;
    my @channels = get_channels($regionid);
    foreach (@channels) {
	my $id = $_->{id};
	my $name = $_->{station};
	utf8::encode($name);
	my $callsign = $_->{callsign};
	foreach (@{$ch_conf{$regionid}}) {
	    if ($_ eq $id) {
		push @ch_all, { 'id' => $id,
				'station' => $name,
				'callsign' => $callsign };
	    }
	}
    }
    update $bar if not $opt_quiet;
}
$bar->finish() if not $opt_quiet;

# the order in which we fetch the channels matters
write_channel($_) foreach (@ch_all);

# This progress bar is for both downloading and parsing.
$barmsg = select_lang('番組表を取得中',
		      'getting program info');
$bar = new XMLTV::ProgressBar($barmsg, scalar @ch_all)
    if not $opt_quiet;
foreach (@ch_all) {
    process_table($_->{'id'});
    update $bar if not $opt_quiet;
}
$writer->end();
$bar->finish() if not $opt_quiet;

######################################################################
# subroutine definitions

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
	*t = sub {};
	*d = sub { '' };
    }
    else {
	*t = \&Log::TraceMessages::t;
	*d = \&Log::TraceMessages::d;
	Log::TraceMessages::check_argv();
    }
}

# write out channel data
sub write_channel {
    my ($ch) = @_;
    $writer->startTag('channel', 'id' =>$ch->{'id'});
    $writer->dataElement('display-name', $ch->{'station'}, 'lang'=>$XMLLANG);
    $writer->dataElement('display-name', $ch->{'callsign'}, lang => 'en');
    $writer->endTag('channel');
}

####
# process_table: fetch a URL and process it
#
# arguments:
#    id of channel
# returns: list of program hashes to write
#
sub process_table {
    my ($channel_id) = @_;

    my $data = get_table($channel_id);

    # parse the page to a document object
    my $tree = HTML::TreeBuilder->new();
    $tree->parse($data) or die "cannot parse page for $channel_id\n";
    $tree->eof;
    parse_program_table($channel_id, $tree);
}

# parse program table
sub parse_program_table {
    my ($channel, $tree) = @_;
    my %prog_all;
    t "parse_program_table() ENTRY for tree: $tree";

    # elements of <span> tag
    my @elems = $tree->look_down( '_tag','span');
    my $title, my $subtitle, my $desc;

    # parse by elements and push program infomation to @data
    foreach my $elem (@elems) {
	my $class = $elem->attr('class');
	next if not defined $class;
	if ( $class =~ m/^style_title/ ) {
	    t 'doing title elem: ' . d $elem;
	    $title = $elem->look_down( '_tag','a');
	    $subtitle = undef;
	    $desc = undef;
	} elsif ( $class =~ m/^style_subtitle/ ) {
	    t 'doing subtitle elem: ' . d $elem;
	    $subtitle = $elem;
	} elsif ( $class =~ m/^style_corner/ ) {
	    t 'doing corner elem: ' . d $elem;
	    $desc = $elem;
	    # get program infomation
	    if ( defined $title ) {
		get_content($title, $subtitle, $desc, $channel, \%prog_all);
	    }
	}
    }
    sort_program($channel, \%prog_all);
}

# parse program element and store to %prog_all
sub get_content {
    my ($elem_title, $elem_subtitle, $elem_desc, $channel, $prog_all) = @_;
    my $p;

    # program title
    my $title = '';
    foreach $p ($elem_title->content_list()) {
	if (ref $p) {
	    # News/Weather icons
	    my $src = $p->attr('src');
	    if ( $src =~ m/n.gif/ ) {
		$title .= "[Ｎ]";
	    } elsif ( $src =~ m/w.gif/ ) {
		$title .= "[天]";
	    }
	} else {
	    # title
	    from_to($p, "euc-jp", "utf8"); # convert to utf-8
	    utf8::decode($p);
	    $title .= $p;
	}
    }
    $title =~ s/\s+/　/g;      # convert Hankaku spaces to Zenkaku
    utf8::encode($title);
    return undef if $title eq '';

    # program sub-title
    my $subtitle = '';
    if ( defined $elem_subtitle ) {
	$p = $elem_subtitle->as_text();
	from_to($p, "euc-jp", "utf8"); # convert to utf-8
	utf8::decode($p);
	$p =~ s/\s+/　/g;  # convert Hankaku spaces to Zenkaku
	utf8::encode($p);
	$subtitle = $p;
    }

    # program description
    my $desc = '';
    if ( defined $elem_desc ) {
	$p = $elem_desc->as_text();
	from_to($p, "euc-jp", "utf8"); # convert to utf-8
	utf8::decode($p);
	$p =~ s/\s+/　/g;  # convert Hankaku spaces to Zenkaku
	utf8::encode($p);
	$desc = $p;
    }

    # air date
    my $hsid = $elem_title->attr('href');
    $hsid =~ m/hsid=(\d{8})(\d{4})(\d{3})/;
    return undef if (not defined $1) or (not defined $3) ;
    my $date = $1;
    my $prog_id = $3; # incremental program counter of the day

    # air time and genre
    my $time_genre = $elem_title->attr('title');
    $time_genre =~ m/^(\d\d:\d\d)-(\d\d:\d\d)\s*(\S*)/;
    my $starttime = $1;
    my $endtime = $2;
    my $genre = $3;
    from_to($genre, "euc-jp", "utf8"); # convert to UTF-8
    $genre =~ s/\/.*$//;   # delete after '/'
    return undef if (not defined $starttime) or (not defined $endtime);

    # start/end time
    $starttime =~ s/://;
    $endtime =~ s/://;

    my $startdate = $date;
    my $enddate = $date;
    # for the program over midnight
    if ($starttime > $endtime) {
	# real date of program end
	$enddate = UnixDate(DateCalc($date,"+ 1 day"), '%Q');
    }

    # check the date for --days and --offset option
    my $today = ParseDate("today");
    my $output_sdate = UnixDate(DateCalc($today, "+ $opt_offset days"), '%Q');
    my $edays = $opt_offset + $opt_days - 1;
    my $output_edate = UnixDate(DateCalc($today, "+ $edays days"), '%Q');
    if ($opt_offset > 0 || $opt_days < 7) {
	return undef if ($startdate < $output_sdate);
	return undef if ($startdate > $output_edate);
    }

    $prog_id = $startdate . $prog_id;
    my $program = { 'startdate' => $startdate,
		    'starttime' => $starttime,
		    'enddate'   => $enddate,
		    'endtime'   => $endtime,
		    'title'     => $title,
		    'sub-title' => $subtitle,
		    'genre'     => $genre,
		    'desc'      => $desc };

    $$prog_all{$prog_id} = $program;
}

# sort program by date and time, set real date, and output
sub sort_program {
    my ($channel, $prog_all) = @_;
    my $cdate = '';
    my $ptime;
    my $pid = '000';
    my $date_changed = 0;

    foreach (sort keys %$prog_all) {
	my $prog = $$prog_all{$_};
	my $date = substr($_, 0, 8);
	my $id = substr($_, -3, 3);

	if ($date ne $cdate) {
	    $cdate = $date;
	    $pid = '000';
	    $ptime = '0000';
	    $date_changed = 0;
	}

	# date transition check
	$date_changed = 1
	    if ((not $date_changed) and (defined $ptime) and ($ptime > $prog->{'starttime'}));
		
	# set real date
	if ($date_changed) {
	    my $d = $prog->{'startdate'};
	    $prog->{'startdate'} = UnixDate(DateCalc($d,"+ 1 day"), '%Q');
	    $prog->{'enddate'} = UnixDate(DateCalc($d,"+ 1 day"), '%Q');
	}

	$ptime = $prog->{'starttime'};
	$pid = $id;

	my $title = $prog->{'title'};
	my $subtitle = $prog->{'sub-title'};
	my $genre = $prog->{'genre'};
	my $desc = $prog->{'desc'};

	# read string (Romaji) of title
	my $t = $title;
	utf8::decode($t);
	$t =~ s/\[.+\]//g;   # delete [NEWS] etc.
	$t =~ s/◇//g;       # delete '◇'
	$t =~ s/[0-9]//g;    # delete figures that represent time
	# We cannot use \d here, because it's locale-aware and
	# matches figures in wide charactors. Use [0-9] instead.
	utf8::encode($t);
	Text::Kakasi::getopt_argv('kakasi', '-ieuc', '-JH', '-KH', '-aE');
	from_to($t, "utf8", "euc-jp"); # convert to EUC
	my $readstr = '';
	if ($t ne '') {
	    $readstr=Text::Kakasi::do_kakasi($t);
	    from_to($readstr, "euc-jp", "utf8"); # convert to UTF-8
	}
	Text::Kakasi::close_kanwadict();
	
	# write out program
	my $start = $prog->{'startdate'} . $prog->{'starttime'} . "00 " . $TZ;
	my $stop = $prog->{'enddate'} . $prog->{'endtime'} . "00 " . $TZ;
	
	$writer->startTag('programme', 'start'=>$start, 'stop'=>$stop, 'channel'=>$channel);
	$writer->dataElement('title', $title, 'lang'=>$XMLLANG);
	$writer->dataElement('title', $readstr, 'lang'=>$XMLLANG . '@kana')
	    if ($readstr ne '') and $opt_readstr;
	$writer->dataElement('sub-title', $subtitle, 'lang'=>$XMLLANG) if $subtitle ne '';
	$desc =~ s/\s+$//;
	$writer->dataElement('desc', $desc, 'lang'=>$XMLLANG) if $desc ne '';
	$writer->dataElement('category', $genre, 'lang'=>$XMLLANG) if $genre ne '';
	if (($genre ne '') and (not $opt_no_cat_code)) {
	    # Convert category code to English for MythTV's benefit.
	    utf8::decode($genre);
	      my $cat_code = $categories{$genre};
	      $writer->dataElement('category', $cat_code, lang => 'en')
		  if (defined $cat_code and $cat_code ne '');
	  }
	$writer->endTag('programme');
    }
}

# get channel listing
sub get_channels {
    my ($regionid) = @_;
    my @channels;
    my $local_data = get_channel_table($regionid);

    my $tree = HTML::TreeBuilder->new();
    $tree->parse($local_data) or die "cannot parse channel table for $regionid\n";
    $tree->eof;

    # channels elements are specially formatted <a> tags
    # with href="gridChannel.php..."
    my @ch_a_elems = $tree->look_down(_tag => 'a');

    # Zenkaku -> Hankaku conversion
    Text::Kakasi::getopt_argv('kakasi', '-ieuc', '-Ea');

    # get channels
    while (@ch_a_elems) {
	my $elem = shift @ch_a_elems;
	my $href = $elem->attr('href');
	if ($href =~ m/^gridChannel.php\?tikicd=${regionid}&ch=(\d\d\d\d)/) {
	    my $channel_id = $1;
	    $channel_id .= $channel_ext;    # this is xmltv channel id
	    my $callsign = ($elem->content_list)[0];
	    $callsign = Text::Kakasi::do_kakasi($callsign);
	    $callsign =~ s/[\(\)]//g;
	    $elem = shift @ch_a_elems;
	    my $channel = ($elem->content_list)[0];
	    # convert to UTF-8
	    from_to($channel, "euc-jp", "utf8");
	    from_to($callsign, "euc-jp", "utf8");
	    utf8::decode($channel);
	    utf8::decode($callsign);
	    # special fix for NHK-ETV call sign
	    $callsign = 'ETV' if ($callsign eq 'NHK' and $channel eq 'ＮＨＫ教育');
	    # special fix for Housou Daigaku
	    $callsign = 'UAIR' if ($channel eq '放送大学');
	    push @channels, { 'id' => $channel_id,
			      'station' => $channel,
			      'callsign' => $callsign };
        }
    }
    Text::Kakasi::close_kanwadict();

    my $chnum = @channels;
    my $diemsg = select_lang("チャンネル情報の取得に失敗しました\n"
			     . "ネットワークの接続と地域番号を確認してください",
			     "Failed to get channel information.\n"
			     . "Please check if region id and network connections are correct.");
    die $diemsg if $chnum == 0;
    return @channels;
}

# get regions
sub get_regions {
    my %regions;
    my $local_data = get_area_list();

    my $tree = HTML::TreeBuilder->new();
    $tree->parse($local_data) or die "cannot parse page for area list\n";
    $tree->eof;

    # area elements are formatted <a> tags
    # with href="javascript:sendUrl(0,'$id')"
    my @area_elems = $tree->look_down(_tag => 'a');

    # get channels
    foreach (@area_elems) {
	my $href = $_->attr('href');
	if ($href =~ m/^javascript:sendUrl.*'(.{3})'/) {
	    my $region_id = '0' . $1;
	    my $region_name = ($_->content_list)[0];
	    # convert to UTF-8
	    from_to($region_name, "euc-jp", "utf8");
	    utf8::decode($region_name);
	    $regions{$region_id} = $region_name;
	}
    }

    my $diemsg = select_lang("地域情報の取得に失敗しました\n"
			     . "ネットワークの接続を確認してください",
			     "Failed to get region information.\n"
			     . "Please check if network connections are correct.");
    die $diemsg if scalar keys %regions == 0;

    # for CATV station
    $regions{'CATV'} = 'CATV';

    return %regions;
}

# get program table
sub get_table {
    my ($channelid) = @_;
    my $ext = quotemeta $channel_ext;
    $channelid =~ s/$ext//;
    my $url = "$urlbase/program/gridChannel.php?ch=${channelid}&genre=all";
    my $content = XMLTV::Get_nice::get_nice($url);
    return $content;
}

# get channel table
sub get_channel_table {
    my ($regionid) = @_;
    # request channel data
    my $url = "$urlbase/program/gridChannel.php?tikicd=${regionid}";
    my $content = XMLTV::Get_nice::get_nice($url);
    return $content;
}

# get area list
sub get_area_list {
    # request area selection page
    my $url = "$urlbase/areachange/areaselect.php3?tv=1&force=1";
    my $content = XMLTV::Get_nice::get_nice($url);
    return $content;
}

# Select proper language and encoding of output message
sub select_lang {
    my ($ja, $eng) = @_;

    if ($lang eq 'ja_JP.UTF-8') {
	return $ja;
    } elsif ($lang eq 'ja_JP.eucJP') {
	utf8::encode($ja);
	from_to($ja, "utf8", "euc-jp"); # convert to EUC
	return $ja;
    } else {
	return $eng;
    }
}
