#!/usr/bin/perl -w
=pod 

=head1 NAME

tv_grab_re - Grab TV listings for Reunion Island (France).

=head1 SYNOPSIS

To configure: tv_grab_re --configure [--config-file FILE]
To grab channels listing: tv_grab_re --list-channels [--output FILE]
To grab programmes listings: tv_grab_re [--output FILE] [--offset N] [--days N] [--quiet]
Slower, detailed grab: tv_grab_re --slow [--output FILE] [--offset N] [--days N] [--quiet]
Help: tv_grab_fr --help

=head1 DESCRIPTION

Output TV listings for Canal Satellite Reunion and Parabole Reunion channels 
available in Reunion Island. The data comes from www.canalsatellite-reunion.com
for Canal Satellite Reunion and from www.parabolereunion.com for Parabole
Reunion. The default is to grab listing only for the current day.
By default program descriptions are not downloaded, so if you want description
and credits, you should activate the --slow option. To grab listing for 
hertzian channels, select them in Canal Satellite Reunion grid.

B<--configure> Grab channels informations and ask for channel type and names.

B<--output FILE> Write to FILE rather than standard output.

B<--days N> Grab N days, rather than only for the current day.

B<--offset N> Start grabbing for N days in the future, eg offset 1
means start with tomorrow.

B<--slow> Get additional information from the website, like program
description and credits.

B<--quiet> Suppress the progress messages normally written to standard
error.

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>

=head1 AUTHOR

Eric Castelnau, eric.castelnau@free.fr
Inspired by tv_grab_fr written by Sylvain Fabre, centraladmin@lahiette.com

=cut

use XMLTV::Usage <<END
$0: get Reunion Island television listings in XMLTV format
To configure: tv_grab_re --configure [--config-file FILE]
To grab channels listing: tv_grab_re --list-channels [--output FILE]
To grab programmes listings: tv_grab_re [--output FILE] [--days N] [-offset N] [--quiet]
Slower, detailed grab: tv_grab_re --slow [--output FILE] [--days N] [--offset N] [--quiet]
END
  ;

use warnings;
use strict;
use XMLTV::Version '$Id: tv_grab_re,v 1.16 2008/01/30 17:18:46 ecastelnau Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig/;
use XMLTV::Description 'Reunion Island';
use Getopt::Long;
use HTML::TreeBuilder;
use HTML::Entities; # parse entities
use HTTP::Cookies;
use IO::File;
use URI;
use Date::Manip;
use XMLTV;
use XMLTV::Memoize;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::Mode;
use XMLTV::Config_file;
use XMLTV::DST;
use XMLTV::Get_nice;
use XMLTV::Memoize; XMLTV::Memoize::check_argv 'get_nice';

###
### Main declarations
###
my %BROADCASTERS = (
	'CANALSAT' => "Canal Satellite Reunion",
	'PARABOLE' => "Parabole Reunion"
);
my $CANALSAT_BASE_URL = "http://fw-web.canalreunion.net/";
my $CANALSAT_ICON_URL = "http://fw-web.canalreunion.net/uploads/tx_hhmod4bdd";
my $PARABOLE_BASE_URL = "http://www.parabolereunion.com/";
my $PARABOLE_ICON_URL = "http://www.parabolereunion.com/images/tmp";

###
### Options processing
###
my ($opt_offset, $opt_days);
my $opt_help;
my $opt_output;
my $opt_quiet;
my $opt_config_file;
my $opt_configure;
my $opt_list_channels;
my $opt_slow;

GetOptions(	'days=i'	=> \$opt_days,
		'offset=i'	=> \$opt_offset,
		'help'          => \$opt_help,
		'output=s'      => \$opt_output,
		'quiet'         => \$opt_quiet,
		'configure'     => \$opt_configure,
		'config-file=s' => \$opt_config_file,
		'list-channels' => \$opt_list_channels,
		'slow'		=> \$opt_slow,
) or usage(0);

# need help
usage(1) if $opt_help;

# verbose by default
$opt_quiet = 0;

# number of day to process
die 'Number of days must not be negative' if (defined $opt_days && $opt_days < 0);
die 'Number of days must not be more than 5' if (defined $opt_days && $opt_days > 5);
$opt_days = 1 if not defined $opt_days;

# offset - zero (default) means start from today
die 'Offset must not be negative' if (defined $opt_offset && $opt_offset < 0);
$opt_offset = 0 if not defined $opt_offset;

# output file
$opt_output = '-' if not defined $opt_output;

# slow mode off by default
$opt_slow = 0 if not defined $opt_slow;

# Now detects if we are in configure mode
my $mode = XMLTV::Mode::mode('grab', $opt_configure => 'configure',
                        	$opt_list_channels => 'list-channels');

# File that stores which channels to download.
my $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_re',
						$opt_quiet);

# Content of $config_file
my @config_lines;

###
### Global variables
###

# channels list
my @channels;

###
### Sub sections
###
sub dprint($) {
	my $msg = shift;
	print STDERR "debug: " . $msg;
}

sub dump_channel($) {
	my $c = shift;
	print "type: $c->{'type'}\n";
	print "id  : $c->{'id'}\n";
	print "name: $c->{'name'}\n";
	print "icon: $c->{'icon'}\n";
}

sub dump_programme($) {
	my $c = shift;
	print "channel  : $c->{'channel'}\n";
	print "title    : $c->{'title'}[0][0]\n";
	print "start    : $c->{'start'}\n";
	print "stop     : $c->{'stop'}\n";
	#print "length   : $c->{'length'}sec.\n";
	print "category : $c->{'category'}[0][0]\n" if defined $c->{'category'};
}

sub new_xmltv_writer() {
	my %writer_args;
	my $file = new IO::File(">$opt_output");
	die "Cannot write to $opt_output: $!" if not defined $file;
	$writer_args{OUTPUT} = $file;
	$writer_args{'encoding'}  = 'ISO-8859-1';
	return new XMLTV::Writer(%writer_args);
}

sub init_user_agent() {
	# Change HTTP Headers to make canalsat-reunion.com happy
	$XMLTV::Get_nice::ua->default_headers->push_header('Keep-Alive'=>'300');
	$XMLTV::Get_nice::ua->default_headers->push_header('Connection'=>'keep-alive');
	$XMLTV::Get_nice::ua->default_headers->push_header('Referer'=>'http://srv1.media-overseas.com/FMPro?-db=reunion.fp5&-lay=M1&-format=recherchereunion.htm&-view');

	# init cookies
	my $cookies = HTTP::Cookies->new(
		file => "$ENV{'HOME'}/.xmltv/tv_grab_re.cookies",
		autosave => 1,
	);
	$XMLTV::Get_nice::ua->cookie_jar($cookies);

	get_nice("http://fw-web.canalreunion.net/552.0.html?&amp;no_cache=1&programme[fuseauChoix]=2&programme[validation_choifuseaux]=Envoyer");
}

sub get_channels_list($) {
	my $arg = shift;
	my @channels;

	if ($arg eq 'CANALSAT') {
		#my $url = "http://srv1.media-overseas.com/FMPro?-db=reunion.fp5&-lay=M1&-format=recherchereunion.htm&-view";
		my $url = "http://fw-web.canalreunion.net/552.0.html?&no_cache=1&fuseauChoix=2&validation_choixfuseau=Envoyer";
		my $html = get_nice_tree $url;
	
		my $chaines = $html->look_down('_tag', 'select', 'name', 'programme[chaine]');
		foreach my $chaine ($chaines->look_down('_tag', 'option')) {
			my %channel;

			my $id = $chaine->attr_get_i('value');
			next if ($id eq "");
			my $title = $chaine->as_text();

			$channel{'type'} = "CANALSAT";
			$channel{'id'} = $id;
			$channel{'name'} = $title;
			$channel{'icon'} = "$CANALSAT_ICON_URL/${id}_grand_01.gif";

			push @channels,\%channel;
		}
	
		$html->delete();
		undef $html;
	}

	if ($arg eq 'PARABOLE') {
		my $url = "http://www.parabolereunion.com/index.jsp?childUrl=epg/epg0.jsp";
		my $html = get_nice_tree $url;

		my $chaines = $html->look_down('_tag', 'select', 'name', 'sel_channel');
		foreach my $chaine ($chaines->look_down('_tag', 'option')) {
			my %channel;

			my $id = $chaine->attr_get_i('value');
			next if ($id == -1);
			my $title = $chaine->as_text();
			$title =~ s/\d+\.//;
			$title =~ s/(\s)*$//;
	
			$channel{'type'} = "PARABOLE";
			$channel{'id'} = $id;
			$channel{'name'} = $title;
			$channel{'icon'} = "$PARABOLE_ICON_URL/channel_logo_small$id.gif";

			push @channels,\%channel;
		}
	
		$html->delete();
		undef $html;
	}
	
	return @channels;
}

sub get_canalsat_programmes_list_slow($%) {
	my $url = shift(@_);
	my $p = shift(@_);
	my @directors;
	my @actors;

	# get request and parse
	my $html = get_nice_tree $url;
	$html->objectify_text();

	# look for director
	my $div = $html->look_down('_tag' => 'div', 'id' => 'fiche_zoom_fiche_technique');
	#""$div->dump();	
	
	# look for actors
	my $table = $div->look_down('_tag' => 'table',
		'width' => '733', 
		'cellspacing' => '0',
		'cellpadding' => '0',
		'border' => '0'
	);
   #$table->dump();
	my $ptag = $table->look_down('_tag', 'p');
	my @texts = $ptag->look_down('_tag', '~text');
	foreach my $text (@texts) {
		my $tt = $text->attr_get_i('text');

		# année
		if ($tt =~ /(\d\d\d\d) - /) {
			$p->{'date'} = $1;
		}
		
		# acteurs
		if ($tt =~ / de (.*)  Avec (.*) /) {
			push @directors, $1;

			my @a = split(',', $2);
			foreach (@a) {
				if ($_ =~ /(.*) \(.*\)/) {
					push @actors, $1;
				}
			}
		}	

		# présentateur
		if ($tt =~ / de Pr.sent. par (.*)/) {
			if ($1 =~ /,/) {
				my @a = split(',', $1);
				foreach (@a) {
					push @actors, $_;
				}
			}
			else {
				push @actors, $1;
			}
		}
	}

	$p->{credits}{director} = \@directors if @directors;
	$p->{credits}{actor}    = \@actors if @actors;

	$html->delete();
	undef $html;
}

sub get_canalsat_programmes_list($$$) {
	my ($idchaine, $offset, $days) = @_;
	die if $offset < 0;
	die if $days < 1;

	# the progs list to return
	my @progs = ();

	my $today = ParseDate 'today';

	for ($offset + 1 .. $offset + $days) {
		my $n = $_ - 1;

		# the start tag of programs for this day
		my $start = DateCalc($today, "+ $n days");
		my $stop;
		my $url_day = UnixDate($start, "%d%%2F%m%%2F%Y");

		# build the url
		my $url = "http://fw-web.canalreunion.net/552.0.html?&no_cache=1&";
		$url .= "programme[jour]=".$url_day."&";
		$url .= "programme[horaires]=6.&"; # whole day
		$url .= "programme[chaine]=".$idchaine."&";
		$url .= "programme[genre]=&";
		$url .= "Submit=OK&";
		$url .= "programme[pdf]=";
		
		# get request and parse
		my $html = get_nice_tree $url;
		$html->objectify_text();
		#$html->dump();

		# look for every DIV elements that contain programm
		my @divs = $html->look_down('_tag' => 'div', 'class' => qr/contenu_prog_listing /);

		# scan each row
		foreach my $div (@divs) {
			# the current prog being processed
			my %prog;
			my ($tag, $tt, $stop);
			#$div->dump();
			
			$prog{'channel'} = $idchaine.".canalsat-reunion.com";

			$tt = $div->attr_get_i('style');
			if ($tt =~ /margin-top:(\d+)px/) {
				my $break = int($1 / 2);

				# if the time between 2 programs is greater than 30min
				# so we are in the day after (this is needed when grabbing
				# several days in one time : same program seen twice)
				next if ($break > 30)
			}

			# here is the start time
			$tag = $div->look_down('_tag', '~text');
			$tt = $tag->attr_get_i('text');
			if ($tt =~ /(\d+):(\d\d)/ ) {
				$start = Date_SetTime($start, $1, $2, 0);
				
				my $str = UnixDate($start, "%Y%m%d%H%M%S");
				$prog{'start'} = $str." +0400";
			}
			
			# compute the duration from height attribute of the div
			$tt = $div->attr_get_i('style');
			if ($tt =~ /height:(\d+)px/) {
				my $duration = int($1 / 2);
				$prog{'length'} = $duration * 60;

				$start = DateCalc($start, "+$duration min");
			}

			# here is the title
			$tag = $div->look_down('_tag' => 'a');
			$tt = $tag->attr_get_i('onmouseover');
			if ($tt =~ /<span class=blanc bold majuscule>(.*)<\/span>/ ) {
				# "Fin des programmes" is not a real tv show
				#next if ($1 eq "Fin des programmes");

				my $title = $1;
				$title =~ s/\\'/'/g;
				$prog{'title'} = [ [ $title ] ];
			}

			# here is the description
			if ($tt =~ /contenu_de_tooltip_listing_programme>([^<]*)</) {
				my $str = $1;
				$str =~ s/\\'/'/g;
				$prog{'desc'} = [ [ $str, "fr" ] ] if not $str eq "";
			}
		
			# here is the category
			$tt = $div->attr_get_i('class');
			$prog{'category'} = [ [ "Musique", "fr" ] ] if ($tt =~ /musique$/);
			$prog{'category'} = [ [ "Cinéma", "fr" ] ] if ($tt =~ /cinema$/);
			$prog{'category'} = [ [ "Sport", "fr" ] ] if ($tt =~ /sport$/);
			$prog{'category'} = [ [ "Divertissement", "fr" ] ] if ($tt =~ /divertissement$/);
			$prog{'category'} = [ [ "Jeunesse", "fr" ] ] if ($tt =~ /jeunesse$/);
			$prog{'category'} = [ [ "Découverte", "fr" ] ] if ($tt =~ /decouverte$/);
			$prog{'category'} = [ [ "Infos / Magazine", "fr" ] ] if ($tt =~ /infos_magazine_emission$/);
			$prog{'category'} = [ [ "Série", "fr" ] ] if ($tt =~ /serie_feuilleton$/);

			# get director/actors if --slow was asked
			if ($opt_slow) {
				$tag = $div->look_down('_tag', 'a');
				my $href = $CANALSAT_BASE_URL.$tag->attr_get_i('href');
				get_canalsat_programmes_list_slow($href, \%prog);
			}
	
			# add the current prog to the list if it is valid
			# and keep a hand on it
			if (defined $prog{'title'}) {
				push @progs,\%prog;
			}
		}

		$html->delete();
		undef $html;
	}

	return @progs;
}

sub get_parabole_programmes_list_slow($%) {
	my $url = shift(@_);
	my $p = shift(@_);

	# get request and parse
	my $html = get_nice_tree $url;

	my $t;
	
	# get the résumé
	my $resume_tag = $html->look_down('_tag', 'font', 'class', 'commonText');
	if (defined $resume_tag) {
		$t = $resume_tag->as_text();
		$p->{'desc'} = [ [ $t, "fr" ] ];
	}

	# get actors list
	my @actors;
	my $actors_tag = $html->look_down('_tag', 'font', 'class', 'highlightCasting');
	if (defined $actors_tag) {
		$t = $actors_tag->as_text();
		
		my @a = split(',', $t);
		foreach (@a) {
			if ($_ =~ /(.*) \(.*\)/) {
				push @actors, $1;
				next;
			}

			push @actors, $_;
		}
	}

	$p->{credits}{actor} = \@actors if @actors;

}

sub get_parabole_programmes_list( $$$ ) {
	my ($channel, $offset, $days) = @_;
	die if $offset < 0;
	die if $days < 1;

	my $url_base = "http://www.parabolereunion.com/index.jsp?childUrl=epg/epg0.jsp&mode=1&search_title=&sel_time=-1&sel_type=-1&";
	my $url_channel = "sel_channel=".$channel."&";

	# the progs list to return
	my @progs = ();

	for ($offset + 1 .. $offset + $days) {
		my $url_day;
		my $start;

		if ($_ == 1) { $url_day = "sel_day=-1";	}
		else { $url_day = "sel_day=".($_ - 1); }

		my $n = $_ - 1;
		$start = DateCalc("today", "+ $n days");

		my $url = $url_base.$url_channel.$url_day;

		# get request and parse
		my $html = get_nice_tree $url;
		$html->objectify_text();
		# look for the table of programmes
		my @tables = $html->look_down('_tag', 'table', 'border' , '0', 'width', '100%');

		# the two first tables is not necessary
		shift @tables;
		shift @tables;

		# sometime the previous day appear in today's listing
		# there could be more than 1 table left
		shift @tables if (@tables > 1);

		# sometimes there is no programme for a channel
		# Reality show 24h/24 for example
		next if (@tables == 0);

		# Here is the good table
		my $table = shift @tables;
		#$table->dump();

		# look for the list of rows of the table
		my @rows = $table->look_down('_tag', 'tr');

		# scan each row
		foreach my $r (@rows) {
			# the current prog being processed
			my %prog;
	
			# look for every column
			my @td = $r->look_down('_tag', 'td');
	
			$prog{'channel'} = $channel.".parabolereunion.com";
	
			# scan each cellule of the row
			foreach my $cell (@td) {
				my @b = $cell->look_down('_tag', '~text');
				foreach my $tag (@b) {
					#$tag->dump();
					my $tt = $tag->attr_get_i('text');
	
					if ($tt =~ /(\d+):(\d+)/) {
						$start = Date_SetTime($start, $1, $2, 0);
						$start = UnixDate($start, "%Y%m%d%H%M%S");
						#$start = utc_offset($start, "+0400");
						$prog{'start'} = $start." +0400";
						next;
					}
						
					if ($tt =~ /^\s*(\d+)h(\d+)/) {
						my $length = $1 * 60 + $2;
						# length tag is not necessary 
						# if start and stop tags are presents
						#$prog{'length'} = $length * 60;

						my $stop = DateCalc($start, "+ $length min");
						$stop = UnixDate($stop, "%Y%m%d%H%M%S");
						#$stop = utc_offset($stop, "+0400");
						$prog{'stop'} = $stop." +0400";

						# Change the start date because 
						# last programme begins this day (at
						# 23:00 PM) and ends the day after 
						# (at 01:00 AM)
						my $y = UnixDate($stop, "%Y");
						my $m = UnixDate($stop, "%m");
						my $d = UnixDate($stop, "%d");

						$start = Date_SetDateField($stop, "y", $y);
						$start = Date_SetDateField($start, "m", $m);
						$start = Date_SetDateField($start, "d", $d);
						next;
					}
	
					next if ($tt =~ /^\s+/);
					next if ($tt =~ /\s+$/);
					$prog{'title'} = [ [ $tt ] ];
				}

				# get director/actors if --slow was asked
				if ($opt_slow) {
					my $a = $cell->look_down('_tag', 'a');
					if (defined $a) {
						my $href = "http://www.parabolereunion.com/".$a->attr_get_i('href');
						get_parabole_programmes_list_slow($href, \%prog);
					}
				}

			}

			# add the current prog to the list if it is valid
			if (defined $prog{'title'}) {
				#dump_programme(\%prog);
				push @progs,\%prog;
			}
		}
	}

	return @progs;
}

###
### Configure mode
###
if ($mode eq 'configure') {
	XMLTV::Config_file::check_no_overwrite($config_file);
	
	# ask user to select his broadcasters
	my @id = sort keys %BROADCASTERS;
	my @questions = map { "Would you like to download data for '$BROADCASTERS{$_}' ?" } @id;
	my @responses = ask_many_boolean(1, @questions);

	init_user_agent();

	# retrieve the channels list for each broadcasters
	foreach (0..$#id) {
		if ($responses[$_]) {
			my @ch = get_channels_list($id[$_]);
			@channels = (@channels, @ch) if @ch;
		}
	}

	# ask user to add or not each channel
	@questions = map { "Add channel $_->{'name'} ?" } @channels;
	@responses = ask_many_boolean(1, @questions);

	# create configuration file
	open(CONF, ">$config_file") or die "Cannot write to $config_file: $!";

	foreach (@channels) {
		my $r = shift @responses;
	
		if ($r) {
			print CONF "channel:";
		}
		else {
			print CONF "#channel:";
		}

		if ( $_->{'type'} eq "CANALSAT" )
		{
			print CONF "$_->{'id'}.canalsat-reunion.com;$_->{'name'}\n";
		}
		else
		{
			print CONF "$_->{'id'}.parabolereunion.com;$_->{'name'}\n";
		}
	}

	close CONF or warn "Cannot close $config_file: $!";
	say("Finished configuration.");
	exit();
}

###
### List channels
###
if ($mode eq 'list-channels') {
	# init the XMLTV writer
	my $writer = new_xmltv_writer();

	# ask user to select his broadcasters
	my @id = sort keys %BROADCASTERS;
	my @questions = map { "Select '$BROADCASTERS{$_}' ?" } @id;
	my @responses = ask_many_boolean(1, @questions);

	init_user_agent();

	# retrieve the channels list for each broadcasters
	foreach (0..$#id) {
		if ($responses[$_]) {
			my @ch = get_channels_list($id[$_]);
			@channels = (@channels, @ch) if @ch;
		}
	}

	# write the XML header
	$writer->start({
		'generator-info-name' => 'XMLTV',
		'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
	});

	foreach (@channels) {
		my $id = "id";
		$id = $_->{'id'}.".canalsat-reunion.com" if ($_->{'type'} eq "CANALSAT");
		$id = $_->{'id'}.".parabolereunion.com" if ($_->{'type'} eq "PARABOLE");

		$writer->write_channel({
			'id'           => $id,
			'display-name' => [[ $_->{'name'} ]],
			'icon'         => [{ 'src' => $_->{'icon'} }]
		});
	}

	$writer->end();
	exit();
}

###
### Grab programmes listing
###
die if $mode ne 'grab';

# Now let's do it
Date_Init("TZ=UTC");

# read tv_grab_re conf file...
@config_lines = XMLTV::Config_file::read_lines($config_file);

# ...and parse its content
my $n = 0;
foreach (@config_lines) {
	++$n;
	next if not defined;

	if ( /^channel:(\d+)\.(.*);(.*)/ ) {
		my %channel;

		$channel{'id'} = $1;
		$channel{'name'} = $3;

		if ($2 eq 'canalsat-reunion.com') {
			$channel{'type'} = "CANALSAT";
			$channel{'icon'} = "$CANALSAT_ICON_URL/".$channel{'id'}."_grand.gif";
		}
		
		if ($2 eq 'parabolereunion.com') {
			$channel{'type'} = "PARABOLE";
			$channel{'icon'} = "$PARABOLE_ICON_URL/channel_logo_small".$channel{'id'}.".gif";
		}

		push @channels,\%channel;
	}
	else {
		die "$config_file:$n - Bad line channel";
	}
}

die "No working channels configured, so no grabing" if not @channels;

# init the XMLTV writer
my $writer = new_xmltv_writer();

# write the XML header
$writer->start({
	'generator-info-name' => 'XMLTV',
	'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
});

# first, write channels
foreach (@channels) {
	my $id = "id";
	$id = $_->{'id'}.".canalsat-reunion.com" if ($_->{'type'} eq "CANALSAT");
	$id = $_->{'id'}.".parabolereunion.com" if ($_->{'type'} eq "PARABOLE");

	$writer->write_channel({ 
		'id'           => $id,
		'display-name' => [ [ $_->{'name'} ] ],
		'icon'         => [ { 'src' => $_->{'icon'} } ]
	});
}

# then, programmes
foreach (@channels) {
	my @progs;

	if ($_->{'type'} eq 'CANALSAT') {
		init_user_agent();
		@progs = get_canalsat_programmes_list($_->{'id'}, $opt_offset, $opt_days);
	}

	if ($_->{'type'} eq 'PARABOLE') {
		@progs = get_parabole_programmes_list($_->{'id'}, $opt_offset, $opt_days);
	}

	foreach my $prog (@progs) {
		$writer->write_programme(\%$prog);
	}
}

$writer->end();
