#!/usr/bin/perl -w

=pod

=head1 NAME

tv_grab_dk - Grab TV listings for Denmark.

=head1 SYNOPSIS

tv_grab_dk --help

tv_grab_dk [--config-file FILE] --configure [--gui OPTION]

tv_grab_dk [--config-file FILE] [--output FILE] [--days N]
           [--offset N] [--quiet]

tv_grab_dk --capabilities

tv_grab_dk --version

=head1 DESCRIPTION

Output TV listings for several channels available in Denmark.  The
data comes from tv.tv2.dk. The grabber relies on parsing HTML so it
might stop working at any time.

First run B<tv_grab_dk --configure> to choose, which channels you want
to download. Then running B<tv_grab_dk> with no arguments will output
listings in XML format to standard output.

B<--configure> Prompt for which channels,
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_dk.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--output FILE> Write to FILE rather than standard output.

B<--days N> Grab N days.  The default is one week.

B<--offset N> Start N days in the future.  The default is to start
from today.

B<--quiet> Suppress the progress messages normally written to standard
error.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://membled.com/twiki/bin/view/Main/XmltvCapabilities>

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHOR

Jesper Skov (jskov@zoftcorp.dk). Originally based on tv_grab_nl by
Guido Diepen and Ed Avis (ed@membled.com) and tv_grab_fi by Matti
Airas.
Additions by Jesper Toft (jesper@bzimage.dk)

=head1 BUGS

Things in the programme descriptions to handle:

Vaert: Mikael Kamber.

Genudsendelse fra mandag. - convert to <previously-shown>?

=cut

######################################################################
# initializations

use strict;
use XMLTV::Version '$Id: tv_grab_dk,v 1.35 2006/07/12 12:58:31 jtoft Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig cache/;
use XMLTV::Description 'Denmark';
use Getopt::Long;
use HTML::TreeBuilder;
use HTML::Entities; # parse entities
use IO::File;
use URI;

use Date::Manip;

use XMLTV;
use XMLTV::Memoize;
use XMLTV::ProgressBar;
use XMLTV::Ask;
use XMLTV::Mode;
use XMLTV::Config_file;
use XMLTV::DST;
use XMLTV::Date;
# Todo: perhaps we should internationalize messages and docs?
use XMLTV::Usage <<END
$0: get Danish television listings in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
        [--offset N] [--quiet]
To show capabilities: $0 --capabilities
To show version: $0 --version
END
  ;

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
	*t = sub {};
	*d = sub { '' };
    }
    else {
	*t = \&Log::TraceMessages::t;
	*d = \&Log::TraceMessages::d;
	Log::TraceMessages::check_argv();
    }
}

use LWP::UserAgent;
my $ua = LWP::UserAgent->new;
$ua->agent("xmltv/$XMLTV::VERSION");

# Initialize cookie_jar
use HTTP::Cookies;
my $cookies = HTTP::Cookies->new;
$ua->cookie_jar($cookies);

# Whether zero-length programmes should be included in the output.
my $WRITE_ZERO_LENGTH = 0;

# default language
my $LANG = 'da';

# Winter time in Denmark - summer time is one hour ahead of this.
my $TZ = '+0100';

# tv2.dk userid.
my $userid;

sub process_summary_page( $$$ );
sub process_listings_page( $$$$$ );

######################################################################
# get options

# Known categories.
my %categories = (	'Film'		=> 'movie',
			'Serier'	=> 'series',
			'Sport'		=> 'sport',
			'Nyheder'	=> 'news',
			'Alment'	=> 'misc',
			'Musik'		=> 'music',
			'Dokumentar'	=> 'documentary',
			'Brn'		=> 'kids',
			'Underholdning'	=> 'entertainment',
			'Erotik'	=> 'adult');

my %movietypes = (	'action'	=> 'action',
			'drama'		=> 'drama',
			'erotisk'	=> 'erotic',
			'eventyr'	=> 'adventure',
			'gyser'		=> 'horror',
			'komedie'	=> 'comedy',
			'krimi'		=> 'crime',
			'thriller'	=> 'thriller',
			'romantisk'	=> 'romance',
			'western'	=> 'western'
);
# Get options
XMLTV::Memoize::check_argv('get_url');
my ($opt_days, $opt_offset, $opt_help, $opt_output,
    $opt_configure, $opt_config_file, $opt_gui,
    $opt_quiet, $opt_list_channels);
$opt_offset = 0; # default
GetOptions('days=i'        => \$opt_days,
	   'offset=i'      => \$opt_offset,
	   'help'          => \$opt_help,
	   'configure'     => \$opt_configure,
	   'config-file=s' => \$opt_config_file,
	   'gui:s'         => \$opt_gui,
	   'output=s'      => \$opt_output,
	   'quiet'         => \$opt_quiet,
	   'list-channels' => \$opt_list_channels,
	  )
  or usage(0);

usage(1) if $opt_help;

die 'number of days must not be negative'
  if ((defined $opt_days && $opt_days < 0) || ($opt_offset < 0));

die "tv2.dk only provide information for today and the next 6 days."
  if ($opt_offset > 6);

if ( ! defined $opt_days ) {
	# If there is no --days given. Set it to as many as possible.
	$opt_days = 7 - $opt_offset;
} else {
	# --days option was given. Warn if its too high.
	if (($opt_days + $opt_offset) > 7) {
		$opt_days = 7 - $opt_offset;
		warn "tv2.dk only provide information for today and the next 6 days.";
	}
}

XMLTV::Ask::init($opt_gui);

my $mode = XMLTV::Mode::mode('grab', # default
			     $opt_configure => 'configure',
			     $opt_list_channels => 'list-channels',
			    );

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_dk', $opt_quiet);

if ($mode eq 'configure') {
    XMLTV::Config_file::check_no_overwrite($config_file);
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
    # find list of available channels
    my $bar = new XMLTV::ProgressBar('getting list of channels', 1)
      if not $opt_quiet;
    my %channels = get_channels();
    die 'no channels could be found' if (scalar(keys(%channels)) == 0);
    update $bar if not $opt_quiet;
    $bar->finish() if not $opt_quiet;
    
    # Ask about tv2.dk id.
	my $username = ask( "\nTo be able to get some extra features from tv2.dk, the site needs a username and password.
Currently the extra features are the category tag, which require you
to go to http://tv.tv2.dk/indstillinger/ and move all icons under
'Programoversigt' to the list of shown icons.

Enter either your tv2.dk username or a blank line :");
	while(length($username)!=0) {
		my $password = ask_password("Enter your tv2.dk password:");
		$userid = get_tv2id($username,$password);
		if($userid == -1) {
			$username = ask("Wrong password.
Enter either your tv2.dk username or a blank line :");
		} else {
			print CONF "userid $userid\n";
			$username = "";
		}
	}

    # Ask about each channel.
    my @chs = sort keys %channels;
    my @names = map { $channels{$_} } @chs;
    my @qs = map { "add channel $_?" } @names;
    my @want = ask_many_boolean(1, @qs);
    foreach (@chs) {
	my $w = shift @want;
	warn("cannot read input, stopping channel questions"), last
	  if not defined $w;
	# No need to print to user - XMLTV::Ask is verbose enough.

	# Print a config line, but comment it out if channel not wanted.
	print CONF '#' if not $w;
	my $name = shift @names;
        print CONF "channel $_ $name\n";
	# TODO don't store display-name in config file.
    }
    
    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");

    exit();
}

# Not configuring, we will need to write some output.
die if $mode ne 'grab' and $mode ne 'list-channels';

# If we are grabbing, check we can read the config file before doing
# anything else.
#
my @config_lines;
if ($mode eq 'grab') {
    @config_lines = XMLTV::Config_file::read_lines($config_file);
}

my %w_args;
if (defined $opt_output) {
    my $fh = new IO::File(">$opt_output");
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
$w_args{encoding} = 'ISO-8859-1';
my $writer = new XMLTV::Writer(%w_args);
# TODO: standardize these things between grabbers.
$writer->start
  ({ 'source-info-url'     => 'http://tv.tv2.dk/tv',
     'source-data-url'     => 'http://tv.tv2.dk/tv',
     'generator-info-name' => 'XMLTV',
     'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
   });

if ($opt_list_channels) {
    my $bar = new XMLTV::ProgressBar('getting list of channels', 1)
      if not $opt_quiet;
    my %channels = get_channels();
    die 'no channels could be found' if (scalar(keys(%channels)) == 0);
    update $bar if not $opt_quiet;

    foreach my $ch_did (sort(keys %channels)) {
	my $ch_name = $channels{$ch_did};
	my $ch_xid = "$ch_did.tv.tv2.dk";
	$writer->write_channel({ id => $ch_xid,
				 'display-name' => [ [ $ch_name ] ],
				 'icon' => [{'src' => get_icon($ch_did)}]
				});
    }
    $bar->finish() if not $opt_quiet;
    $writer->end();
    exit();
}

# Not configuring or writing channels, must be grabbing listings.
die if $mode ne 'grab';
my (%channels, @channels, $ch_did, $ch_name);
my $line_num = 1;
foreach (@config_lines) {
    ++ $line_num;
    next if not defined;

    # FIXME channel data should be read from the site, and then the
    # config file only gives the XMLTV ids that are interesting.
    #
    if (/^channel:?\s+(\S+)\s+([^\#]+)/) {
	$ch_did = $1;
	$ch_name = $2;
	$ch_name =~ s/\s*$//;
	push @channels, $ch_did;
	$channels{$ch_did} = $ch_name;
    } elsif (/^userid ([0-9]*)/) {
        $userid = $1;
    }
    else {
	warn "$config_file:$.: bad line\n";
    }
}

######################################################################
# begin main program

# Setting cookies
if(defined $userid) {
	$cookies->set_cookie(0,'cookTV2[intPingUserID]',$userid,'/','.tv2.dk',80,1,0,9999999,0);
}
# For some reason this line only works, if you have a valid userid.
$cookies->set_cookie(0,'cookTV2[strKategoriIkoner]','5%2C1%2C55%2C38%2C50%2C61%2C13%2C59%2C40%2C2%2C51%2C46%2C63%2C26%2C48%2C53%2C28%2C31%2C3%2C57%2C42%2C44','/','.tv2.dk',80,1,0,9999999,0);

my $now = parse_date('now');
die if not defined $now;

Date_Init('TZ=UTC');

my @to_get;

# the order in which we fetch the channels matters
my $today = UnixDate($now, '%Y-%m-%d'); die if not defined $today;

# It looks like 5:00 is midnight at tv2.dk..
# This means we have to be extra carefull at 0:00 and 5:00
# Give a warning 30 min before and after.. Just in case.
if( (UnixDate(DateCalc($now,'+30min'),'%H') == 5) || (UnixDate(DateCalc($now,'+30min'),'%H') == 0) ) {
	warn 'It is not recomended to run this script at 23:30-00:30 and 4:30-5:30';
}

foreach $ch_did (@channels) {
    $ch_name = $channels{$ch_did};
    my $ch_xid = "$ch_did.tv.tv2.dk";
    $writer->write_channel({ id => $ch_xid,
			     'display-name' => [ [ $ch_name ] ],
				 'icon' => [{'src' => get_icon($ch_did)}]
				 });
    for (my $i = $opt_offset;$i<($opt_offset + $opt_days);$i++) {
	# Request day when constructing URL since it is represented as
	# an integere offset from today. Still pass in the computed
	# date - may need it sometime...
	my $day = UnixDate(DateCalc($today, "+ $i days"), '%Y-%m-%d');
	t "turned offset $i (from $today) into date $day";
	push @to_get, [ $i, $day, $ch_xid, $ch_did ];
    }
}

my %warned_ch_name; # suppress duplicate warnings

my $bar = new XMLTV::ProgressBar('fetching data', scalar @to_get)
  if not $opt_quiet;
my @to_get_detailed;
my $num_detailed = 0;
foreach (@to_get) {
    my ($tv2date, $date, $ch_xmltv_id, $ch_tvgids_id) = @$_;
    t "going to get $ch_xmltv_id for $date";
    process_listings_page($writer, $ch_xmltv_id, $tv2date, $ch_tvgids_id , $date);
    update $bar if not $opt_quiet;
}
$bar->finish() if not $opt_quiet;
$writer->end();
exit(0);

######################################################################
# subroutine definitions

# arguments:
#   XMLTV::Writer object to write to
#   XMLTV id of channel
#   URL to fetch
#   Date::Manip object giving day for programmes in page (at least
#     until they cross midnight)

my ($warned_discarding_parts, $commented_episode_num);
sub process_listings_page ( $$$$$ ){
#    local $Log::TraceMessages::On = 1;
    my ($writer, $ch_xmltv_id, $tv2date, $tv2chan, $day_obj) = @_;
    my $next_day = 0;
    my $day = UnixDate($day_obj, '%Q');
	my $dayOffset = 0;
    t "getting channel $ch_xmltv_id, date $day";
    
	# If after midnight and before 5, we are off by a day.
	if( UnixDate($now,'%H') < 5 ) {
		$dayOffset = 1;
	}
	
	# We make an HTML::TreeBuilder object, get the information
	# from it and them delete it.
	my $t = get_tree("http://tv.tv2.dk/tv/listning.php?Region=1&KanalID=".$tv2chan."&Dato=".($tv2date+$dayOffset)."&Kategori=0&Periode=1");
	
    my @elems = $t->look_down(class => 'udsendelse-vises-nu');
    push @elems, $t->look_down(class => 'udsendelse');
    if (not @elems) {
		warn "No data for channel $tv2chan $tv2date days from now, skipping" unless $opt_quiet;
		return;
    }

    my $crossed_midnight = 0;

    my @info;
    for (my $i = 0; $i <= $#elems; $i++) {
	my $e = $elems[$i];
	# Process the list of [ heading, data ] pairs.
	my ($start, $stop, # exactly one
	    $title, $sub_title, $genre, $date,               #
	    $episode_num, $actors, $writers, $commentators, #
	    $director, $previously_shown, $orig_title,      # at most one
	    $aspect, $colour, $stereo, $texted,
	    @presenter, @url # zero or more
	    );
	
	my $teletext_sub = 0;  # boolean

	# defaults
	$aspect = "4:3";
	$colour = 1;
	
	if (defined ($e->look_down(class => "udsendelse-er-vist"))) {
	    next;
	}

	if (defined ($_ = $e->look_down(class => "udsendelse-tid"))) {
	    my $eur_date = "$day " . $_->as_text();
	    $start = parse_local_date($eur_date, $TZ);
	} else {
	    warn "no start time, skipping programme\n" unless $opt_quiet;
	    next;
	}

	# When looking for the end time, we may also have to bump $day
	# to the next day
	if (defined (my $next_e = $elems[$i+1])) {
	    if (defined ($_ = $next_e->look_down(class => "udsendelse-tid"))) {
		$stop = parse_local_date("$day " . $_->as_text(), $TZ);

		if (0 < Date_Cmp($start, $stop)) {
		    my $err;
		    $day = nextday($day);
		    $stop = parse_local_date("$day " . $_->as_text(), $TZ);
		}
	    }
	}
	# If the stop time is not set, it's not set.  We don't guess
	# some arbitrary value.
	#

	if (defined ($_ = $e->look_down(class => "udsendelse-titel"))) {
	    $title = $_->as_text();
	} else {
	    warn "no title, skipping programme\n" unless $opt_quiet;
	    next;
	}

	my @desc; # bits of description
	if (defined (my $d = $e->look_down(class => "udsendelse-beskrivelse",
					name => "divBeskrivelseLang"))) {
	    my $line = 0;
	    foreach $_ ($d->content_list) {
		$line++;

		if (3 == $line && ref $_) {
		    $_ = $_->as_text();
		    if (/\((.+)\)/) {
			$orig_title = $1;
		    }
		    next;
		}

		next if ref $_;
		next if $_ eq " ";

		if (s/Vises i 16:9 format.|Vises i bredformat.//) {
		    $aspect = "16:9";
		}

		# Description contains more than just textual blurb.
		if (1 == $line) {
		    if (/fra (\d\d\d\d)/) {
			$date = $1;
		    }
		} elsif (/^Manuskript:\s*(.*)/) {
		    my @w = split(/, | og /, $1);
		    s/[.]$// foreach @w;
		    push @$writers, @w;
		    next;
		} elsif (/^Instruktr:\s*(.*)/) {
		    $director = $1;
		    $director =~ s/[.]$//;
		    next;
		} elsif (/^Kommentatorer:\s*(.*)/) {
		    push @$commentators, $1;
		    next;
		} elsif (/^Medvirkende:\s*(.*)/) {
		    my @a = split(/, | og /, $1);
		    s/[.]$// foreach @a;
		    push @$actors, @a;
		    next;
		}

		s/^\s+//; s/\s+$//;
		push @desc, $_ if length;
	    }
	} else {
	    if (defined ($_ = $e->look_down(class => "udsendelse-beskrivelse",
					    name => "divBeskrivelseKort"))) {
		foreach ($_->content_list) {
		    next if ref;
		    s/^\s+//; s/\s+$//;
		    next if not length;
		    push @desc, $_;
		}
	    }
	}

	my @symbols = $e->look_down(class => "udsendelse-symboler");
	
	foreach my $s (@symbols) {
	    $_ = $s->as_text();
	    if (/\(TTV\)/) {
		$teletext_sub = 1;
	    } elsif (/\(TH\)/) {
		$texted = 1;
	    } elsif (/\((\d+)\)/) {
		$episode_num = $1;
	    } elsif (/\(G\)/) {
		$previously_shown = {};
	    } elsif (/\(S\)/) {
		$stereo = {};
	    } else {
		my @imgs = $s->look_down( "_tag" ,"img");
		foreach my $i (@imgs) {
		    my $new_cat = $categories{$i->attr("alt")};
		    if ( defined $new_cat ) {
			$genre = $new_cat;
		    }
		}
	    }
	}


	my @title_data = ([ $title, $LANG]);
	push @title_data, [ $orig_title ] if defined $orig_title;
	my ($start_base, $start_tz) = @{date_to_local($start, $TZ)};

	my %prog
	    = (channel  => $ch_xmltv_id,
	       title    => \@title_data,
	       start    => UnixDate($start_base, '%q') . " $start_tz",
	      );
	if (defined $stop) {
	    my ($stop_base, $stop_tz) = @{date_to_local($stop, $TZ)};
	    $prog{stop} = UnixDate($stop_base, '%q') . " $stop_tz";
	}

	# Would be better split into paragraphs but the current format
	# doesn't allow that.
	#
	$prog{desc} = [ [ join(' ', @desc), $LANG ] ] if @desc;
	
	$prog{video} = { present => 1,
			 aspect => $aspect,
			 colour => $colour };

	$prog{audio} = { present => 1,
			 stereo => "stereo" } if defined $stereo;

	$prog{'sub-title'} = [ [ $sub_title, $LANG ] ] if defined $sub_title;
	$prog{subtitles} = [ { type => 'teletext' } ] if $teletext_sub;
	$prog{subtitles} = [ { type => 'onscreen' } ] if $texted;

	if (defined $episode_num) {
	    $episode_num = $episode_num - 1;
	    # We do not get any information about seasons, nor the total
	    # amount of episodes. Guess this is better than nothing.
	    $prog{'episode-num'} = [ [ " . $episode_num . ", "xmltv_ns" ] ];
	}
	$prog{date} = $date if defined $date;
	if ( defined $genre ) {
	    my @cats = ([$genre]);
	    
		if ( $genre eq "movie" || $genre eq "series" ) {
			foreach(keys(%movietypes)) {
				if ($desc[0]) {
					if($desc[0] =~ /$_/ ) {
						@cats = ( [ $movietypes{$_} ], @cats );
					}
				}
			}
# For movies, the type of movie is listed in the first line of the text.
	}
	    $prog{'category'} = [ @cats ];
	}
	$prog{'previously-shown'} = $previously_shown if $previously_shown;
	
	my %c;

	if ($actors) {
		s/.*: // foreach @$actors;
		$c{actor} = $actors;
	}
	$c{writer} = $writers if $writers;
	$c{commentator} = $commentators if $commentators;
	$c{director} = [ $director ] if $director;
	$prog{credits} = \%c if %c;

	$writer->write_programme(\%prog);

    }
    $t->delete(); undef $t;
}

# get channel listing
sub get_channels {
    my %channels;

    my $t = get_tree('http://tv.tv2.dk/tv');

    # The channels and their IDs are defined in javascript arrays
    my @elements = $t->look_down("_tag" => "script");
    foreach my $i (@elements) {
	my @children = $i->content_list();
	
	if (scalar(@children) != 1 || ref($children[0])) {
	    next;
	}
	
	my @lines = split(/\n/, $children[0]);

	while (defined($_ = shift @lines)) {
	    if (/aryKanalID\[.*Array\((.+)\);/) {
		my @channel_ids = split(/,/, $1);

		$_ = shift @lines;
		if (/aryKanalNavn\[.*Array\((.+)\);/) {
		    my @channel_names = split(/,/, $1);
		
		    foreach my $channel_id (@channel_ids) {
			$channel_id =~ s/\'//g;
			$channel_id =~ s/^\s*//;
			$channel_id =~ s/\s*$//;

			next if ($channel_id eq "0");

			if (length($channel_id) == 1) {
			    $channel_id = "00" . $channel_id;
			}
			if (length($channel_id) == 2) {
			    $channel_id = "0" . $channel_id;
			}

			my $channel_name = shift @channel_names;
			$channel_name =~ s/\'//g;
			$channel_name =~ s/^\s*//;
			$channel_name =~ s/\s*$//;
		
			$channels{$channel_id} = $channel_name;
		    }
		}
	    }
	}
    }

    $t->delete(); undef $t;
    return %channels;
}

# Clean up bad characters in the site's HTML.
my $warned_bad_chars;
sub tidy( $ ) {
    for (my $tmp = shift) {
	tr/\222/''/;
	if (tr/\012\015\040-\176\240-\377//dc) {
	    warn 'removing bad characters' unless ($warned_bad_chars++ or $opt_quiet);
	}
	return $_;
    }
}

require HTML::TreeBuilder;
sub get_tree( $ ) {
	my $t = new HTML::TreeBuilder;
        my $c = get_url(shift);
	$t->parse( $c );
	$t->eof;
	return $t;
}

my $fetched;
sub get_url( $ ) {
  sleep rand(5) if defined $fetched;
  $fetched = 1;
  my $c = tidy($ua->get(shift)->content);
  return $c;
}

# Bump a YYYYMMDD date by one.
sub nextday {
    my $d = shift;
    my $p = parse_date($d);
    my $n = DateCalc($p, '+ 1 day');
    return UnixDate($n, '%Q');
}

# This will emulate a browser login request, and filter out the login id.
sub get_tv2id {
	my ($username,$password) = @_;
	my @form = ["returnURL" => "http://tv.tv2.dk/",
			"username" => $username,
			"password" => $password,
			"LOGIN" => "True",
			"autologon" => "On"];
	$ua->post("http://login.tv2.dk/login.php",@form);
	my @lines = split(/\n/,$cookies->as_string);
	while($_ = shift @lines) {
		s/^Set-Cookie3: ([^;]*);.*$/$1/;
		if(/^tv2login=/) {
			s/^.*id%.{0,2}3D([0-9]*).*$/$1/;
			return $_;
		}
	}
	return -1;
}

# Icon URL for a given channel.
sub get_icon {
	my ($url) = @_;
	$url =~s/^0*//;
	return "http://tv.tv2.dk/images/logo/".$url.".gif";
}
