#! /usr/bin/perl
#* ============================================================
# * File  : default.pl
# * Author: Eric Giesselbach <ericgies@kabelfoon.nl>
# * Date  : 2004-10-31
# * Description : default playlist parser for streamtuned / mythstream
# *   gets absolute and relative url's from hyperlinks (name and url)
# *   url's in img tags are ignored
# *   other protocol://address url's are parsed (url only)
# *   gets (relative) url's from frame tags (url only)
# *   streamtuned detects relative url's and prefixes them properly
# * ============================================================ */

use English;
use XML::DOM;
#------------------------------------------------------------------------------
# Init
#------------------------------------------------------------------------------

&read_parse();    # get commandline parameters into @in
$source = $in[0]; # source filename from command line

my $doc = XML::DOM::Document->new;
my $head = $doc->createXMLDecl ('1.0');
my $root = $doc->createElement('items');

sub newNode
{
  local $name  = shift;
  local $value = shift;
  local $node = $doc->createElement($name);
  local $text = $doc->createTextNode($value);
  $node->appendChild($text);
  
  return $node;
}

#------------------------------------------------------------------------------
# read file into $data
#------------------------------------------------------------------------------

$datafile = $source;
open( INFO, "<$datafile" );      # Open file for reading
undef $/;
$data = <INFO>;                 # Read all
close(INFO) ;

#------------------------------------------------------------------------------
# Dump found url's in format expected by streamtuned
#------------------------------------------------------------------------------

sub dump_lines
{
  @lines = split ( "\n", $data);
  foreach $line(@lines)
  {
    if (@matches = ( $line =~ m/HYPER\*\*\*(.*)\*\*\*HYPER\*\*\*(.*)\*\*\*HYPER/ ) )
    {
      # remove html tags in name
      $name = @matches[1];
      $name =~ s/<.*?>//g;
      
      $item = $doc->createElement('item');
      $root->appendChild($item);
      
      $item->appendChild( newNode('name', $name) );
      $item->appendChild( newNode('url', @matches[0]) );
    }
     # else { print "NOMATCH: " . $line . "\n"; }
     #print $line . "\n";
  }
}

#------------------------------------------------------------------------------
# search url's in $data and place them in special format
#------------------------------------------------------------------------------

$urlpat = "\\w\\.\\-\\/\\:\\?\\&\\=\\_\\~\\*\\@;,\\+\\$\\(\\)";
# remove \n
$data =~ s/\n/ /g;
# kill javascript
$data =~ s/href='?"?javascript://g;

# get <a href=url>title</a>
  $data =~ s/<a\s+[^>]*?href\s*=\s*"?'?([$urlpat]+)"?'?[^>]*>(.*?)<\/a/\nHYPER***$1***HYPER***$2***HYPER\n/gi;
# remove <img sometag="" whatever>
  $data =~ s/<img\s(?:".*?"|.*?)*?>//gi; # check this: should extract description from alt="[description]"
# get FRAME src=url
  $data =~ s/FRAME\s+.*?src\s*=\s*"?'?([$urlpat]+)"?'?/\nHYPER***$1***HYPER***$1***HYPER\n/gi;

#print "\nDATA DUMP 3\n";
#print $data;
#print "\nEND DATA DUMP 3\n";

&dump_lines();


# delete matches
$data =~ s/HYPER\*\*\*.*\*\*\*HYPER\*\*\*.*\*\*\*HYPER//gi;
# get protocol://url
#$data =~ s/(?:^|[\s<>\[\]\:\(\)"'])(\w+:\/\/[$urlpat]+)/\nHYPER***$1***HYPER***$1***HYPER\n/gi;
$data =~ s/(\w+:\/\/[$urlpat]+)/\nHYPER***$1***HYPER***$1***HYPER\n/gi;

&dump_lines();

print $head->toString;
print $root->toString;
print "\n";

#--------------------------------------------------------------------------------
# get command line parameters
#--------------------------------------------------------------------------------

sub read_parse 
{
  local (*in) = @_ if @_;
  local ($i);
  push(@in, @ARGV);
  foreach $i (0 .. $#in) { $in[$i] =~ s/\+/ /g;}
  return scalar(@in);
}



