# -*- coding: utf-8 -*-
# Elisa - Home multimedia server
# Copyright (C) 2006-2008 Fluendo Embedded S.L. (www.fluendo.com).
# All rights reserved.
#
# This file is available under one of two license agreements.
#
# This file is licensed under the GPL version 3.
# See "LICENSE.GPL" in the root of this distribution including a special
# exception to use Elisa with Fluendo's plugins.
#
# The GPL part of Elisa is also available under a commercial licensing
# agreement from Fluendo.
# See "LICENSE.Elisa" in the root directory of this distribution package
# for details on that license.

"""
This media provider provides access to the rss:// scheme. It reads the
enclosures of a feed if there are any; otherwise it searches the description
of each item for images.

To use it follow these steps:
Go to your favorite podcast website, copy the link of the RSS feed, and paste
it into the locations of the audio activity for audio data, of the video
activity for videos, or of the image activity for images. Now remove the
'http' and replace it with 'rss':
http://www.rocketboom.com/vlog/quicktime_daily_enclosures.xml
 = rss://www.rocketboom.com/vlog/quicktime_daily_enclosures.xml


This URI is now accessible in your audio (or video) menu inside browse by
folder.

Enjoy it!


Troubleshooting:
You get no children, although it has finished loading?
    1. Are you sure that you entered the URI in the right activity? Podcasts
    are NOT shown in the video activity, and the other way round.
    2. Are you sure, that the requested data is in there? To check this for
    videos/audios do this:
      Be sure that the rss you are using really has the enclosure tag. For this,
      you can just download the file and read it with your favorite editor.
      Look for the word 'enclosure'. If you don't find any, your blog does not
      support it and we can't read it.
    If you have problem with images, do this:
      You can just take a look with Firefox (> 2.0) at the file. If there are
      images appearing, this plugin should also be able to find them.

If you are sure, that it should work, but it doesn't: feel free to file a ticket
on the elisa trac. Don't forget to paste your link there and also, how you did
the troubleshooting!
"""


__maintainer__ = 'Benjamin Kampmann <benjamin@fluendo.com>'

from elisa.base_components.media_provider import MediaProvider
from elisa.core.media_uri import MediaUri, quote, unquote
from elisa.core.media_file import MediaFile

from elisa.extern.coherence.et import parse_xml

import urllib2, re

class PodcatcherMedia(MediaProvider):
    """
    Media provider for the rss:// scheme.

    Every rss:// URI whose host is not 'CHILD' is treated as a directory
    (a feed). Listing such a directory downloads the feed over HTTP and
    produces one child per <item> that either carries an audio/video
    <enclosure> or, failing that, a <description> containing an <img> tag.

    Children are addressed as rss://CHILD/<quoted real URL>; use
    get_real_uri() to turn such a child back into the URL it points to.
    """

    # Captures the value of the first double-quoted src attribute that
    # follows an "<img" tag opener.
    reg_img_src = re.compile("<img.src=\"(.*?)\"")

    def __init__(self):
        MediaProvider.__init__(self)
        # Maps each child MediaUri handed out so far to its content dict,
        # so that _blocking_get_media_type() can answer for children.
        self._children = {}
        # Feed URIs listed here are not fetched by
        # _blocking_get_direct_children(). Nothing in this class adds to it.
        self._cached = []

    def scannable_uri_schemes__get(self):
        """Return an empty dict: no scheme is declared scannable."""
        return {}

    def supported_uri_schemes__get(self):
        """Return the supported schemes, mapping 'rss' to the value 50."""
        ### Feedparser also supports atom, so shouldn't we add it here?
        return {'rss': 50}

    def _blocking_get_media_type(self, uri):
        """
        Return the content dict describing uri.

        Directories get a fixed 'directory' description. For children the
        dict recorded while listing their feed is returned, or None if the
        child is unknown.
        """
        if self._blocking_is_directory(uri):
            return {'file_type': 'directory', 'mime_type': ''}
        return self._children.get(uri, None)

    def _blocking_is_directory(self, uri):
        """Return True unless uri is a child (host equal to 'CHILD')."""
        return uri.host != 'CHILD'

    def _blocking_has_children_with_types(self, uri, media_types):
        """
        Return True for every directory, False for children.

        media_types is ignored: the feed would have to be fetched to know
        which kinds of children it really contains.
        """
        ## we need a kind of a MAYBE-Operator. We don't know, if there
        ## are this children, before accessing...
        return self._blocking_is_directory(uri)

    def _get_url_data(self, url):
        """Download url and return the complete response body."""
        handle = urllib2.urlopen(url)
        try:
            return handle.read()
        finally:
            # Release the connection even when read() fails.
            handle.close()

    def _blocking_get_direct_children(self, uri, children):
        """
        Append the children of the feed uri to children and return it.

        Each appended element is a (MediaUri, content dict) tuple. Nothing
        is appended when uri is not a directory or is listed in
        self._cached. Errors raised while downloading or parsing the feed
        propagate to the caller.
        """
        if not self._blocking_is_directory(uri) or uri in self._cached:
            return children

        # rss://host/path -> http://host/path
        real_http = "http%s" % str(uri)[3:]
        self.debug("Real URI is %s" % real_http)

        parsed = parse_xml(self._get_url_data(real_http))

        for item in parsed.findall('channel/item'):
            enclosure = item.find('enclosure')
            description = item.find('description')

            if enclosure is not None:
                found = self._enclosure_child(enclosure)
            elif description is not None:
                # No real enclosures found. So let's search in the
                # description; currently only supported for images!
                found = self._description_child(description)
            else:
                found = None

            if found is None:
                continue

            target_url, content = found
            child_uri = MediaUri('rss://CHILD/%s' % quote(target_url))
            child_uri.label = self._item_title(item)
            self._children[child_uri] = content
            children.append((child_uri, content))

        return children

    def _item_title(self, item):
        """Return the left-stripped <title> text of item, or '' if absent."""
        node = item.find('title')
        if node is None or node.text is None:
            return ''
        return node.text.lstrip()

    def _enclosure_child(self, enclosure):
        """
        Return (url, content) for an audio/video enclosure, else None.

        Attributes missing from the element are tolerated: without a url,
        or with a type naming neither audio nor video, the enclosure is
        skipped; a missing length is reported as None.
        """
        attributes = enclosure.attrib
        url = attributes.get('url')
        mime_type = attributes.get('type', '')

        if 'audio' in mime_type:
            media = 'audio'
        elif 'video' in mime_type:
            media = 'video'
        else:
            ### Is this also existing for images?
            media = None

        if not url or media is None:
            return None

        content = {'file_type': media,
                   'mime_type': mime_type,
                   'default_image': None,
                   'length': attributes.get('length')}
        return url, content

    def _description_child(self, description):
        """
        Return (image url, content) for the first image in a description.

        Returns None when the description has no text or no <img> tag
        matching reg_img_src.
        """
        if not description.text:
            return None

        # The data should be quoted HTML.
        html = unquote(description.text)
        match = self.reg_img_src.search(html)
        if match is None:
            return None

        image_url = match.group(1)
        # 'media_type' is kept for existing readers of this dict;
        # 'mime_type' is the key the other content dicts use.
        content = {'file_type': 'image',
                   'media_type': '',
                   'mime_type': '',
                   'default_image': MediaUri(image_url)}
        return image_url, content

    def get_real_uri(self, uri):
        """Return the MediaUri a rss://CHILD/... child actually points to."""
        # Drop the leading '/' of the path, then undo the quoting applied
        # when the child URI was built.
        return MediaUri(unquote(uri.path[1:]))


if __name__ == "__main__":

    m = PodcatcherMedia()
    uri = MediaUri('rss://www.brennpunkt-ostkreuz.de/rssfeed/Brennpunktgeschichten.rss')
    list =  []

    def printer (list):
        for i, meta in list:
            print "child", i, meta
            print "label", i.label
            print "real", m.get_real_uri(i)
        
    a = m.get_direct_children(uri, list)
    a.addCallback(printer)

    try:
        from twisted.internet import glib2reactor
        glib2reactor.install()
    except AssertionError:
        # already installed...
        pass

    from twisted.internet import reactor

    reactor.run()
