#!/usr/bin/perl
# $Id: p0rn-proxy,v 1.26 2004/11/20 20:56:56 mitch Exp $
#
# p0rn-proxy -- a proxy for comfortably browsing p0rn
#
# 2004 (C) by Christian Garbs <mitch@cgarbs.de>
# Licensed under GNU GPL.  See COPYING for details.

use strict;
use AppConfig qw(:expand);
use HTTP::Daemon;
use HTTP::Response;
use HTTP::Status;
use LWP::UserAgent;
use P0rn::DB;
use P0rn::Static;

=head1 NAME

p0rn-proxy - HTTP proxy for comfortably browsing p0rn

=head1 SYNOPSIS

B<p0rn-proxy>

S<[ B<--addr> I<addr> ]>
S<[ B<--configfile> I<configfile> ]>
S<[ B<--help> ]>
S<[ B<--port> I<port> ]>
S<[ B<--proxy> I<proxy> ]>
S<[ B<--version> ]>

B<p0rn-proxy>
S<[ B<-a> I<addr> ]>
S<[ B<-c> I<configfile> ]>
S<[ B<-h> ]>
S<[ B<-p> I<port> ]>
S<[ B<-P> I<proxy> ]>
S<[ B<-V> ]>

=head1 OVERVIEW

p0rn-proxy is a small and simple proxy written in Perl.  It adds some
links to the top of each HTML page that allow you to mark a page as
thumbnail site in order to blacklist it, as a site containing pictures
or as a picture site worth downloading.  It also allows you to access
and administrate the proxy link database.

=head1 DESCRIPTION

Just start B<p0rn-proxy>.  This will by default create a proxy running
on http://localhost:8080 (it will fail if something else is already
running on this port).  Then point your browser to the proxy.  Browse
some website and see all those extra links at the top of each page.
Now browse some p0rn and start blacklisting those annoying thumbnail
sites without real content.  After marking pages for download, run
L<p0rn-download(1)> to actually get them.

=head2 Switches

=over 5

=item B<--addr> I<addr> | B<-a> I<addr>

This is the IP address that p0rn-proxy will bind to.  This address
(together with the correct port) must be configured in your browser to
make use of the proxy.

Be careful: Everybody who can reach the port on this address can use
your proxy.  You should bind to an address only reachable from your
local net or use a packet filter to 'guard' p0rn-proxy from the
outside.

The address '0.0.0.0' will bind p0rn-proxy to all of your network
devices.

Default is to bind to address '127.0.0.1' as this address can only be
accessed from your local computer and is not accessible from the
network.  Please take care when binding to another address.

=item B<--configfile> I<configfile> | B<-c> I<configfile>

The options from the given configuration file will be read.  These
options can be overridden by other command line arguments.

Default is not to read a configuration file.

=item B<--help> | B<-h>

This prints a short help text and exits.

=item B<--port> I<port> | B<-p> I<port>

This is the port on which the proxy listens to your incoming requests.
This port (together with the correct address) must be configured in
your browser to make use of the proxy.

Default setting is port 8080.

=item B<--proxy> I<proxy> | B<-P> I<proxy>

If this variable contains a value, the given proxy is used by
p0rn-proxy.  This allows you to chain multiple proxies together.

Example: If you need a proxy to access the Internet then point your
browser to the p0rn-proxy and in turn point p0rn-proxy to your
original proxy.

Set this to 'none' to use no proxy at all.

Default is to use the environment variable ${http_proxy}.

=item B<--version> | B<-V>

This prints the current version of p0rn-proxy and exits.

=back

=head2 Configuration file format

Configuration is also possible via configuration files.  Every command
line switch is possible in a configuration file.  Empty lines and
lines starting with B<#> are ignored.

Instead of B<--port 3128> you would put this line in the configuration file

 port = 3128

and so on and so forth.

=head1 FILES

All data is stored in a database.  By default, it is located in
B<./p0rn.db> (yes, that's the directory from which you're starting
p0rn-proxy).  Is you want to change this, set the environment variable
B<P0RNDBLOCATION> (the second letter is a zero) to another path and
filename.

=head1 SEE ALSO

L<p0rn-download(1)>, L<p0rn-dbdump(1)>, L<p0rn-dbrestore(1)>

=head1 MODULES NEEDED

 use AppConfig;
 use DBM:Deep;
 use HTTP::Daemon;
 use LWP::UserAgent;

These modules can be obtained from L<http://www.cpan.org>.

=head1 BUGS

In the default configuration, p0rn-proxy supports B<NO ACCESS
CONTROL!> Everyone with access to the proxy port on your system will
be able to use the proxy.  Please bind p0rn-proxy to a port that is
either only available from your local network or protected by a packet
filter.

Please report bugs by mail to <F<p0rn-bugs@cgarbs.de>>.

=head1 AUTHOR

p0rn-proxy was written by Christian Garbs <F<mitch@cgarbs.de>>.

=head1 AVAILABILITY

Look for updates at L<http://www.cgarbs.de/p0rn-comfort.en.html>.

=head1 COPYRIGHT

p0rn-proxy is licensed under the GNU GPL.

=cut

######[ Global Variables ]
#

my $CVSVERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
my $URLBASE = $Static::PROXY_CTRL_BASE;
my ($ua, $thumbs, $pics, $downs);

my @downs = ( 'queued', 'running', 'finished' );
my $reload_after = 250; # reread databases after this many requests

######[ Subroutines ]
#

sub shorten_uri($)
# shorten an URI (cut off CGI parameters)
{
    my $uri = shift;
    $uri =~ s/\?.*$//;
    return $uri;
}

sub execute_proxy_command($)
# execute user commands
{
    my $request = shift;
    my $response = HTTP::Response->new( RC_OK, 'p0rn proxy response' );
    my $uri = $request->uri();
    my $content = '<html><head><title>p0rn proxy response</title></head><body>';

    if ( $uri =~ m,$URLBASE/([^/]+)/(.*)$, ) {
	my $cmd = $1;
	my $uri = $2;
	my $urishort = shorten_uri($uri);
	
	if ($cmd eq ${Static::PROXY_MARK_THUMB} ) {
	    $thumbs->{$urishort} = 0;
	    $content .= "<a href=\"$uri\">$urishort</a> has been marked as a thumbnail page.";
	    $response->header(refresh => '5; url=javascript:window.close()');
	    
	} elsif ($cmd eq ${Static::PROXY_SINGLE_THUMB} ) {
	    $thumbs->{$urishort} = 1;
	    $content .= "<a href=\"$uri\">$urishort</a> (thumbnail) can now be accessed once.";
	    $response->header(refresh => "1; $uri");
	    
	} elsif ($cmd eq ${Static::PROXY_UNMARK_THUMB} ) {
	    delete $thumbs->{$urishort};
	    $content .= "<a href=\"$uri\">$urishort</a> has been removed from the thumbnail list.";
	    
	} elsif ($cmd eq ${Static::PROXY_MARK_PICTURE} ) {
	    $pics->{$urishort} = 0;
	    $content .= "<a href=\"$uri\">$urishort</a> has been marked as a picture page.";
	    $response->header(refresh => '5; url=javascript:window.close()');
	    
	} elsif ($cmd eq ${Static::PROXY_SINGLE_PICTURE} ) {
	    $pics->{$urishort} = 1;
	    $content .= "<a href=\"$uri\">$urishort</a> (picture) can now be accessed once.";
	    $response->header(refresh => "1; $uri");
	    
	} elsif ($cmd eq ${Static::PROXY_UNMARK_PICTURE} ) {
	    delete $pics->{$urishort};
	    $content .= "<a href=\"$uri\">$urishort</a> has been removed from the picture list.";
	    
	} elsif ($cmd eq ${Static::PROXY_MARK_DOWNLOAD} ) {
	    $pics->{$urishort} = 0;
	    $content .= "<a href=\"$uri\">$urishort</a> has been marked as a picture page.<br>";
	    if (exists $downs->{$urishort}) {
		$content .= "It already exists in download list (status=$downs[$downs->{$urishort}]).";
	    } else {
		$downs->{$urishort} = 0;
		$content .= "It has been queued for download.";
	    }
	    $response->header(refresh => '5; url=javascript:window.close()');

	} elsif ($cmd eq ${Static::PROXY_UNMARK_DOWNLOAD} ) {
	    delete $downs->{$urishort};
	    $content .= "<a href=\"$uri\">$urishort</a> has been removed from the download list";

	} elsif ($cmd eq ${Static::PROXY_MARK_DOWNLOAD} ) {

	    $content .= '<h1>p0rn-proxy main menu</h1><ul><ul><ul>' .
		"<li><a href=\"$URLBASE/${Static::PROXY_LIST_THUMB}/_nouri_\">list thumbnail pages</a></li>" .
		"<li><a href=\"$URLBASE/${Static::PROXY_LIST_PICTURE}/_nouri_\">list picture pages</a></li>" .
		"<li><a href=\"$URLBASE/${Static::PROXY_LIST_DOWNLOAD}/_nouri_\">list download pages</a></li>" .
		"</ul></ul><hr>p0rn-proxy ${Static::VERSION}/$CVSVERSION</ul>";
	    
	} elsif ($cmd eq ${Static::PROXY_LIST_THUMB} ) {

	    $content .= '<h1>thumbnail pages</h1>';

	    foreach my $u (sort keys %{$thumbs}) {
		$content .= "[<a href=\"$URLBASE/${Static::PROXY_UNMARK_THUMB}/$u\">del</a>] <a href=\"$u\">$u</a><br>";
	    }

	} elsif ($cmd eq ${Static::PROXY_LIST_PICTURE} ) {

	    $content .= '<h1>picture pages</h1>';

	    foreach my $u (sort keys %{$pics}) {
		$content .= "[<a href=\"$URLBASE/${Static::PROXY_UNMARK_PICTURE}/$u\">del</a>] <a href=\"$u\">$u</a><br>";
	    }

	} elsif ($cmd eq ${Static::PROXY_LIST_DOWNLOAD} ) {

	    $content .= '<h1>download pages</h1>';

	    $content .= '<h2>queued</h2>';
	    foreach my $u (sort keys %{$downs}) {
		next unless $downs->{$u} == 0;
		$content .= "[<a href=\"$URLBASE/${Static::PROXY_UNMARK_DOWNLOAD}/$u\">del</a>] <a href=\"$u\">$u</a><br>";
	    }

	    $content .= '<h2>running</h2>';
	    foreach my $u (sort keys %{$downs}) {
		next unless $downs->{$u} == 1;
		$content .= "[<a href=\"$URLBASE/${Static::PROXY_UNMARK_DOWNLOAD}/$u\">del</a>] <a href=\"$u\">$u</a><br>";
	    }

	    $content .= '<h2>finished</h2>';
	    foreach my $u (sort keys %{$downs}) {
		next unless $downs->{$u} == 2;
		$content .= "[<a href=\"$URLBASE/${Static::PROXY_UNMARK_DOWNLOAD}/$u\">del</a>] <a href=\"$u\">$u</a><br>";
	    }

	} elsif ($cmd eq ${Static::PROXY_GET_VERSION} ) {

	    $content .= "\nVERSION: ${Static::VERSION}/$CVSVERSION\n";
	    
	} else {
	    $content .= "unknown command <tt>$cmd</tt>: <tt>".$request->uri().'</tt>';
	}

    } else {
	$content .= "parse error: <tt>$uri</tt>";
    }

    $content .= '</body></html>';
    $response->content($content);

    return $response;
}

sub handle_request($)
# handle an HTTP request
{
    my $request = shift;
    my $response;
    my $status = '..';
    
    # keine gezippten Daten, wir wollen im HTML rumwurschteln!
    $request->header('Accept-Encoding' => 'identity');
    
    my $uri = $request->uri();
    
    if ($uri =~ m,^$URLBASE/,) {
	
	$response = execute_proxy_command($request);
	$status = "CT";
	
    } else {
	
	my $urishort = shorten_uri($uri);
	
	# check for thumbsites
	if (exists $thumbs->{$urishort} and ! $thumbs->{$urishort} ) {
	    
	    # return error page
	    my $content = << "EOF";
<html><head><title>thumbnail page</title></head><body>
<a href="$uri">$urishort</a> is considered a thumbnail page and thus not displayed.<br>[
<a href="$URLBASE/${Static::PROXY_SINGLE_THUMB}/$uri">access once</a>
| <a href="$URLBASE/${Static::PROXY_UNMARK_THUMB}/$uri">access always</a>
]</body></html>
EOF
;
	    $response = HTTP::Response->new( RC_OK, 'thumbnail page!' );
	    $response->content( $content );
	    $status = "TH";
	    
	} else {
	    
	    # check for picsites
	    if (exists $pics->{$urishort} and ! $pics->{$urishort} ) {
		
		# return error page
		my $content = << "EOF";
<html><head><title>already seen</title></head><body>
<a href="$uri">$urishort</a> is considered an already seen picture page and thus not displayed.<br>[
<a href="$URLBASE/${Static::PROXY_SINGLE_PICTURE}/$uri">access once</a>
| <a href="$URLBASE/${Static::PROXY_UNMARK_PICTURE}/$uri">access always</a>
]</body></html>
EOF
;
		$response = HTTP::Response->new( RC_OK, 'already seen picture page!' );
		$response->content( $content );
		$status = "PI";
		
	    } else {
		
		if (exists $thumbs->{$uri}) {
		    $thumbs->{$uri}--;
		}
		
		if (exists $pics->{$uri}) {
		    $pics->{$uri}--;
		}
		
		# do the HTTP request
		$response = $ua->simple_request($request);
		
		$status = "ok";
		
		# mangle HTML pages
		if ( lc substr ($response->content_type(), 0, 9) eq "text/html" ) {
		    my $content = $response->content();
		    
		    my $linkline = '<p align="center">' .
			'[ th: <a target="_top" href="'."$URLBASE/${Static::PROXY_MARK_THUMB}/$uri".'">mark as thumbnail page</a> ' .
			'| ad: <a target="_top" href="'."$URLBASE/${Static::PROXY_ADMIN}/_nouri_".'">proxy admin</a> ' .
			'| pi: <a target="_top" href="'."$URLBASE/${Static::PROXY_MARK_PICTURE}/$uri".'">mark as picture page</a> ' .
			'| do: <a target="_top" href="'."$URLBASE/${Static::PROXY_MARK_DOWNLOAD}/$uri".'">mark as picture page and download</a> ' .
			']<br><br><p>';
		    
		    $content =~ s/<body([^>]*)>/<body$1>$linkline/i;
		    
		    $response->content($content);
		    $status = "OK";
		}
		
	    }
	    
	}
	
    }
    
    # print log message
    # TODO: more flexible logging, activate debug via configuration variable
    printf 
	"%s  %s\n" ,
	$status,
	$uri ;

    return $response;
}

sub print_usage()
# prints a short help text and exits
{
    print << "EOF";

Usage: p0rn-proxy [options]
Supported options (long and short forms):
  -a, --addr          : set address to listen on
  -c, --configfile    : set configuration file
  -h, --help          : print usage and exit
  -p, --port          : set port to listen on
  -P, --proxy         : set proxy to use
  -v, --version       : print version number and exit
EOF
;
    exit 0;
}

######[ Main program ]
#
   
print "this is p0rn-proxy ${Static::VERSION}/${CVSVERSION}\n";

# define configuration options
my $config = AppConfig->new( { CASE => 1 } );
$config->define( 'configfile|c=s',    { DEFAULT => '' } );
$config->define( 'addr|a=s',          { DEFAULT => '127.0.0.1' } );
$config->define( 'port|p=s',          { DEFAULT => '8080' } );
$config->define( 'proxy|P=s',         { DEFAULT => $ENV{'http_proxy'}, EXPAND => EXPAND_ENV } );
$config->define( 'version|V!' );
$config->define( 'help|h!' );

# Another config file might be given on command line, so process a copy of ARGV
$config->getopt( qw(no_ignore_case), [ @ARGV ]);

# if we are to just print the version number, then quit now
exit if $config->version();

# if we are to just print help, then do it now
print_usage() if $config->help();

# read config file, if existent and desired
if ($config->configfile() ne "") {
    if (-r $config->configfile()) {
	print "- reading options from `".$config->configfile()."'\n";
	$config->file($config->configfile());
    } else {
	warn "can't read configuration file `".$config->configfile()."': $!\nusing built-in defaults\n";
    }
}

# override config file with command line arguments
$config->getopt( qw(no_ignore_case), [ @ARGV ] );

# if we are to just print the version number, then quit now
exit if $config->version();

# if we are to just print help, then do it now
print_usage() if $config->help();

# create proxy
my $proxy = HTTP::Daemon->new(
			      LocalAddr=>$config->addr(),
			      LocalPort=>$config->port()
			      );
die "@_" unless defined $proxy;

# create UserAgent
$ua = LWP::UserAgent->new;
$ua->agent("p0rn-proxy ${Static::VERSION}/${CVSVERSION}");

if ( defined $config->proxy()
     and $config->proxy() ne ""
     and $config->proxy() ne "none" ) {
    print "- using existing proxy on ".$config->proxy()."\n";
    $ua->proxy('http', $config->proxy());
}

# open dbs
$thumbs = opendb('thumbz');
print "- thumbpages db loaded\n";
$pics = opendb('picz');
print "- picpages db loaded\n";
$downs = opendb('downz');
print "- download db loaded\n";

print "- proxy started on ".$config->addr().":".$config->port()."\n";

# Don't accumulate zombies
# (we don't care about our children
#  -> possible SIGPIPES when browser aborts request)
$SIG{CHLD} = 'IGNORE';

while (my $conn = $proxy->accept) {
    if (! fork()) {
	# CHILD
	while (my $request = $conn->get_request) {
	    $conn->send_response(handle_request($request));
	}
	$conn->close();
	exit;
    }
    $conn->close;
}
