#!/usr/bin/perl -w
#
# Quick and Dirty Proof of Concept of CrapProxy
#
# Filter Crap (Ads, Statscounter, Analystics, ..) from the wwweb
# WITHOUT any static filter lists.
#
# 2010-08-08 - http://ralf.stormbind.net/ - ralf@stormbind.net
#
#
# next steps:
# - this perl stuff is slow
# - add extra white/black-lists (how to admin?)
# - forward to a cache proxy or add a cache?
#
use strict;
use HTTP::Proxy;
use HTTP::Proxy::HeaderFilter::simple;
#
# dirty hack to get it working
#
my %whitelist;
whitelist_add('images-amazon.com','amazon.de');
whitelist_add('ytimg.com','youtube.com');
whitelist_add('ebaystatic.com','ebay.de');
whitelist_add('ebaystatic.com','ebayrtm.com');
whitelist_add('ebay.com','ebay.de');
whitelist_add('ebayobjects.com','ebay.de');
whitelist_add('ebaydesc.com','ebay.de');
whitelist_add('ebaydesc.de','ebay.de');
whitelist_add('ebayimg.com','ebay.de');
whitelist_add('ebayobjects.com','ebayrtm.com');
whitelist_add('ebayrtm.com','ebay.de');
whitelist_add('bestofmedia.com','tomshardware.de');
whitelist_add('bestofmicro.com','tomshardware.de');
whitelist_add('media-imdb.com','imdb.com');
my $proxy = HTTP::Proxy->new;
$proxy->port ( 3128 );
$proxy->host ( '0.0.0.0' );
my $filter = HTTP::Proxy::HeaderFilter::simple->new( \&referer_filter );
$proxy->push_filter( request => $filter );
$proxy->start ();
sub referer_filter
{
my $self = shift;
my $headers = shift;
my $message = shift;
my $uridest;
my $urireferer;
my $block;
if (!defined($headers->header('referer'))) {
return;
}
$uridest = URI->new($message->uri);
$urireferer = URI->new($headers->header('referer'));
$block = 0;
for (;;) {
# uri and referer domain is equal - all fine, dont block
if (lc($uridest->host()) eq lc($urireferer->host())) {
last;
}
#
# check sub.domai.ns
#
# uri : images.domain.tld
# ref : www.domain.tld
# ----------------------------
# problem: adserver.domain.tld
# problem: ip-addresses
# problem: domain.de vs. domain.com
if (domain_compare($uridest->host(), $urireferer->host())) {
last;
}
$block = 1;
last;
}
if ($block) {
#
# block the crap: but give the user a chance to visit that page
#
my $content_blocked = HTTP::Response->new( 200 );
$content_blocked->content_type('text/html');
$content_blocked->content('Click to show blocked content.
'.$message->uri);
print 'BLOCKED '.$message->method . ' ' . $uridest->host() . " REF ".$urireferer->host() ."\n";
$self->proxy->response( $content_blocked );
}
}
sub domain_compare
{
my $dom1 = lc(shift);
my $dom2 = lc(shift);
if ($dom1 eq $dom2) {
return 1;
}
$dom1=~ s/.*\.([^\.]+\.[a-z]+)$/$1/;
$dom2=~ s/.*\.([^\.]+\.[a-z]+)$/$1/;
if ($dom2 eq $dom1) {
return 1;
}
if (exists($whitelist{$dom1.'@'.$dom2})) {
return 1;
}
#
# dirty hack to get it working: nvida.de = nvidia.com
#
$dom1=~ s/([^\.]+)\.[a-z]+$/$1/;
$dom2=~ s/([^\.]+)\.[a-z]+$/$1/;
if ($dom1 eq $dom2) {
return 1;
}
return 0;
}
sub whitelist_add
{
my $dom1 = shift;
my $dom2 = shift;
$whitelist{$dom1.'@'.$dom2} = 1;
$whitelist{$dom2.'@'.$dom1} = 1;
}