#!/usr/bin/perl -w # # Quick and Dirty Proof of Concept of CrapProxy # # Filter Crap (Ads, Statscounter, Analystics, ..) from the wwweb # WITHOUT any static filter lists. # # 2010-08-08 - http://ralf.stormbind.net/ - ralf@stormbind.net # # # next steps: # - this perl stuff is slow # - add extra white/black-lists (how to admin?) # - forward to a cache proxy or add a cache? # use strict; use HTTP::Proxy; use HTTP::Proxy::HeaderFilter::simple; # # dirty hack to get it working # my %whitelist; whitelist_add('images-amazon.com','amazon.de'); whitelist_add('ytimg.com','youtube.com'); whitelist_add('ebaystatic.com','ebay.de'); whitelist_add('ebaystatic.com','ebayrtm.com'); whitelist_add('ebay.com','ebay.de'); whitelist_add('ebayobjects.com','ebay.de'); whitelist_add('ebaydesc.com','ebay.de'); whitelist_add('ebaydesc.de','ebay.de'); whitelist_add('ebayimg.com','ebay.de'); whitelist_add('ebayobjects.com','ebayrtm.com'); whitelist_add('ebayrtm.com','ebay.de'); whitelist_add('bestofmedia.com','tomshardware.de'); whitelist_add('bestofmicro.com','tomshardware.de'); whitelist_add('media-imdb.com','imdb.com'); my $proxy = HTTP::Proxy->new; $proxy->port ( 3128 ); $proxy->host ( '0.0.0.0' ); my $filter = HTTP::Proxy::HeaderFilter::simple->new( \&referer_filter ); $proxy->push_filter( request => $filter ); $proxy->start (); sub referer_filter { my $self = shift; my $headers = shift; my $message = shift; my $uridest; my $urireferer; my $block; if (!defined($headers->header('referer'))) { return; } $uridest = URI->new($message->uri); $urireferer = URI->new($headers->header('referer')); $block = 0; for (;;) { # uri and referer domain is equal - all fine, dont block if (lc($uridest->host()) eq lc($urireferer->host())) { last; } # # check sub.domai.ns # # uri : images.domain.tld # ref : www.domain.tld # ---------------------------- # problem: adserver.domain.tld # problem: ip-addresses # problem: domain.de vs. domain.com if (domain_compare($uridest->host(), $urireferer->host())) { last; } $block = 1; last; } if ($block) { # # block the crap: but give the user a chance to visit that page # my $content_blocked = HTTP::Response->new( 200 ); $content_blocked->content_type('text/html'); $content_blocked->content('Click to show blocked content.
'.$message->uri); print 'BLOCKED '.$message->method . ' ' . $uridest->host() . " REF ".$urireferer->host() ."\n"; $self->proxy->response( $content_blocked ); } } sub domain_compare { my $dom1 = lc(shift); my $dom2 = lc(shift); if ($dom1 eq $dom2) { return 1; } $dom1=~ s/.*\.([^\.]+\.[a-z]+)$/$1/; $dom2=~ s/.*\.([^\.]+\.[a-z]+)$/$1/; if ($dom2 eq $dom1) { return 1; } if (exists($whitelist{$dom1.'@'.$dom2})) { return 1; } # # dirty hack to get it working: nvida.de = nvidia.com # $dom1=~ s/([^\.]+)\.[a-z]+$/$1/; $dom2=~ s/([^\.]+)\.[a-z]+$/$1/; if ($dom1 eq $dom2) { return 1; } return 0; } sub whitelist_add { my $dom1 = shift; my $dom2 = shift; $whitelist{$dom1.'@'.$dom2} = 1; $whitelist{$dom2.'@'.$dom1} = 1; }