You are viewing an old version of this page. View the current version.

Compare with Current View Page History

Version 1 Next »

=head1 NAME

Mail::SpamAssassin::Plugin::iXhash - compute hashes from mail bodies and compare to known spam ones via DNS

=head1 SYNOPSIS

  loadplugin Mail::SpamAssassin::Plugin::iXhash /path/to/iXhash.pm

  body     IXHASH eval:ixhashtest('ix.dnsbl.manitu.net')
  describe IXHASH This mail has been classified as spam @ iX Magazine, Germany
  tflags   IXHASH net
  score    IXHASH 1.5

=head1 DESCRIPTION

iXhash.pm is a plugin for SpamAssassin 3.0.0 and up. It takes the body of a mail, removes - if certain requirements are met - irrelevant parts from it and then computes an MD5 hash value from the rest. These values will then be looked up via DNS. Call it a 'poor man's DCC', if you want.

This plugin is based on the procmail-based project 'NiXSpam', developed by Bert Ungerer (un-at-ix-dot-de). For more information see http://www.heise.de/ix/nixspam/. The procmail code producing the hashes only can be found here: ftp://ftp.ix.de/pub/ix/ix_listings/2004/05/checksums

Parts of the code were submitted via heise forum by 'kungfuhasi'
See http://www.heise.de/ix/foren/go.shtml?read=1&msg_id=7246759&forum_id=48292.

The hashes from spam received by Heise/iX magazine are available at ix.dnsbl.manitu.net, kindly provided by Manuel Schmitt.

It's not too difficult to create your own blacklist provided you have enough input (read: spam). If you do so please drop that info somewhere so other people can use that one too.

I know I'm a lousy coder but at least I'm willing to learn: If you have improvements to make let me know: dirk-dot-bonengel-at-login-solutions.de

=cut

# Example SpamAssassin configuration (belongs in a .cf file, not in this module):
#   loadplugin    Mail::SpamAssassin::Plugin::iXhash /path/to/iXhash.pm
#   body          IXHASH eval:ixhashtest('ix.dnsbl.manitu.net')
#   describe      IXHASH This mail has been classified as spam @ iX Magazine, Germany
#   tflags        IXHASH net
#   score         IXHASH 1.5
package iXhash;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Plugin;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use Net::DNS;
use Net::DNS::Resolver;
# Locale - this was on Bert's wishlist
use POSIX qw(locale_h);
setlocale(LC_CTYPE, "de_DE.ISO8859-1");
# LC_CTYPE now "Deutsch, Deutschland, codeset ISO 8859-1"
# Maybe not appropriate for spam that is neither German nor English

our @ISA = qw(Mail::SpamAssassin::Plugin);

# Forward debug messages unchanged to SpamAssassin's own dbg() routine.
sub dbg {
	return Mail::SpamAssassin::dbg(@_);
}


# Constructor.
# Builds the object through the parent class constructor, re-blesses it
# into the requested class and registers "ixhashtest" as an eval rule.
#   $proto - class name or an existing object of this class
#   $main  - object handed through to SUPER::new
# Returns the new plugin object.
sub new {
	my ($proto, $main) = @_;
	my $class  = ref($proto) || $proto;
	my $plugin = $class->SUPER::new($main);
	bless($plugin, $class);
	$plugin->register_eval_rule("ixhashtest");
	return $plugin;
}

# Eval rule: compute up to three MD5 hashes over the mail body and look each
# one up as "<hash>.<dnsserver>" via DNS.
#
# Parameters:
#   $self         - the plugin object
#   $permsgstatus - per-message status object; only {msg}->get_body() is used
#   $muell        - third positional argument, unused here ("Muell" = garbage);
#                   presumably the body array SpamAssassin hands to body eval
#                   rules - TODO confirm
#   $dnsserver    - DNS zone the hashes are looked up in
#
# Returns 1 as soon as one hash resolves to an A record, 0 otherwise.
# The original fell off the end of the sub without a return when nothing was
# listed; the value of a trailing if/foreach is unspecified in Perl, so the
# result is now always an explicit 0 or 1.
sub ixhashtest {
	my ($self, $permsgstatus, $muell, $dnsserver) = @_;

	# Without a zone to query there is nothing to look up.
	return 0 unless defined $dnsserver && length $dnsserver;
	dbg("IXHASH: IxHash querying Server $dnsserver"); 

	# Flatten the body into one string. get_body() may hand back an array
	# reference (dereferenced here) or plain strings (appended as they are).
	my $body = "";
	foreach my $chunk ($permsgstatus->{msg}->get_body()) {
		next unless defined $chunk;
		$body .= ref($chunk) eq 'ARRAY' ? join("", @$chunk) : $chunk;
	}

	my $resolver = Net::DNS::Resolver->new;
	my ($body_copy, $digest);

	# This is code contributed by KungfuHasi. Comments partly by me (Dirk Bonengel)
	# See: http://www.heise.de/ix/foren/go.shtml?read=1&msg_id=7246759&forum_id=48292
	# Thanks, KungfuHasi, whoever or whatever you may be!
	#-------------------------------------------------------------------------
	# Method 1: some spaces to work with
	# The procmail code says at least 16 spaces or tabs required...
	# The procmail idiom "$?" (optional line break) is spelled \n? here:
	# inside a Perl pattern "$?" interpolates the child-status variable
	# instead, which made the test depend on its value appearing in the body.
	if ($body =~ /\s.+\s.*\n?.*\n?.*\s/) {
		$body_copy = $body;
		# Collapse runs of one and the same whitespace character into a
		# single one; (?:\1+) is a backreference to the captured character.
		$body_copy =~ s/([[:space:]])(?:\1+)/$1/g;
		# Remove all printable non-space (graph class) characters
		$body_copy =~ s/[[:graph:]]+//g;
		# First digest
		$digest = md5_hex($body_copy);
		dbg ("IXHASH: Computed hash-value $digest");
		return 1 if _ixhash_listed($resolver, $digest, $dnsserver);
	}

	# Method 2: home-made if-condition - hopefully it is right.
	# The original procmail code says:
	# This checksum requires at least 2 of the following characters:
	# >* 1^1 ([<>()|@*'!?,]|:/)
	# (To match something like "Already seen?  http://host.domain.tld/")
	if ($body =~ /(([<>\(\)\|@\*'!?,]|:\/).*?([<>\(\)\|@\*'!?,]|:\/))/ms ) {
		# Generation of 2nd digest
		$body_copy = $body;
		$body_copy =~ s/[[:cntrl:][:alnum:]%&#;=]+//g;
		$body_copy =~ tr/_/./;
		$body_copy =~ s/([[:print:]])(?:\1+)/$1/g;
		$digest = md5_hex($body_copy);
		dbg ("IXHASH: Computed hash-value $digest\n");
		return 1 if _ixhash_listed($resolver, $digest, $dnsserver);
	}

	# Method 3. Requirement here in procmail:
	# >* [^ ][^ ][^ ][^ ]
	# (some non-empty characters in the body)
	if ($body =~ /[^\s\t][^\s\t][^\s\t][^\s\t]/) {
		$body_copy = $body;
		$body_copy =~ s/[[:cntrl:][:space:]=]+//g;
		$body_copy =~ s/([[:graph:]])(?:\1+)/$1/g;
		$digest = md5_hex($body_copy);
		dbg ("IXHASH: Computed hash-value $digest\n");
		return 1 if _ixhash_listed($resolver, $digest, $dnsserver);
	}

	# None of the hashes is listed.
	return 0;
}

# Private helper: query "<digest>.<dnsserver>" through $resolver and return 1
# if the reply carries an A record with an address, 0 otherwise (including
# when send() yields no reply at all).
sub _ixhash_listed {
	my ($resolver, $digest, $dnsserver) = @_;
	dbg ("IXHASH: Now checking $digest.$dnsserver");
	my $answer = $resolver->send($digest.'.'.$dnsserver);
	return 0 unless $answer;
	foreach my $rr ($answer->answer) {
		next unless $rr->type eq "A";
		dbg ("IXHASH: Received reply from $dnsserver:". $rr->address."\n");
		return 1 if $rr->address;
	}
	return 0;
}
1;
  • No labels