#!/usr/bin/perl -w

#
# Copyright (C) 2006 by Victor Julien <victor@inliniac.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

# Drop-in replacement for Barnyard for feeding ModSecurity
# alerts to the Sguil NSM system.
# 
# Reads files or symlinks from the queue directory, feeds them
# to Sguil and removes the file/symlink.
#

use strict;
use warnings;
use diagnostics;

use IO::Socket;
use DirHandle;
use Getopt::Std;
use Time::Local;

use ModsecAlert;
use SguilBarnyardComms;

use strict "vars";
use strict "subs";

# Version of this script; printed when the -v option is given.
my $version = "0.6";
# License notice; printed together with the version for the -v option.
my $license = "GNU GPL. See http://www.gnu.org/licenses/gpl.txt for more information";

#
# CONVERSION FUNCTIONS
#

#
# Returns the hexadecimal representation of the given string:
# two hex digits per byte, high nybble first.
#
sub ConvertASCIItoHEX
{
	my ($text) = @_;

	# "H*" consumes the whole string and yields it as one hex string
	my @hex_chunks = unpack("H*", $text);

	return join("", @hex_chunks);
}

#
# Maps a severity name ("EMERGENCY" .. "INFO") to a numeric
# priority (7 .. 1).
#
# Returns:	the priority for a known severity name
# 		0 for an unknown or undefined severity
#
sub ConvertSEVERITYtoPRIO
{
	my $severity = shift @_;

	my %sev = (	"EMERGENCY"	=> 7,
			"ALERT"		=> 6,
			"CRITICAL"	=> 5,
			"ERROR"		=> 4,
			"WARNING"	=> 3,
			"NOTICE"	=> 2,
			"INFO"		=> 1 );

	# No severity at all is treated like an unknown one. Checking this
	# first keeps an undef value from being used as a hash key, which
	# would trigger an "uninitialized value" warning.
	return 0 unless defined $severity;

	return exists $sev{$severity} ? $sev{$severity} : 0;
}

#
# Maps a three letter month abbreviation ("Jan" .. "Dec") to its
# month number (1 .. 12).
#
# Returns:	-1 if the string is not one of the known abbreviations
# 		the month number otherwise
#
sub ConvertMONTHSTRtoDEC
{
	my ($monthstr) = @_;

	my @abbrevs = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

	# build the name => number table from the ordered list
	my %number_of;
	@number_of{@abbrevs} = (1 .. scalar(@abbrevs));

	return defined($number_of{$monthstr}) ? $number_of{$monthstr} : -1;
}

#
# Converts a dotted quad IP address string ("a.b.c.d") into a single
# number: a * 2^24 + b * 2^16 + c * 2^8 + d.
#
# Only the first four dot separated fields are used.
#
sub ConvertIPtoDEC
{
	my ($ip) = @_;

	my @octets  = split(/\./, $ip);
	my @weights = (16777216, 65536, 256, 1);

	my $dec = 0;
	for my $idx (0 .. 3) {
		$dec += $octets[$idx] * $weights[$idx];
	}

	return $dec;
}


#
# ALERT PROCESSING
#

#
# Some preprocessing before sending the alert
# to Sguil.
#
# This function is only called for actual alerts.
#
# Returns:	-1 on error
# 		0 on success
#
sub PreprocessAlert
{
	# Get the alert hash from the caller (by reference). The hash is
	# updated in place.
	#
	# Keys read:	monthstr, sipstr, dipstr, timestr, day, year, tz,
	# 		themsg, severity, code, file
	# Keys written:	month, sipdec, dipdec, monthstr, day, timestr, year,
	# 		time, themsg, id, rev, prio, class, msg, payload
	my $ref = shift @_;

	#
	# SECTION A
	#

	# convert month(str) to month(dec)
	my $month = ConvertMONTHSTRtoDEC($ref->{"monthstr"});
	if($month == -1) {
		print "Month parse failed.\n";
		return -1;
	}
	$ref->{"month"} = $month;

	# convert sipstr and dipstr to dec as well
	$ref->{"sipdec"} = ConvertIPtoDEC($ref->{"sipstr"});
	$ref->{"dipdec"} = ConvertIPtoDEC($ref->{"dipstr"});

	# timezone handling: the time/date is logged like this:
	# 13/Aug/2006:21:59:24 +0200, which is 19:59.24 UTC. In
	# Sguil we want the UTC value, so we convert it here.
	#
	# first parse the timestring
	my ($hour, $min, $sec) = split(/:/, $ref->{"timestr"});

	# Get the unixtime in gm, month is from 0 to 11. timegm croaks on
	# out of range values (e.g. day 32), so trap that and report an
	# error to the caller instead of terminating the whole program.
	my $time = eval {
		timegm($sec, $min, $hour, $ref->{"day"}, $ref->{"month"} - 1, $ref->{"year"});
	};
	if(not defined $time) {
		my $reason = $@;
		chomp $reason;
		print "Date/time conversion failed: $reason\n";
		return -1;
	}

	# parse the timezone offset. It looks like +0200 or -0100
	my @parse = ();
	if(defined $ref->{"tz"}) {
		@parse = $ref->{"tz"} =~ /(.{1})(\d{2})(\d{2})$/;
	}
	if(@parse == 0) {
		print "Parsing timezone information failed.\n";
		return -1;
	}

	my ($op, $tz_hour, $tz_min) = @parse;
	my $tz_val = $tz_hour * 3600 + $tz_min * 60;

	# +0200 means we have to subtract 2 hrs to get UTC
	if ( $op eq "+" ) {
		$time -= $tz_val;
	} elsif ( $op eq "-" ) {
		$time += $tz_val;
	} else {
		print "Unknown operator $op in timezone string " . $ref->{"tz"} . "\n";
		return -1;
	}

	# Break the corrected unix time up into its UTC components and
	# update monthstr, month, day, timestr and year with them. In list
	# context gmtime returns (sec, min, hour, mday, mon, year, ...),
	# with mon counting from 0 and year counting from 1900.
	my @utc = gmtime($time);
	if ( @utc == 0 ) {
		print "Date/time parsing error, parsing $time failed\n";
		return -1;
	}
	my ($utc_sec, $utc_min, $utc_hour, $utc_day, $utc_mon, $utc_year) = @utc;

	my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
	$ref->{"monthstr"} = $month_names[$utc_mon];
	$ref->{"month"}    = $utc_mon + 1;
	$ref->{"day"}      = $utc_day;
	$ref->{"year"}     = $utc_year + 1900;
	$ref->{"timestr"}  = sprintf("%02d:%02d:%02d", $utc_hour, $utc_min, $utc_sec);

	# NOTE(review): "time" is built from the unpadded day and month
	# (e.g. 2006-8-7); only the separate day/month fields are padded
	# below. Confirm that this is what the receiving side expects.
	$ref->{"time"} = $ref->{"year"} . "-" . $ref->{"month"} . "-" . $ref->{"day"} . " " . $ref->{"timestr"};

	# add a padding zero to match the barnyard behaviour.
	$ref->{"day"}   = sprintf("%02d", $ref->{"day"});
	$ref->{"month"} = sprintf("%02d", $ref->{"month"});

	#
	# SECTION H
	#

	# check for a message with a message, get it?
	if( defined( $ref->{"themsg"} ) ) {
		# keyword in the message => key in the alert hash
		my %field_for = ( "msg" => "themsg", "id" => "id", "rev" => "rev" );

		my $remaining = $ref->{"themsg"};

		# loop through all options which have the format:
		# Pattern match "!^HTTP/(0\\.9|1\\.0|1\\.1)$" at SERVER_PROTOCOL [id "340000"] [rev "1"] [msg "Bad HTTP Protocol"]
		# as long as we have options, $nextparm and $nextval will be set to:
		# id and 340000, rev and 1, etc
		while ( $remaining =~ m/.* \[.*\].*/ )
		{
			# stop as soon as the bracketed part is not of the
			# form [keyword "value"]
			last unless $remaining =~ /.*?\[(.*?) \"(.*?)\"\](.*)/;

			my ( $nextparm, $nextval, $therest ) = ( $1, $2, $3 );

			# continue with what follows this option
			$remaining = $therest;

			# lets see what we have
			if ( exists $field_for{$nextparm} ) {
				$ref->{ $field_for{$nextparm} } = $nextval;
			} else {
				print "Warning: unknown keyword $nextparm with value $nextval in " . $ref->{"themsg"} . ".\n";
			}
		}
	}

	# severity relates to priority.
	$ref->{"prio"} = ConvertSEVERITYtoPRIO($ref->{"severity"});

	# lets see what kind of class we are going to use
	# we use *-attack for stuff that is blocked, *-activity
	# for the rest
	if($ref->{"code"} < 400) {
		$ref->{"class"} = "web-application-activity";
	} else {
		$ref->{"class"} = "web-application-attack";
	}

	# Event message. An alert without a message gets an empty message
	# text rather than an "uninitialized value" warning.
	my $themsg = defined( $ref->{"themsg"} ) ? $ref->{"themsg"} : "";
	$ref->{"msg"} = "MOD-SECURITY " . $ref->{"code"} . " " . $themsg;

	# hex the payload
	$ref->{"payload"} = ConvertASCIItoHEX($ref->{"file"});

	return 0;
}


#
# Returns a list of filenames of event files.
# Returns an empty list if no files were found.
#
sub GetAlertFiles {
	# directory to scan; dies if it cannot be opened
	my $dir = shift;

	#print $dir . "\n";

	my $dh = DirHandle->new($dir) or die "can't open dir $dir: $!\n";

	# We want only the entries that start with "modsec.log.". The test is
	# done on the bare entry name, before the directory is prepended:
	# matching the full path against a pattern built from $dir would treat
	# regex metacharacters in the directory name (a "+" for example) as
	# pattern syntax and skip every file.
	my @names = grep { /^modsec\.log\./ } $dh->read();
	$dh->close();

	# keep only what -f accepts (a symlink is tested through its target)
	my @paths = grep { -f } map { "$dir/$_" } @names;

	my @sorted = sort @paths;
	return @sorted;
}

#
# START MAIN
#

#
# option parsing
# 
# Options:
#   -n <name>	sensor name (required)
#   -l <dir>	log/queue directory to read alert files from (required)
#   -r <val>	processed files are removed only when this is "yes" (default)
#   -t		test mode: process all files once, no connection, no removal
#   -v		print version information and exit
my %option = ();
getopts("n:l:r:tv", \%option);

if ( defined $option{v} ) {
	print "\n";
	print "modsec_by.pl version $version.\n\n";
	print "Copyright (c) 2006 by Victor Julien <victor\@inliniac.net>.\n";
	print "Released under $license.\n";
	print "\n";
	exit 0;
}

$option{n} = ""    unless defined $option{n}; # sensor name
$option{l} = ""    unless defined $option{l}; # logdir
$option{r} = "yes" unless defined $option{r}; # remove the processed files?

# runtime settings derived from the options
my %sensor = ();
$sensor{"name"}   = $option{n};
$sensor{"dir"}    = $option{l};
$sensor{"remove"} = $option{r};
if(defined($option{t})){
	$sensor{"testfiles"} = "true";
} else {
	$sensor{"testfiles"} = "false";
}

my $sguil = SguilBarnyardComms->new();


# both the sensor name and the log directory are mandatory
if ( ( $sensor{"name"} eq "" ) || ( $sensor{"dir"} eq "" ) ) {
	print "Please supply:\n\tthe sensor name (-n <sensor name>)\n\tthe log directory (-l <directory>)\n";
	exit(0);
}

# connect only if we are not in test mode
if ( $sensor{"testfiles"} eq "false" )
{
	my $ip = "127.0.0.1";
	my $port = "7735";
	
	eval { $sguil->connect ( $ip, $port, $sensor{'name'} ) };
	if ($@) { 
		print "ERROR: Setting up the connection to the sensor agent at $ip:$port failed.\n$@\n";
		exit 1;
	}
}


#
# Main loop
#
# Each pass picks up the alert files currently in the directory, sends
# the ones that are real alerts and removes the processed files.
#
while ( 1 )
{
	my @list = GetAlertFiles( $sensor{"dir"} );
	if ( @list == 0 ){
		print "listsize is 0, sleeping 3 seconds.\n";
		sleep 3;
	} else {
		# parse each file and check if it really was an alert
		foreach my $file (@list) {
			print "file is $file\n";

			# get a ModsecAlert object
			my $alert = ModsecAlert->new();

			# parse the alert file; a parse failure is fatal
			eval { $alert->parsefile( $file ) };
			if($@) {
				die $@;
			}

			# get a copy from the hash of the alert
			my %event_hash = %{$alert->getalerthash()};
			# create a reference to work with
			my $hash_ref = \%event_hash;

			# if we have no code, it was no alert
			if(exists($event_hash{"code"}))
			{
				print $event_hash{"code"} . "\n";

				# preprocess the data so it becomes what Sguil expects
				if ( PreprocessAlert ( $hash_ref ) == 0 ) {
					print $event_hash{"themsg"} . "\n";
			
					if( $sensor{"testfiles"} eq "false" )
					{
						# increase the cid so the alert gets its unique cid.
						$sguil->incrcid();

						# send the alert
						eval { $sguil->rtevent( $hash_ref ) };
						if($@) {
							print "Sending rtevent failed: $@\n";
							exit 1;
						}
					}
				}
			}

			# remove only if we want to (useful for debugging)
			if( $sensor{"remove"} eq "yes" && $sensor{"testfiles"} eq "false" ) {
				# A file that cannot be removed is found again on the
				# next pass, so make the failure visible.
				if ( unlink($file) != 1 ) {
					print "Warning: removing $file failed: $!\n";
				}
			}
		}
	}

	# in test mode we just look at all files once
	if ( $sensor{"testfiles"} eq "true" ) {
		exit 0;
	}
}

# NOTE(review): the loop above has no "last" and is only left through
# exit or die, so this disconnect is never reached.
eval { $sguil->disconnect() };
if($@) {
	print "Closing connection problem: $@\n";
	exit 1;
}

