Convert apache HTTP combined logs into SQL (and import it into a mysql database eventually)
source: snippets.dzone.com
you need to extract the data in your http server log files and put it in a database to query it with your usual tools using SQL. this perl script does just this.
it was hard to find it, that’s why i put it here.
#!/usr/bin/perl -w # Written by Aaron Jenson. # Original source: http://www.visualprose.com/software.php # Updated to work under Perl 5.6.1 by Edward Rudd # Updated 24 march 2007 by Slim Amamou <slim.amamou@alpha-studios.com> # - output SQL with the option '--sql' # - added SQL create table script to the HELP # # NOTE : you need the TimeDate library (http://search.cpan.org/dist/TimeDate/) # use strict; use Getopt::Long qw(:config bundling); use DBI; use Date::Parse; my %options = (); my $i = 0; my $sql = ''; my $valuesSql = ''; my $line = ''; my $dbh = 0; my $sth = 0; my @parts = (); my $part; my $TIMESTAMP = 3; my $REQUEST_LINE = 4; my @cols = ( 'remote_host', ## 0 'remote_logname', ## 1 'remote_user', ## 2 'request_time', ## 3.string 'time_stamp', ## 3.posix 'request_line', ## 5 'request_method', ## 6 'request_uri', ## 7 'request_args', ## 8 'request_protocol', ## 9 'status', ## 10 'bytes_sent', ## 11 'referer', ## 12 'agent' ## 13 ); my $col = ''; GetOptions (\%options, "version" => sub { VERSION_MESSAGE(); exit 0; }, "help|?" => sub { HELP_MESSAGE(); exit 0; }, "host|h=s", "database|d=s", "table|t=s", "username|u=s", "password|p=s", "logfile|f=s", "sql"); $options{host} ||= 'localhost'; $options{database} ||= ''; $options{username} ||= ''; $options{password} ||= ''; $options{logfile} ||= ''; $options{sql} ||= ''; if( ! ($options{database} || $options{sql})) { HELP_MESSAGE(); print "Must supply a database to connect to.\n"; exit 1; } if( ! $options{table} ) { HELP_MESSAGE(); print "Must supply table name.\n"; exit 1; } if( $options{logfile} ) { if( ! -e $options{logfile} ) { print "File '$options{logfile}' doesn't exist.\n"; exit 1; } open(STDIN, "<$options{logfile}") || die "Can't open $options{logfile} for reading."; } if( $options{database} ) { $dbh = Connect(); if (! $dbh) { exit 1; } } $sql = "INSERT INTO $options{table} ("; foreach $col (@cols) { $sql .= "$col," if( $col ); } chop($sql); $sql .= ') VALUES ('; my ($linecount,$insertcount) = (0,0); while($line = <STDIN>) { $linecount++; @parts = SplitLogLine( $line ); next if( $parts[$TIMESTAMP+1] == 0 ); $valuesSql = ''; for( $i = 0; $i < @cols; ++$i ) { $parts[$i] =~ s/\\/\\\\/g; $parts[$i] =~ s/'/\\'/g; $valuesSql .= "'$parts[$i]'," if( $cols[$i] ); } chop($valuesSql); if( $options{database} ) { $sth = $dbh->prepare("$sql$valuesSql)"); if( ! $sth->execute() ) { print "Unable to perform specified query.\n$sql$valuesSql\n" . $sth->errstr() . "\n"; } else { $insertcount++; } $sth->finish(); } if( $options{sql} ) { print "$sql$valuesSql);\n"; } } if( ! $options{sql} ) { print "Parsed $linecount Log lines\n"; print "Inserted $insertcount records\n"; print "to table '$options{table}' in database '$options{database}' on '$options{host}'\n"; } # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # Connects to a MySQL database and returns the connection. # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # sub Connect { my $dsn = "DBI:mysql:$options{database};hostname=$options{host}"; return DBI->connect( $dsn, $options{username}, $options{password} ); } # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # Splits up a log line into its parts. # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # sub SplitLogLine { my $line = shift; my $i = 0; my $inQuote = 0; my $char = ''; my $part = ''; my @parts = (); my $count = 0; chomp($line); for( $i = 0; $i < length($line); ++$i ) { $char = substr($line, $i, 1); if( $char eq ' ' && ! $inQuote ) { ## print "Found part $part.\n"; if( $count == $TIMESTAMP ) { push(@parts, "[".$part."]"); $part = str2time($part); } push(@parts, $part); if( $count == $REQUEST_LINE ) { my @request = split(/[ ?]/, $part); push(@parts, $request[0]); push(@parts, $request[1]); if( $request[3] ) { push(@parts, $request[2]); push(@parts, $request[3]); } else { push(@parts, ''); push(@parts, $request[2]); } $count += 5; } else { ++$count; } $part = ''; } elsif( $char eq '"' || $char eq '[' || $char eq ']' ) { $inQuote = !$inQuote; } else { $part .= $char; } } push(@parts,$part) if $part; return @parts; } # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # Prints the usage/help message for this program. # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # sub HELP_MESSAGE { print<<EOF; Imports an Apache combined log into a MySQL database. Usage: mysql_import_combined_log.pl -d <database name> -t <table name> [-h <hostname>] [-u <username>] [-p <password>] [-f <filename] --host|-h <host name> The host to connect to. Default is localhost. --database|-d <database name> The database to use. Required. --username|-u <username> The user to connect as. --password|-p <password> The user's password. --table|-t <table name> The name of the table in which to insert data. --logfile|-f <file name> The file to read from. If not given, data is read from stdin. --sql Output SQL --help|-? Print out this help message. --version Print out the version of this software. ---------------------------------- -- SQL create statements for the table -- create table <TABLE_NAME> ( remote_host varchar(50) , remote_logname varchar(50) , remote_user varchar(50) , request_time char(28), time_stamp varchar(10) , request_line varchar(255), request_method varchar(10) , request_uri varchar(255), request_args varchar(255), request_protocol varchar(10) , status varchar(10) , bytes_sent varchar(10) , referer varchar(255) , agent varchar(255) ); EOF } # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # Prints the version information for this program # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # sub VERSION_MESSAGE { print "mysql_import_combined_log.pl version 1.2\n"; print "Version 1.0 Written by Aaron Jenson.\n"; print "Update to work with perl 5.6.1 by Edward Rudd\n"; } 1;