created by tomfmason at 2008-12-08 22:53:09
This is a useful bash script for analyzing Apache logs. It may have to be adjusted if you use a non-standard log format.
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash
# usage 
#    ./this_script search pattern log_file
#       all hosts with 5 or more matches of the given pattern will be banned
#    ./this_script ban_from_log log_file
#       all hosts that appear more than 5 times in the given log file will be banned
#    ./this_script ban_rfi log_file
#       bans all hosts that match the rfi pattern(rfi's and proxy requests)
#    ./this_script most_viewed log_file
#       shows the top ten viewed pages with the number of views
#    ./this_script statuses response log_file
#       shows the top ten viewed pages for the given response status code, e.g. 404, 200, etc.
#    ./this_script referrers log_file 
#       shows the top ten referrers and page views for each
#
# @author tomfmason
# File that receives the generated "ALL: <host>" deny entries.
ban_file="/etc/hosts.deny"

#######################################
# Add a deny rule for a host to the ban file unless the host is already
# mentioned there.
# Globals:   ban_file (read; the file it names is appended to)
# Arguments: $1 - host / IP address to ban
# Outputs:   appends "ALL: <host>" to $ban_file; diagnostics go to stderr
# Returns:   1 if no host was given, otherwise the status of the append
#######################################
function ban_ip() {
  local host=${1-}

  if [[ -z "$host" ]]; then
    printf 'ban_ip: no host given\n' >&2
    return 1
  fi

  # -F: take the address literally (a "." must not match any character).
  # -w: whole-word match, so 1.2.3.4 is not treated as present merely
  #     because 1.2.3.40 is listed.
  # stderr is dropped on purpose: a missing ban file simply means the host
  # is not listed yet, and the append below creates the file.
  if ! grep -qwF -- "$host" "$ban_file" 2>/dev/null; then
    printf 'ALL: %s\n' "$host" >> "$ban_file"
  fi
}

#######################################
# Ban every host that has 5 or more log lines matching a pattern.
# Arguments: $1 - grep (basic regex) pattern to look for in the log
#            $2 - access log file
# Outputs:   calls ban_ip once per offending host; diagnostics on stderr
# Returns:   1 on missing pattern or unreadable log file
#######################################
function search() {
  local pattern=${1-}
  local log_file=${2-}
  local host

  if [[ -z "$pattern" || ! -r "$log_file" ]]; then
    printf 'usage: search pattern log_file\n' >&2
    return 1
  fi

  # Field 1 is the client host in the common/combined log format. Matches
  # are counted per host so that only hosts reaching the threshold are
  # banned; the addresses are passed on unmodified.
  while IFS= read -r host; do
    ban_ip "$host"
  done < <(
    grep -- "$pattern" "$log_file" \
      | awk 'NF { hits[$1]++ }
             END { for (h in hits) if (hits[h] >= 5) print h }' \
      | sort
  )
}

#######################################
# Ban every host that appears more than 5 times in a log file.
# Arguments: $1 - access log file
# Outputs:   calls ban_ip once per offending host; diagnostics on stderr
# Returns:   1 if the log file is missing or unreadable
#######################################
function ban_from_log() {
  local log_file=${1-}
  local host

  if [[ ! -r "$log_file" ]]; then
    printf 'usage: ban_from_log log_file\n' >&2
    return 1
  fi

  # Field 1 is the client host in the common/combined log format. Lines are
  # counted per host; the file is fed on stdin so that a path containing "="
  # is not mistaken by awk for a variable assignment.
  while IFS= read -r host; do
    ban_ip "$host"
  done < <(
    awk 'NF { hits[$1]++ }
         END { for (h in hits) if (hits[h] > 5) print h }' < "$log_file" \
      | sort
  )
}

#######################################
# Ban every host whose requested URL contains a URL scheme name
# (remote-file-inclusion attempts and proxy requests).
# Arguments: $1 - access log file
# Outputs:   calls ban_ip once per offending host; diagnostics on stderr
# Returns:   1 if the log file is missing or unreadable
#######################################
function ban_rfi() {
  local log_file=${1-}
  local host

  if [[ ! -r "$log_file" ]]; then
    printf 'usage: ban_rfi log_file\n' >&2
    return 1
  fi

  # Only the request field ($7) is tested, case-insensitively, so a client
  # host name can never trigger a ban by itself. Every matching request
  # counts; none are skipped.
  # NOTE(review): the pattern is deliberately kept as broad as before; it
  # also matches harmless paths that merely contain "http" or "ftp".
  while IFS= read -r host; do
    ban_ip "$host"
  done < <(
    awk 'tolower($7) ~ /(http|https|ftp)/ { print $1 }' < "$log_file" \
      | sort -u
  )
}

#######################################
# Show the ten most requested values of log field 7 (the request path in
# the common/combined log format) with their hit counts.
# Arguments: $1 - access log file
# Outputs:   up to ten "count value" lines on stdout, highest count first
#######################################
function most_viewed() {
  # The path is quoted so that file names containing whitespace or glob
  # characters reach awk as a single argument.
  awk '{ print $7 }' "${1-}" \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n 10
}

#######################################
# Show the ten most requested pages that were answered with a given
# response status.
# Arguments: $1 - status to select, e.g. 404; it is used as an extended
#                 regular expression that must match the whole status
#                 field, so "404|500" also works
#            $2 - access log file
# Outputs:   up to ten "count  path" lines on stdout, highest count first
# Returns:   1 if no status was given
#######################################
function statuses() {
  local status=${1-}
  local log_file=${2-}

  if [[ -z "$status" ]]; then
    printf 'usage: statuses response log_file\n' >&2
    return 1
  fi

  # The status is handed to awk with -v so that it is actually used, and
  # compared against the status field ($9) only, so a path that happens to
  # contain the same digits is not selected.
  awk -v status="$status" '$9 ~ ("^(" status ")$") { print $7 }' "$log_file" \
    | sort \
    | uniq -c \
    | sort -rn \
    | awk '{ print $1 "  " $2 }' \
    | head -n 10
}

#######################################
# Show the ten most frequent values of log field 11 (the referrer in the
# combined log format) with the number of requests for each.
# Arguments: $1 - access log file
# Outputs:   up to ten "count referrer" lines on stdout, highest count first
#######################################
function referrers() {
  # The path is quoted so that file names containing whitespace or glob
  # characters reach awk as a single argument.
  awk '{ print $11 }' "${1-}" \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n 10
}

#######################################
# Command dispatcher: run the shell function named by the first argument,
# passing it the remaining arguments.
# Arguments: $1 - name of a function defined in this shell
#            $@ - arguments for that function
# Outputs:   a usage summary on stderr when the command is missing or is
#            not a function
# Returns:   2 on a missing/unknown command, otherwise the function's status
#######################################
function main() {
  # declare -F succeeds only for defined functions, so external commands,
  # builtins and aliases are never executed through this entry point.
  if [[ $# -eq 0 ]] || ! declare -F -- "$1" > /dev/null; then
    {
      printf 'usage: %s command [args] log_file\n' "${0##*/}"
      printf 'commands: search pattern | ban_from_log | ban_rfi |'
      printf ' most_viewed | statuses response | referrers\n'
    } >&2
    return 2
  fi

  "$@"
}

main "$@"