I wanted a shell script to analyze my httpd access logs. I found one by @ruanbekker, adopted it, and modified it for httpd.
You can see
- Top requests by IP
- Request methods like `GET` or `HEAD`
- Response codes like `200`, `301`, `404`
- Top requested pages like `/atom.xml`
- Top requested pages with a 404 response
You can download the script here
#!/bin/sh
# Configuration. Both values can be overridden from the environment, e.g.
#   LOGFILE=/path/to/access.log RESPONSE_CODE=301 analyze_access_log
# and fall back to the defaults below when unset or empty.
#   LOGFILE        access log to analyze
#   RESPONSE_CODE  status code that the filters() function selects
LOGFILE="${LOGFILE:-/var/www/logs/access.log}"
RESPONSE_CODE="${RESPONSE_CODE:-200}"
#######################################
# Keep the log lines whose status field equals $RESPONSE_CODE and drop
# lines containing "<UNKNOWN>", "favicon.ico" or "logfile turned over".
# Globals:   RESPONSE_CODE (read)
# Inputs:    access log lines on stdin
# Outputs:   the surviving lines, unchanged, on stdout
#######################################
filters() {
  # Compare against field 10 (the field filter_response_codes reports as the
  # response code) rather than grepping the whole line, so a "200" that only
  # appears in a byte count, URL or address is not mistaken for the status.
  awk -v code="$RESPONSE_CODE" '$10 == code' \
    | grep -v "<UNKNOWN>" \
    | grep -v "favicon.ico" \
    | grep -v "logfile turned over"
}
#######################################
# Print field 10 of every stdin line that contains neither "<UNKNOWN>"
# nor "logfile turned over".
# Inputs:    access log lines on stdin
# Outputs:   one field-10 value per surviving line on stdout
#######################################
filter_response_codes() {
  # A single grep with two patterns excludes a line if either one matches.
  grep -v -e "<UNKNOWN>" -e "logfile turned over" \
    | awk '{ print $10 }'
}
#######################################
# Keep only the log lines whose status field is 404.
# Inputs:    access log lines on stdin
# Outputs:   matching lines, unchanged, on stdout
#######################################
filter_404_response() {
  # Test field 10 (the field filter_response_codes reports as the response
  # code) instead of grepping the whole line; a bare grep "404" also matched
  # URLs such as /404.html and byte counts such as 4040.
  awk '$10 == 404'
}
# Print the second whitespace-separated field of each stdin line
# (used by the "Top requests from IPs" report).
ips() {
  awk '{ print $2 }'
}
# Print the eighth whitespace-separated field of each stdin line
# (used by the "Top requested pages" and "Top requests 404" reports).
pages() {
  awk '{ print $8 }'
}
# Print the first whitespace-separated field of each stdin line.
domain() {
  awk '{ print $1 }'
}
#######################################
# Print the request method found in field 7 of each stdin line, i.e. the
# text between the first and second double quote of that field.
# Inputs:    access log lines on stdin
# Outputs:   one value per input line on stdout
#######################################
methods() {
  # When field 7 holds no double quote at all it is printed unchanged,
  # otherwise the piece following the first quote is printed.
  awk '{
    pieces = split($7, part, "\"")
    if (pieces > 1) {
      print part[2]
    } else {
      print $7
    }
  }'
}
# Collapse identical stdin lines into one row each, prefixed with the number
# of occurrences. Sorting first makes duplicates adjacent for uniq.
sort_count() {
  sort \
    | uniq -c
}
# Order stdin lines by their leading number, largest first.
sort_desc() {
  sort -n -r
}
# Pass through at most the first ten lines of stdin.
top_ten() {
  head -n 10
}
# Print the horizontal rule shown beneath each report heading.
sep() {
  printf '%s\n' "=================================================="
}
##
# Actions
##
#######################################
# Report the ten most frequent values of the IP field among the log lines
# that pass filters().
# Globals:   LOGFILE (read)
# Outputs:   blank line, heading, separator, "count value" rows and a
#            trailing blank line on stdout
#######################################
action_request_ips() {
  echo ""
  echo "Top requests from IPs"
  sep
  # Feed the log through a quoted redirection: the previous unquoted
  # `cat $LOGFILE` was word-split and glob-expanded, so a path containing
  # spaces was never read.
  filters < "$LOGFILE" \
    | ips \
    | sort_count \
    | sort_desc \
    | top_ten
  echo ""
}
#######################################
# Report how often each request method occurs among the log lines that
# pass filters().
# Globals:   LOGFILE (read)
# Outputs:   blank line, heading, separator, "count method" rows and a
#            trailing blank line on stdout
#######################################
action_request_methods() {
  echo ""
  echo "Count requests methods"
  sep
  # Feed the log through a quoted redirection: the previous unquoted
  # `cat $LOGFILE` was word-split and glob-expanded, so a path containing
  # spaces was never read.
  filters < "$LOGFILE" \
    | methods \
    | sort_count
  echo ""
}
#######################################
# Report the ten most requested pages among the log lines that pass
# filters().
# Globals:   LOGFILE (read)
# Outputs:   blank line, heading, separator, "count page" rows and a
#            trailing blank line on stdout
#######################################
action_pages() {
  echo ""
  echo "Top requested pages"
  sep
  # Feed the log through a quoted redirection: the previous unquoted
  # `cat $LOGFILE` was word-split and glob-expanded, so a path containing
  # spaces was never read.
  filters < "$LOGFILE" \
    | pages \
    | sort_count \
    | sort_desc \
    | top_ten
  echo ""
}
#######################################
# Report the ten most requested pages among the log lines selected by
# filter_404_response().
# Globals:   LOGFILE (read)
# Outputs:   blank line, heading, separator, "count page" rows and a
#            trailing blank line on stdout
#######################################
action_404() {
  echo ""
  echo "Top requests 404"
  sep
  # Feed the log through a quoted redirection: the previous unquoted
  # `cat $LOGFILE` was word-split and glob-expanded, so a path containing
  # spaces was never read.
  filter_404_response < "$LOGFILE" \
    | pages \
    | sort_count \
    | sort_desc \
    | top_ten
  echo ""
}
#######################################
# Report how often each response code occurs, most frequent first, using
# the values produced by filter_response_codes().
# Globals:   LOGFILE (read)
# Outputs:   blank line, heading, separator, "count code" rows and a
#            trailing blank line on stdout
#######################################
action_response_codes() {
  echo ""
  echo "Response code"
  sep
  # Feed the log through a quoted redirection: the previous unquoted
  # `cat $LOGFILE` was word-split and glob-expanded, so a path containing
  # spaces was never read.
  filter_response_codes < "$LOGFILE" \
    | sort_count \
    | sort_desc
  echo ""
}
# Entry point: print every report, in the order they appear in the output.
main() {
  action_request_ips
  action_request_methods
  action_response_codes
  action_pages
  action_404
}

main "$@"
Output
$ analyze_access_log
Top requests from IPs
==================================================
13 1.2.3.4
8 1.2.3.5
8 1.2.3.6
4 1.2.3.7
4 1.2.3.8
2 1.2.3.9
2 1.2.3.10
2 1.2.3.11
2 1.2.3.12
2 1.2.3.13
Count requests methods
==================================================
1146 GET
10 HEAD
Response code
==================================================
1190 200
792 304
615 301
80 404
8 400
5 403
1 405
1 206
1 0
Top requested pages
==================================================
694 /atom.xml
136 /
92 /xxx
46 /yyy
23 /zzz
20 /aaa
17 /eee/fff
16 /humans.txt
14 /foo/bar
12 /something/else
Top requests 404
==================================================
61 /robots.txt
5 /css_.php
2 /admin
2 /.git/config
1 /wp-login.php?action=register
1 /login
1 /user/register
1 /index.php?option=com_user%2526task=register
1 /etc/passwd
1 /console
You can download the script here
Top comments (2)
This is sooo #SimpleElegant. Thx for sharing!