Real Time IP and Page Requests
# Print fields 1, 6, 9 and 7 of every new log line (client IP, method, status
# and path when the log uses the usual combined format).
# tail -f replaces tailf, which was deprecated and later removed from util-linux.
$ tail -f access.log | awk '{ printf("%-15s\t%s\t%s\t%s\n", $1, $6, $9, $7) }'
# Same stream with the geoiplookup result for the client IP as an extra column.
$ tail -f access.log | awk '{
  # Keep the command in a variable so the identical string can be given to close().
  cmd = "geoiplookup " $1 " | cut -d \\: -f2 "
  # getline leaves geo untouched when the command prints nothing, so reset it first.
  geo = ""
  cmd | getline geo
  # Without close() a repeated IP reads EOF from the already-finished pipe and the
  # previous value of geo is printed; open pipes also pile up until awk runs out.
  close(cmd)
  printf("%-15s\t%s\t%s\t%-20s\t%s\n", $1, $6, $9, geo, $7);
}'
Most Popular URLs
# 25 most frequent values of field 7, highest count first.
awk '{ print $7 }' access.log | sort | uniq -c | sort -rn | head -n 25
Rank Response Codes
# Tally the values of field 9 and list them from most to least frequent.
$ awk '{ print $9 }' access.log | sort | uniq -c | sort -rn
Count Unique Visits
# Number of distinct values in field 1 (the client address) over the whole log.
$ awk '{ print $1 }' access.log | sort -u | wc -l
# Today
# %d (zero-padded day) and %Y (calendar year) reproduce the log timestamp, e.g. 05/Jan/2024.
# %e would yield "5/Jan/2024", which also matches the 15th and 25th, and %G is the
# ISO week-based year, which differs from the calendar year around New Year.
# LC_ALL=C keeps the month abbreviation in English regardless of the user locale.
$ grep -F "$(LC_ALL=C date '+%d/%b/%Y')" access.log | awk '{ print $1 }' | sort -u | wc -l
# This Month
$ grep -F "$(LC_ALL=C date '+%b/%Y')" access.log | awk '{ print $1 }' | sort -u | wc -l
Request IP Addresses
# 25 most frequent values of field 1 (the requesting address), highest count first.
$ awk '{ print $1 }' access.log | sort | uniq -c | sort -rn | head -n 25
# Same ranking; each row is followed by the geoiplookup output for that address.
$ awk '{ print $1 }' access.log | sort | uniq -c | sort -rn | head -n 25 | \
awk '{
  printf("%5d\t%-15s\t", $1, $2)
  system("geoiplookup " $2 " | cut -d \\: -f2 ")
}'
Most requested URLs
# Take the 2nd double-quote-delimited field (the request line, assuming the
# combined log format) and count its 2nd word.
# sort -rn ranks by the numeric count; plain sort -r compares text, which
# misorders counts wider than the uniq -c padding or under locale collation.
awk -F\" '{print $2}' access.log | awk '{print $2}' | sort | uniq -c | sort -rn
# Most requested URLs containing XYZ ("ref" is the example pattern used here)
awk -F\" '($2 ~ "ref"){print $2}' access.log | awk '{print $2}' | sort | uniq -c | sort -rn
Count HTTP404 broken links
# For lines whose field 9 matches 404, count the values of field 7, highest count first.
awk '$9 ~ /404/ { print $7 }' access.log | sort | uniq -c | sort -rn
Count HTTP502 broken links
# For lines whose field 9 matches 502, count the values of field 7.
# sort -rn orders by the numeric count (sort -r alone compares the lines as text).
awk '$9 ~ /502/ { print $7 }' access.log | sort | uniq -c | sort -rn
Sort access by Response Codes
# Using cut: keep the text after the second double quote, then its 2nd space-separated word.
cut -d '"' -f3 access.log | cut -d ' ' -f2 | sort | uniq -c | sort -rn
# Using awk: print whitespace-separated field 9 directly.
awk '{ print $9 }' access.log | sort | uniq -c | sort -rn
Get List of User Agents from Access Logs
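Pipeline stages used in the command below:
- awk(1) - selects the full User-Agent string of GET requests
- cut(1) - keeps only the first word of it
- sort(1) - sorts the values so that duplicates are adjacent
- uniq(1) - counts each distinct value
- sort(1) - sorts by count, highest first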
# Count the first word of the 6th double-quote-delimited field for lines containing GET.
# Plain ASCII quotes are required; typographic quotes are not shell quoting characters.
awk -F'"' '/GET/ {print $6}' /var/log/nginx-access.log | cut -d' ' -f1 | sort | uniq -c | sort -rn
Top 25 user-agents
# 25 most frequent values of the 6th double-quote-delimited field (the full User-Agent string).
$ awk -F'"' '{ print $6 }' access.log | sort | uniq -c | sort -frn | head -n 25