blob: 74fe45de047ba1b95f4f0f83611ee0ccf827936a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
#!/bin/bash
#
# This script goes over the Apache access logs of a particular site and
# resolves them one by one. For every $INPUT_PATH/*access.log-*[0-9].gz
# the result is stored next to it under the same name with ".gz" replaced
# by ".resolved.gz". Logs whose resolved file already exists and is
# non-empty are skipped.
#
# Usage: <this script> WEB_NAME [LIMIT]
#   WEB_NAME  site name; not referenced below, but it is assigned before
#             the config is sourced (presumably so the config can use it
#             to build its paths -- verify against the config file).
#   LIMIT     optional; stop after this many logs were processed in this
#             run. Empty or omitted means "process everything".
#
# Environment:
#   WEBLOGS_CONFIG  path of the shell config file to source
#                   (default: "config" in the current directory).
#
# After the config is sourced these variables must be set:
#   INPUT_PATH      directory holding the access logs
#   WORK_PATH       scratch directory (created if missing)
#   TMP_LOG_NAME    base name of the scratch files inside WORK_PATH
#   DNSHISTORY_DB   value passed to "dnshistory -d"
# Optional:
#   DNSHISTORY_OPTS extra, whitespace-separated options for dnshistory
#
set -e

WEB_NAME="$1"
LIMIT="$2"
WEBLOGS_CONFIG=${WEBLOGS_CONFIG:-config}
# shellcheck disable=SC1090  # config location is only known at run time
source "$WEBLOGS_CONFIG"

# Fail early with a clear message instead of globbing "/" or calling the
# tools with missing arguments when the config is incomplete.
: "${INPUT_PATH:?INPUT_PATH must be set by $WEBLOGS_CONFIG}"
: "${WORK_PATH:?WORK_PATH must be set by $WEBLOGS_CONFIG}"
: "${TMP_LOG_NAME:?TMP_LOG_NAME must be set by $WEBLOGS_CONFIG}"
: "${DNSHISTORY_DB:?DNSHISTORY_DB must be set by $WEBLOGS_CONFIG}"

# Enabled only after the config was sourced so that the config itself is
# evaluated under the same shell options as before.
set -o pipefail

mkdir -p "$WORK_PATH"

# The scratch file prefix does not depend on the log being processed.
tmpname=$WORK_PATH/$TMP_LOG_NAME
cnt=0

for f in "$INPUT_PATH"/*access.log-*[0-9].gz; do
  # An unmatched glob stays as the literal pattern; there is nothing to do.
  [ -e "$f" ] || continue

  base=${f##*/}
  outname=$INPUT_PATH/${base%.gz}.resolved.gz

  if [ ! -s "$outname" ]; then
    printf '%s\n' "$f"

    # Drop every line containing "::1". grep exits with 1 when it selects
    # no lines at all; that is a valid (empty) result, not an error, so
    # only exit codes above 1 are allowed to abort the script.
    zcat "$f" | { grep -v "::1" || [ "$?" -eq 1 ]; } > "$tmpname.1"

    # DNSHISTORY_OPTS is intentionally unquoted: it may carry several options.
    # shellcheck disable=SC2086
    time dnshistory -L ${DNSHISTORY_OPTS:-} -d "$DNSHISTORY_DB" -f "$tmpname.1"
    python iploc.py --config="$WEBLOGS_CONFIG" "$tmpname.1" > "$tmpname.2"

    # Compress into a side file and rename it afterwards, so an interrupted
    # run never leaves a truncated, non-empty *.resolved.gz behind that the
    # "-s" check above would treat as already done. The ".part" name does
    # not match the input glob.
    gzip -c -9 "$tmpname.2" > "$outname.part"
    mv -f "$outname.part" "$outname"

    cnt=$((cnt + 1))
    if [ -n "$LIMIT" ] && [ "$cnt" -eq "$LIMIT" ]; then
      break
    fi
  fi
done
|