aboutsummaryrefslogtreecommitdiff
path: root/resolve-logs-incremental.sh
blob: 74fe45de047ba1b95f4f0f83611ee0ccf827936a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
#
# This script goes over the Apache access logs for a particular
# site and resolves them one by one, storing each result as
# access.log-<date>.resolved.gz in the same directory. Logs that
# already have a non-empty resolved counterpart are skipped.
#
# Usage: resolve-logs-incremental.sh WEB_NAME [LIMIT]
#   WEB_NAME  stored in $WEB_NAME before the config file is sourced;
#             it is not referenced by this script itself
#             (presumably the config derives its paths from it - confirm).
#   LIMIT     optional; stop once exactly this many logs were resolved.
#
# Environment:
#   WEBLOGS_CONFIG  config file to source (default: "config"). It must
#                   define INPUT_PATH, WORK_PATH, TMP_LOG_NAME and
#                   DNSHISTORY_DB, and may define DNSHISTORY_OPTS.
#
# pipefail: a failing zcat must abort the run instead of letting a
# truncated log be stored as the final result.
set -eo pipefail

# shellcheck disable=SC2034  # visible to the sourced config file
WEB_NAME="$1"
LIMIT="$2"

WEBLOGS_CONFIG=${WEBLOGS_CONFIG:-config}
# shellcheck source=/dev/null
source "$WEBLOGS_CONFIG"

# Fail early with a clear message instead of globbing under "/" or
# calling the tools with empty arguments.
: "${INPUT_PATH:?INPUT_PATH must be set by $WEBLOGS_CONFIG}"
: "${WORK_PATH:?WORK_PATH must be set by $WEBLOGS_CONFIG}"
: "${TMP_LOG_NAME:?TMP_LOG_NAME must be set by $WEBLOGS_CONFIG}"
: "${DNSHISTORY_DB:?DNSHISTORY_DB must be set by $WEBLOGS_CONFIG}"

mkdir -p "$WORK_PATH"

# Common prefix of the scratch files (<prefix>.1 and <prefix>.2); they
# are overwritten for every log that gets processed.
tmpname=$WORK_PATH/$TMP_LOG_NAME

cnt=0
for f in "$INPUT_PATH"/*access.log-*[0-9].gz; do
    # When nothing matches, the glob stays as a literal pattern.
    [ -e "$f" ] || continue

    outname=$INPUT_PATH/$(basename "$f" .gz).resolved.gz
    if [ ! -s "$outname" ]; then
        printf '%s\n' "$f"

        # Drop every line containing "::1" (presumably requests from
        # the IPv6 loopback address - confirm). grep exits with 1 when
        # it selects no lines; that is not an error here, whereas a
        # real grep failure (status 2) still aborts the script.
        zcat "$f" | { grep -v "::1" || [ $? -eq 1 ]; } > "$tmpname.1"

        # DNSHISTORY_OPTS is intentionally unquoted so that it can
        # carry several options.
        # shellcheck disable=SC2086
        time dnshistory -L $DNSHISTORY_OPTS -d "$DNSHISTORY_DB" -f "$tmpname.1"
        python iploc.py --config="$WEBLOGS_CONFIG" "$tmpname.1" > "$tmpname.2"

        # Compress into a side file and rename it afterwards, so that a
        # failed or interrupted gzip never leaves a partial file that a
        # later run would mistake for an already resolved log.
        gzip -c -9 "$tmpname.2" > "$outname.part"
        mv -f "$outname.part" "$outname"

        cnt=$((cnt + 1))
        if [ -n "$LIMIT" ] && [ "$cnt" -eq "$LIMIT" ]; then
            break
        fi
    fi
done