#!/bin/bash
#
# This script goes over the Apache access logs of a particular site and
# resolves them one by one, storing each result next to its input as
# <input name without .gz>.resolved.gz in the same directory.
# Logs that already have a non-empty resolved file are skipped.
#
# Usage: <script> WEB_NAME [LIMIT]
#   WEB_NAME  site name; not referenced directly below, but visible to the
#             sourced config file (NOTE(review): presumably used there to
#             derive INPUT_PATH -- confirm).
#   LIMIT     optional; stop after this many logs have been processed.
#
# Environment:
#   WEBLOGS_CONFIG  config file to source (default: "config"). It is expected
#                   to define INPUT_PATH, WORK_PATH, TMP_LOG_NAME,
#                   DNSHISTORY_DB and, optionally, DNSHISTORY_OPTS.
#

# pipefail: a failing zcat (e.g. corrupt archive) must not be masked by grep.
set -eo pipefail

WEB_NAME="${1:-}"
LIMIT="${2:-}"

WEBLOGS_CONFIG=${WEBLOGS_CONFIG:-config}
# shellcheck disable=SC1090  # config path is only known at run time
source "$WEBLOGS_CONFIG"

# Fail early with a clear message instead of globbing from "/" or handing
# dnshistory an empty option argument.
: "${INPUT_PATH:?INPUT_PATH must be set by $WEBLOGS_CONFIG}"
: "${WORK_PATH:?WORK_PATH must be set by $WEBLOGS_CONFIG}"
: "${DNSHISTORY_DB:?DNSHISTORY_DB must be set by $WEBLOGS_CONFIG}"

mkdir -p "$WORK_PATH"

# Scratch-file prefix; ".1" holds the filtered log, ".2" the final text.
tmpname="$WORK_PATH/$TMP_LOG_NAME"

cnt=0
for f in "$INPUT_PATH"/*access.log-*[0-9].gz; do
  # With no matching files the glob stays literal; skip it instead of
  # letting zcat fail on a non-existent path.
  [ -e "$f" ] || continue

  outname="$INPUT_PATH/$(basename "$f" .gz).resolved.gz"
  if [ ! -s "$outname" ]; then
    # Drop lines containing "::1". grep exits 1 when it selects no lines,
    # which is a valid (empty) result here, so only real errors (>1) abort.
    zcat "$f" | { grep -v "::1" || [ $? -eq 1 ]; } > "$tmpname.1"

    # DNSHISTORY_OPTS is intentionally unquoted: it may carry several options.
    # shellcheck disable=SC2086
    dnshistory -L $DNSHISTORY_OPTS -d "$DNSHISTORY_DB" -f "$tmpname.1"

    python iploc.py --config="$WEBLOGS_CONFIG" "$tmpname.1" > "$tmpname.2"

    # Compress to a side file and rename, so an interrupted or failed run
    # never leaves a partial, non-empty $outname that later runs would skip.
    gzip -c -9 "$tmpname.2" > "$outname.partial"
    mv -f "$outname.partial" "$outname"

    cnt=$((cnt + 1))
    if [ -n "$LIMIT" ] && [ "$cnt" -eq "$LIMIT" ]; then
      break
    fi
  fi
done