aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Sokolovsky <paul.sokolovsky@linaro.org>2016-01-13 23:43:22 +0200
committerPaul Sokolovsky <paul.sokolovsky@linaro.org>2016-01-14 20:28:04 +0200
commitcc57924fc8b530803c91f73fdf05144842a602b8 (patch)
tree647dbdb59858e8dc18d3d415a631304249f94c9a
parenta81fa880270a971f4fa4ba32a82afbb41263dc59 (diff)
downloadweblogs-cc57924fc8b530803c91f73fdf05144842a602b8.tar.gz
resolve-logs-incremental.sh: Tool to incrementally IP-resolve apache logs.
Change-Id: I99e85a5be1b77e949def72dee1b4b02ed6fcd6ed
-rwxr-xr-xresolve-logs-incremental.sh36
1 files changed, 36 insertions, 0 deletions
diff --git a/resolve-logs-incremental.sh b/resolve-logs-incremental.sh
new file mode 100755
index 0000000..7b97feb
--- /dev/null
+++ b/resolve-logs-incremental.sh
@@ -0,0 +1,36 @@
+#/bin/bash
+#
+# This script goes over Apache access.log's for a particular
+# site and resolves them one by one, storing result to
+# access.log-<data>.resolved.gz in the same dir. Already
+# resolved logs are skipped.
+#
+set -e
+
+WEB_NAME="$1"
+LIMIT="$2"
+
+WEBLOGS_CONFIG=${WEBLOGS_CONFIG:-config}
+source $WEBLOGS_CONFIG
+
+mkdir -p $WORK_PATH
+
+cnt=0
+for f in $INPUT_PATH/access.log-*[0-9].gz; do
+ outname=$INPUT_PATH/$(basename $f .gz).resolved.gz
+ tmpname=$WORK_PATH/$TMP_LOG_NAME
+ if [ ! -s "$outname" ]; then
+ echo $f
+ zcat $f | grep -v "::1" > $tmpname.1
+ time dnshistory -L $DNSHISTORY_OPTS -d $DNSHISTORY_DB -f $tmpname.1
+ python iploc.py --config=$WEBLOGS_CONFIG $tmpname.1 > $tmpname.2
+ gzip -c -9 $WORK_PATH/$TMP_LOG_NAME.2 > $outname
+
+ cnt=$(($cnt + 1))
+ if [ -n "$LIMIT" ]; then
+ if [ $cnt -eq "$LIMIT" ]; then
+ break
+ fi
+ fi
+ fi
+done