aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paul Sokolovsky <paul.sokolovsky@linaro.org> 2016-01-15 14:15:32 +0200
committer Paul Sokolovsky <paul.sokolovsky@linaro.org> 2016-01-15 14:15:32 +0200
commit 2f6efbacdddcb3a73204c7dcc38a39d206d7ae37 (patch)
tree e2c9597173a4a0fbbb4029d7d9d965f22e2feffe
parent 67eba98d03d6341b3961ee64005eaf0343cee99e (diff)
download weblogs-2f6efbacdddcb3a73204c7dcc38a39d206d7ae37.tar.gz
analyse-logs.sh: extract_logs: Switch to using incrementally resolved logs.
This requires resolve-logs-incremental.sh to have been run first.

Change-Id: Ic25366dae6f0eac7660491f70716df7a96dd793b
-rwxr-xr-x analyse-logs.sh 60
1 files changed, 3 insertions, 57 deletions
diff --git a/analyse-logs.sh b/analyse-logs.sh
index c3e1fd5..32aae33 100755
--- a/analyse-logs.sh
+++ b/analyse-logs.sh
@@ -408,63 +408,8 @@ cleanup ()
extract_logs ()
{
# Build a single log file that is not gzipped.
-
- # Now in 2014 we can just preprocess all 2012 and 2013 files and save processing time for all 3 web servers
- # then just grab all of the 2014 files to process
- # *access.log-2014*
- # preprocessed-*-2013-access.log.gz
- x=`ls $INPUT_PATH/$RAW_LOG_NAME | wc -l`
- if [ x > 0 ] ; then
- if [ $DEBUG -eq $TRUE ] ; then
- echo "$WEB_NAME making access.log by zcat $INPUT_PATH/$RAW_LOG_NAME"
- fi
- zcat $INPUT_PATH/$RAW_LOG_NAME | grep -v "::1" > $WORK_PATH/$TMP_LOG_NAME
- fi
- # Previous years logs preprocessed into a single compressed file to save processing time.
- if [ $DEBUG -eq $TRUE ] ; then
- zcat $INPUT_PATH/preprocessed*access.log.gz > $WORK_PATH/$PROCESSED_LOG_NAME || true
- else
- zcat $INPUT_PATH/preprocessed*access.log.gz > $WORK_PATH/$PROCESSED_LOG_NAME 2>/dev/null || true
- fi
-
- if [ $DO_REV_DNS_LOOKUP -eq $TRUE ] || [ $DO_GEOIP_LOOKUP -eq $TRUE ] ; then
- # If it's www.linaro.org build the DNS database
- # This is a tad risky as we could have differnt folks coming directly
- # into releases or snapshots then the main site, that said the risk is
- # low and the speedup huge so it's worth it.
- if [ $WEB_NAME = "www.linaro.org" ] ; then
- if [ $DEBUG -eq $TRUE ] ; then
- echo "About to do dnshistory lookup"
- fi
- if [ $DEBUG -eq $TRUE ] ; then
- /usr/bin/dnshistory -L $DNSHISTORY_OPTS -d $DNSHISTORY_DB -f $WORK_PATH/$TMP_LOG_NAME
- else
- /usr/bin/dnshistory -L $DNSHISTORY_OPTS -d $DNSHISTORY_DB -f $WORK_PATH/$TMP_LOG_NAME > /dev/null
- fi
- fi
-
- # Now translate ip addresses to DNS names for all log files
- if [ $DO_GEOIP_LOOKUP -eq $TRUE ] ; then
- # if GEOIP LOOKUP is desired do both GEOIP and reverse DNS lookup at the sametime
- # the iploc.py program was modified to read both databases and do both in one pass.
- if [ $DEBUG -eq $TRUE ] ; then
- echo "About to do GEOIP LOOKUP and dnshistory replace"
- fi
- python $STARTING_LOCATION/iploc.py --config=$STARTING_LOCATION/$CONFIG \
- $WORK_PATH/$TMP_LOG_NAME >> $WORK_PATH/$PROCESSED_LOG_NAME
- else
- # GEOIP info not requested so do the reverse DNS only
- if [ $DEBUG -eq $TRUE ] ; then
- echo "About to do dnshistory replace only"
- fi
- /usr/bin/dnshistory -T --logtype=www -d $DNSHISTORY_DB -f $WORK_PATH/$TMP_LOG_NAME >> $WORK_PATH/$PROCESSED_LOG_NAME
- fi
- else
- if [ $DEBUG -eq $TRUE ] ; then
- echo "No GEOIP LOOKUP or Reverse DNS"
- fi
- cat $WORK_PATH/$TMP_LOG_NAME >> $WORK_PATH/$PROCESSED_LOG_NAME
- fi
+ mkdir -p $WORK_PATH
+ zcat $INPUT_PATH/*access.log-20*[0-9].resolved.gz >$WORK_PATH/$PROCESSED_LOG_NAME
# now make a new file with only .gz, bz2, xz,exe, and zip files downloaded
# this grep can take some time to run, it's using a regular expression to extract compressed files
@@ -484,6 +429,7 @@ extract_logs ()
| grep -v .js \
| grep -v validation.linaro.org \
> $WORK_PATH/$FILTERED_LOG_NAME
+
if [ $EXTRACT_TOOLCHAIN_LOG -eq $TRUE ] ; then
if [ $DEBUG -eq $TRUE ] ; then
echo "creating toochain log"