diff options
-rwxr-xr-x | analyse-logs.sh | 60 |
1 files changed, 3 insertions, 57 deletions
diff --git a/analyse-logs.sh b/analyse-logs.sh index c3e1fd5..32aae33 100755 --- a/analyse-logs.sh +++ b/analyse-logs.sh @@ -408,63 +408,8 @@ cleanup () extract_logs () { # Build a single log file that is not gzipped. - - # Now in 2014 we can just preprocess all 2012 and 2013 files and save processing time for all 3 web servers - # then just grab all of the 2014 files to process - # *access.log-2014* - # preprocessed-*-2013-access.log.gz - x=`ls $INPUT_PATH/$RAW_LOG_NAME | wc -l` - if [ x > 0 ] ; then - if [ $DEBUG -eq $TRUE ] ; then - echo "$WEB_NAME making access.log by zcat $INPUT_PATH/$RAW_LOG_NAME" - fi - zcat $INPUT_PATH/$RAW_LOG_NAME | grep -v "::1" > $WORK_PATH/$TMP_LOG_NAME - fi - # Previous years logs preprocessed into a single compressed file to save processing time. - if [ $DEBUG -eq $TRUE ] ; then - zcat $INPUT_PATH/preprocessed*access.log.gz > $WORK_PATH/$PROCESSED_LOG_NAME || true - else - zcat $INPUT_PATH/preprocessed*access.log.gz > $WORK_PATH/$PROCESSED_LOG_NAME 2>/dev/null || true - fi - - if [ $DO_REV_DNS_LOOKUP -eq $TRUE ] || [ $DO_GEOIP_LOOKUP -eq $TRUE ] ; then - # If it's www.linaro.org build the DNS database - # This is a tad risky as we could have differnt folks coming directly - # into releases or snapshots then the main site, that said the risk is - # low and the speedup huge so it's worth it. - if [ $WEB_NAME = "www.linaro.org" ] ; then - if [ $DEBUG -eq $TRUE ] ; then - echo "About to do dnshistory lookup" - fi - if [ $DEBUG -eq $TRUE ] ; then - /usr/bin/dnshistory -L $DNSHISTORY_OPTS -d $DNSHISTORY_DB -f $WORK_PATH/$TMP_LOG_NAME - else - /usr/bin/dnshistory -L $DNSHISTORY_OPTS -d $DNSHISTORY_DB -f $WORK_PATH/$TMP_LOG_NAME > /dev/null - fi - fi - - # Now translate ip addresses to DNS names for all log files - if [ $DO_GEOIP_LOOKUP -eq $TRUE ] ; then - # if GEOIP LOOKUP is desired do both GEOIP and reverse DNS lookup at the sametime - # the iploc.py program was modified to read both databases and do both in one pass. - if [ $DEBUG -eq $TRUE ] ; then - echo "About to do GEOIP LOOKUP and dnshistory replace" - fi - python $STARTING_LOCATION/iploc.py --config=$STARTING_LOCATION/$CONFIG \ - $WORK_PATH/$TMP_LOG_NAME >> $WORK_PATH/$PROCESSED_LOG_NAME - else - # GEOIP info not requested so do the reverse DNS only - if [ $DEBUG -eq $TRUE ] ; then - echo "About to do dnshistory replace only" - fi - /usr/bin/dnshistory -T --logtype=www -d $DNSHISTORY_DB -f $WORK_PATH/$TMP_LOG_NAME >> $WORK_PATH/$PROCESSED_LOG_NAME - fi - else - if [ $DEBUG -eq $TRUE ] ; then - echo "No GEOIP LOOKUP or Reverse DNS" - fi - cat $WORK_PATH/$TMP_LOG_NAME >> $WORK_PATH/$PROCESSED_LOG_NAME - fi + mkdir -p $WORK_PATH + zcat $INPUT_PATH/*access.log-20*[0-9].resolved.gz >$WORK_PATH/$PROCESSED_LOG_NAME # now make a new file with only .gz, bz2, xz,exe, and zip files downloaded # this grep can take some time to run, it's using a regular expression to extract compressed files @@ -484,6 +429,7 @@ extract_logs () | grep -v .js \ | grep -v validation.linaro.org \ > $WORK_PATH/$FILTERED_LOG_NAME + if [ $EXTRACT_TOOLCHAIN_LOG -eq $TRUE ] ; then if [ $DEBUG -eq $TRUE ] ; then echo "creating toochain log" |