diff options
author | Georgy Redkozubov <Georgy.Redkozubov@linaro.org> | 2013-10-18 18:01:40 +0400 |
---|---|---|
committer | Georgy Redkozubov <Georgy.Redkozubov@linaro.org> | 2013-10-18 18:01:40 +0400 |
commit | a2e00fb01d4753ea60b8d5fdc8826497e793d3f6 (patch) | |
tree | d62f0baf459dedcbb570f2f616129dd4f2de860e /iploc.py | |
parent | 75a0442ea39058cb08f06a55787b8f5030a2f98d (diff) |
Initial commit of tools for analyzing web logs from Linaro web servers.
Diffstat (limited to 'iploc.py')
-rw-r--r-- | iploc.py | 95 |
1 files changed, 95 insertions, 0 deletions
#!/usr/bin/python
# Usage: iploc.py </path/to/log> > </path/to/output>
#
# Reads a web server log and writes a rewritten copy to stdout.  For every
# line that contains "<IPv4 address> <ident> <user>":
#   * the address is replaced by its reverse DNS name, when one is stored in
#     the reverse DNS db;
#   * the ident field is replaced by the quoted "country region city"
#     location reported by IP2Location, when any of those is known;
#   * the user field is replaced by the quoted ISP name, when known.
#
# Provenance: diff --git a/iploc.py b/iploc.py (new file mode 100644,
# index 0000000..6854f7f).

import IP2Location
import atexit
import fileinput
import re
import struct
import sys

from bsddb3 import db

# Reverse DNS db file
REVERSE_DNS_DB_FILE = '/home/david.mandala/logs/dnshistory.db'
# IP2Location db file
LOC_DB_FILE = '/home/david.mandala/logs/ip2location/current-database/IP-COUNTRY-REGION-CITY-ISP.BIN'

# Number of leading bytes skipped in a reverse DNS db value before the name.
# NOTE(review): the meaning of those leading bytes is not visible here.
_REVERSE_NAME_OFFSET = 20

# Field value this script treats as "no data" in an IP2Location record.
_NO_DATA = '-'

# "<IPv4-looking address> <ident> <user>"; compiled once, raw string so the
# backslash escapes reach the regex engine untouched.
_LOG_FIELDS_RE = re.compile(r'\d+\.\d+\.\d+\.\d+\s[^ ]+\s[^ ]+')

# Setup DB
IP2LocObj = IP2Location.IP2Location()
IP2LocObj.open(LOC_DB_FILE)

REVERSE_DNS_DB = db.DB()
REVERSE_DNS_DB.open(REVERSE_DNS_DB_FILE, dbtype=db.DB_BTREE)
atexit.register(REVERSE_DNS_DB.close)

# Module-level defaults kept for backward compatibility; main() works on its
# own local variables and never reads these.
temp_ident = ""
temp_user = ""


def get_reverse_dns(ip_address):
    """Return the reverse DNS name stored for *ip_address*, if any.

    ip_address is a dotted IPv4 address (anything accepted by str()).

    Returns the stored name, or a false value (None or an empty db value)
    when the address is not in the db, has no name recorded, or is not made
    of integer octets in the 0-255 range.
    """
    # XXX: this works only with IPv4 addresses.
    try:
        octets = [int(part) for part in str(ip_address).split('.')]
    except ValueError:
        # Non-numeric or empty octet: cannot be a key in the db.
        return None
    if not all(0 <= octet <= 255 for octet in octets):
        # chr() would raise on such values; treat the address as unknown so
        # one odd log line does not abort the whole run.
        return None

    # The keys in the reverse DNS db are stored as a char encoded string
    # made from the single octets of the IP address.
    key = ''.join(chr(octet) for octet in octets)
    value = REVERSE_DNS_DB.get(key)
    if not value:
        return value

    name = value[_REVERSE_NAME_OFFSET:]
    if len(name) <= 1:
        # Value found, but no reverse DNS name in the DB.
        return None

    # The value obtained ends with a \x00 char: need to unpack it and
    # retrieve only the reverse value. Just replacing it with an empty
    # string might not work if the string is encoded.
    hostname, _ = struct.unpack('{0}sc'.format(len(name) - 1), name)
    return hostname


def main(file_names):
    """Rewrite the log file *file_names* and write the result to stdout.

    file_names is a single path; it is handed to fileinput as a one-element
    list.  Lines without an "<address> <ident> <user>" group produce no
    output.
    """
    for line in fileinput.input([file_names]):
        match = _LOG_FIELDS_RE.search(line)
        if not match:
            # NOTE(review): the original indentation was lost in the diff
            # this was reconstructed from; non-matching lines are assumed to
            # be skipped rather than echoed -- confirm against the real file.
            continue

        raw_data = match.group()
        data = raw_data.split()
        address = data[0]
        ident = data[1]
        user = data[2]

        host = get_reverse_dns(address) or address
        rec = IP2LocObj.get_all(address)

        # Keep only the location parts IP2Location actually knows about.
        locations = [
            part
            for part in (rec.country_short, rec.region, rec.city)
            if part != _NO_DATA
        ]
        location = ' '.join(locations)
        new_ident = '"{0}"'.format(location) if location else ident

        isp = rec.isp
        new_user = '"{0}"'.format(isp) if isp != _NO_DATA else user

        replacement = host + ' ' + new_ident + ' ' + new_user
        sys.stdout.write(line.replace(raw_data, replacement))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('Usage: iploc.py </path/to/log> > </path/to/output>')
    main(sys.argv[1])