aboutsummaryrefslogtreecommitdiff
path: root/iploc.py
diff options
context:
space:
mode:
authorGeorgy Redkozubov <Georgy.Redkozubov@linaro.org>2013-10-18 18:01:40 +0400
committerGeorgy Redkozubov <Georgy.Redkozubov@linaro.org>2013-10-18 18:01:40 +0400
commita2e00fb01d4753ea60b8d5fdc8826497e793d3f6 (patch)
treed62f0baf459dedcbb570f2f616129dd4f2de860e /iploc.py
parent75a0442ea39058cb08f06a55787b8f5030a2f98d (diff)
Initial commit of tools for analyzing web logs from Linaro web servers.
Diffstat (limited to 'iploc.py')
-rw-r--r--iploc.py95
1 files changed, 95 insertions, 0 deletions
diff --git a/iploc.py b/iploc.py
new file mode 100644
index 0000000..6854f7f
--- /dev/null
+++ b/iploc.py
@@ -0,0 +1,95 @@
+#!/usr/bin/python
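+# Usage: iploc.py </path/to/log> > </path/to/output>
+# (only the log path is read from the command line; the rewritten log is
+# written to stdout, so redirect it to save the output)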
+
+import IP2Location
+import atexit
+import fileinput
+import re
+import struct
+import sys
+
+from bsddb3 import db
+
+# Path of the reverse DNS Berkeley DB file (opened below as a B-tree).
+REVERSE_DNS_DB_FILE = '/home/david.mandala/logs/dnshistory.db'
+# Path of the IP2Location binary database file.
+LOC_DB_FILE = '/home/david.mandala/logs/ip2location/current-database/IP-COUNTRY-REGION-CITY-ISP.BIN'
+
+# Set up both databases at import time; main() and get_reverse_dns() use
+# these module-level handles directly.
+IP2LocObj = IP2Location.IP2Location()
+IP2LocObj.open(LOC_DB_FILE)
+
+# The reverse DNS db is closed automatically when the interpreter exits.
+REVERSE_DNS_DB = db.DB()
+REVERSE_DNS_DB.open(REVERSE_DNS_DB_FILE, dbtype=db.DB_BTREE)
+atexit.register(REVERSE_DNS_DB.close)
+
+# NOTE(review): main() assigns names spelled the same way without a
+# `global` statement, so it works on its own locals and these two
+# module-level values are never read in this file.
+temp_ident = ""
+temp_user = ""
+
+
+def get_reverse_dns(ip_address):
+ # XXX: this works only with IPv4 addresses.
+ octets = str(ip_address).split('.')
+ # The keys in the the reverse DNS db are stored as a char encoded string
+ # made from the single octet of the IP address.
+ key = ''.join(chr(int(x)) for x in octets)
+ value = REVERSE_DNS_DB.get(key)
+ if value:
+ if len(value[20:]) > 1:
+ # The value obtained ends with a \x00 char: need to unpack it
+ # and retrieve only the reverse value. Just replacing it with an
+ # empty string might not work if the string is encoded.
+ value, _ = struct.unpack(
+ '{0}sc'.format(len(value[20:]) - 1), value[20:])
+ else:
+ # Value found, but no reverse DNS name in the DB.
+ value = None
+ return value
+
+
+def main(file_names):
+
+ for line in fileinput.input([file_names]):
+ locations = []
+ match = re.search('\d+\.\d+\.\d+\.\d+\s[^ ]+\s[^ ]+', line)
+ if match:
+ raw_data = match.group()
+ data = raw_data.split()
+ ip = get_reverse_dns(data[0]) or data[0]
+ ident = data[1]
+ user = data[2]
+ rec = IP2LocObj.get_all(data[0])
+ country_short = rec.country_short
+ region = rec.region
+ city = rec.city
+ isp = rec.isp
+
+ if country_short != '-':
+ locations.append(country_short)
+
+ if region != '-':
+ locations.append(region)
+
+ if city != '-':
+ locations.append(city)
+
+ temp_ident = ' '.join(locations)
+
+ if temp_ident:
+ temp_ident = '"{0}"'.format(temp_ident)
+ else:
+ temp_ident = ident
+
+ if isp != '-':
+ temp_user = '"{0}"'.format(isp)
+ else:
+ temp_user = user
+
+ temp = ip + ' ' + temp_ident + ' ' + temp_user
+
+ sys.stdout.write(line.replace(raw_data, temp))
+
+ del locations
+
+if __name__ == '__main__':
+ main(sys.argv[1])