remove dependency on fetch_image server index

This removes the dependency on the server_index.tar.bz2 file on
releases.linaro.org/fetch_image being up to date. This makes the build
process easier, because we no longer have to hope the index has been
regenerated between the time the build was completed and the time this
script gets launched.
diff --git a/crawler.py b/crawler.py
new file mode 100755
index 0000000..d8cdd6c
--- /dev/null
+++ b/crawler.py
@@ -0,0 +1,115 @@
+#!/usr/bin/python
+'''Crawl an HTTP directory listing for hwpack and rfs image tarballs.
+
+The layout expected below the base url, as encoded in the urls built here:
+    <url>/<date>/<build>/images/hwpack/hwpack*.tar.gz
+    <url>/<date>/<build>/images/tar/*.tar.gz
+'''
+
+import contextlib
+import re
+import urllib2
+
+# Matches an <a href="..."> tag; %s is replaced by the pattern (with one
+# capture group) describing the link target the caller is interested in.
+_HREF_TEMPLATE = r'<a\s*href=[\'"]%s[\'"].*?>'
+_ANY_LINK = _HREF_TEMPLATE % r'(.*?)'
+# Links to sub-directories with a purely numeric name (dates, build numbers).
+_NUMERIC_DIR = _HREF_TEMPLATE % r'(\d+)\/'
+
+
+def list_links(url, regex=_ANY_LINK):
+    '''Fetch url and return the list of matches of regex in the response.
+
+    With the default regex that is the target of every <a href=...> tag.
+    Errors raised by urllib2.urlopen are not caught here.
+    '''
+    # closing() releases the connection even if read() raises
+    with contextlib.closing(urllib2.urlopen(url)) as response:
+        msg = response.read()
+    return re.compile(regex).findall(msg)
+
+
+def _newest_first(names):
+    '''Return the unique numeric names sorted from highest to lowest.'''
+    # compare as integers so that e.g. build "10" ranks above build "9"
+    return sorted(set(names), key=int, reverse=True)
+
+
+def _list_builds(url, date, subdir, tarball):
+    '''Return the tarball urls of all builds of date, newest build first.
+
+    subdir is the directory below images/ to look in and tarball the regex
+    (with one capture group) selecting file names in that directory. A build
+    is only reported when exactly one file name matches.
+    '''
+    url = "%s/%s" % (url.rstrip('/'), date)
+    builds = []
+    for build in _newest_first(list_links(url, _NUMERIC_DIR)):
+        tarurl = "%s/%s/images/%s/" % (url, build, subdir)
+        found = list_links(tarurl, _HREF_TEMPLATE % tarball)
+        if len(found) == 1:
+            # tarurl already ends in '/', so append the name directly
+            builds.append(tarurl + found[0])
+    return builds
+
+
+def _latest_dates(url, limit):
+    '''Return the limit most recent numeric directory names under url.'''
+    # sort before truncating so the result does not depend on listing order
+    return _newest_first(list_links(url, _NUMERIC_DIR))[:limit]
+
+
+def list_hwpack_builds(url, date):
+    '''Return the hwpack urls found under url/date, newest build first.'''
+    return _list_builds(url, date, 'hwpack', r'(hwpack.*?\.tar\.gz)')
+
+
+def latest_hwpacks(url, limit=7):
+    '''returns an array of tuples (build-date, hwpack url) like:
+       [ (20120210, http://foo.bar/hwpack.tar.gz), (20120209, blah.tar.gz) ]
+
+    Only the limit most recent dates are analyzed and a date without a
+    hwpack is left out. For each date the newest build is reported.
+    '''
+    hwpacks = []
+    for date in _latest_dates(url, limit):
+        builds = list_hwpack_builds(url, date)
+        if builds:
+            hwpacks.append((date, builds[0]))
+    return hwpacks
+
+
+def list_rfs_builds(url, date):
+    '''Return the images/tar urls found under url/date, newest build first.'''
+    return _list_builds(url, date, 'tar', r'(.*?\.tar\.gz)')
+
+
+def latest_rfs(url, limit=7):
+    '''
+    Returns a tuple of (builddate, url) for the most recent date that has a
+    tarball, or None when none of the limit most recent dates has one.
+    '''
+    for date in _latest_dates(url, limit):
+        builds = list_rfs_builds(url, date)
+        if builds:
+            return (date, builds[0])
+
+    return None
+
+
+if __name__ == '__main__':
+    import sys
+    for arg in sys.argv[1:]:
+        print "HWPACKS for: %s" % arg
+        for hwpack in latest_hwpacks(arg, 4):
+            print "  %s: %s" % hwpack
+
+    print "latest nano:"
+    nano = latest_rfs('http://snapshots.linaro.org/oneiric/linaro-o-nano')
+    # latest_rfs returns None when nothing was found, and formatting None
+    # with two %s placeholders would raise TypeError
+    if nano is None:
+        print "  none found"
+    else:
+        print "  %s %s" % nano