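Fix the pre-built images build, which was broken by the new snapshots.linaro.org deployment.

The crawler's link-matching regexes now capture the text of each <a> element instead of its href value.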
diff --git a/crawler.py b/crawler.py
index 3d67b36..35b6063 100755
--- a/crawler.py
+++ b/crawler.py
@@ -14,7 +14,7 @@
opener.addheaders.append(('Cookie', cookies))
urllib2.install_opener(opener)
-def list_links(url, regex=r'<a\s*href=[\'|"](.*?)[\'"].*?>'):
+def list_links(url, regex=r'<a\s*href=[\'"].*[\'"]>(.*)/?</a>'):
try:
response = urllib2.urlopen(url)
msg = response.read()
@@ -27,7 +27,7 @@
def list_hwpack(url):
''' returns tuple of (buildate, url)
'''
- urls = list_links(url, r'<a\s*href=[\'|"](hwpack.*?\.tar\.gz)[\'"].*?>')
+ urls = list_links(url, r'<a\s*href=[\'|"].*[\'"]>(hwpack.*?\.tar\.gz)</a>')
for link in urls:
build_date = re.compile('_(\d+)-').findall(link)
return (build_date[0], '%s/%s' % (url,link))
@@ -38,7 +38,7 @@
[ (20120210, http://foo.bar/hwpack.tar.gz), (20120209, blah.tar.gz) ]
'''
# only analyze the last few builds
- links = list_links(url, r'<a\s*href=[\'|"](\d+)\/[\'"].*?>')
+ links = list_links(url, r'<a\s*href=[\'|"].*[\'"]>(\d+)/?</a>')
links = sorted(links, reverse=True, key=int)[:limit]
hwpacks = []
for link in links:
@@ -48,7 +48,7 @@
return hwpacks
def list_rfs(url):
- links = list_links(url, r'<a\s*href=[\'|"](linaro-.*?\d+(?!config)\.tar\.gz)[\'"].*?>')
+ links = list_links(url, r'<a\s*href=[\'"].*[\'"]>(linaro-.*?\d+(?!config)\.tar\.gz)</a>')
if len(links) is 1:
return "%s/%s" %(url,links[0])
return None
@@ -58,7 +58,7 @@
Returns a tuple of (builddate, url)
'''
# only analyze the last few builds
- links = list_links(url, r'<a\s*href=[\'|"](\d+)\/[\'"].*?>')
+ links = list_links(url, r'<a\s*href=[\'"].*[\'"]>(\d+)/?</a>')
links = sorted(links, reverse=True, key=int)[:limit]
for link in links:
build = list_rfs('%s/%s' %(url, link))