EULA logic on snapshots changed
We were originally able to get the build listings without handling the
EULA. This has changed, so cookie logic (driven by the LMC_COOKIES
environment variable) is being added to the core code to be safe.
diff --git a/crawler.py b/crawler.py
index 101995f..b07a8b1 100755
--- a/crawler.py
+++ b/crawler.py
@@ -1,8 +1,18 @@
#!/usr/bin/python
+import cookielib
+import os
import re
import urllib2
+def cookie_setup():
+ cookies = os.getenv('LMC_COOKIES')
+ if cookies:
+ cj = cookielib.LWPCookieJar()
+ opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
+ opener.addheaders.append(('Cookie', cookies))
+ urllib2.install_opener(opener)
+
def list_links(url, regex=r'<a\s*href=[\'|"](.*?)[\'"].*?>'):
try:
response = urllib2.urlopen(url)
@@ -27,9 +37,10 @@
[ (20120210, http://foo.bar/hwpack.tar.gz), (20120209, blah.tar.gz) ]
'''
# only analyze the last few builds
- links = list_links(url, r'<a\s*href=[\'|"](\d+)\/[\'"].*?>')[:limit]
+ links = list_links(url, r'<a\s*href=[\'|"](\d+)\/[\'"].*?>')
+ links = sorted(links, reverse=True, key=int)[:limit]
hwpacks = []
- for link in sorted(links, reverse=True):
+ for link in links:
build = list_hwpack('%s/%s'% (url, link))
if build is not None:
hwpacks.append(build)
@@ -46,8 +57,9 @@
Returns a tuple of (builddate, url)
'''
# only analyze the last few builds
- links = list_links(url, r'<a\s*href=[\'|"](\d+)\/[\'"].*?>')[:limit]
- for link in sorted(links, reverse=True, key=int):
+ links = list_links(url, r'<a\s*href=[\'|"](\d+)\/[\'"].*?>')
+ links = sorted(links, reverse=True, key=int)[:limit]
+ for link in links:
build = list_rfs('%s/%s' %(url, link))
if build is not None:
return (link, build)
@@ -56,6 +68,9 @@
if __name__ == '__main__':
import sys
+
+ cookie_setup()
+
for arg in sys.argv[1:]:
print "HWPACKS for: %s" % arg
hwpacks = latest_hwpacks(arg, 4)