Adapt regexps to new HTML code of snapshots.linaro.org - LP: #1153464

Why do we parse HTML instead of having an API for it?

Signed-off-by: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org>
diff --git a/crawler.py b/crawler.py
index 5d7fe89..71787c9 100755
--- a/crawler.py
+++ b/crawler.py
@@ -27,7 +27,7 @@
 def list_hwpack(url):
     ''' returns tuple of (buildate, url)
     '''
-    urls = list_links(url, r'<a\s*href=[\'|"].*[\'"]>(hwpack.*?\.tar\.gz)</a>')
+    urls = list_links(url, r'<a\s*href=[\'|"].*/(.*hwpack.*?\.tar\.gz)[\'"]')
     for link in urls:
         try:
             build_date = re.compile('_(\d+)-').findall(link)
@@ -41,7 +41,7 @@
        [ (20120210, http://foo.bar/hwpack.tar.gz), (20120209, blah.tar.gz) ]
     '''
     # only analyze the last few builds
-    links = list_links(url, r'<a\s*href=[\'|"].*[\'"]>(\d+)/?</a>')
+    links = list_links(url, r'<a\s*href=[\'"].*/(\d+)[\'"]')
     links = sorted(links, reverse=True, key=int)[:limit]
     hwpacks = []
     for link in links:
@@ -52,7 +52,7 @@
 
 def list_rfs(url):
     links = list_links(url,
-       r'<a\s*href=[\'|"].*[\'"]>(.*\-\d+\.(?!config)(?:rootfs\.)?tar\.gz)</a>')
+       r'<a\s*href=[\'|"].*/(.*(?!config)(?:rootfs\.)?tar\.gz)[\'"]')
     if len(links) is 1:
         return "%s/%s" %(url,links[0])
     return None
@@ -62,7 +62,7 @@
     Returns a tuple of (builddate, url)
     '''
     # only analyze the last few builds
-    links = list_links(url, r'<a\s*href=[\'"].*[\'"]>(\d+)/?</a>')
+    links = list_links(url, r'<a\s*href=[\'"].*/(\d+)[\'"]')
     links = sorted(links, reverse=True, key=int)[:limit]
     for link in links:
         build = list_rfs('%s/%s' %(url, link))