author    Andy Doan <andy.doan@linaro.org>    2015-08-06 10:46:05 -0500
committer Andy Doan <andy.doan@linaro.org>    2015-08-18 14:17:24 -0500
commit    e7bda062601c0c64c7630050eeada9c5d4bc62f9 (patch)
tree      994cce0767eb4daae7c805b9b0d955db2fb3efab
parent    dc2e671b6a666a8f22812c3ab03ceb350c405a35 (diff)
add support for an S3Artifact
A few base test cases now need to mock out S3Artifact.get_bucket so they are guaranteed to only check local files. We then have a new test_s3 module that extends the base view tests to make sure S3 handles all the interesting checks we need.

Change-Id: I02792f5bdce380fe621ac19b918dd9fe93bcb08d
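The mocking the message describes follows this pattern (a minimal sketch, not part of the patch itself; the real patches appear in the test diffs below):

    import mock

    # Patch get_bucket so the code under test never talks to S3; returning
    # None makes the S3 lookup paths bail out early (see common.py below).
    m = mock.patch(
        'license_protected_downloads.artifact.S3Artifact.get_bucket')
    mock_bucket = m.start()
    mock_bucket.return_value = None
    # ... exercise the view ...
    m.stop()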
-rw-r--r--license_protected_downloads/artifact/__init__.py1
-rw-r--r--license_protected_downloads/artifact/s3.py147
-rw-r--r--license_protected_downloads/common.py82
-rw-r--r--license_protected_downloads/tests/__init__.py1
-rw-r--r--license_protected_downloads/tests/test_api_v1.py6
-rw-r--r--license_protected_downloads/tests/test_api_v2.py6
-rw-r--r--license_protected_downloads/tests/test_s3.py205
-rw-r--r--license_protected_downloads/tests/test_views.py6
-rw-r--r--license_protected_downloads/views.py5
-rw-r--r--requirements.txt1
10 files changed, 451 insertions, 9 deletions
diff --git a/license_protected_downloads/artifact/__init__.py b/license_protected_downloads/artifact/__init__.py
index fbdd24a..0439b36 100644
--- a/license_protected_downloads/artifact/__init__.py
+++ b/license_protected_downloads/artifact/__init__.py
@@ -1,2 +1,3 @@
from .base import Artifact # NOQA
from .local import LocalArtifact # NOQA
+from .s3 import S3Artifact # NOQA
diff --git a/license_protected_downloads/artifact/s3.py b/license_protected_downloads/artifact/s3.py
new file mode 100644
index 0000000..68a7b52
--- /dev/null
+++ b/license_protected_downloads/artifact/s3.py
@@ -0,0 +1,147 @@
+import datetime
+import mimetypes
+import os
+import time
+
+import boto
+
+from django.conf import settings
+from django.http import HttpResponseRedirect
+
+from license_protected_downloads.artifact.base import (
+    Artifact,
+    cached_prop,
+)
+
+
+class S3Artifact(Artifact):
+    bucket = None
+
+    @classmethod
+    def get_bucket(cls):
+        '''Keep a single bucket object cached for the life of the process'''
+        if not cls.bucket:
+            b = getattr(settings, 'S3_BUCKET', None)
+            if b:
+                c = boto.connect_s3(
+                    settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
+                cls.bucket = c.get_bucket(settings.S3_BUCKET)
+        return cls.bucket
+
+    def __init__(self, bucket, item, parent, human_readable):
+        base = '/' + os.path.dirname(item.name)
+        base = base.replace(settings.S3_PREFIX_PATH, '')
+
+        if hasattr(item, 'size'):
+            file_name = os.path.basename(item.name)
+            self.mtype = mimetypes.guess_type(item.name)[0]
+            dt = datetime.datetime.strptime(
+                item.last_modified, "%Y-%m-%dT%H:%M:%S.000Z")
+            item.last_modified = time.mktime(dt.timetuple())
+            self.item = item
+        else:
+            self.mtype = 'folder'
+            self.children = []
+            base = os.path.dirname(base)
+            file_name = os.path.basename(item.name[:-1])
+            item.size = 0
+            item.last_modified = '-'
+        self.bucket = bucket
+        self.parent = parent
+        if parent and hasattr(self.parent, 'children'):
+            self.parent.children.append(self)
+        super(S3Artifact, self).__init__(
+            base, file_name, item.size, item.last_modified, human_readable)
+
+    def get_type(self):
+        if self.human_readable:
+            if self.mtype is None:
+                return 'other'
+            elif self.mtype.split('/')[0] == 'text':
+                return 'text'
+        return self.mtype
+
+    def get_file_download_response(self):
+        "Return a redirect sending the user's browser to a signed S3 URL."
+        assert not self.isdir()
+        return HttpResponseRedirect(self.item.generate_url(90))
+
+    @cached_prop
+    def build_info_buffer(self):
+        if self.parent and not self.isdir():
+            return self.parent.build_info_buffer
+
+        if self.urlbase == '/':
+            key = settings.S3_PREFIX_PATH[:-1]
+        else:
+            key = settings.S3_PREFIX_PATH + self.urlbase[1:]
+
+        if self.isdir():
+            key += '/' + self.file_name
+        key += '/BUILD-INFO.txt'
+
+        try:
+            key = boto.s3.key.Key(self.bucket, key)
+            return key.get_contents_as_string()
+        except boto.exception.S3ResponseError:
+            pass  # no BUILD-INFO.txt; returning None is okay
+
+    @cached_prop
+    def _container_eulas(self):
+        if not self.isdir() and self.parent:
+            return self.parent._container_eulas
+
+        prefix = settings.S3_PREFIX_PATH + self.urlbase[1:]
+        if prefix[-1] != '/':
+            # s3 listing needs '/' to do a dir listing
+            prefix = prefix + '/'
+
+        if self.isdir():
+            prefix += self.file_name + '/'
+
+        eulas = []
+        for x in self.bucket.list(prefix=prefix, delimiter='/'):
+            if isinstance(x, boto.s3.key.Key) and 'EULA.txt' in x.name:
+                eulas.append(os.path.basename(x.name))
+        return eulas
+
+    def get_eulas(self):
+        '''Find EULAs for this artifact.
+
+        If this is a file, it uses the parent container's EULAs, which we
+        keep cached so that we only hit s3 one time.
+        '''
+        return self._container_eulas
+
+    def get_file_contents(self, fname):
+        if self.urlbase == '/':
+            key = settings.S3_PREFIX_PATH[:-1]
+        else:
+            key = settings.S3_PREFIX_PATH + self.urlbase[1:]
+
+        key += '/' + self.file_name + '/' + fname
+        try:
+            key = boto.s3.key.Key(self.bucket, key)
+            return key.get_contents_as_string()
+        except boto.exception.S3ResponseError:
+            pass  # no such file; returning None is okay
+
+    def get_textile_files(self):
+        assert self.isdir()
+        # NOTE: This logic assumes some optimizations based on how files
+        # are currently published. Legacy publishing required more complex
+        # searching, but all new publishing works with this logic.
+        allowed = settings.ANDROID_FILES + settings.LINUX_FILES
+        for x in self.children:
+            if not x.isdir() and os.path.basename(x.item.name) in allowed:
+                yield (x.item.name, x.item)
+
+    def get_annotated_manifest(self):
+        assert self.isdir()
+        for x in self.children:
+            if not x.isdir() and \
+                    os.path.basename(x.item.name) == settings.ANNOTATED_XML:
+                return x.item.read()
+
+    def isdir(self):
+        return self.mtype == 'folder'
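Roughly, a caller drives this class as in the sketch below (a hypothetical walk of the bucket; the prefix value is illustrative):

    from django.conf import settings
    from license_protected_downloads.artifact import S3Artifact

    bucket = S3Artifact.get_bucket()  # cached boto bucket, None if unconfigured
    if bucket:
        prefix = settings.S3_PREFIX_PATH + 'android/some-build/'
        # delimiter='/' makes boto yield Prefix objects for "directories" and
        # Key objects for files, mimicking a filesystem listing
        for item in bucket.list(prefix=prefix, delimiter='/'):
            artifact = S3Artifact(bucket, item, None, False)
            print artifact.file_name, artifact.isdir()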
diff --git a/license_protected_downloads/common.py b/license_protected_downloads/common.py
index 3c982d8..87043ab 100644
--- a/license_protected_downloads/common.py
+++ b/license_protected_downloads/common.py
@@ -1,15 +1,15 @@
import fnmatch
import os
+import boto
from django.conf import settings
from django.http import Http404
-from license_protected_downloads import(
-    models,
-)
+from license_protected_downloads import models
from license_protected_downloads.artifact import(
    LocalArtifact,
+    S3Artifact,
)
@@ -49,6 +49,22 @@ def _handle_wildcard(request, fullpath):
    return match
+def _handle_s3_wildcard(request, bucket, prefix):
+    prefix, base = os.path.split(prefix)
+    if '*' in base or '?' in base:
+        match = None
+        prefix += '/'
+        items = list(bucket.list(delimiter='/', prefix=prefix))
+        for item in items:
+            if fnmatch.fnmatch(os.path.basename(item.name), base):
+                if match:
+                    request.path = 'Multiple files match this expression'
+                    raise Http404
+                match = item
+        if match:
+            return S3Artifact(bucket, match, None, False)
+
+
def _find_served_paths(path, request):
    served_paths = settings.SERVED_PATHS
    # if key is in request.GET["key"] then need to mod path and give
@@ -65,13 +81,34 @@ def _find_served_paths(path, request):
    return served_paths, path
+def _find_s3_artifact(request, path):
+    b = S3Artifact.get_bucket()
+    if not b:
+        return  # s3 isn't configured
+
+    prefix = settings.S3_PREFIX_PATH + path
+    if prefix[-1] == '/':
+        # strip it: with a trailing '/' s3 would list the directory's
+        # contents rather than the directory itself
+        prefix = prefix[:-1]
+
+    items = b.list(delimiter='/', prefix=prefix)
+    for item in items:
+        if isinstance(item, boto.s3.prefix.Prefix):
+            if item.name == prefix + '/':
+                return S3Artifact(b, item, None, False)
+        else:
+            if item.name == prefix:
+                return S3Artifact(b, item, None, False)
+    return _handle_s3_wildcard(request, b, prefix)
+
+
def find_artifact(request, path):
    """Return an Artifact object representing a directory or file we serve"""
    served_paths, path = _find_served_paths(path, request)
    for basepath in served_paths:
        fullpath = safe_path_join(basepath, path)
        if fullpath is None:
-            raise Http404
+            break
        if os.path.isfile(fullpath) or os.path.isdir(fullpath):
            return LocalArtifact(None, '', path, False, basepath)
@@ -80,6 +117,10 @@ def find_artifact(request, path):
            basepath, path = os.path.split(fullpath)
            return LocalArtifact(None, '', path, False, basepath)
+    r = _find_s3_artifact(request, path)
+    if r:
+        return r
+
    raise Http404
@@ -103,15 +144,40 @@ def _sort_artifacts(a, b):
    return cmp(a, b)
+def _s3_list(bucket, url):
+    prefix = settings.S3_PREFIX_PATH + url
+    if prefix[-1] != '/':
+        # s3 listing needs '/' to do a dir listing
+        prefix = prefix + '/'
+
+    for item in bucket.list(delimiter='/', prefix=prefix):
+        if item.name != prefix:
+            yield item
+
+
def dir_list(artifact, human_readable=True):
-    path = artifact.full_path
    url = artifact.url()
-    artifacts = [LocalArtifact(artifact, url, x, human_readable, path)
-                 for x in os.listdir(path)]
+    artifacts = []
+    if isinstance(artifact, LocalArtifact):
+        fp = artifact.full_path
+        artifacts = [LocalArtifact(artifact, url, x, human_readable, fp)
+                     for x in os.listdir(fp)]
+
+    b = S3Artifact.get_bucket()
+    if b:
+        for item in _s3_list(b, url[1:]):
+            artifacts.append(S3Artifact(b, item, artifact, human_readable))
+
    artifacts.sort(_sort_artifacts)
+    # s3 and local could return duplicate names. Since the artifacts are
+    # sorted, we can check whether the last name matches and skip duplicates
+    # as needed. This gives precedence to local artifacts since they show up
+    # first in the array.
+    last_name = None
    listing = []
    for artifact in artifacts:
-        if not artifact.hidden():
+        if last_name != artifact.file_name and not artifact.hidden():
            listing.append(artifact.get_listing())
+
+        last_name = artifact.file_name
    return listing
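The wildcard path reuses shell-style matching on the basename, just like the existing local handler; for example:

    import fnmatch

    # _handle_s3_wildcard matches the final path component this way
    fnmatch.fnmatch('origen-blob.txt', 'origen-*.txt')    # True
    fnmatch.fnmatch('snowball-blob.txt', 'origen-*.txt')  # False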
diff --git a/license_protected_downloads/tests/__init__.py b/license_protected_downloads/tests/__init__.py
index 198acf8..de81959 100644
--- a/license_protected_downloads/tests/__init__.py
+++ b/license_protected_downloads/tests/__init__.py
@@ -11,3 +11,4 @@ from license_protected_downloads.tests.test_pyflakes import *
from license_protected_downloads.tests.test_render_text_files import *
from license_protected_downloads.tests.test_splicebuildinfos import *
from license_protected_downloads.tests.test_views import *
+from license_protected_downloads.tests.test_s3 import *
diff --git a/license_protected_downloads/tests/test_api_v1.py b/license_protected_downloads/tests/test_api_v1.py
index 41cfbfe..7238fae 100644
--- a/license_protected_downloads/tests/test_api_v1.py
+++ b/license_protected_downloads/tests/test_api_v1.py
@@ -44,6 +44,12 @@ class APITests(TestCase):
        self.tmpdir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.tmpdir)
+        m = mock.patch(
+            'license_protected_downloads.artifact.S3Artifact.get_bucket')
+        self.addCleanup(m.stop)
+        mo = m.start()
+        mo.return_value = None
+
    def test_api_get_license_list(self):
        target_file = "build-info/snowball-blob.txt"
        digest = ViewTests.set_up_license(target_file)
diff --git a/license_protected_downloads/tests/test_api_v2.py b/license_protected_downloads/tests/test_api_v2.py
index efb5d49..12dc1fd 100644
--- a/license_protected_downloads/tests/test_api_v2.py
+++ b/license_protected_downloads/tests/test_api_v2.py
@@ -28,6 +28,12 @@ class APIv2Tests(TestCase):
        self.addCleanup(m.stop)
        m.start()
+        m = mock.patch(
+            'license_protected_downloads.artifact.S3Artifact.get_bucket')
+        self.addCleanup(m.stop)
+        mo = m.start()
+        mo.return_value = None
+
    def test_token_no_auth(self):
        resp = self.client.get('/api/v2/token/')
        self.assertEqual(401, resp.status_code)
diff --git a/license_protected_downloads/tests/test_s3.py b/license_protected_downloads/tests/test_s3.py
new file mode 100644
index 0000000..00904ed
--- /dev/null
+++ b/license_protected_downloads/tests/test_s3.py
@@ -0,0 +1,205 @@
+import os
+import shutil
+import tempfile
+import unittest
+import urlparse
+
+from django.conf import settings
+from django.http import Http404
+from django.test import TestCase
+
+import mock
+
+from license_protected_downloads.artifact import Artifact, S3Artifact
+from license_protected_downloads import common
+from license_protected_downloads.tests.test_views import (
+    BuildInfoProtectedTests,
+    EulaProtectedTests,
+    HeaderTests,
+    TESTSERVER_ROOT,
+    WildCardTests,
+)
+
+_orig_s3_prefix = getattr(settings, 'S3_PREFIX_PATH', None)
+_s3_enabled = _orig_s3_prefix is not None
+
+
+def _upload_sampleroot(bucket):
+    # make sure nothing was left from an old run
+    keys = bucket.list(settings.S3_PREFIX_PATH)
+    bucket.delete_keys(keys)
+
+    for root, dirs, files in os.walk(TESTSERVER_ROOT):
+        prefix = root[len(TESTSERVER_ROOT) + 1:]
+        for f in files:
+            if prefix:
+                path = prefix + '/' + f
+            else:
+                path = f
+            key = settings.S3_PREFIX_PATH + path
+            key = bucket.get_key(key, validate=False)
+            f = os.path.join(TESTSERVER_ROOT, path)
+            if os.path.exists(f):
+                key.set_contents_from_filename(f)
+
+
+if _s3_enabled:
+    def setUpModule():
+        settings.S3_PREFIX_PATH = settings.S3_PREFIX_PATH[:-1] + '-test/'
+        bucket = S3ViewTest.get_bucket()
+        if 'FAST_TEST' not in os.environ:
+            _upload_sampleroot(bucket)
+
+    def tearDownModule():
+        settings.S3_PREFIX_PATH = _orig_s3_prefix
+
+
+@unittest.skipIf(_s3_enabled is False, 's3 not configured')
+class S3ViewTest(BuildInfoProtectedTests, EulaProtectedTests, WildCardTests,
+                 HeaderTests):
+    '''Extend all the view tests to exercise them with an S3 bucket backing'''
+    bucket = None
+
+    @staticmethod
+    def get_bucket():
+        if not S3ViewTest.bucket:
+            S3ViewTest.bucket = S3Artifact.get_bucket()
+        return S3ViewTest.bucket
+
+    def setUp(self):
+        super(S3ViewTest, self).setUp()
+
+        self.request = mock.Mock()
+        self.request.GET = {}
+        self.s3_mock.return_value = S3ViewTest.get_bucket()
+
+        # force lookups to hit S3 rather than local files
+        m = mock.patch('django.conf.settings.SERVED_PATHS',
+                       new_callable=lambda: [])
+        self.addCleanup(m.stop)
+        m.start()
+
+    def _test_get_file(self, path, follow_redirect):
+        # all s3 GETs are redirects; we can't follow them in the django
+        # test client, so just assert we get a 302 and the path looks sane
+        url = urlparse.urljoin(self.urlbase, path)
+        resp = self.client.get(url)
+        self.assertEqual(302, resp.status_code)
+        self.assertIn('Signature=', resp['Location'])
+
+    @staticmethod
+    def _get_artifact(path):
+        request = mock.Mock()
+        request.GET = {}
+        return common._find_s3_artifact(request, path)
+
+
+@unittest.skipIf(_s3_enabled is False, 's3 not configured')
+class TestS3(TestCase):
+    '''Tests specific to S3 not covered in test_views'''
+    def setUp(self):
+        self.request = mock.Mock()
+        self.request.GET = {}
+        self.request.bucket = S3ViewTest.get_bucket()
+
+        # force lookups to hit S3 rather than local files
+        self.served_paths = mock.patch(
+            'django.conf.settings.SERVED_PATHS', new_callable=lambda: [])
+        self.addCleanup(self.served_paths.stop)
+        self.served_paths.start()
+
+    def test_find_artifact_404(self):
+        '''Ensure we 404 on a bad key'''
+        with self.assertRaises(Http404):
+            common.find_artifact(self.request, 'does not exist')
+
+    def test_find_artifact_partial(self):
+        '''Don't return partial s3 matches
+
+        if s3 has a key like 'foo/bar' it will return a match if you request
+        'foo/ba'. Validate we reject that
+        '''
+        # we have two files starting with "o" under build-info
+        common.find_artifact(self.request, 'build-info/openid.txt')
+        with self.assertRaises(Http404):
+            common.find_artifact(self.request, 'build-info/o')
+
+    def test_find_artifact_directory(self):
+        '''S3 gives different listings for subdir/ and subdir
+
+        The trailing slash implies a directory listing. Assert we always
+        remove the trailing slash.
+        '''
+        a = common.find_artifact(self.request, '~linaro-android')
+        self.assertTrue(isinstance(a, common.S3Artifact))
+        self.assertTrue(a.isdir())
+        a = common.find_artifact(self.request, '~linaro-android/')
+        self.assertTrue(isinstance(a, common.S3Artifact))
+        self.assertTrue(a.isdir())
+
+    def test_find_artifact_file(self):
+        a = common.find_artifact(self.request, 'images/origen-blob.txt')
+        self.assertTrue(isinstance(a, common.S3Artifact))
+        self.assertFalse(a.isdir())
+
+    def test_build_info_cached(self):
+        '''TODO: ensure we cache build-info buffer after 1st request'''
+
+    def test_eulas_cached(self):
+        '''TODO: ensure we cache eulas for directory after 1st request'''
+
+
+@unittest.skipIf(_s3_enabled is False, 's3 not configured')
+class TestMixedBuilds(TestCase):
+    '''Ensure we can handle a build that has both local and s3 artifacts
+
+    eg: build_foo/
+          1/  # this is local
+          2/  # this is in S3
+    '''
+
+    def setUp(self):
+        self.request = mock.Mock()
+        self.request.GET = {}
+
+        self.tempdir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.tempdir)
+
+        m = mock.patch('django.conf.settings.SERVED_PATHS',
+                       new_callable=lambda: [self.tempdir])
+        self.addCleanup(m.stop)
+        m.start()
+
+        # we'll now have a layout like:
+        #   ~linaro-android/staging-snowball/1   (local)
+        #   ~linaro-android/staging-snowball/173 (s3)
+        path = os.path.join(self.tempdir, '~linaro-android/staging-snowball/1')
+        os.makedirs(path)
+
+    def test_find_artifact_both(self):
+        # first make sure if both s3 and local are found we return the local
+        # instance
+        a = common.find_artifact(
+            self.request, '~linaro-android/staging-snowball')
+        self.assertTrue(isinstance(a, Artifact))
+        self.assertTrue(a.isdir())
+
+        # test the listing
+        builds = [x['name'] for x in common.dir_list(a)]
+        self.assertEqual(['1', '173'], builds)
+
+    def test_prefer_local(self):
+        '''if we happen to have local and s3 build, list local'''
+        path = os.path.join(
+            self.tempdir, '~linaro-android/staging-snowball/173')
+        os.makedirs(path)
+
+        a = common.find_artifact(
+            self.request, '~linaro-android/staging-snowball')
+        # test the listing
+        listing = common.dir_list(a)
+        builds = [x['name'] for x in listing]
+        self.assertEqual(['1', '173'], builds)
+
+        # s3 folder listings have no "mtime" (they show '-'), so we can use
+        # that to verify the local artifact was preferred:
+        self.assertNotEqual('-', listing[1]['mtime'])
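These tests are skipped unless S3 is configured. The settings they depend on are the ones referenced throughout this patch (the values below are placeholders, not real credentials):

    # settings (hypothetical values)
    AWS_ACCESS_KEY_ID = 'AKIAXXXXXXXXXXXXXXXX'
    AWS_SECRET_ACCESS_KEY = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
    S3_BUCKET = 'my-releases-bucket'
    # must end with '/'; setUpModule above swaps in a '...-test/' variant
    S3_PREFIX_PATH = 'releases/'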
diff --git a/license_protected_downloads/tests/test_views.py b/license_protected_downloads/tests/test_views.py
index fdd8e8c..6cf7674 100644
--- a/license_protected_downloads/tests/test_views.py
+++ b/license_protected_downloads/tests/test_views.py
@@ -38,6 +38,12 @@ class BaseServeViewTest(TestCase):
        self.urlbase = 'http://testserver/'
+        m = mock.patch(
+            'license_protected_downloads.artifact.S3Artifact.get_bucket')
+        self.addCleanup(m.stop)
+        self.s3_mock = m.start()
+        self.s3_mock.return_value = None
+
    def tearDown(self):
        settings.SERVED_PATHS = self.old_served_paths
        settings.MASTER_API_KEY = self.old_master_api_key
diff --git a/license_protected_downloads/views.py b/license_protected_downloads/views.py
index e350e7e..5bcd6ee 100644
--- a/license_protected_downloads/views.py
+++ b/license_protected_downloads/views.py
@@ -170,6 +170,9 @@ def _handle_dir_list(request, artifact):
    else:
        up_dir = None
+    # must come before the call to find_and_render to optimize s3
+    # (dir_list populates an S3Artifact's children)
+    dirlist = dir_list(artifact)
+
    download = None
    if 'dl' in request.GET:
        download = request.GET['dl']
@@ -178,7 +181,6 @@ def _handle_dir_list(request, artifact):
    if ann:
        rendered_files["Git Descriptions"] = render_descriptions(ann)
-    dirlist = dir_list(artifact)
    lics = [x['license_digest_list'] for x in dirlist
            if x['license_digest_list']]
@@ -247,6 +249,7 @@ def file_server_get(request, path):
def get_textile_files(request):
    artifact = find_artifact(request, request.GET.get("path"))
+    dir_list(artifact)  # required for s3: populates artifact.children
    rendered_files = RenderTextFiles.find_and_render(artifact)
    ann = artifact.get_annotated_manifest()
    if ann:
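The ordering in both hunks matters because, for S3, dir_list() is what populates an S3Artifact's children, which get_textile_files() walks; assuming find_and_render ultimately consumes those children, the flow is:

    artifact = find_artifact(request, request.GET.get('path'))
    dir_list(artifact)  # side effect: fills artifact.children for S3
    rendered_files = RenderTextFiles.find_and_render(artifact)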
diff --git a/requirements.txt b/requirements.txt
index 33640c5..6383b64 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,7 @@ python-openid
requests
South==0.7.3
textile
+boto
mock
testrepository