| author | Andy Doan <andy.doan@linaro.org> | 2015-08-06 10:46:05 -0500 |
|---|---|---|
| committer | Andy Doan <andy.doan@linaro.org> | 2015-08-18 14:17:24 -0500 |
| commit | e7bda062601c0c64c7630050eeada9c5d4bc62f9 (patch) | |
| tree | 994cce0767eb4daae7c805b9b0d955db2fb3efab | |
| parent | dc2e671b6a666a8f22812c3ab03ceb350c405a35 (diff) | |
add support for an S3Artifact
A few base test cases now need to mock out S3Artifact.get_bucket so that
they are guaranteed to check only local files. A new test_s3 module then
extends the base view tests to make sure the S3 backend handles all the
interesting checks we need.
Change-Id: I02792f5bdce380fe621ac19b918dd9fe93bcb08d
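For reference, the S3 support added below is driven entirely by Django settings. A minimal sketch of the settings it reads, assuming placeholder values (the setting names come from the diff; the bucket name, prefix, and credentials are illustrative only):

```python
# settings.py (sketch) -- setting names taken from artifact/s3.py and common.py below
S3_BUCKET = 'example-releases-bucket'   # hypothetical bucket name
S3_PREFIX_PATH = 'releases/'            # artifacts live under this key prefix (trailing '/' expected)
AWS_ACCESS_KEY_ID = 'REPLACE_ME'
AWS_SECRET_ACCESS_KEY = 'REPLACE_ME'
```

If S3_BUCKET is unset, S3Artifact.get_bucket() returns None and the views fall back to serving local artifacts only.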
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | license_protected_downloads/artifact/__init__.py | 1 |
| -rw-r--r-- | license_protected_downloads/artifact/s3.py | 147 |
| -rw-r--r-- | license_protected_downloads/common.py | 82 |
| -rw-r--r-- | license_protected_downloads/tests/__init__.py | 1 |
| -rw-r--r-- | license_protected_downloads/tests/test_api_v1.py | 6 |
| -rw-r--r-- | license_protected_downloads/tests/test_api_v2.py | 6 |
| -rw-r--r-- | license_protected_downloads/tests/test_s3.py | 205 |
| -rw-r--r-- | license_protected_downloads/tests/test_views.py | 6 |
| -rw-r--r-- | license_protected_downloads/views.py | 5 |
| -rw-r--r-- | requirements.txt | 1 |

10 files changed, 451 insertions(+), 9 deletions(-)
diff --git a/license_protected_downloads/artifact/__init__.py b/license_protected_downloads/artifact/__init__.py
index fbdd24a..0439b36 100644
--- a/license_protected_downloads/artifact/__init__.py
+++ b/license_protected_downloads/artifact/__init__.py
@@ -1,2 +1,3 @@
 from .base import Artifact  # NOQA
 from .local import LocalArtifact  # NOQA
+from .s3 import S3Artifact  # NOQA
diff --git a/license_protected_downloads/artifact/s3.py b/license_protected_downloads/artifact/s3.py
new file mode 100644
index 0000000..68a7b52
--- /dev/null
+++ b/license_protected_downloads/artifact/s3.py
@@ -0,0 +1,147 @@
+import datetime
+import mimetypes
+import os
+import time
+
+import boto
+
+from django.conf import settings
+from django.http import HttpResponseRedirect
+
+from license_protected_downloads.artifact.base import (
+    Artifact,
+    cached_prop,
+)
+
+
+class S3Artifact(Artifact):
+    bucket = None
+
+    @classmethod
+    def get_bucket(cls):
+        '''Keeps a single bucket object cached for the duration of a request'''
+        if not cls.bucket:
+            b = getattr(settings, 'S3_BUCKET', None)
+            if b:
+                c = boto.connect_s3(
+                    settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
+                cls.bucket = c.get_bucket(settings.S3_BUCKET)
+        return cls.bucket
+
+    def __init__(self, bucket, item, parent, human_readable):
+        base = '/' + os.path.dirname(item.name)
+        base = base.replace(settings.S3_PREFIX_PATH, '')
+
+        if hasattr(item, 'size'):
+            file_name = os.path.basename(item.name)
+            self.mtype = mimetypes.guess_type(item.name)[0]
+            dt = datetime.datetime.strptime(
+                item.last_modified, "%Y-%m-%dT%H:%M:%S.000Z")
+            item.last_modified = time.mktime(dt.timetuple())
+            self.item = item
+        else:
+            self.mtype = 'folder'
+            self.children = []
+            base = os.path.dirname(base)
+            file_name = os.path.basename(item.name[:-1])
+            item.size = 0
+            item.last_modified = '-'
+        self.bucket = bucket
+        self.parent = parent
+        if parent and hasattr(self.parent, 'children'):
+            self.parent.children.append(self)
+        super(S3Artifact, self).__init__(
+            base, file_name, item.size, item.last_modified, human_readable)
+
+    def get_type(self):
+        if self.human_readable:
+            if self.mtype is None:
+                return 'other'
+            elif self.mtype.split('/')[0] == 'text':
+                return 'text'
+        return self.mtype
+
+    def get_file_download_response(self):
+        "Return HttpResponse which will send path to user's browser."
+        assert not self.isdir()
+        return HttpResponseRedirect(self.item.generate_url(90))
+
+    @cached_prop
+    def build_info_buffer(self):
+        if self.parent and not self.isdir():
+            return self.parent.build_info_buffer
+
+        if self.urlbase == '/':
+            key = settings.S3_PREFIX_PATH[:-1]
+        else:
+            key = settings.S3_PREFIX_PATH + self.urlbase[1:]
+
+        if self.isdir():
+            key += '/' + self.file_name
+        key += '/BUILD-INFO.txt'
+
+        try:
+            key = boto.s3.key.Key(self.bucket, key)
+            return key.get_contents_as_string()
+        except boto.exception.S3ResponseError:
+            pass  # No build-info file, return None - it's okay
+
+    @cached_prop
+    def _container_eulas(self):
+        if not self.isdir() and self.parent:
+            return self.parent._container_eulas
+
+        prefix = settings.S3_PREFIX_PATH + self.urlbase[1:]
+        if prefix[-1] != '/':
+            # s3 listing needs '/' to do a dir listing
+            prefix = prefix + '/'
+
+        if self.isdir():
+            prefix += self.file_name + '/'
+
+        eulas = []
+        for x in self.bucket.list(prefix=prefix, delimiter='/'):
+            if isinstance(x, boto.s3.key.Key) and 'EULA.txt' in x.name:
+                eulas.append(os.path.basename(x.name))
+        return eulas
+
+    def get_eulas(self):
+        '''find eulas for this artifact
+
+        if this is a file, it will use the parent container's eulas, which
+        we keep cached so that we only hit s3 one time
+        '''
+        return self._container_eulas
+
+    def get_file_contents(self, fname):
+        if self.urlbase == '/':
+            key = settings.S3_PREFIX_PATH[:-1]
+        else:
+            key = settings.S3_PREFIX_PATH + self.urlbase[1:]
+
+        key += '/' + self.file_name + '/' + fname
+        try:
+            key = boto.s3.key.Key(self.bucket, key)
+            return key.get_contents_as_string()
+        except boto.exception.S3ResponseError:
+            pass  # return None - it's okay
+
+    def get_textile_files(self):
+        assert self.isdir()
+        # NOTE: This logic assumes some optimizations based on how files
+        # are currently published. Legacy publishing required more complex
+        # searching, but all new publishing will work with this logic.
+        allowed = settings.ANDROID_FILES + settings.LINUX_FILES
+        for x in self.children:
+            if not x.isdir() and os.path.basename(x.item.name) in allowed:
+                yield (x.item.name, x.item)
+
+    def get_annotated_manifest(self):
+        assert self.isdir()
+        for x in self.children:
+            if not x.isdir() and \
+                    os.path.basename(x.item.name) == settings.ANNOTATED_XML:
+                return x.item.read()
+
+    def isdir(self):
+        return self.mtype == 'folder'
diff --git a/license_protected_downloads/common.py b/license_protected_downloads/common.py
index 3c982d8..87043ab 100644
--- a/license_protected_downloads/common.py
+++ b/license_protected_downloads/common.py
@@ -1,15 +1,15 @@
 import fnmatch
 import os
+import boto
 
 from django.conf import settings
 from django.http import Http404
 
-from license_protected_downloads import(
-    models,
-)
+from license_protected_downloads import models
 from license_protected_downloads.artifact import(
     LocalArtifact,
+    S3Artifact,
 )
@@ -49,6 +49,22 @@ def _handle_wildcard(request, fullpath):
     return match
 
 
+def _handle_s3_wildcard(request, bucket, prefix):
+    prefix, base = os.path.split(prefix)
+    if '*' in base or '?' in base:
+        match = None
+        prefix += '/'
+        items = list(bucket.list(delimiter='/', prefix=prefix))
+        for item in items:
+            if fnmatch.fnmatch(os.path.basename(item.name), base):
+                if match:
+                    request.path = 'Multiple files match this expression'
+                    raise Http404
+                match = item
+        if match:
+            return S3Artifact(bucket, match, None, False)
+
+
 def _find_served_paths(path, request):
     served_paths = settings.SERVED_PATHS
     # if key is in request.GET["key"] then need to mod path and give
@@ -65,13 +81,34 @@ def _find_served_paths(path, request):
     return served_paths, path
 
 
+def _find_s3_artifact(request, path):
+    b = S3Artifact.get_bucket()
+    if not b:
+        return  # s3 isn't configured
+
+    prefix = settings.S3_PREFIX_PATH + path
+    if prefix[-1] == '/':
+        # s3 listing gives sub dir, we don't want that
+        prefix = prefix[:-1]
+
+    items = b.list(delimiter='/', prefix=prefix)
+    for item in items:
+        if isinstance(item, boto.s3.prefix.Prefix):
+            if item.name == prefix + '/':
+                return S3Artifact(b, item, None, False)
+        else:
+            if item.name == prefix:
+                return S3Artifact(b, item, None, False)
+    return _handle_s3_wildcard(request, b, prefix)
+
+
 def find_artifact(request, path):
     """Return a Artifact object representing a directory or file we serve"""
     served_paths, path = _find_served_paths(path, request)
     for basepath in served_paths:
         fullpath = safe_path_join(basepath, path)
         if fullpath is None:
-            raise Http404
+            break
         if os.path.isfile(fullpath) or os.path.isdir(fullpath):
             return LocalArtifact(None, '', path, False, basepath)
@@ -80,6 +117,10 @@
         basepath, path = os.path.split(fullpath)
         return LocalArtifact(None, '', path, False, basepath)
 
+    r = _find_s3_artifact(request, path)
+    if r:
+        return r
+
     raise Http404
@@ -103,15 +144,40 @@
     return cmp(a, b)
 
 
+def _s3_list(bucket, url):
+    prefix = settings.S3_PREFIX_PATH + url
+    if prefix[-1] != '/':
+        # s3 listing needs '/' to do a dir listing
+        prefix = prefix + '/'
+
+    for item in bucket.list(delimiter='/', prefix=prefix):
+        if item.name != prefix:
+            yield item
+
+
 def dir_list(artifact, human_readable=True):
-    path = artifact.full_path
     url = artifact.url()
-    artifacts = [LocalArtifact(artifact, url, x, human_readable, path)
-                 for x in os.listdir(path)]
+    artifacts = []
+    if isinstance(artifact, LocalArtifact):
+        fp = artifact.full_path
+        artifacts = [LocalArtifact(artifact, url, x, human_readable, fp)
+                     for x in os.listdir(fp)]
+
+    b = S3Artifact.get_bucket()
+    if b:
+        for item in _s3_list(b, url[1:]):
+            artifacts.append(S3Artifact(b, item, artifact, human_readable))
+
     artifacts.sort(_sort_artifacts)
+
+    # s3 and local could return duplicate names. Since the artifacts are sorted
+    # we can check if the last names match and skip duplicates if needed. This
+    # gives precedence to local artifacts since they show up first in the array
+    last_name = None
     listing = []
     for artifact in artifacts:
-        if not artifact.hidden():
+        if last_name != artifact.file_name and not artifact.hidden():
             listing.append(artifact.get_listing())
+
+        last_name = artifact.file_name
     return listing
diff --git a/license_protected_downloads/tests/__init__.py b/license_protected_downloads/tests/__init__.py
index 198acf8..de81959 100644
--- a/license_protected_downloads/tests/__init__.py
+++ b/license_protected_downloads/tests/__init__.py
@@ -11,3 +11,4 @@ from license_protected_downloads.tests.test_pyflakes import *
 from license_protected_downloads.tests.test_render_text_files import *
 from license_protected_downloads.tests.test_splicebuildinfos import *
 from license_protected_downloads.tests.test_views import *
+from license_protected_downloads.tests.test_s3 import *
diff --git a/license_protected_downloads/tests/test_api_v1.py b/license_protected_downloads/tests/test_api_v1.py
index 41cfbfe..7238fae 100644
--- a/license_protected_downloads/tests/test_api_v1.py
+++ b/license_protected_downloads/tests/test_api_v1.py
@@ -44,6 +44,12 @@ class APITests(TestCase):
         self.tmpdir = tempfile.mkdtemp()
         self.addCleanup(shutil.rmtree, self.tmpdir)
 
+        m = mock.patch(
+            'license_protected_downloads.artifact.S3Artifact.get_bucket')
+        self.addCleanup(m.stop)
+        mo = m.start()
+        mo.return_value = None
+
     def test_api_get_license_list(self):
         target_file = "build-info/snowball-blob.txt"
         digest = ViewTests.set_up_license(target_file)
diff --git a/license_protected_downloads/tests/test_api_v2.py b/license_protected_downloads/tests/test_api_v2.py
index efb5d49..12dc1fd 100644
--- a/license_protected_downloads/tests/test_api_v2.py
+++ b/license_protected_downloads/tests/test_api_v2.py
@@ -28,6 +28,12 @@ class APIv2Tests(TestCase):
         self.addCleanup(m.stop)
         m.start()
 
+        m = mock.patch(
+            'license_protected_downloads.artifact.S3Artifact.get_bucket')
+        self.addCleanup(m.stop)
+        mo = m.start()
+        mo.return_value = None
+
     def test_token_no_auth(self):
         resp = self.client.get('/api/v2/token/')
         self.assertEqual(401, resp.status_code)
diff --git a/license_protected_downloads/tests/test_s3.py b/license_protected_downloads/tests/test_s3.py
new file mode 100644
index 0000000..00904ed
--- /dev/null
+++ b/license_protected_downloads/tests/test_s3.py
@@ -0,0 +1,205 @@
+import os
+import shutil
+import tempfile
+import unittest
+import urlparse
+
+from django.conf import settings
+from django.http import Http404
+from django.test import TestCase
+
+import mock
+
+from license_protected_downloads.artifact import Artifact, S3Artifact
+from license_protected_downloads import common
+from license_protected_downloads.tests.test_views import (
+    BuildInfoProtectedTests,
+    EulaProtectedTests,
+    HeaderTests,
+    TESTSERVER_ROOT,
+    WildCardTests,
+)
+
+_orig_s3_prefix = getattr(settings, 'S3_PREFIX_PATH', None)
+_s3_enabled = _orig_s3_prefix is not None
+
+
+def _upload_sampleroot(bucket):
+    # make sure nothing was left from an old run
+    keys = bucket.list(settings.S3_PREFIX_PATH)
+    bucket.delete_keys(keys)
+
+    for root, dirs, files in os.walk(TESTSERVER_ROOT):
+        prefix = root[len(TESTSERVER_ROOT) + 1:]
+        for f in files:
+            if prefix:
+                path = prefix + '/' + f
+            else:
+                path = f
+            key = settings.S3_PREFIX_PATH + path
+            key = bucket.get_key(key, validate=False)
+            f = os.path.join(TESTSERVER_ROOT, path)
+            if os.path.exists(f):
+                key.set_contents_from_filename(f)
+
+
+if _s3_enabled:
+    def setUpModule():
+        settings.S3_PREFIX_PATH = settings.S3_PREFIX_PATH[:-1] + '-test/'
+        bucket = S3ViewTest.get_bucket()
+        if 'FAST_TEST' not in os.environ:
+            _upload_sampleroot(bucket)
+
+    def tearDownModule():
+        settings.S3_PREFIX_PATH = _orig_s3_prefix
+
+
+@unittest.skipIf(_s3_enabled is False, 's3 not configured')
+class S3ViewTest(BuildInfoProtectedTests, EulaProtectedTests, WildCardTests,
+                 HeaderTests):
+    '''Extend all the view tests to exercise with an S3 bucket backing'''
+    bucket = None
+
+    @staticmethod
+    def get_bucket():
+        if not S3ViewTest.bucket:
+            S3ViewTest.bucket = S3Artifact.get_bucket()
+        return S3ViewTest.bucket
+
+    def setUp(self):
+        super(S3ViewTest, self).setUp()
+
+        self.request = mock.Mock()
+        self.request.GET = {}
+        self.s3_mock.return_value = S3ViewTest.get_bucket()
+
+        # force lookups to hit S3 rather than local files
+        m = mock.patch('django.conf.settings.SERVED_PATHS',
+                       new_callable=lambda: [])
+        self.addCleanup(m.stop)
+        m.start()
+
+    def _test_get_file(self, path, follow_redirect):
+        # all s3 gets will be redirects, we can't follow them in the django
+        # test client, so just assert we get a 302 and the path seems sane
+        url = urlparse.urljoin(self.urlbase, path)
+        resp = self.client.get(url)
+        self.assertEqual(302, resp.status_code)
+        self.assertIn('Signature=', resp['Location'])
+
+    @staticmethod
+    def _get_artifact(path):
+        request = mock.Mock()
+        request.GET = {}
+        return common._find_s3_artifact(request, path)
+
+
+@unittest.skipIf(_s3_enabled is False, 's3 not configured')
+class TestS3(TestCase):
+    '''Tests specific to S3 not covered in test_views'''
+    def setUp(self):
+        self.request = mock.Mock()
+        self.request.GET = {}
+        self.request.bucket = S3ViewTest.get_bucket()
+
+        # force lookups to hit S3 rather than local files
+        self.served_paths = mock.patch(
+            'django.conf.settings.SERVED_PATHS', new_callable=lambda: [])
+        self.addCleanup(self.served_paths.stop)
+        self.served_paths.start()
+
+    def test_find_artifact_404(self):
+        '''Ensure we 404 on a bad key'''
+        with self.assertRaises(Http404):
+            common.find_artifact(self.request, 'does not exist')
+
+    def test_find_artifact_partial(self):
+        '''Don't return partial s3 matches
+
+        if s3 has a key like 'foo/bar' it will return a match if you request
+        'foo/ba'. Validate we reject that
+        '''
+        # we have two files starting with "o" under build-info
+        common.find_artifact(self.request, 'build-info/openid.txt')
+        with self.assertRaises(Http404):
+            common.find_artifact(self.request, 'build-info/o')
+
+    def test_find_artifact_directory(self):
+        '''S3 gives different listings for subdir/ and subdir
+
+        The trailing slash implies a directory listing. Assert we always
+        remove the trailing slash.
+        '''
+        a = common.find_artifact(self.request, '~linaro-android')
+        self.assertTrue(isinstance(a, common.S3Artifact))
+        self.assertTrue(a.isdir())
+        a = common.find_artifact(self.request, '~linaro-android/')
+        self.assertTrue(isinstance(a, common.S3Artifact))
+        self.assertTrue(a.isdir())
+
+    def test_find_artifact_file(self):
+        a = common.find_artifact(self.request, 'images/origen-blob.txt')
+        self.assertTrue(isinstance(a, common.S3Artifact))
+        self.assertFalse(a.isdir())
+
+    def test_build_info_cached(self):
+        '''TODO: ensure we cache build-info buffer after 1st request'''
+
+    def test_eulas_cached(self):
+        '''TODO: ensure we cache eulas for directory after 1st request'''
+
+
+@unittest.skipIf(_s3_enabled is False, 's3 not configured')
+class TestMixedBuilds(TestCase):
+    '''Ensure we can handle a build that may have local and s3 builds
+
+    eg: build_foo/
+          1/  # this is a local file
+          2/  # this is in S3
+    '''
+
+    def setUp(self):
+        self.request = mock.Mock()
+        self.request.GET = {}
+
+        self.tempdir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.tempdir)
+
+        m = mock.patch('django.conf.settings.SERVED_PATHS',
+                       new_callable=lambda: [self.tempdir])
+        self.addCleanup(m.stop)
+        m.start()
+
+        # we'll now have a layout like:
+        #   ~linaro-android/staging-snowball/1    (local)
+        #   ~linaro-android/staging-snowball/173  (s3)
+        path = os.path.join(self.tempdir, '~linaro-android/staging-snowball/1')
+        os.makedirs(path)
+
+    def test_find_artifact_both(self):
+        # first make sure if both s3 and local are found we return the local
+        # instance
+        a = common.find_artifact(
+            self.request, '~linaro-android/staging-snowball')
+        self.assertTrue(isinstance(a, Artifact))
+        self.assertTrue(a.isdir())
+
+        # test the listing
+        builds = [x['name'] for x in common.dir_list(a)]
+        self.assertEqual(['1', '173'], builds)
+
+    def test_prefer_local(self):
+        '''if we happen to have local and s3 build, list local'''
+        path = os.path.join(
+            self.tempdir, '~linaro-android/staging-snowball/173')
+        os.makedirs(path)
+
+        a = common.find_artifact(
+            self.request, '~linaro-android/staging-snowball')
+        # test the listing
+        listing = common.dir_list(a)
+        builds = [x['name'] for x in listing]
+        self.assertEqual(['1', '173'], builds)
+
+        # s3 folder listings have no "mtime", so we can validate with that:
+        self.assertNotEqual('-', listing[1]['mtime'])
diff --git a/license_protected_downloads/tests/test_views.py b/license_protected_downloads/tests/test_views.py
index fdd8e8c..6cf7674 100644
--- a/license_protected_downloads/tests/test_views.py
+++ b/license_protected_downloads/tests/test_views.py
@@ -38,6 +38,12 @@ class BaseServeViewTest(TestCase):
 
         self.urlbase = 'http://testserver/'
 
+        m = mock.patch(
+            'license_protected_downloads.artifact.S3Artifact.get_bucket')
+        self.addCleanup(m.stop)
+        self.s3_mock = m.start()
+        self.s3_mock.return_value = None
+
     def tearDown(self):
         settings.SERVED_PATHS = self.old_served_paths
         settings.MASTER_API_KEY = self.old_master_api_key
diff --git a/license_protected_downloads/views.py b/license_protected_downloads/views.py
index e350e7e..5bcd6ee 100644
--- a/license_protected_downloads/views.py
+++ b/license_protected_downloads/views.py
@@ -170,6 +170,9 @@ def _handle_dir_list(request, artifact):
     else:
         up_dir = None
 
+    # must come before call to find_and_render to optimize s3
+    dirlist = dir_list(artifact)
+
     download = None
     if 'dl' in request.GET:
         download = request.GET['dl']
@@ -178,7 +181,6 @@
     if ann:
         rendered_files["Git Descriptions"] = render_descriptions(ann)
 
-    dirlist = dir_list(artifact)
     lics = [x['license_digest_list'] for x in dirlist
             if x['license_digest_list']]
@@ -247,6 +249,7 @@ def file_server_get(request, path):
 
 def get_textile_files(request):
     artifact = find_artifact(request, request.GET.get("path"))
+    dir_list(request, artifact)  # required for s3
     rendered_files = RenderTextFiles.find_and_render(artifact)
     ann = artifact.get_annotated_manifest()
     if ann:
diff --git a/requirements.txt b/requirements.txt
index 33640c5..6383b64 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,7 @@
 python-openid
 requests
 South==0.7.3
 textile
+boto
 mock
 testrepository