From 1513aaa2f1b65a2923928900c800b7da5176b86a Mon Sep 17 00:00:00 2001 From: Andy Doan Date: Wed, 3 May 2017 13:05:21 -0500 Subject: fix last commit The last commit caused a big issue with *existing* files in S3 that had URL encodable characters. Specifically the "+" character is fine in S3, doesn't need to be encoded, and the change breaks doing lookups of it. This change converts things for us to build up a known list of substitiuions. Change-Id: I2a1abbac6ec63aa9612196a7aba4f3be5ac4b1b1 --- license_protected_downloads/api/v3.py | 3 +-- license_protected_downloads/artifact/s3.py | 18 +++++++++++++++--- license_protected_downloads/common.py | 3 +-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/license_protected_downloads/api/v3.py b/license_protected_downloads/api/v3.py index 9ef6470..37b8a1a 100644 --- a/license_protected_downloads/api/v3.py +++ b/license_protected_downloads/api/v3.py @@ -1,5 +1,4 @@ import os -import urllib from django.conf import settings from django.http import HttpResponse @@ -28,7 +27,7 @@ class PublishResource(v2.PublishResource): if not b: raise HttpResponseError('S3 is not enabled', 403) - path = urllib.pathname2url(self.path) + path = S3Artifact.pathname2url(self.path) k = b.new_key(settings.S3_PREFIX_PATH + path) if k.exists(): APILog.mark(self.request, 'FILE_OVERWRITE_DENIED') diff --git a/license_protected_downloads/artifact/s3.py b/license_protected_downloads/artifact/s3.py index 0ba0cca..6bd870b 100644 --- a/license_protected_downloads/artifact/s3.py +++ b/license_protected_downloads/artifact/s3.py @@ -1,7 +1,6 @@ import datetime import mimetypes import os -import urllib import time import boto @@ -29,6 +28,19 @@ class S3Artifact(Artifact): cls.bucket = c.get_bucket(settings.S3_BUCKET) return cls.bucket + @staticmethod + def pathname2url(path): + '''There are certain characters S3 doesn't deal with for key names when + trying to generate signed urls. This url-encodes them in a way that + makes S3 happy.''' + # currently the "~" is the only character we know breaks things + return path.replace('~', '%7E') + + @staticmethod + def url2pathname(path): + '''Reverses the changes done by pathname2url''' + return path.replace('%7E', '~') + def __init__(self, bucket, item, parent, human_readable): base = item.name.replace(settings.S3_PREFIX_PATH, '') if base: @@ -51,7 +63,7 @@ class S3Artifact(Artifact): file_name = '' item.size = 0 item.last_modified = '-' - file_name = urllib.url2pathname(file_name) + file_name = self.url2pathname(file_name) self.bucket = bucket self.parent = parent if parent and hasattr(self.parent, 'children'): @@ -130,7 +142,7 @@ class S3Artifact(Artifact): else: key += '/' + os.path.dirname(self.file_name) + fname try: - key = boto.s3.key.Key(self.bucket, urllib.pathname2url(key)) + key = boto.s3.key.Key(self.bucket, self.pathname2url(key)) return key.get_contents_as_string() except boto.exception.S3ResponseError: pass # return None - its okay diff --git a/license_protected_downloads/common.py b/license_protected_downloads/common.py index fe4722d..a10f318 100644 --- a/license_protected_downloads/common.py +++ b/license_protected_downloads/common.py @@ -1,6 +1,5 @@ import fnmatch import os -import urllib import boto @@ -98,7 +97,7 @@ def _find_s3_artifact(request, path): if not b: return # s3 isn't configured - prefix = settings.S3_PREFIX_PATH + urllib.pathname2url(path) + prefix = settings.S3_PREFIX_PATH + S3Artifact.pathname2url(path) if prefix[-1] == '/': # s3 listing give sub dir, we don't want that prefix = prefix[:-1] -- cgit v1.2.3