aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKelley Spoon <kelley.spoon@linaro.org>2019-11-17 14:52:19 -0600
committerKelley Spoon <kelley.spoon@linaro.org>2019-11-27 02:34:16 +0000
commit70b49fc7aa6f108285721094b05199f7ecd1b341 (patch)
tree1bc08cc75278fedf797fa954593cd01190af8a17
parent7855130ddee12625d16df56f79ef8a4b9d16ac7f (diff)
downloadlinaro-license-protection-70b49fc7aa6f108285721094b05199f7ecd1b341.tar.gz
s3_flatten: break bucket listing up by delimiters
Boto's bucket.list_version() function has the option to specify a delimiter for the key names that are returned. Delimiters will treat any occurance of the character as if it was a new directory and will not "descend" into it. This change uses delimiters to traverse through all s3 file keys in the given --prefix, and will clean up the objects table before descending into a subdirectory in order to prevent wasting memory storing s3 key data for objects that are no longer going to be accessed. Change-Id: I294937c99c60e89a87439789b9031f50e2e5cefc Reviewed-on: https://review.linaro.org/c/infrastructure/linaro-license-protection/+/33359 Reviewed-by: Benjamin Copeland <ben.copeland@linaro.org>
-rw-r--r--license_protected_downloads/management/commands/s3_flatten.py51
1 file changed, 38 insertions, 13 deletions
diff --git a/license_protected_downloads/management/commands/s3_flatten.py b/license_protected_downloads/management/commands/s3_flatten.py
index 4441555..f5e6540 100644
--- a/license_protected_downloads/management/commands/s3_flatten.py
+++ b/license_protected_downloads/management/commands/s3_flatten.py
@@ -5,7 +5,8 @@ import logging
import datetime
from fnmatch import fnmatch
from boto.s3.connection import S3Connection
-from boto.s3 import deletemarker,key
+from boto.s3 import deletemarker,key,prefix
+import sys
logging.getLogger().setLevel(logging.INFO)
@@ -15,6 +16,8 @@ class Command(BaseCommand):
help = 'Mark files as deleted or delete files for good, which are older \
than X days'
+ bucket = None
+
@staticmethod
def add_arguments(parser):
parser.add_argument('--dryrun', action='store_true',
@@ -35,6 +38,9 @@ class Command(BaseCommand):
@staticmethod
def print_key(key):
+ if isinstance(key, prefix.Prefix):
+ return "DIRECTORY: %s" % key.name
+
if key.is_latest:
latest = "*"
else:
@@ -63,22 +69,34 @@ class Command(BaseCommand):
def handle(self, *args, **options):
conn = S3Connection(settings.AWS_ACCESS_KEY_ID,
settings.AWS_SECRET_ACCESS_KEY)
- bucket = conn.get_bucket(settings.S3_BUCKET, validate=False)
- now_mark = self.x_days_ago(int(options['markdays']))
- now_delete = self.x_days_ago(int(options['deletedays']))
+ self.bucket = conn.get_bucket(settings.S3_BUCKET, validate=False)
+ self.now_mark = self.x_days_ago(int(options['markdays']))
+ self.now_delete = self.x_days_ago(int(options['deletedays']))
+
+ self.handle_bucket(*args, **options)
+
+ def handle_bucket(self, *args, **options):
+ logging.info( "--> %s" % options['prefix'])
- bucket_keys = bucket.list_versions(options['prefix'])
+ bucket_keys = self.bucket.list_versions(options['prefix'], delimiter='/')
objs = {}
delete_list = []
+ subdirs = []
if options['verbose']:
- logging.info( "Delete day: %s" % now_delete)
- logging.info( "Mark day: %s" % now_mark)
+ logging.info( "Delete day: %s" % self.now_delete)
+ logging.info( "Mark day: %s" % self.now_mark)
for key in bucket_keys:
if options['verbose']:
- logging.info(self.print_key(key))
+ logging.info("%s - %s" %(self.print_key(key), type(key)))
+
+ # if it's a subdir, then we need to descend into in a separate
+ # call
+ if isinstance(key, prefix.Prefix):
+ subdirs.append(key.name)
+ continue
if key.name not in objs:
objs[key.name] = {'last':None, 'delete':None}
@@ -112,7 +130,7 @@ class Command(BaseCommand):
# purge as we go
if len(delete_list) > 1000:
while delete_list:
- self.delete_objects(bucket, delete_list[0:1000], settings.S3_PURGE_EXCLUDES, options['dryrun'], options['verbose'])
+ self.delete_objects(self.bucket, delete_list[0:1000], settings.S3_PURGE_EXCLUDES, options['dryrun'], options['verbose'])
delete_list = delete_list[1000:]
if options['verbose']:
@@ -131,7 +149,7 @@ class Command(BaseCommand):
delete_list.append(objs[candidate]['delete'])
else:
# check last_modified on the last real file, not delete marker
- if objs[candidate]['last'].last_modified < now_delete:
+ if objs[candidate]['last'].last_modified < self.now_delete:
delete_list.append(objs[candidate]['delete'])
delete_list.append(objs[candidate]['last'])
@@ -145,7 +163,7 @@ class Command(BaseCommand):
logging.info("excluded: %s" % candidate)
continue
else:
- if objs[candidate]['last'].last_modified < now_mark:
+ if objs[candidate]['last'].last_modified < self.now_mark:
if not options['dryrun']:
# by appending only the name rather than the key
# object, S3 should insert a delete marker
@@ -158,8 +176,15 @@ class Command(BaseCommand):
while len(delete_list) > 1000:
- self.delete_objects(bucket, delete_list[0:1000], settings.S3_PURGE_EXCLUDES, options['dryrun'], options['verbose'])
+ self.delete_objects(self.bucket, delete_list[0:1000], settings.S3_PURGE_EXCLUDES, options['dryrun'], options['verbose'])
delete_list = delete_list[1000:]
- self.delete_objects(bucket, delete_list[0:1000], settings.S3_PURGE_EXCLUDES, options['dryrun'], options['verbose'])
+ self.delete_objects(self.bucket, delete_list[0:1000], settings.S3_PURGE_EXCLUDES, options['dryrun'], options['verbose'])
if options['verbose']:
logging.info("done with cleanup.")
+
+ # clean up mem and descend to any child directories
+ del objs
+ for s in subdirs:
+ new_opts = options
+ new_opts['prefix'] = s
+ self.handle_bucket(*args, **new_opts)