diff options
author | Kelley Spoon <kelley.spoon@linaro.org> | 2019-11-08 08:50:30 -0600 |
---|---|---|
committer | Kelley Spoon <kelley.spoon@linaro.org> | 2019-11-12 15:14:18 +0000 |
commit | 7855130ddee12625d16df56f79ef8a4b9d16ac7f (patch) | |
tree | e3a06ddf3cf45f924499f0393827f4b22c652b0b | |
parent | c1d7e20344026be1b9b4e57071e03459e12ec6e6 (diff) | |
download | linaro-license-protection-7855130ddee12625d16df56f79ef8a4b9d16ac7f.tar.gz |
s3_flatten: add s3_flatten script
This script is intended to replace the s3_purge command.
It simplifies the logic by breaking the clean up process
into 3 parts:
1 - "flatten" all file objects to have no more than 1
version object and optionally 1 deletemarker regardless
of prefix
2 - delete any expired files from prefixes that have not been
excluded via S3_PURGE_EXCLUDES
3 - place deletemarkers on files that are older than MARK_DAYS
that have not been excluded via S3_PURGE_EXCLUDES
The other difference with this script is that it attempts to
determine what objects to delete as it iterates through the
versioned bucket listing, and will collect multiple delete
operations into a single multidelete request.
Change-Id: I25a227f574504bb4e828f29670aec366e87952c9
Reviewed-on: https://review.linaro.org/c/infrastructure/linaro-license-protection/+/33268
Reviewed-by: Benjamin Copeland <ben.copeland@linaro.org>
-rw-r--r-- | license_protected_downloads/management/commands/s3_flatten.py | 165 |
1 file changed, 165 insertions, 0 deletions
from django.conf import settings
from django.core.management.base import BaseCommand

import logging
import datetime
from fnmatch import fnmatch
from boto.s3.connection import S3Connection
from boto.s3 import deletemarker, key

logging.getLogger().setLevel(logging.INFO)


class Command(BaseCommand):
    """Flatten and expire objects in the versioned S3 publishing bucket.

    Replaces the old s3_purge command with a three-phase clean-up:

      1. "Flatten": for every object name, keep at most the newest real
         version and (optionally) the newest delete marker; queue every
         older version/marker for deletion, regardless of prefix.
      2. Delete for good: objects that already carry a delete marker and
         whose last real version is older than --deletedays lose both the
         version and the marker (unless excluded via S3_PURGE_EXCLUDES).
      3. Mark: objects without a delete marker that are older than
         --markdays get a new delete marker placed on them (unless
         excluded via S3_PURGE_EXCLUDES).

    Deletions are batched into multi-delete requests of at most 1000 keys
    (the S3 multi-object-delete limit).
    """

    help = 'Mark files as deleted or delete files for good, which are older \
            than X days'

    @staticmethod
    def add_arguments(parser):
        """Register command-line options on the Django argument parser."""
        parser.add_argument('--dryrun', action='store_true',
                            help='Do not perform any actions, just report')
        parser.add_argument('--markdays', default=90,
                            help='Number of days to mark files as deleted')
        parser.add_argument('--deletedays', default=180,
                            help='Number of days to delete files for good')
        parser.add_argument('--prefix', default='snapshots/aarch64laptops',
                            help='Custom prefix path')
        parser.add_argument('-V', '--verbose', action='store_true',
                            help='log detailed information on actions to INFO')

    @staticmethod
    def x_days_ago(days):
        """Return the ISO-8601 timestamp for *days* days before now.

        The result is compared lexicographically against boto's
        ``last_modified`` strings elsewhere in this command.
        NOTE(review): this uses naive local time while S3 timestamps are
        UTC with a trailing 'Z'; the comparison is only approximate at
        day granularity — confirm this skew is acceptable.
        """
        date = datetime.datetime.now() - datetime.timedelta(days=days)
        return date.isoformat()

    @staticmethod
    def print_key(key):
        """Format one version-listing entry for logging.

        Shows '*' for the latest version and 'DEL' for delete markers,
        followed by the last-modified time and version id.
        """
        latest = "*" if key.is_latest else " "
        dm = "DEL" if isinstance(key, deletemarker.DeleteMarker) else "   "
        return '%s: %s %s(%s) %s' % (
            key.name, dm, latest, key.last_modified, key.version_id)

    @staticmethod
    def delete_objects(bucket, delete_list, excludes_list=(),
                       dryrun=True, verbose=False):
        """Issue one multi-delete request for *delete_list* on *bucket*.

        *delete_list* may contain Key/DeleteMarker objects (specific
        versions) or bare names (which make S3 insert a delete marker).
        *excludes_list* is accepted for interface compatibility but is
        not consulted here — callers filter excluded names before
        queueing.  Default changed from a mutable ``[]`` to ``()`` to
        avoid the shared-mutable-default pitfall; behaviour is identical
        since the parameter is never mutated or read.
        With *dryrun* no request is sent; the batch size is only logged.
        """
        if verbose:
            for x in delete_list:
                if isinstance(x, (key.Key, deletemarker.DeleteMarker)):
                    logging.info("deleting: %s %s", x.name, x.version_id)
                else:
                    logging.info("deleting: %s", x)

        if not dryrun:
            bucket.delete_keys(delete_list)
        else:
            logging.info("DRYRUN: delete_keys for %s keys", len(delete_list))

    def handle(self, *args, **options):
        """Entry point: run flatten, hard-delete and mark phases in order."""
        conn = S3Connection(settings.AWS_ACCESS_KEY_ID,
                            settings.AWS_SECRET_ACCESS_KEY)
        bucket = conn.get_bucket(settings.S3_BUCKET, validate=False)
        now_mark = self.x_days_ago(int(options['markdays']))
        now_delete = self.x_days_ago(int(options['deletedays']))

        bucket_keys = bucket.list_versions(options['prefix'])

        # objs maps object name -> {'last': newest real version or None,
        #                           'delete': newest delete marker or None}
        objs = {}
        delete_list = []

        if options['verbose']:
            logging.info("Delete day: %s", now_delete)
            logging.info("Mark day: %s", now_mark)

        # Phase 1: flatten.  The loop variable is deliberately NOT called
        # 'key' — that would shadow the boto.s3.key module imported above.
        for vkey in bucket_keys:
            if options['verbose']:
                logging.info(self.print_key(vkey))

            if vkey.name not in objs:
                objs[vkey.name] = {'last': None, 'delete': None}
            entry = objs[vkey.name]

            # Keep only the newest version and newest delete marker per
            # name; everything older is queued for deletion.
            if isinstance(vkey, deletemarker.DeleteMarker):
                if entry['delete'] is None:
                    entry['delete'] = vkey
                elif vkey.last_modified > entry['delete'].last_modified:
                    delete_list.append(entry['delete'])
                    entry['delete'] = vkey
                else:
                    delete_list.append(vkey)
            else:
                if entry['last'] is None:
                    entry['last'] = vkey
                elif vkey.last_modified > entry['last'].last_modified:
                    delete_list.append(entry['last'])
                    entry['last'] = vkey
                else:
                    delete_list.append(vkey)

                # A real version at least as new as the delete marker
                # makes the marker redundant — drop it.
                if entry['delete'] and \
                        entry['last'].last_modified >= \
                        entry['delete'].last_modified:
                    delete_list.append(entry['delete'])
                    entry['delete'] = None

            # Purge as we go so delete_list cannot grow without bound.
            if len(delete_list) > 1000:
                while delete_list:
                    self.delete_objects(bucket, delete_list[0:1000],
                                        settings.S3_PURGE_EXCLUDES,
                                        options['dryrun'],
                                        options['verbose'])
                    delete_list = delete_list[1000:]

        if options['verbose']:
            logging.info("done with flatten")

        # Phase 2: hard-delete names that already carry a delete marker.
        for candidate in [x for x in objs if objs[x]['delete']]:
            # Excluded names are never purged, marker or not.
            if any(fnmatch(candidate, p) for p in settings.S3_PURGE_EXCLUDES):
                if options['verbose']:
                    logging.info("excluded: %s", candidate)
                continue
            if objs[candidate]['last'] is None:
                # No point in keeping a delete marker pointing at nothing.
                delete_list.append(objs[candidate]['delete'])
            elif objs[candidate]['last'].last_modified < now_delete:
                # Age is judged on the last real file, not the marker.
                delete_list.append(objs[candidate]['delete'])
                delete_list.append(objs[candidate]['last'])

        if options['verbose']:
            logging.info("done with now_delete")

        # Phase 3: mark old, unmarked names as deleted.
        for candidate in [x for x in objs if not objs[x]['delete']]:
            if any(fnmatch(candidate, p) for p in settings.S3_PURGE_EXCLUDES):
                if options['verbose']:
                    logging.info("excluded: %s", candidate)
                continue
            if objs[candidate]['last'].last_modified < now_mark:
                if not options['dryrun']:
                    # Appending only the name (not the key object) makes
                    # S3 insert a delete marker instead of removing a
                    # specific version.
                    delete_list.append(objs[candidate]['last'].name)
                else:
                    logging.info(
                        "DRYRUN: setting deletemarker on %s - %s",
                        objs[candidate]['last'].name,
                        objs[candidate]['last'].version_id)

        if options['verbose']:
            logging.info("done with now_mark")

        # Flush the remaining queue in multi-delete-sized batches,
        # skipping the request entirely when nothing is left.
        while len(delete_list) > 1000:
            self.delete_objects(bucket, delete_list[0:1000],
                                settings.S3_PURGE_EXCLUDES,
                                options['dryrun'], options['verbose'])
            delete_list = delete_list[1000:]
        if delete_list:
            self.delete_objects(bucket, delete_list,
                                settings.S3_PURGE_EXCLUDES,
                                options['dryrun'], options['verbose'])
        if options['verbose']:
            logging.info("done with cleanup.")