aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKelley Spoon <kelley.spoon@linaro.org>2019-11-08 08:50:30 -0600
committerKelley Spoon <kelley.spoon@linaro.org>2019-11-12 15:14:18 +0000
commit7855130ddee12625d16df56f79ef8a4b9d16ac7f (patch)
treee3a06ddf3cf45f924499f0393827f4b22c652b0b
parentc1d7e20344026be1b9b4e57071e03459e12ec6e6 (diff)
downloadlinaro-license-protection-7855130ddee12625d16df56f79ef8a4b9d16ac7f.tar.gz
s3_flatten: add s3_flatten script
This script is intended to replace the s3_purge command. It simplifies the logic by breaking the clean up process into 3 parts: 1 - "flatten" all file objects to have no more than 1 version object and optionally 1 deletemarker regardless of prefix 2 - delete any expired files from prefixes that have not been excluded via S3_PURGE_EXCLUDES 3 - place deletemarkers on files that are older than MARK_DAYS that have not been excluded via S3_PURGE_EXCLUDES The other difference with this script is that it attempts to determine what objects to delete as it iterates through the versioned bucket listing, and will collect multiple delete operations into a single multidelete request. Change-Id: I25a227f574504bb4e828f29670aec366e87952c9 Reviewed-on: https://review.linaro.org/c/infrastructure/linaro-license-protection/+/33268 Reviewed-by: Benjamin Copeland <ben.copeland@linaro.org>
-rw-r--r--license_protected_downloads/management/commands/s3_flatten.py165
1 file changed, 165 insertions, 0 deletions
diff --git a/license_protected_downloads/management/commands/s3_flatten.py b/license_protected_downloads/management/commands/s3_flatten.py
new file mode 100644
index 0000000..4441555
--- /dev/null
+++ b/license_protected_downloads/management/commands/s3_flatten.py
@@ -0,0 +1,165 @@
from django.conf import settings
from django.core.management.base import BaseCommand

import datetime
import logging
from fnmatch import fnmatch

from boto.s3 import deletemarker, key
from boto.s3.connection import S3Connection

logging.getLogger().setLevel(logging.INFO)

# S3 multi-object-delete accepts at most 1000 keys per request.
MAX_DELETE_BATCH = 1000


class Command(BaseCommand):
    """Flatten a versioned S3 bucket and expire old artifacts.

    Replaces the old s3_purge command.  Works in three phases:
      1. "flatten": keep at most one real version and one deletemarker
         per key name, queueing every superseded version/marker for
         deletion (regardless of prefix excludes).
      2. permanently delete files whose newest real version is older
         than --deletedays and that already carry a deletemarker,
         unless excluded via settings.S3_PURGE_EXCLUDES.
      3. place deletemarkers on live files older than --markdays,
         unless excluded via settings.S3_PURGE_EXCLUDES.
    """

    help = 'Mark files as deleted or delete files for good, which are older \
    than X days'

    @staticmethod
    def add_arguments(parser):
        parser.add_argument('--dryrun', action='store_true',
                            help='Do not perform any actions, just report')
        # type=int so handle() sees an int whether the value comes from
        # the default or from the command line.
        parser.add_argument('--markdays', type=int, default=90,
                            help='Number of days to mark files as deleted')
        parser.add_argument('--deletedays', type=int, default=180,
                            help='Number of days to delete files for good')
        parser.add_argument('--prefix', default='snapshots/aarch64laptops',
                            help='Custom prefix path')
        parser.add_argument('-V', '--verbose', action='store_true',
                            help='log detailed information on actions to INFO')

    @staticmethod
    def x_days_ago(days):
        """Return an ISO-8601 timestamp for `days` days before now, in UTC.

        S3 reports last_modified in UTC, so the cutoff must be computed
        from UTC as well -- local datetime.now() would shift the cutoff
        by the machine's UTC offset.
        """
        date = datetime.datetime.utcnow() - datetime.timedelta(days=days)
        return date.isoformat()

    @staticmethod
    def print_key(key):
        """Format a key/deletemarker for logging ('*' marks the latest)."""
        if key.is_latest:
            latest = "*"
        else:
            latest = " "

        if isinstance(key, deletemarker.DeleteMarker):
            dm = "DEL"
        else:
            dm = " "
        return '%s: %s %s(%s) %s' % (key.name, dm, latest,
                                     key.last_modified, key.version_id)

    @staticmethod
    def delete_objects(bucket, delete_list, excludes_list=None,
                       dryrun=True, verbose=False):
        """Issue one S3 multi-delete request for delete_list.

        Entries may be Key/DeleteMarker objects (the specific version is
        removed) or bare key-name strings (S3 then inserts a new
        deletemarker).  In dryrun mode only a summary is logged.

        NOTE(review): excludes_list is accepted for call compatibility
        but is not consulted here -- callers filter excluded prefixes
        before building delete_list.
        """
        if verbose:
            for x in delete_list:
                if isinstance(x, (key.Key, deletemarker.DeleteMarker)):
                    logging.info("deleting: %s %s" % (x.name, x.version_id))
                else:
                    logging.info("deleting: %s" % (x,))

        if not dryrun:
            bucket.delete_keys(delete_list)
        else:
            logging.info("DRYRUN: delete_keys for %s keys" % len(delete_list))

    def handle(self, *args, **options):
        conn = S3Connection(settings.AWS_ACCESS_KEY_ID,
                            settings.AWS_SECRET_ACCESS_KEY)
        bucket = conn.get_bucket(settings.S3_BUCKET, validate=False)
        now_mark = self.x_days_ago(int(options['markdays']))
        now_delete = self.x_days_ago(int(options['deletedays']))

        bucket_keys = bucket.list_versions(options['prefix'])

        # Per key name: the newest real version ('last') and the newest
        # deletemarker ('delete') seen so far in the listing.
        objs = {}
        delete_list = []

        if options['verbose']:
            logging.info("Delete day: %s" % now_delete)
            logging.info("Mark day: %s" % now_mark)

        # Phase 1: flatten.  Keep only the newest version and the newest
        # deletemarker per key; queue everything else for deletion.
        # The loop variable is deliberately NOT called 'key' -- that name
        # would shadow the boto.s3.key module imported above.
        for obj in bucket_keys:
            if options['verbose']:
                logging.info(self.print_key(obj))

            entry = objs.setdefault(obj.name, {'last': None, 'delete': None})

            if isinstance(obj, deletemarker.DeleteMarker):
                if entry['delete'] is None:
                    entry['delete'] = obj
                elif obj.last_modified > entry['delete'].last_modified:
                    delete_list.append(entry['delete'])
                    entry['delete'] = obj
                else:
                    delete_list.append(obj)
            else:
                if entry['last'] is None:
                    entry['last'] = obj
                elif obj.last_modified > entry['last'].last_modified:
                    delete_list.append(entry['last'])
                    entry['last'] = obj
                else:
                    delete_list.append(obj)

                # A deletemarker older than the newest real version is
                # stale -- drop it so the file is "live" again.
                if entry['delete'] and \
                        entry['last'].last_modified >= entry['delete'].last_modified:
                    delete_list.append(entry['delete'])
                    entry['delete'] = None

            # Purge as we go so delete_list does not grow unbounded on
            # large buckets; each request carries at most 1000 keys.
            if len(delete_list) > MAX_DELETE_BATCH:
                while delete_list:
                    self.delete_objects(
                        bucket, delete_list[:MAX_DELETE_BATCH],
                        settings.S3_PURGE_EXCLUDES,
                        options['dryrun'], options['verbose'])
                    delete_list = delete_list[MAX_DELETE_BATCH:]

        if options['verbose']:
            logging.info("done with flatten")

        # Phase 2: everything that carries a deletemarker is a candidate
        # for permanent deletion.
        for candidate in [x for x in objs if objs[x]['delete']]:
            # Excluded prefixes are ignored even if they carry a marker.
            if any(fnmatch(candidate, p) for p in settings.S3_PURGE_EXCLUDES):
                if options['verbose']:
                    logging.info("excluded: %s" % candidate)
                continue
            if objs[candidate]['last'] is None:
                # No point in keeping a deletemarker pointing at nothing.
                delete_list.append(objs[candidate]['delete'])
            elif objs[candidate]['last'].last_modified < now_delete:
                # Age is judged on the last real file, not the marker.
                delete_list.append(objs[candidate]['delete'])
                delete_list.append(objs[candidate]['last'])

        if options['verbose']:
            logging.info("done with now_delete")

        # Phase 3: mark (but keep) live files older than --markdays.
        for candidate in [x for x in objs if not objs[x]['delete']]:
            if any(fnmatch(candidate, p) for p in settings.S3_PURGE_EXCLUDES):
                if options['verbose']:
                    logging.info("excluded: %s" % candidate)
                continue
            if objs[candidate]['last'].last_modified < now_mark:
                if not options['dryrun']:
                    # Appending only the name rather than the Key object
                    # makes S3 insert a deletemarker instead of removing
                    # a specific version.
                    delete_list.append(objs[candidate]['last'].name)
                else:
                    logging.info(
                        "DRYRUN: setting deletemarker on %s - %s" % (
                            objs[candidate]['last'].name,
                            objs[candidate]['last'].version_id))

        if options['verbose']:
            logging.info("done with now_mark")

        # Flush the remainder in batches; unlike the old code, skip the
        # pointless empty request when nothing is left to delete.
        while delete_list:
            self.delete_objects(
                bucket, delete_list[:MAX_DELETE_BATCH],
                settings.S3_PURGE_EXCLUDES,
                options['dryrun'], options['verbose'])
            delete_list = delete_list[MAX_DELETE_BATCH:]
        if options['verbose']:
            logging.info("done with cleanup.")