diff options
Diffstat (limited to 'import_mbox.py')
-rwxr-xr-x | import_mbox.py | 187 |
1 files changed, 187 insertions, 0 deletions
#!/usr/bin/python3
"""Import patches into patchwork from an mbox file or a lore.kernel.org repo.

Two entry points are offered on the command line:

* ``--mbox PATH`` parses every message of a local mbox file.
* ``--mbox_repo NAME`` clones (or updates) the git archive of the mailing
  list ``NAME`` found on lore.kernel.org and parses the ``m`` file of every
  commit in the requested date range.

Each message is handed to ``patchwork.parser.parse_mail``; older revisions
of a newly saved patch are then marked as "Superseded".
"""

from bin import django_setup, add_logging_arguments

import logging
import imaplib
import mailbox
import git
import datetime
import dateutil.parser
import re
import requests

django_setup()  # must be called to get sys.path and django settings in place

from django.db import IntegrityError
from django.conf import settings
from django.utils.module_loading import import_string

from patchwork import parser
from patchwork.models import Patch, State

import patch_matcher

log = logging.getLogger("import_mbox")


def x_days_ago(days):
    """Return the local date *days* days before now as a ``YYYY-MM-DD`` string."""
    start_date = datetime.datetime.now() - datetime.timedelta(days=days)
    return start_date.strftime("%Y-%m-%d")


def get_commits_from_author(repo, start_date, end_date):
    """Return the commits of ``master`` committed between two dates.

    Despite its name this function applies no author filter; it only
    selects by commit date.

    Args:
        repo: a ``git.Repo`` object.
        start_date: first day to include, in any format understood by
            ``dateutil.parser.parse``.
        end_date: last day to include (inclusive), same format.

    Returns:
        A list of commits in the order ``repo.iter_commits`` yields them.
    """
    first_day, last_day = (
        dateutil.parser.parse(d).date() for d in (start_date, end_date)
    )
    return [
        commit
        for commit in repo.iter_commits("master")
        if first_day <= commit.committed_datetime.date() <= last_day
    ]


def find_old_revisions(patch):
    """Yield the patches that ``patch_matcher`` matches to *patch*.

    The lookup uses the project, submitter, name and diff of *patch*; the
    patch itself is excluded from the results.
    """
    log.debug("looking for old versions of patch %d", patch.id)
    it = patch_matcher.get_patches_matching(
        patch.project, [patch.submitter], patch.name, patch.diff
    )
    for p in it:
        # skip ourself
        if p.id != patch.id:
            yield p


def scan_for_latest_repo(url, targ, timeout=30):
    """Scrape ``<url>/<targ>`` for the last advertised ``git clone`` URL.

    Args:
        url: base URL of the archive site, e.g. ``https://lore.kernel.org``.
        targ: name of the mailing list below *url*.
        timeout: seconds to wait for the HTTP server before giving up.

    Returns:
        The last ``http...`` word found on a matching ``git clone`` line,
        or ``None`` when the page could not be fetched (non-2xx status) or
        contains no such line.
    """
    listing = "/".join([url, targ])
    resp = requests.get(listing, timeout=timeout)
    if not 200 <= resp.status_code < 300:
        log.warning("GET %s returned status %d", listing, resp.status_code)
        return None

    # Escape the URL so that e.g. the dots of the host name match literally.
    clone_line = re.compile(r"\s+git clone.+%s/.+" % re.escape(listing))
    last_git = None
    for line in resp.text.split("\n"):
        if clone_line.match(line):
            # isolate the url and set it to be last one seen
            for field in line.split():
                if field.startswith("http"):
                    last_git = field
    return last_git


def process_mbox_repo(mailing_list, start_date, end_date, days):
    """Import the messages of a mailing list's git archive.

    The archive lives in ``/srv/mailinglists/<mailing_list>``. It is cloned
    from lore.kernel.org when missing, otherwise reset, cleaned and pulled.
    Every commit of ``master`` in the date range is then checked out and
    its ``m`` file is passed to ``process_mbox``.

    Args:
        mailing_list: list name as used on lore.kernel.org.
        start_date: first commit day to import.
        end_date: last commit day to import (inclusive).
        days: when truthy, overrides *start_date* with "that many days ago".

    Raises:
        RuntimeError: no clone URL could be found for a missing repository.
    """
    log.info("processing mailbox %s", mailing_list)

    to_path = "/srv/mailinglists/%s" % mailing_list
    repo = None
    try:
        repo = git.Repo(to_path)
        log.info("Using exisiting git repo %s", to_path)
    except git.exc.GitError as ex:
        log.info("no usable git repo at %s: %s", to_path, ex)

    if repo is None:
        # Only a fresh clone needs the remote URL, so only then hit the
        # network to look it up.
        url = scan_for_latest_repo("https://lore.kernel.org", mailing_list)
        if url is None:
            raise RuntimeError(
                "no git clone URL found for mailing list %s" % mailing_list
            )
        log.info("Clone repository from %s", url)
        repo = git.Repo.clone_from(url, to_path)
    else:
        repo.git.reset("--hard")
        repo.git.clean("-xdf")
        repo.git.checkout("master")
        repo.git.pull()

    if days:
        start_date = x_days_ago(int(days))

    mbox_path = repo.working_tree_dir + "/m"
    # NOTE(review): commits are handled in the order iter_commits yields
    # them; if that is newest-first, a newer revision may end up marked as
    # superseded by an older one -- confirm the intended order.
    for commit in get_commits_from_author(repo, start_date, end_date):
        try:
            # process_mbox() rewrites "m" in the working tree. A plain
            # checkout would refuse to overwrite that local change, so the
            # checkout is forced; otherwise the next commit gets skipped.
            repo.git.checkout("-f", commit.hexsha)
        except git.exc.GitCommandError as ex:
            log.warning("cannot check out %s: %s", commit.hexsha, ex)
            continue
        log.info(
            "processing commit %s, %s, %s",
            commit,
            commit.message,
            commit.committed_datetime,
        )
        try:
            process_mbox(mbox_path)
        except FileNotFoundError:
            log.info("commit %s has no 'm' file, skipping", commit.hexsha)


def line_prepender(filename, line):
    """Insert *line* as the new first line of the file *filename*.

    The file is handled as bytes so that messages in any character set
    (and their original line endings) survive the rewrite unchanged.

    Args:
        filename: path of an existing file; it is rewritten in place.
        line: text to insert; trailing CR/LF are replaced by a single LF.
    """
    header = line.rstrip("\r\n").encode("utf-8") + b"\n"
    with open(filename, "rb+") as f:
        content = f.read()
        f.seek(0)
        f.write(header + content)


def _has_mbox_separator(path):
    """Return True when the file at *path* already starts with ``From ``."""
    with open(path, "rb") as f:
        return f.read(5) == b"From "


def process_mbox(mbox):
    """Parse every message of the mbox file *mbox* into patchwork.

    A ``From `` separator line is prepended when the file does not start
    with one, so that a file holding a single raw message can be read as a
    mailbox. For each mail that ``parser.parse_mail`` saves, the matching
    older patches are set to the "Superseded" state.

    Args:
        mbox: path of the mbox file; it may be modified in place.

    Returns:
        The result of ``parser.parse_mail`` for the last message that was
        parsed, or ``None`` when nothing was parsed or that parse failed.
    """
    if not _has_mbox_separator(mbox):
        line_prepender(mbox, "From mboxrd@z Thu Jan 1 00:00:00 1970")
    box = mailbox.mbox(mbox)
    log.info(box)

    p = None
    superseded = None  # looked up on first use only
    try:
        for message in box:
            if message["From"] is None:
                # some reason messages from lore have a none type in from
                # field; skip those but keep going with the rest
                continue
            log.info("processing mailbox %s", message["subject"])
            p = None
            try:
                p = parser.parse_mail(message)
            except IntegrityError as e:
                log.info(e)
            if not p:
                continue
            log.info("saved mail: %d", p.id)
            for patch in Patch.objects.filter(msgid=p.msgid):
                for old in find_old_revisions(patch):
                    log.info(
                        "marking patch %d as superseded by %d",
                        old.id,
                        patch.id,
                    )
                    if superseded is None:
                        superseded = State.objects.get(name="Superseded")
                    old.state = superseded
                    old.save()
    finally:
        box.close()
    return p


def get_monkey_patcher():
    """Return the object named by ``settings.PARSEMAIL_MONKEY_PATCHER``.

    Returns ``None`` when the setting is absent or empty.
    """
    p = getattr(settings, "PARSEMAIL_MONKEY_PATCHER", None)
    if p:
        return import_string(p)
    return None


if __name__ == "__main__":
    import argparse

    arg_parser = argparse.ArgumentParser(
        description="Check configured inbox for new patches to import"
    )
    # The defaults are strings, so they are rendered with %(default)s;
    # a %d conversion fails when argparse formats the help text.
    arg_parser.add_argument(
        "--start_date",
        default="2020-01-01",
        help="""Start date of emails to analyze.
        default=%(default)s""",
    )
    arg_parser.add_argument(
        "--end_date",
        default=datetime.datetime.now().strftime("%Y-%m-%d"),
        help="""End date of emails to analyze.
        default=%(default)s""",
    )
    arg_parser.add_argument(
        "--days", help="""Number of days instead of start/end date"""
    )
    arg_parser.add_argument("--mbox")
    arg_parser.add_argument("--mbox_repo")
    add_logging_arguments(arg_parser)
    args = arg_parser.parse_args()
    parser.logger = log

    # NOTE(review): nothing below uses this IMAP session; it only verifies
    # the configured credentials. Presumably left over from the IMAP
    # importer -- confirm whether it can be dropped.
    with imaplib.IMAP4_SSL(settings.IMAP_SERVER) as mail:
        status, _ = mail.login(settings.IMAP_USER, settings.IMAP_PASS)
        if status != "OK":
            raise SystemExit("IMAP login failed with status %s" % status)

    if args.mbox_repo:
        process_mbox_repo(
            args.mbox_repo, args.start_date, args.end_date, args.days
        )

    if args.mbox:
        process_mbox(args.mbox)