summary | refs | log | tree | commit | diff
path: root/import_mbox.py
diff options
context:
space:
mode:
Diffstat (limited to 'import_mbox.py')
-rwxr-xr-x import_mbox.py 187
1 files changed, 187 insertions, 0 deletions
diff --git a/import_mbox.py b/import_mbox.py
new file mode 100755
index 0000000..25a7488
--- /dev/null
+++ b/import_mbox.py
@@ -0,0 +1,187 @@
+#!/usr/bin/python3
+
+from bin import django_setup, add_logging_arguments
+
+import logging
+import imaplib
+import mailbox
+import git
+import datetime
+import dateutil.parser
+import re
+import requests
+
+django_setup() # must be called to get sys.path and django settings in place
+
+from django.db import IntegrityError
+from django.conf import settings
+from django.utils.module_loading import import_string
+
+from patchwork import parser
+from patchwork.models import Patch, State
+
+import patch_matcher
+
+log = logging.getLogger("import_mbox")
+
+
def x_days_ago(days):
    """Return the date `days` days before now as a ``YYYY-MM-DD`` string.

    The current time is taken from ``datetime.datetime.now()`` (naive,
    local clock).
    """
    offset = datetime.timedelta(days=days)
    target = datetime.datetime.now() - offset
    return target.strftime("%Y-%m-%d")
+
+
def get_commits_from_author(repo, start_date, end_date):
    """Collect the commits on ``master`` committed inside a date window.

    Args:
        repo: object exposing ``iter_commits`` (a ``git.Repo`` in this
            script).
        start_date: first day of the window, as a string accepted by
            ``dateutil.parser.parse``.
        end_date: last day of the window (inclusive), same format.

    Returns:
        A list of the matching commits, in the order ``iter_commits``
        produces them.

    Note: despite the name, no filtering by author happens here; only the
    commit date is checked.
    """
    first_day = dateutil.parser.parse(start_date).date()
    last_day = dateutil.parser.parse(end_date).date()

    selected = []
    for commit in repo.iter_commits("master"):
        committed_on = commit.committed_datetime.date()
        if first_day <= committed_on <= last_day:
            selected.append(commit)
    return selected
+
+
def find_old_revisions(patch):
    """Yield the patches that ``patch_matcher`` reports as matching `patch`.

    The lookup is keyed on the patch's project, submitter, name and diff.
    The patch itself is filtered out of the results.
    """
    log.debug("looking for old versions of patch %d", patch.id)
    candidates = patch_matcher.get_patches_matching(
        patch.project, [patch.submitter], patch.name, patch.diff
    )
    for candidate in candidates:
        if candidate.id == patch.id:
            # skip ourselves
            continue
        yield candidate
+
+
def scan_for_latest_repo(url, targ, timeout=30):
    """Find the last ``git clone`` URL advertised on the page ``url/targ``.

    The page is fetched over HTTP and scanned line by line for lines of the
    form ``<whitespace>git clone ... <url>/<targ>/...``. Within each such
    line, every whitespace-separated token starting with ``http`` is taken
    as a candidate, and the last candidate seen on the page wins.

    Args:
        url: base URL of the archive (e.g. ``https://lore.kernel.org``).
        targ: mailing-list name appended to `url`.
        timeout: seconds to wait for the HTTP request before
            ``requests`` raises; prevents the import from hanging forever.

    Returns:
        The clone URL as a string, or ``None`` when the response status is
        not 2xx or no matching line is found.

    Raises:
        requests.exceptions.RequestException: propagated from
            ``requests.get`` (connection failure, timeout, ...).
    """
    resp = requests.get("/".join([url, targ]), timeout=timeout)
    if not 200 <= resp.status_code < 300:
        return None

    # url/targ are literal text, not regex fragments: escape them so that
    # e.g. the dots in a host name only match real dots.
    clone_line = re.compile(
        r"\s+git clone.+%s/%s/.+" % (re.escape(url), re.escape(targ))
    )

    last_git = None
    for line in resp.text.split("\n"):
        if not clone_line.match(line):
            continue
        # isolate the url and remember it as the last one seen
        for token in line.split():
            if token.startswith("http"):
                last_git = token
    return last_git
+
+
def process_mbox_repo(mailing_list, start_date, end_date, days):
    """Import patches from the git archive of `mailing_list`.

    The archive is kept in ``/srv/mailinglists/<mailing_list>``. If no
    usable repository exists there it is cloned from the URL advertised on
    lore.kernel.org; otherwise the existing checkout is reset, cleaned and
    pulled. Every commit on ``master`` whose commit date lies in the
    requested window is then checked out and the file ``m`` in the working
    tree is fed to ``process_mbox``.

    Args:
        mailing_list: list name, used both for the lore lookup and as the
            local directory name.
        start_date: first day of the window (string); ignored when `days`
            is set.
        end_date: last day of the window (string).
        days: if truthy, the window starts this many days ago instead of at
            `start_date`.

    Raises:
        RuntimeError: a clone is needed but no clone URL could be found.
    """
    log.info("processing mailbox %s", mailing_list)

    to_path = "/srv/mailinglists/%s" % mailing_list
    try:
        repo = git.Repo(to_path)
    except git.exc.GitError as ex:
        # Not fatal: we fall back to cloning below.
        log.info("no usable git repo at %s: %s", to_path, ex)
        repo = None
    else:
        log.info("Using exisiting git repo %s", to_path)

    if repo is None:
        # Only hit the network for the clone URL when it is really needed.
        url = scan_for_latest_repo("https://lore.kernel.org", mailing_list)
        if url is None:
            raise RuntimeError(
                "could not find a git clone URL for %s" % mailing_list
            )
        log.info("Clone repository from %s", url)
        repo = git.Repo.clone_from(url, to_path)
    else:
        repo.git.reset("--hard")
        repo.git.clean("-xdf")
        repo.git.checkout("master")
        repo.git.pull()

    if days:
        start_date = x_days_ago(int(days))

    mbox_path = repo.working_tree_dir + "/m"
    for commit in get_commits_from_author(repo, start_date, end_date):
        try:
            # process_mbox() edits the checked-out `m` in place (it prepends
            # a "From " line). A plain checkout refuses to overwrite locally
            # modified files, which made the following commit get skipped,
            # so force the switch and discard the previous edit.
            repo.git.checkout("-f", commit.hexsha)
            log.info(
                "processing commit %s, %s, %s",
                commit,
                commit.message,
                commit.committed_datetime,
            )
            process_mbox(mbox_path)
        except git.exc.GitCommandError:
            log.exception("git failed while processing commit %s", commit)
            # Best effort: restore a clean tree and go on with the next one.
            repo.git.checkout("-f")
+
+
def line_prepender(filename, line):
    """Insert `line` as the new first line of the file `filename`, in place.

    Trailing carriage-return / line-feed characters are stripped from
    `line` and exactly one line feed is appended before the existing file
    content. The whole file is read into memory and rewritten.
    """
    with open(filename, "r+") as handle:
        existing = handle.read()
        header = line.rstrip("\r\n") + "\n"
        # Rewind so the combined text overwrites the file from the start.
        handle.seek(0, 0)
        handle.write(header + existing)
+
+
def process_mbox(mbox):
    """Parse every message of the mbox file at path `mbox` into patchwork.

    A ``From `` separator line is first prepended to the file (the file on
    disk is modified) so that ``mailbox.mbox`` can read it. Each message is
    handed to ``parser.parse_mail``; when that stores something, all
    patches with the same message id are looked up and any older revisions
    reported by ``find_old_revisions`` are moved to the "Superseded" state.

    Args:
        mbox: filesystem path of the mbox file.

    Returns:
        The result of ``parser.parse_mail`` for the last message handled,
        or ``None`` if there was none or it could not be saved.
    """
    line_prepender(mbox, "From mboxrd@z Thu Jan 1 00:00:00 1970")
    box = mailbox.mbox(mbox)
    log.info(box)

    p = None  # stays None for an empty mailbox
    superseded = None  # looked up once, on first use
    try:
        for message in box:
            if message["From"] is None:
                # For some reason messages from lore can lack a From header;
                # skip just that message rather than dropping all the rest.
                continue
            log.info("processing mailbox %s", message["subject"])
            p = None
            try:
                p = parser.parse_mail(message)
            except IntegrityError as e:
                # NOTE(review): presumably the mail was imported before.
                log.info(e)
            if not p:
                continue
            log.info("saved mail: %d", p.id)
            for patch in Patch.objects.filter(msgid=p.msgid):
                for old in find_old_revisions(patch):
                    log.info(
                        "marking patch %d as superseded by %d",
                        old.id,
                        patch.id,
                    )
                    if superseded is None:
                        superseded = State.objects.get(name="Superseded")
                    old.state = superseded
                    old.save()
    finally:
        # Release the file handle held by the mailbox object.
        box.close()
    return p
+
+
def get_monkey_patcher():
    """Return the object named by the ``PARSEMAIL_MONKEY_PATCHER`` setting.

    The setting is read from the Django settings and resolved with
    ``import_string``. ``None`` is returned when the setting is missing or
    falsy.
    """
    dotted_path = getattr(settings, "PARSEMAIL_MONKEY_PATCHER", None)
    if not dotted_path:
        return None
    return import_string(dotted_path)
+
+
if __name__ == "__main__":
    # Command-line entry point: import patches from a lore git archive
    # (--mbox_repo) and/or from a single mbox file (--mbox).
    import argparse

    arg_parser = argparse.ArgumentParser(
        description="Check configured inbox for new patches to import"
    )
    # The defaults below are strings, so they must be interpolated with
    # %(default)s; %(default)d makes argparse raise TypeError on --help.
    arg_parser.add_argument(
        "--start_date",
        default="2020-01-01",
        help="Start date of emails to analyze. default=%(default)s",
    )
    arg_parser.add_argument(
        "--end_date",
        default=datetime.datetime.now().strftime("%Y-%m-%d"),
        help="End date of emails to analyze. default=%(default)s",
    )
    arg_parser.add_argument(
        "--days", help="""Number of days instead of start/end date"""
    )
    arg_parser.add_argument("--mbox")
    arg_parser.add_argument("--mbox_repo")
    add_logging_arguments(arg_parser)
    args = arg_parser.parse_args()
    # Route the patchwork parser's log output through this script's logger.
    parser.logger = log

    # NOTE(review): the IMAP connection is only logged in to and never used
    # for fetching below; kept because it checks the configured credentials.
    # The context manager logs out when the import is finished.
    with imaplib.IMAP4_SSL(settings.IMAP_SERVER) as mail:
        status, _ = mail.login(settings.IMAP_USER, settings.IMAP_PASS)
        if status != "OK":
            # Explicit check instead of assert, which is stripped under -O.
            raise SystemExit("IMAP login failed with status %s" % status)

        if args.mbox_repo:
            process_mbox_repo(
                args.mbox_repo, args.start_date, args.end_date, args.days
            )

        if args.mbox:
            # process_mbox opens the file itself; no need to open it here.
            process_mbox(args.mbox)