summaryrefslogtreecommitdiff
path: root/gen_project_json.py
diff options
context:
space:
mode:
Diffstat (limited to 'gen_project_json.py')
-rwxr-xr-xgen_project_json.py186
1 files changed, 186 insertions, 0 deletions
diff --git a/gen_project_json.py b/gen_project_json.py
new file mode 100755
index 0000000..8d098c2
--- /dev/null
+++ b/gen_project_json.py
@@ -0,0 +1,186 @@
+#!/usr/bin/python3
+""" A script to generate a projects.json file from Patchworks.
+
+ Details of the projects.json file can be found at:
+ https://chaoss.github.io/grimoirelab-tutorial/sirmordred/projects.html
+
+ This script should be run with the following exports:
+ export PYTHONPATH=$PYTHONPATH:../project:/srv/linaro-git-tools
+ export DJANGO_SETTINGS_MODULE=local_settings
+"""
+
+import sys
+import os
+import json
+import re
+import django
+from patchwork.models import Project
+from linaro_metrics.models import Team, TeamCredit
+
+django.setup()
+
# Legacy Python 2.7 hack: make unicode the default encoding, since some
# of our usernames have non-ascii chars.  Python 3 has no
# sys.setdefaultencoding (str is already unicode) and on Python 2 the
# site module normally removes it, so only call it when it is actually
# available instead of dying with AttributeError at start-up.
if hasattr(sys, 'setdefaultencoding'):
    sys.setdefaultencoding('utf8')

# path of the projects.json file this script writes
OUTFILE = "/tmp/projects.json"
# a table of Patchwork projects keyed by project name; it is
# replaced with the result of load_projects() by the main program
PROJECTS = {}
# names of Patchwork projects that load_projects() skips
EXCLUDE_PROJECTS = [
    'Unknown',
    'No Project',
    'Not upstream',
]
# a table representing the json that will be used
# to create the projects.json file, keyed by team display name
PW_PROJECT_TABLE = {}
+
+
def clean_git(git_string):
    """ Strip trailing junk from a git URL taken from the pw db.

        The following are removed, in this order:
          1. everything from the first ';' onwards
          2. everything from the first '/commit' onwards
          3. a single trailing '/'

        Returns the cleaned URL string. """
    cleaned = git_string
    for junk_pattern in (';.*$', '/commit(.*)?$', '/$'):
        cleaned = re.sub(junk_pattern, '', cleaned)

    return cleaned
+
+
def compare_repo(a_url, b_url, depth=2):
    """ Decide whether two URLs appear to point at the same repo.

        Both URLs are reduced with get_base_url() and split on '/'.
        The first field of each is treated as the server and must be
        identical.  The remaining path fields are then compared from
        the end of the path backwards, for at most <depth> fields or
        until the shorter path runs out.

        Returns True if no compared field differs, False if the
        servers or any compared path field differ. """
    a_fields = get_base_url(a_url).split('/')
    b_fields = get_base_url(b_url).split('/')

    # the servers don't match
    if a_fields[0] != b_fields[0]:
        return False

    # walk both paths from their last field towards the server;
    # zip() stops as soon as the shorter path is exhausted
    tail_pairs = zip(reversed(a_fields[1:]), reversed(b_fields[1:]))
    for checked, (a_field, b_field) in enumerate(tail_pairs):
        # reached our depth limit without finding a mismatch
        if checked >= depth:
            break
        if a_field != b_field:
            return False

    return True
+
+
def get_base_url(url):
    """ Return url with its protocol and any trailing ".git" stripped off.

        This is meant to create an abstract URL that can be used to
        compare a git:// and http:// url to see if they refer to the
        same repository.

        url -- repository URL string, e.g. "git://host/path/repo.git"

        Only the leading "<scheme>://" is dropped and only a literal
        ".git" suffix is trimmed, so a path that merely ends in the
        letters "git" (".../scm/git/git") is returned intact. """
    # stop at the first "://" so a URL embedded later in the string
    # (e.g. in a query parameter) is not mistaken for the protocol
    noproto = re.sub(r'^[^/]*://', '', url)
    # the dot is escaped: an unescaped '.' would match any character
    # and chop e.g. "/git" or "egit" off the end of the path
    return re.sub(r'\.git$', '', noproto)
+
+
def load_projects():
    """ Build a table of Patchwork projects and their source URLs.

        Every Project whose name is not in EXCLUDE_PROJECTS gets an
        entry, keyed by project name, holding three lists:
          'pipermail' -- guessed mailing list archive URLs
          'git'       -- cleaned repo URLs that don't mention github
          'github'    -- cleaned repo URLs that mention github

        These are later aggregated into a team's "project" entry in
        the projects.json file.  Returns the table as a dict. """
    table = {}

    for project in Project.objects.all():
        if project.name in EXCLUDE_PROJECTS:
            continue

        entry = {'pipermail': [], 'git': [], 'github': []}
        table[project.name] = entry

        # A project can have either a git:// or http[s]:// repo url,
        # so look at both scm_url and webscm_url.  github links are
        # collected in a list of their own.
        for raw_url in (project.scm_url, project.webscm_url):
            if raw_url in (None, '', 'n/a'):
                continue
            repo_url = clean_git(raw_url)
            bucket = 'github' if 'github' in repo_url else 'git'
            entry[bucket].append(repo_url)

        # hack.. assume that a list address on a "lists" host is run
        # by mailman and guess the pipermail URL from it
        if '@lists' in project.listemail:
            list_name, list_host = project.listemail.split('@')
            entry['pipermail'].append(
                'https://{0}/pipermail/{1}'.format(list_host, list_name))

    return table
+
+
# start of main program
PROJECTS = load_projects()

# drop any output left over from a previous run before regenerating it
if os.path.isfile(OUTFILE):
    os.unlink(OUTFILE)

# iterate through each active team in the PW db, and get a list
# of the TeamCredit objects for the team. From there,
# we can use the TeamCredit object to look up which Project
# was contributed to, and then add that PW Project's information
# to the GL team "project" entry.
for team in Team.objects.filter(active=True):
    team_name = team.display_name

    # names of the known (non-excluded) projects this team has
    # credits on, in first-seen order and without repeats
    team_projects = []
    for credit in TeamCredit.objects.filter(team=team):
        proj_name = credit.patch.project.name
        if proj_name in PROJECTS and proj_name not in team_projects:
            team_projects.append(proj_name)

    team_entry = {'git': [], 'github': [], 'pipermail': []}
    PW_PROJECT_TABLE[team_name] = team_entry

    for proj_name in team_projects:
        for source in ('git', 'github'):
            known_urls = team_entry[source]
            for candidate_url in PROJECTS[proj_name][source]:
                # skip URLs that compare_repo() considers to be the
                # same repo as one already listed for this team
                if not any(compare_repo(candidate_url, known)
                           for known in known_urls):
                    known_urls.append(candidate_url)

        # several projects can share one mailing list, so only add
        # archive URLs that are not listed for this team yet
        for archive_url in PROJECTS[proj_name]['pipermail']:
            if archive_url not in team_entry['pipermail']:
                team_entry['pipermail'].append(archive_url)

# remove empty entries
for team_entry in PW_PROJECT_TABLE.values():
    for source in ('git', 'github', 'pipermail'):
        if not team_entry[source]:
            del team_entry[source]

with open(OUTFILE, "w") as outfile:
    json.dump(PW_PROJECT_TABLE, outfile, indent=4)