diff options
Diffstat (limited to 'gen_project_json.py')
-rwxr-xr-x | gen_project_json.py | 186 |
1 files changed, 186 insertions, 0 deletions
#!/usr/bin/python3
""" A script to generate a projects.json file from Patchworks.

    Details of the projects.json file can be found at:
    https://chaoss.github.io/grimoirelab-tutorial/sirmordred/projects.html

    This script should be run with the following exports:
    export PYTHONPATH=$PYTHONPATH:../project:/srv/linaro-git-tools
    export DJANGO_SETTINGS_MODULE=local_settings
"""

import sys
import os
import json
import re
from itertools import islice

OUTFILE = "/tmp/projects.json"
# a table mapping each patchworks project name to a dict of URL
# lists ('git', 'github' and 'pipermail'); filled in by main()
PROJECTS = {}
EXCLUDE_PROJECTS = [
    'Unknown',
    'No Project',
    'Not upstream'
]
# a table representing the json that will be used
# to create the projects.json file; filled in by main()
PW_PROJECT_TABLE = {}

# keys of a project/team entry whose URLs are de-duplicated
# with compare_repo()
_REPO_KEYS = ('git', 'github')


def _setup_django():
    """ Initialise Django. This has to happen before any model
        module is imported, which is why the django and model
        imports in this file are done lazily inside functions
        instead of at the top of the file. """
    import django
    django.setup()


def clean_git(git_string):
    """ Some of the git URLs in the pw db have trailing junk
        that needs to be removed. Returns the cleaned URL. """
    # drop everything from the first ';' onwards
    url = re.sub(r';.*$', '', git_string)
    # remove cruft from http string
    url = re.sub(r'/commit(.*)?$', '', url)
    # make sure doesn't end with a /
    url = re.sub(r'/$', '', url)

    return url


def compare_repo(a_url, b_url, depth=2):
    """ Attempts to compare 2 URLs to determine if they
        are both pointing to the same repo up to <depth>
        directories. Returns True if there's a match,
        False if repos appear to be distinct.

        The servers must be identical. The path components are
        then compared starting from the end (the repo name) and
        the comparison stops at the first mismatch, when one of
        the paths runs out of components, or once <depth>
        components have matched. """
    a_server, *a_path = get_base_url(a_url).split('/')
    b_server, *b_path = get_base_url(b_url).split('/')

    # servers don't match
    if a_server != b_server:
        return False

    # zip() stops at the shorter path, islice() at the depth limit
    tails = zip(reversed(a_path), reversed(b_path))
    return all(a_part == b_part
               for a_part, b_part in islice(tails, max(depth, 0)))


def get_base_url(url):
    """ returns a URL with the protocol and any trailing ".git"
        stripped off. This is meant to create an abstract
        URL that can be used to compare a git:// and http://
        url to see if they refer to the same repository """
    noproto = re.sub(r'^.*://', '', url)
    # the dot must be escaped, otherwise a repo called e.g.
    # "legit" would be truncated to "l"
    return re.sub(r'\.git$', '', noproto)


def load_projects():
    """ Create a table of active projects and any git or
        pipermail URLs associated with it. These will
        later be aggregated into a team's "project" entry
        in the projects.json file.

        Returns a dict mapping each project name to a dict with
        the keys 'pipermail', 'git' and 'github', each holding a
        list of URLs. """
    _setup_django()
    from patchwork.models import Project

    projs = {}

    for proj in Project.objects.all():
        if proj.name in EXCLUDE_PROJECTS:
            continue

        entry = {'pipermail': [], 'git': [], 'github': []}
        projs[proj.name] = entry

        # Project can have either a git:// or http[s]://
        # repo url, so check both scm and webscm. Also
        # github links need to be separated out to their
        # own list.
        for url in (proj.scm_url, proj.webscm_url):
            if url in (None, '', 'n/a'):
                continue
            clean_url = clean_git(url)
            target = 'github' if 'github' in clean_url else 'git'
            entry[target].append(clean_url)

        # hack.. assume if hostname is "lists" that it's using mailman
        # and try to guess pipermail URL
        listemail = proj.listemail or ''
        if '@lists' in listemail:
            # split on the first '@' only so that a malformed
            # address cannot break the unpacking
            mlist, host = listemail.split('@', 1)
            entry['pipermail'].append(
                'https://{0}/pipermail/{1}'.format(host, mlist))

    return projs


def _team_entry(project_names, projects):
    """ Aggregate the URLs of the given projects into a single
        team entry. Empty URL lists are left out of the result. """
    entry = {'git': [], 'github': [], 'pipermail': []}

    for name in project_names:
        for key in _REPO_KEYS:
            for candidate_url in projects[name][key]:
                # skip repos that look the same as one already listed
                if not any(compare_repo(candidate_url, known)
                           for known in entry[key]):
                    entry[key].append(candidate_url)

        # projects can share a mailing list; list each archive once
        for archive in projects[name]['pipermail']:
            if archive not in entry['pipermail']:
                entry['pipermail'].append(archive)

    # remove empty entries
    return {key: urls for key, urls in entry.items() if urls}


def main():
    """ Build the team -> project URL table from the PW db and
        write it to OUTFILE as json. Returns 0 on success. """
    _setup_django()
    from linaro_metrics.models import Team, TeamCredit

    PROJECTS.clear()
    PROJECTS.update(load_projects())

    if os.path.isfile(OUTFILE):
        os.unlink(OUTFILE)

    # iterate through each team in the PW db, and get a list
    # of the TeamCredit objects for the team. From there,
    # we can use the TeamCredit object to look up which Project
    # was contributed to, and then add that PW Project's information
    # to the GL team "project" entry.
    PW_PROJECT_TABLE.clear()
    for team in Team.objects.filter(active=True):
        project_names = []

        for credit in TeamCredit.objects.filter(team=team):
            proj_name = credit.patch.project.name

            if proj_name in PROJECTS and proj_name not in project_names:
                project_names.append(proj_name)

        PW_PROJECT_TABLE[team.display_name] = _team_entry(project_names,
                                                          PROJECTS)

    with open(OUTFILE, "w") as outfile:
        json.dump(PW_PROJECT_TABLE, outfile, indent=4)

    return 0


if __name__ == "__main__":
    sys.exit(main())