#!/usr/bin/env python
"""Mirror pull requests from the GitHub repositories listed below into
Patchwork so that they are counted in Linaro's statistics."""
import contextlib
import json
import logging
import os
import re
import sys
import textwrap
import urllib2

from datetime import datetime

sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from bin import django_setup, add_logging_arguments
django_setup()  # must be called to get sys.path and django settings in place

from django.conf import settings

from patchwork.models import Patch, Project, State

from linaro_metrics.crowd import Crowd
from linaro_metrics.models import TeamCredit
from linaro_metrics.parsemail import get_linaro_person

log = logging.getLogger('sync_github_changes')

# Map GitHub pull-request states to Patchwork state names.
STATE_MAP = {'closed': 'Accepted', 'open': 'New'}

GITHUB_REPOS = [
    # tuple of: owner, repo, patchwork-project
    ('jackmitch', 'libsoc', 'libsoc'),
    ('OP-TEE', 'optee_os', 'optee_os'),
    ('OP-TEE', 'optee_test', 'optee_test'),
    ('OP-TEE', 'optee_client', 'optee_client'),
    ('OP-TEE', 'build', 'optee_build'),
    ('OP-TEE', 'manifest', 'optee_manifest'),
    ('linaro-swg', 'optee_android_manifest', 'optee_android_manifest'),
    ('linaro-swg', 'optee_benchmark', 'optee_benchmark'),
    ('linaro-swg', 'linux', 'optee_linux'),
    ('linaro-swg', 'gen_rootfs', 'optee_gen_rootfs'),
    ('linaro-swg', 'bios_qemu_tz_arm', 'optee_bios_qemu_tz_arm'),
    ('linaro-swg', 'hello_world', 'optee_hello_world'),
    ('scheduler-tools', 'rt-app', 'rt-app'),
    ('WebPlatformForEmbedded', 'meta-wpe', 'meta-wpe'),
    ('WebPlatformForEmbedded', 'WPEWebKit', 'WPEWebKit'),
    ('ndechesne', 'meta-qcom', 'meta-qcom'),
    ('zephyrproject-rtos', 'zephyr', 'Zephyr'),
]


def _get(url):
    headers = {'Authorization': 'token %s' % settings.GITHUB_OAUTH_TOKEN}
    request = urllib2.Request(url, headers=headers)
    try:
        return urllib2.urlopen(request)
    except urllib2.HTTPError as e:
        log.error('HTTP_%d while GETing %s:\n %s',
                  e.getcode(), url, e.readlines())
        sys.exit(1)


def get_pull_requests(owner, repo, last_update=None):
    url = 'https://api.github.com/repos/%s/%s/pulls?state=all&sort=updated'
    url = url % (owner, repo)
    while url:
        resp = _get(url)
        data = json.loads(resp.read())
        for x in data:
            ts = datetime.strptime(x['updated_at'], '%Y-%m-%dT%H:%M:%SZ')
            if last_update and ts < last_update:
                log.debug('Hit old pull requests, exiting')
                return
            try:
                yield x
            except:
                log.error('Unable to process pr(%r)', x)
                raise
        url = resp.headers.get('link')
        if url:
            # find the <$URL>; rel="next" to get the next page of results
            m = re.match(r'<(\S+)>; rel="next"', url)
            url = None
            if m:
                url = m.group(1)


def get_author(crowd, pr):
    resp = _get(pr['commits_url'])
    data = json.loads(resp.read())
    if not len(data):
        # some PR's have no commits: https://github.com/docker/docker/pull/5894
        return
    email = data[0]['commit']['author']['email']
    return get_linaro_person(crowd, email)


def patchwork_state(github_status):
    return State.objects.get(name=STATE_MAP[github_status])


def get_patch_content(owner, repo, pr):
    fmt = textwrap.dedent('''\
        # %s
        This represents a change submitted via Github.
        It is mirrored here so that it is included in our statistics.''')
    return fmt % pr['html_url']


def create_or_update(proj, owner, repo, author, pr):
    msgid = '%s/%s@%d' % (owner, repo, pr['number'])
    created = datetime.strptime(pr['created_at'], '%Y-%m-%dT%H:%M:%SZ')
    updated = datetime.strptime(pr['updated_at'], '%Y-%m-%dT%H:%M:%SZ')

    fields = {
        'name': pr['title'],
        'project': Project.objects.get(name=proj),
        'state': patchwork_state(pr['state']),
    }
    try:
        p = Patch.objects.get(msgid=msgid)
        tcs = TeamCredit.objects.filter(patch=p)
        if updated > tcs[0].last_state_change:
            for k, v in fields.iteritems():
                setattr(p, k, v)
            p.save()
            TeamCredit.objects.filter(patch=p).update(
                last_state_change=updated)
    except Patch.DoesNotExist:
        fields['msgid'] = msgid
        fields['date'] = created
        fields['submitter'] = author
        fields['content'] = get_patch_content(owner, repo, pr)
        p = Patch.objects.create(**fields)
        # teamcredits are auto-set to "now", so we need to update it to what
        # came from github
        TeamCredit.objects.filter(patch=p).update(last_state_change=updated)


@contextlib.contextmanager
def repo_cache():
    # Persist the timestamp of the last successful sync for each repository
    # so later runs can skip pull requests that have not changed since.
    def dt_serialize(obj):
        if isinstance(obj, datetime):
            return obj.isoformat()
        return obj

    fname = os.path.join(settings.REPO_DIR, 'github.cache')
    data = {}
    try:
        with open(fname) as f:
            data = json.load(f)
            for repo, dt in data.items():
                data[repo] = datetime.strptime(dt, '%Y-%m-%dT%H:%M:%S.%f')
    except:
        log.exception('ignoring')

    yield data

    with open(fname, 'w') as f:
        json.dump(data, f, default=dt_serialize)


def main(args):
    crwd = Crowd(settings.CROWD_USER, settings.CROWD_PASS, settings.CROWD_URL)
    with crwd.cached(settings.CROWD_CACHE), repo_cache() as repos:
        for owner, repo, proj in GITHUB_REPOS:
            repo_path = '%s/%s' % (owner, repo)
            log.info('Looking at: %s', repo_path)
            # GitHub timestamps are UTC, so compare against UTC "now".
            now = datetime.utcnow()
            last_update = repos.get(repo_path)
            x = 0
            try:
                for pr in get_pull_requests(owner, repo, last_update):
                    x += 1
                    auth = get_author(crwd, pr)
                    if auth:
                        log.debug('checking change: %d', pr['number'])
                        create_or_update(proj, owner, repo, auth, pr)
                repos[repo_path] = now
            finally:
                log.info('analyzed %d pull-requests', x)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='Synchronize Linaro changes from github projects')
    add_logging_arguments(parser)
    args = parser.parse_args()
    main(args)
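
# For reference, a minimal sketch of the pull-request JSON fields this script
# reads from the GitHub API responses (number, title, state, created_at,
# updated_at, html_url, commits_url). The values below are illustrative
# placeholders, not data from a real response:
#
#   {
#       "number": 42,
#       "title": "Example change",
#       "state": "open",                # or "closed"; mapped via STATE_MAP
#       "created_at": "2016-01-01T00:00:00Z",
#       "updated_at": "2016-01-02T00:00:00Z",
#       "html_url": "https://github.com/<owner>/<repo>/pull/42",
#       "commits_url": "https://api.github.com/repos/<owner>/<repo>/pulls/42/commits"
#   }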