summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andy Doan <andy.doan@linaro.org> 2014-12-15 12:24:22 -0600
committer Andy Doan <andy.doan@linaro.org> 2014-12-15 14:47:12 -0600
commit ef625b499f09c4f60ac45f6dfb9c077d0f6ed029 (patch)
tree 1ba1e328026394855e217c79b6ea6d58471d2f29
parent 4001b9cd80239521ec52b75d35f834cad7f9ad92 (diff)
improve performance of sync script
This logic was written very inefficiently by iterating over every known Person and then hitting Crowd for their group memberships. It's much easier to just ask Crowd for the members of each group. I decided to remove this "utils.py" module because the code is specific to the script. I really didn't use much of utils.py when implementing the new sync logic. This makes things run in seconds rather than minutes. Change-Id: Ie6582087303ee78554bb81f858ec035278b4eef6
-rwxr-xr-x apps/patchmetrics/bin/get-linaro-membership.py 97
-rw-r--r-- apps/patchmetrics/crowd.py 26
-rw-r--r-- apps/patchmetrics/utils.py 151
3 files changed, 78 insertions, 196 deletions
diff --git a/apps/patchmetrics/bin/get-linaro-membership.py b/apps/patchmetrics/bin/get-linaro-membership.py
index 535ed7b..ae7e980 100755
--- a/apps/patchmetrics/bin/get-linaro-membership.py
+++ b/apps/patchmetrics/bin/get-linaro-membership.py
@@ -1,36 +1,87 @@
-#!/usr/bin/python
+#!/usr/bin/env python
import _pythonpath
+import logging
import sys
-from patchmetrics.crowd import Crowd
from django.conf import settings
-from patchmetrics.utils import (
- sync_user_memberships,
+from django.contrib.auth.models import User
+
+from patchmetrics.crowd import Crowd
+from patchmetrics.models import (
+ Team,
+ TeamMembership,
)
from patchwork.models import Person
+logging.basicConfig()
+log = logging.getLogger()
+log.setLevel(logging.INFO)
+
+
+def get_or_create_user(crowd, email):
+ name = None
+ try:
+ person = Person.objects.get(email__iexact=email)
+ except Person.DoesNotExist:
+ # use crowd to get the "display-name" for the user
+ user = crowd.get_user(email)
+ name = user.display_name
+ log.info('Creating person %s(%s)', name, email)
+ person = Person(name=name, email=email)
+ person.save()
+
+ if not person.user:
+ users = User.objects.filter(person__email=email)
+ if users.count() == 0:
+ if not name:
+ name = crowd.get_user(email).display_name
+ users = User.objects.filter(username=name)
+ if users.count() == 0:
+ log.info('Creating user for %s', email)
+ user = User.objects.create_user(name, email, password=None)
+
+ person.user = user
+ person.save()
+
+ return person.user
+
+
+def sync_group(crowd, group, emails, user_memberships):
+ team, created = Team.objects.get_or_create(name=group)
+ if created:
+ log.info('new team created: %s', team.name)
+ for email in emails:
+ user_memberships.setdefault(email, []).append(group)
+ user = get_or_create_user(crowd, email)
+ _, created = TeamMembership.objects.get_or_create(
+ team=team, member=user)
+ if created:
+ log.info('New team membership created for: %s', email)
+
+
+def sync_crowd(crowd, groups):
+ user_memberships = {}
+ for group in groups:
+ emails = crowd.get_group(group)
+ if len(emails):
+ log.info('syncing group: %s - (%s)', group, emails)
+ sync_group(crowd, group, emails, user_memberships)
+ else:
+ log.warn('empty group definition in crowd for: %s', group)
+
+ for person in Person.objects.all():
+ memberships = user_memberships.get(person.email, [])
+ for team in TeamMembership.objects.filter(member=person.user):
+ if team.team.name not in memberships:
+ log.warn('TODO: Delete %s\'s membership in %s',
+ person.email, team.team.name)
+ #team.delete()
+
if __name__ == '__main__':
- """Maps email addresses to Linaro personnel to track Linaro's patches
-
- Pulls all email addresses out of the database, checks to see if each
- email address is associated with a Linaro Login user who is a member of
- a Linaro sub-team, and if they are, add a mapping between that Person
- object and a User object (many people (email addresses) can be mapped
- onto a single User (Linaro identity)). This allows us to map patches
- by email address on to Linaro users, and thus Linaro teams.
-
- This script has been significantly re-written. Previously we pulled all
- users who were members of a linaro sub-team out of Launchpad and saved
- their email addresses. We lost the privilages to do this though, so now
- we look up email addresses that we know. We only know email addresses
- because someone has emailed a patch to patches@linaro.org. For this reason
- we don't automatically pick up new Linaro engineers until they have
- submitted a patch in this way, which is a change vs the old behaviour.
+ """Syncronize memberships to Crowd groups for Persons in the DB
"""
- input_email_addresses = [person.email for person in Person.objects.all()]
-
if settings.AUTH_CROWD_APPLICATION_USER:
whitelisted_groups = []
if settings.CROWD_GROUPS_WHITELIST is None:
@@ -43,7 +94,7 @@ if __name__ == '__main__':
crwd = Crowd(settings.AUTH_CROWD_APPLICATION_USER,
settings.AUTH_CROWD_APPLICATION_PASSWORD,
settings.AUTH_CROWD_SERVER_REST_URI)
- sync_user_memberships(input_email_addresses, crwd, whitelisted_groups)
+ sync_crowd(crwd, whitelisted_groups)
else:
print "No Crowd credentials provided, cannot continue."
sys.exit(1)
diff --git a/apps/patchmetrics/crowd.py b/apps/patchmetrics/crowd.py
index 7d3e722..49178d8 100644
--- a/apps/patchmetrics/crowd.py
+++ b/apps/patchmetrics/crowd.py
@@ -158,28 +158,10 @@ class Crowd(object):
resource = "/user?{0}".format(urllib.urlencode(params))
return CrowdUser.from_json_s(self._get_rest_usermanagement(resource))
- def get_user_with_groups(self, email):
- """Gets all the groups a user is member of.
-
- :param email: The user email.
- :return A CrowdUser object.
- """
- # First get the user, if it does not exist, we skip all the operations
- # here.
- user = self.get_user(email)
-
- params = {"username": email}
-
- resource = "/user/group/nested?{0}".format(
- urllib.urlencode(params))
- data = json.loads(self._get_rest_usermanagement(resource))
-
- teams = []
- if data["groups"]:
- teams = [x["name"] for x in data["groups"]]
- user.teams = teams
-
- return user
+ def get_group(self, grp):
+ resource = '/group/user/nested?' + urllib.urlencode({'groupname': grp})
+ users = json.loads(self._get_rest_usermanagement(resource))['users']
+ return [x['name'] for x in users]
def is_valid_user(self, email):
"""Handy function to check if a user exists or not.
diff --git a/apps/patchmetrics/utils.py b/apps/patchmetrics/utils.py
deleted file mode 100644
index c900c41..0000000
--- a/apps/patchmetrics/utils.py
+++ /dev/null
@@ -1,151 +0,0 @@
-import logging
-import re
-
-from patchmetrics.crowd import (
- CrowdException,
- CrowdNotFoundException,
-)
-from django.contrib.auth.models import User
-from django.db.models import Q
-from patchmetrics.models import Team
-from patchwork.models import Person
-
-logging.basicConfig()
-logger = logging.getLogger()
-
-
-def create_team_display_name(team):
- """Very simple and hackish way to create a display name for a team.
-
- :param team: The team name.
- """
- # XXX: since Crowd does not expose the displayName of a team, we hack one.
- parts = re.split("-|_", team)
- display_name = " ".join(parts)
- return display_name.title()
-
-
-def sync_crowd_memberships(memberships):
- """Make sure the given memberships are represented in the database.
-
- :param memberships: A dict mapping each team to a list of members. Each
- member is in turn represented by a CrowdUser object.
- For example: {Team: [CrowdUser1, CrowdUser2, ...]}
- """
- created_memberships = []
- for team, members in memberships.iteritems():
- team_display_name = create_team_display_name(team)
- try:
- db_team = Team.objects.get(name=team)
- except Team.DoesNotExist:
- logger.info(
- 'Creating new team: {0} ({1})'.format(team, team_display_name))
- db_team = Team(name=team, display_name=team_display_name)
- db_team.save()
-
- for member in members:
- people = []
- for email in member.emails:
- try:
- person = Person.objects.get(email=email)
- except Person.DoesNotExist:
- logger.info("Creating new person: {0} "
- "({1})".format(member.name, email))
- person = Person(name=member.display_name, email=email)
- person.save()
- people.append(person)
-
- # Get all Person entries that might represent other email
- # addresses of this same user.
- people.extend(Person.objects.filter(name=member.display_name))
-
- user = get_user(member.name, member.emails)
- if user is None:
- user = User.objects.create_user(
- member.name, member.emails[0], password=None)
-
- # Now link all the Person entries to the user account.
- for person in people:
- person.user = user
- person.save()
-
- # And finally, make sure the user is a member of the team.
- if user not in db_team.members:
- logger.info("Adding {0} as a member "
- "of {1}".format(member.name, db_team.name))
- membership = db_team.add_member(user)
- membership.save()
- created_memberships.append(membership)
-
- return created_memberships
-
-
-def get_user(name, emails):
- """Return the user linked to the person with one of the given emails.
-
- If there are no users linked to a person with any of the given emails,
- return None.
- """
- query = (Q(person__in=Person.objects.filter(email__in=emails))
- | Q(username=name))
- users = User.objects.filter(query).distinct()
- if users.count() == 1:
- return users[0]
- elif users.count() > 1:
- logger.info("Found more than one user for {0}; "
- "using the first one".format(emails))
- return users[0]
- else:
- return None
-
-
-def sync_user_memberships(email_addresses, crowd, whitelisted_groups=[]):
- """If an input email matches one from Linaro Login, add them to the db.
-
- If an email address matches a user in Linaro Login, takes the CrowdUser
- object and passes it to sync_crowd_memberships, where CrowdUser are
- matched with User objects (many CrowdUser can map to a single User).
- The tail call to `sync_openid_urls` then adds OpenID URLs to each User
- object.
-
- :param: email_addresses: List of email addresses to look in Linaro Login.
- :type list
- :param crowd: The Crowd object instance to perform query to Linaro Login.
- :type Crowd
- :param whitelisted_groups: A list of valid groups/teams to create.
- :type list
- """
- memberships = {}
- for email in email_addresses:
- email = email.lower()
-
- user = None
- try:
- user = crowd.get_user_with_groups(email)
- except CrowdNotFoundException:
- # If there is not a user matching that email addess in Linaro,
- # move on.
- pass
- except CrowdException:
- # If something else went wrong, report it.
- logger.warning("Error while searching email address "
- "'{0}'.".format(email))
-
- # No user with that email, or user is not part of any team.
- if user is None or not user.teams:
- continue
- else:
- for team in user.teams:
- # If we have a list of valid groups, obey it. Otherwise, all
- # groups are valid.
- if whitelisted_groups:
- if team in whitelisted_groups:
- if not team in memberships:
- memberships[team] = []
- memberships[team].append(user)
- else:
- if not team in memberships:
- memberships[team] = []
- memberships[team].append(user)
-
- sync_crowd_memberships(memberships)