summaryrefslogtreecommitdiff
path: root/alt_email.py
blob: 5901b1a0df2e2d33596a962d791030dfe76c8f6d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
#!/usr/bin/python3
""" A script to export an identities.yaml file from Patchworks that
    attempts to cross-reference with user information from LDAP.

    This script should be run with the following exports:
    export PYTHONPATH=$PYTHONPATH:../project:/srv/linaro-git-tools
    export DJANGO_SETTINGS_MODULE=local_settings
"""

import sys
import os
import time
from jinja2 import Template
import linaro_ldap
from patchwork.models import User
from patchwork.models import Person
import django

# Initialise Django; the settings module is taken from the
# DJANGO_SETTINGS_MODULE environment variable (see module docstring).
# NOTE(review): the patchwork.models imports above run before this call;
# standalone scripts normally call setup() before importing models --
# confirm this ordering works with the deployed Django version.
django.setup()

# Hack to make python 2.7 use unicode by default, since some of our
# usernames have non-ascii chars.  sys.setdefaultencoding() does not exist
# on Python 3 (which the shebang selects) and is normally removed by
# site.py on Python 2, so only call it when the interpreter actually
# exposes it instead of dying with AttributeError at start-up.
if hasattr(sys, 'setdefaultencoding'):
    sys.setdefaultencoding('utf8')

# Patchwork usernames that must never end up in the export.
SKIP_PROFILES = ['bugzilla-daemon']

# Destination of the generated identities file.
OUTFILE = "/tmp/identities.yaml"

# Entries are appended one at a time later on, so get rid of any file left
# over from a previous run before starting.
if os.path.isfile(OUTFILE):
    os.remove(OUTFILE)

# Email domain -> member organization name.  Starts empty and is filled in
# by build_member_table() from the LDAP member entries.
MEMBER_DOMAINS_TABLE = {}

# Static fallback map of email domain -> organization name.  get_org()
# consults it only after MEMBER_DOMAINS_TABLE produced no match.
# TODO: add more domains
KNOWN_DOMAINS_TABLE = {
    "debian.org": "Debian",
    "gcc.gnu.org": "GNU",
    "gnu.org": "GNU",
    "lge.com": "LG",
    "ibm.com": "IBM",
    "il.ibm.com": "IBM",
    "in.ibm.com": "IBM",
    "linux.vnet.ibm.com": "IBM",
    "suse.com": "SuSE",
    "linuxfoundation.org": "Linux Foundation",
    "arm.com": "ARM",
    "freescale.com": "Freescale",
    "hp.com": "HP",
    "hpe.com": "HP",
    "caviumnetworks.com": "Cavium Networks",
    "ubuntu.com": "Ubuntu",
    "canonical.com": "Canonical",
    "stericsson.com": "ST",
    "amd.com": "AMD",
    "broadcom.com": "Broadcom",
    "qti.qualcomm.com": "Qualcomm",
    "quicinc.com": "Qualcomm",
    "collabora.com": "Collabora",
    "collabora.co.uk": "Collabora"
}


# Profile name -> {'emails': [...], 'org': ..., 'end_date': ...}.  Built up
# by merge_entry() and written out by the final loop of the script.
MERGE_TABLE = {}


class NoLdapUserException(Exception):
    """ Raised by get_name() when no matching LDAP entry can be found
        for an email address. """


def build_member_table():
    """ Populate MEMBER_DOMAINS_TABLE from the member entries in LDAP.

        Every entry returned by the organizationalStatus query contributes
        one mapping per value of its "mail" attribute: the mail value is
        the key (an email domain) and the entry's first "description"
        value is the organization name.  Entries without a "mail"
        attribute add nothing to the table. """
    query_results = linaro_ldap.do_complex_query(
        search_filter='(organizationalStatus=*)',
        attrlist=['*'])

    for entry in query_results:
        # second element of each result holds the attribute mapping
        attrs = entry[1]
        # the description is read even for entries that have no mail
        org_name = attrs["description"][0]
        if "mail" not in attrs:
            continue
        for mail_domain in attrs["mail"]:
            MEMBER_DOMAINS_TABLE[mail_domain] = org_name


def get_name(target_email, target_attr='displayName'):
    """ Look up an LDAP attribute for the account owning an email address.

        Queries LDAP for an entry whose "mail" matches target_email and
        returns the first value of target_attr (defaults to "displayName")
        from the first match.  Raises NoLdapUserException when there is no
        match, when the match lacks the attribute, or when LDAP rejects
        the search filter. """
    try:
        matches = linaro_ldap.do_query(
            search_attr='mail',
            search_pat=target_email,
            attrlist=[target_attr])
        # only the first match is considered
        attrs = matches[0][1] if matches else {}
        if target_attr in attrs:
            return attrs[target_attr][0]
    except linaro_ldap.ldap.FILTER_ERROR:
        # the address contains illegal characters that LDAP cannot turn
        # into a search filter; treat it the same as "not found"
        pass

    raise NoLdapUserException(
        "no user found in LDAP for %s" % target_email)


def get_org(target_email):
    """ Work out which organization an email address belongs to.

        The domain part of an email address is case-insensitive, so the
        lower-cased domain is tried in addition to the domain exactly as
        written.  Checks, in order:
          1. an @linaro.org address           -> 'Linaro'
          2. a domain in MEMBER_DOMAINS_TABLE -> that member's name
          3. a domain in KNOWN_DOMAINS_TABLE  -> that organization's name
        Returns 'Unknown' when nothing matches. """
    # if they have a l.o address, claim them
    if target_email.lower().endswith('@linaro.org'):
        return 'Linaro'

    raw_domain = target_email.split('@')[-1]
    # Try the domain exactly as written first (member domains come straight
    # from LDAP and are not guaranteed to be lower case), then fall back to
    # its lower-cased form.
    candidates = (raw_domain, raw_domain.lower())

    # member organizations take precedence over the static fallback table
    for table in (MEMBER_DOMAINS_TABLE, KNOWN_DOMAINS_TABLE):
        for domain in candidates:
            if domain in table:
                return table[domain]

    return 'Unknown'


def merge_entry(uid, target_emails, target_org, target_end_date=None):
    """ Add a profile entry for the user or merge with existing entry
        to prevent duplicates.

        uid             -- profile name, used as the key in MERGE_TABLE
        target_emails   -- email addresses belonging to the profile
        target_org      -- organization name for the profile
        target_end_date -- optional enrollment end date (None = not set)

        When the profile already exists: unseen addresses are appended,
        the org is replaced only if the new one is 'Linaro' or the stored
        one is 'Unknown', and the end date is replaced only by a non-None
        value.  The stored email list is always a private copy, so later
        merges never modify the list object a caller passed in. """
    if uid not in MERGE_TABLE:
        MERGE_TABLE[uid] = {
            # copy: this list is appended to on later merges and must not
            # alias the caller's list
            'emails': list(target_emails),
            'org': target_org,
            'end_date': target_end_date,
        }
        return

    entry = MERGE_TABLE[uid]
    for address in target_emails:
        if address not in entry['emails']:
            entry['emails'].append(address)
    # if the new org is Linaro or previous org unknown, override it
    if target_org == 'Linaro' or entry['org'] == 'Unknown':
        entry['org'] = target_org
    if target_end_date is not None:
        entry['end_date'] = target_end_date


# Jinja2 template for a single identities.yaml record.  write_entry()
# renders it with: username, org, end_date (the end_date line is emitted
# only when the value is truthy) and emails (one list item per address).
TMPL = Template(u'''\
- profile:
    name: {{ username }}
  enrollments:
    - organization: {{ org }}
{%- if end_date %}
      end_date: {{ end_date }}
{%- endif %}
  email:
{%- for email in emails %}
    - {{ email }}
{%- endfor %}

''')


def write_entry(uid, target_emails, target_org, target_end_date=None):
    """ Append a profile entry for the specified user to OUTFILE.

        uid             -- profile name written as "name"
        target_emails   -- email addresses listed under "email"
        target_org      -- organization written under "enrollments"
        target_end_date -- optional end date; omitted from the output
                           when empty/None

        The record is rendered with TMPL and written as UTF-8, since
        profile names may contain non-ascii characters.  Writing stays
        best-effort: a TypeError while rendering or writing skips the
        profile, but the skip is reported on stderr instead of being
        dropped silently. """
    # local import: io.open() accepts an encoding on python 2 and 3 alike
    import io

    try:
        rendered = TMPL.render(
            username=uid,
            org=target_org,
            end_date=target_end_date,
            emails=target_emails
        )
        with io.open(OUTFILE, 'a', encoding='utf-8') as outfile:
            outfile.write(rendered)
    except TypeError as err:
        sys.stderr.write(
            "skipping profile %r: %s\n" % (uid, err))


# Load the member-domain lookup table from LDAP before classifying anyone.
build_member_table()

for user in User.objects.filter(is_active=True):
    # skip unwanted profiles
    if user.username in SKIP_PROFILES:
        continue

    # gather every address from the linked Person objects; entries without
    # an '@' (e.g. '(address hidden)') are dropped
    emails = [
        person.email
        for person in Person.objects.filter(user=user)
        if '@' in person.email
    ]

    # a linaro Person may be linked to a non-Linaro User, so when the
    # username itself is an address make sure it takes part in the
    # organization lookup too
    if '@' in user.username and user.username not in emails:
        emails.append(user.username)

    # an account without any usable address is of no interest
    if not emails:
        continue

    org = None
    end_date = None

    for email in emails:
        try:
            # an LDAP hit means an active account (either @linaro.org,
            # member, or community account)
            name = get_name(email, "displayName")
        except NoLdapUserException:
            # not in ldap, so the name has to come from patchworks
            name = user.username

            if email.endswith('@linaro.org'):
                # a l.o address without an LDAP entry: assume the user is
                # no longer an employee and stop looking
                org = "Linaro"
                end_date = time.strftime('%Y-%m-%d', time.localtime())
                break

            # only look the org up while it is still undetermined; the
            # loop keeps going so that a later @l.o address can still be
            # picked up
            if org is None or org == 'Unknown':
                org = get_org(email)
        else:
            org = get_org(email)
            # if it's still unknown, let's call it "Linaro Community"
            if org == "Unknown":
                org = "Linaro Community"
            # found in ldap, no need to try the remaining addresses
            break

    merge_entry(name, emails, org, end_date)

# Emit one record per merged profile.
for uid, profile in MERGE_TABLE.items():
    write_entry(
        uid,
        profile['emails'],
        profile['org'],
        profile['end_date'])