summaryrefslogtreecommitdiff
path: root/patch_matcher.py
blob: e6dfe345a6e54150c80baeee166ada81bc575a2d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import difflib
import email.utils
import logging
import re

from patchwork.models import Patch, Person, State

# Module-level logger; _patches_similar() emits its computed similarity
# ratios through it at DEBUG level.
log = logging.getLogger('patch-matcher')


def _is_revert(name1, name2):
    name1 = re.sub(r'\[[^\]]*\]', '', name1).strip()
    name2 = re.sub(r'\[[^\]]*\]', '', name2).strip()
    if name1.startswith('Revert') and not name2.startswith('Revert'):
        return True
    return False


def _patches_similar(name1, diff1, name2, diff2):
    # Be conservative and use 0.9 as the similarity ratio between the
    # commit's title and content to prevent false-positives.
    name_ratio = difflib.SequenceMatcher(None, name1, name2).ratio()
    if name_ratio < 0.7:
        return False
    diff_ratio = difflib.SequenceMatcher(None, diff1, diff2).ratio()
    log.debug(
        'name_ratio(%f) diff_ratio(%f) for: %s', name_ratio, diff_ratio, name1)

    if diff_ratio > 0.9 and not _is_revert(name1, name2):
        return True
    if name_ratio > 0.9:
        if diff_ratio > 0.9 or (name_ratio > .99 and diff_ratio > 0.6):
            if not _is_revert(name1, name2):
                return True
    return False


def _get_patchwork_author_committer(commit):
    """Look up the Person records for a commit's author and committer.

    The e-mail address is extracted from ``commit.author`` and
    ``commit.committer`` with email.utils.parseaddr and used to fetch the
    Person with that address.

    :param commit: object exposing ``author`` and ``committer`` strings.
    :return: a 2-tuple ``(author, committer)``; either element is None when
        no Person with that e-mail address exists.
    """
    addresses = [
        email.utils.parseaddr(identity)[1]
        for identity in (commit.author, commit.committer)
    ]

    people = []
    for address in addresses:
        try:
            person = Person.objects.get(email=address)
        except Person.DoesNotExist:
            person = None
        people.append(person)

    author, committer = people
    return author, committer


def get_patches_matching(project, submitters, name, content):
    """Yield the project's patches that resemble the given title and diff.

    Candidates are the patches of ``project`` submitted by one of
    ``submitters`` whose state is neither 'Accepted' nor 'Superseded'.
    Pull requests (patches with a ``pull_url``) are skipped; every other
    candidate is yielded when _patches_similar() accepts its name and diff
    against ``name`` and ``content``.

    This is a generator: nothing is queried until it is iterated.
    """
    closed_states = tuple(
        State.objects.get(name=label) for label in ('Accepted', 'Superseded'))

    candidates = Patch.objects.filter(
        project=project, submitter__in=submitters)
    candidates = candidates.exclude(state__in=closed_states)

    for candidate in candidates:
        if candidate.pull_url:
            # Pull requests are out of scope for this function.
            continue
        if _patches_similar(candidate.name, candidate.diff, name, content):
            yield candidate


def get_patches_matching_commit(project, repo, commit):
    """Return the Patch objects that are likely matches to the given commit.

    The commit's author and committer are resolved to Person records (those
    that cannot be resolved are dropped), the commit's patch is obtained from
    ``repo.get_patch(commit)`` and the first line of the commit message is
    used as the title. The search itself is delegated to
    get_patches_matching(), so Accepted or Superseded patches are never part
    of the results.
    """
    submitters = list(filter(None, _get_patchwork_author_committer(commit)))
    commit_diff = repo.get_patch(commit)
    title = commit.message.split('\n', 1)[0]
    return get_patches_matching(project, submitters, title, commit_diff)