import datetime
import logging
import os
import subprocess
import tempfile

import dulwich.repo

from django.conf import settings
from patchwork.parser import parse_patch

log = logging.getLogger("gitrepo")


def croncmd(args, cwd="./", timeout=None, get_fail_logger=None):
    """Run a command, capturing combined stdout/stderr in a spooled temp file.

    When ``timeout`` is given, the command is wrapped with the ``timeout(1)``
    utility. On failure the captured output is logged with ``log.error`` (or
    with the logger returned by ``get_fail_logger``, if provided) and the
    ``CalledProcessError`` is re-raised.
    """
    if timeout:
        args = ["timeout", str(timeout)] + args

    with tempfile.SpooledTemporaryFile(max_size=4096) as f:
        try:
            subprocess.check_call(args, cwd=cwd, stdout=f, stderr=f)
            if log.isEnabledFor(logging.DEBUG):
                log.debug("Results of cmd(%s): %r", cwd, args)
                f.seek(0)
                log.debug("COMBINED OUTPUT:\n%s", f.read())
        except subprocess.CalledProcessError as e:
            logger = log.error
            if get_fail_logger:
                logger = get_fail_logger()
            logger("Unable to run command(%s): %r", cwd, args)
            if timeout and e.returncode == 124:
                logger("Command timed out")
            f.seek(0)
            if e.output:
                logger("STDOUT:\n%s", e.output)
                logger("STDERR:\n%s", f.read())
            else:
                logger("COMBINED OUTPUT:\n%s", f.read())
            raise


class Repo(object):
    """Our patchwork deployments try and automatically update patches by
    looking at the change history on a repository. This class provides a
    simple interface to analyze new commits
    """

    def __init__(self, repo_dir, name, scm_url):
        self.path = os.path.join(repo_dir, name)
        self.scm_url = scm_url
        self._repo = None

    @property
    def repo(self):
        if not self._repo:
            self._repo = dulwich.repo.Repo(self.path)
        return self._repo

    def __getitem__(self, key):
        return self.repo[key]

    def _clone(self):
        croncmd(["git", "clone", "--mirror", self.scm_url, self.path])

    def _pull(self):
        fail_file = os.path.join(self.path, "failures")

        def get_fail_logger():
            with open(fail_file, "a+") as f:
                f.write("failed at: %s\n" % str(datetime.datetime.now()))
                f.seek(0)
                for count, line in enumerate(f, 1):
                    if count > 3:
                        return log.error
            return log.info

        timeout = str(getattr(settings, "REPO_TIMEOUT", 120))
        croncmd(
            ["git", "remote", "-v", "update", "--prune"],
            self.path,
            timeout,
            get_fail_logger,
        )
        if os.path.exists(fail_file):
            # clean out old failures, now that we've succeeded
            os.unlink(fail_file)

    def update(self):
        if not os.path.exists(self.path):
            self._clone()
        else:
            try:
                self._pull()
            except subprocess.CalledProcessError:
                # We've already logged the exception. Code can continue to run
                # because it's just going to call process_unchecked_commits and
                # essentially be a no-op.
                pass

    def process_unchecked_commits(self, save_state=True):
        """Yield commits made since the last processed commit, oldest first.

        The last yielded commit is recorded in a "patchwork-last-commit" file
        inside the repository unless save_state is False.
        """
        last_commit = os.path.join(self.path, "patchwork-last-commit")
        if os.path.exists(last_commit):
            with open(last_commit, "r") as f:
                start_at = f.read().strip()
        else:
            start_at = "HEAD~100"

        log.debug("looking for commits since: %s", start_at)
        args = ["git", "rev-list", "--reverse", start_at + "..HEAD"]
        with open("/dev/null", "w") as f:
            rc = subprocess.call(
                ["git", "show", start_at], cwd=self.path, stdout=f, stderr=f
            )
        if rc != 0:
            # we may have had a branch whose history was re-written;
            # just try to get changes for the past day
            args = ["git", "rev-list", "--reverse", "--since",
                    "1 day ago", "HEAD"]

        try:
            for x in subprocess.check_output(
                    args, cwd=self.path, text=True).split("\n"):
                if x:
                    yield self.repo[x.encode("utf-8")]
                    start_at = x
        finally:
            if save_state:
                with open(last_commit, "w") as f:
                    f.write(start_at)

    def get_patch(self, commit):
        args = ["git", "show", "--format=format:%e", "-M", commit.id.decode()]
        patch = subprocess.check_output(args, cwd=self.path)
        # the patchwork parser code operates character by character, so we
        # must decode to unicode text for it to be handled properly
        patch = patch.decode("utf-8", errors="replace")
        # Don't try to process patches larger than 5MB; they flood the server
        if len(patch) > 5000000:
            raise MemoryError("patch too large to process: %d" % len(patch))
        patch = parse_patch(patch)[0]
        if patch is None:
            # happens for binary only patches like:
            # https://git.linaro.org/uefi/OpenPlatformPkg.git/commit/ \
            # ?id=7ab4bb34b2464a2491868264bdf2931f2acd6452
            patch = ""
        return patch