blob: 52dd8d48317b0923c144e12790218fc3c5f6be4c [file] [log] [blame]
Galina Kistanovaaeca1732019-10-18 05:25:47 +00001# LLVM buildbot needs to watch multiple projects within a single repository.
2
3# Based on the buildbot.changes.gitpoller.GitPoller source code.
4# For buildbot v0.8.5
5
6import time
7import tempfile
8import os
9import re
10import itertools
11
12from twisted.python import log
13from twisted.internet import defer, utils
14
15from buildbot.util import deferredLocked
16from buildbot.changes import base
17from buildbot.util import epoch2datetime
18
class LLVMPoller(base.PollingChangeSource):
    """
    Poll LLVM repository for changes and submit them to the change master.
    Following Multiple Projects.

    This source polls a remote LLVM git _monorepo_: it fetches ``origin``
    into a local working repository, lists the commits that are on
    ``origin/<branch>`` but not yet on the local ``<branch>``, and for each
    commit submits one change per watched top-level project directory
    touched by that commit.  After processing, the local branch is reset to
    ``origin/<branch>``.
    """

    _repourl = "https://github.com/llvm/llvm-project"
    _branch = "master"
    _categories = {
        # Project:      Category:
        'llvm'        : 'llvm',
        'cfe'         : 'clang',
        'polly'       : 'polly',
        'compiler-rt' : 'compiler-rt',
        'libcxx'      : 'libcxx',
        'libcxxabi'   : 'libcxxabi',
        'lld'         : 'lld',
        'lldb'        : 'lldb',
        'llgo'        : 'llgo',
        'openmp'      : 'openmp',
    }

    compare_attrs = ["repourl", "branch", "workdir",
                     "pollInterval", "gitbin", "usetimestamps",
                     "category", "project",
                     "projects"]

    projects = None # Projects and branches to watch.

    def __init__(self, repourl=_repourl, branch=_branch,
                 workdir=None, pollInterval=10*60,
                 gitbin='git', usetimestamps=True,
                 category=None, project=None,
                 pollinterval=-2, fetch_refspec=None,
                 encoding='utf-8', projects=None):
        """
        Configure the poller.

        repourl       -- URL of the remote repository added as ``origin``.
        branch        -- branch to follow; also the default branch paired
                         with project names given as plain strings.
        workdir       -- local working repository; a directory under the
                         system temp dir is used (with a warning) if None.
        pollInterval  -- stored on the instance as the polling interval.
        gitbin        -- git executable to run.
        usetimestamps -- if true, commit timestamps are read from git;
                         otherwise the timestamp passed on is None.
        category      -- fallback category for projects that have no entry
                         in ``_categories``.
        project       -- stored as ``self.project`` ('' if None).
        pollinterval  -- deprecated spelling of ``pollInterval``; wins when
                         set to anything other than -2.
        fetch_refspec -- extra refspec(s) appended to ``git fetch origin``.
        encoding      -- encoding used to decode author and commit message.
        projects      -- a project name, a (name, branch) tuple, or a list
                         of those; None to watch all projects.
        """

        self.cleanRe = re.compile(r"Require(?:s?)\s*.*\s*clean build", re.IGNORECASE + re.MULTILINE)
        self.cleanCfg = re.compile(r"(CMakeLists\.txt$|\.cmake$|\.cmake\.in$)")

        # projects is a list of projects to watch or None to watch all.
        if projects:
            if isinstance(projects, (str, tuple)):
                projects = [projects]
            assert isinstance(projects, list)
            assert len(projects) > 0

            # Each project to watch is a string (project name) or a tuple
            # (project name, branch) like ('llvm', 'branches/release_30').
            # But we want it always to be a tuple, so we convert a project
            # name string to a tuple (project, branch).
            self.projects = set()
            # The loop variable must not be called 'project': that would
            # overwrite the 'project' argument stored as self.project below.
            for watched in projects:
                if isinstance(watched, str):
                    watched = (watched, branch)

                assert isinstance(watched, tuple)
                self.projects.add(watched)

        # for backward compatibility; the parameter used to be spelled with 'i'
        if pollinterval != -2:
            pollInterval = pollinterval
        if project is None:
            project = ''

        self.repourl = repourl
        self.branch = branch
        self.pollInterval = pollInterval
        self.fetch_refspec = fetch_refspec
        self.encoding = encoding
        self.lastChange = time.time()
        self.lastPoll = time.time()
        self.gitbin = gitbin
        self.workdir = workdir
        self.usetimestamps = usetimestamps
        self.category = category
        self.project = project
        self.changeCount = 0
        self.commitInfo = {}
        self.initLock = defer.DeferredLock()

        if self.workdir is None:
            self.workdir = tempfile.gettempdir() + '/gitpoller_work'
            log.msg("WARNING: LLVMGitPoller using deprecated temporary workdir " +
                    "'%s'; consider setting workdir=" % self.workdir)

    def startService(self):
        """
        Resolve the workdir, initialize the repository if it does not exist
        yet, then start the base polling service.
        """
        # make our workdir absolute, relative to the master's basedir
        if not os.path.isabs(self.workdir):
            self.workdir = os.path.join(self.master.basedir, self.workdir)
            log.msg("LLVMGitPoller: using workdir '%s'" % self.workdir)

        # initialize the repository we'll use to get changes; note that
        # startService is not an event-driven method, so this method will
        # instead acquire self.initLock immediately when it is called.
        if not os.path.exists(self.workdir + r'/.git'):
            d = self.initRepository()
            d.addErrback(log.err, 'while initializing LLVMGitPoller repository')
        else:
            log.msg("LLVMGitPoller repository already exists")

        # call this *after* initRepository, so that the initLock is locked first
        base.PollingChangeSource.startService(self)

    def _git_env(self):
        "utility method building the environment passed to every git command"
        return dict(PATH=os.environ['PATH'])

    def _git_output(self, args, errortoo=False):
        "utility method running git in the workdir; returns a deferred firing with its output"
        return utils.getProcessOutput(self.gitbin, args, path=self.workdir,
                                      env=self._git_env(), errortoo=errortoo)

    @deferredLocked('initLock')
    def initRepository(self):
        """
        Create the working repository: ``git init``, add ``origin``, fetch
        it, and put the local branch at ``origin/<branch>``.

        Runs while holding ``initLock``.  Every git step converts a non-zero
        exit code into a failure and routes failures through
        ``_stop_on_failure``.  Returns the deferred for the whole chain.
        """
        d = defer.succeed(None)

        def run_step(args, path=None):
            # One checked git invocation shared by all steps below.
            step = utils.getProcessOutputAndValue(self.gitbin, args,
                    path=path, env=self._git_env())
            step.addCallback(self._convert_nonzero_to_failure)
            step.addErrback(self._stop_on_failure)
            return step

        def make_dir(_):
            dirpath = os.path.dirname(self.workdir.rstrip(os.sep))
            if not os.path.exists(dirpath):
                log.msg('LLVMGitPoller: creating parent directories for workdir')
                os.makedirs(dirpath)
        d.addCallback(make_dir)

        def git_init(_):
            log.msg('LLVMGitPoller: initializing working dir from %s' % self.repourl)
            return run_step(['init', self.workdir])
        d.addCallback(git_init)

        def git_remote_add(_):
            return run_step(['remote', 'add', 'origin', self.repourl],
                            path=self.workdir)
        d.addCallback(git_remote_add)

        def git_fetch_origin(_):
            args = ['fetch', 'origin']
            self._extend_with_fetch_refspec(args)
            return run_step(args, path=self.workdir)
        d.addCallback(git_fetch_origin)

        def set_master(_):
            log.msg('LLVMGitPoller: checking out %s' % self.branch)
            if self.branch == 'master': # repo is already on branch 'master', so reset
                args = ['reset', '--hard', 'origin/%s' % self.branch]
            else:
                args = ['checkout', '-b', self.branch, 'origin/%s' % self.branch]
            return run_step(args, path=self.workdir)
        d.addCallback(set_master)

        def get_rev(_):
            # Uses the same PATH-only environment as the other git commands.
            step = run_step(['rev-parse', self.branch], path=self.workdir)
            # res is the (stdout, stderr, code) triple; keep only stdout.
            step.addCallback(lambda res: res[0].strip())
            return step
        d.addCallback(get_rev)

        def print_rev(rev):
            log.msg("LLVMGitPoller: finished initializing working dir from %s at rev %s"
                    % (self.repourl, rev))
        d.addCallback(print_rev)
        return d

    def describe(self):
        """Return a one-line description of the watched repository and branch."""
        status = ""
        if not self.master:
            status = "[STOPPED - check log]"
        return 'LLVMGitPoller watching the remote git repository %s, branch: %s %s' \
                % (self.repourl, self.branch, status)

    @deferredLocked('initLock')
    def poll(self):
        """
        Run one poll cycle while holding ``initLock``: fetch, submit the new
        changes, then catch the local branch up.  Failures of the first two
        stages are logged and swallowed so that the catch-up still runs.
        """
        d = self._get_changes()
        d.addCallback(self._process_changes)
        d.addErrback(self._process_changes_failure)
        d.addCallback(self._catch_up)
        d.addErrback(self._catch_up_failure)
        return d

    def _get_commit_comments(self, rev):
        """
        Return a deferred firing with the decoded subject and body of ``rev``.
        Fails with EnvironmentError if git prints nothing.
        """
        args = ['log', rev, '--no-walk', r'--format=%s%n%b']
        d = self._git_output(args)
        def process(git_output):
            stripped_output = git_output.strip().decode(self.encoding)
            if len(stripped_output) == 0:
                raise EnvironmentError('could not get commit comment for rev')
            return stripped_output
        d.addCallback(process)
        return d

    def _get_commit_timestamp(self, rev):
        """
        Return a deferred firing with the commit time of ``rev`` as a float
        unix timestamp, or with None when ``usetimestamps`` is false.
        """
        # unix timestamp
        args = ['log', rev, '--no-walk', r'--format=%ct']
        d = self._git_output(args)
        def process(git_output):
            if not self.usetimestamps:
                return None
            stripped_output = git_output.strip()
            try:
                return float(stripped_output)
            except Exception:
                log.msg('LLVMGitPoller: caught exception converting output \'%s\' to timestamp' % stripped_output)
                # bare raise keeps the original traceback
                raise
        d.addCallback(process)
        return d

    def _get_commit_files(self, rev):
        """
        Return a deferred firing with the list of paths touched by ``rev``.
        """
        args = ['log', rev, '--name-only', '--no-walk', r'--format=%n']
        d = self._git_output(args)
        def process(git_output):
            # git prints one path per line; split on line boundaries only so
            # that paths containing spaces stay intact, and drop blank lines.
            return [line for line in git_output.splitlines() if line.strip()]
        d.addCallback(process)
        return d

    def _get_commit_name(self, rev):
        """
        Return a deferred firing with the decoded "Name <email>" author of
        ``rev``.  Fails with EnvironmentError if git prints nothing.
        """
        args = ['log', rev, '--no-walk', r'--format=%aN <%aE>']
        d = self._git_output(args)
        def process(git_output):
            stripped_output = git_output.strip().decode(self.encoding)
            if len(stripped_output) == 0:
                raise EnvironmentError('could not get commit name for rev')
            return stripped_output
        d.addCallback(process)
        return d

    def _get_changes(self):
        """
        Record the poll time and fetch ``origin`` (plus any configured
        refspecs).  Returns the deferred of the fetch command.
        """
        log.msg('LLVMGitPoller: polling git repo at %s' % self.repourl)

        self.lastPoll = time.time()

        # get a deferred object that performs the fetch
        args = ['fetch', 'origin']
        self._extend_with_fetch_refspec(args)

        # This command always produces data on stderr, but we actually do not care
        # about the stderr or stdout from this command. We set errortoo=True to
        # avoid an errback from the deferred. The callback which will be added to this
        # deferred will not use the response.
        return self._git_output(args, errortoo=True)

    def _transform_path(self, fileList):
        """
        Group the given list of files by project.

        The project of a path is its first '/'-separated component, with
        "clang" mapped to "cfe".  Returns a list of two-entry tuples
        (PROJECT, [FILES]) for the watched projects; every project is
        considered watched when no ``projects`` were configured.  Returns
        None if the file list is None or empty.

        NOTE: we don't change result path, just extract a project name.
        """
        if fileList is None or len(fileList) == 0:
            return None

        result = {}

        # turn libcxxabi/include/__cxxabi_config.h into
        # ("libcxxabi", "libcxxabi/include/__cxxabi_config.h")
        # and filter projects we are not watching.

        for path in fileList:
            project = path.split('/', 1)[0]
            #NOTE:TODO: a dirty hack for backward compatibility.
            if project == "clang":
                project = "cfe"

            #NOTE: multibranch is not supported.
            if self.projects and (project, self.branch) not in self.projects:
                continue

            # Collect file path for each detected projects.
            result.setdefault(project, []).append(path)

        return list(result.items())

    @defer.deferredGenerator
    def _process_changes(self, unused_output):
        """
        Submit one change per (new commit, watched project) pair.

        Lists the commits in ``<branch>..origin/<branch>``, processes them
        oldest first, and sets ``self.changeCount`` to the number of commits
        found.  A change gets the ``clean_obj`` property when the commit
        message matches ``cleanRe`` or one of its files matches ``cleanCfg``.
        """
        # get the change list
        revListArgs = ['log', '%s..origin/%s' % (self.branch, self.branch), r'--format=%H']
        self.changeCount = 0
        d = self._git_output(revListArgs)
        wfd = defer.waitForDeferred(d)
        yield wfd
        results = wfd.getResult()

        # process oldest change first
        revList = results.split()
        if not revList:
            return

        revList.reverse()
        self.changeCount = len(revList)

        log.msg('LLVMGitPoller: processing %d changes: %s in "%s"'
                % (self.changeCount, revList, self.workdir) )

        for rev in revList:
            dl = defer.DeferredList([
                self._get_commit_timestamp(rev),
                self._get_commit_name(rev),
                self._get_commit_files(rev),
                self._get_commit_comments(rev),
            ], consumeErrors=True)

            wfd = defer.waitForDeferred(dl)
            yield wfd
            results = wfd.getResult()

            # check for failures
            failures = [ r[1] for r in results if not r[0] ]
            if failures:
                # just fail on the first error; they're probably all related!
                raise failures[0]

            timestamp, name, files, comments = [ r[1] for r in results ]
            # _transform_path returns None for a commit without files;
            # such a commit simply produces no changes.
            where = self._transform_path(files) or []
            for where_project, where_project_files in where:
                properties = dict()
                if self.cleanRe.search(comments) or \
                   any(self.cleanCfg.search(f) for f in where_project_files):
                    log.msg("LLVMGitPoller: creating a change with the 'clean' property for r%s" % rev)
                    properties['clean_obj'] = (True, "change")

                log.msg("LLVMGitPoller: creating a change rev=%s" % rev)
                d = self.master.addChange(
                       author=name,
                       revision=rev,
                       files=where_project_files,
                       comments=comments,
                       when_timestamp=epoch2datetime(timestamp),
                       branch=self.branch,
                       category=self._categories.get(where_project, self.category),
                       project=where_project,
                       repository=self.repourl,
                       src='git',
                       properties=properties)
                wfd = defer.waitForDeferred(d)
                yield wfd
                # getResult() re-raises a failure of addChange, if any
                results = wfd.getResult()

    def _process_changes_failure(self, f):
        "log a failed poll and swallow the failure"
        log.msg('LLVMGitPoller: repo poll failed')
        log.err(f)
        # eat the failure to continue along the defered chain - we still want to catch up
        return None

    def _catch_up(self, res):
        """
        Hard-reset the local branch to ``origin/<branch>`` if the last poll
        found any commits; otherwise do nothing.
        """
        if self.changeCount == 0:
            log.msg('LLVMGitPoller: no changes, no catch_up')
            return
        log.msg('LLVMGitPoller: catching up tracking branch')
        args = ['reset', '--hard', 'origin/%s' % (self.branch,)]
        d = utils.getProcessOutputAndValue(self.gitbin, args, path=self.workdir, env=self._git_env())
        d.addCallback(self._convert_nonzero_to_failure)
        return d

    def _catch_up_failure(self, f):
        "log a failed catch-up together with a hint about the local repository"
        log.err(f)
        log.msg('LLVMGitPoller: please resolve issues in local repo: %s' % self.workdir)
        # this used to stop the service, but this is (a) unfriendly to tests and (b)
        # likely to leave the error message lost in a sea of other log messages

    def _convert_nonzero_to_failure(self, res):
        "utility method to handle the result of getProcessOutputAndValue"
        (stdout, stderr, code) = res
        if code != 0:
            raise EnvironmentError('command failed with exit code %d: %s' % (code, stderr))
        return (stdout, stderr, code)

    def _stop_on_failure(self, f):
        "utility method to stop the service when a failure occurs"
        if self.running:
            d = defer.maybeDeferred(self.stopService)
            d.addErrback(log.err, 'while stopping broken GitPoller service')
        return f

    def _extend_with_fetch_refspec(self, args):
        "utility method appending the configured fetch refspec(s) to args in place"
        if self.fetch_refspec:
            # A tuple of refspecs is expanded like a list or a set; any
            # other value is passed to git as one single refspec.
            if isinstance(self.fetch_refspec, (list, tuple, set)):
                args.extend(self.fetch_refspec)
            else:
                args.append(self.fetch_refspec)
435
436
437# Run: python -m zorg.buildbot.changes.llvmgitpoller
if __name__ == '__main__':
    # Self-test: build a poller watching a fixed set of projects and show
    # how _transform_path groups a sample file list by project.
    # print() with one parenthesised argument behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print("Testing Git LLVMPoller...")
    poller = LLVMPoller(projects = [
        "llvm",
        "cfe",
        "clang-tests-external",
        "clang-tools-extra",
        "polly",
        "compiler-rt",
        "libcxx",
        "libcxxabi",
        "libunwind",
        "lld",
        "lldb",
        "openmp",
        "lnt",
        "test-suite"
        ],
        workdir = os.getcwd()
    )

    # Test _transform_path method.
    fileList = [
        "clang-tools-extra/clang-doc/Generators.cpp",
        "clang-tools-extra/clang-doc/Generators.h",
        "clang-tools-extra/clang-doc/HTMLGenerator.cpp",
        "clang-tools-extra/clang-doc/MDGenerator.cpp",
        "clang-tools-extra/clang-doc/Representation.cpp",
        "clang-tools-extra/clang-doc/Representation.h",
        "clang-tools-extra/clang-doc/YAMLGenerator.cpp",
        "clang-tools-extra/clang-doc/assets/clang-doc-default-stylesheet.css",
        "clang-tools-extra/clang-doc/assets/index.js",
        "clang-tools-extra/clang-doc/stylesheets/clang-doc-default-stylesheet.css",
        "clang-tools-extra/clang-doc/tool/CMakeLists.txt",
        "clang-tools-extra/clang-doc/tool/ClangDocMain.cpp",
        "clang-tools-extra/unittests/clang-doc/CMakeLists.txt",
        "clang-tools-extra/unittests/clang-doc/ClangDocTest.cpp",
        "clang-tools-extra/unittests/clang-doc/ClangDocTest.h",
        "clang-tools-extra/unittests/clang-doc/GeneratorTest.cpp",
        "clang-tools-extra/unittests/clang-doc/HTMLGeneratorTest.cpp",

        "llvm/docs/BugpointRedesign.md",
        "llvm/test/Reduce/Inputs/remove-funcs.sh",
        "llvm/test/Reduce/remove-funcs.ll",
        "llvm/tools/LLVMBuild.txt",
        "llvm/tools/llvm-reduce/CMakeLists.txt",
        "llvm/tools/llvm-reduce/DeltaManager.h",
        "llvm/tools/llvm-reduce/LLVMBuild.txt",
        "llvm/tools/llvm-reduce/TestRunner.cpp",
        "llvm/tools/llvm-reduce/TestRunner.h",
        "llvm/tools/llvm-reduce/deltas/Delta.h",
        "llvm/tools/llvm-reduce/deltas/RemoveFunctions.cpp",
        "llvm/tools/llvm-reduce/deltas/RemoveFunctions.h",
        "llvm/tools/llvm-reduce/llvm-reduce.cpp",

        "openmp/libomptarget/test/mapping/declare_mapper_api.cpp",

        # This project is not in the watched list above.
        "unknown/lib/unknonw.cpp"
    ]

    # _transform_path returns None for an empty file list; iterate over an
    # empty list in that case instead of failing.
    where = poller._transform_path(fileList) or []
    for where_project, where_project_files in where:
        print("category: %s" % poller._categories.get(where_project, poller.category))
        print("project: %s, files(%s): %s\n" % (where_project, len(where_project_files), where_project_files))