aboutsummaryrefslogtreecommitdiff
path: root/lnt/testing/util/compilers.py
blob: c672f2de2694f352c4c99f38007477a655b38871 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
import hashlib
import os
import re
import tempfile

from lnt.util import logger
from commands import capture
from commands import fatal
from commands import rm_f


def ishexhash(string):
    """Return True if ``string`` looks like a 40-character lowercase hex hash.

    Only the ASCII characters ``0-9`` and ``a-f`` are accepted. Uppercase hex
    digits are rejected, and so is any non-ASCII character that merely counts
    as a "digit" for ``str.isdigit()``.
    """
    # An explicit alphabet is used instead of str.isdigit(), which is also
    # true for non-ASCII digit characters on unicode strings.
    return len(string) == 40 and \
        all(c in '0123456789abcdef' for c in string)


def is_valid(path):
    """Return whether ``path`` is an existing regular file that may be run.

    The executable check is ``os.access(path, os.X_OK)``.
    """
    if not os.path.isfile(path):
        return False
    return os.access(path, os.X_OK)


def _sha1_of_file(path):
    """Return the hexadecimal SHA-1 digest of the contents of ``path``."""
    digest = hashlib.sha1()
    # The context manager closes the handle promptly instead of leaving that
    # to garbage collection.
    with open(path, 'rb') as f:
        digest.update(f.read())
    return digest.hexdigest()


def get_cc_info(path, cc_flags=None):
    """get_cc_info(path) -> { ... }

    Extract various information on the given compiler and return a dictionary
    of the results.

    ``path`` is the compiler executable to interrogate. ``cc_flags`` is an
    optional sequence of extra flags inserted into the probing command lines;
    it defaults to no extra flags.

    The returned dictionary always contains 'cc_build', 'cc_name',
    'cc_version_number', 'cc_dumpmachine', 'cc_target', 'cc_version',
    'cc_exec_hash', 'cc_as_version', 'cc_ld_version', 'cc_target_assembly'
    and 'inferred_run_order'. The keys 'cc1_exec_hash', 'cc_src_tag',
    'cc_src_revision', 'cc_src_branch', 'cc_alt_src_revision' and
    'cc_alt_src_branch' are only present when the corresponding value could
    be determined.

    Most parse failures are reported through ``logger.error`` and leave the
    affected fields unset; ``fatal`` is called when a line that appears to
    name the cc1 binary cannot be parsed.
    """

    cc = path
    # Copy into a fresh list: this avoids a shared mutable default and lets
    # callers pass any sequence (the flags are concatenated with lists below).
    cc_flags = [] if cc_flags is None else list(cc_flags)

    # Interrogate the compiler.
    cc_version = capture([cc, '-v', '-E'] + cc_flags +
                         ['-x', 'c', '/dev/null', '-###'],
                         include_stderr=True).strip()

    # Determine the assembler version, as found by the compiler.
    cc_as_version = capture([cc, "-c", '-Wa,-v', '-o', '/dev/null'] +
                            cc_flags + ['-x', 'assembler', '/dev/null'],
                            include_stderr=True).strip()

    if "clang: error: unsupported argument '-v'" in cc_as_version:
        cc_as_version = "Clang built in."

    # Determine the linker version, as found by the compiler.
    #
    # The temporary file is created with delete=False and kept until the
    # compiler has run, so its name is never released and re-created in
    # between. The try/finally removes it even if the probe raises.
    tf = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
    try:
        with tf:
            # write() rather than a print statement keeps this valid on both
            # Python 2 and Python 3.
            tf.write("int main() { return 0; }\n")
        cc_ld_version = capture(([cc, "-Wl,-v", '-o', '/dev/null'] +
                                 cc_flags + [tf.name]),
                                include_stderr=True).strip()
    finally:
        rm_f(tf.name)

    # Extract the default target .ll (or assembly, for non-LLVM compilers).
    cc_target_assembly = capture([cc, '-S', '-flto', '-o', '-'] + cc_flags +
                                 ['-x', 'c', '/dev/null'],
                                 include_stderr=True).strip()

    # Extract the compiler's response to -dumpmachine and use it as the
    # default target; LLVM-capable compilers override it further down.
    cc_dumpmachine = capture([cc, '-dumpmachine']).strip()
    cc_target = cc_dumpmachine

    # Parse out the compiler's version line and the path to the "cc1" binary.
    cc1_binary = None
    version_ln = None
    cc_name = cc_version_num = cc_build_string = cc_extra = ""
    for ln in cc_version.split('\n'):
        if ' version ' in ln:
            version_ln = ln
        elif 'cc1' in ln or 'clang-cc' in ln:
            m = re.match(r' "?([^"]*)"?.*"?-E"?.*', ln)
            if not m:
                fatal("unable to determine cc1 binary: %r: %r" % (cc, ln))
            cc1_binary, = m.groups()
        elif "-_Amachine" in ln:
            m = re.match(r'([^ ]*) *-.*', ln)
            if not m:
                fatal("unable to determine cc1 binary: %r: %r" % (cc, ln))
            cc1_binary, = m.groups()
    if cc1_binary is None:
        logger.error("unable to find compiler cc1 binary: %r: %r" %
                     (cc, cc_version))
    if version_ln is None:
        logger.error("unable to find compiler version: %r: %r" %
                     (cc, cc_version))
    else:
        m = re.match(r'(.*) version ([^ ]*) +(\([^(]*\))(.*)', version_ln)
        if m is not None:
            cc_name, cc_version_num, cc_build_string, cc_extra = m.groups()
        else:
            # If that didn't match, try a more basic pattern.
            m = re.match(r'(.*) version ([^ ]*)', version_ln)
            if m is not None:
                cc_name, cc_version_num = m.groups()
            else:
                logger.error("unable to determine compiler version: %r: %r" %
                             (cc, version_ln))
                cc_name = "unknown"

    # Compute normalized compiler name and type. We try to grab source
    # revisions, branches, and tags when possible.
    cc_norm_name = None
    cc_build = None
    cc_src_branch = cc_alt_src_branch = None
    cc_src_revision = cc_alt_src_revision = None
    cc_src_tag = None
    llvm_capable = False
    # NOTE: cc_extra has no surrounding whitespace from here on, so the
    # patterns matched against it below must not expect a leading space.
    cc_extra = cc_extra.strip()
    if cc_name == 'icc':
        cc_norm_name = 'icc'
        cc_build = 'PROD'
        cc_src_tag = cc_version_num

    elif cc_name == 'gcc' and (cc_extra == '' or
                               re.match(r'\(dot [0-9]+\)', cc_extra)):
        cc_norm_name = 'gcc'
        m = re.match(r'\(Apple Inc. build ([0-9]*)\)', cc_build_string)
        if m:
            cc_build = 'PROD'
            cc_src_tag, = m.groups()
        else:
            logger.error('unable to determine gcc build version: %r' %
                         cc_build_string)
    elif (cc_name in ('clang', 'LLVM', 'Debian clang', 'Apple clang',
                      'Apple LLVM') and
          (cc_extra == '' or 'based on LLVM' in cc_extra or
           (cc_extra.startswith('(') and cc_extra.endswith(')')))):
        llvm_capable = True
        if cc_name == 'Apple clang' or cc_name == 'Apple LLVM':
            cc_norm_name = 'apple_clang'
        else:
            cc_norm_name = 'clang'

        m = re.match(r'\(([^ ]*)( ([0-9]+))?\)', cc_build_string)
        if m:
            cc_src_branch, _, cc_src_revision = m.groups()

            # With a CMake build, the branch is not emitted.
            if cc_src_branch and not cc_src_revision and \
                    cc_src_branch.isdigit():
                cc_src_revision = cc_src_branch
                cc_src_branch = ""

            # These show up with git-svn.
            if cc_src_branch == '$URL$':
                cc_src_branch = ""
        else:
            # Otherwise, see if we can match a branch and a tag name. That
            # could be a git hash.
            m = re.match(r'\((.+) ([^ ]+)\)', cc_build_string)
            if m:
                cc_src_branch, cc_src_revision = m.groups()
            else:
                logger.error('unable to determine '
                             'Clang development build info: %r' %
                             ((cc_name, cc_build_string, cc_extra),))
                cc_src_branch = ""

        m = re.search('clang-([0-9.]*)', cc_src_branch)
        if m:
            cc_build = 'PROD'
            cc_src_tag, = m.groups()

            # We sometimes use a tag of 9999 to indicate a dev build.
            if cc_src_tag == '9999':
                cc_build = 'DEV'
        else:
            cc_build = 'DEV'

        # Newer Clang's can report separate versions for LLVM and Clang. Parse
        # the cc_extra text so we can get the maximum SVN version.
        if cc_extra.startswith('(') and cc_extra.endswith(')'):
            m = re.match(r'\((.+) ([^ ]+)\)', cc_extra)
            if m:
                # The pattern requires both a branch and a revision, so there
                # is no branch-less form to normalize at this point.
                #
                # NOTE(review): a branch-less "(<revision>)" cc_extra does
                # not match and is reported as an error below; confirm
                # whether that form should be parsed as a revision.
                cc_alt_src_branch, cc_alt_src_revision = m.groups()
            else:
                logger.error('unable to determine '
                             'Clang development build info: %r' %
                             ((cc_name, cc_build_string, cc_extra), ))

    elif cc_name == 'gcc' and 'LLVM build' in cc_extra:
        llvm_capable = True
        cc_norm_name = 'llvm-gcc'
        m = re.match(r'\(LLVM build ([0-9.]+)\)', cc_extra)
        if m:
            llvm_build, = m.groups()
            if llvm_build:
                cc_src_tag = llvm_build.strip()
            cc_build = 'PROD'
        else:
            cc_build = 'DEV'
    else:
        logger.error("unable to determine compiler name: %r" %
                     ((cc_name, cc_build_string),))

    if cc_build is None:
        logger.error("unable to determine compiler build: %r" % cc_version)

    # If LLVM capable, fetch the llvm target instead.
    if llvm_capable:
        m = re.search('target triple = "(.*)"', cc_target_assembly)
        if m:
            cc_target, = m.groups()
        else:
            logger.error("unable to determine LLVM compiler target: %r: %r" %
                         (cc, cc_target_assembly))

    info = {
        'cc_build': cc_build,
        'cc_name': cc_norm_name,
        'cc_version_number': cc_version_num,
        'cc_dumpmachine': cc_dumpmachine,
        'cc_target': cc_target,
        'cc_version': cc_version,
        'cc_exec_hash': _sha1_of_file(cc),
        'cc_as_version': cc_as_version,
        'cc_ld_version': cc_ld_version,
        'cc_target_assembly': cc_target_assembly,
    }
    # Only hash the cc1 binary when the parsed path actually exists on disk.
    if cc1_binary is not None and os.path.exists(cc1_binary):
        info['cc1_exec_hash'] = _sha1_of_file(cc1_binary)
    if cc_src_tag is not None:
        info['cc_src_tag'] = cc_src_tag
    if cc_src_revision is not None:
        info['cc_src_revision'] = cc_src_revision
    # An empty branch is treated the same as a missing one.
    if cc_src_branch:
        info['cc_src_branch'] = cc_src_branch
    if cc_alt_src_revision is not None:
        info['cc_alt_src_revision'] = cc_alt_src_revision
    if cc_alt_src_branch is not None:
        info['cc_alt_src_branch'] = cc_alt_src_branch

    # Infer the run order from the other things we have computed.
    info['inferred_run_order'] = get_inferred_run_order(info)

    return info


def get_inferred_run_order(info):
    """Derive a run-order string from a compiler ``info`` dictionary.

    ``info`` is a dictionary such as the one built by ``get_cc_info``. The
    keys consulted are 'cc_src_revision', 'cc_alt_src_revision', 'cc_build',
    'cc_src_tag' and 'llvm_revision'; all of them are optional.

    The first rule that applies determines the result:

    1. An integral source revision: the larger of it and the (integral)
       alternate revision, as a decimal string.
    2. A 40-character hex source revision: the hash itself, or
       "<hash>,<alt hash>" when the alternate revision is a hash as well.
    3. A 'PROD' build whose source tag is a dotted decimal number: the tag.
    4. An integral 'llvm_revision': that revision.
    5. Otherwise the string '0'.
    """
    src_revision = info.get('cc_src_revision', '')
    alt_revision = info.get('cc_alt_src_revision', '')

    # If the CC has an integral src revision, use that.
    if src_revision.isdigit():
        order = int(src_revision)

        # If the CC has an alt src revision, use that if it is greater:
        if alt_revision.isdigit():
            order = max(order, int(alt_revision))

        return str(order)

    # Otherwise if we have a git hash, use that. An empty revision can never
    # be a hash, so it is skipped without consulting ishexhash().
    if src_revision and ishexhash(src_revision):
        # If we also have an alt src revision, combine them.
        #
        # We don't try and support a mix of integral and hash revisions.
        if ishexhash(alt_revision):
            return '%s,%s' % (src_revision, alt_revision)

        return src_revision

    # If this is a production compiler, look for a source tag made of
    # dot-separated decimal components. The dot is escaped so that only a
    # literal '.' separates components.
    #
    # NOTE(review): an earlier comment here said tags of 0 or 9999 are not
    # accepted, but this function applies no such filter; any dotted decimal
    # tag is returned as-is.
    if info.get('cc_build') == 'PROD':
        m = re.match(r'^[0-9]+(\.[0-9]+)*$', info.get('cc_src_tag', ''))
        if m:
            return m.group(0)

    # If that failed, infer from the LLVM revision (if specified on input).
    #
    # FIXME: This is only used when using llvm source builds with 'lnt runtest
    # nt', which itself is deprecated. We should remove this eventually.
    if info.get('llvm_revision', '').isdigit():
        return info['llvm_revision']

    # Otherwise, force at least some value for run_order, as it is now
    # generally required by parts of the "simple" schema.
    return '0'


def infer_cxx_compiler(cc_path):
    """Guess the C++ driver that corresponds to the C compiler ``cc_path``.

    Returns the path of the C++ compiler, or None when the compiler family
    is not recognized or no existing candidate was found.
    """
    directory, c_name = os.path.split(cc_path)

    # Known families, checked in this order: (substring of the C driver name,
    # name of the matching C++ driver).
    families = (
        ('clang', 'clang++'),
        ('gcc', 'g++'),
        ('icc', 'icpc'),
    )
    for c_driver, expected_cxx_name in families:
        if c_driver in c_name:
            break
    else:
        # We have no idea, give up.
        return None

    # First guess: the same file name with the C driver name swapped for the
    # C++ one, located next to the C compiler.
    sibling = os.path.join(directory,
                           c_name.replace(c_driver, expected_cxx_name))
    if os.path.exists(sibling):
        return sibling

    # Second guess: ask the compiler itself where the C++ driver lives. This
    # is useful when the compiler under test is a symlink to the real
    # compiler.
    reported = capture([cc_path,
                        '-print-prog-name=%s' % expected_cxx_name]).strip()
    if os.path.exists(reported):
        return reported

    return None


# Names exported by a star-import of this module. The variable must be spelled
# exactly ``__all__`` for Python to honor it. Every function defined in this
# file is listed, so a star-import keeps exposing all of them.
__all__ = ['get_cc_info', 'infer_cxx_compiler', 'get_inferred_run_order',
           'is_valid', 'ishexhash']


if __name__ == '__main__':
    # Ad-hoc command line use: the first argument is the compiler to inspect,
    # any remaining arguments are passed to get_cc_info as extra flags.
    import pprint
    import sys

    compiler_path = sys.argv[1]
    extra_flags = sys.argv[2:]
    pprint.pprint(('get_cc_info', get_cc_info(compiler_path, extra_flags)))
    pprint.pprint(('infer_cxx_compiler', infer_cxx_compiler(compiler_path)))