diff options
author | Aaron Watry <awatry@gmail.com> | 2013-03-11 18:25:00 -0500 |
---|---|---|
committer | Aaron Watry <awatry@gmail.com> | 2013-03-12 08:24:45 -0500 |
commit | b7184ad66af06f1f56857ee79982e347cd553eb8 (patch) | |
tree | 07a96d012cb09a22e2c146859791971b0a673c2f | |
parent | 772a0d0b00c5c947e61acbcbae8b36468759c031 (diff) |
Add a python generator for integer builtin function tests [v2]
Functions tested:
abs, abs_diff, add_sat, hadd, rhadd, clz, clamp, mad_hi,
mad_sat, max, min, mul_hi, rotate, sub_sat, upsample, mad24, mul24
Data Types tested:
char, uchar, short, ushort, int, uint, long, ulong
Sizes tested:
Scalar, Vector sizes 2/4/8/16
v2:
- Move file into generated_tests and rename to generate-cl-int-builtins.py
- Generate tests in cl/builtin/int (creating dir if necessary)
-rwxr-xr-x | generated_tests/generate-cl-int-builtins.py | 879 |
1 files changed, 879 insertions, 0 deletions
diff --git a/generated_tests/generate-cl-int-builtins.py b/generated_tests/generate-cl-int-builtins.py new file mode 100755 index 00000000..24605946 --- /dev/null +++ b/generated_tests/generate-cl-int-builtins.py @@ -0,0 +1,879 @@ +#!/usr/bin/env python + +import os + +#Builtins is a data structure of the following: +# builtins = { +# '{data_type}': { #data type is any of [u]char, [u]short, [u]int, [u]long +# '{builtin_function_name}': { +# 'arg_types': ['{data_type}', '{data_type}', ...], +# 'function_type': 'ttt'|'tss', +# #ttt = all arguments are same-length vectors +# #tss = all arguments are either same-length vectors, +# or a vector followed by scalars +# 'values': [ +# [array of test output (arg0) values], +# [array of arg1 values], +# [array of arg2 values], +# ... +# ] +# }, +# '{next_function}': {...}, +# ... +# }, +# '{next_type}': {...}, +# ... +# } +# +# The builtins_generic, builtins_signed/unsigned are the same, but lack the +# {datatype} layer + +#Define placeholders to reduce magic number usage +MAX = 'MAX_VAL' +MIN = 'MIN_VAL' + +SIGNED_TYPES = ['char', 'short', 'int', 'long'] +UNSIGNED_TYPES = ['uchar', 'ushort', 'uint', 'ulong'] +DATA_TYPES = SIGNED_TYPES + UNSIGNED_TYPES +DATA_SIZES = { + 'char' : 8, + 'uchar' : 8, + 'short' : 16, + 'ushort': 16, + 'int' : 32, + 'uint' : 32, + 'long' : 64, + 'ulong' : 64 +} + +#By default, just test what is part of the CL1.1 spec, leave vec3 for later +#VEC_WIDTHS = (2, 3, 4, 8, 16) +VEC_WIDTHS = (2, 4, 8, 16) +#ALL_WIDTHS = [1, 2, 3, 4, 8, 16] +ALL_WIDTHS = [1, 2, 4, 8, 16] + +MIN_VALUES = { + 'char' : -128, + 'uchar' : 0, + 'short' : -32768, + 'ushort' : 0, + 'int' : -2147483648, + 'uint' : 0, + 'long' : -9223372036854775808, + 'ulong' : 0 +} + +MAX_VALUES = { + 'char' : 127, + 'uchar' : 255, + 'short' : 32767, + 'ushort' : 65535, + 'int' : 2147483647, + 'uint' : 4294967295, + 'long' : 9223372036854775807, + 'ulong' : 18446744073709551615 +} + +#Identity type list +T = { + 'char' : 'char', + 'uchar' : 'uchar', + 'short' : 'short', + 'ushort': 'ushort', + 'int' : 'int', + 'uint' : 'uint', + 'long' : 'long', + 'ulong' : 'ulong' +} +#Signed type for each type +SIGNED = { + 'char' : 'char', + 'uchar' : 'char', + 'short' : 'short', + 'ushort': 'short', + 'int' : 'int', + 'uint' : 'int', + 'long' : 'long', + 'ulong' : 'long' +} +#Unsigned type for each source type +U = { + 'char' : 'uchar', + 'uchar' : 'uchar', + 'short' : 'ushort', + 'ushort': 'ushort', + 'int' : 'uint', + 'uint' : 'uint', + 'long' : 'ulong', + 'ulong' : 'ulong' +} +#Next larger type with same signedness +B = { + 'char': 'short', + 'uchar': 'ushort', + 'short': 'int', + 'ushort': 'uint', + 'int': 'long', + 'uint': 'ulong', +} + +BMIN = 'min_for_larger_type' +BMAX = 'max_for_larger_type' +SMIN = 'signed_min_for_type' +SMAX = 'signed_max_for_type' +UMIN = 'unsigned_min_for_type' +UMAX = 'unsigned_max_for_type' +TYPE = 'TYPE' +SIZE = 'SIZE' + +CLC_VERSION_MIN = { + 'abs' : 10, + 'abs_diff' : 10, + 'add_sat' : 10, + 'hadd' : 10, + 'rhadd' : 10, + 'clz' : 10, + 'clamp' : 11, + 'mad_hi' : 10, + 'mad_sat' : 10, + 'max' : 11, #max/min are only same-size in CL1.0, but TSS in CL1.1 + 'min' : 11, + 'mul_hi' : 10, + 'rotate' : 10, + 'sub_sat' : 10, + 'upsample' : 10, + 'mad24' : 10, + 'mul24' : 10 +} + +def abs(val): + if (val < 0): + return val*-1 + return val + +def add(val1, val2): + return val1+val2 + +#Given a data type, return the next bigger type of given signedness. +def big(type): + return B[type] + +def clz(type, val): + if (val < 0): + return 0 + else: + #Count the number of times that we can right shift before value = 0 then + #subtract that from (data_size - 1) + count=0 + while(val > 0): + if (val > 0): + val = val >> 1 + count = count + 1 + return DATA_SIZES[type] - count + +def div(val1, val2): + return val1 / val2 + +def mad_hi(x, y, z, type): + res = (x*y) >> DATA_SIZES[type] + res = res + z + while (res > MAX_VALUES[type]): #Emulate overflow... Necessary? + res = res - (2**DATA_SIZES[type]) + return res + +def mul(val1, val2): + return val1 * val2 + +def mul_hi(x,y,type): + res = (x*y) >> DATA_SIZES[type] + return res + +#def pop(val,type): +# #TODO: Calculate number of non-zero bits in value (CLC 1.2) +# return 0 + +def pow(val,pow): + return val ** pow + +def rotate_right(x, n, bits): + mask = (2L**n) - 1 + mask_bits = x & mask + return (x >> n) | (mask_bits << (bits - n)) + +def rotate_left(x, n, bits): + return rotate_right(x, bits - n, bits) + +def rot(x, n, bits): + if (n < 0): + return rotate_right(x, -1*n, bits) + else: + return rotate_left(x, n, bits) + +def sub(val1, val2): + return val1-val2 + +def getValue(type, val): + #Check if val is a str, list, or value + if (isinstance(val, str)): + if (val == MIN): + return MIN_VALUES[type] + elif (val == MAX): + return MAX_VALUES[type] + elif (val == BMIN): + return MIN_VALUES[B[type]] + elif (val == BMAX): + return MAX_VALUES[B[type]] + elif (val == SMIN): + return MIN_VALUES[SIGNED[type]] + elif (val == SMAX): + return MAX_VALUES[SIGNED[type]] + elif (val == UMIN): + return MIN_VALUES[U[type]] + elif (val == UMAX): + return MAX_VALUES[U[type]] + elif (val == TYPE): + return type + elif (val == SIZE): + return DATA_SIZES[type] + else: + print('Unknown string value: '+val+'\n') + elif (isinstance(val, list)): + #The list should be of the format: [op, arg1, ... argN] where op is a Fn + #ref and arg[1-n] are either MIN/MAX or numbers (They could be nested + #lists). The exception for arg1 is TYPE, which means to substitute the + #data type + + #Evaluate the value of the requested function and arguments + #TODO: Change to varargs calls after unshifting the first list element + if (len(val) == 2): + return (val[0])(getValue(type,val[1])) + elif (len(val) == 3): + return (val[0])(getValue(type,val[1]), getValue(type, val[2])) + elif (len(val) == 4): + return (val[0])(getValue(type,val[1]), getValue(type, val[2]), \ + getValue(type, val[3])) + else: + return (val[0])(getValue(type,val[1]), getValue(type, val[2]), \ + getValue(type, val[3]), getValue(type, val[4])) + + #At this point, we should have been passed a number + if (isinstance(val, int)): + return val; + + print('Invalid value '+val+' encountered in getValue\n') + +def getStrVal(type, val): + return str(getValue(type,val)) + +#Tests which don't depend on the signedness or bit-width of the inputs +generic_tests = { + 'abs': { + 'arg_types': [U, T], + 'function_type': 'ttt', + 'values': [ + [0, 2, [abs,MIN], [abs,MAX]], + [0, 2, MIN, MAX] + ] + }, + 'abs_diff': { + 'arg_types': [U, T, T], + 'function_type': 'ttt', + 'values': [ + [0, 1, 1, UMAX, UMAX], + [0, 1, 0, MIN, MAX], + [0, 0, 1, MAX, MIN] + ] + }, + 'add_sat': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [MAX, MAX ], + [MAX, [sub,MAX,1]], + [ 64, 50 ] + ] + }, + 'hadd': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [32, 0, 10, MAX, MIN, + [div,[add,MAX,MIN], 2], + [div,[add,MIN,MAX], 2] + ], + [32, 1, 12, MAX, MIN, MAX, MIN], + [33, 0, 8, MAX, MIN, MIN, MAX] + ] + }, + 'clz': { + 'arg_types': [T, T], + 'function_type': 'ttt', + 'values': [ + [ [clz,TYPE,1], [clz,TYPE,64], [clz,TYPE,0], [clz,TYPE, MAX], + [clz,TYPE,MIN] + ], + [ 1 , 64 , 0 , MAX , + MIN + ] + ] + }, + 'clamp': { + 'arg_types': [T, T, T, T], + 'function_type': 'tss', + 'values': [ + [64, [div, MIN, 2], 1], + [92, MIN, 64], + [ 0, [div, MIN, 2], 0], + [64, 0, 1] + ] + }, + 'mad_hi': { + 'arg_types': [T, T, T, T], + 'function_type': 'ttt', + 'values': [ + [ [mad_hi,[div,MAX,2],3,1,TYPE], [mad_hi,MIN,2,2,TYPE], 4, 1, + [mad_hi,MAX,MAX,MAX,TYPE], [mad_hi,MIN,MIN,MIN,TYPE], + [mad_hi,MIN,MAX,MAX,TYPE], [mad_hi,MAX, 2, 2,TYPE] + ], + [ [div,MAX,2], MIN, 12, MAX, MAX, MIN, MIN, MAX], + [ 3, 2, 4, 1, MAX, MIN, MAX, 2], + [ 1, 2, 4, 1, MAX, MIN, MAX, 2] + ] + }, + 'mad_sat': { + 'arg_types': [T, T, T, T], + 'function_type': 'ttt', + 'values': [ + [52, MAX, 93, 0, MAX, MAX], + [12, MAX, 92, 0, MAX, MAX], + [ 4, 1, 1, 0, 2, MAX], + [ 4, 1, 1, 0, 2, MAX] + ] + }, + 'max': { + 'arg_types': [T, T, T], + 'function_type': 'tss', + 'values': [ + [92, 2, 12, MAX, 1, MAX, MIN, MAX, MAX, 0], + [92, 2, 12, MAX, MIN, MAX, MIN, MIN, MAX, 0], + [ 2, MIN, 4, 1, 1, MAX, MIN, MAX, MIN, 0] + ] + }, + 'min': { + 'arg_types': [T, T, T], + 'function_type': 'tss', + 'values': [ + [ 2, 1, MIN, 4, 1, MIN, MAX, MIN, MIN, 0, MAX], + [92, 64, 2, 12, MAX, MIN, MAX, MIN, MIN, 0, MAX], + [ 2, 1, MIN, 4, 1, 1, MAX, MIN, MAX, 0, MAX] + ] + }, + 'mul_hi': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [0, 0, 0, [mul_hi,MAX,MAX,TYPE], [mul_hi,MIN,MIN,TYPE], 0, + [mul_hi,MAX,2,TYPE], [div,MIN,2] + ], + [0, 92, MAX, MAX, MIN, 92, MAX, MIN], + [0, 2, 1, MAX, MIN, 1, 2, MAX] + ] + }, + 'rhadd': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [33, 1, 10], + [32, 1, 12], + [33, 0, 8] + ] + }, + 'rotate': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [1, 8, 1, 2, 1], + [1, 1, 1, 1, 1], + [0, 3, SIZE, [add,SIZE,1], [mul,SIZE,10]] + ] + }, + 'sub_sat': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [1, 25, MAX, 0, 0, MIN, MIN], + [1, 57, MAX, MAX, MIN, MIN, [div,MIN,2]], + [0, 32, MIN, MAX, MIN, MAX, [add,[div,MAX,2],1]] + ] + }, + 'upsample': { + 'arg_types': [B,T,U], + 'function_type': 'ttt', + 'values': [ + [[pow,2,SIZE], [add,[pow,2,SIZE],1], BMAX, 0, MAX, + [add,[pow,2,SIZE],7] + ], + [ 1, 1, MAX, 0, 0, 1], + [ 0, 1, UMAX, 0, MAX, 7] + ] + } +} + +#Any test that conceivably includes a negative number as input/output +signed_generic_tests = { + 'abs': { + 'arg_types': [U, T], + 'function_type': 'ttt', + 'values': [ + [ 1, 13], + [-1, -13] + ] + }, + 'abs_diff': { + 'arg_types': [U, T, T], + 'function_type': 'ttt', + 'values': [ + [1, 15], + [0, -8], + [1, 7] + ] + }, + 'add_sat': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [ 0, -2, [sub,MAX,63], MIN, MIN ], + [ 32, -8, MAX, MIN, [add,MIN,10]], + [-32, 6, -63, -12, -50 ] + ] + }, + 'clamp': { + 'arg_types': [T, T, T, T], + 'function_type': 'tss', + 'values': [ + [ -64, 0], + [-128, -1], + [ -64, 0], + [ 0, 1] + ] + }, + 'mad_hi': { + 'arg_types': [T, T, T, T], + 'function_type': 'ttt', + 'values': [ + [MIN, -2], + [ -1, 1], + [MIN, -1], + [MIN, -1] + ] + }, + 'mad_sat': { + 'arg_types': [T, T, T, T], + 'function_type': 'ttt', + 'values': [ + [ 0, MIN, MIN, MAX, MIN, -2], + [ -1, MIN, MIN, MIN, MIN, 1], + [ MIN, 2, 1, MIN, MAX, -1], + [ MIN, 2, -1, MIN, MAX, -1] + ] + }, + 'max': { + 'arg_types': [T, T, T], + 'function_type': 'tss', + 'values': [ + [ -1, 1], + [ -1, 1], + [ MIN, -1] + ] + }, + 'min': { + 'arg_types': [T, T, T], + 'function_type': 'tss', + 'values': [ + [MIN, -1, MIN], + [ -1, 1, MIN], + [MIN, -1, -1] + ] + }, + 'mul_hi': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [ 0, -1, -1, -1], + [ -1, MIN, MIN, 1], + [MIN, 2, 1, -1] + ] + }, + 'rhadd': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [-32], + [-33], + [-32] + ] + }, + 'rotate': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [ MIN, MIN, [rot,1,-3,SIZE], 1, + MIN, [pow,2,[sub,SIZE,2]], MIN], + [ 1, 1, 1, 1, + 1, 1, 1], + [[sub,SIZE,1], -1, -3, [mul,SIZE,-1], + [mul,[add,SIZE,1],-1], [mul,[add,SIZE,2],-1], [sub,SIZE,1]] + ] + }, + 'sub_sat': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [ MAX , 81], + [[sub,MAX,8], 1], + [ -32 , -80] + ] + }, + 'upsample': { + 'arg_types': [B,T,U], + 'function_type': 'ttt', + 'values': [ + [ -1, [mul,[pow,2,SIZE],-1]], + [ -1, -1], + [ UMAX, 0] + ] + } + +} + +#This list contains all numeric tests which never include negative integers +#that can't go into generic_tests. +unsigned_generic_tests = { + 'mad_sat': { + 'arg_types': [T, T, T, T], + 'function_type': 'ttt', + 'values': [ + [ 2, MIN, MAX], + [MIN, MIN, MIN], + [ 2, MIN, MAX], + [ 2, MIN, MAX] + ] + }, + 'rotate': { + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [[div,[add,MAX,1],2], [div,[add,MAX,1],2]], + [ 1, 1], + [ [sub,SIZE,1], [sub,SIZE,1]] + ] + }, +} + +#Hand-coded tests which are data type specific. +builtins = { + 'int': { + 'mad24':{ + 'arg_types': [T, T, T, T], + 'function_type': 'ttt', + 'values': [ + [0,2, 0,520, 1], + [0,1,-1, 32, 2**30], + [0,1,-1, 16, 1], + [0,1,-1, 8, 1] + ] + }, + 'mul24':{ + 'arg_types': [T, T, T], + 'function_type': 'ttt', + 'values': [ + [0,1, 1, 512, 0], + [0,1,-1, 32, 2**30], + [0,1,-1, 16, 1] + ] + } + } +} + +#### Define helper functions #### + +def addTestValues(origDef, origValues): + fnDef = dict(origDef) + values = list(origValues) + if (not 'values' in fnDef): + fnDef['values'] = [] + for idx in range(0, len(values)): + fnDef['values'].append(list(values[idx])) + + else: + for arg in range(0, len(values)): + fnDef['values'][arg] += values[arg] + return fnDef + +#Given a data type and function name, create one set of combined applicable +#test definitions. +def mergedTestDefinition(dataType, fnName): + mergedTest = dict() + + testLists = [generic_tests] + if (dataType in SIGNED_TYPES): + testLists += [signed_generic_tests] + if (dataType in UNSIGNED_TYPES): + testLists += [unsigned_generic_tests] + if (dataType in builtins): + testLists += [builtins[dataType]] + + for testList in testLists: + if (fnName in testList): + fnDef = dict(testList[fnName]) + if (not 'arg_types' in mergedTest): + mergedTest['arg_types'] = list(fnDef['arg_types']) + mergedTest['function_type'] = fnDef['function_type'] + mergedTest = addTestValues(dict(mergedTest), list(fnDef['values'])) + return mergedTest + +def getFnNames(): + fnNames = [] + fnNames += generic_tests.keys() + fnNames += signed_generic_tests.keys() + fnNames += unsigned_generic_tests.keys() + for type in DATA_TYPES: + if (type in builtins): + fnNames += builtins[type].keys() + + #Get the sorted unique set of function names + return sorted(list(set(fnNames))) + +#vecSizes has the layout [in0width, ..., inNwidth] where outType width is +#assumed to match the width of the first input +def gen_kernel(f, fnName, inTypes, outType, vecSizes, typePrefix): + f.write( \ + 'kernel void test_' + typePrefix + str(vecSizes[0]) + '_' + fnName + \ + '_' + inTypes[0]+'(global '+outType+'* out') + for arg in range(0, len(inTypes)): + f.write(', global '+inTypes[arg]+'* in'+str(arg)) + f.write('){\n') + + suffix = ';' + if (vecSizes[0] == 1): + f.write(' *out = ') + else: + f.write(' vstore'+str(vecSizes[0])+'(') + suffix = ', 0, out)' + suffix + + f.write(fnName+'(') + suffix = ')' + suffix + + for arg in range(0, len(inTypes)): + if (arg > 0): + f.write(', ') + #if scalar, don't print vload/vstore + if (vecSizes[arg] == 1): + f.write('*in'+str(arg)) + else: + f.write('vload'+str(vecSizes[arg])+'(0, in'+str(arg)+')') + + f.write(suffix+'\n}\n\n') + + +def getArgType(baseType, argType): + #If the argType is a string, it's a literal data type... return it + if (isinstance(argType, str)): + return argType + #otherwise it's a list to pick from + return argType[baseType] + +def getArgTypes(baseType, argTypes): + ret = [] + for argType in argTypes: + ret.append(getArgType(baseType,argType)) + return ret + +#Print a test with all-vector inputs/outputs and/or mixed vector/scalar args +def print_test(f, fnName, argType, functionDef, tests, testIdx, vecSize, tss): + #If the test allows mixed vector/scalar arguments, handle the case with + #only vector arguments through a recursive call. + if (tss): + print_test(f, fnName, argType, functionDef, tests, testIdx, vecSize, \ + False) + + #The tss && vecSize==1 case is handled in the non-tss case. + if (tss and vecSize == 1): + return + + #If we're handling mixed vector/scalar input widths, the kernels have + #different names than when the vector widths match + tssStr = 'tss_' + if (not tss): + tssStr = '' + + #Write the test header + f.write('[test]\n' + \ + 'name: ' + fnName + ' ' + argType + str(vecSize) + '\n' + \ + 'kernel_name: test_'+ tssStr + str(vecSize) + '_' + fnName + '_' + \ + argType + '\n' + ) + + argTypes = getArgTypes(argType, functionDef['arg_types']) + argCount = len(argTypes) + + #For each argument, write a line containing its type, index, and values + for arg in range(0, argCount): + argInOut = '' + argVal = getStrVal(argType, tests[arg][testIdx]) + if arg == 0: + argInOut = 'arg_out: ' + else: + argInOut = 'arg_in: ' + + #The output argument and first tss argument are vectors, any that follow + #are scalar. If !tss, then everything has a matching vector width + if (arg < 2 or not tss): + f.write(argInOut + str(arg) + ' buffer ' + argTypes[arg] + \ + '[' + str(vecSize) + '] ' + \ + ' '.join([argVal]*vecSize) + \ + '\n' + ) + else: + argInOut = 'arg_in: ' + f.write(argInOut + str(arg) + ' buffer ' + argTypes[arg] + \ + '[1] ' + argVal + '\n' + ) + + #Blank line between tests for formatting reasons + f.write('\n') + +def gen_kernel_1_arg(f, fnName, inType, outType): + for vecSize in ALL_WIDTHS: + gen_kernel(f, fnName, [inType], outType, [vecSize], '') + +# 2 argument kernel with input types that match +def gen_kernel_2_arg_same_type(f, fnName, inType, outType): + for vecSize in ALL_WIDTHS: + gen_kernel(f, fnName, [inType, inType], outType, [vecSize, vecSize], '') + +# 2 argument kernel with 1 vector and one scalar input argument +def gen_kernel_2_arg_mixed_size(f, fnName, inType, outType): + for vecSize in VEC_WIDTHS: + gen_kernel(f, fnName, [inType, inType], outType, [vecSize, 1], 'tss_') + +# 2 argument kernel with 1 vector and one scalar input argument with multiple +# input data types +def gen_kernel_2_arg_mixed_sign(f, fnName, inType1, inType2, outType): + for vecSize in ALL_WIDTHS: + gen_kernel(f, fnName, [inType1, inType2], outType, [vecSize, vecSize], \ + '') + +# 3-argument built-in functions + +def gen_kernel_3_arg_same_type(f, fnName, inType, outType): + for vecSize in ALL_WIDTHS: + gen_kernel(f, fnName, [inType, inType, inType], outType, \ + [vecSize, vecSize, vecSize], '') + +def gen_kernel_3_arg_mixed_size_vector(f, fnName, inType, outType, vecSize): + f.write( \ + 'kernel void test_tss_' + vecSize + '_' + fnName + '_' + inType + \ + '(global ' + outType + '* out, global ' + inType + '* in1, global ' + \ + inType+'* in2, global '+inType+'* in3){\n' + \ + ' vstore' + vecSize + '(' + fnName + '(vload' + vecSize + \ + '(0, in1), *in2, *in3), 0, out);\n' + \ + '}\n\n' + ) + +def gen_kernel_3_arg_mixed_size(f, fnName, inType, outType): + for vecSize in VEC_WIDTHS: + gen_kernel(f, fnName, [inType, inType, inType], outType, \ + [vecSize, 1, 1], 'tss_') + +def generate_kernels(f, dataType, fnName, fnDef): + argTypes = getArgTypes(dataType,fnDef['arg_types']) + + #For len(argTypes), remember that this includes the output arg + if (len(argTypes) == 2): + gen_kernel_1_arg(f, fnName, argTypes[1], argTypes[0]) + return + + if (len(argTypes) == 3 and not fnName is 'upsample'): + gen_kernel_2_arg_same_type(f, fnName, argTypes[1], argTypes[0]) + if (fnDef['function_type'] is 'tss'): + gen_kernel_2_arg_mixed_size(f, fnName, argTypes[1], argTypes[0]) + return + + if (len(argTypes) == 4): + gen_kernel_3_arg_same_type(f, fnName, argTypes[1], argTypes[0]) + if (fnDef['function_type'] is 'tss'): + gen_kernel_3_arg_mixed_size(f, fnName, argTypes[1], argTypes[0]) + return + + if (fnName is 'upsample'): + gen_kernel_2_arg_mixed_sign(f, fnName, argTypes[1], argTypes[2], \ + argTypes[0]) + return + +#### Main logic start #### + +def main(): + #Create the output directory if required + dirName = os.path.join( "cl", "builtin", "int") + if not os.path.exists(dirName): + os.makedirs(dirName) + + #Loop over all data types being tested. Create one output file per data type + for dataType in DATA_TYPES: + functions = getFnNames() #List of all built-in functions + for fnName in functions: + if (fnName is 'upsample' and (dataType is 'long' \ + or dataType is 'ulong')): + continue + #Merge all of the generic/signed/unsigned/custom test definitions + functionDef = mergedTestDefinition(dataType, fnName) + + #Check if the function actually exists for this data type + if (not functionDef.keys()): + continue + + clcVersionMin = CLC_VERSION_MIN[fnName] + + fileName = 'builtin-' + dataType + '-' + fnName + '-' + \ + str(float(clcVersionMin)/10)+'.generated.cl' + + fileName = os.path.join(dirName, fileName) + + f = open(fileName, 'w') + print(fileName) + #Write the file header + f.write( \ + '/*!\n' + \ + '[config]\n' + \ + 'name: Test '+dataType+' '+fnName+' built-in on CL 1.1\n'+ \ + 'clc_version_min: '+str(clcVersionMin)+'\n' + \ + 'dimensions: 1\n' + \ + 'global_size: 1 0 0\n\n' + ) + + #Write all tests for the built-in function + tests = functionDef['values'] + argCount = len(functionDef['arg_types']) + fnType = functionDef['function_type'] + + outputValues = tests[0] + numTests = len(outputValues) + + #Handle all available scalar/vector widths + sizes = sorted(VEC_WIDTHS) + sizes.insert(0,1) #Add 1-wide scalar to the vector widths + for vecSize in sizes: + for testIdx in range(0, numTests): + print_test(f, fnName, dataType, functionDef, tests, \ + testIdx, vecSize, (fnType is 'tss')) + + #Terminate the header section + f.write('!*/\n\n') + + #Generate the actual kernels + generate_kernels(f, dataType, fnName, functionDef) + + #Hopefully this next part is obvious :) + f.close() +main() |