/*
 * extractExternal.cpp
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <strstream>
#include <utility>

// KMP_MEMCPY is not defined anywhere in this file. If the build does not
// supply it, fall back to plain memcpy so the tool still compiles on its own.
#ifndef KMP_MEMCPY
#define KMP_MEMCPY memcpy
#endif

/* Given a set of n object files h ('external' object files) and a set of m
   object files o ('internal' object files),
   1. Determines r, the subset of h that o depends on, directly or indirectly
   2. Removes the files in h - r from the file system
   3. For each external symbol defined in some file in r, rename it in r U o
      by prefixing it with "__kmp_external_"

   Usage: hide.exe <n> <n external object files> <internal object files>

   Thus, the prefixed symbols become hidden in the sense that they now have a
   special prefix.
*/

using namespace std;

// Number of bytes one symbol table entry occupies in the object file.
static const int symbolEntrySize = 18;

// Prints errorMsg followed by a newline to stdout and exits with status 1.
void stop(const char *errorMsg) {
  printf("%s\n", errorMsg);
  exit(1);
}

// An entry in the symbol table of a .OBJ file.  Entries are read and written
// as the first symbolEntrySize bytes of this object, so the member order and
// sizes must not change.
class Symbol {
public:
  long long name; // 8 bytes: inline name or string table reference
  unsigned value;
  unsigned short sectionNum, type;
  // Unsigned so that an auxiliary-entry count above 127 is not read as a
  // negative number.
  unsigned char storageClass, nAux;
};

// Helper base for rstream: an istrstream over a heap buffer that is released
// with delete[] when the stream is destroyed.
class _rstream : public istrstream {
private:
  const char *buf; // start of the buffer the stream reads from (owned)
protected:
  _rstream(pair<const char *, streamsize> p)
      : istrstream(p.first, p.second), buf(p.first) {}
  ~_rstream() { delete[] buf; }
};

/* A stream encapsulating the content of a file or the content of a string,
   overriding the >> operator to read various integer types in binary form,
   as well as a symbol table entry.
*/
class rstream : public _rstream {
private:
  // Reads sizeof(T) raw bytes from the stream into x.
  template <class T> inline rstream &doRead(T &x) {
    read((char *)&x, sizeof(T));
    return *this;
  }

  // Loads the whole of the named file into a freshly allocated buffer and
  // returns (buffer, size).  Stops the program if the file cannot be opened,
  // is empty, or cannot be read completely.
  static pair<const char *, streamsize> getBuf(const char *fileName) {
    ifstream raw(fileName, ios::binary | ios::in);
    if (!raw.is_open())
      stop("rstream.getBuf: Error opening file");
    raw.seekg(0, ios::end);
    streamoff fileSize = raw.tellg();
    if (fileSize < 0)
      stop("rstream.getBuf: Error reading file");
    // istrstream requires a length greater than zero.
    if (fileSize == 0)
      stop("rstream.getBuf: File is empty");
    char *buf = new char[(size_t)fileSize];
    raw.seekg(0, ios::beg);
    raw.read(buf, (streamsize)fileSize);
    if (!raw)
      stop("rstream.getBuf: Error reading file");
    return pair<const char *, streamsize>(buf, (streamsize)fileSize);
  }

public:
  // Construct from a string.  The stream takes ownership of buf and releases
  // it with delete[], so buf must have been allocated with new char[].
  rstream(const char *buf, streamsize size)
      : _rstream(pair<const char *, streamsize>(buf, size)) {}

  /* Construct from a file: the whole content is read once to initialize the
     content of this stream. */
  rstream(const char *fileName) : _rstream(getBuf(fileName)) {}

  rstream &operator>>(int &x) { return doRead(x); }
  rstream &operator>>(unsigned &x) { return doRead(x); }
  rstream &operator>>(short &x) { return doRead(x); }
  rstream &operator>>(unsigned short &x) { return doRead(x); }

  // Reads one symbol table entry (symbolEntrySize bytes) into e.
  rstream &operator>>(Symbol &e) {
    read((char *)&e, symbolEntrySize);
    return *this;
  }
};

// String table in a .OBJ file: a 4-byte total length followed by
// NUL-terminated strings, addressed by their byte offset into the table.
class StringTable {
private:
  map<string, unsigned> directory; // string -> offset of the string in data
  size_t length;                   // size of data in bytes, length field included
  char *data;                      // owned copy of the table

  // Not copyable: data is released exactly once, in the destructor.
  StringTable(const StringTable &);
  StringTable &operator=(const StringTable &);

  // Builds directory from the length bytes in data.
  void makeDirectory(void) {
    unsigned i = sizeof(unsigned);
    while (i < length) {
      string s = string(data + i);
      directory.insert(make_pair(s, i));
      i += s.size() + 1;
    }
  }

  // Initializes length, data and directory from the raw table at _data, whose
  // first 4 bytes hold the table size.
  void init(const char *_data) {
    unsigned _length = *(const unsigned *)_data;

    if (_length < sizeof(unsigned))
      stop("StringTable.init: Invalid symbol table");
    if (_length > sizeof(unsigned) && _data[_length - 1]) {
      // to prevent runaway strings, make sure the data ends with a zero
      data = new char[length = _length + 1];
      data[_length] = 0;
    } else {
      data = new char[length = _length];
    }
    *(unsigned *)data = (unsigned)length;
    // Copy only what the source holds; using the (possibly larger) new length
    // would read past the source and overwrite the terminator set above.
    KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
               _length - sizeof(unsigned));
    makeDirectory();
  }

public:
  /* Construct string table by reading from f, starting at f's current
     position.  Stops the program if the table is malformed or truncated. */
  StringTable(rstream &f) {
    unsigned strSize = 0;
    char *strData;

    f >> strSize;
    if (f.fail() || strSize < sizeof(unsigned))
      stop("StringTable: Invalid string table");
    strData = new char[strSize];
    *(unsigned *)strData = strSize;
    // read the raw data that follows the length field
    f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
    if (f.fail())
      stop("StringTable: Unexpected EOF");
    init(strData);
    delete[] strData;
  }

  /* Construct string table from given strings.  Only strings longer than 8
     characters are stored; shorter ones are encoded inline by encode(). */
  StringTable(const set<string> &strings) {
    char *p;
    set<string>::const_iterator it;

    // count required size for data
    for (length = sizeof(unsigned), it = strings.begin(); it != strings.end();
         ++it) {
      size_t l = (*it).size();

      if (l > (unsigned)0xFFFFFFFF)
        stop("StringTable: String too long");
      if (l > 8) {
        length += l + 1;
        if (length > (unsigned)0xFFFFFFFF)
          stop("StringTable: Symbol table too long");
      }
    }
    data = new char[length];
    *(unsigned *)data = (unsigned)length;
    // populate data and directory
    for (p = data + sizeof(unsigned), it = strings.begin();
         it != strings.end(); ++it) {
      const string &str = *it;
      size_t l = str.size();
      if (l > 8) {
        directory.insert(make_pair(str, (unsigned)(p - data)));
        KMP_MEMCPY(p, str.c_str(), l);
        p[l] = 0;
        p += l + 1;
      }
    }
  }

  ~StringTable() { delete[] data; }

  /* Returns encoding for given string based on this string table.  Strings of
     up to 8 characters are stored inline, padded with zero bytes; longer ones
     are encoded as four zero bytes followed by the string's table offset.
     Stops the program if a long string is not in the table. */
  long long encode(const string &str) {
    long long r = 0;

    if (str.size() <= 8) {
      // Encoded directly.  r is zeroed first so the unused tail is zero
      // padding, and an 8-character name needs no terminator.
      KMP_MEMCPY(&r, str.data(), str.size());
      return r;
    } else {
      // represented as index into table
      map<string, unsigned>::const_iterator it = directory.find(str);
      if (it == directory.end())
        stop("StringTable::encode: String not found in string table");
      unsigned offset = (*it).second;
      KMP_MEMCPY((char *)&r + sizeof(unsigned), &offset, sizeof(unsigned));
      return r;
    }
  }

  /* Returns string represented by x based on this string table.  Stops the
     program if x references a position outside the table. */
  string decode(long long x) const {
    unsigned head;

    KMP_MEMCPY(&head, &x, sizeof(head));
    if (head == 0) {
      // represented as index into table
      unsigned p;
      KMP_MEMCPY(&p, (const char *)&x + sizeof(unsigned), sizeof(p));
      if (p >= length)
        stop("StringTable::decode: Invalid string table lookup");
      return string(data + p);
    } else {
      // encoded directly: up to 8 characters, not necessarily terminated
      const char *p = (const char *)&x;
      int i;

      for (i = 0; i < 8 && p[i]; ++i)
        ;
      return string(p, i);
    }
  }

  // Writes the raw table (length field included) to os.
  void write(ostream &os) { os.write(data, length); }
};

/* For the named object file, determines the set of defined external symbols
   and the set of undefined external symbols and stores them in *defined and
   *undefined respectively (both sets are cleared first).  Stops the program
   on any read or format error. */
void computeExternalSymbols(const char *fileName, set<string> *defined,
                            set<string> *undefined) {
  streampos fileSize;
  unsigned symTabStart = 0, symNEntries = 0;

  rstream f(fileName);
  f.seekg(0, ios::end);
  fileSize = f.tellg();

  // symbol table offset and entry count are stored at byte 8 of the file
  f.seekg(8);
  f >> symTabStart >> symNEntries;
  // seek to the string table, which directly follows the symbol table
  f.seekg(symTabStart + symbolEntrySize * (size_t)symNEntries);
  // good() also catches a short header or a failed seek, which eof() misses
  if (!f.good()) {
    printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
           fileName, (unsigned long)(streamoff)fileSize, symTabStart,
           symNEntries);
    stop("computeExternalSymbols: Unexpected EOF 1");
  }
  StringTable stringTable(f); // read the string table
  if (f.tellg() != fileSize)
    stop("computeExternalSymbols: Unexpected data after string table");

  f.clear();
  f.seekg(symTabStart); // seek to the symbol table

  defined->clear();
  undefined->clear();
  for (unsigned i = 0; i < symNEntries; ++i) {
    // process each entry
    Symbol e;

    if (f.eof())
      stop("computeExternalSymbols: Unexpected EOF 2");
    f >> e;
    if (f.fail())
      stop("computeExternalSymbols: File read error");
    if (e.nAux) { // auxiliary entries follow: skip them
      f.seekg(e.nAux * symbolEntrySize, ios::cur);
      i += e.nAux;
    }
    // storage class 2 marks an external symbol; it is defined in this file
    // when it has a non-zero section number, undefined otherwise
    if (e.storageClass == 2) {
      if (e.sectionNum)
        defined->insert(stringTable.decode(e.name));
      else
        undefined->insert(stringTable.decode(e.name));
    }
  }
}

// Returns the name symbol e must carry in the rewritten file: name prefixed
// with "__kmp_external_" when e is external (storage class 2) and name is a
// member of hide, otherwise name unchanged.
static string outputName(const Symbol &e, const string &name,
                         const set<string> &hide) {
  static const string prefix("__kmp_external_");

  if (e.storageClass == 2 && hide.find(name) != hide.end())
    return prefix + name;
  return name;
}

/* For each occurrence of an external symbol in the object file named by
   fileName that is a member of hide, renames it by prefixing with
   "__kmp_external_", writing back the file in-place.  Stops the program on
   any read, write or format error. */
void hideSymbols(char *fileName, const set<string> &hide) {
  set<string> strings; // set of all occurring symbols, appropriately prefixed
  streampos fileSize;
  unsigned symTabStart = 0, symNEntries = 0;
  unsigned i;

  rstream in(fileName);
  in.seekg(0, ios::end);
  fileSize = in.tellg();

  in.seekg(8);
  in >> symTabStart >> symNEntries;
  in.seekg(symTabStart + symbolEntrySize * (size_t)symNEntries);
  if (!in.good())
    stop("hideSymbols: Unexpected EOF");
  StringTable stringTableOld(in); // read original string table
  if (in.tellg() != fileSize)
    stop("hideSymbols: Unexpected data after string table");
  in.clear();

  // compute set of occurring strings with prefix added
  for (i = 0; i < symNEntries; ++i) {
    Symbol e;

    in.seekg(symTabStart + i * symbolEntrySize);
    if (in.eof())
      stop("hideSymbols: Unexpected EOF");
    in >> e;
    if (in.fail())
      stop("hideSymbols: File read error");
    i += e.nAux; // auxiliary entries carry no name
    strings.insert(outputName(e, stringTableOld.decode(e.name), hide));
  }

  // make new string table from string set; done before the output file is
  // truncated so that a failure here leaves the original file untouched
  StringTable stringTableNew(strings);

  // the input is fully buffered in memory, so the file can be reopened
  ofstream out(fileName, ios::trunc | ios::out | ios::binary);
  if (!out.is_open())
    stop("hideSymbols: Error opening output file");

  // copy input file to output file up to just before the symbol table
  in.seekg(0);
  char *buf = new char[symTabStart];
  in.read(buf, symTabStart);
  out.write(buf, symTabStart);
  delete[] buf;

  // copy input symbol table to output symbol table with name translation
  for (i = 0; i < symNEntries; ++i) {
    Symbol e;

    in.seekg(symTabStart + i * symbolEntrySize);
    if (in.eof())
      stop("hideSymbols: Unexpected EOF");
    in >> e;
    if (in.fail())
      stop("hideSymbols: File read error");
    const string s = stringTableOld.decode(e.name);
    out.seekp(symTabStart + i * symbolEntrySize);
    e.name = stringTableNew.encode(outputName(e, s, hide));
    out.write((char *)&e, symbolEntrySize);
    if (out.fail())
      stop("hideSymbols: File write error");
    if (e.nAux) {
      // copy auxiliary symbol table entries verbatim; e is reused as the
      // buffer, so remember the count first
      unsigned nAux = e.nAux;
      for (unsigned j = 1; j <= nAux; ++j) {
        in >> e;
        if (in.fail())
          stop("hideSymbols: File read error");
        out.seekp(symTabStart + (i + j) * symbolEntrySize);
        out.write((char *)&e, symbolEntrySize);
        if (out.fail())
          stop("hideSymbols: File write error");
      }
      i += nAux;
    }
  }
  // output string table directly after the last symbol table entry
  stringTableNew.write(out);
  if (out.fail())
    stop("hideSymbols: File write error");
}

// Returns true iff a and b have no common element.  Walks both ordered sets
// in step, so the cost is linear in their combined size.
template <class T> bool isDisjoint(const set<T> &a, const set<T> &b) {
  typename set<T>::const_iterator ita, itb;

  for (ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
    const T &ta = *ita, &tb = *itb;
    if (ta < tb)
      ++ita;
    else if (tb < ta)
      ++itb;
    else
      return false;
  }
  return true;
}

/* precondition: defined and undefined are arrays with nTotal elements where
   nTotal >= nExternal.  The first nExternal elements correspond to the
   external object files and the rest correspond to the internal object files.
   postcondition: file x is said to depend on file y if undefined[x] and
   defined[y] are not disjoint.  Returns the transitive closure of the set of
   internal object files, as a set of file indexes, under the 'depends on'
   relation, minus the set of internal object files.  The returned set is
   allocated with new and owned by the caller.
*/
set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined,
                               set<string> *undefined) {
  set<int> *required = new set<int>;
  set<int> fresh[2]; // work lists: files found in the current / next round
  int i, cur = 0;
  bool changed;

  // start from the internal files
  for (i = nTotal - 1; i >= nExternal; --i)
    fresh[cur].insert(i);
  do {
    changed = false;
    for (set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end();
         ++it) {
      set<string> &s = undefined[*it];

      for (i = 0; i < nExternal; ++i) {
        if (required->find(i) == required->end()) {
          if (!isDisjoint(defined[i], s)) {
            // found a new qualifying element
            required->insert(i);
            fresh[1 - cur].insert(i);
            changed = true;
          }
        }
      }
    }
    fresh[cur].clear();
    cur = 1 - cur;
  } while (changed);
  return required;
}

// Entry point.  argv[1] is the number of external object files; it is
// followed by that many external object file names and then the internal
// object file names.
int main(int argc, char **argv) {
  int nExternal, i;
  set<string> *defined, *undefined;
  set<int>::iterator it;

  if (argc < 3)
    stop("Please specify a positive integer followed by a list of object filenames");
  nExternal = atoi(argv[1]);
  if (nExternal <= 0)
    stop("Please specify a positive integer followed by a list of object filenames");
  // written this way round so a huge nExternal cannot overflow
  if (nExternal > argc - 2)
    stop("Too few external objects");
  defined = new set<string>[argc - 2];
  undefined = new set<string>[argc - 2];

  // determine the set of defined and undefined external symbols
  for (i = 2; i < argc; ++i)
    computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);

  // determine the set of required external files
  set<int> *requiredExternal =
      findRequiredExternal(nExternal, argc - 2, defined, undefined);
  set<string> hide;

  /* determine the set of symbols to hide--namely defined external symbols of
     the required external files */
  for (it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
    int idx = *it;
    set<string>::iterator it2;
    /* We have to insert one element at a time instead of inserting a range
       because the insert member function taking a range doesn't exist on
       Windows* OS, at least at the time of this writing.
    */
    for (it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
      hide.insert(*it2);
  }

  /* process the external files--removing those that are not required and
     hiding the appropriate symbols in the others */
  for (i = 0; i < nExternal; ++i) {
    if (requiredExternal->find(i) != requiredExternal->end())
      hideSymbols(argv[2 + i], hide);
    else
      remove(argv[2 + i]);
  }
  // hide the appropriate symbols in the internal files
  for (i = nExternal + 2; i < argc; ++i)
    hideSymbols(argv[i], hide);

  delete requiredExternal;
  delete[] defined;
  delete[] undefined;
  return 0;
}