Intel® OpenMP* Runtime Library
extractExternal.cpp
1 /*
2  * extractExternal.cpp
3  */
4 
5 /* <copyright>
6  Copyright (c) 2006-2015 Intel Corporation. All Rights Reserved.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11 
12  * Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  * Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in the
16  documentation and/or other materials provided with the distribution.
17  * Neither the name of Intel Corporation nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 </copyright> */
34 
35 #include <stdlib.h>
36 #include <iostream>
37 #include <strstream>
38 #include <fstream>
39 #include <string>
40 #include <set>
41 #include <map>
42 
43 /* Given a set of n object files h ('external' object files) and a set of m
44  object files o ('internal' object files),
45  1. Determines r, the subset of h that o depends on, directly or indirectly
46  2. Removes the files in h - r from the file system
47  3. For each external symbol defined in some file in r, rename it in r U o
48  by prefixing it with "__kmp_external_"
49  Usage:
50  hide.exe <n> <filenames for h> <filenames for o>
51 
52  Thus, the prefixed symbols become hidden in the sense that they now have a special
53  prefix.
54 */
55 
56 using namespace std;
57 
// Prints <errorMsg> followed by a newline to standard output and terminates the
// process with exit status 1. Never returns.
// The parameter is const-qualified because every caller passes a string literal,
// which cannot be bound to a plain char* in standard C++.
void stop(const char* errorMsg) {
    printf("%s\n", errorMsg);
    exit(1);
}
62 
63 // an entry in the symbol table of a .OBJ file
64 class Symbol {
65 public:
66  __int64 name;
67  unsigned value;
68  unsigned short sectionNum, type;
69  char storageClass, nAux;
70 };
71 
// Helper base for rstream: an istrstream over a memory buffer that also takes over the
// buffer and releases it with delete[] when the stream is destroyed.
class _rstream : public istrstream {
private:
    const char *ownedBuf; // start of the buffer the stream reads from
protected:
    // <content>.first is the buffer, <content>.second the number of bytes in it
    _rstream(pair<const char*, streamsize> content)
        : istrstream(content.first, content.second),
          ownedBuf(content.first)
    {
    }
    ~_rstream() {
        delete[] ownedBuf;
    }
};
81 
82 /* A stream encapuslating the content of a file or the content of a string, overriding the
83  >> operator to read various integer types in binary form, as well as a symbol table
84  entry.
85 */
86 class rstream : public _rstream {
87 private:
88  template<class T>
89  inline rstream& doRead(T &x) {
90  read((char*)&x, sizeof(T));
91  return *this;
92  }
93  static pair<const char*, streamsize> getBuf(const char *fileName) {
94  ifstream raw(fileName,ios::binary | ios::in);
95  if(!raw.is_open())
96  stop("rstream.getBuf: Error opening file");
97  raw.seekg(0,ios::end);
98  streampos fileSize = raw.tellg();
99  if(fileSize < 0)
100  stop("rstream.getBuf: Error reading file");
101  char *buf = new char[fileSize];
102  raw.seekg(0,ios::beg);
103  raw.read(buf, fileSize);
104  return pair<const char*, streamsize>(buf,fileSize);
105  }
106 public:
107  // construct from a string
108  rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
109  /* construct from a file whole content is fully read once to initialize the content of
110  this stream
111  */
112  rstream(const char *fileName):_rstream(getBuf(fileName)){}
113  rstream& operator>>(int &x) {
114  return doRead(x);
115  }
116  rstream& operator>>(unsigned &x) {
117  return doRead(x);
118  }
119  rstream& operator>>(short &x) {
120  return doRead(x);
121  }
122  rstream& operator>>(unsigned short &x) {
123  return doRead(x);
124  }
125  rstream& operator>>(Symbol &e) {
126  read((char*)&e, 18);
127  return *this;
128  }
129 };
130 
131 // string table in a .OBJ file
132 class StringTable {
133 private:
134  map<string, unsigned> directory;
135  size_t length;
136  char *data;
137 
138  // make <directory> from <length> bytes in <data>
139  void makeDirectory(void) {
140  unsigned i = 4;
141  while(i < length) {
142  string s = string(data + i);
143  directory.insert(make_pair(s, i));
144  i += s.size() + 1;
145  }
146  }
147  // initialize <length> and <data> with contents specified by the arguments
148  void init(const char *_data) {
149  unsigned _length = *(unsigned*)_data;
150 
151  if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
152  stop("StringTable.init: Invalid symbol table");
153  if(_data[_length - 1]) {
154  // to prevent runaway strings, make sure the data ends with a zero
155  data = new char[length = _length + 1];
156  data[_length] = 0;
157  } else {
158  data = new char[length = _length];
159  }
160  *(unsigned*)data = length;
161  KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
162  length - sizeof(unsigned));
163  makeDirectory();
164  }
165 public:
166  StringTable(rstream &f) {
167  /* Construct string table by reading from f.
168  */
169  streampos s;
170  unsigned strSize;
171  char *strData;
172 
173  s = f.tellg();
174  f>>strSize;
175  if(strSize < sizeof(unsigned))
176  stop("StringTable: Invalid string table");
177  strData = new char[strSize];
178  *(unsigned*)strData = strSize;
179  // read the raw data into <strData>
180  f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
181  s = f.tellg() - s;
182  if(s < strSize)
183  stop("StringTable: Unexpected EOF");
184  init(strData);
185  delete[]strData;
186  }
187  StringTable(const set<string> &strings) {
188  /* Construct string table from given strings.
189  */
190  char *p;
191  set<string>::const_iterator it;
192  size_t s;
193 
194  // count required size for data
195  for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
196  size_t l = (*it).size();
197 
198  if(l > (unsigned) 0xFFFFFFFF)
199  stop("StringTable: String too long");
200  if(l > 8) {
201  length += l + 1;
202  if(length > (unsigned) 0xFFFFFFFF)
203  stop("StringTable: Symbol table too long");
204  }
205  }
206  data = new char[length];
207  *(unsigned*)data = length;
208  // populate data and directory
209  for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
210  const string &str = *it;
211  size_t l = str.size();
212  if(l > 8) {
213  directory.insert(make_pair(str, p - data));
214  KMP_MEMCPY(p, str.c_str(), l);
215  p[l] = 0;
216  p += l + 1;
217  }
218  }
219  }
220  ~StringTable() {
221  delete[] data;
222  }
223  /* Returns encoding for given string based on this string table.
224  Error if string length is greater than 8 but string is not in
225  the string table--returns 0.
226  */
227  __int64 encode(const string &str) {
228  __int64 r;
229 
230  if(str.size() <= 8) {
231  // encoded directly
232  ((char*)&r)[7] = 0;
233  KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8);
234  return r;
235  } else {
236  // represented as index into table
237  map<string,unsigned>::const_iterator it = directory.find(str);
238  if(it == directory.end())
239  stop("StringTable::encode: String now found in string table");
240  ((unsigned*)&r)[0] = 0;
241  ((unsigned*)&r)[1] = (*it).second;
242  return r;
243  }
244  }
245  /* Returns string represented by x based on this string table.
246  Error if x references an invalid position in the table--returns
247  the empty string.
248  */
249  string decode(__int64 x) const {
250  if(*(unsigned*)&x == 0) {
251  // represented as index into table
252  unsigned &p = ((unsigned*)&x)[1];
253  if(p >= length)
254  stop("StringTable::decode: Invalid string table lookup");
255  return string(data + p);
256  } else {
257  // encoded directly
258  char *p = (char*)&x;
259  int i;
260 
261  for(i = 0; i < 8 && p[i]; ++i);
262  return string(p, i);
263  }
264  }
265  void write(ostream &os) {
266  os.write(data, length);
267  }
268 };
269 
270 /* for the named object file, determines the set of defined symbols and the set of undefined external symbols
271  and writes them to <defined> and <undefined> respectively
272 */
273 void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
274  streampos fileSize;
275  size_t strTabStart;
276  unsigned symTabStart, symNEntries;
277  rstream f(fileName);
278 
279  f.seekg(0,ios::end);
280  fileSize = f.tellg();
281 
282  f.seekg(8);
283  f >> symTabStart >> symNEntries;
284  // seek to the string table
285  f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
286  if(f.eof()) {
287  printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
288  fileName, (unsigned long) fileSize, symTabStart, symNEntries);
289  stop("computeExternalSymbols: Unexpected EOF 1");
290  }
291  StringTable stringTable(f); // read the string table
292  if(f.tellg() != fileSize)
293  stop("computeExternalSymbols: Unexpected data after string table");
294 
295  f.clear();
296  f.seekg(symTabStart); // seek to the symbol table
297 
298  defined->clear(); undefined->clear();
299  for(int i = 0; i < symNEntries; ++i) {
300  // process each entry
301  Symbol e;
302 
303  if(f.eof())
304  stop("computeExternalSymbols: Unexpected EOF 2");
305  f>>e;
306  if(f.fail())
307  stop("computeExternalSymbols: File read error");
308  if(e.nAux) { // auxiliary entry: skip
309  f.seekg(e.nAux * 18, ios::cur);
310  i += e.nAux;
311  }
312  // if symbol is extern and defined in the current file, insert it
313  if(e.storageClass == 2)
314  if(e.sectionNum)
315  defined->insert(stringTable.decode(e.name));
316  else
317  undefined->insert(stringTable.decode(e.name));
318  }
319 }
320 
321 /* For each occurrence of an external symbol in the object file named by
322  by <fileName> that is a member of <hide>, renames it by prefixing
323  with "__kmp_external_", writing back the file in-place
324 */
325 void hideSymbols(char *fileName, const set<string> &hide) {
326  static const string prefix("__kmp_external_");
327  set<string> strings; // set of all occurring symbols, appropriately prefixed
328  streampos fileSize;
329  size_t strTabStart;
330  unsigned symTabStart, symNEntries;
331  int i;
332  rstream in(fileName);
333 
334  in.seekg(0,ios::end);
335  fileSize = in.tellg();
336 
337  in.seekg(8);
338  in >> symTabStart >> symNEntries;
339  in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
340  if(in.eof())
341  stop("hideSymbols: Unexpected EOF");
342  StringTable stringTableOld(in); // read original string table
343 
344  if(in.tellg() != fileSize)
345  stop("hideSymbols: Unexpected data after string table");
346 
347  // compute set of occurring strings with prefix added
348  for(i = 0; i < symNEntries; ++i) {
349  Symbol e;
350 
351  in.seekg(symTabStart + i * 18);
352  if(in.eof())
353  stop("hideSymbols: Unexpected EOF");
354  in >> e;
355  if(in.fail())
356  stop("hideSymbols: File read error");
357  if(e.nAux)
358  i += e.nAux;
359  const string &s = stringTableOld.decode(e.name);
360  // if symbol is extern and found in <hide>, prefix and insert into strings,
361  // otherwise, just insert into strings without prefix
362  strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
363  prefix + s : s);
364  }
365 
366  ofstream out(fileName, ios::trunc | ios::out | ios::binary);
367  if(!out.is_open())
368  stop("hideSymbols: Error opening output file");
369 
370  // make new string table from string set
371  StringTable stringTableNew = StringTable(strings);
372 
373  // copy input file to output file up to just before the symbol table
374  in.seekg(0);
375  char *buf = new char[symTabStart];
376  in.read(buf, symTabStart);
377  out.write(buf, symTabStart);
378  delete []buf;
379 
380  // copy input symbol table to output symbol table with name translation
381  for(i = 0; i < symNEntries; ++i) {
382  Symbol e;
383 
384  in.seekg(symTabStart + i*18);
385  if(in.eof())
386  stop("hideSymbols: Unexpected EOF");
387  in >> e;
388  if(in.fail())
389  stop("hideSymbols: File read error");
390  const string &s = stringTableOld.decode(e.name);
391  out.seekp(symTabStart + i*18);
392  e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
393  prefix + s : s);
394  out.write((char*)&e, 18);
395  if(out.fail())
396  stop("hideSymbols: File write error");
397  if(e.nAux) {
398  // copy auxiliary symbol table entries
399  int nAux = e.nAux;
400  for(int j = 1; j <= nAux; ++j) {
401  in >> e;
402  out.seekp(symTabStart + (i + j) * 18);
403  out.write((char*)&e, 18);
404  }
405  i += nAux;
406  }
407  }
408  // output string table
409  stringTableNew.write(out);
410 }
411 
// Returns true iff <a> and <b> have no common element.
// Walks both sets in sorted order in lockstep, so the cost is linear in the combined size.
template <class T>
bool isDisjoint(const set<T> &a, const set<T> &b) {
    // 'typename' is required because const_iterator depends on the template parameter
    typename set<T>::const_iterator ita, itb;

    for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
        const T &ta = *ita, &tb = *itb;
        if(ta < tb)
            ++ita;
        else if (tb < ta)
            ++itb;
        else
            return false; // neither is smaller: common element
    }
    return true;
}
428 
429 /* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
430  <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the external object
431  files and the rest correspond to the internal object files.
432  postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
433  disjoint. Returns the transitive closure of the set of internal object files, as a set of
434  file indexes, under the 'depends on' relation, minus the set of internal object files.
435 */
436 set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
437  set<int> *required = new set<int>;
438  set<int> fresh[2];
439  int i, cur = 0;
440  bool changed;
441 
442  for(i = nTotal - 1; i >= nExternal; --i)
443  fresh[cur].insert(i);
444  do {
445  changed = false;
446  for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
447  set<string> &s = undefined[*it];
448 
449  for(i = 0; i < nExternal; ++i) {
450  if(required->find(i) == required->end()) {
451  if(!isDisjoint(defined[i], s)) {
452  // found a new qualifying element
453  required->insert(i);
454  fresh[1 - cur].insert(i);
455  changed = true;
456  }
457  }
458  }
459  }
460  fresh[cur].clear();
461  cur = 1 - cur;
462  } while(changed);
463  return required;
464 }
465 
466 int main(int argc, char **argv) {
467  int nExternal, nInternal, i;
468  set<string> *defined, *undefined;
469  set<int>::iterator it;
470 
471  if(argc < 3)
472  stop("Please specify a positive integer followed by a list of object filenames");
473  nExternal = atoi(argv[1]);
474  if(nExternal <= 0)
475  stop("Please specify a positive integer followed by a list of object filenames");
476  if(nExternal + 2 > argc)
477  stop("Too few external objects");
478  nInternal = argc - nExternal - 2;
479  defined = new set<string>[argc - 2];
480  undefined = new set<string>[argc - 2];
481 
482  // determine the set of defined and undefined external symbols
483  for(i = 2; i < argc; ++i)
484  computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
485 
486  // determine the set of required external files
487  set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
488  set<string> hide;
489 
490  /* determine the set of symbols to hide--namely defined external symbols of the
491  required external files
492  */
493  for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
494  int idx = *it;
495  set<string>::iterator it2;
496  /* We have to insert one element at a time instead of inserting a range because
497  the insert member function taking a range doesn't exist on Windows* OS, at least
498  at the time of this writing.
499  */
500  for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
501  hide.insert(*it2);
502  }
503 
504  /* process the external files--removing those that are not required and hiding
505  the appropriate symbols in the others
506  */
507  for(i = 0; i < nExternal; ++i)
508  if(requiredExternal->find(i) != requiredExternal->end())
509  hideSymbols(argv[2 + i], hide);
510  else
511  remove(argv[2 + i]);
512  // hide the appropriate symbols in the internal files
513  for(i = nExternal + 2; i < argc; ++i)
514  hideSymbols(argv[i], hide);
515  return 0;
516 }