klee
ModuleUtil.cpp
Go to the documentation of this file.
1//===-- ModuleUtil.cpp ----------------------------------------------------===//
2//
3// The KLEE Symbolic Virtual Machine
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
11
12#include "klee/Config/Version.h"
13#include "klee/Support/Debug.h"
15
16#include "llvm/Analysis/ValueTracking.h"
17#include "llvm/BinaryFormat/Magic.h"
18#include "llvm/Bitcode/BitcodeReader.h"
19#include "llvm/IR/AssemblyAnnotationWriter.h"
20#include "llvm/IR/DiagnosticInfo.h"
21#include "llvm/IR/DiagnosticPrinter.h"
22#include "llvm/IR/Function.h"
23#include "llvm/IR/Instructions.h"
24#include "llvm/IR/IntrinsicInst.h"
25#include "llvm/IR/LLVMContext.h"
26#include "llvm/IR/Module.h"
27#include "llvm/IR/ValueSymbolTable.h"
28#include "llvm/IRReader/IRReader.h"
29#include "llvm/Linker/Linker.h"
30#include "llvm/Object/Archive.h"
31#include "llvm/Object/Error.h"
32#include "llvm/Object/ObjectFile.h"
33#include "llvm/Support/FileSystem.h"
34#include "llvm/Support/MemoryBuffer.h"
35#include "llvm/Support/Path.h"
36#include "llvm/Support/raw_ostream.h"
37#include "llvm/Support/SourceMgr.h"
38
39
40#include <algorithm>
41#include <fstream>
42#include <map>
43#include <set>
44#include <sstream>
45#include <string>
46
47using namespace llvm;
48using namespace klee;
49
64static void
65GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) {
66 static const std::string llvmIntrinsicPrefix="llvm.";
67 std::set<std::string> DefinedSymbols;
68 UndefinedSymbols.clear();
69 KLEE_DEBUG_WITH_TYPE("klee_linker",
70 dbgs() << "*** Computing undefined symbols for "
71 << M->getModuleIdentifier() << " ***\n");
72
73 for (auto const &Function : *M) {
74 if (Function.hasName()) {
75 if (Function.isDeclaration())
76 UndefinedSymbols.insert(Function.getName().str());
77 else if (!Function.hasLocalLinkage()) {
78 assert(!Function.hasDLLImportStorageClass() &&
79 "Found dllimported non-external symbol!");
80 DefinedSymbols.insert(Function.getName().str());
81 }
82 }
83 }
84
85 for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
86 I != E; ++I)
87 if (I->hasName()) {
88 if (I->isDeclaration())
89 UndefinedSymbols.insert(I->getName().str());
90 else if (!I->hasLocalLinkage()) {
91 assert(!I->hasDLLImportStorageClass() && "Found dllimported non-external symbol!");
92 DefinedSymbols.insert(I->getName().str());
93 }
94 }
95
96 for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
97 I != E; ++I)
98 if (I->hasName())
99 DefinedSymbols.insert(I->getName().str());
100
101 // Prune out any defined symbols from the undefined symbols set
102 // and other symbols we don't want to treat as an undefined symbol
103 std::vector<std::string> SymbolsToRemove;
104 for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
105 I != UndefinedSymbols.end(); ++I )
106 {
107 if (DefinedSymbols.find(*I) != DefinedSymbols.end()) {
108 SymbolsToRemove.push_back(*I);
109 continue;
110 }
111
112 // Strip out llvm intrinsics
113 if ( (I->size() >= llvmIntrinsicPrefix.size() ) &&
114 (I->compare(0, llvmIntrinsicPrefix.size(), llvmIntrinsicPrefix) == 0) )
115 {
116 KLEE_DEBUG_WITH_TYPE("klee_linker", dbgs() << "LLVM intrinsic " << *I <<
117 " has will be removed from undefined symbols"<< "\n");
118 SymbolsToRemove.push_back(*I);
119 continue;
120 }
121
122 // Symbol really is undefined
123 KLEE_DEBUG_WITH_TYPE("klee_linker",
124 dbgs() << "Symbol " << *I << " is undefined.\n");
125 }
126
127 // Now remove the symbols from undefined set.
128 for (auto const &symbol : SymbolsToRemove)
129 UndefinedSymbols.erase(symbol);
130
131 KLEE_DEBUG_WITH_TYPE("klee_linker",
132 dbgs() << "*** Finished computing undefined symbols ***\n");
133}
134
135static bool linkTwoModules(llvm::Module *Dest,
136 std::unique_ptr<llvm::Module> Src,
137 std::string &errorMsg) {
138 // Get the potential error message (Src is moved and won't be available later)
139 errorMsg = "Linking module " + Src->getModuleIdentifier() + " failed";
140 auto linkResult = Linker::linkModules(*Dest, std::move(Src));
141
142 return !linkResult;
143}
144
145std::unique_ptr<llvm::Module>
146klee::linkModules(std::vector<std::unique_ptr<llvm::Module>> &modules,
147 llvm::StringRef entryFunction, std::string &errorMsg) {
148 assert(!modules.empty() && "modules list should not be empty");
149
150 if (entryFunction.empty()) {
151 // If no entry function is provided, link all modules together into one
152 std::unique_ptr<llvm::Module> composite = std::move(modules.back());
153 modules.pop_back();
154
155 // Just link all modules together
156 for (auto &module : modules) {
157 if (linkTwoModules(composite.get(), std::move(module), errorMsg))
158 continue;
159
160 // Linking failed
161 errorMsg = "Linking archive module with composite failed:" + errorMsg;
162 return nullptr;
163 }
164
165 // clean up every module as we already linked in every module
166 modules.clear();
167 return composite;
168 }
169
170 // Starting from the module containing the entry function, resolve unresolved
171 // dependencies recursively
172
173
174 // search for the module containing the entry function
175 std::unique_ptr<llvm::Module> composite;
176 for (auto &module : modules) {
177 if (!module || !module->getNamedValue(entryFunction))
178 continue;
179 if (composite) {
180 errorMsg =
181 "Function " + entryFunction.str() +
182 " defined in different modules (" + module->getModuleIdentifier() +
183 " already defined in: " + composite->getModuleIdentifier() + ")";
184 return nullptr;
185 }
186 composite = std::move(module);
187 }
188
189 // fail if not found
190 if (!composite) {
191 errorMsg =
192 "Entry function '" + entryFunction.str() + "' not found in module.";
193 return nullptr;
194 }
195
196 auto containsUsedSymbols = [](const llvm::Module *module) {
197 GlobalValue *GV =
198 dyn_cast_or_null<GlobalValue>(module->getNamedValue("llvm.used"));
199 if (!GV)
200 return false;
201 KLEE_DEBUG_WITH_TYPE("klee_linker", dbgs() << "Used attribute in "
202 << module->getModuleIdentifier()
203 << '\n');
204 return true;
205 };
206
207 for (auto &module : modules) {
208 if (!module || !containsUsedSymbols(module.get()))
209 continue;
210 if (!linkTwoModules(composite.get(), std::move(module), errorMsg)) {
211 // Linking failed
212 errorMsg = "Linking module containing '__attribute__((used))'"
213 " symbols with composite failed:" +
214 errorMsg;
215 return nullptr;
216 }
217 module = nullptr;
218 }
219
220 bool symbolsLinked = true;
221 while (symbolsLinked) {
222 symbolsLinked = false;
223 std::set<std::string> undefinedSymbols;
224 GetAllUndefinedSymbols(composite.get(), undefinedSymbols);
225 auto hasRequiredDefinition = [&undefinedSymbols](
226 const llvm::Module *module) {
227 for (auto symbol : undefinedSymbols) {
228 GlobalValue *GV =
229 dyn_cast_or_null<GlobalValue>(module->getNamedValue(symbol));
230 if (GV && !GV->isDeclaration()) {
231 KLEE_DEBUG_WITH_TYPE("klee_linker",
232 dbgs() << "Found " << GV->getName() << " in "
233 << module->getModuleIdentifier() << "\n");
234 return true;
235 }
236 }
237 return false;
238 };
239
240 // Stop in nothing is undefined
241 if (undefinedSymbols.empty())
242 break;
243
244 for (auto &module : modules) {
245 if (!module)
246 continue;
247
248 if (!hasRequiredDefinition(module.get()))
249 continue;
250
251 if (!linkTwoModules(composite.get(), std::move(module), errorMsg)) {
252 // Linking failed
253 errorMsg = "Linking archive module with composite failed:" + errorMsg;
254 return nullptr;
255 }
256 module = nullptr;
257 symbolsLinked = true;
258 }
259 }
260
261 // Condense the module array
262 std::vector<std::unique_ptr<llvm::Module>> LeftoverModules;
263 for (auto &module : modules) {
264 if (module)
265 LeftoverModules.emplace_back(std::move(module));
266 }
267
268 modules.swap(LeftoverModules);
269 return composite;
270}
271
274 const CallBase &cs,
275#else
276 const CallSite &cs,
277#endif
278 bool moduleIsFullyLinked) {
279#if LLVM_VERSION_CODE >= LLVM_VERSION(8, 0)
280 Value *v = cs.getCalledOperand();
281#else
282 Value *v = cs.getCalledValue();
283#endif
284 bool viaConstantExpr = false;
285 // Walk through aliases and bitcasts to try to find
286 // the function being called.
287 do {
288 if (isa<llvm::GlobalVariable>(v)) {
289 // We don't care how we got this GlobalVariable
290 viaConstantExpr = false;
291
292 // Global variables won't be a direct call target. Instead, their
293 // value need to be read and is handled as indirect call target.
294 v = nullptr;
295 } else if (Function *f = dyn_cast<Function>(v)) {
296 return f;
297 } else if (llvm::GlobalAlias *ga = dyn_cast<GlobalAlias>(v)) {
298 if (moduleIsFullyLinked || !(ga->isInterposable())) {
299 v = ga->getAliasee();
300 } else {
301 v = nullptr;
302 }
303 } else if (llvm::ConstantExpr *ce = dyn_cast<llvm::ConstantExpr>(v)) {
304 viaConstantExpr = true;
305 v = ce->getOperand(0)->stripPointerCasts();
306 } else {
307 v = nullptr;
308 }
309 } while (v != nullptr);
310
311 // NOTE: This assert may fire, it isn't necessarily a problem and
312 // can be disabled, I just wanted to know when and if it happened.
313 (void) viaConstantExpr;
314 assert((!viaConstantExpr) &&
315 "FIXME: Unresolved direct target for a constant expression");
316 return nullptr;
317}
318
319static bool valueIsOnlyCalled(const Value *v) {
320 for (auto user : v->users()) {
321#if LLVM_VERSION_CODE >= LLVM_VERSION(8, 0)
322 // Make sure the instruction is a call or invoke.
323 if (const auto *cs_ptr = dyn_cast<CallBase>(user)) {
324 const CallBase &cs = *cs_ptr;
325#else
326 if (const auto *instr = dyn_cast<Instruction>(user)) {
327 // Make sure the instruction is a call or invoke.
328 const CallSite cs(const_cast<Instruction *>(instr));
329 if (!cs) return false;
330#endif
331 // Make sure that the value is only the target of this call and
332 // not an argument.
333 if (cs.hasArgument(v))
334 return false;
335 } else if (const auto *ce = dyn_cast<ConstantExpr>(user)) {
336 if (ce->getOpcode() == Instruction::BitCast)
337 if (valueIsOnlyCalled(ce))
338 continue;
339 return false;
340 } else if (const auto *ga = dyn_cast<GlobalAlias>(user)) {
341 if (v == ga->getAliasee() && !valueIsOnlyCalled(ga))
342 return false;
343 } else if (isa<BlockAddress>(user)) {
344 // only valid as operand to indirectbr or comparison against null
345 continue;
346 } else {
347 return false;
348 }
349 }
350
351 return true;
352}
353
354bool klee::functionEscapes(const Function *f) {
355 return !valueIsOnlyCalled(f);
356}
357
358bool klee::loadFile(const std::string &fileName, LLVMContext &context,
359 std::vector<std::unique_ptr<llvm::Module>> &modules,
360 std::string &errorMsg) {
361 KLEE_DEBUG_WITH_TYPE("klee_loader", dbgs()
362 << "Load file " << fileName << "\n");
363
364 ErrorOr<std::unique_ptr<MemoryBuffer>> bufferErr =
365 MemoryBuffer::getFileOrSTDIN(fileName);
366 std::error_code ec = bufferErr.getError();
367 if (ec) {
368 klee_error("Loading file %s failed: %s", fileName.c_str(),
369 ec.message().c_str());
370 }
371
372 MemoryBufferRef Buffer = bufferErr.get()->getMemBufferRef();
373 file_magic magic = identify_magic(Buffer.getBuffer());
374
375 if (magic == file_magic::bitcode) {
376 SMDiagnostic Err;
377 std::unique_ptr<llvm::Module> module(parseIR(Buffer, Err, context));
378 if (!module) {
379 klee_error("Loading file %s failed: %s", fileName.c_str(),
380 Err.getMessage().str().c_str());
381 }
382 modules.push_back(std::move(module));
383 return true;
384 }
385
386 if (magic == file_magic::archive) {
387 Expected<std::unique_ptr<object::Binary> > archOwner =
388 object::createBinary(Buffer, &context);
389 if (!archOwner)
390 ec = errorToErrorCode(archOwner.takeError());
391 llvm::object::Binary *arch = archOwner.get().get();
392 if (ec)
393 klee_error("Loading file %s failed: %s", fileName.c_str(),
394 ec.message().c_str());
395
396 if (auto archive = dyn_cast<object::Archive>(arch)) {
397// Load all bitcode files into memory
398 auto Err = Error::success();
399 for (auto AI = archive->child_begin(Err), AE = archive->child_end();
400 AI != AE; ++AI)
401 {
402
403 StringRef memberName;
404 std::error_code ec;
405 ErrorOr<object::Archive::Child> childOrErr = *AI;
406 ec = childOrErr.getError();
407 if (ec) {
408 errorMsg = ec.message();
409 return false;
410 }
411 auto memberNameErr = childOrErr->getName();
412 ec = memberNameErr ? std::error_code() :
413 errorToErrorCode(memberNameErr.takeError());
414 if (!ec) {
415 memberName = memberNameErr.get();
416 KLEE_DEBUG_WITH_TYPE("klee_linker", dbgs()
417 << "Loading archive member "
418 << memberName << "\n");
419 } else {
420 errorMsg = "Archive member does not have a name!\n";
421 return false;
422 }
423
424 Expected<std::unique_ptr<llvm::object::Binary> > child =
425 childOrErr->getAsBinary();
426 if (!child)
427 ec = errorToErrorCode(child.takeError());
428 if (ec) {
429// If we can't open as a binary object file its hopefully a bitcode file
430 auto buff = childOrErr->getMemoryBufferRef();
431 ec = buff ? std::error_code() : errorToErrorCode(buff.takeError());
432 if (ec) {
433 errorMsg = "Failed to get MemoryBuffer: " + ec.message();
434 return false;
435 }
436
437 if (buff) {
438 // FIXME: Maybe load bitcode file lazily? Then if we need to link,
439 // materialise
440 // the module
441 SMDiagnostic Err;
442 std::unique_ptr<llvm::Module> module =
443 parseIR(buff.get(), Err, context);
444 if (!module) {
445 klee_error("Loading file %s failed: %s", fileName.c_str(),
446 Err.getMessage().str().c_str());
447 }
448
449 modules.push_back(std::move(module));
450 } else {
451 errorMsg = "Buffer was NULL!";
452 return false;
453 }
454
455 } else if (child.get()->isObject()) {
456 errorMsg = "Object file " + child.get()->getFileName().str() +
457 " in archive is not supported";
458 return false;
459 } else {
460 errorMsg = "Loading archive child with error " + ec.message();
461 return false;
462 }
463 }
464 if (Err) {
465 errorMsg = "Cannot iterate over archive";
466 return false;
467 }
468 }
469
470 return true;
471 }
472 if (magic.is_object()) {
473 errorMsg = "Loading file " + fileName +
474 " Object file as input is currently not supported";
475 return false;
476 }
477 // This might still be an assembly file. Let's try to parse it.
478 SMDiagnostic Err;
479 std::unique_ptr<llvm::Module> module(parseIR(Buffer, Err, context));
480 if (!module) {
481 klee_error("Loading file %s failed: Unrecognized file type.",
482 fileName.c_str());
483 }
484 modules.push_back(std::move(module));
485 return true;
486}
static bool linkTwoModules(llvm::Module *Dest, std::unique_ptr< llvm::Module > Src, std::string &errorMsg)
Definition: ModuleUtil.cpp:135
static bool valueIsOnlyCalled(const Value *v)
Definition: ModuleUtil.cpp:319
static void GetAllUndefinedSymbols(Module *M, std::set< std::string > &UndefinedSymbols)
Definition: ModuleUtil.cpp:65
#define LLVM_VERSION_CODE
Definition: Version.h:16
#define LLVM_VERSION(major, minor)
Definition: Version.h:15
Definition: main.cpp:291
std::unique_ptr< llvm::Module > linkModules(std::vector< std::unique_ptr< llvm::Module > > &modules, llvm::StringRef entryFunction, std::string &errorMsg)
Definition: ModuleUtil.cpp:146
bool loadFile(const std::string &libraryName, llvm::LLVMContext &context, std::vector< std::unique_ptr< llvm::Module > > &modules, std::string &errorMsg)
bool functionEscapes(const llvm::Function *f)
llvm::Function * getDirectCallTarget(const llvm::CallBase &cb, bool moduleIsFullyLinked)
void klee_error(const char *msg,...) __attribute__((format(printf