зеркало из https://github.com/github/codeql.git
Swift: teach extractor to emit build artifacts for later consumption
This commit is contained in:
Родитель
1997d6b18c
Коммит
4d81206a87
|
@ -3,6 +3,8 @@ load("//swift:rules.bzl", "swift_cc_binary")
|
|||
swift_cc_binary(
|
||||
name = "extractor",
|
||||
srcs = [
|
||||
"SwiftOutputRewrite.cpp",
|
||||
"SwiftOutputRewrite.h",
|
||||
"SwiftExtractor.cpp",
|
||||
"SwiftExtractor.h",
|
||||
"SwiftExtractorConfiguration.h",
|
||||
|
|
|
@ -80,11 +80,17 @@ static void extractDeclarations(const SwiftExtractorConfiguration& config,
|
|||
<< "': " << ec.message() << "\n";
|
||||
return;
|
||||
}
|
||||
trapStream << "// extractor-args: ";
|
||||
trapStream << "/* extractor-args:\n";
|
||||
for (auto opt : config.frontendOptions) {
|
||||
trapStream << std::quoted(opt) << " ";
|
||||
trapStream << " " << std::quoted(opt) << " \\\n";
|
||||
}
|
||||
trapStream << "\n\n";
|
||||
trapStream << "\n*/\n";
|
||||
|
||||
trapStream << "/* swift-frontend-args:\n";
|
||||
for (auto opt : config.patchedFrontendOptions) {
|
||||
trapStream << " " << std::quoted(opt) << " \\\n";
|
||||
}
|
||||
trapStream << "\n*/\n";
|
||||
|
||||
TrapOutput trap{trapStream};
|
||||
TrapArena arena{};
|
||||
|
|
|
@ -16,7 +16,22 @@ struct SwiftExtractorConfiguration {
|
|||
// Subdirectory of the scratchDir.
|
||||
std::string tempTrapDir;
|
||||
|
||||
// VFS (virtual file system) support.
|
||||
// A temporary directory that contains VFS files used during extraction.
|
||||
// Subdirectory of the scratchDir.
|
||||
std::string VFSDir;
|
||||
// A temporary directory that contains temp VFS files before they are moved into VFSDir.
|
||||
// Subdirectory of the scratchDir.
|
||||
std::string tempVFSDir;
|
||||
|
||||
// A temporary directory that contains build artifacts generated by the extractor during the
|
||||
// overall extraction process.
|
||||
// Subdirectory of the scratchDir.
|
||||
std::string tempArtifactDir;
|
||||
|
||||
// The original arguments passed to the extractor. Used for debugging.
|
||||
std::vector<std::string> frontendOptions;
|
||||
// The patched arguments passed to swift::performFrontend. Used for debugging.
|
||||
std::vector<std::string> patchedFrontendOptions;
|
||||
};
|
||||
} // namespace codeql
|
||||
|
|
|
@ -0,0 +1,318 @@
|
|||
#include "SwiftOutputRewrite.h"
|
||||
#include "swift/extractor/SwiftExtractorConfiguration.h"
|
||||
|
||||
#include <llvm/ADT/SmallString.h>
|
||||
#include <llvm/Support/FileSystem.h>
|
||||
#include <llvm/Support/Path.h>
|
||||
#include <swift/Basic/OutputFileMap.h>
|
||||
#include <swift/Basic/Platform.h>
|
||||
#include <unistd.h>
|
||||
#include <unordered_set>
|
||||
#include <optional>
|
||||
#include <iostream>
|
||||
|
||||
// Creates a copy of the output file map and updated remapping table in place
|
||||
// It does not change the original map file as it is dependent upon by the original compiler
|
||||
// Returns path to the newly created output file map on success, or None in a case of failure
|
||||
static std::optional<std::string> rewriteOutputFileMap(
|
||||
const codeql::SwiftExtractorConfiguration& config,
|
||||
const std::string& outputFileMapPath,
|
||||
const std::vector<std::string>& inputs,
|
||||
std::unordered_map<std::string, std::string>& remapping) {
|
||||
auto newPath = config.tempArtifactDir + '/' + outputFileMapPath;
|
||||
|
||||
// TODO: do not assume absolute path for the second parameter
|
||||
auto outputMapOrError = swift::OutputFileMap::loadFromPath(outputFileMapPath, "");
|
||||
if (!outputMapOrError) {
|
||||
return std::nullopt;
|
||||
}
|
||||
auto oldOutputMap = outputMapOrError.get();
|
||||
swift::OutputFileMap newOutputMap;
|
||||
std::vector<llvm::StringRef> keys;
|
||||
for (auto& key : inputs) {
|
||||
auto oldMap = oldOutputMap.getOutputMapForInput(key);
|
||||
if (!oldMap) {
|
||||
continue;
|
||||
}
|
||||
keys.push_back(key);
|
||||
auto& newMap = newOutputMap.getOrCreateOutputMapForInput(key);
|
||||
newMap.copyFrom(*oldMap);
|
||||
for (auto& entry : newMap) {
|
||||
auto oldPath = entry.getSecond();
|
||||
auto newPath = config.tempArtifactDir + '/' + oldPath;
|
||||
entry.getSecond() = newPath;
|
||||
remapping[oldPath] = newPath;
|
||||
}
|
||||
}
|
||||
std::error_code ec;
|
||||
llvm::SmallString<PATH_MAX> filepath(newPath);
|
||||
llvm::StringRef parent = llvm::sys::path::parent_path(filepath);
|
||||
if (std::error_code ec = llvm::sys::fs::create_directories(parent)) {
|
||||
std::cerr << "Cannot create relocated output map dir: '" << parent.str()
|
||||
<< "': " << ec.message() << "\n";
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
llvm::raw_fd_ostream fd(newPath, ec, llvm::sys::fs::OF_None);
|
||||
newOutputMap.write(fd, keys);
|
||||
return newPath;
|
||||
}
|
||||
|
||||
// This is Xcode-specific workaround to produce alias names for an existing .swiftmodule file.
|
||||
// In the case of Xcode, it calls the Swift compiler and asks it to produce a Swift module.
|
||||
// Once it's done, Xcode moves the .swiftmodule file in another location, and the location is
|
||||
// rather arbitrary. Here are examples of such locations:
|
||||
// Original file produced by the frontend:
|
||||
// DerivedData/<Project>/Build/Intermediates.noindex/<Project>.build/<BuiltType>-<Target>/<Project>.build/Objects-normal/<Arch>/<ModuleName>.swiftmodule
|
||||
// where:
|
||||
// Project: name of a project, target, or scheme
|
||||
// BuildType: Debug, Release, etc.
|
||||
// Target: macOS, iphoneos, appletvsimulator, etc.
|
||||
// Arch: arm64, x86_64, etc.
|
||||
//
|
||||
// So far we observed that Xcode can move the module into different locations, and it's not
|
||||
// entirely clear how to deduce the destination from the context available for the extractor.
|
||||
// 1. First case:
|
||||
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>.swiftmodule/<Arch>.swiftmodule
|
||||
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>.swiftmodule/<Triple>.swiftmodule
|
||||
// 2. Second case:
|
||||
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.swiftmodule/<Arch>.swiftmodule
|
||||
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.swiftmodule/<Triple>.swiftmodule
|
||||
// 3. Third case:
|
||||
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.framework/Modules/<ModuleName>.swiftmodule/<Arch>.swiftmodule
|
||||
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.framework/Modules/<ModuleName>.swiftmodule/<Triple>.swiftmodule
|
||||
// The <Triple> here is a normalized target triple (e.g. arm64-apple-iphoneos15.4 ->
|
||||
// arm64-apple-iphoneos).
|
||||
//
|
||||
// This method constructs those aliases for a module only if it comes from Xcode, which is detected
|
||||
// by the presence of `Intermediates.noindex` directory in the module path.
|
||||
//
|
||||
// In the case of Swift Package Manager (`swift build`) this is not needed.
|
||||
// Returns the alias paths for `modulePath`, or an empty list when `modulePath` is empty, is not
// a `.swiftmodule`, does not contain an `Intermediates.noindex` component, or is too short to
// contain the destination and architecture directories at their expected positions.
// When `targetTriple` is non-empty, each location additionally gets a
// `<normalized triple>.swiftmodule` alias next to the `<arch>.swiftmodule` one.
static std::vector<std::string> computeModuleAliases(llvm::StringRef modulePath,
                                                     const std::string& targetTriple) {
  if (modulePath.empty() || !modulePath.endswith(".swiftmodule")) {
    return {};
  }

  llvm::SmallVector<llvm::StringRef> chunks;
  modulePath.split(chunks, '/');
  // Index 0 doubles as "not found": `Intermediates.noindex` as the very first component is
  // treated the same way as its absence.
  size_t intermediatesDirIndex = 0;
  for (size_t i = 0; i < chunks.size(); i++) {
    if (chunks[i] == "Intermediates.noindex") {
      intermediatesDirIndex = i;
      break;
    }
  }
  // Not built by Xcode, skipping
  if (intermediatesDirIndex == 0) {
    return {};
  }
  // Expected layout after Intermediates.noindex:
  //   <Project>.build/<BuiltType>-<Target>/<Project>.build/Objects-normal/<Arch>/<Module>.swiftmodule
  const size_t destinationDirIndex = intermediatesDirIndex + 2;
  const size_t archIndex = intermediatesDirIndex + 5;
  // The path merely mentions Intermediates.noindex but does not follow the Xcode layout;
  // indexing further would read past the end of `chunks`.
  if (archIndex >= chunks.size()) {
    return {};
  }
  // e.g. Debug-iphoneos, Release-iphonesimulator, etc.
  auto destinationDir = chunks[destinationDirIndex].str();
  auto arch = chunks[archIndex].str();
  auto moduleNameWithExt = chunks.back();
  auto moduleName = moduleNameWithExt.substr(0, moduleNameWithExt.find_last_of('.'));

  // Everything in front of Intermediates.noindex is shared with the Products directory.
  std::string relocatedModulePath = chunks[0].str();
  for (size_t i = 1; i < intermediatesDirIndex; i++) {
    relocatedModulePath += '/' + chunks[i].str();
  }
  relocatedModulePath += "/Products/";
  relocatedModulePath += destinationDir + '/';

  std::vector<std::string> moduleLocations;

  std::string firstCase = relocatedModulePath;
  firstCase += moduleNameWithExt.str() + '/';
  moduleLocations.push_back(firstCase);

  std::string secondCase = relocatedModulePath;
  secondCase += moduleName.str() + '/';
  secondCase += moduleNameWithExt.str() + '/';
  moduleLocations.push_back(secondCase);

  std::string thirdCase = relocatedModulePath;
  thirdCase += moduleName.str() + '/';
  thirdCase += moduleName.str() + ".framework/Modules/";
  thirdCase += moduleNameWithExt.str() + '/';
  moduleLocations.push_back(thirdCase);

  // The normalized module triple is the same for every location, so compute it once.
  std::string normalizedTriple;
  if (!targetTriple.empty()) {
    llvm::Triple triple(targetTriple);
    auto moduleTriple = swift::getTargetSpecificModuleTriple(triple);
    normalizedTriple = moduleTriple.normalize();
  }

  std::vector<std::string> aliases;
  for (auto& location : moduleLocations) {
    aliases.push_back(location + arch + ".swiftmodule");
    if (!targetTriple.empty()) {
      aliases.push_back(location + normalizedTriple + ".swiftmodule");
    }
  }

  return aliases;
}
|
||||
|
||||
namespace codeql {
|
||||
|
||||
// Redirects every output-producing argument in `CLIArgs` to a location under
// `config.tempArtifactDir` (the arguments are patched in place) and relocates the output file
// maps referenced by `-output-file-map` / `-supplementary-output-file-map`.
// Returns the old path -> new path remapping, which additionally contains the Xcode-specific
// aliases of every remapped `.swiftmodule` (see computeModuleAliases).
// A value-taking option that appears as the very last argument (i.e. without its value) is left
// untouched.
std::unordered_map<std::string, std::string> rewriteOutputsInPlace(
    SwiftExtractorConfiguration& config,
    std::vector<std::string>& CLIArgs) {
  std::unordered_map<std::string, std::string> remapping;

  // TODO: handle filelists?
  std::unordered_set<std::string> pathRewriteOptions({
      "-emit-dependencies-path",
      "-emit-module-path",
      "-emit-module-doc-path",
      "-emit-module-source-info-path",
      "-emit-objc-header-path",
      "-emit-reference-dependencies-path",
      "-index-store-path",
      "-module-cache-path",
      "-o",
      "-pch-output-dir",
      "-serialize-diagnostics-path",
  });

  std::unordered_set<std::string> outputFileMaps(
      {"-supplementary-output-file-map", "-output-file-map"});

  std::vector<size_t> outputFileMapIndexes;
  std::vector<std::string> maybeInput;
  std::string targetTriple;

  for (size_t i = 0; i < CLIArgs.size(); i++) {
    const bool isPathOption = pathRewriteOptions.count(CLIArgs[i]) != 0;
    const bool isOutputFileMap = outputFileMaps.count(CLIArgs[i]) != 0;
    const bool isTarget = CLIArgs[i] == "-target";
    if ((isPathOption || isOutputFileMap || isTarget) && i + 1 >= CLIArgs.size()) {
      // The option is the last argument and its value is missing: there is nothing to rewrite,
      // and reading the value would index past the end of CLIArgs.
      break;
    }

    if (isPathOption) {
      auto oldPath = CLIArgs[i + 1];
      auto newPath = config.tempArtifactDir + '/' + oldPath;
      CLIArgs[++i] = newPath;

      remapping[oldPath] = newPath;
    } else if (isOutputFileMap) {
      // collect output map indexes for further rewriting and skip the following argument
      // We don't patch the map in place as we need to collect all the input files first
      outputFileMapIndexes.push_back(++i);
    } else if (isTarget) {
      targetTriple = CLIArgs[++i];
    } else if (CLIArgs[i][0] != '-') {
      // TODO: add support for input file lists?
      // We need to collect input file names to later use them to extract information from the
      // output file maps.
      maybeInput.push_back(CLIArgs[i]);
    }
  }

  for (auto index : outputFileMapIndexes) {
    auto oldPath = CLIArgs[index];
    auto maybeNewPath = rewriteOutputFileMap(config, oldPath, maybeInput, remapping);
    if (maybeNewPath) {
      auto newPath = maybeNewPath.value();
      CLIArgs[index] = newPath;
      remapping[oldPath] = newPath;
    }
  }

  // This doesn't really belong here, but we've got Xcode...
  // The aliases are gathered in a separate map first: inserting into `remapping` while
  // iterating over it may trigger a rehash, which invalidates the iterator in use.
  std::unordered_map<std::string, std::string> aliasRemapping;
  for (auto& [oldPath, newPath] : remapping) {
    llvm::StringRef path(oldPath);
    auto aliases = computeModuleAliases(path, targetTriple);
    for (auto& alias : aliases) {
      aliasRemapping[alias] = newPath;
    }
  }
  for (auto& [alias, newPath] : aliasRemapping) {
    remapping[alias] = newPath;
  }

  return remapping;
}
|
||||
|
||||
void ensureNewPathsExist(const std::unordered_map<std::string, std::string>& remapping) {
|
||||
for (auto& [_, newPath] : remapping) {
|
||||
llvm::SmallString<PATH_MAX> filepath(newPath);
|
||||
llvm::StringRef parent = llvm::sys::path::parent_path(filepath);
|
||||
if (std::error_code ec = llvm::sys::fs::create_directories(parent)) {
|
||||
std::cerr << "Cannot create redirected directory: " << ec.message() << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void storeRemappingForVFS(const SwiftExtractorConfiguration& config,
|
||||
const std::unordered_map<std::string, std::string>& remapping) {
|
||||
// Only create remapping for the .swiftmodule files
|
||||
std::unordered_map<std::string, std::string> modules;
|
||||
for (auto& [oldPath, newPath] : remapping) {
|
||||
if (llvm::StringRef(oldPath).endswith(".swiftmodule")) {
|
||||
modules[oldPath] = newPath;
|
||||
}
|
||||
}
|
||||
|
||||
if (modules.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (std::error_code ec = llvm::sys::fs::create_directories(config.tempVFSDir)) {
|
||||
std::cerr << "Cannot create temp VFS directory: " << ec.message() << "\n";
|
||||
return;
|
||||
}
|
||||
|
||||
if (std::error_code ec = llvm::sys::fs::create_directories(config.VFSDir)) {
|
||||
std::cerr << "Cannot create VFS directory: " << ec.message() << "\n";
|
||||
return;
|
||||
}
|
||||
|
||||
// Constructing the VFS yaml file in a temp folder so that the other process doesn't read it
|
||||
// while it is not complete
|
||||
// TODO: Pick a more robust way to not collide with files from other processes
|
||||
auto tempVfsPath = config.tempVFSDir + '/' + std::to_string(getpid()) + "-vfs.yaml";
|
||||
std::error_code ec;
|
||||
llvm::raw_fd_ostream fd(tempVfsPath, ec, llvm::sys::fs::OF_None);
|
||||
if (ec) {
|
||||
std::cerr << "Cannot create temp VFS file: '" << tempVfsPath << "': " << ec.message() << "\n";
|
||||
return;
|
||||
}
|
||||
// TODO: there must be a better API than this
|
||||
// LLVM expects the version to be 0
|
||||
fd << "{ version: 0,\n";
|
||||
// This tells the FS not to fallback to the physical file system in case the remapped file is not
|
||||
// present
|
||||
fd << " fallthrough: false,\n";
|
||||
fd << " roots: [\n";
|
||||
for (auto& [oldPath, newPath] : modules) {
|
||||
fd << " {\n";
|
||||
fd << " type: 'file',\n";
|
||||
fd << " name: '" << oldPath << "\',\n";
|
||||
fd << " external-contents: '" << newPath << "\'\n";
|
||||
fd << " },\n";
|
||||
}
|
||||
fd << " ]\n";
|
||||
fd << "}\n";
|
||||
|
||||
fd.flush();
|
||||
auto vfsPath = config.VFSDir + '/' + std::to_string(getpid()) + "-vfs.yaml";
|
||||
if (std::error_code ec = llvm::sys::fs::rename(tempVfsPath, vfsPath)) {
|
||||
std::cerr << "Cannot move temp VFS file '" << tempVfsPath << "' -> '" << vfsPath
|
||||
<< "': " << ec.message() << "\n";
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> collectVFSFiles(const SwiftExtractorConfiguration& config) {
|
||||
auto vfsDir = config.VFSDir + '/';
|
||||
if (!llvm::sys::fs::exists(vfsDir)) {
|
||||
return {};
|
||||
}
|
||||
std::vector<std::string> overlays;
|
||||
std::error_code ec;
|
||||
llvm::sys::fs::directory_iterator it(vfsDir, ec);
|
||||
while (!ec && it != llvm::sys::fs::directory_iterator()) {
|
||||
llvm::StringRef path(it->path());
|
||||
if (path.endswith("vfs.yaml")) {
|
||||
overlays.push_back(path.str());
|
||||
}
|
||||
it.increment(ec);
|
||||
}
|
||||
|
||||
return overlays;
|
||||
}
|
||||
|
||||
} // namespace codeql
|
|
@ -0,0 +1,30 @@
|
|||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace codeql {
|
||||
|
||||
struct SwiftExtractorConfiguration;
|
||||
|
||||
// Rewrites all the output CLI args to point to a scratch dir instead of the actual locations.
|
||||
// This is needed to ensure that the artifacts produced by the extractor do not collide with the
|
||||
// artifacts produced by the actual Swift compiler.
|
||||
// Returns the map containing the remapping oldPath -> newPath.
|
||||
std::unordered_map<std::string, std::string> rewriteOutputsInPlace(
|
||||
SwiftExtractorConfiguration& config,
|
||||
std::vector<std::string>& CLIArgs);
|
||||
|
||||
// Recreate all the redirected new paths as the Swift compiler expects them to be present
|
||||
void ensureNewPathsExist(const std::unordered_map<std::string, std::string>& remapping);
|
||||
|
||||
// Stores remapped `.swiftmodule`s in a YAML file for later consumption by the
|
||||
// llvm::RedirectingFileSystem via Swift's VFSOverlayFiles.
|
||||
void storeRemappingForVFS(const SwiftExtractorConfiguration& config,
|
||||
const std::unordered_map<std::string, std::string>& remapping);
|
||||
|
||||
// Returns a list of VFS YAML files produced by all the extractor processes.
|
||||
std::vector<std::string> collectVFSFiles(const SwiftExtractorConfiguration& config);
|
||||
|
||||
} // namespace codeql
|
|
@ -1,27 +1,32 @@
|
|||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <stdlib.h>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
#include <swift/Basic/LLVMInitialize.h>
|
||||
#include <swift/FrontendTool/FrontendTool.h>
|
||||
|
||||
#include "SwiftExtractor.h"
|
||||
#include "SwiftOutputRewrite.h"
|
||||
|
||||
using namespace std::string_literals;
|
||||
|
||||
// This is part of the swiftFrontendTool interface, we hook into the
|
||||
// compilation pipeline and extract files after the Swift frontend performed
|
||||
// semantic analysys
|
||||
// semantic analysis
|
||||
class Observer : public swift::FrontendObserver {
|
||||
public:
|
||||
explicit Observer(const codeql::SwiftExtractorConfiguration& config) : config{config} {}
|
||||
|
||||
void parsedArgs(swift::CompilerInvocation& invocation) override {
|
||||
// Original compiler and the extractor-compiler get into conflicts when
|
||||
// both produce the same output files.
|
||||
// TODO: change the final artifact destinations instead of disabling
|
||||
// the artifact generation completely?
|
||||
invocation.getFrontendOptions().RequestedAction = swift::FrontendOptions::ActionType::Typecheck;
|
||||
auto& overlays = invocation.getSearchPathOptions().VFSOverlayFiles;
|
||||
auto vfsFiles = codeql::collectVFSFiles(config);
|
||||
for (auto& vfsFile : vfsFiles) {
|
||||
overlays.push_back(vfsFile);
|
||||
}
|
||||
}
|
||||
|
||||
void performedSemanticAnalysis(swift::CompilerInstance& compiler) override {
|
||||
|
@ -54,12 +59,26 @@ int main(int argc, char** argv) {
|
|||
configuration.scratchDir = getenv_or("CODEQL_EXTRACTOR_SWIFT_SCRATCH_DIR", ".");
|
||||
|
||||
configuration.tempTrapDir = configuration.scratchDir + "/swift-trap-temp";
|
||||
configuration.VFSDir = configuration.scratchDir + "/swift-vfs";
|
||||
configuration.tempVFSDir = configuration.scratchDir + "/swift-vfs-temp";
|
||||
configuration.tempArtifactDir = configuration.scratchDir + "/swift-extraction-artifacts";
|
||||
|
||||
configuration.frontendOptions.reserve(argc - 1);
|
||||
for (int i = 1; i < argc; i++) {
|
||||
configuration.frontendOptions.push_back(argv[i]);
|
||||
}
|
||||
configuration.patchedFrontendOptions = configuration.frontendOptions;
|
||||
|
||||
auto remapping =
|
||||
codeql::rewriteOutputsInPlace(configuration, configuration.patchedFrontendOptions);
|
||||
codeql::ensureNewPathsExist(remapping);
|
||||
codeql::storeRemappingForVFS(configuration, remapping);
|
||||
|
||||
std::vector<const char*> args;
|
||||
for (int i = 1; i < argc; i++) {
|
||||
args.push_back(argv[i]);
|
||||
for (auto& arg : configuration.patchedFrontendOptions) {
|
||||
args.push_back(arg.c_str());
|
||||
}
|
||||
std::copy(std::begin(args), std::end(args), std::back_inserter(configuration.frontendOptions));
|
||||
|
||||
Observer observer(configuration);
|
||||
int frontend_rc = swift::performFrontend(args, "swift-extractor", (void*)main, &observer);
|
||||
return frontend_rc;
|
||||
|
|
|
@ -67,9 +67,6 @@ function RegisterExtractorPack(id)
|
|||
return nil
|
||||
end
|
||||
|
||||
-- Skip actions in which we cannot extract anything
|
||||
if compilerArguments.argv[1] == '-merge-modules' then return nil end
|
||||
|
||||
strip_unsupported_args(compilerArguments.argv)
|
||||
insert_resource_dir_if_needed(compilerPath, compilerArguments.argv)
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче