Swift: teach extractor to emit build artifacts for later consumption

This commit is contained in:
Alex Denisov 2022-06-23 09:17:02 +02:00
Родитель 1997d6b18c
Коммит 4d81206a87
7 изменённых файлов: 402 добавлений и 15 удалений

Просмотреть файл

@ -3,6 +3,8 @@ load("//swift:rules.bzl", "swift_cc_binary")
swift_cc_binary(
name = "extractor",
srcs = [
"SwiftOutputRewrite.cpp",
"SwiftOutputRewrite.h",
"SwiftExtractor.cpp",
"SwiftExtractor.h",
"SwiftExtractorConfiguration.h",

Просмотреть файл

@ -80,11 +80,17 @@ static void extractDeclarations(const SwiftExtractorConfiguration& config,
<< "': " << ec.message() << "\n";
return;
}
trapStream << "// extractor-args: ";
trapStream << "/* extractor-args:\n";
for (auto opt : config.frontendOptions) {
trapStream << std::quoted(opt) << " ";
trapStream << " " << std::quoted(opt) << " \\\n";
}
trapStream << "\n\n";
trapStream << "\n*/\n";
trapStream << "/* swift-frontend-args:\n";
for (auto opt : config.patchedFrontendOptions) {
trapStream << " " << std::quoted(opt) << " \\\n";
}
trapStream << "\n*/\n";
TrapOutput trap{trapStream};
TrapArena arena{};

Просмотреть файл

@ -16,7 +16,22 @@ struct SwiftExtractorConfiguration {
// Subdirectory of the scratchDir.
std::string tempTrapDir;
// VFS (virtual file system) support.
// A temporary directory that contains VFS files used during extraction.
// Subdirectory of the scratchDir.
std::string VFSDir;
// A temporary directory that contains temp VFS files before they are moved into VFSDir.
// Subdirectory of the scratchDir.
std::string tempVFSDir;
// A temporary directory that contains build artifacts generated by the extractor during the
// overall extraction process.
// Subdirectory of the scratchDir.
std::string tempArtifactDir;
// The original arguments passed to the extractor. Used for debugging.
std::vector<std::string> frontendOptions;
// The patched arguments passed to swift::performFrontend. Used for debugging.
std::vector<std::string> patchedFrontendOptions;
};
} // namespace codeql

Просмотреть файл

@ -0,0 +1,318 @@
#include "SwiftOutputRewrite.h"
#include "swift/extractor/SwiftExtractorConfiguration.h"
#include <llvm/ADT/SmallString.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/Path.h>
#include <swift/Basic/OutputFileMap.h>
#include <swift/Basic/Platform.h>
#include <unistd.h>
#include <unordered_set>
#include <optional>
#include <iostream>
// Creates a relocated copy of the output file map found at `outputFileMapPath` and records every
// redirected output path in `remapping` (old path -> new path).
// The original map file is not modified, as the original compiler still depends on it.
// Only the entries that belong to one of `inputs` are copied; every output path of such an entry
// is prefixed with `config.tempArtifactDir`.
// Returns the path to the newly created output file map on success, or std::nullopt if the
// original map cannot be loaded or the relocated map cannot be created.
static std::optional<std::string> rewriteOutputFileMap(
    const codeql::SwiftExtractorConfiguration& config,
    const std::string& outputFileMapPath,
    const std::vector<std::string>& inputs,
    std::unordered_map<std::string, std::string>& remapping) {
  auto relocatedMapPath = config.tempArtifactDir + '/' + outputFileMapPath;

  // TODO: do not assume absolute path for the second parameter
  auto outputMapOrError = swift::OutputFileMap::loadFromPath(outputFileMapPath, "");
  if (!outputMapOrError) {
    // A failed llvm::Expected must have its error consumed before it is destroyed, otherwise
    // builds with ABI-breaking checks enabled abort the process.
    std::cerr << "Cannot load output file map: '" << outputFileMapPath
              << "': " << llvm::toString(outputMapOrError.takeError()) << "\n";
    return std::nullopt;
  }
  auto& oldOutputMap = outputMapOrError.get();

  swift::OutputFileMap newOutputMap;
  // `keys` holds views into `inputs`, which outlives this function call.
  std::vector<llvm::StringRef> keys;
  for (auto& key : inputs) {
    auto oldMap = oldOutputMap.getOutputMapForInput(key);
    if (!oldMap) {
      // Not every collected "input" is a real input file; skip the ones the map doesn't know.
      continue;
    }
    keys.push_back(key);
    auto& newMap = newOutputMap.getOrCreateOutputMapForInput(key);
    newMap.copyFrom(*oldMap);
    for (auto& entry : newMap) {
      auto oldPath = entry.getSecond();
      auto relocatedPath = config.tempArtifactDir + '/' + oldPath;
      entry.getSecond() = relocatedPath;
      remapping[oldPath] = relocatedPath;
    }
  }

  llvm::SmallString<PATH_MAX> filepath(relocatedMapPath);
  llvm::StringRef parent = llvm::sys::path::parent_path(filepath);
  if (std::error_code ec = llvm::sys::fs::create_directories(parent)) {
    std::cerr << "Cannot create relocated output map dir: '" << parent.str()
              << "': " << ec.message() << "\n";
    return std::nullopt;
  }

  std::error_code ec;
  llvm::raw_fd_ostream fd(relocatedMapPath, ec, llvm::sys::fs::OF_None);
  if (ec) {
    // Do not hand out a path to a map file that could not be opened for writing.
    std::cerr << "Cannot create relocated output map: '" << relocatedMapPath
              << "': " << ec.message() << "\n";
    return std::nullopt;
  }
  newOutputMap.write(fd, keys);
  return relocatedMapPath;
}
// This is Xcode-specific workaround to produce alias names for an existing .swiftmodule file.
// In the case of Xcode, it calls the Swift compiler and asks it to produce a Swift module.
// Once it's done, Xcode moves the .swiftmodule file in another location, and the location is
// rather arbitrary. Here are examples of such locations:
// Original file produced by the frontend:
// DerivedData/<Project>/Build/Intermediates.noindex/<Project>.build/<BuiltType>-<Target>/<Project>.build/Objects-normal/<Arch>/<ModuleName>.swiftmodule
// where:
// Project: name of a project, target, or scheme
// BuildType: Debug, Release, etc.
// Target: macOS, iphoneos, appletvsimulator, etc.
// Arch: arm64, x86_64, etc.
//
// So far we observed that Xcode can move the module into different locations, and it's not
// entirely clear how to deduce the destination from the context available for the extractor.
// 1. First case:
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>.swiftmodule/<Arch>.swiftmodule
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>.swiftmodule/<Triple>.swiftmodule
// 2. Second case:
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.swiftmodule/<Arch>.swiftmodule
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.swiftmodule/<Triple>.swiftmodule
// 3. Third case:
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.framework/Modules/<ModuleName>.swiftmodule/<Arch>.swiftmodule
// DerivedData/<Project>/Build/Products/<BuiltType>-<Target>/<ModuleName>/<ModuleName>.framework/Modules/<ModuleName>.swiftmodule/<Triple>.swiftmodule
// The <Triple> here is a normalized target triple (e.g. arm64-apple-iphoneos15.4 ->
// arm64-apple-iphoneos).
//
// This function constructs those aliases for a module only if it comes from Xcode, which is detected
// by the presence of `Intermediates.noindex` directory in the module path.
//
// In the case of Swift Package Manager (`swift build`) this is not needed.
// Returns the list of alias paths for `modulePath`, or an empty list when the path is empty, does
// not end with `.swiftmodule`, does not contain an `Intermediates.noindex` component (after the
// first one), or is too short to contain the build-type and architecture components.
// For every candidate location an `<Arch>.swiftmodule` alias is produced, followed by a
// `<Triple>.swiftmodule` alias when `targetTriple` is not empty.
static std::vector<std::string> computeModuleAliases(llvm::StringRef modulePath,
                                                     const std::string& targetTriple) {
  if (modulePath.empty()) {
    return {};
  }
  if (!modulePath.endswith(".swiftmodule")) {
    return {};
  }

  llvm::SmallVector<llvm::StringRef> chunks;
  modulePath.split(chunks, '/');
  size_t intermediatesDirIndex = 0;
  for (size_t i = 0; i < chunks.size(); i++) {
    if (chunks[i] == "Intermediates.noindex") {
      intermediatesDirIndex = i;
      break;
    }
  }
  // Not built by Xcode, skipping
  if (intermediatesDirIndex == 0) {
    return {};
  }
  // The components at offsets +2 (build type/target) and +5 (arch) are read below; bail out on
  // paths that do not follow the expected layout instead of indexing out of bounds.
  if (chunks.size() <= intermediatesDirIndex + 5) {
    return {};
  }

  // e.g. Debug-iphoneos, Release-iphonesimulator, etc.
  auto destinationDir = chunks[intermediatesDirIndex + 2].str();
  auto arch = chunks[intermediatesDirIndex + 5].str();
  auto moduleNameWithExt = chunks.back();
  auto moduleName = moduleNameWithExt.substr(0, moduleNameWithExt.find_last_of('.'));

  // Everything up to (but excluding) `Intermediates.noindex`, followed by `Products/<dest>/`.
  std::string relocatedModulePath = chunks[0].str();
  for (size_t i = 1; i < intermediatesDirIndex; i++) {
    relocatedModulePath += '/' + chunks[i].str();
  }
  relocatedModulePath += "/Products/";
  relocatedModulePath += destinationDir + '/';

  std::vector<std::string> moduleLocations;

  std::string firstCase = relocatedModulePath;
  firstCase += moduleNameWithExt.str() + '/';
  moduleLocations.push_back(firstCase);

  std::string secondCase = relocatedModulePath;
  secondCase += moduleName.str() + '/';
  secondCase += moduleNameWithExt.str() + '/';
  moduleLocations.push_back(secondCase);

  std::string thirdCase = relocatedModulePath;
  thirdCase += moduleName.str() + '/';
  thirdCase += moduleName.str() + ".framework/Modules/";
  thirdCase += moduleNameWithExt.str() + '/';
  moduleLocations.push_back(thirdCase);

  // The normalized module triple does not depend on the location, so compute it only once.
  std::string normalizedTriple;
  if (!targetTriple.empty()) {
    llvm::Triple triple(targetTriple);
    normalizedTriple = swift::getTargetSpecificModuleTriple(triple).normalize();
  }

  std::vector<std::string> aliases;
  for (auto& location : moduleLocations) {
    aliases.push_back(location + arch + ".swiftmodule");
    if (!targetTriple.empty()) {
      aliases.push_back(location + normalizedTriple + ".swiftmodule");
    }
  }
  return aliases;
}
namespace codeql {
// Redirects every output path found in `CLIArgs` into `config.tempArtifactDir`, modifying
// `CLIArgs` in place:
//  - the value following each known output-path option is prefixed with the artifact dir;
//  - each output file map is replaced by a relocated copy (when it can be rewritten);
//  - for Xcode builds, alias locations of `.swiftmodule` files are added to the result.
// An option that appears as the very last argument (i.e. without a value) is left untouched.
// Returns the remapping old path -> new path.
std::unordered_map<std::string, std::string> rewriteOutputsInPlace(
    SwiftExtractorConfiguration& config,
    std::vector<std::string>& CLIArgs) {
  std::unordered_map<std::string, std::string> remapping;

  // TODO: handle filelists?
  const std::unordered_set<std::string> pathRewriteOptions({
      "-emit-dependencies-path",
      "-emit-module-path",
      "-emit-module-doc-path",
      "-emit-module-source-info-path",
      "-emit-objc-header-path",
      "-emit-reference-dependencies-path",
      "-index-store-path",
      "-module-cache-path",
      "-o",
      "-pch-output-dir",
      "-serialize-diagnostics-path",
  });

  const std::unordered_set<std::string> outputFileMaps(
      {"-supplementary-output-file-map", "-output-file-map"});

  std::vector<size_t> outputFileMapIndexes;
  std::vector<std::string> maybeInput;
  std::string targetTriple;

  for (size_t i = 0; i < CLIArgs.size(); i++) {
    // Options below consume the following argument; guard against a trailing option that has
    // no value so that we never index past the end of CLIArgs.
    const bool hasValue = i + 1 < CLIArgs.size();
    if (pathRewriteOptions.count(CLIArgs[i])) {
      if (!hasValue) {
        continue;
      }
      auto oldPath = CLIArgs[i + 1];
      auto newPath = config.tempArtifactDir + '/' + oldPath;
      CLIArgs[++i] = newPath;
      remapping[oldPath] = newPath;
    } else if (outputFileMaps.count(CLIArgs[i])) {
      if (!hasValue) {
        continue;
      }
      // collect output map indexes for further rewriting and skip the following argument
      // We don't patch the map in place as we need to collect all the input files first
      outputFileMapIndexes.push_back(++i);
    } else if (CLIArgs[i] == "-target") {
      if (!hasValue) {
        continue;
      }
      targetTriple = CLIArgs[++i];
    } else if (CLIArgs[i][0] != '-') {
      // TODO: add support for input file lists?
      // We need to collect input file names to later use them to extract information from the
      // output file maps.
      maybeInput.push_back(CLIArgs[i]);
    }
  }

  for (auto index : outputFileMapIndexes) {
    auto oldPath = CLIArgs[index];
    auto maybeNewPath = rewriteOutputFileMap(config, oldPath, maybeInput, remapping);
    if (maybeNewPath) {
      auto newPath = maybeNewPath.value();
      CLIArgs[index] = newPath;
      remapping[oldPath] = newPath;
    }
  }

  // This doesn't really belong here, but we've got Xcode...
  // Aliases are gathered in a separate map first: inserting into `remapping` while iterating
  // over it may trigger a rehash, which invalidates the iterators used by the loop.
  std::unordered_map<std::string, std::string> aliasRemapping;
  for (auto& [oldPath, newPath] : remapping) {
    for (auto& alias : computeModuleAliases(oldPath, targetTriple)) {
      aliasRemapping[alias] = newPath;
    }
  }
  for (auto& [alias, newPath] : aliasRemapping) {
    remapping[alias] = newPath;
  }

  return remapping;
}
void ensureNewPathsExist(const std::unordered_map<std::string, std::string>& remapping) {
for (auto& [_, newPath] : remapping) {
llvm::SmallString<PATH_MAX> filepath(newPath);
llvm::StringRef parent = llvm::sys::path::parent_path(filepath);
if (std::error_code ec = llvm::sys::fs::create_directories(parent)) {
std::cerr << "Cannot create redirected directory: " << ec.message() << "\n";
}
}
}
// Writes the `.swiftmodule` entries of `remapping` into a VFS overlay YAML file named
// `<pid>-vfs.yaml`. The file is first written into `config.tempVFSDir` and then renamed into
// `config.VFSDir`, so that other processes never read a partially written file.
// Nothing is written when `remapping` contains no `.swiftmodule` paths. Failures are reported on
// stderr and abort the operation.
void storeRemappingForVFS(const SwiftExtractorConfiguration& config,
                          const std::unordered_map<std::string, std::string>& remapping) {
  // Only create remapping for the .swiftmodule files
  std::unordered_map<std::string, std::string> modules;
  for (auto& [oldPath, newPath] : remapping) {
    if (llvm::StringRef(oldPath).endswith(".swiftmodule")) {
      modules[oldPath] = newPath;
    }
  }

  if (modules.empty()) {
    return;
  }

  if (std::error_code ec = llvm::sys::fs::create_directories(config.tempVFSDir)) {
    std::cerr << "Cannot create temp VFS directory: " << ec.message() << "\n";
    return;
  }

  if (std::error_code ec = llvm::sys::fs::create_directories(config.VFSDir)) {
    std::cerr << "Cannot create VFS directory: " << ec.message() << "\n";
    return;
  }

  // Constructing the VFS yaml file in a temp folder so that the other process doesn't read it
  // while it is not complete
  // TODO: Pick a more robust way to not collide with files from other processes
  const auto vfsFileName = std::to_string(getpid()) + "-vfs.yaml";
  auto tempVfsPath = config.tempVFSDir + '/' + vfsFileName;
  std::error_code ec;
  llvm::raw_fd_ostream fd(tempVfsPath, ec, llvm::sys::fs::OF_None);
  if (ec) {
    std::cerr << "Cannot create temp VFS file: '" << tempVfsPath << "': " << ec.message() << "\n";
    return;
  }

  // Inside a single-quoted YAML scalar a literal single quote is written as two single quotes.
  // Without this a path containing `'` would produce a malformed overlay file.
  auto quoteEscaped = [](const std::string& text) {
    std::string escaped;
    escaped.reserve(text.size());
    for (char c : text) {
      if (c == '\'') {
        escaped += '\'';
      }
      escaped += c;
    }
    return escaped;
  };

  // TODO: there must be a better API than this
  // LLVM expects the version to be 0
  fd << "{ version: 0,\n";
  // This tells the FS not to fallback to the physical file system in case the remapped file is not
  // present
  fd << " fallthrough: false,\n";
  fd << " roots: [\n";
  for (auto& [oldPath, newPath] : modules) {
    fd << " {\n";
    fd << " type: 'file',\n";
    fd << " name: '" << quoteEscaped(oldPath) << "\',\n";
    fd << " external-contents: '" << quoteEscaped(newPath) << "\'\n";
    fd << " },\n";
  }
  fd << " ]\n";
  fd << "}\n";

  // Close the stream before publishing the file and make sure everything was actually written:
  // an incomplete overlay must not be moved into the directory other processes read from.
  fd.close();
  if (fd.has_error()) {
    std::cerr << "Cannot write temp VFS file: '" << tempVfsPath << "': " << fd.error().message()
              << "\n";
    // An error left uncleared is treated as fatal by the stream's destructor.
    fd.clear_error();
    return;
  }

  auto vfsPath = config.VFSDir + '/' + vfsFileName;
  if (std::error_code ec = llvm::sys::fs::rename(tempVfsPath, vfsPath)) {
    std::cerr << "Cannot move temp VFS file '" << tempVfsPath << "' -> '" << vfsPath
              << "': " << ec.message() << "\n";
    return;
  }
}
std::vector<std::string> collectVFSFiles(const SwiftExtractorConfiguration& config) {
auto vfsDir = config.VFSDir + '/';
if (!llvm::sys::fs::exists(vfsDir)) {
return {};
}
std::vector<std::string> overlays;
std::error_code ec;
llvm::sys::fs::directory_iterator it(vfsDir, ec);
while (!ec && it != llvm::sys::fs::directory_iterator()) {
llvm::StringRef path(it->path());
if (path.endswith("vfs.yaml")) {
overlays.push_back(path.str());
}
it.increment(ec);
}
return overlays;
}
} // namespace codeql

Просмотреть файл

@ -0,0 +1,30 @@
#pragma once
#include <vector>
#include <string>
#include <unordered_map>
namespace codeql {
// Forward declaration: only references to the configuration are used in this header.
struct SwiftExtractorConfiguration;
// Rewrites all the output CLI args to point to a scratch dir instead of the actual locations.
// This is needed to ensure that the artifacts produced by the extractor do not collide with the
// artifacts produced by the actual Swift compiler.
// `CLIArgs` is modified in place.
// Returns the map containing the remapping oldPath -> newPath.
std::unordered_map<std::string, std::string> rewriteOutputsInPlace(
SwiftExtractorConfiguration& config,
std::vector<std::string>& CLIArgs);
// Creates the parent directories of all the redirected new paths, as the Swift compiler expects
// them to be present.
void ensureNewPathsExist(const std::unordered_map<std::string, std::string>& remapping);
// Stores remapped `.swiftmodule`s in a YAML file for later consumption by the
// llvm::RedirectingFileSystem via Swift's VFSOverlayFiles.
void storeRemappingForVFS(const SwiftExtractorConfiguration& config,
const std::unordered_map<std::string, std::string>& remapping);
// Returns a list of VFS YAML files produced by all the extractor processes.
std::vector<std::string> collectVFSFiles(const SwiftExtractorConfiguration& config);
} // namespace codeql

Просмотреть файл

@ -1,27 +1,32 @@
#include <fstream>
#include <iomanip>
#include <stdlib.h>
#include <unordered_set>
#include <vector>
#include <string>
#include <iostream>
#include <swift/Basic/LLVMInitialize.h>
#include <swift/FrontendTool/FrontendTool.h>
#include "SwiftExtractor.h"
#include "SwiftOutputRewrite.h"
using namespace std::string_literals;
// This is part of the swiftFrontendTool interface, we hook into the
// compilation pipeline and extract files after the Swift frontend performed
// semantic analysys
// semantic analysis
class Observer : public swift::FrontendObserver {
public:
explicit Observer(const codeql::SwiftExtractorConfiguration& config) : config{config} {}
void parsedArgs(swift::CompilerInvocation& invocation) override {
// Original compiler and the extractor-compiler get into conflicts when
// both produce the same output files.
// TODO: change the final artifact destinations instead of disabling
// the artifact generation completely?
invocation.getFrontendOptions().RequestedAction = swift::FrontendOptions::ActionType::Typecheck;
auto& overlays = invocation.getSearchPathOptions().VFSOverlayFiles;
auto vfsFiles = codeql::collectVFSFiles(config);
for (auto& vfsFile : vfsFiles) {
overlays.push_back(vfsFile);
}
}
void performedSemanticAnalysis(swift::CompilerInstance& compiler) override {
@ -54,12 +59,26 @@ int main(int argc, char** argv) {
configuration.scratchDir = getenv_or("CODEQL_EXTRACTOR_SWIFT_SCRATCH_DIR", ".");
configuration.tempTrapDir = configuration.scratchDir + "/swift-trap-temp";
configuration.VFSDir = configuration.scratchDir + "/swift-vfs";
configuration.tempVFSDir = configuration.scratchDir + "/swift-vfs-temp";
configuration.tempArtifactDir = configuration.scratchDir + "/swift-extraction-artifacts";
configuration.frontendOptions.reserve(argc - 1);
for (int i = 1; i < argc; i++) {
configuration.frontendOptions.push_back(argv[i]);
}
configuration.patchedFrontendOptions = configuration.frontendOptions;
auto remapping =
codeql::rewriteOutputsInPlace(configuration, configuration.patchedFrontendOptions);
codeql::ensureNewPathsExist(remapping);
codeql::storeRemappingForVFS(configuration, remapping);
std::vector<const char*> args;
for (int i = 1; i < argc; i++) {
args.push_back(argv[i]);
for (auto& arg : configuration.patchedFrontendOptions) {
args.push_back(arg.c_str());
}
std::copy(std::begin(args), std::end(args), std::back_inserter(configuration.frontendOptions));
Observer observer(configuration);
int frontend_rc = swift::performFrontend(args, "swift-extractor", (void*)main, &observer);
return frontend_rc;

Просмотреть файл

@ -67,9 +67,6 @@ function RegisterExtractorPack(id)
return nil
end
-- Skip actions in which we cannot extract anything
if compilerArguments.argv[1] == '-merge-modules' then return nil end
strip_unsupported_args(compilerArguments.argv)
insert_resource_dir_if_needed(compilerPath, compilerArguments.argv)