8253757: Add LLVM-based backend for hsdis

Co-authored-by: Magnus Ihse Bursie <ihse@openjdk.org>
Co-authored-by: Ludovic Henry <luhenry@openjdk.org>
Co-authored-by: Jorn Vernee <jvernee@openjdk.org>
Co-authored-by: Nick Gasson <ngasson@openjdk.org>
Reviewed-by: erikj, luhenry
This commit is contained in:
Magnus Ihse Bursie 2022-02-21 10:37:44 +00:00
Родитель bdae1d87c1
Коммит d7a706a540
4 изменённых файлов: 495 добавлений и 5 удалений

Просмотреть файл

@ -63,6 +63,24 @@ ifeq ($(HSDIS_BACKEND), capstone)
-DCAPSTONE_MODE=$(CAPSTONE_MODE)
endif
# Settings that apply only when hsdis is built with the LLVM backend.
ifeq ($(HSDIS_BACKEND), llvm)
# Use C++ instead of C
HSDIS_TOOLCHAIN_CFLAGS := $(CXXFLAGS_JDKLIB)
HSDIS_TOOLCHAIN := TOOLCHAIN_LINK_CXX
# Select the vendor/OS suffix of the default target triple from the target OS.
ifeq ($(call isTargetOs, linux), true)
LLVM_OS := pc-linux-gnu
else ifeq ($(call isTargetOs, macosx), true)
LLVM_OS := apple-darwin
else ifeq ($(call isTargetOs, windows), true)
LLVM_OS := pc-windows-msvc
else
# Any other target OS stops the build here.
$(error No support for LLVM on this platform)
endif
# Pass "<target cpu>-<LLVM_OS>" to the compiler as a quoted string literal in
# the LLVM_DEFAULT_TRIPLET macro.
HSDIS_CFLAGS += -DLLVM_DEFAULT_TRIPLET='"$(OPENJDK_TARGET_CPU)-$(LLVM_OS)"'
endif
ifeq ($(HSDIS_BACKEND), binutils)
ifeq ($(call isTargetOs, windows), true)
# On windows, we need to "fake" a completely different toolchain using gcc

Просмотреть файл

@ -811,11 +811,14 @@ AC_DEFUN([JDKOPT_BUILD_BINUTILS],
AC_DEFUN_ONCE([JDKOPT_SETUP_HSDIS],
[
AC_ARG_WITH([hsdis], [AS_HELP_STRING([--with-hsdis],
[what hsdis backend to use ('none', 'capstone', 'binutils') @<:@none@:>@])])
[what hsdis backend to use ('none', 'capstone', 'llvm', 'binutils') @<:@none@:>@])])
AC_ARG_WITH(capstone, [AS_HELP_STRING([--with-capstone],
[where to find the Capstone files needed for hsdis/capstone])])
AC_ARG_WITH([llvm], [AS_HELP_STRING([--with-llvm],
[where to find the LLVM files needed for hsdis/llvm])])
AC_ARG_WITH([binutils], [AS_HELP_STRING([--with-binutils],
[where to find the binutils files needed for hsdis/binutils])])
@ -864,6 +867,59 @@ AC_DEFUN_ONCE([JDKOPT_SETUP_HSDIS],
AC_MSG_ERROR([Cannot continue])
fi
fi
elif test "x$with_hsdis" = xllvm; then
  # LLVM backend: work out HSDIS_CFLAGS, HSDIS_LDFLAGS and HSDIS_LIBS.
  HSDIS_BACKEND=llvm
  AC_MSG_RESULT(['llvm'])

  # A value given with --with-llvm replaces whatever LLVM_DIR already holds.
  if test "x$with_llvm" != x; then
    LLVM_DIR="$with_llvm"
  fi

  if test "x$OPENJDK_TARGET_OS" != xwindows; then
    if test "x$LLVM_DIR" = x; then
      # Macs with homebrew can have llvm in different places
      UTIL_LOOKUP_PROGS(LLVM_CONFIG, llvm-config, [$PATH:/usr/local/opt/llvm/bin:/opt/homebrew/opt/llvm/bin])
      if test "x$LLVM_CONFIG" = x; then
        AC_MSG_NOTICE([Cannot locate llvm-config which is needed for hsdis/llvm. Try using --with-llvm=<LLVM home>.])
        AC_MSG_ERROR([Cannot continue])
      fi
    else
      # An LLVM home is known, so only its bin directory is searched.
      UTIL_LOOKUP_PROGS(LLVM_CONFIG, llvm-config, [$LLVM_DIR/bin])
      if test "x$LLVM_CONFIG" = x; then
        AC_MSG_NOTICE([Cannot locate llvm-config in $LLVM_DIR. Check your --with-llvm argument.])
        AC_MSG_ERROR([Cannot continue])
      fi
    fi
    # We need the LLVM flags and libs, and llvm-config provides them for us.
    HSDIS_CFLAGS=`$LLVM_CONFIG --cflags`
    HSDIS_LDFLAGS=`$LLVM_CONFIG --ldflags`
    HSDIS_LIBS=`$LLVM_CONFIG --libs $OPENJDK_TARGET_CPU_ARCH ${OPENJDK_TARGET_CPU_ARCH}disassembler`
  else
    if test "x$LLVM_DIR" = x; then
      AC_MSG_NOTICE([--with-llvm is needed on Windows to point out the LLVM home])
      AC_MSG_ERROR([Cannot continue])
    fi
    # Official Windows installations of LLVM do not ship llvm-config, and a self-built
    # llvm-config produced unusable output, so just ignore it on Windows.
    # The paths below are quoted so that an LLVM home containing spaces is still
    # probed as a single file name.
    if ! test -e "$LLVM_DIR/include/llvm-c/lto.h"; then
      AC_MSG_NOTICE([$LLVM_DIR does not seem like a valid LLVM home; include dir is missing])
      AC_MSG_ERROR([Cannot continue])
    fi
    if ! test -e "$LLVM_DIR/include/llvm-c/Disassembler.h"; then
      AC_MSG_NOTICE([$LLVM_DIR does not point to a complete LLVM installation. ])
      AC_MSG_NOTICE([The official LLVM distribution is missing crucial files; you need to build LLVM yourself or get all include files elsewhere])
      AC_MSG_ERROR([Cannot continue])
    fi
    if ! test -e "$LLVM_DIR/lib/llvm-c.lib"; then
      AC_MSG_NOTICE([$LLVM_DIR does not seem like a valid LLVM home; lib dir is missing])
      AC_MSG_ERROR([Cannot continue])
    fi
    # Without llvm-config the flags are fixed values derived from the LLVM home.
    HSDIS_CFLAGS="-I$LLVM_DIR/include"
    HSDIS_LDFLAGS="-libpath:$LLVM_DIR/lib"
    HSDIS_LIBS="llvm-c.lib"
  fi
elif test "x$with_hsdis" = xbinutils; then
HSDIS_BACKEND=binutils
AC_MSG_RESULT(['binutils'])

Просмотреть файл

@ -51,15 +51,15 @@ The files in this directory are built independently of the HotSpot JVM.
hsdis is an interface exposed by Hotspot. There are several backends that
implement this interface, using different disassembly engines. Included in the
JDK is support for building hsdis with Capstone or GNU binutils. The interface
is fairly straightforward and easy to implement using other backends.
JDK is support for building hsdis with Capstone, LLVM or GNU binutils. The
interface is fairly straightforward and easy to implement using other backends.
## Building and installing
To compile hsdis, you need to activate hsdis support, and select the proper
backend to use. This is done with the configure switch `--with-hsdis=<backend>`,
where `<backend>` is either `capstone` or `binutils`. For details, see the
sections on the respective backends below.
where `<backend>` is either `capstone`, `llvm` or `binutils`. For details, see
the sections on the respective backends below.
To build the hsdis library, run `make build-hsdis`. This will build the library
in a separate directory, but not make it available to the JDK in the
@ -76,6 +76,12 @@ diagnostic option `-XX:+PrintAssembly`. Note that since this is a diagnostic
option, you need to unlock these first, so in practice you activate it using
`-XX:+UnlockDiagnosticVMOptions -XX:+PrintAssembly`.
If using the LLVM backend on Windows, you need to be sure that the LLVM DLL file
(or files) can be found by hsdis. In practice, this means that you either need
to copy `LLVM-C.DLL` to a place on your `PATH` or the JDK `bin` directory, or
you need to augment your `PATH` variable to also point to where you installed
LLVM (like `C:\LLVM\bin`).
More information is available at the [HotSpot
wiki](https://wiki.openjdk.java.net/display/HotSpot/PrintAssembly).
@ -100,6 +106,58 @@ this fails, or if you are building on Windows, you need to specify where
Capstone is located using `--with-capstone=<path>`. This path should point to
where you have extracted the Core Engine zip file.
## Building with LLVM
To build this project using LLVM you need to have LLVM installed. Typical ways
of installation can be `sudo apt install llvm` (on Debian and derivatives), or
`brew install llvm` (on macOS with Homebrew). For Windows, see below.
This has been tested with LLVM v13.0.0, but earlier (and later) versions are
also likely to work.
To build hsdis using LLVM, you must enable it in configure by `bash configure
--with-hsdis=llvm`.
If `llvm-config` is not in your path, you will need to specify the LLVM home using
`--with-llvm=<LLVM home>`. Example: If your `llvm-config` is in `~/my-llvm/bin`,
then you should use `--with-llvm=~/my-llvm`.
### Building with LLVM on Windows
Getting a usable installation on Windows is trickier than on the other
platforms. You can download (and patch) the official distribution, or you can
build it yourself.
Links to the latest version of the official build are available at [LLVM's
download page](https://releases.llvm.org/download.html). Download the file
*LLVM-nn.n.n-win64.exe*, and run it to let it install itself. The default
installation location is `C:\Program Files\LLVM`. This is not ideal due to the
spaces in the path, so it is recommended to put it elsewhere (e.g. `C:\LLVM`).
For very unclear reasons, the official Windows build is missing almost all LLVM
include files. (At least this was the case up to and including LLVM 13.) You
will need to complement your installation with the proper include files. One way
to do this is to install LLVM in Cygwin. This will give you (apart from the Cygwin-based dll
files which are unusable with Visual Studio) a complete set of the
headers. These are located in `/usr/include/llvm` and `/usr/include/llvm-c`. Copy
these directories, with all their content, into `$LLVM_HOME/include`.
Alternatively, you can build LLVM yourself from source. This process is
documented at the [LLVM Visual Studio
page](https://llvm.org/docs/GettingStartedVS.html).
Either way, you must tell configure the location of your LLVM installation
using `--with-llvm=<path to LLVM home>`.
The `llvm-config` tool, which configure uses on other platforms to get the
proper compile and link flags to use, is unfortunately not usable on Windows. In
the official distribution, it is just missing. And the self-built version tends
to give broken and unusable output. Therefore configure uses heuristics to set up
proper flags to the compiler and linker. This was verified to work for LLVM v13,
but might be incorrect for other versions. Manual override of `HSDIS_CFLAGS`,
`HSDIS_LDFLAGS` and/or `HSDIS_LIBS` on the make command line might be needed in
that case.
## Building with binutils
To build this project using binutils you need a copy of GNU binutils to build

Просмотреть файл

@ -0,0 +1,358 @@
/*
* Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
*
* Subject to the condition set forth below, permission is hereby granted to
* any person obtaining a copy of this software, associated documentation
* and/or data (collectively the "Software"), free of charge and under any
* and all copyright rights in the Software, and any and all patent rights
* owned or freely licensable by each licensor hereunder covering either (i)
* the unmodified Software as contributed to or provided by such licensor,
* or (ii) the Larger Works (as defined below), to deal in both
*
* (a) the Software, and
*
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file
* if one is included with the Software (each a "Larger Work" to which the
* Software is contributed by such licensors),
*
* without restriction, including without limitation the rights to copy,
* create derivative works of, display, perform, and distribute the Software
* and make, use, sell, offer for sale, import, export, have made, and have
* sold the Software and the Larger Work(s), and to sublicense the foregoing
* rights on either these or other terms.
*
* This license is subject to the following condition:
*
* The above copyright notice and either this complete permission notice or
* at a minimum a reference to the UPL must be included in all copies or
* substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
* NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/* hsdis.cpp -- dump a range of addresses as native instructions
This implements the plugin protocol required by the
HotSpot PrintAssembly option.
*/
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <inttypes.h>
#include <string.h>
#include <llvm-c/Disassembler.h>
#include <llvm-c/DisassemblerTypes.h>
#include <llvm-c/Target.h>
#include <llvm-c/TargetMachine.h>
#include "hsdis.h"
/* short names for stuff in hsdis.h */
typedef decode_instructions_event_callback_ftype event_callback_t;
typedef decode_instructions_printf_callback_ftype printf_callback_t;
/*
 * Backend-independent part of one disassembly request.
 *
 * The object stores the address range, the caller's callbacks and their
 * streams, and drives the per-instruction loop in decode(). A concrete
 * backend supplies the four pure virtual hooks declared below.
 */
class hsdis_backend_base {
 protected:
  uintptr_t         _start_va;         /* first address handed to decode_instruction() */
  uintptr_t         _end_va;           /* decode() stops once it reaches this address */
  unsigned char*    _buffer;           /* stored as given; not read by this class */
  uintptr_t         _length;           /* stored as given; not read by this class */
  event_callback_t  _event_callback;   /* receives "insns", "insn", ... events */
  void*             _event_stream;     /* first argument of every event call */
  printf_callback_t _printf_callback;  /* receives printf-style text output */
  void*             _printf_stream;    /* first argument of every printf call */
  int               _do_newline;       /* non-zero: print "\n" after each insn */
  bool              _losing;           /* once true, decode() decodes nothing more */
  const char*       _arch_name;        /* NULL here; left for the backend to set */

  /* Hooks implemented by the concrete backend. */
  virtual void print_help(const char* msg, const char* arg) = 0;
  virtual void print_insns_config() = 0;
  /* Returns the number of bytes consumed at p; 0 makes decode() give up. */
  virtual size_t decode_instruction(uintptr_t p, uintptr_t start, uintptr_t end) = 0;
  /* Returns the event string that decode() sends after each instruction. */
  virtual const char* format_insn_close(const char* close, char* buf, size_t bufsize) = 0;

 private:
  /* Event sink that discards every event and yields NULL. */
  static void* null_event_callback(void* ignore_stream, const char* ignore_event, void* arg) {
    return NULL;
  }

  /*
   * Event sink that writes each event as XML markup to the FILE* passed as
   * stream. The event text is used as a printf format with arg as its single
   * argument.
   */
  static void* xml_event_callback(void* stream, const char* event, void* arg) {
    FILE* out = (FILE*) stream;
#define NS_PFX "dis:"
    if (event[0] == '/') {
      const char* name = event + 1;  /* event name without the leading slash */
      const char* attrs = strchr(name, ' ');
      if (attrs != NULL) {
        /* closing attributes are emitted as <dis:foo_done attr='val'/> first */
        size_t name_len = (attrs - name);
        fprintf(out, "<" NS_PFX "%.*s_done", (int) name_len, name);
        fprintf(out, attrs, arg);
        fprintf(out, "/></" NS_PFX "%.*s>", (int) name_len, name);
      } else {
        /* plain closing tag */
        fprintf(out, "</" NS_PFX "%s>", name);
      }
    } else {
      /* opening tag, with or without a formatted argument */
      fprintf(out, "<" NS_PFX);
      fprintf(out, event, arg);
      fprintf(out, ">");
    }
#undef NS_PFX
    return NULL;
  }

 protected:
  /*
   * Record the request and substitute defaults for null callbacks:
   *  - a null printf callback becomes fprintf, writing to stdout unless a
   *    stream was supplied (a supplied stream is then used as a FILE*);
   *  - a null event callback becomes the XML writer when an event stream was
   *    supplied, and the discarding sink otherwise.
   */
  hsdis_backend_base(uintptr_t start_va, uintptr_t end_va,
                     unsigned char* buffer, uintptr_t length,
                     event_callback_t event_callback, void* event_stream,
                     printf_callback_t printf_callback, void* printf_stream,
                     int do_newline)
      : _start_va(start_va),
        _end_va(end_va),
        _buffer(buffer),
        _length(length),
        _event_callback(event_callback),
        _event_stream(event_stream),
        _printf_callback(printf_callback),
        _printf_stream(printf_stream),
        _do_newline(do_newline),
        _losing(false),
        _arch_name(NULL) {
    if (_printf_callback == NULL) {
      int (*default_printf)(FILE*, const char*, ...) = &fprintf;
      _printf_callback = (printf_callback_t) default_printf;
      if (_printf_stream == NULL) {
        _printf_stream = (void*) stdout;
      }
    }

    if (_event_callback == NULL) {
      _event_callback = (_event_stream == NULL)
          ? (event_callback_t) &null_event_callback
          : (event_callback_t) &xml_event_callback;
    }
  }

 public:
  /*
   * Walk the range from _start_va to _end_va one instruction at a time,
   * sending an "insn" event before and the closing event after each decoded
   * instruction. Returns the address at which decoding stopped.
   */
  void* decode() {
    const uintptr_t first = _start_va;
    const uintptr_t limit = _end_va;
    uintptr_t cursor = first;

    (*_event_callback)(_event_stream, "insns", (void*) first);
    print_insns_config();

    while (cursor < limit && !_losing) {
      (*_event_callback)(_event_stream, "insn", (void*) cursor);

      const size_t consumed = decode_instruction(cursor, first, limit);
      if (consumed > 0) {
        cursor += consumed;
      } else {
        _losing = true;
      }
      if (_losing) {
        /* no closing event and no newline for an instruction that failed */
        break;
      }

      char scratch[128];
      const char* closing_event = format_insn_close("/insn", scratch, sizeof(scratch));
      (*_event_callback)(_event_stream, closing_event, (void*) cursor);
      if (_do_newline) {
        /* follow each complete insn by a nice newline */
        (*_printf_callback)(_printf_stream, "\n");
      }
    }

    /* NOTE(review): "/insns" is only sent on the losing path -- confirm that
       a fully decoded range is meant to end without it. */
    if (_losing) {
      (*_event_callback)(_event_stream, "/insns", (void*) cursor);
    }
    return (void*) cursor;
  }
};
class hsdis_backend : public hsdis_backend_base {
private:
LLVMDisasmContextRef _dcontext;
char _target_triple[128];
void parse_caller_options(const char* options) {
memset(&_target_triple, 0, sizeof(_target_triple));
const char* p;
for (p = options; p != NULL; ) {
const char* q = strchr(p, ',');
size_t plen = (q == NULL) ? strlen(p) : ((q++) - p);
if (plen == 4 && strncmp(p, "help", plen) == 0) {
print_help(NULL, NULL);
} else if (plen > 6 && strncmp(p, "hsdis-", 6) == 0) {
// do not pass these to the next level
} else if (plen >= 14 && strncmp(p, "target_triple=", 14) == 0) {
char* target_triple = _target_triple;
size_t target_triple_size = sizeof(_target_triple);
target_triple_size -= 1; /*leave room for the null*/
if (plen > target_triple_size) plen = target_triple_size;
strncpy(target_triple, p, plen);
target_triple[plen] = '\0';
}
p = q;
}
}
const char* native_target_triple() {
return LLVM_DEFAULT_TRIPLET;
}
public:
hsdis_backend(uintptr_t start_va, uintptr_t end_va,
unsigned char* buffer, uintptr_t length,
event_callback_t event_callback, void* event_stream,
printf_callback_t printf_callback, void* printf_stream,
const char* options, int newline)
: hsdis_backend_base(start_va, end_va,
buffer, length,
event_callback, event_stream,
printf_callback, printf_stream,
newline),
_dcontext(NULL) {
/* Look into _options for anything interesting. */
if (options != NULL)
parse_caller_options(options);
/* Discover which architecture we are going to disassemble. */
_arch_name = &_target_triple[0];
if (_arch_name[0] == '\0')
_arch_name = native_target_triple();
if (LLVMInitializeNativeTarget() != 0) {
static bool complained = false;
if (!complained)
(*_printf_callback)(_printf_stream, "failed to initialize LLVM native target\n");
complained = true;
/* must bail out */
_losing = true;
return;
}
if (LLVMInitializeNativeAsmPrinter() != 0) {
static bool complained = false;
if (!complained)
(*_printf_callback)(_printf_stream, "failed to initialize LLVM native asm printer\n");
complained = true;
/* must bail out */
_losing = true;
return;
}
if (LLVMInitializeNativeDisassembler() != 0) {
static bool complained = false;
if (!complained)
(*_printf_callback)(_printf_stream, "failed to initialize LLVM native disassembler\n");
complained = true;
/* must bail out */
_losing = true;
return;
}
if ((_dcontext = LLVMCreateDisasm(_arch_name, NULL, 0, NULL, NULL)) == NULL) {
static bool complained = false;
const char* bad = _arch_name;
if (bad == &_target_triple[0])
print_help("bad target_triple=%s", bad);
else if (!complained)
print_help("bad native target_triple=%s; please port hsdis to this platform", bad);
complained = true;
/* must bail out */
_losing = true;
return;
}
LLVMSetDisasmOptions(_dcontext, LLVMDisassembler_Option_PrintImmHex);
}
~hsdis_backend() {
if (_dcontext != NULL) {
LLVMDisasmDispose(_dcontext);
}
}
protected:
virtual void print_help(const char* msg, const char* arg) {
if (msg != NULL) {
(*_printf_callback)(_printf_stream, "hsdis: ");
(*_printf_callback)(_printf_stream, msg, arg);
(*_printf_callback)(_printf_stream, "\n");
}
(*_printf_callback)(_printf_stream, "hsdis output options:\n");
(*_printf_callback)(_printf_stream, " target_triple=<triple> select disassembly target\n");
(*_printf_callback)(_printf_stream, " help print this message\n");
}
virtual void print_insns_config() {
(*_event_callback)(_event_stream, "target_triple name='%s'",
(void*) _arch_name);
}
virtual size_t decode_instruction(uintptr_t p, uintptr_t start, uintptr_t end) {
char buf[128];
size_t size = LLVMDisasmInstruction(_dcontext, (uint8_t*)p, (uint64_t)(end - start), (uint64_t)p, buf, sizeof(buf));
if (size > 0) {
(*_printf_callback)(_printf_stream, "%s", buf);
} else {
// LLVM encountered an unknown instruction
if (end - start >= 4) {
// Print the following word and skip past it
snprintf(buf, sizeof(buf), "\t.inst\t#0x%08x ; undefined", *(uint32_t*)p);
size = 4;
} else {
snprintf(buf, sizeof(buf), "\t<invalid instruction, aborting hsdis>");
}
}
return size;
}
virtual const char* format_insn_close(const char* close, char* buf, size_t bufsize) {
return close;
}
};
void* decode_instructions_virtual(uintptr_t start_va, uintptr_t end_va,
unsigned char* buffer, uintptr_t length,
event_callback_t event_callback_arg, void* event_stream_arg,
printf_callback_t printf_callback_arg, void* printf_stream_arg,
const char* options, int newline) {
return hsdis_backend(start_va, end_va,
buffer, length,
event_callback_arg, event_stream_arg,
printf_callback_arg, printf_stream_arg,
options, newline == 0 ? false : true)
.decode();
}
/*
 * Compatibility interface for older versions of HotSpot.
 * The code is read in place: the range start doubles as the buffer and the
 * buffer length is the size of the range. No newline is requested.
 */
void* decode_instructions(void* start_pv, void* end_pv,
                          event_callback_t event_callback_arg, void* event_stream_arg,
                          printf_callback_t printf_callback_arg, void* printf_stream_arg,
                          const char* options) {
  const uintptr_t range_start = (uintptr_t) start_pv;
  const uintptr_t range_end = (uintptr_t) end_pv;
  return decode_instructions_virtual(range_start,
                                     range_end,
                                     (unsigned char*) start_pv,
                                     range_end - range_start,
                                     event_callback_arg,
                                     event_stream_arg,
                                     printf_callback_arg,
                                     printf_stream_arg,
                                     options, 0);
}