/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef NS_WINDOWS_DLL_INTERCEPTOR_H_ #define NS_WINDOWS_DLL_INTERCEPTOR_H_ #include "mozilla/Assertions.h" #include "mozilla/ArrayUtils.h" #include "mozilla/UniquePtr.h" #include "nsWindowsHelpers.h" #include #include #include /* * Simple function interception. * * We have two separate mechanisms for intercepting a function: We can use the * built-in nop space, if it exists, or we can create a detour. * * Using the built-in nop space works as follows: On x86-32, DLL functions * begin with a two-byte nop (mov edi, edi) and are preceded by five bytes of * NOP instructions. * * When we detect a function with this prelude, we do the following: * * 1. Write a long jump to our interceptor function into the five bytes of NOPs * before the function. * * 2. Write a short jump -5 into the two-byte nop at the beginning of the function. * * This mechanism is nice because it's thread-safe. It's even safe to do if * another thread is currently running the function we're modifying! * * When the WindowsDllNopSpacePatcher is destroyed, we overwrite the short jump * but not the long jump, so re-intercepting the same function won't work, * because its prelude won't match. * * * Unfortunately nop space patching doesn't work on functions which don't have * this magic prelude (and in particular, x86-64 never has the prelude). So * when we can't use the built-in nop space, we fall back to using a detour, * which works as follows: * * 1. Save first N bytes of OrigFunction to trampoline, where N is a * number of bytes >= 5 that are instruction aligned. * * 2. Replace first 5 bytes of OrigFunction with a jump to the Hook * function. * * 3. 
After N bytes of the trampoline, add a jump to OrigFunction+N to * continue original program flow. * * 4. Hook function needs to call the trampoline during its execution, * to invoke the original function (so address of trampoline is * returned). * * When the WindowsDllDetourPatcher object is destructed, OrigFunction is * patched again to jump directly to the trampoline instead of going through * the hook function. As such, re-intercepting the same function won't work, as * jump instructions are not supported. * * Note that this is not thread-safe. Sad day. * */ #include #define COPY_CODES(NBYTES) do { \ memcpy(&tramp[nTrampBytes], &origBytes[nOrigBytes], NBYTES); \ nOrigBytes += NBYTES; \ nTrampBytes += NBYTES; \ } while (0) namespace mozilla { namespace internal { class AutoVirtualProtect { public: AutoVirtualProtect(void* aFunc, size_t aSize, DWORD aProtect) : mFunc(aFunc), mSize(aSize), mNewProtect(aProtect), mOldProtect(0), mSuccess(false) {} ~AutoVirtualProtect() { if (mSuccess) { VirtualProtectEx(GetCurrentProcess(), mFunc, mSize, mOldProtect, &mOldProtect); } } bool Protect() { mSuccess = !!VirtualProtectEx(GetCurrentProcess(), mFunc, mSize, mNewProtect, &mOldProtect); if (!mSuccess) { // printf("VirtualProtectEx failed! %d\n", GetLastError()); } return mSuccess; } private: void* const mFunc; size_t const mSize; DWORD const mNewProtect; DWORD mOldProtect; bool mSuccess; }; class WindowsDllNopSpacePatcher { typedef uint8_t* byteptr_t; HMODULE mModule; // Dumb array for remembering the addresses of functions we've patched. // (This should be nsTArray, but non-XPCOM code uses this class.) static const size_t maxPatchedFns = 16; byteptr_t mPatchedFns[maxPatchedFns]; size_t mPatchedFnsLen; public: WindowsDllNopSpacePatcher() : mModule(0) , mPatchedFnsLen(0) {} #if defined(_M_IX86) ~WindowsDllNopSpacePatcher() { // Restore the mov edi, edi to the beginning of each function we patched. 
for (size_t i = 0; i < mPatchedFnsLen; i++) { byteptr_t fn = mPatchedFns[i]; // Ensure we can write to the code. AutoVirtualProtect protect(fn, 2, PAGE_EXECUTE_READWRITE); if (!protect.Protect()) { continue; } // mov edi, edi *((uint16_t*)fn) = 0xff8b; // I don't think this is actually necessary, but it can't hurt. FlushInstructionCache(GetCurrentProcess(), /* ignored */ nullptr, /* ignored */ 0); } } void Init(const char* aModuleName) { if (!IsCompatible()) { #if defined(MOZILLA_INTERNAL_API) NS_WARNING("NOP space patching is unavailable for compatibility reasons"); #endif return; } mModule = LoadLibraryExA(aModuleName, nullptr, 0); if (!mModule) { //printf("LoadLibraryEx for '%s' failed\n", aModuleName); return; } } /** * NVIDIA Optimus drivers utilize Microsoft Detours 2.x to patch functions * in our address space. There is a bug in Detours 2.x that causes it to * patch at the wrong address when attempting to detour code that is already * NOP space patched. This function is an effort to detect the presence of * this NVIDIA code in our address space and disable NOP space patching if it * is. We also check AppInit_DLLs since this is the mechanism that the Optimus * drivers use to inject into our process. */ static bool IsCompatible() { // These DLLs are known to have bad interactions with this style of patching const wchar_t* kIncompatibleDLLs[] = { L"detoured.dll", L"_etoured.dll", L"nvd3d9wrap.dll", L"nvdxgiwrap.dll" }; // See if the infringing DLLs are already loaded for (unsigned int i = 0; i < mozilla::ArrayLength(kIncompatibleDLLs); ++i) { if (GetModuleHandleW(kIncompatibleDLLs[i])) { return false; } } if (GetModuleHandleW(L"user32.dll")) { // user32 is loaded but the infringing DLLs are not, assume we're safe to // proceed. return true; } // If user32 has not loaded yet, check AppInit_DLLs to ensure that Optimus // won't be loaded once user32 is initialized. 
HKEY hkey = NULL; if (!RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Windows", 0, KEY_QUERY_VALUE, &hkey)) { nsAutoRegKey key(hkey); DWORD numBytes = 0; const wchar_t kAppInitDLLs[] = L"AppInit_DLLs"; // Query for required buffer size LONG status = RegQueryValueExW(hkey, kAppInitDLLs, nullptr, nullptr, nullptr, &numBytes); mozilla::UniquePtr data; if (!status) { // Allocate the buffer and query for the actual data data = mozilla::MakeUnique(numBytes / sizeof(wchar_t)); status = RegQueryValueExW(hkey, kAppInitDLLs, nullptr, nullptr, (LPBYTE)data.get(), &numBytes); } if (!status) { // For each token, split up the filename components and then check the // name of the file. const wchar_t kDelimiters[] = L", "; wchar_t* tokenContext = nullptr; wchar_t* token = wcstok_s(data.get(), kDelimiters, &tokenContext); while (token) { wchar_t fname[_MAX_FNAME] = {0}; if (!_wsplitpath_s(token, nullptr, 0, nullptr, 0, fname, mozilla::ArrayLength(fname), nullptr, 0)) { // nvinit.dll is responsible for bootstrapping the DLL injection, so // that is the library that we check for here const wchar_t kNvInitName[] = L"nvinit"; if (!_wcsnicmp(fname, kNvInitName, mozilla::ArrayLength(kNvInitName))) { return false; } } token = wcstok_s(nullptr, kDelimiters, &tokenContext); } } } return true; } bool AddHook(const char* aName, intptr_t aHookDest, void** aOrigFunc) { if (!mModule) { return false; } if (!IsCompatible()) { #if defined(MOZILLA_INTERNAL_API) NS_WARNING("NOP space patching is unavailable for compatibility reasons"); #endif return false; } MOZ_RELEASE_ASSERT(mPatchedFnsLen < maxPatchedFns, "No room for the hook"); byteptr_t fn = reinterpret_cast(GetProcAddress(mModule, aName)); if (!fn) { //printf ("GetProcAddress failed\n"); return false; } fn = ResolveRedirectedAddress(fn); // Ensure we can read and write starting at fn - 5 (for the long jmp we're // going to write) and ending at fn + 2 (for the short jmp up to the long // jmp). 
These bytes may span two pages with different protection. AutoVirtualProtect protectBefore(fn - 5, 5, PAGE_EXECUTE_READWRITE); AutoVirtualProtect protectAfter(fn, 2, PAGE_EXECUTE_READWRITE); if (!protectBefore.Protect() || !protectAfter.Protect()) { return false; } bool rv = WriteHook(fn, aHookDest, aOrigFunc); if (rv) { mPatchedFns[mPatchedFnsLen] = fn; mPatchedFnsLen++; } return rv; } bool WriteHook(byteptr_t aFn, intptr_t aHookDest, void** aOrigFunc) { // Check that the 5 bytes before aFn are NOP's or INT 3's, // and that the 2 bytes after aFn are mov(edi, edi). // // It's safe to read aFn[-5] because we set it to PAGE_EXECUTE_READWRITE // before calling WriteHook. for (int i = -5; i <= -1; i++) { if (aFn[i] != 0x90 && aFn[i] != 0xcc) { // nop or int 3 return false; } } // mov edi, edi. Yes, there are two ways to encode the same thing: // // 0x89ff == mov r/m, r // 0x8bff == mov r, r/m // // where "r" is register and "r/m" is register or memory. Windows seems to // use 8bff; I include 89ff out of paranoia. if ((aFn[0] != 0x8b && aFn[0] != 0x89) || aFn[1] != 0xff) { return false; } // Write a long jump into the space above the function. aFn[-5] = 0xe9; // jmp *((intptr_t*)(aFn - 4)) = aHookDest - (uintptr_t)(aFn); // target displacement // Set aOrigFunc here, because after this point, aHookDest might be called, // and aHookDest might use the aOrigFunc pointer. *aOrigFunc = aFn + 2; // Short jump up into our long jump. *((uint16_t*)(aFn)) = 0xf9eb; // jmp $-5 // I think this routine is safe without this, but it can't hurt. FlushInstructionCache(GetCurrentProcess(), /* ignored */ nullptr, /* ignored */ 0); return true; } private: static byteptr_t ResolveRedirectedAddress(const byteptr_t aOriginalFunction) { // If function entry is jmp rel8 stub to the internal implementation, we // resolve redirected address from the jump target. 
if (aOriginalFunction[0] == 0xeb) { int8_t offset = (int8_t)(aOriginalFunction[1]); if (offset <= 0) { // Bail out for negative offset: probably already patched by some // third-party code. return aOriginalFunction; } for (int8_t i = 0; i < offset; i++) { if (aOriginalFunction[2 + i] != 0x90) { // Bail out on insufficient nop space. return aOriginalFunction; } } return aOriginalFunction + 2 + offset; } // If function entry is jmp [disp32] such as used by kernel32, // we resolve redirected address from import table. if (aOriginalFunction[0] == 0xff && aOriginalFunction[1] == 0x25) { return (byteptr_t)(**((uint32_t**) (aOriginalFunction + 2))); } return aOriginalFunction; } #else void Init(const char* aModuleName) { // Not implemented except on x86-32. } bool AddHook(const char* aName, intptr_t aHookDest, void** aOrigFunc) { // Not implemented except on x86-32. return false; } #endif }; class WindowsDllDetourPatcher { typedef unsigned char* byteptr_t; public: WindowsDllDetourPatcher() : mModule(0), mHookPage(0), mMaxHooks(0), mCurHooks(0) { } ~WindowsDllDetourPatcher() { int i; byteptr_t p; for (i = 0, p = mHookPage; i < mCurHooks; i++, p += kHookSize) { #if defined(_M_IX86) size_t nBytes = 1 + sizeof(intptr_t); #elif defined(_M_X64) size_t nBytes = 2 + sizeof(intptr_t); #else #error "Unknown processor type" #endif byteptr_t origBytes = (byteptr_t)DecodePointer(*((byteptr_t*)p)); // ensure we can modify the original code AutoVirtualProtect protect(origBytes, nBytes, PAGE_EXECUTE_READWRITE); if (!protect.Protect()) { continue; } // Remove the hook by making the original function jump directly // in the trampoline. intptr_t dest = (intptr_t)(p + sizeof(void*)); #if defined(_M_IX86) // Ensure the JMP from CreateTrampoline is where we expect it to be. if (origBytes[0] != 0xE9) continue; *((intptr_t*)(origBytes + 1)) = dest - (intptr_t)(origBytes + 5); // target displacement #elif defined(_M_X64) // Ensure the MOV R11 from CreateTrampoline is where we expect it to be. 
if (origBytes[0] != 0x49 || origBytes[1] != 0xBB) continue; *((intptr_t*)(origBytes + 2)) = dest; #else #error "Unknown processor type" #endif } } void Init(const char* aModuleName, int aNumHooks = 0) { if (mModule) { return; } mModule = LoadLibraryExA(aModuleName, nullptr, 0); if (!mModule) { //printf("LoadLibraryEx for '%s' failed\n", aModuleName); return; } int hooksPerPage = 4096 / kHookSize; if (aNumHooks == 0) { aNumHooks = hooksPerPage; } mMaxHooks = aNumHooks + (hooksPerPage % aNumHooks); mHookPage = (byteptr_t)VirtualAllocEx(GetCurrentProcess(), nullptr, mMaxHooks * kHookSize, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READ); if (!mHookPage) { mModule = 0; return; } } bool Initialized() { return !!mModule; } bool AddHook(const char* aName, intptr_t aHookDest, void** aOrigFunc) { if (!mModule) { return false; } void* pAddr = (void*)GetProcAddress(mModule, aName); if (!pAddr) { //printf ("GetProcAddress failed\n"); return false; } pAddr = ResolveRedirectedAddress((byteptr_t)pAddr); CreateTrampoline(pAddr, aHookDest, aOrigFunc); if (!*aOrigFunc) { //printf ("CreateTrampoline failed\n"); return false; } return true; } protected: const static int kPageSize = 4096; const static int kHookSize = 128; HMODULE mModule; byteptr_t mHookPage; int mMaxHooks; int mCurHooks; // rex bits static const BYTE kMaskHighNibble = 0xF0; static const BYTE kRexOpcode = 0x40; static const BYTE kMaskRexW = 0x08; static const BYTE kMaskRexR = 0x04; static const BYTE kMaskRexX = 0x02; static const BYTE kMaskRexB = 0x01; // mod r/m bits static const BYTE kRegFieldShift = 3; static const BYTE kMaskMod = 0xC0; static const BYTE kMaskReg = 0x38; static const BYTE kMaskRm = 0x07; static const BYTE kRmNeedSib = 0x04; static const BYTE kModReg = 0xC0; static const BYTE kModDisp32 = 0x80; static const BYTE kModDisp8 = 0x40; static const BYTE kModNoRegDisp = 0x00; static const BYTE kRmNoRegDispDisp32 = 0x05; // sib bits static const BYTE kMaskSibScale = 0xC0; static const BYTE kMaskSibIndex = 0x38; 
static const BYTE kMaskSibBase = 0x07; static const BYTE kSibBaseEbp = 0x05; // Register bit IDs. static const BYTE kRegAx = 0x0; static const BYTE kRegCx = 0x1; static const BYTE kRegDx = 0x2; static const BYTE kRegBx = 0x3; static const BYTE kRegSp = 0x4; static const BYTE kRegBp = 0x5; static const BYTE kRegSi = 0x6; static const BYTE kRegDi = 0x7; // Special ModR/M codes. These indicate operands that cannot be simply // memcpy-ed. // Operand is a 64-bit RIP-relative address. static const int kModOperand64 = -2; // Operand is not yet handled by our trampoline. static const int kModUnknown = -1; /** * Returns the number of bytes taken by the ModR/M byte, SIB (if present) * and the instruction's operand. In special cases, the special MODRM codes * above are returned. * aModRm points to the ModR/M byte of the instruction. * On return, aSubOpcode (if present) is filled with the subopcode/register * code found in the ModR/M byte. */ int CountModRmSib(const BYTE *aModRm, BYTE* aSubOpcode = nullptr) { if (!aModRm) { MOZ_ASSERT(aModRm, "Missing ModRM byte"); return kModUnknown; } int numBytes = 1; // Start with 1 for mod r/m byte itself switch (*aModRm & kMaskMod) { case kModReg: return numBytes; case kModDisp8: numBytes += 1; break; case kModDisp32: numBytes += 4; break; case kModNoRegDisp: if ((*aModRm & kMaskRm) == kRmNoRegDispDisp32) { #if defined(_M_X64) if (aSubOpcode) { *aSubOpcode = (*aModRm & kMaskReg) >> kRegFieldShift; } return kModOperand64; #else // On IA-32, all ModR/M instruction modes address memory relative to 0 numBytes += 4; #endif } else if (((*aModRm & kMaskRm) == kRmNeedSib && (*(aModRm + 1) & kMaskSibBase) == kSibBaseEbp)) { numBytes += 4; } break; default: // This should not be reachable MOZ_ASSERT_UNREACHABLE("Impossible value for modr/m byte mod bits"); return kModUnknown; } if ((*aModRm & kMaskRm) == kRmNeedSib) { // SIB byte numBytes += 1; } if (aSubOpcode) { *aSubOpcode = (*aModRm & kMaskReg) >> kRegFieldShift; } return numBytes; } #if 
defined(_M_X64) // To patch for JMP and JE enum JumpType { Je, Jne, Jmp, Call }; struct JumpPatch { JumpPatch() : mHookOffset(0), mJumpAddress(0), mType(JumpType::Jmp) { } JumpPatch(size_t aOffset, intptr_t aAddress, JumpType aType = JumpType::Jmp) : mHookOffset(aOffset), mJumpAddress(aAddress), mType(aType) { } size_t GenerateJump(uint8_t* aCode) { size_t offset = mHookOffset; if (mType == JumpType::Je) { // JNE RIP+14 aCode[offset] = 0x75; aCode[offset + 1] = 14; offset += 2; } else if (mType == JumpType::Jne) { // JE RIP+14 aCode[offset] = 0x74; aCode[offset + 1] = 14; offset += 2; } // Near call/jmp, absolute indirect, address given in r/m32 if (mType == JumpType::Call) { // CALL [RIP+0] aCode[offset] = 0xff; aCode[offset + 1] = 0x15; // The offset to jump destination -- ie it is placed 2 bytes after the offset. *reinterpret_cast(aCode + offset + 2) = 2; aCode[offset + 2 + 4] = 0xeb; // JMP +8 (jump over mJumpAddress) aCode[offset + 2 + 4 + 1] = 8; *reinterpret_cast(aCode + offset + 2 + 4 + 2) = mJumpAddress; return offset + 2 + 4 + 2 + 8; } else { // JMP [RIP+0] aCode[offset] = 0xff; aCode[offset + 1] = 0x25; // The offset to jump destination is 0 *reinterpret_cast(aCode + offset + 2) = 0; *reinterpret_cast(aCode + offset + 2 + 4) = mJumpAddress; return offset + 2 + 4 + 8; } } size_t mHookOffset; intptr_t mJumpAddress; JumpType mType; }; #endif enum ePrefixGroupBits { eNoPrefixes = 0, ePrefixGroup1 = (1 << 0), ePrefixGroup2 = (1 << 1), ePrefixGroup3 = (1 << 2), ePrefixGroup4 = (1 << 3) }; int CountPrefixBytes(byteptr_t aBytes, const int aBytesIndex, unsigned char* aOutGroupBits) { unsigned char& groupBits = *aOutGroupBits; groupBits = eNoPrefixes; int index = aBytesIndex; while (true) { switch (aBytes[index]) { // Group 1 case 0xF0: // LOCK case 0xF2: // REPNZ case 0xF3: // REP / REPZ if (groupBits & ePrefixGroup1) { return -1; } groupBits |= ePrefixGroup1; ++index; break; // Group 2 case 0x2E: // CS override / branch not taken case 0x36: // SS override case 
0x3E: // DS override / branch taken case 0x64: // FS override case 0x65: // GS override if (groupBits & ePrefixGroup2) { return -1; } groupBits |= ePrefixGroup2; ++index; break; // Group 3 case 0x66: // operand size override if (groupBits & ePrefixGroup3) { return -1; } groupBits |= ePrefixGroup3; ++index; break; // Group 4 case 0x67: // Address size override if (groupBits & ePrefixGroup4) { return -1; } groupBits |= ePrefixGroup4; ++index; break; default: return index - aBytesIndex; } } } // Return a ModR/M byte made from the 2 Mod bits, the register used for the // reg bits and the register used for the R/M bits. BYTE BuildModRmByte(BYTE aModBits, BYTE aReg, BYTE aRm) { MOZ_ASSERT((aRm & kMaskRm) == aRm); MOZ_ASSERT((aModBits & kMaskMod) == aModBits); MOZ_ASSERT(((aReg << kRegFieldShift) & kMaskReg) == (aReg << kRegFieldShift)); return aModBits | (aReg << kRegFieldShift) | aRm; } void CreateTrampoline(void* aOrigFunction, intptr_t aDest, void** aOutTramp) { *aOutTramp = nullptr; AutoVirtualProtect protectHookPage(mHookPage, mMaxHooks * kHookSize, PAGE_EXECUTE_READWRITE); if (!protectHookPage.Protect()) { return; } byteptr_t tramp = FindTrampolineSpace(); if (!tramp) { return; } // We keep the address of the original function in the first bytes of // the trampoline buffer *((void**)tramp) = EncodePointer(aOrigFunction); tramp += sizeof(void*); byteptr_t origBytes = (byteptr_t)aOrigFunction; // # of bytes of the original function that we can overwrite. int nOrigBytes = 0; #if defined(_M_IX86) int pJmp32 = -1; while (nOrigBytes < 5) { // Understand some simple instructions that might be found in a // prologue; we might need to extend this as necessary. // // Note! If we ever need to understand jump instructions, we'll // need to rewrite the displacement argument. 
unsigned char prefixGroups; int numPrefixBytes = CountPrefixBytes(origBytes, nOrigBytes, &prefixGroups); if (numPrefixBytes < 0 || (prefixGroups & (ePrefixGroup3 | ePrefixGroup4))) { // Either the prefix sequence was bad, or there are prefixes that // we don't currently support (groups 3 and 4) MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } nOrigBytes += numPrefixBytes; if (origBytes[nOrigBytes] >= 0x88 && origBytes[nOrigBytes] <= 0x8B) { // various MOVs ++nOrigBytes; int len = CountModRmSib(origBytes + nOrigBytes); if (len < 0) { MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence"); return; } nOrigBytes += len; } else if (origBytes[nOrigBytes] == 0xA1) { // MOV eax, [seg:offset] nOrigBytes += 5; } else if (origBytes[nOrigBytes] == 0xB8) { // MOV 0xB8: http://ref.x86asm.net/coder32.html#xB8 nOrigBytes += 5; } else if (origBytes[nOrigBytes] == 0x33 && (origBytes[nOrigBytes+1] & kMaskMod) == kModReg) { // XOR r32, r32 nOrigBytes += 2; } else if ((origBytes[nOrigBytes] & 0xf8) == 0x40) { // INC r32 nOrigBytes += 1; } else if (origBytes[nOrigBytes] == 0x83) { // ADD|ODR|ADC|SBB|AND|SUB|XOR|CMP r/m, imm8 unsigned char b = origBytes[nOrigBytes + 1]; if ((b & 0xc0) == 0xc0) { // ADD|ODR|ADC|SBB|AND|SUB|XOR|CMP r, imm8 nOrigBytes += 3; } else { // bail MOZ_ASSERT_UNREACHABLE("Unrecognized bit opcode sequence"); return; } } else if (origBytes[nOrigBytes] == 0x68) { // PUSH with 4-byte operand nOrigBytes += 5; } else if ((origBytes[nOrigBytes] & 0xf0) == 0x50) { // 1-byte PUSH/POP nOrigBytes++; } else if (origBytes[nOrigBytes] == 0x6A) { // PUSH imm8 nOrigBytes += 2; } else if (origBytes[nOrigBytes] == 0xe9) { pJmp32 = nOrigBytes; // jmp 32bit offset nOrigBytes += 5; } else if (origBytes[nOrigBytes] == 0xff && origBytes[nOrigBytes + 1] == 0x25) { // jmp [disp32] nOrigBytes += 6; } else if (origBytes[nOrigBytes] == 0xc2) { // ret imm16. We can't handle this but it happens. We don't ASSERT but we do fail to hook. 
#if defined(MOZILLA_INTERNAL_API) NS_WARNING("Cannot hook method -- RET opcode found"); #endif return; } else { //printf ("Unknown x86 instruction byte 0x%02x, aborting trampoline\n", origBytes[nOrigBytes]); MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } // The trampoline is a copy of the instructions that we just traced, // followed by a jump that we add below. memcpy(tramp, aOrigFunction, nOrigBytes); #elif defined(_M_X64) // The number of bytes used by the trampoline. int nTrampBytes = 0; bool foundJmp = false; while (nOrigBytes < 13) { // If we found JMP 32bit offset, we require that the next bytes must // be NOP or INT3. There is no reason to copy them. // TODO: This used to trigger for Je as well. Now that I allow // instructions after CALL and JE, I don't think I need that. // The only real value of this condition is that if code follows a JMP // then its _probably_ the target of a JMP somewhere else and we // will be overwriting it, which would be tragic. This seems // highly unlikely. 
if (foundJmp) { if (origBytes[nOrigBytes] == 0x90 || origBytes[nOrigBytes] == 0xcc) { nOrigBytes++; continue; } MOZ_ASSERT_UNREACHABLE("Opcode sequence includes commands after JMP"); return; } if (origBytes[nOrigBytes] == 0x0f) { COPY_CODES(1); if (origBytes[nOrigBytes] == 0x1f) { // nop (multibyte) COPY_CODES(1); if ((origBytes[nOrigBytes] & 0xc0) == 0x40 && (origBytes[nOrigBytes] & 0x7) == 0x04) { COPY_CODES(3); } else { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } else if (origBytes[nOrigBytes] == 0x05) { // syscall COPY_CODES(1); } else if (origBytes[nOrigBytes] == 0x10 || origBytes[nOrigBytes] == 0x11) { // SSE: movups xmm, xmm/m128 // movups xmm/m128, xmm COPY_CODES(1); int nModRmSibBytes = CountModRmSib(&origBytes[nOrigBytes]); if (nModRmSibBytes < 0) { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } else { COPY_CODES(nModRmSibBytes); } } else if (origBytes[nOrigBytes] == 0x84) { // je rel32 JumpPatch jump(nTrampBytes - 1, // overwrite the 0x0f we copied above (intptr_t)(origBytes + nOrigBytes + 5 + *(reinterpret_cast(origBytes + nOrigBytes + 1))), JumpType::Je); nTrampBytes = jump.GenerateJump(tramp); nOrigBytes += 5; } else { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } else if (origBytes[nOrigBytes] == 0x40 || origBytes[nOrigBytes] == 0x41) { // Plain REX or REX.B COPY_CODES(1); if ((origBytes[nOrigBytes] & 0xf0) == 0x50) { // push/pop with Rx register COPY_CODES(1); } else if (origBytes[nOrigBytes] >= 0xb8 && origBytes[nOrigBytes] <= 0xbf) { // mov r32, imm32 COPY_CODES(5); } else { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } else if (origBytes[nOrigBytes] == 0x44) { // REX.R COPY_CODES(1); // TODO: Combine with the "0x89" case below in the REX.W section if (origBytes[nOrigBytes] == 0x89) { // mov r/m32, r32 COPY_CODES(1); int len = CountModRmSib(origBytes + nOrigBytes); if (len < 0) { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } COPY_CODES(len); 
} else { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } else if (origBytes[nOrigBytes] == 0x45) { // REX.R & REX.B COPY_CODES(1); if (origBytes[nOrigBytes] == 0x33) { // xor r32, r32 COPY_CODES(2); } else { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } else if ((origBytes[nOrigBytes] & 0xfa) == 0x48) { // REX.W | REX.WR | REX.WRB | REX.WB COPY_CODES(1); if (origBytes[nOrigBytes] == 0x81 && (origBytes[nOrigBytes + 1] & 0xf8) == 0xe8) { // sub r, dword COPY_CODES(6); } else if (origBytes[nOrigBytes] == 0x83 && (origBytes[nOrigBytes + 1] & 0xf8) == 0xe8) { // sub r, byte COPY_CODES(3); } else if (origBytes[nOrigBytes] == 0x83 && (origBytes[nOrigBytes + 1] & (kMaskMod|kMaskReg)) == kModReg) { // add r, byte COPY_CODES(3); } else if (origBytes[nOrigBytes] == 0x83 && (origBytes[nOrigBytes + 1] & 0xf8) == 0x60) { // and [r+d], imm8 COPY_CODES(5); } else if (origBytes[nOrigBytes] == 0x2b && (origBytes[nOrigBytes + 1] & kMaskMod) == kModReg) { // sub r64, r64 COPY_CODES(2); } else if (origBytes[nOrigBytes] == 0x85) { // 85 /r => TEST r/m32, r32 if ((origBytes[nOrigBytes + 1] & 0xc0) == 0xc0) { COPY_CODES(2); } else { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } else if ((origBytes[nOrigBytes] & 0xfd) == 0x89) { // MOV r/m64, r64 | MOV r64, r/m64 BYTE reg; int len = CountModRmSib(origBytes + nOrigBytes + 1, ®); if (len < 0) { MOZ_ASSERT(len == kModOperand64); if (len != kModOperand64) { return; } nOrigBytes += 2; // skip the MOV and MOD R/M bytes // The instruction MOVs 64-bit data from a RIP-relative memory // address (determined with a 32-bit offset from RIP) into a // 64-bit register. int64_t* absAddr = reinterpret_cast(origBytes + nOrigBytes + 4 + *reinterpret_cast(origBytes + nOrigBytes)); nOrigBytes += 4; if (reg == kRegAx) { // Destination is RAX. Encode instruction as MOVABS with a // 64-bit absolute address as its immediate operand. 
tramp[nTrampBytes] = 0xa1; ++nTrampBytes; int64_t** trampOperandPtr = reinterpret_cast(tramp + nTrampBytes); *trampOperandPtr = absAddr; nTrampBytes += 8; } else { // The MOV must be done in two steps. First, we MOVABS the // absolute 64-bit address into our target register. // Then, we MOV from that address into the register // using register-indirect addressing. tramp[nTrampBytes] = 0xb8 + reg; ++nTrampBytes; int64_t** trampOperandPtr = reinterpret_cast(tramp + nTrampBytes); *trampOperandPtr = absAddr; nTrampBytes += 8; tramp[nTrampBytes] = 0x48; tramp[nTrampBytes+1] = 0x8b; tramp[nTrampBytes+2] = BuildModRmByte(kModNoRegDisp, reg, reg); nTrampBytes += 3; } } else { COPY_CODES(len+1); } } else if (origBytes[nOrigBytes] == 0xc7) { // MOV r/m64, imm32 if (origBytes[nOrigBytes + 1] == 0x44) { // MOV [r64+disp8], imm32 // ModR/W + SIB + disp8 + imm32 COPY_CODES(8); } else { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } else if (origBytes[nOrigBytes] == 0xff) { // JMP /4 if ((origBytes[nOrigBytes + 1] & 0xc0) == 0x0 && (origBytes[nOrigBytes + 1] & 0x07) == 0x5) { // [rip+disp32] // convert JMP 32bit offset to JMP 64bit direct JumpPatch jump(nTrampBytes - 1, // overwrite the REX.W/REX.WR we copied above *reinterpret_cast(origBytes + nOrigBytes + 6 + *reinterpret_cast(origBytes + nOrigBytes + 2)), JumpType::Jmp); nTrampBytes = jump.GenerateJump(tramp); nOrigBytes += 6; foundJmp = true; } else { // not support yet! MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } else if (origBytes[nOrigBytes] == 0x63 && (origBytes[nOrigBytes + 1] & kMaskMod) == kModReg) { // movsxd r64, r32 (move + sign extend) COPY_CODES(2); } else { // not support yet! 
MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } else if (origBytes[nOrigBytes] == 0x66) { // operand override prefix COPY_CODES(1); // This is the same as the x86 version if (origBytes[nOrigBytes] >= 0x88 && origBytes[nOrigBytes] <= 0x8B) { // various MOVs unsigned char b = origBytes[nOrigBytes + 1]; if (((b & 0xc0) == 0xc0) || (((b & 0xc0) == 0x00) && ((b & 0x07) != 0x04) && ((b & 0x07) != 0x05))) { // REG=r, R/M=r or REG=r, R/M=[r] COPY_CODES(2); } else if ((b & 0xc0) == 0x40) { if ((b & 0x07) == 0x04) { // REG=r, R/M=[SIB + disp8] COPY_CODES(4); } else { // REG=r, R/M=[r + disp8] COPY_CODES(3); } } else { // complex MOV, bail MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence"); return; } } } else if ((origBytes[nOrigBytes] & 0xf0) == 0x50) { // 1-byte push/pop COPY_CODES(1); } else if (origBytes[nOrigBytes] == 0x65) { // GS prefix // // The entry of GetKeyState on Windows 10 has the following code. // 65 48 8b 04 25 30 00 00 00 mov rax,qword ptr gs:[30h] // (GS prefix + REX + MOV (0x8b) ...) if (origBytes[nOrigBytes + 1] == 0x48 && (origBytes[nOrigBytes + 2] >= 0x88 && origBytes[nOrigBytes + 2] <= 0x8b)) { COPY_CODES(3); int len = CountModRmSib(origBytes + nOrigBytes); if (len < 0) { // no way to support this yet. 
MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } COPY_CODES(len); } else { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } else if (origBytes[nOrigBytes] == 0x80 && origBytes[nOrigBytes + 1] == 0x3d) { // cmp byte ptr [rip-relative address], imm8 // We'll compute the absolute address and do the cmp in r11 // push r11 (to save the old value) tramp[nTrampBytes] = 0x49; ++nTrampBytes; tramp[nTrampBytes] = 0x53; ++nTrampBytes; byteptr_t absAddr = reinterpret_cast(origBytes + nOrigBytes + 7 + *reinterpret_cast(origBytes + nOrigBytes + 2)); nOrigBytes += 6; // mov r11, absolute address tramp[nTrampBytes] = 0x49; ++nTrampBytes; tramp[nTrampBytes] = 0xbb; ++nTrampBytes; *reinterpret_cast(tramp + nTrampBytes) = absAddr; nTrampBytes += 8; // cmp byte ptr [r11],... tramp[nTrampBytes] = 0x41; ++nTrampBytes; tramp[nTrampBytes] = 0x80; ++nTrampBytes; tramp[nTrampBytes] = 0x3b; ++nTrampBytes; // ...imm8 COPY_CODES(1); // pop r11 (doesn't affect the flags from the cmp) tramp[nTrampBytes] = 0x49; ++nTrampBytes; tramp[nTrampBytes] = 0x5b; ++nTrampBytes; } else if (origBytes[nOrigBytes] == 0x90) { // nop COPY_CODES(1); } else if ((origBytes[nOrigBytes] & 0xf8) == 0xb8) { // MOV r32, imm32 COPY_CODES(5); } else if (origBytes[nOrigBytes] == 0x33) { // xor r32, r/m32 COPY_CODES(2); } else if (origBytes[nOrigBytes] == 0xf6) { // test r/m8, imm8 (used by ntdll on Windows 10 x64) // (no flags are affected by near jmp since there is no task switch, // so it is ok for a jmp to be written immediately after a test) BYTE subOpcode = 0; int nModRmSibBytes = CountModRmSib(&origBytes[nOrigBytes + 1], &subOpcode); if (nModRmSibBytes < 0 || subOpcode != 0) { // Unsupported MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } COPY_CODES(2 + nModRmSibBytes); } else if (origBytes[nOrigBytes] == 0x85) { // test r/m32, r32 int nModRmSibBytes = CountModRmSib(&origBytes[nOrigBytes + 1]); if (nModRmSibBytes < 0) { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode 
sequence"); return; } COPY_CODES(1 + nModRmSibBytes); } else if (origBytes[nOrigBytes] == 0xd1 && (origBytes[nOrigBytes+1] & kMaskMod) == kModReg) { // bit shifts/rotates : (SA|SH|RO|RC)(R|L) r32 // (e.g. 0xd1 0xe0 is SAL, 0xd1 0xc8 is ROR) COPY_CODES(2); } else if (origBytes[nOrigBytes] == 0xc3) { // ret COPY_CODES(1); } else if (origBytes[nOrigBytes] == 0xcc) { // int 3 COPY_CODES(1); } else if (origBytes[nOrigBytes] == 0xe8 || origBytes[nOrigBytes] == 0xe9) { // CALL (0xe8) or JMP (0xe9) 32bit offset foundJmp = origBytes[nOrigBytes] == 0xe9; JumpPatch jump(nTrampBytes, (intptr_t)(origBytes + nOrigBytes + 5 + *(reinterpret_cast(origBytes + nOrigBytes + 1))), origBytes[nOrigBytes] == 0xe8 ? JumpType::Call : JumpType::Jmp); nTrampBytes = jump.GenerateJump(tramp); nOrigBytes += 5; } else if (origBytes[nOrigBytes] == 0x74 || // je rel8 (0x74) origBytes[nOrigBytes] == 0x75) { // jne rel8 (0x75) char offset = origBytes[nOrigBytes + 1]; auto jumpType = JumpType::Je; if (origBytes[nOrigBytes] == 0x75) jumpType = JumpType::Jne; JumpPatch jump(nTrampBytes, (intptr_t)(origBytes + nOrigBytes + 2 + offset), jumpType); nTrampBytes = jump.GenerateJump(tramp); nOrigBytes += 2; } else if (origBytes[nOrigBytes] == 0xff) { if ((origBytes[nOrigBytes + 1] & (kMaskMod|kMaskReg)) == 0xf0) { // push r64 COPY_CODES(2); } else if (origBytes[nOrigBytes + 1] == 0x25) { // jmp absolute indirect m32 foundJmp = true; int32_t offset = *(reinterpret_cast(origBytes + nOrigBytes + 2)); int64_t* ptrToJmpDest = reinterpret_cast(origBytes + nOrigBytes + 6 + offset); intptr_t jmpDest = static_cast(*ptrToJmpDest); JumpPatch jump(nTrampBytes, jmpDest, JumpType::Jmp); nTrampBytes = jump.GenerateJump(tramp); nOrigBytes += 6; } else if ((origBytes[nOrigBytes + 1] & (kMaskMod|kMaskReg)) == BuildModRmByte(kModReg, 2, 0)) { // CALL reg (ff nn) COPY_CODES(2); } else { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } else { MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence"); return; } } 
#else #error "Unknown processor type" #endif if (nOrigBytes > 100) { //printf ("Too big!"); return; } // target address of the final jmp instruction in the trampoline byteptr_t trampDest = origBytes + nOrigBytes; #if defined(_M_IX86) if (pJmp32 >= 0) { // Jump directly to the original target of the jump instead of jumping to the // original function. // Adjust jump target displacement to jump location in the trampoline. *((intptr_t*)(tramp + pJmp32 + 1)) += origBytes - tramp; } else { tramp[nOrigBytes] = 0xE9; // jmp *((intptr_t*)(tramp + nOrigBytes + 1)) = (intptr_t)trampDest - (intptr_t)(tramp + nOrigBytes + 5); // target displacement } #elif defined(_M_X64) // If the we found a Jmp, we don't need to add another instruction. However, // if we found a _conditional_ jump or a CALL (or no control operations // at all) then we still need to run the rest of aOriginalFunction. if (!foundJmp) { JumpPatch patch(nTrampBytes, reinterpret_cast(trampDest)); patch.GenerateJump(tramp); } #endif // The trampoline is now valid. *aOutTramp = tramp; // ensure we can modify the original code AutoVirtualProtect protect(aOrigFunction, nOrigBytes, PAGE_EXECUTE_READWRITE); if (!protect.Protect()) { return; } #if defined(_M_IX86) // now modify the original bytes origBytes[0] = 0xE9; // jmp *((intptr_t*)(origBytes + 1)) = aDest - (intptr_t)(origBytes + 5); // target displacement #elif defined(_M_X64) // mov r11, address origBytes[0] = 0x49; origBytes[1] = 0xbb; *((intptr_t*)(origBytes + 2)) = aDest; // jmp r11 origBytes[10] = 0x41; origBytes[11] = 0xff; origBytes[12] = 0xe3; #endif } byteptr_t FindTrampolineSpace() { if (mCurHooks >= mMaxHooks) { return 0; } byteptr_t p = mHookPage + mCurHooks * kHookSize; mCurHooks++; return p; } static void* ResolveRedirectedAddress(const byteptr_t aOriginalFunction) { // If function entry is jmp rel8 stub to the internal implementation, we // resolve redirected address from the jump target. 
if (aOriginalFunction[0] == 0xeb) { int8_t offset = (int8_t)(aOriginalFunction[1]); if (offset <= 0) { // Bail out for negative offset: probably already patched by some // third-party code. return aOriginalFunction; } for (int8_t i = 0; i < offset; i++) { if (aOriginalFunction[2 + i] != 0x90) { // Bail out on insufficient nop space. return aOriginalFunction; } } return aOriginalFunction + 2 + offset; } #if defined(_M_IX86) // If function entry is jmp [disp32] such as used by kernel32, // we resolve redirected address from import table. if (aOriginalFunction[0] == 0xff && aOriginalFunction[1] == 0x25) { return (void*)(**((uint32_t**) (aOriginalFunction + 2))); } #elif defined(_M_X64) if (aOriginalFunction[0] == 0xe9) { // require for TestDllInterceptor with --disable-optimize int32_t offset = *((int32_t*)(aOriginalFunction + 1)); return aOriginalFunction + 5 + offset; } #endif return aOriginalFunction; } }; } // namespace internal class WindowsDllInterceptor { internal::WindowsDllNopSpacePatcher mNopSpacePatcher; internal::WindowsDllDetourPatcher mDetourPatcher; const char* mModuleName; int mNHooks; public: WindowsDllInterceptor() : mModuleName(nullptr) , mNHooks(0) {} void Init(const char* aModuleName, int aNumHooks = 0) { if (mModuleName) { return; } mModuleName = aModuleName; mNHooks = aNumHooks; mNopSpacePatcher.Init(aModuleName); // Lazily initialize mDetourPatcher, since it allocates memory and we might // not need it. } /** * Hook/detour the method aName from the DLL we set in Init so that it calls * aHookDest instead. Returns the original method pointer in aOrigFunc * and returns true if successful. * * IMPORTANT: If you use this method, please add your case to the * TestDllInterceptor in order to detect future failures. Even if this * succeeds now, updates to the hooked DLL could cause it to fail in * the future. */ bool AddHook(const char* aName, intptr_t aHookDest, void** aOrigFunc) { // Use a nop space patch if possible, otherwise fall back to a detour. 
// This should be the preferred method for adding hooks. if (!mModuleName) { return false; } if (mNopSpacePatcher.AddHook(aName, aHookDest, aOrigFunc)) { return true; } return AddDetour(aName, aHookDest, aOrigFunc); } /** * Detour the method aName from the DLL we set in Init so that it calls * aHookDest instead. Returns the original method pointer in aOrigFunc * and returns true if successful. * * IMPORTANT: If you use this method, please add your case to the * TestDllInterceptor in order to detect future failures. Even if this * succeeds now, updates to the detoured DLL could cause it to fail in * the future. */ bool AddDetour(const char* aName, intptr_t aHookDest, void** aOrigFunc) { // Generally, code should not call this method directly. Use AddHook unless // there is a specific need to avoid nop space patches. if (!mModuleName) { return false; } if (!mDetourPatcher.Initialized()) { mDetourPatcher.Init(mModuleName, mNHooks); } return mDetourPatcher.AddHook(aName, aHookDest, aOrigFunc); } }; } // namespace mozilla #endif /* NS_WINDOWS_DLL_INTERCEPTOR_H_ */