Bug 1576303 - Fix and tune xptcall for ppc64le, and harmonize with x86_64's. r=tcampbell,froydnj

Fix handling of ABI arguments in xptcinvoke for PPC64 platforms. Previously, non-floating-point arguments would advance the index of which floating-point register to use. This cleans up both of the platform-specific invoke and stubs to be better aligned to the x86_64 model.

Differential Revision: https://phabricator.services.mozilla.com/D46421

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Cameron Kaiser 2019-09-19 19:45:20 +00:00
Родитель 4f283867db
Коммит 1db7aa3620
3 изменённых файлов: 202 добавлений и 143 удалений

Просмотреть файл

@ -151,10 +151,10 @@ NS_InvokeByIndex:
ld r2,STACK_TOC(r1) # Load our own TOC pointer
ld r1,0(r1) # Revert stack frame
ld 0,16(r1) # Reload lr
mtlr 0
ld 29,-24(r1) # Restore NVGPRS
ld 30,-16(r1)
ld 31,-8(r1)
mtlr 0
blr
#if _CALL_ELF == 2

Просмотреть файл

@ -5,93 +5,126 @@
// Platform specific code to invoke XPCOM methods on native objects
#include "xptcprivate.h"
// The purpose of NS_InvokeByIndex() is to map a platform
// independent call to the platform ABI. To do that,
// NS_InvokeByIndex() has to determine the method to call via vtable
// access. The parameters for the method are read from the
// nsXPTCVariant* and prepared for the native ABI.
// The PowerPC64 platform ABI can be found here:
// http://www.freestandards.org/spec/ELF/ppc64/
//
// Prior to POWER8, all 64-bit Power ISA systems used ELF v1 ABI, found
// here:
// https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html
// and in particular:
// http://www.freestandards.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#FUNC-CALL
// https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUNC-CALL
// Little-endian ppc64le, however, uses ELF v2 ABI, which is here:
// http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
// and in particular section 2.2, page 22. However, most big-endian ppc64
// systems still use ELF v1, so this file should support both.
#include <stdio.h>
#include "xptcprivate.h"
// 7 integral parameters are passed in registers, not including |this|
// (i.e., r3-r10, with r3 being |this|).
const uint32_t GPR_COUNT = 7;
// 8 integral parameters are passed in registers, not including 'that'
#define GPR_COUNT 7
// 13 floating point parameters are passed in registers, either single or
// double precision (i.e., f1-f13).
const uint32_t FPR_COUNT = 13;
// 8 floating point parameters are passed in registers, floats are
// promoted to doubles when passed in registers
#define FPR_COUNT 13
// Both ABIs use the same register assignment strategy, as per this
// example from V1 ABI section 3.2.3 and V2 ABI section 2.2.3.2 [page 43]:
//
// typedef struct {
// int a;
// double dd;
// } sparm;
// sparm s, t;
// int c, d, e;
// long double ld;
// double ff, gg, hh;
//
// x = func(c, ff, d, ld, s, gg, t, e, hh);
//
// Parameter Register Offset in parameter save area
// c r3 0-7 (not stored in parameter save area)
// ff f1 8-15 (not stored)
// d r5 16-23 (not stored)
// ld f2,f3 24-39 (not stored)
// s r8,r9 40-55 (not stored)
// gg f4 56-63 (not stored)
// t (none) 64-79 (stored in parameter save area)
// e (none) 80-87 (stored)
// hh f5 88-95 (not stored)
//
// i.e., each successive FPR usage skips a GPR, but not the other way around.
extern "C" uint32_t
invoke_count_words(uint32_t paramCount, nsXPTCVariant* s)
extern "C" void invoke_copy_to_stack(uint64_t* gpregs, double* fpregs,
uint32_t paramCount, nsXPTCVariant* s,
uint64_t* d)
{
return uint32_t(((paramCount * 2) + 3) & ~3);
}
uint32_t nr_gpr = 0u;
uint32_t nr_fpr = 0u;
uint64_t value = 0u;
extern "C" void
invoke_copy_to_stack(uint64_t* gpregs,
double* fpregs,
uint32_t paramCount,
nsXPTCVariant* s,
uint64_t* d)
{
uint64_t tempu64;
for(uint32_t i = 0; i < paramCount; i++, s++) {
if(s->IsIndirect())
tempu64 = (uint64_t) &s->val;
for (uint32_t i = 0; i < paramCount; i++, s++) {
if (s->IsIndirect())
value = (uint64_t) &s->val;
else {
switch(s->type) {
case nsXPTType::T_FLOAT: break;
case nsXPTType::T_DOUBLE: break;
case nsXPTType::T_I8: tempu64 = s->val.i8; break;
case nsXPTType::T_I16: tempu64 = s->val.i16; break;
case nsXPTType::T_I32: tempu64 = s->val.i32; break;
case nsXPTType::T_I64: tempu64 = s->val.i64; break;
case nsXPTType::T_U8: tempu64 = s->val.u8; break;
case nsXPTType::T_U16: tempu64 = s->val.u16; break;
case nsXPTType::T_U32: tempu64 = s->val.u32; break;
case nsXPTType::T_U64: tempu64 = s->val.u64; break;
case nsXPTType::T_BOOL: tempu64 = s->val.b; break;
case nsXPTType::T_CHAR: tempu64 = s->val.c; break;
case nsXPTType::T_WCHAR: tempu64 = s->val.wc; break;
default: tempu64 = (uint64_t) s->val.p; break;
switch (s->type) {
case nsXPTType::T_FLOAT: break;
case nsXPTType::T_DOUBLE: break;
case nsXPTType::T_I8: value = s->val.i8; break;
case nsXPTType::T_I16: value = s->val.i16; break;
case nsXPTType::T_I32: value = s->val.i32; break;
case nsXPTType::T_I64: value = s->val.i64; break;
case nsXPTType::T_U8: value = s->val.u8; break;
case nsXPTType::T_U16: value = s->val.u16; break;
case nsXPTType::T_U32: value = s->val.u32; break;
case nsXPTType::T_U64: value = s->val.u64; break;
case nsXPTType::T_BOOL: value = s->val.b; break;
case nsXPTType::T_CHAR: value = s->val.c; break;
case nsXPTType::T_WCHAR: value = s->val.wc; break;
default: value = (uint64_t) s->val.p; break;
}
}
if (!s->IsIndirect() && s->type == nsXPTType::T_DOUBLE) {
if (i < FPR_COUNT)
fpregs[i] = s->val.d;
else
*(double *)d = s->val.d;
if (nr_fpr < FPR_COUNT) {
fpregs[nr_fpr++] = s->val.d;
nr_gpr++;
} else {
*((double *)d) = s->val.d;
d++;
}
}
else if (!s->IsIndirect() && s->type == nsXPTType::T_FLOAT) {
if (i < FPR_COUNT) {
fpregs[i] = s->val.f; // if passed in registers, floats are promoted to doubles
if (nr_fpr < FPR_COUNT) {
// Single-precision floats are passed in FPRs too.
fpregs[nr_fpr++] = s->val.f;
nr_gpr++;
} else {
float *p = (float *)d;
#ifndef __LITTLE_ENDIAN__
#ifdef __LITTLE_ENDIAN__
*((float *)d) = s->val.f;
#else
// Big endian needs adjustment to point to the least
// significant word.
float* p = (float*)d;
p++;
#endif
*p = s->val.f;
#endif
d++;
}
}
else {
if (i < GPR_COUNT)
gpregs[i] = tempu64;
else
*d = tempu64;
if (nr_gpr < GPR_COUNT) {
gpregs[nr_gpr++] = value;
} else {
*d++ = value;
}
}
if (i >= 7)
d++;
}
}
EXPORT_XPCOM_API(nsresult)
NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
uint32_t paramCount, nsXPTCVariant* params);
NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, uint32_t paramCount,
nsXPTCVariant* params);

Просмотреть файл

@ -7,36 +7,64 @@
#include "xptcprivate.h"
// The Linux/PPC64 ABI passes the first 8 integral
// parameters and the first 13 floating point parameters in registers
// (r3-r10 and f1-f13), no stack space is allocated for these by the
// caller. The rest of the parameters are passed in the caller's stack
// area. The stack pointer has to retain 16-byte alignment.
// The PowerPC64 platform ABI can be found here:
// http://www.freestandards.org/spec/ELF/ppc64/
// Prior to POWER8, all 64-bit Power ISA systems used ELF v1 ABI, found
// here:
// https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html
// and in particular:
// http://www.freestandards.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#FUNC-CALL
// https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUNC-CALL
// Little-endian ppc64le, however, uses ELF v2 ABI, which is here:
// http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
// and in particular section 2.2, page 22. However, most big-endian ppc64
// systems still use ELF v1, so this file should support both.
//
// Both ABIs pass the first 8 integral parameters and the first 13 floating
// point parameters in registers r3-r10 and f1-f13. No stack space is
// allocated for these by the caller. The rest of the parameters are passed
// in the caller's stack area. The stack pointer must stay 16-byte aligned.
#define PARAM_BUFFER_COUNT 16
#define GPR_COUNT 7
#define FPR_COUNT 13
const uint32_t PARAM_BUFFER_COUNT = 16;
const uint32_t GPR_COUNT = 7;
const uint32_t FPR_COUNT = 13;
// PrepareAndDispatch() is called by SharedStub() and calls the actual method.
//
// - 'args[]' contains the arguments passed on stack
// - 'gprData[]' contains the arguments passed in integer registers
// - 'fprData[]' contains the arguments passed in floating point registers
// - 'gpregs[]' contains the arguments passed in integer registers
// - 'fpregs[]' contains the arguments passed in floating point registers
//
// The parameters are mapped into an array of type 'nsXPTCMiniVariant'
// and then the method gets called.
#include <stdio.h>
//
// Both ABIs use the same register assignment strategy, as per this
// example from V1 ABI section 3.2.3 and V2 ABI section 2.2.3.2 [page 43]:
//
// typedef struct {
// int a;
// double dd;
// } sparm;
// sparm s, t;
// int c, d, e;
// long double ld;
// double ff, gg, hh;
//
// x = func(c, ff, d, ld, s, gg, t, e, hh);
//
// Parameter Register Offset in parameter save area
// c r3 0-7 (not stored in parameter save area)
// ff f1 8-15 (not stored)
// d r5 16-23 (not stored)
// ld f2,f3 24-39 (not stored)
// s r8,r9 40-55 (not stored)
// gg f4 56-63 (not stored)
// t (none) 64-79 (stored in parameter save area)
// e (none) 80-87 (stored)
// hh f5 88-95 (not stored)
//
// i.e., each successive FPR usage skips a GPR, but not the other way around.
extern "C" nsresult ATTRIBUTE_USED
PrepareAndDispatch(nsXPTCStubBase* self,
uint64_t methodIndex,
uint64_t* args,
uint64_t *gprData,
double *fprData)
PrepareAndDispatch(nsXPTCStubBase * self, uint32_t methodIndex,
uint64_t * args, uint64_t * gpregs, double *fpregs)
{
nsXPTCMiniVariant paramBuffer[PARAM_BUFFER_COUNT];
nsXPTCMiniVariant* dispatchParams = nullptr;
@ -48,7 +76,7 @@ PrepareAndDispatch(nsXPTCStubBase* self,
self->mEntry->GetMethodInfo(uint16_t(methodIndex), &info);
NS_ASSERTION(info,"no method info");
if (! info)
if (!info)
return NS_ERROR_UNEXPECTED;
paramCount = info->GetParamCount();
@ -64,9 +92,11 @@ PrepareAndDispatch(nsXPTCStubBase* self,
const uint8_t indexOfJSContext = info->IndexOfJSContext();
uint64_t* ap = args;
uint32_t iCount = 0;
uint32_t fpCount = 0;
uint64_t tempu64;
// |that| is implicit in the calling convention; we really do start at the
// first GPR (as opposed to x86_64).
uint32_t nr_gpr = 0;
uint32_t nr_fpr = 0;
uint64_t value;
for(i = 0; i < paramCount; i++) {
const nsXPTParamInfo& param = info->GetParam(i);
@ -74,67 +104,67 @@ PrepareAndDispatch(nsXPTCStubBase* self,
nsXPTCMiniVariant* dp = &dispatchParams[i];
if (i == indexOfJSContext) {
if (iCount < GPR_COUNT)
iCount++;
if (nr_gpr < GPR_COUNT)
nr_gpr++;
else
ap++;
}
if (!param.IsOut() && type == nsXPTType::T_DOUBLE) {
if (fpCount < FPR_COUNT) {
dp->val.d = fprData[fpCount++];
if (nr_fpr < FPR_COUNT) {
dp->val.d = fpregs[nr_fpr++];
nr_gpr++;
} else {
dp->val.d = *(double*)ap++;
}
else
dp->val.d = *(double*) ap;
} else if (!param.IsOut() && type == nsXPTType::T_FLOAT) {
if (fpCount < FPR_COUNT) {
dp->val.f = (float) fprData[fpCount++]; // in registers floats are passed as doubles
}
else {
float *p = (float *)ap;
#ifndef __LITTLE_ENDIAN__
continue;
}
if (!param.IsOut() && type == nsXPTType::T_FLOAT) {
if (nr_fpr < FPR_COUNT) {
// Single-precision floats are passed in FPRs too.
dp->val.f = (float)fpregs[nr_fpr++];
nr_gpr++;
} else {
#ifdef __LITTLE_ENDIAN__
dp->val.f = *(float*)ap++;
#else
// Big endian needs adjustment to point to the least
// significant word.
float* p = (float*)ap;
p++;
#endif
dp->val.f = *p;
ap++;
#endif
}
} else { /* integer type or pointer */
if (iCount < GPR_COUNT)
tempu64 = gprData[iCount];
else
tempu64 = *ap;
continue;
}
if (nr_gpr < GPR_COUNT)
value = gpregs[nr_gpr++];
else
value = *ap++;
if (param.IsOut() || !type.IsArithmetic())
dp->val.p = (void*) tempu64;
else if (type == nsXPTType::T_I8)
dp->val.i8 = (int8_t) tempu64;
else if (type == nsXPTType::T_I16)
dp->val.i16 = (int16_t) tempu64;
else if (type == nsXPTType::T_I32)
dp->val.i32 = (int32_t) tempu64;
else if (type == nsXPTType::T_I64)
dp->val.i64 = (int64_t) tempu64;
else if (type == nsXPTType::T_U8)
dp->val.u8 = (uint8_t) tempu64;
else if (type == nsXPTType::T_U16)
dp->val.u16 = (uint16_t) tempu64;
else if (type == nsXPTType::T_U32)
dp->val.u32 = (uint32_t) tempu64;
else if (type == nsXPTType::T_U64)
dp->val.u64 = (uint64_t) tempu64;
else if (type == nsXPTType::T_BOOL)
dp->val.b = (bool) tempu64;
else if (type == nsXPTType::T_CHAR)
dp->val.c = (char) tempu64;
else if (type == nsXPTType::T_WCHAR)
dp->val.wc = (wchar_t) tempu64;
else
NS_ERROR("bad type");
if (param.IsOut() || !type.IsArithmetic()) {
dp->val.p = (void*) value;
continue;
}
if (iCount < GPR_COUNT)
iCount++; // gprs are skipped for fp args, so this always needs inc
else
ap++;
switch (type) {
case nsXPTType::T_I8: dp->val.i8 = (int8_t) value; break;
case nsXPTType::T_I16: dp->val.i16 = (int16_t) value; break;
case nsXPTType::T_I32: dp->val.i32 = (int32_t) value; break;
case nsXPTType::T_I64: dp->val.i64 = (int64_t) value; break;
case nsXPTType::T_U8: dp->val.u8 = (uint8_t) value; break;
case nsXPTType::T_U16: dp->val.u16 = (uint16_t) value; break;
case nsXPTType::T_U32: dp->val.u32 = (uint32_t) value; break;
case nsXPTType::T_U64: dp->val.u64 = (uint64_t) value; break;
case nsXPTType::T_BOOL: dp->val.b = (bool) value; break;
case nsXPTType::T_CHAR: dp->val.c = (char) value; break;
case nsXPTType::T_WCHAR: dp->val.wc = (wchar_t) value; break;
default:
NS_ERROR("bad type");
break;
}
}
nsresult result = self->mOuter->CallMethod((uint16_t) methodIndex, info,
@ -148,23 +178,19 @@ PrepareAndDispatch(nsXPTCStubBase* self,
// Load r11 with the constant 'n' and branch to SharedStub().
//
// XXX Yes, it's ugly that we're relying on gcc's name-mangling here;
// however, it's quick, dirty, and'll break when the ABI changes on
// us, which is what we want ;-).
// gcc-3 version
//
// As G++3 ABI contains the length of the functionname in the mangled
// name, it is difficult to get a generic assembler mechanism like
// in the G++ 2.95 case.
// XXX Yes, it's ugly that we're relying on gcc's name-mangling here;
// however, it's quick, dirty, and'll break when the ABI changes on
// us, which is what we want ;-).
// Create names would be like:
// _ZN14nsXPTCStubBase5Stub1Ev
// _ZN14nsXPTCStubBase6Stub12Ev
// _ZN14nsXPTCStubBase7Stub123Ev
// _ZN14nsXPTCStubBase8Stub1234Ev
// etc.
// Use assembler directives to get the names right...
// Use assembler directives to get the names right.
#if _CALL_ELF == 2
# define STUB_ENTRY(n) \
@ -250,7 +276,7 @@ __asm__ ( \
#define SENTINEL_ENTRY(n) \
nsresult nsXPTCStubBase::Sentinel##n() \
{ \
NS_ERROR("nsXPTCStubBase::Sentinel called"); \
NS_ERROR("nsXPTCStubBase::Sentinel called"); \
return NS_ERROR_NOT_IMPLEMENTED; \
}