Bug 1576303 - Fix and tune xptcall for ppc64le, and harmonize with x86_64's. r=tcampbell,froydnj

Fix handling of ABI arguments in xptcinvoke for PPC64 platforms. Previously, non-floating-point arguments would advance the index of which floating-point register to use. This cleans up both of the platform-specific invoke and stubs to be better aligned to the x86_64 model. Differential Revision: https://phabricator.services.mozilla.com/D46421 --HG-- extra : moz-landing-system : lando
2019-09-19 19:45:20 +00:00 · 2019-09-19 19:45:20 +00:00 · 1db7aa3620
--- a/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_ppc64_linux.S
+++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_ppc64_linux.S
@ -151,10 +151,10 @@ NS_InvokeByIndex:
        ld      r2,STACK_TOC(r1)        # Load our own TOC pointer
        ld      r1,0(r1)                # Revert stack frame
        ld      0,16(r1)                # Reload lr
+        mtlr    0
        ld      29,-24(r1)              # Restore NVGPRS
        ld      30,-16(r1)
        ld      31,-8(r1)
-        mtlr    0
        blr

 #if _CALL_ELF == 2
--- a/xpcom/reflect/xptcall/md/unix/xptcinvoke_ppc64_linux.cpp
+++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_ppc64_linux.cpp
@ -5,93 +5,126 @@

 // Platform specific code to invoke XPCOM methods on native objects

+#include "xptcprivate.h"
+
 // The purpose of NS_InvokeByIndex() is to map a platform
 // independent call to the platform ABI. To do that,
 // NS_InvokeByIndex() has to determine the method to call via vtable
 // access. The parameters for the method are read from the
 // nsXPTCVariant* and prepared for the native ABI.
-
-// The PowerPC64 platform ABI can be found here:
-// http://www.freestandards.org/spec/ELF/ppc64/
+//
+// Prior to POWER8, all 64-bit Power ISA systems used ELF v1 ABI, found
+// here:
+//   https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html
 // and in particular:
-// http://www.freestandards.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#FUNC-CALL
+//   https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUNC-CALL
+// Little-endian ppc64le, however, uses ELF v2 ABI, which is here:
+//   http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
+// and in particular section 2.2, page 22. However, most big-endian ppc64
+// systems still use ELF v1, so this file should support both.

-#include <stdio.h>
-#include "xptcprivate.h"
+// 7 integral parameters are passed in registers, not including |this|
+// (i.e., r3-r10, with r3 being |this|).
+const uint32_t GPR_COUNT = 7;

-// 8 integral parameters are passed in registers, not including 'that'
-#define GPR_COUNT     7
+// 13 floating point parameters are passed in registers, either single or
+// double precision (i.e., f1-f13).
+const uint32_t FPR_COUNT = 13;

-// 8 floating point parameters are passed in registers, floats are
-// promoted to doubles when passed in registers
-#define FPR_COUNT     13
+// Both ABIs use the same register assignment strategy, as per this
+// example from V1 ABI section 3.2.3 and V2 ABI section 2.2.3.2 [page 43]:
+//
+// typedef struct {
+//   int    a;
+//   double dd;
+// } sparm;
+// sparm   s, t;
+// int     c, d, e;
+// long double ld;
+// double  ff, gg, hh;
+//
+// x = func(c, ff, d, ld, s, gg, t, e, hh);
+//
+// Parameter     Register     Offset in parameter save area
+// c             r3           0-7    (not stored in parameter save area)
+// ff            f1           8-15   (not stored)
+// d             r5           16-23  (not stored)
+// ld            f2,f3        24-39  (not stored)
+// s             r8,r9        40-55  (not stored)
+// gg            f4           56-63  (not stored)
+// t             (none)       64-79  (stored in parameter save area)
+// e             (none)       80-87  (stored)
+// hh            f5           88-95  (not stored)
+//
+// i.e., each successive FPR usage skips a GPR, but not the other way around.

-extern "C" uint32_t
-invoke_count_words(uint32_t paramCount, nsXPTCVariant* s)
+extern "C" void invoke_copy_to_stack(uint64_t* gpregs, double* fpregs,
+                                     uint32_t paramCount, nsXPTCVariant* s,
+                                     uint64_t* d)
 {
-    return uint32_t(((paramCount * 2) + 3) & ~3);
-}
+    uint32_t nr_gpr = 0u;
+    uint32_t nr_fpr = 0u;
+    uint64_t value = 0u;

-extern "C" void
-invoke_copy_to_stack(uint64_t* gpregs,
-                     double* fpregs,
-                     uint32_t paramCount,
-                     nsXPTCVariant* s,
-                     uint64_t* d)
-{
-    uint64_t tempu64;
-
-    for(uint32_t i = 0; i < paramCount; i++, s++) {
-        if(s->IsIndirect())
-            tempu64 = (uint64_t) &s->val;
+    for (uint32_t i = 0; i < paramCount; i++, s++) {
+        if (s->IsIndirect())
+            value = (uint64_t) &s->val;
        else {
-            switch(s->type) {
-            case nsXPTType::T_FLOAT:                                  break;
-            case nsXPTType::T_DOUBLE:                                 break;
-            case nsXPTType::T_I8:     tempu64 = s->val.i8;            break;
-            case nsXPTType::T_I16:    tempu64 = s->val.i16;           break;
-            case nsXPTType::T_I32:    tempu64 = s->val.i32;           break;
-            case nsXPTType::T_I64:    tempu64 = s->val.i64;           break;
-            case nsXPTType::T_U8:     tempu64 = s->val.u8;            break;
-            case nsXPTType::T_U16:    tempu64 = s->val.u16;           break;
-            case nsXPTType::T_U32:    tempu64 = s->val.u32;           break;
-            case nsXPTType::T_U64:    tempu64 = s->val.u64;           break;
-            case nsXPTType::T_BOOL:   tempu64 = s->val.b;             break;
-            case nsXPTType::T_CHAR:   tempu64 = s->val.c;             break;
-            case nsXPTType::T_WCHAR:  tempu64 = s->val.wc;            break;
-            default:                  tempu64 = (uint64_t) s->val.p;  break;
+            switch (s->type) {
+            case nsXPTType::T_FLOAT:                                break;
+            case nsXPTType::T_DOUBLE:                               break;
+            case nsXPTType::T_I8:     value = s->val.i8;            break;
+            case nsXPTType::T_I16:    value = s->val.i16;           break;
+            case nsXPTType::T_I32:    value = s->val.i32;           break;
+            case nsXPTType::T_I64:    value = s->val.i64;           break;
+            case nsXPTType::T_U8:     value = s->val.u8;            break;
+            case nsXPTType::T_U16:    value = s->val.u16;           break;
+            case nsXPTType::T_U32:    value = s->val.u32;           break;
+            case nsXPTType::T_U64:    value = s->val.u64;           break;
+            case nsXPTType::T_BOOL:   value = s->val.b;             break;
+            case nsXPTType::T_CHAR:   value = s->val.c;             break;
+            case nsXPTType::T_WCHAR:  value = s->val.wc;            break;
+            default:                  value = (uint64_t) s->val.p;  break;
            }
        }

        if (!s->IsIndirect() && s->type == nsXPTType::T_DOUBLE) {
-            if (i < FPR_COUNT)
-                fpregs[i]    = s->val.d;
-            else
-                *(double *)d = s->val.d;
+            if (nr_fpr < FPR_COUNT) {
+                fpregs[nr_fpr++] = s->val.d;
+                nr_gpr++;
+            } else {
+                *((double *)d) = s->val.d;
+                d++;
+            }
        }
        else if (!s->IsIndirect() && s->type == nsXPTType::T_FLOAT) {
-            if (i < FPR_COUNT) {
-                fpregs[i]   = s->val.f; // if passed in registers, floats are promoted to doubles
+            if (nr_fpr < FPR_COUNT) {
+                // Single-precision floats are passed in FPRs too.
+                fpregs[nr_fpr++] = s->val.f;
+                nr_gpr++;
            } else {
-                float *p = (float *)d;
-#ifndef __LITTLE_ENDIAN__
+#ifdef __LITTLE_ENDIAN__
+                *((float *)d) = s->val.f;
+#else
+                // Big endian needs adjustment to point to the least
+                // significant word.
+                float* p = (float*)d;
                p++;
-#endif
                *p = s->val.f;
+#endif
+                d++;
            }
        }
        else {
-            if (i < GPR_COUNT)
-                gpregs[i] = tempu64;
-            else
-                *d = tempu64;
+            if (nr_gpr < GPR_COUNT) {
+                gpregs[nr_gpr++] = value;
+            } else {
+                *d++ = value;
+            }
        }
-        if (i >= 7)
-            d++;
    }
 }

 EXPORT_XPCOM_API(nsresult)
-NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
-                   uint32_t paramCount, nsXPTCVariant* params);
-
+NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, uint32_t paramCount,
+                 nsXPTCVariant* params);
--- a/xpcom/reflect/xptcall/md/unix/xptcstubs_ppc64_linux.cpp
+++ b/xpcom/reflect/xptcall/md/unix/xptcstubs_ppc64_linux.cpp
@ -7,36 +7,64 @@

 #include "xptcprivate.h"

-// The Linux/PPC64 ABI passes the first 8 integral
-// parameters and the first 13 floating point parameters in registers
-// (r3-r10 and f1-f13), no stack space is allocated for these by the
-// caller.  The rest of the parameters are passed in the caller's stack
-// area. The stack pointer has to retain 16-byte alignment.
-
-// The PowerPC64 platform ABI can be found here:
-// http://www.freestandards.org/spec/ELF/ppc64/
+// Prior to POWER8, all 64-bit Power ISA systems used ELF v1 ABI, found
+// here:
+//   https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html
 // and in particular:
-// http://www.freestandards.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#FUNC-CALL
+//   https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUNC-CALL
+// Little-endian ppc64le, however, uses ELF v2 ABI, which is here:
+//   http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
+// and in particular section 2.2, page 22. However, most big-endian ppc64
+// systems still use ELF v1, so this file should support both.
+//
+// Both ABIs pass the first 8 integral parameters and the first 13 floating
+// point parameters in registers r3-r10 and f1-f13. No stack space is
+// allocated for these by the caller. The rest of the parameters are passed
+// in the caller's stack area. The stack pointer must stay 16-byte aligned.

-#define PARAM_BUFFER_COUNT      16
-#define GPR_COUNT                7
-#define FPR_COUNT               13
+const uint32_t PARAM_BUFFER_COUNT   = 16;
+const uint32_t GPR_COUNT            = 7;
+const uint32_t FPR_COUNT            = 13;

 // PrepareAndDispatch() is called by SharedStub() and calls the actual method.
 //
 // - 'args[]' contains the arguments passed on stack
-// - 'gprData[]' contains the arguments passed in integer registers
-// - 'fprData[]' contains the arguments passed in floating point registers
+// - 'gpregs[]' contains the arguments passed in integer registers
+// - 'fpregs[]' contains the arguments passed in floating point registers
 //
 // The parameters are mapped into an array of type 'nsXPTCMiniVariant'
 // and then the method gets called.
-#include <stdio.h>
+//
+// Both ABIs use the same register assignment strategy, as per this
+// example from V1 ABI section 3.2.3 and V2 ABI section 2.2.3.2 [page 43]:
+//
+// typedef struct {
+//   int    a;
+//   double dd;
+// } sparm;
+// sparm   s, t;
+// int     c, d, e;
+// long double ld;
+// double  ff, gg, hh;
+//
+// x = func(c, ff, d, ld, s, gg, t, e, hh);
+//
+// Parameter     Register     Offset in parameter save area
+// c             r3           0-7    (not stored in parameter save area)
+// ff            f1           8-15   (not stored)
+// d             r5           16-23  (not stored)
+// ld            f2,f3        24-39  (not stored)
+// s             r8,r9        40-55  (not stored)
+// gg            f4           56-63  (not stored)
+// t             (none)       64-79  (stored in parameter save area)
+// e             (none)       80-87  (stored)
+// hh            f5           88-95  (not stored)
+//
+// i.e., each successive FPR usage skips a GPR, but not the other way around.
+
 extern "C" nsresult ATTRIBUTE_USED
-PrepareAndDispatch(nsXPTCStubBase* self,
-                   uint64_t methodIndex,
-                   uint64_t* args,
-                   uint64_t *gprData,
-                   double *fprData)
+PrepareAndDispatch(nsXPTCStubBase * self, uint32_t methodIndex,
+                   uint64_t * args, uint64_t * gpregs, double *fpregs)
 {
    nsXPTCMiniVariant paramBuffer[PARAM_BUFFER_COUNT];
    nsXPTCMiniVariant* dispatchParams = nullptr;
@ -48,7 +76,7 @@ PrepareAndDispatch(nsXPTCStubBase* self,

    self->mEntry->GetMethodInfo(uint16_t(methodIndex), &info);
    NS_ASSERTION(info,"no method info");
-    if (! info)
+    if (!info)
        return NS_ERROR_UNEXPECTED;

    paramCount = info->GetParamCount();
@ -64,9 +92,11 @@ PrepareAndDispatch(nsXPTCStubBase* self,
    const uint8_t indexOfJSContext = info->IndexOfJSContext();

    uint64_t* ap = args;
-    uint32_t iCount = 0;
-    uint32_t fpCount = 0;
-    uint64_t tempu64;
+    // |that| is implicit in the calling convention; we really do start at the
+    // first GPR (as opposed to x86_64).
+    uint32_t nr_gpr = 0;
+    uint32_t nr_fpr = 0;
+    uint64_t value;

    for(i = 0; i < paramCount; i++) {
        const nsXPTParamInfo& param = info->GetParam(i);
@ -74,67 +104,67 @@ PrepareAndDispatch(nsXPTCStubBase* self,
        nsXPTCMiniVariant* dp = &dispatchParams[i];

        if (i == indexOfJSContext) {
-            if (iCount < GPR_COUNT)
-                iCount++;
+            if (nr_gpr < GPR_COUNT)
+                nr_gpr++;
            else
                ap++;
        }

        if (!param.IsOut() && type == nsXPTType::T_DOUBLE) {
-            if (fpCount < FPR_COUNT) {
-                dp->val.d = fprData[fpCount++];
+            if (nr_fpr < FPR_COUNT) {
+                dp->val.d = fpregs[nr_fpr++];
+                nr_gpr++;
+            } else {
+                dp->val.d = *(double*)ap++;
            }
-            else
-                dp->val.d = *(double*) ap;
-        } else if (!param.IsOut() && type == nsXPTType::T_FLOAT) {
-            if (fpCount < FPR_COUNT) {
-                dp->val.f = (float) fprData[fpCount++]; // in registers floats are passed as doubles
-            }
-            else {
-                float *p = (float *)ap;
-#ifndef __LITTLE_ENDIAN__
+            continue;
+        }
+        if (!param.IsOut() && type == nsXPTType::T_FLOAT) {
+            if (nr_fpr < FPR_COUNT) {
+                // Single-precision floats are passed in FPRs too.
+                dp->val.f = (float)fpregs[nr_fpr++];
+                nr_gpr++;
+            } else {
+#ifdef __LITTLE_ENDIAN__
+                dp->val.f = *(float*)ap++;
+#else
+                // Big endian needs adjustment to point to the least
+                // significant word.
+                float* p = (float*)ap;
                p++;
-#endif
                dp->val.f = *p;
+                ap++;
+#endif
            }
-        } else { /* integer type or pointer */
-            if (iCount < GPR_COUNT)
-                tempu64 = gprData[iCount];
-            else
-                tempu64 = *ap;
+            continue;
+        }
+        if (nr_gpr < GPR_COUNT)
+            value = gpregs[nr_gpr++];
+        else
+            value = *ap++;

-            if (param.IsOut() || !type.IsArithmetic())
-                dp->val.p = (void*) tempu64;
-            else if (type == nsXPTType::T_I8)
-                dp->val.i8  = (int8_t)   tempu64;
-            else if (type == nsXPTType::T_I16)
-                dp->val.i16 = (int16_t)  tempu64;
-            else if (type == nsXPTType::T_I32)
-                dp->val.i32 = (int32_t)  tempu64;
-            else if (type == nsXPTType::T_I64)
-                dp->val.i64 = (int64_t)  tempu64;
-            else if (type == nsXPTType::T_U8)
-                dp->val.u8  = (uint8_t)  tempu64;
-            else if (type == nsXPTType::T_U16)
-                dp->val.u16 = (uint16_t) tempu64;
-            else if (type == nsXPTType::T_U32)
-                dp->val.u32 = (uint32_t) tempu64;
-            else if (type == nsXPTType::T_U64)
-                dp->val.u64 = (uint64_t) tempu64;
-            else if (type == nsXPTType::T_BOOL)
-                dp->val.b   = (bool)   tempu64;
-            else if (type == nsXPTType::T_CHAR)
-                dp->val.c   = (char)     tempu64;
-            else if (type == nsXPTType::T_WCHAR)
-                dp->val.wc  = (wchar_t)  tempu64;
-            else
-                NS_ERROR("bad type");
+        if (param.IsOut() || !type.IsArithmetic()) {
+            dp->val.p = (void*) value;
+            continue;
        }

-        if (iCount < GPR_COUNT)
-            iCount++;  // gprs are skipped for fp args, so this always needs inc
-        else
-            ap++;
+        switch (type) {
+        case nsXPTType::T_I8:      dp->val.i8  = (int8_t)   value; break;
+        case nsXPTType::T_I16:     dp->val.i16 = (int16_t)  value; break;
+        case nsXPTType::T_I32:     dp->val.i32 = (int32_t)  value; break;
+        case nsXPTType::T_I64:     dp->val.i64 = (int64_t)  value; break;
+        case nsXPTType::T_U8:      dp->val.u8  = (uint8_t)  value; break;
+        case nsXPTType::T_U16:     dp->val.u16 = (uint16_t) value; break;
+        case nsXPTType::T_U32:     dp->val.u32 = (uint32_t) value; break;
+        case nsXPTType::T_U64:     dp->val.u64 = (uint64_t) value; break;
+        case nsXPTType::T_BOOL:    dp->val.b   = (bool)     value; break;
+        case nsXPTType::T_CHAR:    dp->val.c   = (char)     value; break;
+        case nsXPTType::T_WCHAR:   dp->val.wc  = (wchar_t)  value; break;
+
+        default:
+            NS_ERROR("bad type");
+            break;
+        }
    }

    nsresult result = self->mOuter->CallMethod((uint16_t) methodIndex, info,
@ -148,23 +178,19 @@ PrepareAndDispatch(nsXPTCStubBase* self,

 // Load r11 with the constant 'n' and branch to SharedStub().
 //
-// XXX Yes, it's ugly that we're relying on gcc's name-mangling here;
-// however, it's quick, dirty, and'll break when the ABI changes on
-// us, which is what we want ;-).
-
-
-// gcc-3 version
-//
 // As G++3 ABI contains the length of the functionname in the mangled
 // name, it is difficult to get a generic assembler mechanism like
 // in the G++ 2.95 case.
+// XXX Yes, it's ugly that we're relying on gcc's name-mangling here;
+// however, it's quick, dirty, and'll break when the ABI changes on
+// us, which is what we want ;-).
 // Create names would be like:
 // _ZN14nsXPTCStubBase5Stub1Ev
 // _ZN14nsXPTCStubBase6Stub12Ev
 // _ZN14nsXPTCStubBase7Stub123Ev
 // _ZN14nsXPTCStubBase8Stub1234Ev
 // etc.
-// Use assembler directives to get the names right...
+// Use assembler directives to get the names right.

 #if _CALL_ELF == 2
 # define STUB_ENTRY(n)                                                  \
@ -250,7 +276,7 @@ __asm__ (                                                               \
 #define SENTINEL_ENTRY(n)                                               \
 nsresult nsXPTCStubBase::Sentinel##n()                                  \
 {                                                                       \
-    NS_ERROR("nsXPTCStubBase::Sentinel called");                  \
+    NS_ERROR("nsXPTCStubBase::Sentinel called");                        \
    return NS_ERROR_NOT_IMPLEMENTED;                                    \
 }