Mirror of https://github.com/mozilla/pjs.git
Fix for bug 272327 . AMD64 assembly optimization for bignum multiply. r=nelson
This commit is contained in:
Parent
9de196f20f
Commit
0126f05e21
|
@ -129,9 +129,10 @@ endif
|
|||
|
||||
ifeq ($(OS_TARGET),Linux)
|
||||
ifeq ($(CPU_ARCH),x86_64)
|
||||
ASFILES = arcfour-amd64-gas.s
|
||||
ASFLAGS += -march=opteron -m64
|
||||
DEFINES += -DNSS_BEVAND_ARCFOUR
|
||||
ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s
|
||||
ASFLAGS += -march=opteron -m64 -fPIC
|
||||
DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
|
||||
MPI_SRCS += mpi_amd64.c
|
||||
endif
|
||||
ifeq ($(CPU_ARCH),x86)
|
||||
ASFILES = mpi_x86.s
|
||||
|
@ -246,13 +247,14 @@ else
|
|||
ifeq ($(USE_64),1)
|
||||
# Solaris for AMD64
|
||||
ifdef NS_USE_GCC
|
||||
ASFILES = arcfour-amd64-gas.s
|
||||
ASFLAGS += -march=opteron -m64
|
||||
ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s
|
||||
ASFLAGS += -march=opteron -m64 -fPIC
|
||||
else
|
||||
ASFILES = arcfour-amd64-sun.s
|
||||
ASFILES = arcfour-amd64-sun.s mpi_amd64_sun.s
|
||||
ASFLAGS += -xarch=generic64 -K PIC
|
||||
endif
|
||||
DEFINES += -DNSS_BEVAND_ARCFOUR
|
||||
DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
|
||||
MPI_SRCS += mpi_amd64.c
|
||||
else
|
||||
# Solaris x86
|
||||
DEFINES += -D_X86_
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: mpi-priv.h,v 1.17 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
|
||||
/* $Id: mpi-priv.h,v 1.18 2005-02-25 04:30:11 julien.pierre.bugs%sun.com Exp $ */
|
||||
#ifndef _MPI_PRIV_H_
|
||||
#define _MPI_PRIV_H_ 1
|
||||
|
||||
|
@ -238,10 +238,28 @@ mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c);
|
|||
#define MPI_ASM_DECL
|
||||
#endif
|
||||
|
||||
#ifdef MPI_AMD64
|
||||
|
||||
mp_digit MPI_ASM_DECL s_mpv_mul_set_vec64(mp_digit*, mp_digit *, mp_size, mp_digit);
|
||||
mp_digit MPI_ASM_DECL s_mpv_mul_add_vec64(mp_digit*, const mp_digit*, mp_size, mp_digit);
|
||||
|
||||
/* c = a * b */
|
||||
#define s_mpv_mul_d(a, a_len, b, c) \
|
||||
((unsigned long*)c)[a_len] = s_mpv_mul_set_vec64(c, a, a_len, b)
|
||||
|
||||
/* c += a * b */
|
||||
#define s_mpv_mul_d_add(a, a_len, b, c) \
|
||||
((unsigned long*)c)[a_len] = s_mpv_mul_add_vec64(c, a, a_len, b)
|
||||
|
||||
#else
|
||||
|
||||
void MPI_ASM_DECL s_mpv_mul_d(const mp_digit *a, mp_size a_len,
|
||||
mp_digit b, mp_digit *c);
|
||||
void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
|
||||
mp_digit b, mp_digit *c);
|
||||
|
||||
#endif
|
||||
|
||||
void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
|
||||
mp_size a_len, mp_digit b,
|
||||
mp_digit *c);
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/* $Id: mpi-test.c,v 1.12 2004-04-27 23:04:36 gerv%gerv.net Exp $ */
|
||||
/* $Id: mpi-test.c,v 1.13 2005-02-25 04:30:11 julien.pierre.bugs%sun.com Exp $ */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -134,6 +134,7 @@ const char *mp19 =
|
|||
"F1C9DACDA287F2E3C88DCE2393B8F53DAAAC1196DC36510962B6B59454CFE64B";
|
||||
const char *mp20 =
|
||||
"D445662C8B6FE394107B867797750C326E0F4A967E135FC430F6CD7207913AC7";
|
||||
const char* mp21 = "2";
|
||||
|
||||
const mp_digit md1 = 0;
|
||||
const mp_digit md2 = 0x1;
|
||||
|
@ -201,7 +202,7 @@ const char *p_mp1415 =
|
|||
"548F1732452F9E7F810C0B4B430C073C0FBCE03F0D03F82630654BCE166AA772E1EE"
|
||||
"DD0C08D3E3EBDF0AF54203B43AFDFC40D8FC79C97A4B0A4E1BEB14D8FCEFDDED8758"
|
||||
"6ED65B18";
|
||||
|
||||
const char *p_mp2121 = "4";
|
||||
const char *mp_mp345 = "B9B6D3A3";
|
||||
const char *mp_mp335 = "16609C2D";
|
||||
|
||||
|
@ -874,6 +875,15 @@ int test_mul(void)
|
|||
reason("error: computed %s, expected %s\n", g_intbuf, p_mp1415);
|
||||
res = 1;
|
||||
}
|
||||
mp_read_radix(&a, mp21, 10); mp_read_radix(&b, mp21, 10);
|
||||
|
||||
IFOK( mp_mul(&a, &b, &a) );
|
||||
mp_toradix(&a, g_intbuf, 10);
|
||||
|
||||
if(strcmp(g_intbuf, p_mp2121) != 0) {
|
||||
reason("error: computed %s, expected %s\n", g_intbuf, p_mp2121);
|
||||
res = 1; goto CLEANUP;
|
||||
}
|
||||
|
||||
CLEANUP:
|
||||
mp_clear(&a); mp_clear(&b);
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is the Solaris software cryptographic token.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Sun Microsystems, Inc.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2005
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Sun Microsystems, Inc.
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef MPI_AMD64
|
||||
#error This file only works on AMD64 platforms.
|
||||
#endif
|
||||
|
||||
#include <mpi-priv.h>
|
||||
|
||||
/*
|
||||
* MPI glue
|
||||
*
|
||||
*/
|
||||
|
||||
/* Presently, this is only used by the Montgomery arithmetic code. */
|
||||
/* c += a * b */
|
||||
void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
|
||||
mp_digit b, mp_digit *c)
|
||||
{
|
||||
mp_digit w;
|
||||
mp_digit d;
|
||||
|
||||
d = s_mpv_mul_add_vec64(c, a, a_len, b);
|
||||
c += a_len;
|
||||
while (d) {
|
||||
w = c[0] + d;
|
||||
d = (w < c[0] || w < d);
|
||||
*c++ = w;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,418 @@
|
|||
# ***** BEGIN LICENSE BLOCK *****
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is the Solaris software cryptographic token.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Sun Microsystems, Inc.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2005
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Sun Microsystems, Inc.
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ***** END LICENSE BLOCK ***** */
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
#
|
||||
# Implementation of s_mpv_mul_set_vec which exploits
|
||||
# the 64X64->128 bit unsigned multiply instruction.
|
||||
#
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
# r = a * digit, r and a are vectors of length len
|
||||
# returns the carry digit
|
||||
# r and a are 64 bit aligned.
|
||||
#
|
||||
# uint64_t
|
||||
# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
|
||||
#
|
||||
|
||||
.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
|
||||
|
||||
xorq %rax, %rax # if (len == 0) return (0)
|
||||
testq %rdx, %rdx
|
||||
jz .L17
|
||||
|
||||
movq %rdx, %r8 # Use r8 for len; %rdx is used by mul
|
||||
xorq %r9, %r9 # cy = 0
|
||||
|
||||
.L15:
|
||||
cmpq $8, %r8 # 8 - len
|
||||
jb .L16
|
||||
movq 0(%rsi), %rax # rax = a[0]
|
||||
movq 8(%rsi), %r11 # prefetch a[1]
|
||||
mulq %rcx # p = a[0] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 0(%rdi) # r[0] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 16(%rsi), %r11 # prefetch a[2]
|
||||
mulq %rcx # p = a[1] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 8(%rdi) # r[1] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 24(%rsi), %r11 # prefetch a[3]
|
||||
mulq %rcx # p = a[2] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 16(%rdi) # r[2] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 32(%rsi), %r11 # prefetch a[4]
|
||||
mulq %rcx # p = a[3] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 24(%rdi) # r[3] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 40(%rsi), %r11 # prefetch a[5]
|
||||
mulq %rcx # p = a[4] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 32(%rdi) # r[4] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 48(%rsi), %r11 # prefetch a[6]
|
||||
mulq %rcx # p = a[5] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 40(%rdi) # r[5] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 56(%rsi), %r11 # prefetch a[7]
|
||||
mulq %rcx # p = a[6] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 48(%rdi) # r[6] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
mulq %rcx # p = a[7] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 56(%rdi) # r[7] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
addq $64, %rsi
|
||||
addq $64, %rdi
|
||||
subq $8, %r8
|
||||
|
||||
jz .L17
|
||||
jmp .L15
|
||||
|
||||
.L16:
|
||||
movq 0(%rsi), %rax
|
||||
mulq %rcx # p = a[0] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 0(%rdi) # r[0] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 8(%rsi), %rax
|
||||
mulq %rcx # p = a[1] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 8(%rdi) # r[1] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 16(%rsi), %rax
|
||||
mulq %rcx # p = a[2] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 16(%rdi) # r[2] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 24(%rsi), %rax
|
||||
mulq %rcx # p = a[3] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 24(%rdi) # r[3] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 32(%rsi), %rax
|
||||
mulq %rcx # p = a[4] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 32(%rdi) # r[4] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 40(%rsi), %rax
|
||||
mulq %rcx # p = a[5] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 40(%rdi) # r[5] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 48(%rsi), %rax
|
||||
mulq %rcx # p = a[6] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 48(%rdi) # r[6] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
|
||||
.L17:
|
||||
movq %r9, %rax
|
||||
ret
|
||||
|
||||
.size s_mpv_mul_set_vec64, [.-s_mpv_mul_set_vec64]
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
#
|
||||
# Implementation of s_mpv_mul_add_vec which exploits
|
||||
# the 64X64->128 bit unsigned multiply instruction.
|
||||
#
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
# r += a * digit, r and a are vectors of length len
|
||||
# returns the carry digit
|
||||
# r and a are 64 bit aligned.
|
||||
#
|
||||
# uint64_t
|
||||
# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
|
||||
#
|
||||
|
||||
.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
|
||||
|
||||
xorq %rax, %rax # if (len == 0) return (0)
|
||||
testq %rdx, %rdx
|
||||
jz .L27
|
||||
|
||||
movq %rdx, %r8 # Use r8 for len; %rdx is used by mul
|
||||
xorq %r9, %r9 # cy = 0
|
||||
|
||||
.L25:
|
||||
cmpq $8, %r8 # 8 - len
|
||||
jb .L26
|
||||
movq 0(%rsi), %rax # rax = a[0]
|
||||
movq 0(%rdi), %r10 # r10 = r[0]
|
||||
movq 8(%rsi), %r11 # prefetch a[1]
|
||||
mulq %rcx # p = a[0] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[0]
|
||||
movq 8(%rdi), %r10 # prefetch r[1]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 0(%rdi) # r[0] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 16(%rsi), %r11 # prefetch a[2]
|
||||
mulq %rcx # p = a[1] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[1]
|
||||
movq 16(%rdi), %r10 # prefetch r[2]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 8(%rdi) # r[1] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 24(%rsi), %r11 # prefetch a[3]
|
||||
mulq %rcx # p = a[2] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[2]
|
||||
movq 24(%rdi), %r10 # prefetch r[3]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 16(%rdi) # r[2] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 32(%rsi), %r11 # prefetch a[4]
|
||||
mulq %rcx # p = a[3] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[3]
|
||||
movq 32(%rdi), %r10 # prefetch r[4]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 24(%rdi) # r[3] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 40(%rsi), %r11 # prefetch a[5]
|
||||
mulq %rcx # p = a[4] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[4]
|
||||
movq 40(%rdi), %r10 # prefetch r[5]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 32(%rdi) # r[4] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 48(%rsi), %r11 # prefetch a[6]
|
||||
mulq %rcx # p = a[5] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[5]
|
||||
movq 48(%rdi), %r10 # prefetch r[6]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 40(%rdi) # r[5] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 56(%rsi), %r11 # prefetch a[7]
|
||||
mulq %rcx # p = a[6] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[6]
|
||||
movq 56(%rdi), %r10 # prefetch r[7]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 48(%rdi) # r[6] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
mulq %rcx # p = a[7] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[7]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 56(%rdi) # r[7] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
|
||||
addq $64, %rsi
|
||||
addq $64, %rdi
|
||||
subq $8, %r8
|
||||
|
||||
jz .L27
|
||||
jmp .L25
|
||||
|
||||
.L26:
|
||||
movq 0(%rsi), %rax
|
||||
movq 0(%rdi), %r10
|
||||
mulq %rcx # p = a[0] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[0]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 0(%rdi) # r[0] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 8(%rsi), %rax
|
||||
movq 8(%rdi), %r10
|
||||
mulq %rcx # p = a[1] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[1]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 8(%rdi) # r[1] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 16(%rsi), %rax
|
||||
movq 16(%rdi), %r10
|
||||
mulq %rcx # p = a[2] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[2]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 16(%rdi) # r[2] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 24(%rsi), %rax
|
||||
movq 24(%rdi), %r10
|
||||
mulq %rcx # p = a[3] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[3]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 24(%rdi) # r[3] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 32(%rsi), %rax
|
||||
movq 32(%rdi), %r10
|
||||
mulq %rcx # p = a[4] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[4]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 32(%rdi) # r[4] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 40(%rsi), %rax
|
||||
movq 40(%rdi), %r10
|
||||
mulq %rcx # p = a[5] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[5]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 40(%rdi) # r[5] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 48(%rsi), %rax
|
||||
movq 48(%rdi), %r10
|
||||
mulq %rcx # p = a[6] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx # p += r[6]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx # p += cy
|
||||
movq %rax, 48(%rdi) # r[6] = lo(p)
|
||||
movq %rdx, %r9 # cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
|
||||
.L27:
|
||||
movq %r9, %rax
|
||||
ret
|
||||
|
||||
.size s_mpv_mul_add_vec64, [.-s_mpv_mul_add_vec64]
|
|
@ -0,0 +1,418 @@
|
|||
/ ***** BEGIN LICENSE BLOCK *****
|
||||
/ Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
/
|
||||
/ The contents of this file are subject to the Mozilla Public License Version
|
||||
/ 1.1 (the "License"); you may not use this file except in compliance with
|
||||
/ the License. You may obtain a copy of the License at
|
||||
/ http://www.mozilla.org/MPL/
|
||||
/
|
||||
/ Software distributed under the License is distributed on an "AS IS" basis,
|
||||
/ WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
/ for the specific language governing rights and limitations under the
|
||||
/ License.
|
||||
/
|
||||
/ The Original Code is the Solaris software cryptographic token.
|
||||
/
|
||||
/ The Initial Developer of the Original Code is
|
||||
/ Sun Microsystems, Inc.
|
||||
/ Portions created by the Initial Developer are Copyright (C) 2005
|
||||
/ the Initial Developer. All Rights Reserved.
|
||||
/
|
||||
/ Contributor(s):
|
||||
/ Sun Microsystems, Inc.
|
||||
/
|
||||
/ Alternatively, the contents of this file may be used under the terms of
|
||||
/ either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
/ the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
/ in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
/ of those above. If you wish to allow use of your version of this file only
|
||||
/ under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
/ use your version of this file under the terms of the MPL, indicate your
|
||||
/ decision by deleting the provisions above and replace them with the notice
|
||||
/ and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
/ the provisions above, a recipient may use your version of this file under
|
||||
/ the terms of any one of the MPL, the GPL or the LGPL.
|
||||
/
|
||||
/ ***** END LICENSE BLOCK ***** */
|
||||
|
||||
|
||||
/ ------------------------------------------------------------------------
|
||||
/
|
||||
/ Implementation of s_mpv_mul_set_vec which exploits
|
||||
/ the 64X64->128 bit unsigned multiply instruction.
|
||||
/
|
||||
/ ------------------------------------------------------------------------
|
||||
|
||||
/ r = a * digit, r and a are vectors of length len
|
||||
/ returns the carry digit
|
||||
/ r and a are 64 bit aligned.
|
||||
/
|
||||
/ uint64_t
|
||||
/ s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
|
||||
/
|
||||
|
||||
.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
|
||||
|
||||
xorq %rax, %rax / if (len == 0) return (0)
|
||||
testq %rdx, %rdx
|
||||
jz .L17
|
||||
|
||||
movq %rdx, %r8 / Use r8 for len; %rdx is used by mul
|
||||
xorq %r9, %r9 / cy = 0
|
||||
|
||||
.L15:
|
||||
cmpq $8, %r8 / 8 - len
|
||||
jb .L16
|
||||
movq 0(%rsi), %rax / rax = a[0]
|
||||
movq 8(%rsi), %r11 / prefetch a[1]
|
||||
mulq %rcx / p = a[0] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 0(%rdi) / r[0] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 16(%rsi), %r11 / prefetch a[2]
|
||||
mulq %rcx / p = a[1] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 8(%rdi) / r[1] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 24(%rsi), %r11 / prefetch a[3]
|
||||
mulq %rcx / p = a[2] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 16(%rdi) / r[2] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 32(%rsi), %r11 / prefetch a[4]
|
||||
mulq %rcx / p = a[3] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 24(%rdi) / r[3] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 40(%rsi), %r11 / prefetch a[5]
|
||||
mulq %rcx / p = a[4] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 32(%rdi) / r[4] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 48(%rsi), %r11 / prefetch a[6]
|
||||
mulq %rcx / p = a[5] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 40(%rdi) / r[5] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 56(%rsi), %r11 / prefetch a[7]
|
||||
mulq %rcx / p = a[6] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 48(%rdi) / r[6] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
mulq %rcx / p = a[7] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 56(%rdi) / r[7] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
addq $64, %rsi
|
||||
addq $64, %rdi
|
||||
subq $8, %r8
|
||||
|
||||
jz .L17
|
||||
jmp .L15
|
||||
|
||||
.L16:
|
||||
movq 0(%rsi), %rax
|
||||
mulq %rcx / p = a[0] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 0(%rdi) / r[0] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 8(%rsi), %rax
|
||||
mulq %rcx / p = a[1] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 8(%rdi) / r[1] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 16(%rsi), %rax
|
||||
mulq %rcx / p = a[2] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 16(%rdi) / r[2] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 24(%rsi), %rax
|
||||
mulq %rcx / p = a[3] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 24(%rdi) / r[3] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 32(%rsi), %rax
|
||||
mulq %rcx / p = a[4] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 32(%rdi) / r[4] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 40(%rsi), %rax
|
||||
mulq %rcx / p = a[5] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 40(%rdi) / r[5] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
movq 48(%rsi), %rax
|
||||
mulq %rcx / p = a[6] * digit
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 48(%rdi) / r[6] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L17
|
||||
|
||||
|
||||
.L17:
|
||||
movq %r9, %rax
|
||||
ret
|
||||
|
||||
.size s_mpv_mul_set_vec64, [.-s_mpv_mul_set_vec64]
|
||||
|
||||
/ ------------------------------------------------------------------------
|
||||
/
|
||||
/ Implementation of s_mpv_mul_add_vec which exploits
|
||||
/ the 64X64->128 bit unsigned multiply instruction.
|
||||
/
|
||||
/ ------------------------------------------------------------------------
|
||||
|
||||
/ r += a * digit, r and a are vectors of length len
|
||||
/ returns the carry digit
|
||||
/ r and a are 64 bit aligned.
|
||||
/
|
||||
/ uint64_t
|
||||
/ s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
|
||||
/
|
||||
|
||||
.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
|
||||
|
||||
xorq %rax, %rax / if (len == 0) return (0)
|
||||
testq %rdx, %rdx
|
||||
jz .L27
|
||||
|
||||
movq %rdx, %r8 / Use r8 for len; %rdx is used by mul
|
||||
xorq %r9, %r9 / cy = 0
|
||||
|
||||
.L25:
|
||||
cmpq $8, %r8 / 8 - len
|
||||
jb .L26
|
||||
movq 0(%rsi), %rax / rax = a[0]
|
||||
movq 0(%rdi), %r10 / r10 = r[0]
|
||||
movq 8(%rsi), %r11 / prefetch a[1]
|
||||
mulq %rcx / p = a[0] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[0]
|
||||
movq 8(%rdi), %r10 / prefetch r[1]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 0(%rdi) / r[0] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 16(%rsi), %r11 / prefetch a[2]
|
||||
mulq %rcx / p = a[1] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[1]
|
||||
movq 16(%rdi), %r10 / prefetch r[2]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 8(%rdi) / r[1] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 24(%rsi), %r11 / prefetch a[3]
|
||||
mulq %rcx / p = a[2] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[2]
|
||||
movq 24(%rdi), %r10 / prefetch r[3]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 16(%rdi) / r[2] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 32(%rsi), %r11 / prefetch a[4]
|
||||
mulq %rcx / p = a[3] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[3]
|
||||
movq 32(%rdi), %r10 / prefetch r[4]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 24(%rdi) / r[3] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 40(%rsi), %r11 / prefetch a[5]
|
||||
mulq %rcx / p = a[4] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[4]
|
||||
movq 40(%rdi), %r10 / prefetch r[5]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 32(%rdi) / r[4] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 48(%rsi), %r11 / prefetch a[6]
|
||||
mulq %rcx / p = a[5] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[5]
|
||||
movq 48(%rdi), %r10 / prefetch r[6]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 40(%rdi) / r[5] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
movq 56(%rsi), %r11 / prefetch a[7]
|
||||
mulq %rcx / p = a[6] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[6]
|
||||
movq 56(%rdi), %r10 / prefetch r[7]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 48(%rdi) / r[6] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
movq %r11, %rax
|
||||
mulq %rcx / p = a[7] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[7]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 56(%rdi) / r[7] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
|
||||
addq $64, %rsi
|
||||
addq $64, %rdi
|
||||
subq $8, %r8
|
||||
|
||||
jz .L27
|
||||
jmp .L25
|
||||
|
||||
.L26:
|
||||
movq 0(%rsi), %rax
|
||||
movq 0(%rdi), %r10
|
||||
mulq %rcx / p = a[0] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[0]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 0(%rdi) / r[0] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 8(%rsi), %rax
|
||||
movq 8(%rdi), %r10
|
||||
mulq %rcx / p = a[1] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[1]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 8(%rdi) / r[1] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 16(%rsi), %rax
|
||||
movq 16(%rdi), %r10
|
||||
mulq %rcx / p = a[2] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[2]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 16(%rdi) / r[2] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 24(%rsi), %rax
|
||||
movq 24(%rdi), %r10
|
||||
mulq %rcx / p = a[3] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[3]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 24(%rdi) / r[3] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 32(%rsi), %rax
|
||||
movq 32(%rdi), %r10
|
||||
mulq %rcx / p = a[4] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[4]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 32(%rdi) / r[4] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 40(%rsi), %rax
|
||||
movq 40(%rdi), %r10
|
||||
mulq %rcx / p = a[5] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[5]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 40(%rdi) / r[5] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
movq 48(%rsi), %rax
|
||||
movq 48(%rdi), %r10
|
||||
mulq %rcx / p = a[6] * digit
|
||||
addq %r10, %rax
|
||||
adcq $0, %rdx / p += r[6]
|
||||
addq %r9, %rax
|
||||
adcq $0, %rdx / p += cy
|
||||
movq %rax, 48(%rdi) / r[6] = lo(p)
|
||||
movq %rdx, %r9 / cy = hi(p)
|
||||
decq %r8
|
||||
jz .L27
|
||||
|
||||
|
||||
.L27:
|
||||
movq %r9, %rax
|
||||
ret
|
||||
|
||||
.size s_mpv_mul_add_vec64, [.-s_mpv_mul_add_vec64]
|
|
@ -217,3 +217,14 @@ CFLAGS= -O2 -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \
|
|||
-pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \
|
||||
-DXP_UNIX -UDEBUG -DNDEBUG -D_REENTRANT $(MPICMN)
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET),AMD64SOLARIS)
|
||||
ASFLAGS += -xarch=generic64
|
||||
AS_OBJS = mpi_amd64.o mpi_amd64_sun.o
|
||||
MP_CONFIG = -DMP_ASSEMBLY_MULTIPLY -DMPI_AMD64
|
||||
CFLAGS = -xarch=generic64 -xO4 -I. -DMP_API_COMPATIBLE -DMP_IOFUNC $(MP_CONFIG)
|
||||
MPICMN += $(MP_CONFIG)
|
||||
|
||||
mpi_amd64_asm.o: mpi_amd64_sun.s
|
||||
$(AS) -xarch=generic64 -P -D_ASM mpi_amd64_sun.s
|
||||
endif
|
||||
|
|
Loading…
Link in new issue