зеркало из https://github.com/stride3d/NativePath.git
WIP on LLVM targets, got all of them working so far!
This commit is contained in:
Родитель
8f6a31de78
Коммит
bdbd09de46
|
@ -31,113 +31,325 @@ THE SOFTWARE.
|
|||
#ifndef nativemath_h
|
||||
#define nativemath_h
|
||||
|
||||
#include "NativePath.h"
|
||||
#include "ShaderFastMathLib.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//ShaderFastMathLib
|
||||
|
||||
//
|
||||
// Using 0 Newton Raphson iterations
|
||||
// Relative error : ~3.4% over full
|
||||
// Precise format : ~small float
|
||||
// 2 ALU
|
||||
//
|
||||
extern float npFastRcpSqrtNR0(float inX);
|
||||
|
||||
//
|
||||
// Using 1 Newton Raphson iterations
|
||||
// Relative error : ~0.2% over full
|
||||
// Precise format : ~half float
|
||||
// 6 ALU
|
||||
//
|
||||
extern float npFastRcpSqrtNR1(float inX);
|
||||
|
||||
//
|
||||
// Using 2 Newton Raphson iterations
|
||||
// Relative error : ~4.6e-004% over full
|
||||
// Precise format : ~full float
|
||||
// 9 ALU
|
||||
//
|
||||
extern float npFastRcpSqrtNR2(float inX);
|
||||
|
||||
//
|
||||
// Using 0 Newton Raphson iterations
|
||||
// Relative error : < 0.7% over full
|
||||
// Precise format : ~small float
|
||||
// 1 ALU
|
||||
//
|
||||
extern float npFastSqrtNR0(float inX);
|
||||
|
||||
//
|
||||
// Use inverse Rcp Sqrt
|
||||
// Using 1 Newton Raphson iterations
|
||||
// Relative error : ~0.2% over full
|
||||
// Precise format : ~half float
|
||||
// 6 ALU
|
||||
//
|
||||
extern float npFastSqrtNR1(float inX);
|
||||
|
||||
//
|
||||
// Use inverse Rcp Sqrt
|
||||
// Using 2 Newton Raphson iterations
|
||||
// Relative error : ~4.6e-004% over full
|
||||
// Precise format : ~full float
|
||||
// 9 ALU
|
||||
//
|
||||
extern float npFastSqrtNR2(float inX);
|
||||
|
||||
//
|
||||
// Using 0 Newton Raphson iterations
|
||||
// Relative error : < 0.4% over full
|
||||
// Precise format : ~small float
|
||||
// 1 ALU
|
||||
//
|
||||
extern float npFastRcpNR0(float inX);
|
||||
|
||||
//
|
||||
// Using 1 Newton Raphson iterations
|
||||
// Relative error : < 0.02% over full
|
||||
// Precise format : ~half float
|
||||
// 3 ALU
|
||||
//
|
||||
extern float npFastRcpNR1(float inX);
|
||||
|
||||
//
|
||||
// Using 2 Newton Raphson iterations
|
||||
// Relative error : < 5.0e-005% over full
|
||||
// Precise format : ~full float
|
||||
// 5 ALU
|
||||
//
|
||||
extern float npFastRcpNR2(float inX);
|
||||
|
||||
// 4th order polynomial approximation
|
||||
// 4 VGRP, 16 ALU Full Rate
|
||||
// 7 * 10^-5 radians precision
|
||||
// Reference : Handbook of Mathematical Functions (chapter : Elementary Transcendental Functions), M. Abramowitz and I.A. Stegun, Ed.
|
||||
extern float npAcosFast4(float inX);
|
||||
|
||||
// 4th order polynomial approximation
|
||||
// 4 VGRP, 16 ALU Full Rate
|
||||
// 7 * 10^-5 radians precision
|
||||
extern float npAsinFast4(float inX);
|
||||
|
||||
// 4th order hyperbolical approximation
|
||||
// 4 VGRP, 12 ALU Full Rate
|
||||
// 7 * 10^-5 radians precision
|
||||
// Reference : Efficient approximations for the arctangent function, Rajan, S. Sichun Wang Inkol, R. Joyal, A., May 2006
|
||||
extern float npAtanFast4(float inX);
|
||||
|
||||
//LoL engine fast math
|
||||
|
||||
extern double npLolFabs(double x);
|
||||
extern double npLolSin(double x);
|
||||
extern double npLolCos(double x);
|
||||
extern void npLolSincos(double x, double *sinx, double *cosx);
|
||||
extern void npLolSincosf(float x, float *sinx, float *cosx);
|
||||
extern double npLolTan(double x);
|
||||
#define FP_USE(x) (void)(x)
|
||||
#define __likely(x) __builtin_expect(!!(x), 1)
|
||||
#define __unlikely(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
/* Double-precision π. The literal deliberately has no `f` suffix: a float
 * literal would be rounded to single precision before being widened, leaving
 * only about 7 correct digits in this double. */
static const double D_PI = 3.1415926535897932384626433;
|
||||
|
||||
static const double PI_2 = 1.57079632679489661923132;
|
||||
static const double PI_4 = 0.785398163397448309615661;
|
||||
static const double INV_PI = 0.318309886183790671537768;
|
||||
static const double ROOT3 = 1.73205080756887729352745;
|
||||
|
||||
static const double ZERO = 0.0;
|
||||
static const double ONE = 1.0;
|
||||
static const double NEG_ONE = -1.0;
|
||||
static const double HALF = 0.5;
|
||||
static const double QUARTER = 0.25;
|
||||
static const double TWO = 2.0;
|
||||
static const double VERY_SMALL_NUMBER = 0x1.0p-128;
|
||||
static const double TWO_EXP_52 = 4503599627370496.0;
|
||||
static const double TWO_EXP_54 = 18014398509481984.0;
|
||||
|
||||
/** sin Taylor series coefficients. */
|
||||
static const double SC[] =
|
||||
{
|
||||
-1.6449340668482264364724e-0, // π^2/3!
|
||||
+8.1174242528335364363700e-1, // π^4/5!
|
||||
-1.9075182412208421369647e-1, // π^6/7!
|
||||
+2.6147847817654800504653e-2, // π^8/9!
|
||||
-2.3460810354558236375089e-3, // π^10/11!
|
||||
+1.4842879303107100368487e-4, // π^12/13!
|
||||
-6.9758736616563804745344e-6, // π^14/15!
|
||||
+2.5312174041370276513517e-7, // π^16/17!
|
||||
};
|
||||
|
||||
/* Note: the last value should be -1.3878952462213772114468e-7 (ie.
|
||||
* π^18/18!) but we tweak it in order to get the better average precision
|
||||
* required for tan() computations when close to π/2+kπ values. */
|
||||
static const double CC[] =
|
||||
{
|
||||
-4.9348022005446793094172e-0, // π^2/2!
|
||||
+4.0587121264167682181850e-0, // π^4/4!
|
||||
-1.3352627688545894958753e-0, // π^6/6!
|
||||
+2.3533063035889320454188e-1, // π^8/8!
|
||||
-2.5806891390014060012598e-2, // π^10/10!
|
||||
+1.9295743094039230479033e-3, // π^12/12!
|
||||
-1.0463810492484570711802e-4, // π^14/14!
|
||||
+4.3030695870329470072978e-6, // π^16/16!
|
||||
-1.3777e-7,
|
||||
};
|
||||
|
||||
/* These coefficients use Sloane’s http://oeis.org/A002430 and
|
||||
* http://oeis.org/A036279 sequences for the Taylor series of tan().
|
||||
* Note: the last value should be 2.12485922978838540352881e5 (ie.
|
||||
* 443861162*π^18/1856156927625), but we tweak it in order to get
|
||||
* sub 1e-11 average precision in a larger range. */
|
||||
static const double TC[] =
|
||||
{
|
||||
3.28986813369645287294483e0, // π^2/3
|
||||
1.29878788045336582981920e1, // 2*π^4/15
|
||||
5.18844961612069061254404e1, // 17*π^6/315
|
||||
2.07509320280908496804928e2, // 62*π^8/2835
|
||||
8.30024701695986756361561e2, // 1382*π^10/155925
|
||||
3.32009324029001216460018e3, // 21844*π^12/6081075
|
||||
1.32803704909665483598490e4, // 929569*π^14/638512875
|
||||
5.31214808666037709352112e4, // 6404582*π^16/10854718875
|
||||
2.373e5,
|
||||
};
|
||||
|
||||
static inline double npLolSin(double x)
|
||||
{
|
||||
double absx = __builtin_fabs(x * INV_PI);
|
||||
|
||||
/* If branches are cheap, skip the cycle count when |x| < π/4,
|
||||
* and only do the Taylor series up to the required precision. */
|
||||
#if LOL_FEATURE_CHEAP_BRANCHES
|
||||
if (absx < QUARTER)
|
||||
{
|
||||
/* Computing x^4 is one multiplication too many we do, but it helps
|
||||
* interleave the Taylor series operations a lot better. */
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE;
|
||||
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
return x * taylor;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Wrap |x| to the range [-1, 1] and keep track of the number of
|
||||
* cycles required. If odd, we'll need to change the sign of the
|
||||
* result. */
|
||||
double num_cycles = absx + TWO_EXP_52;
|
||||
FP_USE(num_cycles); num_cycles -= TWO_EXP_52;
|
||||
|
||||
double is_even = TWO * num_cycles - ONE;
|
||||
FP_USE(is_even); is_even += TWO_EXP_54;
|
||||
FP_USE(is_even); is_even -= TWO_EXP_54;
|
||||
FP_USE(is_even);
|
||||
is_even -= TWO * num_cycles - ONE;
|
||||
double sign = is_even;
|
||||
|
||||
absx -= num_cycles;
|
||||
|
||||
/* If branches are very cheap, we have the option to do the Taylor
|
||||
* series at a much lower degree by splitting. */
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
if (__builtin_fabs(absx) > QUARTER)
|
||||
{
|
||||
sign = (x * absx >= 0.0) ? sign : -sign;
|
||||
|
||||
double x1 = HALF - __builtin_fabs(absx);
|
||||
double x2 = x1 * x1;
|
||||
double x4 = x2 * x2;
|
||||
double sub1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
|
||||
return taylor * sign;
|
||||
}
|
||||
#endif
|
||||
|
||||
sign *= (x >= 0.0) ? D_PI : -D_PI;
|
||||
|
||||
/* Compute a Tailor series for sin() and combine sign information. */
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE;
|
||||
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
#else
|
||||
double sub1 = (((SC[7] * x4 + SC[5]) * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
|
||||
double sub2 = ((SC[6] * x4 + SC[4]) * x4 + SC[2]) * x4 + SC[0];
|
||||
#endif
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
|
||||
return absx * taylor * sign;
|
||||
}
|
||||
|
||||
static inline double npLolCos(double x)
|
||||
{
|
||||
double absx = __builtin_fabs(x * INV_PI);
|
||||
|
||||
#if LOL_FEATURE_CHEAP_BRANCHES
|
||||
if (absx < QUARTER)
|
||||
{
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
|
||||
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
double taylor = (sub1 * x2 + sub2) * x2 + ONE;
|
||||
return taylor;
|
||||
}
|
||||
#endif
|
||||
|
||||
double num_cycles = absx + TWO_EXP_52;
|
||||
FP_USE(num_cycles); num_cycles -= TWO_EXP_52;
|
||||
|
||||
double is_even = TWO * num_cycles - ONE;
|
||||
FP_USE(is_even); is_even += TWO_EXP_54;
|
||||
FP_USE(is_even); is_even -= TWO_EXP_54;
|
||||
FP_USE(is_even);
|
||||
is_even -= TWO * num_cycles - ONE;
|
||||
double sign = is_even;
|
||||
|
||||
absx -= num_cycles;
|
||||
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
if (__builtin_fabs(absx) > QUARTER)
|
||||
{
|
||||
double x1 = HALF - __builtin_fabs(absx);
|
||||
double x2 = x1 * x1;
|
||||
double x4 = x2 * x2;
|
||||
double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE;
|
||||
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
|
||||
return x1 * taylor * sign * D_PI;
|
||||
}
|
||||
#endif
|
||||
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
double sub1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
#else
|
||||
double sub1 = (((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double sub2 = ((CC[6] * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0];
|
||||
#endif
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
|
||||
return taylor * sign;
|
||||
}
|
||||
|
||||
static inline void npLolSincos(double x, double *sinx, double *cosx)
|
||||
{
|
||||
double absx = __builtin_fabs(x * INV_PI);
|
||||
|
||||
#if LOL_FEATURE_CHEAP_BRANCHES
|
||||
if (absx < QUARTER)
|
||||
{
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
|
||||
/* Computing the Taylor series to the 11th order is enough to get
|
||||
* x * 1e-11 precision, but we push it to the 13th order so that
|
||||
* tan() has a better precision. */
|
||||
double subs1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
|
||||
double subs2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
double taylors = subs2 * x2 + subs1;
|
||||
*sinx = x * taylors;
|
||||
|
||||
double subc1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
|
||||
double subc2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
double taylorc = (subc1 * x2 + subc2) * x2 + ONE;
|
||||
*cosx = taylorc;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
double num_cycles = absx + TWO_EXP_52;
|
||||
FP_USE(num_cycles); num_cycles -= TWO_EXP_52;
|
||||
|
||||
double is_even = TWO * num_cycles - ONE;
|
||||
FP_USE(is_even); is_even += TWO_EXP_54;
|
||||
FP_USE(is_even); is_even -= TWO_EXP_54;
|
||||
FP_USE(is_even);
|
||||
is_even -= TWO * num_cycles - ONE;
|
||||
double sin_sign = is_even;
|
||||
double cos_sign = is_even;
|
||||
|
||||
absx -= num_cycles;
|
||||
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
if (__builtin_fabs(absx) > QUARTER)
|
||||
{
|
||||
cos_sign = sin_sign;
|
||||
sin_sign = (x * absx >= 0.0) ? sin_sign : -sin_sign;
|
||||
|
||||
double x1 = HALF - __builtin_fabs(absx);
|
||||
double x2 = x1 * x1;
|
||||
double x4 = x2 * x2;
|
||||
|
||||
double subs1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double subs2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
double taylors = subs2 * x2 + subs1;
|
||||
*sinx = taylors * sin_sign;
|
||||
|
||||
double subc1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
|
||||
double subc2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
double taylorc = subc2 * x2 + subc1;
|
||||
*cosx = x1 * taylorc * cos_sign * D_PI;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
sin_sign *= (x >= 0.0) ? D_PI : -D_PI;
|
||||
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
double subs1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
|
||||
double subs2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
double subc1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double subc2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
#else
|
||||
double subs1 = (((SC[7] * x4 + SC[5]) * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
|
||||
double subs2 = ((SC[6] * x4 + SC[4]) * x4 + SC[2]) * x4 + SC[0];
|
||||
/* Push Taylor series to the 19th order to enhance tan() accuracy. */
|
||||
double subc1 = (((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double subc2 = (((CC[8] * x4 + CC[6]) * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0];
|
||||
#endif
|
||||
double taylors = subs2 * x2 + subs1;
|
||||
*sinx = absx * taylors * sin_sign;
|
||||
|
||||
double taylorc = subc2 * x2 + subc1;
|
||||
*cosx = taylorc * cos_sign;
|
||||
}
|
||||
|
||||
static inline double npLolTan(double x)
|
||||
{
|
||||
#if LOL_FEATURE_CHEAP_BRANCHES
|
||||
double absx = __builtin_fabs(x * INV_PI);
|
||||
|
||||
/* This value was determined empirically to ensure an error of no
|
||||
* more than x * 1e-11 in this range. */
|
||||
if (absx < 0.163)
|
||||
{
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
double sub1 = (((TC[7] * x4 + TC[5]) * x4
|
||||
+ TC[3]) * x4 + TC[1]) * x4 + ONE;
|
||||
double sub2 = (((TC[8] * x4 + TC[6]) * x4
|
||||
+ TC[4]) * x4 + TC[2]) * x4 + TC[0];
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
return x * taylor;
|
||||
}
|
||||
#endif
|
||||
|
||||
double sinx, cosx;
|
||||
npLolSincos(x, &sinx, &cosx);
|
||||
|
||||
/* Ensure cosx isn't zero. FIXME: we lose the cosx sign here. */
|
||||
double absc = __builtin_fabs(cosx);
|
||||
|
||||
if (__unlikely(absc < VERY_SMALL_NUMBER))
|
||||
cosx = VERY_SMALL_NUMBER;
|
||||
return sinx / cosx;
|
||||
}
|
||||
|
||||
//Utility OpenCL vector goodies
|
||||
|
||||
|
@ -151,7 +363,7 @@ static inline float4 npTransformNormalF4(float4 normal, float4 matrix[4])
|
|||
return normal.xxxx * matrix[0].xyzw + normal.yyyy * matrix[1].xyzw + normal.zzzz * matrix[2].xyzw + normal.wwww * matrix[3].xyzw;
|
||||
}
|
||||
|
||||
static void npMatrixIdentityF4(float4* outMatrix)
|
||||
static inline void npMatrixIdentityF4(float4* outMatrix)
|
||||
{
|
||||
outMatrix[0].yzw = 0.0f;
|
||||
outMatrix[1].xzw = 0.0f;
|
||||
|
@ -163,14 +375,9 @@ static void npMatrixIdentityF4(float4* outMatrix)
|
|||
outMatrix[3].w = 1.0f;
|
||||
}
|
||||
|
||||
static inline float sqrtf(float x)
|
||||
static inline float npLengthF4(float4 vec)
|
||||
{
|
||||
return sqrt(x);
|
||||
}
|
||||
|
||||
static float npLengthF4(float4 vec)
|
||||
{
|
||||
return sqrtf(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z + vec.w * vec.w);
|
||||
return __builtin_sqrt(vec.x * vec.x + vec.y * vec.y + vec.z * vec.z + vec.w * vec.w);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -1591,16 +1591,13 @@ typedef uint32_t uint4 __attribute__((ext_vector_type(4)));
|
|||
#define sqrt __builtin_sqrt
|
||||
#endif
|
||||
|
||||
/* #if !__has_builtin(__builtin_sqrtf)
|
||||
#if !__has_builtin(__builtin_sqrtf)
|
||||
#error \"sqrtf clang built-in not available\"
|
||||
// ff
|
||||
extern float sqrtf(...);
|
||||
#else
|
||||
#define sqrtf __builtin_sqrtf
|
||||
#endif */
|
||||
|
||||
#undef sqrtf
|
||||
#define sqrtf sqrt
|
||||
#endif
|
||||
|
||||
#if !__has_builtin(__builtin_sqrtl)
|
||||
#error \"sqrtl clang built-in not available\"
|
||||
|
|
|
@ -56,13 +56,17 @@
|
|||
#ifndef SHADER_FAST_MATH_INC_FX
|
||||
#define SHADER_FAST_MATH_INC_FX
|
||||
|
||||
#ifdef __cplusplus
/* Open a linkage block. The matching `}` is emitted under the same guard at
 * the end of this header; without the brace here that `}` is unmatched and
 * the linkage specification would apply to the next declaration only. */
extern "C" {
#endif

/* Overlays a float and an int on the same storage so the
 * *IEEEIntApproximation helpers can treat a float's bit pattern as an
 * integer. */
union _float_int
{
    int i;
    float f;
};
|
||||
|
||||
#include <math.h>
|
||||
#include <NativePath.h>
|
||||
|
||||
// Derived from batch testing
|
||||
// TODO : Should be improved
|
||||
|
@ -100,7 +104,7 @@ union _float_int
|
|||
//
|
||||
|
||||
// Approximate guess using integer float arithmetics based on IEEE floating point standard
|
||||
float rcpSqrtIEEEIntApproximation(float inX, const int inRcpSqrtConst)
|
||||
static inline float rcpSqrtIEEEIntApproximation(float inX, const int inRcpSqrtConst)
|
||||
{
|
||||
union _float_int x;
|
||||
x.f = inX;
|
||||
|
@ -108,7 +112,7 @@ float rcpSqrtIEEEIntApproximation(float inX, const int inRcpSqrtConst)
|
|||
return x.f;
|
||||
}
|
||||
|
||||
float rcpSqrtNewtonRaphson(float inXHalf, float inRcpX)
|
||||
static inline float rcpSqrtNewtonRaphson(float inXHalf, float inRcpX)
|
||||
{
|
||||
return inRcpX * (-inXHalf * (inRcpX * inRcpX) + 1.5f);
|
||||
}
|
||||
|
@ -119,7 +123,7 @@ float rcpSqrtNewtonRaphson(float inXHalf, float inRcpX)
|
|||
// Precise format : ~small float
|
||||
// 2 ALU
|
||||
//
|
||||
float fastRcpSqrtNR0(float inX)
|
||||
static inline float fastRcpSqrtNR0(float inX)
|
||||
{
|
||||
float xRcpSqrt = rcpSqrtIEEEIntApproximation(inX, IEEE_INT_RCP_SQRT_CONST_NR0);
|
||||
return xRcpSqrt;
|
||||
|
@ -131,7 +135,7 @@ float fastRcpSqrtNR0(float inX)
|
|||
// Precise format : ~half float
|
||||
// 6 ALU
|
||||
//
|
||||
float fastRcpSqrtNR1(float inX)
|
||||
static inline float fastRcpSqrtNR1(float inX)
|
||||
{
|
||||
float xhalf = 0.5f * inX;
|
||||
float xRcpSqrt = rcpSqrtIEEEIntApproximation(inX, IEEE_INT_RCP_SQRT_CONST_NR1);
|
||||
|
@ -145,7 +149,7 @@ float fastRcpSqrtNR1(float inX)
|
|||
// Precise format : ~full float
|
||||
// 9 ALU
|
||||
//
|
||||
float fastRcpSqrtNR2(float inX)
|
||||
static inline float fastRcpSqrtNR2(float inX)
|
||||
{
|
||||
float xhalf = 0.5f * inX;
|
||||
float xRcpSqrt = rcpSqrtIEEEIntApproximation(inX, IEEE_INT_RCP_SQRT_CONST_NR2);
|
||||
|
@ -158,7 +162,7 @@ float fastRcpSqrtNR2(float inX)
|
|||
//
|
||||
// SQRT
|
||||
//
|
||||
float sqrtIEEEIntApproximation(float inX, const int inSqrtConst)
|
||||
static inline float sqrtIEEEIntApproximation(float inX, const int inSqrtConst)
|
||||
{
|
||||
union _float_int x;
|
||||
x.f = inX;
|
||||
|
@ -172,7 +176,7 @@ float sqrtIEEEIntApproximation(float inX, const int inSqrtConst)
|
|||
// Precise format : ~small float
|
||||
// 1 ALU
|
||||
//
|
||||
float fastSqrtNR0(float inX)
|
||||
static inline float fastSqrtNR0(float inX)
|
||||
{
|
||||
float xRcp = sqrtIEEEIntApproximation(inX, IEEE_INT_SQRT_CONST_NR0);
|
||||
return xRcp;
|
||||
|
@ -185,7 +189,7 @@ float fastSqrtNR0(float inX)
|
|||
// Precise format : ~half float
|
||||
// 6 ALU
|
||||
//
|
||||
float fastSqrtNR1(float inX)
|
||||
static inline float fastSqrtNR1(float inX)
|
||||
{
|
||||
// Inverse Rcp Sqrt
|
||||
return inX * fastRcpSqrtNR1(inX);
|
||||
|
@ -198,7 +202,7 @@ float fastSqrtNR1(float inX)
|
|||
// Precise format : ~full float
|
||||
// 9 ALU
|
||||
//
|
||||
float fastSqrtNR2(float inX)
|
||||
static inline float fastSqrtNR2(float inX)
|
||||
{
|
||||
// Inverse Rcp Sqrt
|
||||
return inX * fastRcpSqrtNR2(inX);
|
||||
|
@ -208,7 +212,7 @@ float fastSqrtNR2(float inX)
|
|||
// RCP
|
||||
//
|
||||
|
||||
float rcpIEEEIntApproximation(float inX, const int inRcpConst)
|
||||
static inline float rcpIEEEIntApproximation(float inX, const int inRcpConst)
|
||||
{
|
||||
union _float_int x;
|
||||
x.f = inX;
|
||||
|
@ -216,7 +220,7 @@ float rcpIEEEIntApproximation(float inX, const int inRcpConst)
|
|||
return x.f;
|
||||
}
|
||||
|
||||
float rcpNewtonRaphson(float inX, float inRcpX)
|
||||
static inline float rcpNewtonRaphson(float inX, float inRcpX)
|
||||
{
|
||||
return inRcpX * (-inRcpX * inX + 2.0f);
|
||||
}
|
||||
|
@ -227,7 +231,7 @@ float rcpNewtonRaphson(float inX, float inRcpX)
|
|||
// Precise format : ~small float
|
||||
// 1 ALU
|
||||
//
|
||||
float fastRcpNR0(float inX)
|
||||
static inline float fastRcpNR0(float inX)
|
||||
{
|
||||
float xRcp = rcpIEEEIntApproximation(inX, IEEE_INT_RCP_CONST_NR0);
|
||||
return xRcp;
|
||||
|
@ -239,7 +243,7 @@ float fastRcpNR0(float inX)
|
|||
// Precise format : ~half float
|
||||
// 3 ALU
|
||||
//
|
||||
float fastRcpNR1(float inX)
|
||||
static inline float fastRcpNR1(float inX)
|
||||
{
|
||||
float xRcp = rcpIEEEIntApproximation(inX, IEEE_INT_RCP_CONST_NR1);
|
||||
xRcp = rcpNewtonRaphson(inX, xRcp);
|
||||
|
@ -252,7 +256,7 @@ float fastRcpNR1(float inX)
|
|||
// Precise format : ~full float
|
||||
// 5 ALU
|
||||
//
|
||||
float fastRcpNR2(float inX)
|
||||
static inline float fastRcpNR2(float inX)
|
||||
{
|
||||
float xRcp = rcpIEEEIntApproximation(inX, IEEE_INT_RCP_CONST_NR2);
|
||||
xRcp = rcpNewtonRaphson(inX, xRcp);
|
||||
|
@ -271,7 +275,7 @@ static const float fsl_HALF_PI = 0.5f * 3.1415926535897932384626433f;
|
|||
// 4 VGRP, 16 ALU Full Rate
|
||||
// 7 * 10^-5 radians precision
|
||||
// Reference : Handbook of Mathematical Functions (chapter : Elementary Transcendental Functions), M. Abramowitz and I.A. Stegun, Ed.
|
||||
float acosFast4(float inX)
|
||||
static inline float acosFast4(float inX)
|
||||
{
|
||||
float x1 = fabsf(inX);
|
||||
float x2 = x1 * x1;
|
||||
|
@ -291,7 +295,7 @@ float acosFast4(float inX)
|
|||
// 4th order polynomial approximation
|
||||
// 4 VGRP, 16 ALU Full Rate
|
||||
// 7 * 10^-5 radians precision
|
||||
float asinFast4(float inX)
|
||||
static inline float asinFast4(float inX)
|
||||
{
|
||||
float x = inX;
|
||||
|
||||
|
@ -303,9 +307,13 @@ float asinFast4(float inX)
|
|||
// 4 VGRP, 12 ALU Full Rate
|
||||
// 7 * 10^-5 radians precision
|
||||
// Reference : Efficient approximations for the arctangent function, Rajan, S. Sichun Wang Inkol, R. Joyal, A., May 2006
|
||||
float atanFast4(float inX)
|
||||
static inline float atanFast4(float inX)
|
||||
{
|
||||
float x = inX;
|
||||
return x*(-0.1784f * fabsf(x) - 0.0663f * x * x + 1.0301f);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif //cplusplus
|
||||
#endif //SHADER_FAST_MATH_INC_FX
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include "../NativePath.h"
|
||||
#include "../NativeMemory.h"
|
||||
#include "stddef.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
|
@ -1,193 +0,0 @@
|
|||
/*
|
||||
Copyright (c) 2015 Giovanni Petrantoni
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
//
|
||||
// NativeMath.c
|
||||
// NativePath
|
||||
//
|
||||
// Created by Giovanni Petrantoni on 11/16/15.
|
||||
// Copyright © 2015 Giovanni Petrantoni. All rights reserved.
|
||||
//
|
||||
|
||||
#include <math.h>
|
||||
#include "ShaderFastMathLib.h"
|
||||
#include "lol_trig.h"
|
||||
|
||||
//ShaderFastMathLib
|
||||
|
||||
//
|
||||
// Using 0 Newton Raphson iterations
|
||||
// Relative error : ~3.4% over full
|
||||
// Precise format : ~small float
|
||||
// 2 ALU
|
||||
//
|
||||
float npFastRcpSqrtNR0(float inX)
|
||||
{
|
||||
return fastRcpSqrtNR0(inX);
|
||||
}
|
||||
|
||||
//
|
||||
// Using 1 Newton Raphson iterations
|
||||
// Relative error : ~0.2% over full
|
||||
// Precise format : ~half float
|
||||
// 6 ALU
|
||||
//
|
||||
float npFastRcpSqrtNR1(float inX)
|
||||
{
|
||||
return fastRcpSqrtNR1(inX);
|
||||
}
|
||||
|
||||
//
|
||||
// Using 2 Newton Raphson iterations
|
||||
// Relative error : ~4.6e-004% over full
|
||||
// Precise format : ~full float
|
||||
// 9 ALU
|
||||
//
|
||||
float npFastRcpSqrtNR2(float inX)
|
||||
{
|
||||
return fastRcpSqrtNR2(inX);
|
||||
}
|
||||
|
||||
//
|
||||
// Using 0 Newton Raphson iterations
|
||||
// Relative error : < 0.7% over full
|
||||
// Precise format : ~small float
|
||||
// 1 ALU
|
||||
//
|
||||
float npFastSqrtNR0(float inX)
|
||||
{
|
||||
return fastSqrtNR0(inX);
|
||||
}
|
||||
|
||||
//
|
||||
// Use inverse Rcp Sqrt
|
||||
// Using 1 Newton Raphson iterations
|
||||
// Relative error : ~0.2% over full
|
||||
// Precise format : ~half float
|
||||
// 6 ALU
|
||||
//
|
||||
float npFastSqrtNR1(float inX)
|
||||
{
|
||||
return fastSqrtNR1(inX);
|
||||
}
|
||||
|
||||
//
|
||||
// Use inverse Rcp Sqrt
|
||||
// Using 2 Newton Raphson iterations
|
||||
// Relative error : ~4.6e-004% over full
|
||||
// Precise format : ~full float
|
||||
// 9 ALU
|
||||
//
|
||||
float npFastSqrtNR2(float inX)
|
||||
{
|
||||
return fastSqrtNR2(inX);
|
||||
}
|
||||
|
||||
//
|
||||
// Using 0 Newton Raphson iterations
|
||||
// Relative error : < 0.4% over full
|
||||
// Precise format : ~small float
|
||||
// 1 ALU
|
||||
//
|
||||
float npFastRcpNR0(float inX)
|
||||
{
|
||||
return fastRcpNR0(inX);
|
||||
}
|
||||
|
||||
//
|
||||
// Using 1 Newton Raphson iterations
|
||||
// Relative error : < 0.02% over full
|
||||
// Precise format : ~half float
|
||||
// 3 ALU
|
||||
//
|
||||
float npFastRcpNR1(float inX)
|
||||
{
|
||||
return fastRcpNR1(inX);
|
||||
}
|
||||
|
||||
//
|
||||
// Using 2 Newton Raphson iterations
|
||||
// Relative error : < 5.0e-005% over full
|
||||
// Precise format : ~full float
|
||||
// 5 ALU
|
||||
//
|
||||
float npFastRcpNR2(float inX)
|
||||
{
|
||||
return fastRcpNR2(inX);
|
||||
}
|
||||
|
||||
// 4th order polynomial approximation
|
||||
// 4 VGRP, 16 ALU Full Rate
|
||||
// 7 * 10^-5 radians precision
|
||||
// Reference : Handbook of Mathematical Functions (chapter : Elementary Transcendental Functions), M. Abramowitz and I.A. Stegun, Ed.
|
||||
float npAcosFast4(float inX)
|
||||
{
|
||||
return acosFast4(inX);
|
||||
}
|
||||
|
||||
// 4th order polynomial approximation
|
||||
// 4 VGRP, 16 ALU Full Rate
|
||||
// 7 * 10^-5 radians precision
|
||||
float npAsinFast4(float inX)
|
||||
{
|
||||
return asinFast4(inX);
|
||||
}
|
||||
|
||||
// 4th order hyperbolical approximation
|
||||
// 4 VGRP, 12 ALU Full Rate
|
||||
// 7 * 10^-5 radians precision
|
||||
// Reference : Efficient approximations for the arctangent function, Rajan, S. Sichun Wang Inkol, R. Joyal, A., May 2006
|
||||
float npAtanFast4(float inX)
|
||||
{
|
||||
return atanFast4(inX);
|
||||
}
|
||||
|
||||
double npLolFabs(double x)
|
||||
{
|
||||
return _lol_fabs(x);
|
||||
}
|
||||
|
||||
double npLolSin(double x)
|
||||
{
|
||||
return _lol_sin(x);
|
||||
}
|
||||
|
||||
double npLolCos(double x)
|
||||
{
|
||||
return _lol_cos(x);
|
||||
}
|
||||
|
||||
void npLolSincos(double x, double *sinx, double *cosx)
|
||||
{
|
||||
_lol_sincos(x, sinx, cosx);
|
||||
}
|
||||
|
||||
void npLolSincosf(float x, float *sinx, float *cosx)
|
||||
{
|
||||
_lol_sincosf(x, sinx, cosx);
|
||||
}
|
||||
|
||||
double npLolTan(double x)
|
||||
{
|
||||
return _lol_tan(x);
|
||||
}
|
||||
|
|
@ -1,426 +0,0 @@
|
|||
//
|
||||
// Lol Engine
|
||||
//
|
||||
// Copyright: (c) 2010-2011 Sam Hocevar <sam@hocevar.net>
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the Do What The Fuck You Want To
|
||||
// Public License, Version 2, as published by Sam Hocevar. See
|
||||
// http://www.wtfpl.net/ for more details.
|
||||
//
|
||||
|
||||
//#include <lol/engine-internal.h>
|
||||
|
||||
#include <cmath>
|
||||
|
||||
static const double D_PI = 3.1415926535897932384626433f;
|
||||
|
||||
#if defined HAVE_FASTMATH_H
|
||||
# include <fastmath.h>
|
||||
#endif
|
||||
|
||||
// Optimisation helpers
|
||||
#if defined __GNUC__
|
||||
# define __likely(x) __builtin_expect(!!(x), 1)
|
||||
# define __unlikely(x) __builtin_expect(!!(x), 0)
|
||||
# define INLINEATTR __attribute__((always_inline))
|
||||
# if defined __x86_64__
|
||||
# define FP_USE(x) __asm__("" : "+x" (x))
|
||||
# elif defined __i386__ /* FIXME: this isn't good */
|
||||
# define FP_USE(x) __asm__("" : "+m" (x))
|
||||
# else
|
||||
# define FP_USE(x) (void)(x)
|
||||
# endif
|
||||
#else
|
||||
# define __likely(x) x
|
||||
# define __unlikely(x) x
|
||||
# define INLINEATTR
|
||||
# define FP_USE(x) (void)(x)
|
||||
#endif
|
||||
|
||||
namespace lol
|
||||
{
|
||||
|
||||
static const double PI_2 = 1.57079632679489661923132;
|
||||
static const double PI_4 = 0.785398163397448309615661;
|
||||
static const double INV_PI = 0.318309886183790671537768;
|
||||
static const double ROOT3 = 1.73205080756887729352745;
|
||||
|
||||
static const double ZERO = 0.0;
|
||||
static const double ONE = 1.0;
|
||||
static const double NEG_ONE = -1.0;
|
||||
static const double HALF = 0.5;
|
||||
static const double QUARTER = 0.25;
|
||||
static const double TWO = 2.0;
|
||||
#if defined __GNUC__
|
||||
static const double VERY_SMALL_NUMBER = 0x1.0p-128;
|
||||
#else
|
||||
static const double VERY_SMALL_NUMBER = 3e-39;
|
||||
#endif
|
||||
static const double TWO_EXP_52 = 4503599627370496.0;
|
||||
static const double TWO_EXP_54 = 18014398509481984.0;
|
||||
|
||||
/** sin Taylor series coefficients. */
|
||||
static const double SC[] =
|
||||
{
|
||||
-1.6449340668482264364724e-0, // π^2/3!
|
||||
+8.1174242528335364363700e-1, // π^4/5!
|
||||
-1.9075182412208421369647e-1, // π^6/7!
|
||||
+2.6147847817654800504653e-2, // π^8/9!
|
||||
-2.3460810354558236375089e-3, // π^10/11!
|
||||
+1.4842879303107100368487e-4, // π^12/13!
|
||||
-6.9758736616563804745344e-6, // π^14/15!
|
||||
+2.5312174041370276513517e-7, // π^16/17!
|
||||
};
|
||||
|
||||
/* Note: the last value should be -1.3878952462213772114468e-7 (ie.
|
||||
* π^18/18!) but we tweak it in order to get the better average precision
|
||||
* required for tan() computations when close to π/2+kπ values. */
|
||||
static const double CC[] =
|
||||
{
|
||||
-4.9348022005446793094172e-0, // π^2/2!
|
||||
+4.0587121264167682181850e-0, // π^4/4!
|
||||
-1.3352627688545894958753e-0, // π^6/6!
|
||||
+2.3533063035889320454188e-1, // π^8/8!
|
||||
-2.5806891390014060012598e-2, // π^10/10!
|
||||
+1.9295743094039230479033e-3, // π^12/12!
|
||||
-1.0463810492484570711802e-4, // π^14/14!
|
||||
+4.3030695870329470072978e-6, // π^16/16!
|
||||
-1.3777e-7,
|
||||
};
|
||||
|
||||
/* Tangent Taylor series coefficients for an argument already scaled by
 * INV_PI: TC[k] multiplies t^(2k+2) in the polynomial that lol_tan()
 * multiplies by x (the constant term ONE is added separately).
 *
 * The coefficients use Sloane's http://oeis.org/A002430 and
 * http://oeis.org/A036279 sequences for the Taylor series of tan().
 * Note: the last value should be 2.12485922978838540352881e5 (i.e.
 * 443861162*π^18/1856156927625), but it is tweaked in order to get
 * sub-1e-11 average precision over a larger range. */
|
||||
static const double TC[] =
|
||||
{
|
||||
3.28986813369645287294483e0, // π^2/3
|
||||
1.29878788045336582981920e1, // 2*π^4/15
|
||||
5.18844961612069061254404e1, // 17*π^6/315
|
||||
2.07509320280908496804928e2, // 62*π^8/2835
|
||||
8.30024701695986756361561e2, // 1382*π^10/155925
|
||||
3.32009324029001216460018e3, // 21844*π^12/6081075
|
||||
1.32803704909665483598490e4, // 929569*π^14/638512875
|
||||
5.31214808666037709352112e4, // 6404582*π^16/10854718875
|
||||
2.373e5, // tuned value, see the note above this table
|
||||
};
|
||||
|
||||
/* Forward declarations of the inline helpers so that INLINEATTR can be
 * attached to them. INLINEATTR is defined outside this excerpt —
 * presumably an inlining attribute; TODO confirm. */
static inline double lol_fabs(double x) INLINEATTR;
|
||||
/* lol_round() and lol_trunc() exist only when building with a GNU-compatible
 * compiler, matching the guard around their definitions. */
#if defined __GNUC__
|
||||
static inline double lol_round(double x) INLINEATTR;
|
||||
static inline double lol_trunc(double x) INLINEATTR;
|
||||
#endif
|
||||
|
||||
/* Returns the absolute value of x.
 * GNU-compatible compilers use the compiler builtin; every other compiler
 * falls back to std::fabs. */
static inline double lol_fabs(double x)
{
#if !defined __GNUC__
    using std::fabs;
    return fabs(x);
#else
    return __builtin_fabs(x);
#endif
}
|
||||
|
||||
#if defined __GNUC__
|
||||
/* Rounds x to the nearest integral value using the compiler's round builtin. */
static inline double lol_round(double x)
{
    const double rounded = __builtin_round(x);
    return rounded;
}
|
||||
|
||||
/* Discards the fractional part of x using the compiler's trunc builtin. */
static inline double lol_trunc(double x)
{
    const double truncated = __builtin_trunc(x);
    return truncated;
}
|
||||
#endif
|
||||
|
||||
double lol_sin(double x)
|
||||
{
|
||||
double absx = lol_fabs(x * INV_PI);
|
||||
|
||||
/* If branches are cheap, skip the cycle count when |x| < π/4,
|
||||
* and only do the Taylor series up to the required precision. */
|
||||
#if LOL_FEATURE_CHEAP_BRANCHES
|
||||
if (absx < QUARTER)
|
||||
{
|
||||
/* Computing x^4 is one multiplication too many we do, but it helps
|
||||
* interleave the Taylor series operations a lot better. */
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE;
|
||||
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
return x * taylor;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Wrap |x| to the range [-1, 1] and keep track of the number of
|
||||
* cycles required. If odd, we'll need to change the sign of the
|
||||
* result. */
|
||||
double num_cycles = absx + TWO_EXP_52;
|
||||
FP_USE(num_cycles); num_cycles -= TWO_EXP_52;
|
||||
|
||||
double is_even = TWO * num_cycles - ONE;
|
||||
FP_USE(is_even); is_even += TWO_EXP_54;
|
||||
FP_USE(is_even); is_even -= TWO_EXP_54;
|
||||
FP_USE(is_even);
|
||||
is_even -= TWO * num_cycles - ONE;
|
||||
double sign = is_even;
|
||||
|
||||
absx -= num_cycles;
|
||||
|
||||
/* If branches are very cheap, we have the option to do the Taylor
|
||||
* series at a much lower degree by splitting. */
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
if (lol_fabs(absx) > QUARTER)
|
||||
{
|
||||
sign = (x * absx >= 0.0) ? sign : -sign;
|
||||
|
||||
double x1 = HALF - lol_fabs(absx);
|
||||
double x2 = x1 * x1;
|
||||
double x4 = x2 * x2;
|
||||
double sub1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
|
||||
return taylor * sign;
|
||||
}
|
||||
#endif
|
||||
|
||||
sign *= (x >= 0.0) ? D_PI : -D_PI;
|
||||
|
||||
/* Compute a Tailor series for sin() and combine sign information. */
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE;
|
||||
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
#else
|
||||
double sub1 = (((SC[7] * x4 + SC[5]) * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
|
||||
double sub2 = ((SC[6] * x4 + SC[4]) * x4 + SC[2]) * x4 + SC[0];
|
||||
#endif
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
|
||||
return absx * taylor * sign;
|
||||
}
|
||||
|
||||
double lol_cos(double x)
|
||||
{
|
||||
double absx = lol_fabs(x * INV_PI);
|
||||
|
||||
#if LOL_FEATURE_CHEAP_BRANCHES
|
||||
if (absx < QUARTER)
|
||||
{
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
|
||||
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
double taylor = (sub1 * x2 + sub2) * x2 + ONE;
|
||||
return taylor;
|
||||
}
|
||||
#endif
|
||||
|
||||
double num_cycles = absx + TWO_EXP_52;
|
||||
FP_USE(num_cycles); num_cycles -= TWO_EXP_52;
|
||||
|
||||
double is_even = TWO * num_cycles - ONE;
|
||||
FP_USE(is_even); is_even += TWO_EXP_54;
|
||||
FP_USE(is_even); is_even -= TWO_EXP_54;
|
||||
FP_USE(is_even);
|
||||
is_even -= TWO * num_cycles - ONE;
|
||||
double sign = is_even;
|
||||
|
||||
absx -= num_cycles;
|
||||
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
if (lol_fabs(absx) > QUARTER)
|
||||
{
|
||||
double x1 = HALF - lol_fabs(absx);
|
||||
double x2 = x1 * x1;
|
||||
double x4 = x2 * x2;
|
||||
double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE;
|
||||
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
|
||||
return x1 * taylor * sign * D_PI;
|
||||
}
|
||||
#endif
|
||||
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
double sub1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
#else
|
||||
double sub1 = (((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double sub2 = ((CC[6] * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0];
|
||||
#endif
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
|
||||
return taylor * sign;
|
||||
}
|
||||
|
||||
void lol_sincos(double x, double *sinx, double *cosx)
|
||||
{
|
||||
double absx = lol_fabs(x * INV_PI);
|
||||
|
||||
#if LOL_FEATURE_CHEAP_BRANCHES
|
||||
if (absx < QUARTER)
|
||||
{
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
|
||||
/* Computing the Taylor series to the 11th order is enough to get
|
||||
* x * 1e-11 precision, but we push it to the 13th order so that
|
||||
* tan() has a better precision. */
|
||||
double subs1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
|
||||
double subs2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
double taylors = subs2 * x2 + subs1;
|
||||
*sinx = x * taylors;
|
||||
|
||||
double subc1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
|
||||
double subc2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
double taylorc = (subc1 * x2 + subc2) * x2 + ONE;
|
||||
*cosx = taylorc;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
double num_cycles = absx + TWO_EXP_52;
|
||||
FP_USE(num_cycles); num_cycles -= TWO_EXP_52;
|
||||
|
||||
double is_even = TWO * num_cycles - ONE;
|
||||
FP_USE(is_even); is_even += TWO_EXP_54;
|
||||
FP_USE(is_even); is_even -= TWO_EXP_54;
|
||||
FP_USE(is_even);
|
||||
is_even -= TWO * num_cycles - ONE;
|
||||
double sin_sign = is_even;
|
||||
double cos_sign = is_even;
|
||||
|
||||
absx -= num_cycles;
|
||||
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
if (lol_fabs(absx) > QUARTER)
|
||||
{
|
||||
cos_sign = sin_sign;
|
||||
sin_sign = (x * absx >= 0.0) ? sin_sign : -sin_sign;
|
||||
|
||||
double x1 = HALF - lol_fabs(absx);
|
||||
double x2 = x1 * x1;
|
||||
double x4 = x2 * x2;
|
||||
|
||||
double subs1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double subs2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
double taylors = subs2 * x2 + subs1;
|
||||
*sinx = taylors * sin_sign;
|
||||
|
||||
double subc1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
|
||||
double subc2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
double taylorc = subc2 * x2 + subc1;
|
||||
*cosx = x1 * taylorc * cos_sign * D_PI;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
sin_sign *= (x >= 0.0) ? D_PI : -D_PI;
|
||||
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES
|
||||
double subs1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
|
||||
double subs2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
|
||||
double subc1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double subc2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
|
||||
#else
|
||||
double subs1 = (((SC[7] * x4 + SC[5]) * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
|
||||
double subs2 = ((SC[6] * x4 + SC[4]) * x4 + SC[2]) * x4 + SC[0];
|
||||
/* Push Taylor series to the 19th order to enhance tan() accuracy. */
|
||||
double subc1 = (((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
|
||||
double subc2 = (((CC[8] * x4 + CC[6]) * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0];
|
||||
#endif
|
||||
double taylors = subs2 * x2 + subs1;
|
||||
*sinx = absx * taylors * sin_sign;
|
||||
|
||||
double taylorc = subc2 * x2 + subc1;
|
||||
*cosx = taylorc * cos_sign;
|
||||
}
|
||||
|
||||
void lol_sincos(float x, float *sinx, float *cosx)
|
||||
{
|
||||
double x2 = static_cast<double>(x);
|
||||
double s2, c2;
|
||||
lol_sincos(x2, &s2, &c2);
|
||||
*sinx = static_cast<float>(s2);
|
||||
*cosx = static_cast<float>(c2);
|
||||
}
|
||||
|
||||
double lol_tan(double x)
|
||||
{
|
||||
#if LOL_FEATURE_CHEAP_BRANCHES
|
||||
double absx = lol_fabs(x * INV_PI);
|
||||
|
||||
/* This value was determined empirically to ensure an error of no
|
||||
* more than x * 1e-11 in this range. */
|
||||
if (absx < 0.163)
|
||||
{
|
||||
double x2 = absx * absx;
|
||||
double x4 = x2 * x2;
|
||||
double sub1 = (((TC[7] * x4 + TC[5]) * x4
|
||||
+ TC[3]) * x4 + TC[1]) * x4 + ONE;
|
||||
double sub2 = (((TC[8] * x4 + TC[6]) * x4
|
||||
+ TC[4]) * x4 + TC[2]) * x4 + TC[0];
|
||||
double taylor = sub2 * x2 + sub1;
|
||||
return x * taylor;
|
||||
}
|
||||
#endif
|
||||
|
||||
double sinx, cosx;
|
||||
lol_sincos(x, &sinx, &cosx);
|
||||
|
||||
/* Ensure cosx isn't zero. FIXME: we lose the cosx sign here. */
|
||||
double absc = lol_fabs(cosx);
|
||||
|
||||
if (__unlikely(absc < VERY_SMALL_NUMBER))
|
||||
cosx = VERY_SMALL_NUMBER;
|
||||
return sinx / cosx;
|
||||
}
|
||||
|
||||
} /* namespace lol */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
double _lol_fabs(double x)
|
||||
{
|
||||
return lol::lol_fabs(x);
|
||||
}
|
||||
|
||||
double _lol_sin(double x)
|
||||
{
|
||||
return lol::lol_sin(x);
|
||||
}
|
||||
|
||||
double _lol_cos(double x)
|
||||
{
|
||||
return lol::lol_cos(x);
|
||||
}
|
||||
|
||||
void _lol_sincos(double x, double *sinx, double *cosx)
|
||||
{
|
||||
lol::lol_sincos(x, sinx, cosx);
|
||||
}
|
||||
|
||||
void _lol_sincosf(float x, float *sinx, float *cosx)
|
||||
{
|
||||
lol::lol_sincos(x, sinx, cosx);
|
||||
}
|
||||
|
||||
double _lol_tan(double x)
|
||||
{
|
||||
return lol::lol_tan(x);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
//
//  lol_trig.h
//  NativePath
//
//  Created by Void on 11/17/15.
//  Copyright © 2015 Voidtarget. All rights reserved.
//

#ifndef lol_trig_h
#define lol_trig_h

/* The functions below are defined with C linkage. The guard makes the
 * prototypes match when this header is included from C++ as well as from C. */
#ifdef __cplusplus
extern "C" {
#endif

/* Absolute value of x. */
double _lol_fabs(double x);

/* Rounding helpers.
 * NOTE(review): no definition of these two is visible next to the other
 * C-linkage wrappers — confirm they are defined before calling them. */
double _lol_round(double x);
double _lol_trunc(double x);

/* Sine and cosine of x. */
double _lol_sin(double x);
double _lol_cos(double x);

/* Sine and cosine of x in one call; results are stored through sinx and
 * cosx. The `f` variant is the single-precision form. */
void _lol_sincos(double x, double *sinx, double *cosx);
void _lol_sincosf(float x, float *sinx, float *cosx);

/* Tangent of x. */
double _lol_tan(double x);

#ifdef __cplusplus
}
#endif

#endif /* lol_trig_h */
|
|
@ -17,7 +17,7 @@ function BuildWindows32DLL(cfile, isCpp)
|
|||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
if isCpp then flags = flags.." -std=c++1z " end
|
||||
local cmd = "clang -v -m32 -DNP_WIN32 -Wall -gcodeview -fno-ms-extensions -nostdlibinc -nobuiltininc -nostdinc++ -target i686-pc-windows-msvc "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -v -m32 -DNP_WIN32 -Wall -gcodeview -fno-ms-extensions -nostdlibinc -nobuiltininc -nostdinc++ -target i686-pc-windows-msvc "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -38,14 +38,19 @@ end
|
|||
|
||||
--LLVM bytecode
|
||||
|
||||
function BuildLLVM32(cfile)
|
||||
function BuildLLVM32(cfile, isCpp)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -m32 -nostdlibinc -nobuiltininc -nostdinc++ -fno-exceptions "..common_flags.." "..flags.." -o "..cfile..".ll ".." -S -c -emit-llvm -target i386-unknown "..cfile;
|
||||
if isCpp then flags = flags.." -std=c++1z -fno-rtti -fno-exceptions" end
|
||||
local cmd = "clang -DNP_LLVM_BC -m32 -nostdlibinc -nobuiltininc -nostdinc++ "..common_flags.." "..flags.." -o "..cfile..".bc ".." -c -emit-llvm -target i386-unknown "..cfile
|
||||
local cmdLL = "clang -DNP_LLVM_BC -m32 -nostdlibinc -nobuiltininc -nostdinc++ "..common_flags.." "..flags.." -o "..cfile..".ll ".." -S -c -emit-llvm -target i386-unknown "..cfile
|
||||
local cmdPP = "clang -DNP_LLVM_BC -m32 -nostdlibinc -nobuiltininc -nostdinc++ "..common_flags.." "..flags.." -E "..cfile.." > "..cfile..".pp"
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
if os.execute(cmd) == 0 then table.insert(objs, cfile..".bc") end
|
||||
os.execute(cmdLL)
|
||||
os.execute(cmdPP)
|
||||
end
|
||||
|
||||
function LinkLLVM32()
|
||||
|
@ -53,17 +58,110 @@ function LinkLLVM32()
|
|||
for i, o in ipairs(objs) do
|
||||
objs_str = objs_str..o.." "
|
||||
end
|
||||
local cmd = "llvm-link -o LLVM32\\"..outputName..".bc "..objs_str
|
||||
local cmd = "llvm-link -o LLVM\\"..outputName.."-i386.bc "..objs_str
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
os.execute(cmd)
|
||||
end
|
||||
|
||||
function BuildLLVM64(cfile)
|
||||
function BuildLLVMarmv7(cfile, isCpp)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -m64 -nostdlibinc -nobuiltininc -nostdinc++ -fno-exceptions "..common_flags.." "..flags.." -o "..cfile..".ll ".." -S -c -emit-llvm -target x86_64-unknown "..cfile;
|
||||
if isCpp then flags = flags.." -std=c++1z -fno-rtti -fno-exceptions" end
|
||||
local cmd = "clang -DNP_LLVM_BC -nostdlibinc -nobuiltininc -nostdinc++ -mfpu=neon -mfloat-abi=hard "..common_flags.." "..flags.." -o "..cfile..".bc ".." -c -emit-llvm -target armv7-unknown "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
if os.execute(cmd) == 0 then table.insert(objs, cfile..".bc") end
|
||||
end
|
||||
|
||||
-- Links every bitcode file listed in the global `objs` into
-- LLVM\<outputName>-armv7.bc with llvm-link.
-- The command is printed first when the global `is_verbose` is exactly true.
function LinkLLVMarmv7()
    local inputs = {}
    for _, obj in ipairs(objs) do
        inputs[#inputs + 1] = obj.." "
    end

    local cmd = "llvm-link -o LLVM\\"..outputName.."-armv7.bc "..table.concat(inputs)
    if is_verbose == true then
        print(cmd)
    end
    os.execute(cmd)
end
|
||||
|
||||
-- Compiles one source file to LLVM bitcode for the armv7s-unknown target.
--   cfile: path of the source file; the output is written to cfile..".bc"
--   isCpp: when truthy, the C++ flags are appended to the flag set
-- Flags come from the globals debug_flags / release_flags (selected by the
-- global `debug`) and common_flags. When os.execute reports 0, the output
-- path is appended to the global `objs`.
function BuildLLVMarmv7s(cfile, isCpp)
    local flags
    if debug then
        flags = debug_flags
    else
        flags = release_flags
    end
    if isCpp then
        flags = flags.." -std=c++1z -fno-rtti -fno-exceptions"
    end

    local cmd = "clang -DNP_LLVM_BC -nostdlibinc -nobuiltininc -nostdinc++ -mfpu=neon -mfloat-abi=hard "
        ..common_flags.." "..flags.." -o "..cfile..".bc ".." -c -emit-llvm -target armv7s-unknown "..cfile
    if is_verbose == true then
        print(cmd)
    end

    local status = os.execute(cmd)
    if status == 0 then
        table.insert(objs, cfile..".bc")
    end
end
|
||||
|
||||
-- Links every bitcode file listed in the global `objs` into
-- LLVM\<outputName>-armv7s.bc with llvm-link.
-- The command is printed first when the global `is_verbose` is exactly true.
function LinkLLVMarmv7s()
    local inputs = {}
    for _, obj in ipairs(objs) do
        inputs[#inputs + 1] = obj.." "
    end

    local cmd = "llvm-link -o LLVM\\"..outputName.."-armv7s.bc "..table.concat(inputs)
    if is_verbose == true then
        print(cmd)
    end
    os.execute(cmd)
end
|
||||
|
||||
-- Compiles one source file to LLVM bitcode for the aarch64-unknown target.
--   cfile: path of the source file; the output is written to cfile..".bc"
--   isCpp: when truthy, the C++ flags are appended to the flag set
-- Flags come from the globals debug_flags / release_flags (selected by the
-- global `debug`) and common_flags. When os.execute reports 0, the output
-- path is appended to the global `objs`.
function BuildLLVMAArch64(cfile, isCpp)
    local flags
    if debug then
        flags = debug_flags
    else
        flags = release_flags
    end
    if isCpp then
        flags = flags.." -std=c++1z -fno-rtti -fno-exceptions"
    end

    local cmd = "clang -DNP_LLVM_BC -nostdlibinc -nobuiltininc -nostdinc++ "
        ..common_flags.." "..flags.." -o "..cfile..".bc ".." -c -emit-llvm -target aarch64-unknown "..cfile
    if is_verbose == true then
        print(cmd)
    end

    local status = os.execute(cmd)
    if status == 0 then
        table.insert(objs, cfile..".bc")
    end
end
|
||||
|
||||
-- Links every bitcode file listed in the global `objs` into
-- LLVM\<outputName>-aarch64.bc with llvm-link.
-- The command is printed first when the global `is_verbose` is exactly true.
function LinkLLVMAArch64()
    local inputs = {}
    for _, obj in ipairs(objs) do
        inputs[#inputs + 1] = obj.." "
    end

    local cmd = "llvm-link -o LLVM\\"..outputName.."-aarch64.bc "..table.concat(inputs)
    if is_verbose == true then
        print(cmd)
    end
    os.execute(cmd)
end
|
||||
|
||||
-- Compiles one source file to LLVM bitcode for the armv6-unknown target.
--   cfile: path of the source file; the output is written to cfile..".bc"
--   isCpp: when truthy, the C++ flags are appended to the flag set
-- Flags come from the globals debug_flags / release_flags (selected by the
-- global `debug`) and common_flags. When os.execute reports 0, the output
-- path is appended to the global `objs`.
function BuildLLVMarmv6(cfile, isCpp)
    local flags
    if debug then
        flags = debug_flags
    else
        flags = release_flags
    end
    if isCpp then
        flags = flags.." -std=c++1z -fno-rtti -fno-exceptions"
    end

    local cmd = "clang -DNP_LLVM_BC -nostdlibinc -nobuiltininc -nostdinc++ -mfloat-abi=hard -mfpu=vfp "
        ..common_flags.." "..flags.." -o "..cfile..".bc ".." -c -emit-llvm -target armv6-unknown "..cfile
    if is_verbose == true then
        print(cmd)
    end

    local status = os.execute(cmd)
    if status == 0 then
        table.insert(objs, cfile..".bc")
    end
end
|
||||
|
||||
-- Links every bitcode file listed in the global `objs` into
-- LLVM\<outputName>-armv6.bc with llvm-link.
-- The command is printed first when the global `is_verbose` is exactly true.
function LinkLLVMarmv6()
    local inputs = {}
    for _, obj in ipairs(objs) do
        inputs[#inputs + 1] = obj.." "
    end

    local cmd = "llvm-link -o LLVM\\"..outputName.."-armv6.bc "..table.concat(inputs)
    if is_verbose == true then
        print(cmd)
    end
    os.execute(cmd)
end
|
||||
|
||||
function BuildLLVM64(cfile, isCpp)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
if isCpp then flags = flags.." -std=c++1z -fno-rtti -fno-exceptions" end
|
||||
local cmd = "clang -DNP_LLVM_BC -m64 -nostdlibinc -nobuiltininc -nostdinc++ "..common_flags.." "..flags.." -o "..cfile..".bc ".." -c -emit-llvm -target x86_64-unknown "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -75,7 +173,7 @@ function LinkLLVM64()
|
|||
for i, o in ipairs(objs) do
|
||||
objs_str = objs_str..o.." "
|
||||
end
|
||||
local cmd = "llvm-link -o LLVM64\\"..outputName..".bc "..objs_str
|
||||
local cmd = "llvm-link -o LLVM\\"..outputName.."-x86_64.bc "..objs_str
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -87,7 +185,7 @@ end
|
|||
function BuildWindows32(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -m32 -DNP_WIN32 -gcodeview -fno-ms-extensions -nostdlibinc -nobuiltininc -nostdinc++ -target i686-pc-windows-msvc "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -m32 -DNP_WIN32 -gcodeview -fno-ms-extensions -nostdlibinc -nobuiltininc -nostdinc++ -target i686-pc-windows-msvc "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -109,7 +207,7 @@ end
|
|||
function BuildWindows64(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -m64 -DNP_WIN32 -gcodeview -fno-ms-extensions -nostdlibinc -nobuiltininc -nostdinc++ -target i686-pc-windows-msvc "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -m64 -DNP_WIN32 -gcodeview -fno-ms-extensions -nostdlibinc -nobuiltininc -nostdinc++ -target i686-pc-windows-msvc "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -155,7 +253,7 @@ end
|
|||
function BuildWindowsUWP64(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_ms_flags else flags = release_ms_flags end
|
||||
local cmd = "clang-cl -DNP_WIN32 -WX -EHsc -GS- -MD -DWIN_EXPORT -m64 "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang-cl -DNP_WIN32 -WX -EHsc -GS- -MD -DWIN_EXPORT -m64 "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -177,7 +275,7 @@ end
|
|||
function BuildWindowsUWPARM(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_ms_flags else flags = release_ms_flags end
|
||||
local cmd = "clang-cl -DNP_WIN32 -WX -EHsc -GS- -MD -DWIN_EXPORT -m32 --target=thumbv7-windows-msvc "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang-cl -DNP_WIN32 -WX -EHsc -GS- -MD -DWIN_EXPORT -m32 --target=thumbv7-windows-msvc "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -266,7 +364,7 @@ function BuildIOSArm7(cfile, isCpp)
|
|||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
if isCpp then flags = flags.." -std=c++1z " end
|
||||
local cmd = "clang -DNP_IOS -nostdlibinc -nobuiltininc -nostdinc++ -mios-version-min=6.0 -target armv7-apple-ios "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_IOS -nostdlibinc -nobuiltininc -nostdinc++ -mios-version-min=6.0 -target armv7-apple-ios "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -291,7 +389,7 @@ end
|
|||
function BuildIOSArm7s(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_IOS -nostdlibinc -nobuiltininc -nostdinc++ -mios-version-min=6.0 -target armv7s-apple-ios "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_IOS -nostdlibinc -nobuiltininc -nostdinc++ -mios-version-min=6.0 -target armv7s-apple-ios "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -313,7 +411,7 @@ end
|
|||
function BuildIOSArm64(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_IOS -nostdlibinc -nobuiltininc -nostdinc++ -mios-version-min=6.0 -target arm64-apple-ios "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_IOS -nostdlibinc -nobuiltininc -nostdinc++ -mios-version-min=6.0 -target arm64-apple-ios "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -335,7 +433,7 @@ end
|
|||
function BuildIOSx86(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_IOS -nostdlibinc -nobuiltininc -nostdinc++ -mios-version-min=6.0 -target i386-apple-ios "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_IOS -nostdlibinc -nobuiltininc -nostdinc++ -mios-version-min=6.0 -target i386-apple-ios "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -357,7 +455,7 @@ end
|
|||
function BuildIOSx64(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_IOS -nostdlibinc -nobuiltininc -nostdinc++ -mios-version-min=6.0 -target x86_64-apple-ios "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_IOS -nostdlibinc -nobuiltininc -nostdinc++ -mios-version-min=6.0 -target x86_64-apple-ios "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -381,7 +479,7 @@ end
|
|||
function BuildMacOSx86(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_MACOS -nostdlibinc -nobuiltininc -nostdinc++ -mmacosx-version-min=10.5 -target i386-apple-macosx "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_MACOS -nostdlibinc -nobuiltininc -nostdinc++ -mmacosx-version-min=10.5 -target i386-apple-macosx "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -403,7 +501,7 @@ end
|
|||
function BuildMacOSx64(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_MACOS -nostdlibinc -nobuiltininc -nostdinc++ -mmacosx-version-min=10.5 -target x86_64-apple-macosx "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_MACOS -nostdlibinc -nobuiltininc -nostdinc++ -mmacosx-version-min=10.5 -target x86_64-apple-macosx "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -427,7 +525,7 @@ end
|
|||
function BuildAndroidArm(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_ANDROID -nostdlibinc -nobuiltininc -nostdinc++ -target arm-none-android "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_ANDROID -nostdlibinc -nobuiltininc -nostdinc++ -target arm-none-android "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -449,7 +547,7 @@ end
|
|||
function BuildAndroidArm7(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_ANDROID -nostdlibinc -nobuiltininc -nostdinc++ -target armv7-none-android "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_ANDROID -nostdlibinc -nobuiltininc -nostdinc++ -target armv7-none-android "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -471,7 +569,7 @@ end
|
|||
function BuildAndroidArm64(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_ANDROID -nostdlibinc -nobuiltininc -nostdinc++ -target aarch64-none-android "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_ANDROID -nostdlibinc -nobuiltininc -nostdinc++ -target aarch64-none-android "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -493,7 +591,7 @@ end
|
|||
function BuildAndroidx86(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_ANDROID -nostdlibinc -nobuiltininc -nostdinc++ -target i386-none-android "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_ANDROID -nostdlibinc -nobuiltininc -nostdinc++ -target i386-none-android "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -518,7 +616,7 @@ end
|
|||
function BuildAndroidx64(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_ANDROID -nostdlibinc -nobuiltininc -nostdinc++ -target x86_64-none-android "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_ANDROID -nostdlibinc -nobuiltininc -nostdinc++ -target x86_64-none-android "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -542,7 +640,7 @@ end
|
|||
function BuildLinuxX64(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_LINUX -nostdlibinc -nobuiltininc -nostdinc++ -fPIC -target x86_64-linux-gnu "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_LINUX -nostdlibinc -nobuiltininc -nostdinc++ -fPIC -target x86_64-linux-gnu "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -564,7 +662,7 @@ end
|
|||
function BuildLinuxX86(cfile)
|
||||
local flags = ""
|
||||
if debug then flags = debug_flags else flags = release_flags end
|
||||
local cmd = "clang -DNP_LINUX -nostdlibinc -nobuiltininc -nostdinc++ -fPIC -target i386-linux-gnu "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile;
|
||||
local cmd = "clang -DNP_LINUX -nostdlibinc -nobuiltininc -nostdinc++ -fPIC -target i386-linux-gnu "..common_flags.." "..flags.." -o "..cfile..".o ".." -c "..cfile
|
||||
if is_verbose == true then
|
||||
print(cmd)
|
||||
end
|
||||
|
@ -767,23 +865,67 @@ elseif platform == "macos" then
|
|||
os.remove("macOS\\"..outputName.."_x86_64.a")
|
||||
|
||||
elseif platform == "llvm" then
|
||||
lfs.mkdir("LLVM32")
|
||||
lfs.mkdir("LLVM")
|
||||
|
||||
objs = {}
|
||||
print ("Building LLVM x86...")
|
||||
for i,f in ipairs(cfiles) do
|
||||
BuildLLVM32(f)
|
||||
BuildLLVM32(f, false)
|
||||
end
|
||||
for i,f in ipairs(cppfiles) do
|
||||
BuildLLVM32(f, true)
|
||||
end
|
||||
LinkLLVM32()
|
||||
|
||||
lfs.mkdir("LLVM64")
|
||||
|
||||
objs = {}
|
||||
print ("Building LLVM x64...")
|
||||
for i,f in ipairs(cfiles) do
|
||||
BuildLLVM64(f)
|
||||
BuildLLVM64(f, false)
|
||||
end
|
||||
for i,f in ipairs(cppfiles) do
|
||||
BuildLLVM64(f, true)
|
||||
end
|
||||
LinkLLVM64()
|
||||
|
||||
objs = {}
|
||||
print ("Building LLVM armv6...")
|
||||
for i,f in ipairs(cfiles) do
|
||||
BuildLLVMarmv6(f, false)
|
||||
end
|
||||
for i,f in ipairs(cppfiles) do
|
||||
BuildLLVMarmv6(f, true)
|
||||
end
|
||||
LinkLLVMarmv6()
|
||||
|
||||
objs = {}
|
||||
print ("Building LLVM armv7...")
|
||||
for i,f in ipairs(cfiles) do
|
||||
BuildLLVMarmv7(f, false)
|
||||
end
|
||||
for i,f in ipairs(cppfiles) do
|
||||
BuildLLVMarmv7(f, true)
|
||||
end
|
||||
LinkLLVMarmv7()
|
||||
|
||||
objs = {}
|
||||
print ("Building LLVM armv7s...")
|
||||
for i,f in ipairs(cfiles) do
|
||||
BuildLLVMarmv7s(f, false)
|
||||
end
|
||||
for i,f in ipairs(cppfiles) do
|
||||
BuildLLVMarmv7s(f, true)
|
||||
end
|
||||
LinkLLVMarmv7s()
|
||||
|
||||
objs = {}
|
||||
print ("Building LLVM AArch64...")
|
||||
for i,f in ipairs(cfiles) do
|
||||
BuildLLVMAArch64(f, false)
|
||||
end
|
||||
for i,f in ipairs(cppfiles) do
|
||||
BuildLLVMAArch64(f, true)
|
||||
end
|
||||
LinkLLVMAArch64()
|
||||
|
||||
elseif platform == "linux" then
|
||||
lfs.mkdir("Linux")
|
||||
|
|
Загрузка…
Ссылка в новой задаче