2019-07-25 14:46:59 +03:00
|
|
|
/*!
|
|
|
|
* Copyright (c) 2018 Microsoft Corporation. All rights reserved.
|
|
|
|
* Licensed under the MIT License. See LICENSE file in the project root for license information.
|
|
|
|
*/
|
2018-01-31 09:20:41 +03:00
|
|
|
/* lightgbmlib.i */
|
|
|
|
%module lightgbmlib
|
|
|
|
%ignore LGBM_BoosterSaveModelToString;
|
2019-03-16 09:29:41 +03:00
|
|
|
%ignore LGBM_BoosterGetEvalNames;
|
2020-03-20 04:05:38 +03:00
|
|
|
%ignore LGBM_BoosterGetFeatureNames;
|
2018-01-31 09:20:41 +03:00
|
|
|
%{
|
|
|
|
/* Includes the header in the wrapper code */
|
|
|
|
#include "../include/LightGBM/export.h"
|
|
|
|
#include "../include/LightGBM/utils/log.h"
|
2019-03-16 09:29:41 +03:00
|
|
|
#include "../include/LightGBM/utils/common.h"
|
2018-01-31 09:20:41 +03:00
|
|
|
#include "../include/LightGBM/c_api.h"
|
|
|
|
%}
|
|
|
|
|
2019-04-29 11:43:29 +03:00
|
|
|
%include "various.i"
|
|
|
|
%include "carrays.i"
|
|
|
|
%include "cpointer.i"
|
2019-08-30 04:59:54 +03:00
|
|
|
%include "stdint.i"
|
2019-04-29 11:43:29 +03:00
|
|
|
|
|
|
|
/* Note: rather than using array_functions for string arrays, we apply a typemap.
   Any future char** parameter names should be added to the typemap below.
*/
|
|
|
|
%apply char **STRING_ARRAY { char **feature_names, char **out_strs }
|
|
|
|
|
2018-01-31 09:20:41 +03:00
|
|
|
/* header files */
|
|
|
|
%include "../include/LightGBM/export.h"
|
|
|
|
%include "../include/LightGBM/c_api.h"
|
|
|
|
|
2019-07-25 14:46:59 +03:00
|
|
|
/* Supplies the JNI environment to wrapped functions that declare a `JNIEnv *jenv` parameter.
   With `numinputs=0` SWIG does not expect this argument from the Java side; the typemap body
   assigns it from the `jenv` variable of the generated wrapper instead. */
%typemap(in, numinputs=0) JNIEnv *jenv %{
  $1 = jenv;
%}
|
|
|
|
|
2018-01-31 09:20:41 +03:00
|
|
|
%inline %{
|
|
|
|
char * LGBM_BoosterSaveModelToStringSWIG(BoosterHandle handle,
|
2019-03-16 09:29:41 +03:00
|
|
|
int start_iteration,
|
|
|
|
int num_iteration,
|
2020-07-15 22:18:53 +03:00
|
|
|
int feature_importance_type,
|
2019-03-16 09:29:41 +03:00
|
|
|
int64_t buffer_len,
|
|
|
|
int64_t* out_len) {
|
2018-01-31 09:20:41 +03:00
|
|
|
char* dst = new char[buffer_len];
|
2020-07-15 22:18:53 +03:00
|
|
|
int result = LGBM_BoosterSaveModelToString(handle, start_iteration, num_iteration, feature_importance_type, buffer_len, out_len, dst);
|
2019-03-16 09:29:41 +03:00
|
|
|
// Reallocate to use larger length
|
|
|
|
if (*out_len > buffer_len) {
|
|
|
|
delete [] dst;
|
|
|
|
int64_t realloc_len = *out_len;
|
|
|
|
dst = new char[realloc_len];
|
2020-07-15 22:18:53 +03:00
|
|
|
result = LGBM_BoosterSaveModelToString(handle, start_iteration, num_iteration, feature_importance_type, realloc_len, out_len, dst);
|
2019-03-16 09:29:41 +03:00
|
|
|
}
|
|
|
|
if (result != 0) {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
return dst;
|
|
|
|
}
|
|
|
|
|
2019-11-13 16:47:16 +03:00
|
|
|
char * LGBM_BoosterDumpModelSWIG(BoosterHandle handle,
|
|
|
|
int start_iteration,
|
|
|
|
int num_iteration,
|
2020-07-15 22:18:53 +03:00
|
|
|
int feature_importance_type,
|
2019-11-13 16:47:16 +03:00
|
|
|
int64_t buffer_len,
|
|
|
|
int64_t* out_len) {
|
|
|
|
char* dst = new char[buffer_len];
|
2020-07-15 22:18:53 +03:00
|
|
|
int result = LGBM_BoosterDumpModel(handle, start_iteration, num_iteration, feature_importance_type, buffer_len, out_len, dst);
|
2019-11-13 16:47:16 +03:00
|
|
|
// Reallocate to use larger length
|
|
|
|
if (*out_len > buffer_len) {
|
|
|
|
delete [] dst;
|
|
|
|
int64_t realloc_len = *out_len;
|
|
|
|
dst = new char[realloc_len];
|
2020-07-15 22:18:53 +03:00
|
|
|
result = LGBM_BoosterDumpModel(handle, start_iteration, num_iteration, feature_importance_type, realloc_len, out_len, dst);
|
2019-11-13 16:47:16 +03:00
|
|
|
}
|
|
|
|
if (result != 0) {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
return dst;
|
|
|
|
}
|
|
|
|
|
2019-07-25 14:46:59 +03:00
|
|
|
int LGBM_BoosterPredictForMatSingle(JNIEnv *jenv,
|
2019-03-19 02:37:48 +03:00
|
|
|
jdoubleArray data,
|
|
|
|
BoosterHandle handle,
|
|
|
|
int data_type,
|
|
|
|
int ncol,
|
|
|
|
int is_row_major,
|
|
|
|
int predict_type,
|
2020-08-06 15:40:33 +03:00
|
|
|
int start_iteration,
|
2019-03-19 02:37:48 +03:00
|
|
|
int num_iteration,
|
|
|
|
const char* parameter,
|
|
|
|
int64_t* out_len,
|
|
|
|
double* out_result) {
|
|
|
|
double* data0 = (double*)jenv->GetPrimitiveArrayCritical(data, 0);
|
2019-07-25 14:46:59 +03:00
|
|
|
|
2020-08-06 15:40:33 +03:00
|
|
|
int ret = LGBM_BoosterPredictForMatSingleRow(handle, data0, data_type, ncol, is_row_major, predict_type, start_iteration,
|
2019-04-29 11:43:29 +03:00
|
|
|
num_iteration, parameter, out_len, out_result);
|
2019-07-25 14:46:59 +03:00
|
|
|
|
2019-03-19 02:37:48 +03:00
|
|
|
jenv->ReleasePrimitiveArrayCritical(data, data0, JNI_ABORT);
|
2019-07-25 14:46:59 +03:00
|
|
|
|
2019-03-19 02:37:48 +03:00
|
|
|
return ret;
|
|
|
|
}
|
2019-07-25 14:46:59 +03:00
|
|
|
|
2020-07-15 22:50:56 +03:00
|
|
|
/*! \brief Even faster variant of `LGBM_BoosterPredictForMatSingle`.
|
|
|
|
*
|
|
|
|
* Uses `LGBM_BoosterPredictForMatSingleRowFast` which is faster
|
|
|
|
* than `LGBM_BoosterPredictForMatSingleRow` and the trick of
|
|
|
|
* `LGBM_BoosterPredictForMatSingle` to capture the Java data array
|
|
|
|
* using `GetPrimitiveArrayCritical`, which can yield faster access
|
|
|
|
* to the array if the JVM passes the actual address to the C++ side
|
|
|
|
* instead of performing a copy.
|
|
|
|
*/
|
|
|
|
int LGBM_BoosterPredictForMatSingleRowFastCriticalSWIG(JNIEnv *jenv,
|
|
|
|
jdoubleArray data,
|
|
|
|
FastConfigHandle handle,
|
|
|
|
int64_t* out_len,
|
|
|
|
double* out_result) {
|
|
|
|
double* data0 = (double*)jenv->GetPrimitiveArrayCritical(data, 0);
|
|
|
|
|
2020-08-05 18:05:20 +03:00
|
|
|
int ret = LGBM_BoosterPredictForMatSingleRowFast(handle, data0, out_len, out_result);
|
2020-07-15 22:50:56 +03:00
|
|
|
|
|
|
|
jenv->ReleasePrimitiveArrayCritical(data, data0, JNI_ABORT);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-03-19 02:37:48 +03:00
|
|
|
int LGBM_BoosterPredictForCSRSingle(JNIEnv *jenv,
|
2019-07-25 14:46:59 +03:00
|
|
|
jintArray indices,
|
|
|
|
jdoubleArray values,
|
|
|
|
int numNonZeros,
|
|
|
|
BoosterHandle handle,
|
|
|
|
int indptr_type,
|
|
|
|
int data_type,
|
|
|
|
int64_t nelem,
|
|
|
|
int64_t num_col,
|
|
|
|
int predict_type,
|
2020-08-06 15:40:33 +03:00
|
|
|
int start_iteration,
|
2019-07-25 14:46:59 +03:00
|
|
|
int num_iteration,
|
|
|
|
const char* parameter,
|
|
|
|
int64_t* out_len,
|
|
|
|
double* out_result) {
|
2019-03-19 02:37:48 +03:00
|
|
|
// Alternatives
|
|
|
|
// - GetIntArrayElements: performs copy
|
|
|
|
// - GetDirectBufferAddress: fails on wrapped array
|
|
|
|
// Some words of warning for GetPrimitiveArrayCritical
|
|
|
|
// https://stackoverflow.com/questions/23258357/whats-the-trade-off-between-using-getprimitivearraycritical-and-getprimitivety
|
2019-07-25 14:46:59 +03:00
|
|
|
|
2019-03-19 02:37:48 +03:00
|
|
|
jboolean isCopy;
|
|
|
|
int* indices0 = (int*)jenv->GetPrimitiveArrayCritical(indices, &isCopy);
|
|
|
|
double* values0 = (double*)jenv->GetPrimitiveArrayCritical(values, &isCopy);
|
2019-07-25 14:46:59 +03:00
|
|
|
|
2019-03-19 02:37:48 +03:00
|
|
|
int32_t ind[2] = { 0, numNonZeros };
|
2019-07-25 14:46:59 +03:00
|
|
|
|
2019-03-19 02:37:48 +03:00
|
|
|
int ret = LGBM_BoosterPredictForCSRSingleRow(handle, ind, indptr_type, indices0, values0, data_type, 2,
|
2020-08-06 15:40:33 +03:00
|
|
|
nelem, num_col, predict_type, start_iteration, num_iteration, parameter, out_len, out_result);
|
2019-07-25 14:46:59 +03:00
|
|
|
|
2019-03-19 02:37:48 +03:00
|
|
|
jenv->ReleasePrimitiveArrayCritical(values, values0, JNI_ABORT);
|
2020-07-15 22:50:56 +03:00
|
|
|
jenv->ReleasePrimitiveArrayCritical(indices, indices0, JNI_ABORT);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*! \brief Even faster variant of `LGBM_BoosterPredictForCSRSingle`.
|
|
|
|
*
|
|
|
|
* Uses `LGBM_BoosterPredictForCSRSingleRowFast` which is faster
|
|
|
|
* than `LGBM_BoosterPredictForMatSingleRow` and the trick of
|
|
|
|
* `LGBM_BoosterPredictForCSRSingle` to capture the Java data array
|
|
|
|
* using `GetPrimitiveArrayCritical`, which can yield faster access
|
|
|
|
* to the array if the JVM passes the actual address to the C++ side
|
|
|
|
* instead of performing a copy.
|
|
|
|
*/
|
|
|
|
int LGBM_BoosterPredictForCSRSingleRowFastCriticalSWIG(JNIEnv *jenv,
|
|
|
|
jintArray indices,
|
|
|
|
jdoubleArray values,
|
|
|
|
int numNonZeros,
|
|
|
|
FastConfigHandle handle,
|
|
|
|
int indptr_type,
|
|
|
|
int64_t nelem,
|
|
|
|
int64_t* out_len,
|
|
|
|
double* out_result) {
|
|
|
|
// Alternatives
|
|
|
|
// - GetIntArrayElements: performs copy
|
|
|
|
// - GetDirectBufferAddress: fails on wrapped array
|
|
|
|
// Some words of warning for GetPrimitiveArrayCritical
|
|
|
|
// https://stackoverflow.com/questions/23258357/whats-the-trade-off-between-using-getprimitivearraycritical-and-getprimitivety
|
|
|
|
|
|
|
|
jboolean isCopy;
|
|
|
|
int* indices0 = (int*)jenv->GetPrimitiveArrayCritical(indices, &isCopy);
|
|
|
|
double* values0 = (double*)jenv->GetPrimitiveArrayCritical(values, &isCopy);
|
|
|
|
|
|
|
|
int32_t ind[2] = { 0, numNonZeros };
|
|
|
|
|
|
|
|
int ret = LGBM_BoosterPredictForCSRSingleRowFast(handle, ind, indptr_type, indices0, values0, 2,
|
2020-08-05 18:05:20 +03:00
|
|
|
nelem, out_len, out_result);
|
2020-07-15 22:50:56 +03:00
|
|
|
|
|
|
|
jenv->ReleasePrimitiveArrayCritical(values, values0, JNI_ABORT);
|
2019-03-19 02:37:48 +03:00
|
|
|
jenv->ReleasePrimitiveArrayCritical(indices, indices0, JNI_ABORT);
|
2019-07-25 14:46:59 +03:00
|
|
|
|
2019-03-19 02:37:48 +03:00
|
|
|
return ret;
|
|
|
|
}
|
2019-07-25 14:46:59 +03:00
|
|
|
|
2019-03-19 02:37:48 +03:00
|
|
|
#include <functional>
|
2019-07-25 14:46:59 +03:00
|
|
|
#include <vector>
|
|
|
|
|
2019-03-19 02:37:48 +03:00
|
|
|
struct CSRDirect {
|
|
|
|
jintArray indices;
|
|
|
|
jdoubleArray values;
|
|
|
|
int* indices0;
|
|
|
|
double* values0;
|
|
|
|
int size;
|
|
|
|
};
|
2019-07-25 14:46:59 +03:00
|
|
|
|
2019-03-19 02:37:48 +03:00
|
|
|
/*! \brief Creates a dataset from a Java array of `org.apache.spark.ml.linalg.SparseVector` rows.
 *
 * For every row the `indices()` and `values()` arrays are fetched and their elements are
 * obtained up front with `GetIntArrayElements` / `GetDoubleArrayElements`. The rows are then
 * fed to `LGBM_DatasetCreateFromCSRFunc` through a `std::function` that only reads the cached
 * native pointers, so it performs no JNI calls itself.
 *
 * \param jenv JNI environment
 * \param arrayOfSparseVector Java array with one SparseVector per row
 * \param num_rows Number of rows read from `arrayOfSparseVector`
 * \param num_col Forwarded to `LGBM_DatasetCreateFromCSRFunc`
 * \param parameters Forwarded to `LGBM_DatasetCreateFromCSRFunc`
 * \param reference Forwarded to `LGBM_DatasetCreateFromCSRFunc`
 * \param[out] out Forwarded to `LGBM_DatasetCreateFromCSRFunc`
 * \return Result of `LGBM_DatasetCreateFromCSRFunc`, or -1 if a JNI lookup/access failed
 */
int LGBM_DatasetCreateFromCSRSpark(JNIEnv *jenv,
                                   jobjectArray arrayOfSparseVector,
                                   int num_rows,
                                   int64_t num_col,
                                   const char* parameters,
                                   const DatasetHandle reference,
                                   DatasetHandle* out) {
  jclass sparseVectorClass = jenv->FindClass("org/apache/spark/ml/linalg/SparseVector");
  if (sparseVectorClass == nullptr) {
    return -1;
  }
  jmethodID sparseVectorIndices = jenv->GetMethodID(sparseVectorClass, "indices", "()[I");
  if (sparseVectorIndices == nullptr) {
    return -1;
  }
  jmethodID sparseVectorValues = jenv->GetMethodID(sparseVectorClass, "values", "()[D");
  if (sparseVectorValues == nullptr) {
    return -1;
  }

  std::vector<CSRDirect> jniCache;
  jniCache.reserve(num_rows);

  // Releases everything cached so far. Used on the normal path and on every early exit,
  // so that a failure half-way through the rows does not leak the arrays already acquired.
  // JNI_ABORT: the data is only read, nothing needs to be copied back.
  auto releaseCache = [&]() {
    for (auto& jc : jniCache) {
      // jenv->ReleasePrimitiveArrayCritical(jc.values, jc.values0, JNI_ABORT);
      // jenv->ReleasePrimitiveArrayCritical(jc.indices, jc.indices0, JNI_ABORT);
      jenv->ReleaseDoubleArrayElements(jc.values, jc.values0, JNI_ABORT);
      jenv->ReleaseIntArrayElements(jc.indices, reinterpret_cast<jint*>(jc.indices0), JNI_ABORT);
    }
  };

  // this needs to be done ahead of time as row_func is invoked from multiple threads
  // these threads would have to be registered with the JVM and also unregistered.
  // It is not clear if that can be achieved with OpenMP
  for (int i = 0; i < num_rows; i++) {
    // get the row
    jobject objSparseVec = jenv->GetObjectArrayElement(arrayOfSparseVector, i);
    if (objSparseVec == nullptr || jenv->ExceptionCheck()) {
      releaseCache();
      return -1;
    }

    // get the size, indices and values
    auto indices = (jintArray)jenv->CallObjectMethod(objSparseVec, sparseVectorIndices);
    if (jenv->ExceptionCheck() || indices == nullptr) {
      releaseCache();
      return -1;
    }
    auto values = (jdoubleArray)jenv->CallObjectMethod(objSparseVec, sparseVectorValues);
    if (jenv->ExceptionCheck() || values == nullptr) {
      releaseCache();
      return -1;
    }
    // The row object itself is no longer needed; drop its local reference so that
    // references do not pile up across the loop.
    jenv->DeleteLocalRef(objSparseVec);

    int size = jenv->GetArrayLength(indices);

    // Note: when testing on larger data (e.g. 288k rows per partition and 36mio rows total)
    // using GetPrimitiveArrayCritical resulted in a dead-lock
    // lock arrays
    // int* indices0 = (int*)jenv->GetPrimitiveArrayCritical(indices, 0);
    // double* values0 = (double*)jenv->GetPrimitiveArrayCritical(values, 0);
    // in test-usecase an alternative to GetPrimitiveArrayCritical as it performs copies
    int* indices0 = reinterpret_cast<int*>(jenv->GetIntArrayElements(indices, nullptr));
    if (indices0 == nullptr) {
      releaseCache();
      return -1;
    }
    double* values0 = jenv->GetDoubleArrayElements(values, nullptr);
    if (values0 == nullptr) {
      // This row is not in the cache yet, so release its indices explicitly.
      jenv->ReleaseIntArrayElements(indices, reinterpret_cast<jint*>(indices0), JNI_ABORT);
      releaseCache();
      return -1;
    }

    jniCache.push_back({indices, values, indices0, values0, size});
  }

  // type is important here as we want a std::function, rather than a lambda
  std::function<void(int idx, std::vector<std::pair<int, double>>& ret)> row_func = [&](int row_num, std::vector<std::pair<int, double>>& ret) {
    auto& jc = jniCache[row_num];
    ret.clear();  // reset size, but not free()
    ret.reserve(jc.size);  // make sure we have enough allocated

    // copy data
    int* indices0p = jc.indices0;
    double* values0p = jc.values0;
    int* indices0e = indices0p + jc.size;

    for (; indices0p != indices0e; ++indices0p, ++values0p) {
      ret.emplace_back(*indices0p, *values0p);
    }
  };

  int ret = LGBM_DatasetCreateFromCSRFunc(&row_func, num_rows, num_col, parameters, reference, out);

  releaseCache();

  return ret;
}
|
|
|
|
%}
|
|
|
|
|
2020-03-20 04:05:38 +03:00
|
|
|
|
2020-11-24 07:49:49 +03:00
|
|
|
%include "pointer_manipulation.i"
|
2020-03-20 04:05:38 +03:00
|
|
|
%include "StringArray_API_extensions.i"
|
2021-03-21 15:07:21 +03:00
|
|
|
%include "ChunkedArray_API_extensions.i"
|