Merge pull request #3 from microsoft/chjinche/add_custom_parser_example
Add FreeForm2Parser as customized parser example
This commit is contained in:
Коммит
6de49eff7e
|
@ -1 +1,4 @@
|
|||
build
|
||||
build
|
||||
*egg-info
|
||||
dist
|
||||
*.so
|
|
@ -0,0 +1,21 @@
|
|||
# Top-level build for the custom parser example.
#
# Pulls in the LightGBM and transform sub-projects, then builds the
# `_custom_parser` shared library from examples/freeform2_parser.cpp and links
# it against the `_lightgbm` and `_transform` targets.
cmake_minimum_required(VERSION 3.15.0 FATAL_ERROR)

project(custom_transform)

set(LIB_NAME "_custom_parser")

add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/external_libs/LightGBM)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/)

# The shared library is written to the project root; the packaging script
# copies `lib_custom_parser.so` from there.
set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR})

# Only real source files belong here. The LightGBM include directory is an
# include path (see target_include_directories below), not a source.
add_library(${LIB_NAME} SHARED
  ${CMAKE_CURRENT_SOURCE_DIR}/examples/freeform2_parser.cpp
)

target_include_directories(${LIB_NAME} PUBLIC
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/external_libs/LightGBM/include
)

# --no-as-needed keeps both libraries as runtime dependencies even if the
# linker sees no direct symbol use; the group lets the linker resolve
# references between them regardless of order.
target_link_libraries(${LIB_NAME}
  -Wl,--no-as-needed
  -Wl,--start-group
  _lightgbm
  _transform
  -Wl,--end-group
)
|
|
@ -0,0 +1,49 @@
|
|||
#include <LightGBM/dataset.h>
|
||||
#include <LightGBM/utils/log.h>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include "TransformProcessor.h"
|
||||
using namespace std;
|
||||
|
||||
|
||||
namespace LightGBM {
|
||||
class FreeForm2Parser: public Parser {
|
||||
public:
|
||||
FreeForm2Parser(std::string config_str) {
|
||||
string label_key = "labelId:";
|
||||
string expr_key = "transform:\n";
|
||||
string header_key = "header:\n";
|
||||
|
||||
size_t start_pos = config_str.find(label_key);
|
||||
config_str.erase(0, start_pos);
|
||||
size_t end_pos = config_str.find("\n");
|
||||
string label_line = config_str.substr(label_key.size(), end_pos);
|
||||
int label_idx = std::stod(label_line);
|
||||
|
||||
start_pos = config_str.find(expr_key);
|
||||
config_str.erase(0, start_pos);
|
||||
end_pos = config_str.find("end of transform");
|
||||
string transform_str = config_str.substr(expr_key.size(), end_pos);
|
||||
|
||||
start_pos = config_str.find(header_key);
|
||||
config_str.erase(0, start_pos);
|
||||
end_pos = config_str.find("end of header");
|
||||
string header_str = config_str.substr(header_key.size(), end_pos);
|
||||
Log::Info("Initializing transform processor.");
|
||||
transform_.reset(new TransformProcessor(transform_str, header_str, label_idx));
|
||||
}
|
||||
inline void ParseOneLine(const char* str, std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
|
||||
vector<string> out_feature_strs;
|
||||
out_feature_strs.clear();
|
||||
transform_->Parse(str, &out_feature_strs, out_label, "\t");
|
||||
transform_->Apply(&out_feature_strs, out_features);
|
||||
}
|
||||
inline int NumFeatures() const override {return transform_->GetFeatureCount();}
|
||||
private:
|
||||
std::unique_ptr<TransformProcessor> transform_;
|
||||
};
|
||||
|
||||
Parser* CreateObject(std::string config_str) { return new FreeForm2Parser(config_str);}
|
||||
ParserReflector reflector("FreeForm2Parser", CreateObject);
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
import ctypes
|
||||
from pathlib import Path
|
||||
|
||||
CUSTOM_PARSER_LIB_NAME = 'lib_custom_parser.so'

# Load the precompiled shared libraries that sit next to this file, in the
# order listed. The directory is resolved once; no output is printed so that
# importing the module stays silent.
# NOTE(review): the order presumably reflects link dependencies (the custom
# parser links against the other two) — keep it unless verified otherwise.
_lib_dir = Path(__file__).resolve().parent
for _lib_name in ['lib_transform.so', 'lib_lightgbm.so', CUSTOM_PARSER_LIB_NAME]:
    ctypes.cdll.LoadLibrary(str(_lib_dir / _lib_name))
|
|
@ -0,0 +1,17 @@
|
|||
# Builds the shared libraries, bundles them into the LightGBM Python wheel and
# uploads the wheel.
# Run from the repo root dir: `sh ./scripts/publish_python_package.sh`.
lgb_python_pkg_dir="./external_libs/LightGBM/python-package"

# Puts the original basic.py back and removes the temporary files and the
# shared libs copied into the python package. Safe to call more than once.
restore_python_package() {
    if [ -f raw ]; then
        mv raw "${lgb_python_pkg_dir}/lightgbm/basic.py"
    fi
    rm -f tmp "${lgb_python_pkg_dir}"/lightgbm/*.so
}

# Compile the transformation, LightGBM and customized parser libs.
# The build directory is created if missing and reused otherwise; the subshell
# keeps the working directory of this script unchanged.
(mkdir -p build && cd build && cmake ../ && make -j4) || exit 1

# From here on the python package is modified in place, so make sure it is
# restored even when a later step fails or the script is interrupted.
trap restore_python_package EXIT
trap 'exit 1' HUP INT TERM

# Copy all shared libs to the LightGBM python package directory.
cp ./lib_custom_parser.so "${lgb_python_pkg_dir}/lightgbm" && \
cp ./src/lib_transform.so "${lgb_python_pkg_dir}/lightgbm" && \
cp ./external_libs/LightGBM/lib_lightgbm.so "${lgb_python_pkg_dir}/lightgbm" || exit 1

# Prepend the lib-loading snippet to `basic.py` so the shared libs are loaded
# before the Python interfaces are used. The original is kept in `raw`.
cp "${lgb_python_pkg_dir}/lightgbm/basic.py" raw && \
cat ./scripts/load_precompiled_libs.py "${lgb_python_pkg_dir}/lightgbm/basic.py" > tmp && \
cp tmp "${lgb_python_pkg_dir}/lightgbm/basic.py" || exit 1

# Pack the wheel package.
(cd "${lgb_python_pkg_dir}" && rm -rf dist/ && python setup.py bdist_wheel --precompile) || exit 1

# Revert the changes made to the python package before uploading.
restore_python_package || exit 1
trap - EXIT

# Upload the package to your PyPI; testpypi is used as an example.
twine upload --repository testpypi "${lgb_python_pkg_dir}"/dist/* || exit 1
|
|
@ -48,41 +48,6 @@ set(LLVM_LIB
|
|||
LLVMX86Utils
|
||||
)
|
||||
|
||||
set(BOOST_LIB
|
||||
boost_atomic
|
||||
boost_chrono
|
||||
boost_context
|
||||
boost_coroutine
|
||||
boost_date_time
|
||||
boost_exception
|
||||
boost_filesystem
|
||||
boost_graph
|
||||
boost_graph_parallel
|
||||
boost_iostreams
|
||||
boost_locale
|
||||
boost_log
|
||||
boost_log_setup
|
||||
boost_math_c99
|
||||
boost_math_c99f
|
||||
boost_math_c99l
|
||||
boost_math_tr1
|
||||
boost_math_tr1f
|
||||
boost_math_tr1l
|
||||
boost_mpi
|
||||
boost_prg_exec_monitor
|
||||
boost_program_options
|
||||
boost_random
|
||||
boost_regex
|
||||
boost_serialization
|
||||
boost_system
|
||||
boost_test_exec_monitor
|
||||
boost_thread
|
||||
boost_timer
|
||||
boost_unit_test_framework
|
||||
boost_wave
|
||||
boost_wserialization
|
||||
)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
if(USE_DEBUG)
|
||||
SET(CMAKE_BUILD_TYPE "Debug")
|
||||
|
|
|
@ -45,7 +45,7 @@ target_link_libraries(${PROJECT_NAME}
|
|||
DRFreeFormTransformLibrary
|
||||
DRFreeFormSExpressionLibrary
|
||||
DRFreeFormLibrary
|
||||
${BOOST_LIB}
|
||||
${Boost_LIBRARIES}
|
||||
${LLVM_LIB}
|
||||
-Wl,--end-group
|
||||
)
|
||||
|
|
|
@ -46,7 +46,7 @@ target_link_libraries(${PROJECT_NAME}
|
|||
DRFreeFormSExpressionLibrary
|
||||
DRFreeFormLibrary
|
||||
NeuralTreeEvaluatorLibrary
|
||||
${BOOST_LIB}
|
||||
${Boost_LIBRARIES}
|
||||
${LLVM_LIB}
|
||||
-Wl,--end-group
|
||||
)
|
||||
|
|
|
@ -2,7 +2,9 @@
|
|||
|
||||
set(PROJECT_NAME TransformProcessor)
|
||||
|
||||
Project(${PROJECT_NAME})
|
||||
project(${PROJECT_NAME})
|
||||
|
||||
set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/../../)
|
||||
|
||||
add_library(_transform SHARED
|
||||
IniFileParserInterface.h
|
||||
|
@ -36,7 +38,7 @@ target_link_libraries(_transform
|
|||
DRFreeFormSExpressionLibrary
|
||||
DRFreeFormLibrary
|
||||
NeuralTreeEvaluatorLibrary
|
||||
${BOOST_LIB}
|
||||
${Boost_LIBRARIES}
|
||||
${LLVM_LIB}
|
||||
-Wl,--end-group
|
||||
)
|
||||
|
|
|
@ -39,7 +39,7 @@ target_link_libraries(${PROJECT_NAME}
|
|||
DRFreeFormLibrary
|
||||
NeuralTreeEvaluatorLibrary
|
||||
_transform
|
||||
${BOOST_LIB}
|
||||
${Boost_LIBRARIES}
|
||||
${LLVM_LIB}
|
||||
-Wl,--end-group
|
||||
)
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -0,0 +1,33 @@
|
|||
className:FreeForm2Parser
|
||||
labelId:10
|
||||
transform:
|
||||
[Input:1]
|
||||
Line1=(+ feature_1 feature_2)
|
||||
Transform=FreeForm2
|
||||
Slope=1
|
||||
Intercept=0
|
||||
|
||||
[Input:2]
|
||||
Transform=FreeForm2
|
||||
Line1=(* feature_1 feature_3)
|
||||
|
||||
[Input:3]
|
||||
Transform=FreeForm2
|
||||
Line1=(max feature_6 feature_7)
|
||||
|
||||
[Input:4]
|
||||
Transform=Linear
|
||||
Name=feature_8
|
||||
Intercept=0
|
||||
Slope=1
|
||||
|
||||
[Input:5]
|
||||
Transform=Linear
|
||||
Name=feature_9
|
||||
Intercept=0
|
||||
Slope=1
|
||||
end of transform
|
||||
|
||||
header:
|
||||
feature_0 feature_1 feature_2 feature_3 feature_4 feature_5 feature_6 feature_7 feature_8 feature_9 labels
|
||||
end of header
|
|
@ -0,0 +1,100 @@
|
|||
12 12 12 2 18 7 16 5 11 14 1
|
||||
3 12 10 11 10 8 5 0 7 8 0
|
||||
9 3 18 4 8 18 12 14 3 4 1
|
||||
15 15 14 17 0 10 2 5 2 8 0
|
||||
18 16 9 9 15 16 9 10 15 19 0
|
||||
5 0 14 1 15 16 8 2 2 18 1
|
||||
10 9 1 13 19 7 7 15 1 4 1
|
||||
19 3 4 16 19 8 3 8 17 15 1
|
||||
5 16 14 13 14 14 17 3 3 3 1
|
||||
7 11 10 15 19 3 5 11 6 3 1
|
||||
5 12 14 12 9 11 19 18 17 2 1
|
||||
6 15 9 11 10 15 17 0 6 6 1
|
||||
10 8 16 13 9 0 10 7 19 19 0
|
||||
16 4 19 18 13 0 10 7 14 4 0
|
||||
19 17 12 5 12 5 18 15 12 19 0
|
||||
2 18 1 18 18 8 8 7 8 0 1
|
||||
1 17 11 1 8 12 0 11 5 6 1
|
||||
4 1 11 10 15 19 15 2 5 16 0
|
||||
15 14 10 5 6 15 7 16 3 4 0
|
||||
6 6 11 8 15 4 16 16 8 12 0
|
||||
10 15 11 9 12 6 5 9 19 0 1
|
||||
4 3 3 8 13 18 5 12 19 8 0
|
||||
11 13 7 2 18 17 5 17 3 10 1
|
||||
19 7 9 0 1 9 11 7 4 1 0
|
||||
8 19 17 18 17 5 3 6 7 6 0
|
||||
16 4 19 13 3 0 15 6 16 2 1
|
||||
9 1 0 19 7 9 8 19 10 16 0
|
||||
9 5 4 12 7 5 8 11 16 1 1
|
||||
15 14 18 0 13 18 2 10 10 14 1
|
||||
16 1 5 3 3 7 11 8 14 4 1
|
||||
10 19 0 4 6 11 12 10 9 19 0
|
||||
18 6 18 6 6 10 4 13 17 12 0
|
||||
15 2 17 11 6 8 10 3 0 12 1
|
||||
2 14 3 11 5 19 17 9 9 4 1
|
||||
16 19 7 9 10 13 15 17 0 0 1
|
||||
4 6 17 17 14 5 2 8 12 7 1
|
||||
14 7 11 5 0 13 1 0 17 16 0
|
||||
17 13 0 11 5 0 16 1 19 17 1
|
||||
9 13 3 17 1 1 2 15 2 18 0
|
||||
12 17 3 10 4 9 8 4 7 5 0
|
||||
19 8 16 6 9 18 19 12 17 0 1
|
||||
1 14 15 8 7 9 1 19 8 12 1
|
||||
16 1 16 10 17 12 6 4 6 8 1
|
||||
7 8 16 10 15 15 8 14 14 0 0
|
||||
6 1 10 14 14 10 15 0 8 10 1
|
||||
3 16 12 16 17 7 8 19 17 19 1
|
||||
15 0 0 12 7 0 13 2 17 16 0
|
||||
5 8 12 12 4 10 0 8 12 7 1
|
||||
10 17 14 19 13 2 8 17 17 8 1
|
||||
11 6 11 16 10 12 7 19 14 13 0
|
||||
18 10 7 3 11 17 4 12 13 8 1
|
||||
4 3 10 8 14 10 10 10 10 12 1
|
||||
3 2 14 1 7 19 8 6 14 3 1
|
||||
1 8 2 11 17 8 0 0 7 18 0
|
||||
0 13 8 11 4 19 16 13 5 0 0
|
||||
19 4 19 14 19 17 19 14 12 4 0
|
||||
9 15 16 5 8 15 18 0 9 18 1
|
||||
5 10 0 14 19 18 1 4 6 9 0
|
||||
1 14 19 11 3 6 16 9 15 6 1
|
||||
12 7 13 19 12 15 4 7 15 14 0
|
||||
18 18 9 6 4 8 6 1 9 4 0
|
||||
11 14 16 9 14 9 17 9 7 12 1
|
||||
1 2 7 10 13 15 4 9 7 17 0
|
||||
7 11 10 17 1 18 4 12 7 16 0
|
||||
4 2 14 0 11 17 6 0 14 13 0
|
||||
16 12 14 6 5 18 9 15 19 10 1
|
||||
15 15 18 3 7 5 2 14 13 12 0
|
||||
9 12 15 1 18 13 12 10 19 2 1
|
||||
17 16 19 6 15 5 6 3 5 1 1
|
||||
8 2 5 4 16 13 3 6 8 9 1
|
||||
4 9 19 7 0 7 15 13 6 8 0
|
||||
12 7 9 4 9 1 1 16 18 10 1
|
||||
18 0 15 1 19 16 2 17 3 14 0
|
||||
5 16 18 11 2 11 9 19 5 14 0
|
||||
9 10 8 19 16 9 8 2 3 2 0
|
||||
5 12 15 1 16 10 0 18 9 12 1
|
||||
11 12 17 3 13 18 16 5 4 8 1
|
||||
14 1 2 13 5 15 8 3 14 0 0
|
||||
11 14 13 7 6 17 16 0 7 0 1
|
||||
18 9 16 8 10 11 1 18 11 14 1
|
||||
10 18 9 9 19 18 9 11 2 14 1
|
||||
4 15 3 1 13 17 4 9 0 7 1
|
||||
2 12 17 7 13 12 8 5 1 5 0
|
||||
10 17 8 5 14 6 4 18 17 0 0
|
||||
19 9 14 17 6 18 5 12 1 3 1
|
||||
11 10 12 19 10 8 18 3 1 14 1
|
||||
4 6 15 5 12 13 17 7 7 10 1
|
||||
18 6 10 5 12 0 11 4 11 18 1
|
||||
4 1 18 13 18 2 14 5 16 18 0
|
||||
6 3 16 1 14 2 12 2 3 9 1
|
||||
11 12 11 0 0 18 6 19 6 14 0
|
||||
16 9 12 4 7 15 6 8 16 14 1
|
||||
10 2 15 1 11 5 2 14 16 19 0
|
||||
0 5 14 10 9 15 13 5 11 12 1
|
||||
3 18 16 16 4 14 13 10 7 14 0
|
||||
13 19 13 0 0 3 17 8 4 16 0
|
||||
1 19 6 16 1 13 10 3 12 10 0
|
||||
8 8 14 1 2 14 2 6 19 10 1
|
||||
15 19 10 17 11 11 17 16 17 12 1
|
||||
5 16 12 16 8 18 1 1 11 5 0
|
|
Загрузка…
Ссылка в новой задаче