WIP implementation of the simplified prediction api

This commit is contained in:
Jaakko Lukkari 2018-08-20 14:56:28 +03:00
Родитель c195979a2e
Коммит 7616a4b1cb
11 изменённых файлов: 394 добавлений и 74 удалений

Просмотреть файл

@ -3,5 +3,5 @@ build-docker:
.PHONY: build-docker
test:
./scripts/docker_run.sh go test ./...
./scripts/docker_run.sh go test ./... -bench=.
.PHONY: test

Просмотреть файл

@ -1,4 +1,4 @@
[![Build Status](https://travis-ci.org/Applifier/go-xgboost.svg?branch=master)](https://travis-ci.org/Applifier/go-xgboost)
[![Build Status](https://travis-ci.org/Applifier/go-xgboost.svg?branch=master)](https://travis-ci.org/Applifier/go-xgboost)
[![GoDoc](https://godoc.org/github.com/Applifier/go-xgboost?status.svg)](http://godoc.org/github.com/Applifier/go-xgboost)
@ -10,61 +10,18 @@ Go bindings for [XGBoost](https://github.com/dmlc/xgboost)
import "github.com/Applifier/go-xgboost"
```
## Example
## Usage
This library is meant for running predictions against a pre-trained XGBoost model. Limited training-related functionality is implemented under [core](https://github.com/Applifier/go-xgboost/blob/master/core), but training the model in Python or with the XGBoost CLI is encouraged.
```go
// create the training data
cols := 3
rows := 5
trainData := make([]float32, cols*rows)
for i := 0; i < rows; i++ {
for j := 0; j < cols; j++ {
trainData[(i*cols)+j] = float32((i + 1) * (j + 1))
}
}
trainLabels := make([]float32, rows)
for i := 0; i < rows; i++ {
trainLabels[i] = float32(1 + i*i*i)
}
// Create predictor for a model and define the number of workers (and other settings)
predictor, _ := xgboost.NewPredictor(modelPath, runtime.NumCPU(), 0, 0, -1)
// Create XGDMatrix for training data
matrix, _ := xgboost.XGDMatrixCreateFromMat(trainData, rows, cols, -1)
// Make prediction for one row
res, _ := predictor.Predict(xgboost.FloatSliceVector([]float32{1, 2, 3}))
fmt.Printf("Results: %+v\n", res)
// output: Results: [1.08002]
// Set training labels
matrix.SetFloatInfo("label", trainLabels)
// Create booster
booster, _ := xgboost.XGBoosterCreate([]*xgboost.XGDMatrix{matrix})
// Set booster parameters
booster.SetParam("booster", "gbtree")
booster.SetParam("objective", "reg:linear")
booster.SetParam("max_depth", "5")
booster.SetParam("eta", "0.1")
booster.SetParam("min_child_weight", "1")
booster.SetParam("subsample", "0.5")
booster.SetParam("colsample_bytree", "1")
booster.SetParam("num_parallel_tree", "1")
// perform 200 learning iterations
for iter := 0; iter < 200; iter++ {
booster.UpdateOneIter(iter, matrix)
}
testData := make([]float32, cols*rows)
for i := 0; i < rows; i++ {
for j := 0; j < cols; j++ {
testData[(i*cols)+j] = float32((i + 1) * (j + 1))
}
}
// Create XGDMatrix for test data
testmat, _ := xgboost.XGDMatrixCreateFromMat(testData, rows, cols, -1)
// Predict
res, _ := booster.Predict(testmat, 0, 0)
fmt.Printf("%+v\n", res)
// output: [1.08002 2.5686886 7.86032 29.923136 63.76062]
```

103
booster.go Normal file
Просмотреть файл

@ -0,0 +1,103 @@
package xgboost
import (
"runtime"
"github.com/Applifier/go-xgboost/core"
)
// Matrix is the input abstraction accepted by Predictor.Predict. Any type
// that can describe itself as a flat float32 slice plus its dimensions
// satisfies it.
type Matrix interface {
// Data returns the matrix values as a flat float32 slice together with the
// number of rows and columns. NewPredictor's workers hand these three
// values directly to core.XGDMatrixCreateFromMat.
// NOTE(review): the slice is presumably row-major (as in the core
// examples) — confirm against core.XGDMatrixCreateFromMat.
Data() (data []float32, rowCount, columnCount int)
}
// FloatSliceVector is a Matrix implementation backed by a plain float32
// slice.
type FloatSliceVector []float32

// Data exposes the slice as a matrix with exactly one row whose column count
// equals the length of the slice. The returned data shares memory with the
// receiver; no copy is made.
func (v FloatSliceVector) Data() (data []float32, rowCount, columnCount int) {
	data = v
	rowCount = 1
	columnCount = len(v)
	return data, rowCount, columnCount
}
// Predictor is the interface implemented by xgboost predictors, such as the
// one returned by NewPredictor.
type Predictor interface {
// Predict runs a prediction for the given input matrix and returns the
// predicted values, or an error if the prediction could not be made.
Predict(input Matrix) ([]float32, error)
}
// NewPredictor returns a Predictor backed by workerCount worker goroutines,
// each of which creates its own booster and loads the model stored at
// xboostSavedModelPath.
//
// optionMask and nTreeLimit are passed unchanged to every booster Predict
// call, and missingValue is passed to core.XGDMatrixCreateFromMat when an
// input Matrix is converted. A workerCount below 1 is treated as 1, because a
// predictor without workers would block forever in Predict.
//
// Workers are initialised one at a time. If any of them fails to create its
// booster or to load the model, the workers that were already started are
// shut down and the error is returned together with a nil Predictor.
func NewPredictor(xboostSavedModelPath string, workerCount int, optionMask int, nTreeLimit uint, missingValue float32) (Predictor, error) {
	if workerCount < 1 {
		workerCount = 1
	}

	requestChan := make(chan multiBoosterRequest)
	initErrors := make(chan error)
	// Safe to close on return: every started worker sends exactly one value
	// on initErrors and that value is always received below before the next
	// worker is started or the function returns.
	defer close(initErrors)

	for i := 0; i < workerCount; i++ {
		go func() {
			// Wire the worker goroutine to its OS thread for its whole
			// lifetime.
			runtime.LockOSThread()
			defer runtime.UnlockOSThread()

			booster, err := core.XGBoosterCreate(nil)
			if err != nil {
				initErrors <- err
				return
			}

			if err := booster.LoadModel(xboostSavedModelPath); err != nil {
				initErrors <- err
				return
			}

			// No errors occurred during init
			initErrors <- nil

			for req := range requestChan {
				data, rowCount, columnCount := req.matrix.Data()
				matrix, err := core.XGDMatrixCreateFromMat(data, rowCount, columnCount, missingValue)
				if err != nil {
					req.resultChan <- multiBoosterResponse{
						err: err,
					}
					continue
				}

				res, err := booster.Predict(matrix, optionMask, nTreeLimit)
				req.resultChan <- multiBoosterResponse{
					err:    err,
					result: res,
				}
			}
		}()

		if err := <-initErrors; err != nil {
			// Closing the request channel ends the range loop of every
			// worker started so far, so none of them is leaked. The failing
			// worker has already returned.
			close(requestChan)
			return nil, err
		}
	}

	return &multiBooster{reqChan: requestChan}, nil
}
// multiBoosterRequest is a single prediction job sent from
// multiBooster.Predict to one of the worker goroutines.
type multiBoosterRequest struct {
// matrix is the input to run the prediction on.
matrix Matrix
// resultChan is where the worker sends the outcome of this request.
resultChan chan multiBoosterResponse
}
// multiBoosterResponse is the outcome of one multiBoosterRequest as reported
// by a worker goroutine.
type multiBoosterResponse struct {
// err is the error from creating the matrix or from the booster
// prediction, if any.
err error
// result holds the values returned by the booster prediction.
result []float32
}
// multiBooster is the Predictor implementation returned by NewPredictor. It
// forwards every prediction to the worker goroutines over reqChan.
type multiBooster struct {
// reqChan is the channel the worker goroutines receive requests from.
reqChan chan multiBoosterRequest
}
// Predict sends the input to the worker goroutines and blocks until one of
// them has replied, returning the predicted values and any error reported by
// that worker.
func (mb *multiBooster) Predict(input Matrix) ([]float32, error) {
	// A dedicated reply channel per call routes the answer back to this caller.
	reply := make(chan multiBoosterResponse)
	request := multiBoosterRequest{matrix: input, resultChan: reply}

	mb.reqChan <- request
	response := <-reply

	return response.result, response.err
}

182
booster_test.go Normal file
Просмотреть файл

@ -0,0 +1,182 @@
package xgboost
import (
"fmt"
"io/ioutil"
"log"
"os"
"path"
"runtime"
"testing"
"github.com/Applifier/go-xgboost/core"
)
// tester is the subset of test-handle methods that trainAndSaveModel needs.
// The tests and benchmarks in this file pass their *testing.T and *testing.B
// values as a tester.
type tester interface {
// Helper is called once by trainAndSaveModel before it does any work.
Helper()
// Error is used by trainAndSaveModel to report failures.
Error(args ...interface{})
}
// trainAndSaveModel trains a small gbtree regression model on a fixed 5x3
// data set, saves it as "testmodel.bst" inside a fresh temporary directory
// and returns the path of the saved model together with a function that
// removes that directory.
//
// Errors are reported through t.Error. When t is nil (as in the example)
// they are ignored. Failing to create the temporary directory terminates the
// process via log.Fatal.
func trainAndSaveModel(t tester) (string, func()) {
	if t != nil {
		t.Helper()
	}

	rows, cols := 5, 3

	// The feature at (row, col) is (row+1)*(col+1), stored row by row.
	trainData := make([]float32, cols*rows)
	for idx := range trainData {
		r, c := idx/cols, idx%cols
		trainData[idx] = float32((r + 1) * (c + 1))
	}

	// The label of each row is 1 + row^3.
	trainLabels := make([]float32, rows)
	for r := range trainLabels {
		trainLabels[r] = float32(1 + r*r*r)
	}

	matrix, err := core.XGDMatrixCreateFromMat(trainData, rows, cols, -1)
	if t != nil && err != nil {
		t.Error(err)
	}

	if err = matrix.SetFloatInfo("label", trainLabels); t != nil && err != nil {
		t.Error(err)
	}

	booster, err := core.XGBoosterCreate([]*core.XGDMatrix{matrix})
	if t != nil && err != nil {
		t.Error(err)
	}

	// report forwards a non-nil error to t, if there is one to forward to.
	report := func(e error) {
		if t != nil && e != nil {
			t.Error(e)
		}
	}

	// Booster parameters, applied in the listed order.
	settings := [][2]string{
		{"booster", "gbtree"},
		{"objective", "reg:linear"},
		{"max_depth", "5"},
		{"eta", "0.1"},
		{"min_child_weight", "1"},
		{"subsample", "0.5"},
		{"colsample_bytree", "1"},
		{"num_parallel_tree", "1"},
		{"silent", "1"},
	}
	for _, kv := range settings {
		report(booster.SetParam(kv[0], kv[1]))
	}

	// perform 200 learning iterations
	for round := 0; round < 200; round++ {
		report(booster.UpdateOneIter(round, matrix))
	}

	dir, err := ioutil.TempDir("", "go-xgboost")
	if err != nil {
		log.Fatal(err)
	}

	savePath := path.Join(dir, "testmodel.bst")
	report(booster.SaveModel(savePath))

	removeDir := func() {
		os.RemoveAll(dir)
	}
	return savePath, removeDir
}
// TestBooster trains and saves a model, loads it through NewPredictor with a
// single worker and checks that predicting each training row on its own
// yields the expected value.
func TestBooster(t *testing.T) {
	modelPath, cleanUp := trainAndSaveModel(t)
	defer cleanUp()

	predictor, err := NewPredictor(modelPath, 1, 0, 0, -1)
	if err != nil {
		t.Fatal(err)
	}

	cols := 3
	rows := 5
	testData := make([][]float32, rows)
	for i := 0; i < rows; i++ {
		testData[i] = make([]float32, cols)
		for j := 0; j < cols; j++ {
			testData[i][j] = float32((i + 1) * (j + 1))
		}
	}

	expectedResult := []float32{1.08002, 2.5686886, 7.86032, 29.923136, 63.76062}

	for i, test := range testData {
		res, err := predictor.Predict(FloatSliceVector(test))
		if err != nil {
			// Without a result there is nothing to compare; skip the row
			// instead of indexing into an empty slice.
			t.Error(err)
			continue
		}

		if len(res) != 1 {
			t.Errorf("row %d: expected 1 prediction, got %d", i, len(res))
			continue
		}

		if res[0] != expectedResult[i] {
			t.Errorf("row %d: unexpected result received: got %v, want %v", i, res[0], expectedResult[i])
		}
	}
}
// BenchmarkBooster measures sequential single-row predictions against a
// predictor that runs one worker.
func BenchmarkBooster(b *testing.B) {
	modelPath, cleanUp := trainAndSaveModel(b)
	defer cleanUp()

	predictor, err := NewPredictor(modelPath, 1, 0, 0, -1)
	if err != nil {
		b.Fatal(err)
	}

	row := []float32{1, 2, 3}

	// Keep training and model loading out of the measurement.
	b.ResetTimer()

	for n := 0; n != b.N; n++ {
		out, predictErr := predictor.Predict(FloatSliceVector(row))
		if predictErr != nil {
			b.Error(predictErr)
		}

		if got := len(out); got != 1 {
			b.Error("invalid amount of results received")
		}
	}
}
// BenchmarkBoosterParallel measures single-row predictions issued through
// b.RunParallel against a predictor with runtime.NumCPU() workers.
func BenchmarkBoosterParallel(b *testing.B) {
	modelPath, cleanUp := trainAndSaveModel(b)
	defer cleanUp()

	predictor, err := NewPredictor(modelPath, runtime.NumCPU(), 0, 0, -1)
	if err != nil {
		b.Fatal(err)
	}

	row := []float32{1, 2, 3}

	// predictLoop is the body executed by each benchmark goroutine.
	predictLoop := func(pb *testing.PB) {
		for pb.Next() {
			out, predictErr := predictor.Predict(FloatSliceVector(row))
			if predictErr != nil {
				b.Error(predictErr)
			}

			if got := len(out); got != 1 {
				b.Error("invalid amount of results received")
			}
		}
	}

	// Keep training and model loading out of the measurement.
	b.ResetTimer()
	b.RunParallel(predictLoop)
}
// ExampleBooster shows how to load a saved model with NewPredictor and run a
// prediction for a single row. Errors are ignored to keep the example short.
func ExampleBooster() {
	// Train a throwaway model and obtain the path it was saved to
	savedModel, removeModel := trainAndSaveModel(nil)
	defer removeModel()

	// One worker per CPU; the remaining arguments are the option mask,
	// ntree_limit and missing value indicator
	workers := runtime.NumCPU()
	predictor, _ := NewPredictor(savedModel, workers, 0, 0, -1)

	row := FloatSliceVector([]float32{1, 2, 3})
	res, _ := predictor.Predict(row)

	fmt.Printf("Results: %+v\n", res)
	// output: Results: [1.08002]
}

66
core/README.md Normal file
Просмотреть файл

@ -0,0 +1,66 @@
[![GoDoc](https://godoc.org/github.com/Applifier/go-xgboost/core?status.svg)](http://godoc.org/github.com/Applifier/go-xgboost/core)
# Core package
```go
import "github.com/Applifier/go-xgboost/core"
```
## Example
```go
// create the training data
cols := 3
rows := 5
trainData := make([]float32, cols*rows)
for i := 0; i < rows; i++ {
for j := 0; j < cols; j++ {
trainData[(i*cols)+j] = float32((i + 1) * (j + 1))
}
}
trainLabels := make([]float32, rows)
for i := 0; i < rows; i++ {
trainLabels[i] = float32(1 + i*i*i)
}
// Create XGDMatrix for training data
matrix, _ := core.XGDMatrixCreateFromMat(trainData, rows, cols, -1)
// Set training labels
matrix.SetFloatInfo("label", trainLabels)
// Create booster
booster, _ := core.XGBoosterCreate([]*core.XGDMatrix{matrix})
// Set booster parameters
booster.SetParam("booster", "gbtree")
booster.SetParam("objective", "reg:linear")
booster.SetParam("max_depth", "5")
booster.SetParam("eta", "0.1")
booster.SetParam("min_child_weight", "1")
booster.SetParam("subsample", "0.5")
booster.SetParam("colsample_bytree", "1")
booster.SetParam("num_parallel_tree", "1")
// perform 200 learning iterations
for iter := 0; iter < 200; iter++ {
booster.UpdateOneIter(iter, matrix)
}
testData := make([]float32, cols*rows)
for i := 0; i < rows; i++ {
for j := 0; j < cols; j++ {
testData[(i*cols)+j] = float32((i + 1) * (j + 1))
}
}
// Create XGDMatrix for test data
testmat, _ := core.XGDMatrixCreateFromMat(testData, rows, cols, -1)
// Predict
res, _ := booster.Predict(testmat, 0, 0)
fmt.Printf("%+v\n", res)
// output: [1.08002 2.5686886 7.86032 29.923136 63.76062]
```

13
core/utils.go Normal file
Просмотреть файл

@ -0,0 +1,13 @@
package core
// copyUint32Slice returns a newly allocated slice holding the same elements
// as sli. The result never shares memory with sli and is non-nil even when
// sli is nil or empty.
func copyUint32Slice(sli []uint32) []uint32 {
	return append(make([]uint32, 0, len(sli)), sli...)
}
// copyFloat32Slice returns a newly allocated slice holding the same elements
// as sli. The result never shares memory with sli and is non-nil even when
// sli is nil or empty.
func copyFloat32Slice(sli []float32) []float32 {
	return append(make([]float32, 0, len(sli)), sli...)
}

Просмотреть файл

@ -1,4 +1,4 @@
package xgboost
package core
/*
#cgo LDFLAGS: -lxgboost

Просмотреть файл

@ -1,4 +1,4 @@
package xgboost_test
package core_test
import (
"fmt"
@ -9,7 +9,7 @@ import (
"path"
"testing"
"github.com/Applifier/go-xgboost"
"github.com/Applifier/go-xgboost/core"
)
func TestXGBoost(t *testing.T) {
@ -28,7 +28,7 @@ func TestXGBoost(t *testing.T) {
trainLabels[i] = float32(1 + i*i*i)
}
matrix, err := xgboost.XGDMatrixCreateFromMat(trainData, rows, cols, -1)
matrix, err := core.XGDMatrixCreateFromMat(trainData, rows, cols, -1)
if err != nil {
t.Error(err)
}
@ -38,7 +38,7 @@ func TestXGBoost(t *testing.T) {
t.Error(err)
}
booster, err := xgboost.XGBoosterCreate([]*xgboost.XGDMatrix{matrix})
booster, err := core.XGBoosterCreate([]*core.XGDMatrix{matrix})
if err != nil {
t.Error(err)
}
@ -57,6 +57,7 @@ func TestXGBoost(t *testing.T) {
noErr(booster.SetParam("subsample", "0.5"))
noErr(booster.SetParam("colsample_bytree", "1"))
noErr(booster.SetParam("num_parallel_tree", "1"))
noErr(booster.SetParam("silent", "1"))
// perform 200 learning iterations
for iter := 0; iter < 200; iter++ {
@ -70,7 +71,7 @@ func TestXGBoost(t *testing.T) {
}
}
testmat, err := xgboost.XGDMatrixCreateFromMat(testData, rows, cols, -1)
testmat, err := core.XGDMatrixCreateFromMat(testData, rows, cols, -1)
if err != nil {
t.Error(err)
}
@ -101,14 +102,14 @@ func TestXGBoost(t *testing.T) {
noErr(booster.SaveModel(savePath))
newBooster, err := xgboost.XGBoosterCreate(nil)
newBooster, err := core.XGBoosterCreate(nil)
if err != nil {
t.Error(err)
}
noErr(newBooster.LoadModel(savePath))
testmat2, err := xgboost.XGDMatrixCreateFromMat(testData, rows, cols, -1)
testmat2, err := core.XGDMatrixCreateFromMat(testData, rows, cols, -1)
if err != nil {
t.Error(err)
}
@ -147,13 +148,13 @@ func ExampleXGBoost() {
}
// Create XGDMatrix for training data
matrix, _ := xgboost.XGDMatrixCreateFromMat(trainData, rows, cols, -1)
matrix, _ := core.XGDMatrixCreateFromMat(trainData, rows, cols, -1)
// Set training labels
matrix.SetFloatInfo("label", trainLabels)
// Create booster
booster, _ := xgboost.XGBoosterCreate([]*xgboost.XGDMatrix{matrix})
booster, _ := core.XGBoosterCreate([]*core.XGDMatrix{matrix})
// Set booster parameters
booster.SetParam("booster", "gbtree")
@ -164,6 +165,7 @@ func ExampleXGBoost() {
booster.SetParam("subsample", "0.5")
booster.SetParam("colsample_bytree", "1")
booster.SetParam("num_parallel_tree", "1")
booster.SetParam("silent", "1")
// perform 200 learning iterations
for iter := 0; iter < 200; iter++ {
@ -178,7 +180,7 @@ func ExampleXGBoost() {
}
// Create XGDMatrix for test data
testmat, _ := xgboost.XGDMatrixCreateFromMat(testData, rows, cols, -1)
testmat, _ := core.XGDMatrixCreateFromMat(testData, rows, cols, -1)
// Predict
res, _ := booster.Predict(testmat, 0, 0)

Просмотреть файл

@ -1,4 +1,4 @@
package xgboost
package core
/*
#cgo LDFLAGS: -lxgboost
@ -73,7 +73,7 @@ func (booster *XGBooster) Predict(mat *XGDMatrix, optionMask int, ntreeLimit uin
sliceHeader.Len = int(outLen)
sliceHeader.Data = uintptr(unsafe.Pointer(outResult))
return list, nil
return copyFloat32Slice(list), nil
}
// LoadModel load model from existing file

Просмотреть файл

@ -1,4 +1,4 @@
package xgboost
package core
/*
#cgo LDFLAGS: -lxgboost
@ -94,7 +94,7 @@ func (matrix *XGDMatrix) GetFloatInfo(field string) ([]float32, error) {
sliceHeader.Len = int(outLen)
sliceHeader.Data = uintptr(unsafe.Pointer(outResult))
return list, nil
return copyFloat32Slice(list), nil
}
// GetUIntInfo get uint32 info vector from matrix
@ -115,7 +115,7 @@ func (matrix *XGDMatrix) GetUIntInfo(field string) ([]uint32, error) {
sliceHeader.Len = int(outLen)
sliceHeader.Data = uintptr(unsafe.Pointer(outResult))
return list, nil
return copyUint32Slice(list), nil
}
func xdgMatrixFinalizer(mat *XGDMatrix) {

Просмотреть файл

@ -1,7 +1,6 @@
package xgboost
package core
import (
"fmt"
"testing"
)
@ -48,6 +47,4 @@ func TestXGDMatrix(t *testing.T) {
if colCount != 2 {
t.Error("Wrong col count returned")
}
fmt.Printf("%+v\n", data)
}