onnxruntime-extensions/tutorials/bert_sentiment.py

import onnx
import torch
import onnxruntime
import onnxruntime_extensions

from pathlib import Path
from onnxruntime_extensions import pnp
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model_path = "./" + model_name + ".onnx"
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# set the model to inference mode
model.eval()

# Generate dummy inputs to the model. Adjust if neccessary
inputs = {
        'input_ids':   torch.randint(32, [1, 32], dtype=torch.long), # list of numerical ids for the tokenized text
        'attention_mask': torch.ones([1, 32], dtype=torch.long)      # dummy list of ones
    }

symbolic_names = {0: 'batch_size', 1: 'max_seq_llsen'}
torch.onnx.export(model,                                         # model being run
                  (inputs['input_ids'],
                   inputs['attention_mask']),
                  model_path,                                    # where to save the model (can be a file or file-like object)
                  opset_version=11,                              # the ONNX version to export the model to
                  do_constant_folding=True,                      # whether to execute constant folding for optimization
                  input_names=['input_ids',
                               'input_mask'],                    # the model's input names
                  output_names=['output_logits'],                # the model's output names
                  dynamic_axes={'input_ids': symbolic_names,
                                'input_mask' : symbolic_names,
                                'output_logits' : symbolic_names}) # variable length axes

# The fine-tuned HuggingFace model is exported to ONNX in the code snippet above
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model_path = Path(model_name + ".onnx")

# mapping the BertTokenizer outputs into the onnx model inputs
def map_token_output(input_ids, attention_mask, token_type_ids):
    return input_ids.unsqueeze(0), token_type_ids.unsqueeze(0), attention_mask.unsqueeze(0)

# Post process the start and end logits
def post_process(*pred):
    output = torch.argmax(pred[0])
    return output

tokenizer = AutoTokenizer.from_pretrained(model_name)
bert_tokenizer = pnp.PreHuggingFaceBert(hf_tok=tokenizer)
bert_model = onnx.load_model(str(model_path))

augmented_model = pnp.SequentialProcessingModule(bert_tokenizer, map_token_output,
                                                 bert_model, post_process)

test_input = ["This is s test sentence"]

# create the final onnx model which includes pre- and post- processing.
augmented_model = pnp.export(augmented_model,
                             test_input,
                             opset_version=12,
                             input_names=['input'],
                             output_names=['output'],
                             output_path=model_name + '-aug.onnx',
                             dynamic_axes={'input': [0], 'output': [0]})

test_input = ["I don't really like tomatoes. They are too bitter"]

# Load the model
session_options = onnxruntime.SessionOptions()
session_options.register_custom_ops_library(onnxruntime_extensions.get_library_path())
session = onnxruntime.InferenceSession('distilbert-base-uncased-finetuned-sst-2-english-aug.onnx', session_options)

# Run the model
results = session.run(["output"], {"input": test_input})

print("\nResult is: " + ("positive" if results[0] == 1 else "negative"))