Fix ::tolower error when locale is not 'C' (#174)

* Add test and implement ToLower

* Fix locale

* Fix locale

Co-authored-by: Ze Tao <zetao@microsoft.com>
Mojimi 2021-10-21 11:59:29 +08:00 committed by GitHub
Parent 448518534c
Commit 46d096f1af
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
10 changed files: 58232 additions and 14 deletions
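
Background on the bug: `std::tolower` from `<cctype>` is locale-sensitive, and its argument must be representable as `unsigned char` (or be EOF); the tokenizers were passing raw `char32_t` code points to it. Under the "C" locale many runtimes happen to tolerate that, but once the global locale changes the call is undefined behavior. A minimal sketch of the failure mode (illustrative only, not part of this commit):

```cpp
#include <cctype>
#include <clocale>

int main() {
  std::setlocale(LC_ALL, "");  // any locale other than "C"

  char32_t c = U'再';  // code point 0x518D, far outside unsigned char

  // Undefined behavior: std::tolower's argument must fit in unsigned
  // char or be EOF. Some CRTs return out-of-range values unchanged in
  // the "C" locale, but after the locale switch MSVC's debug runtime
  // can assert here and other builds can read past the case table.
  int r = std::tolower(static_cast<int>(c));
  (void)r;
  return 0;
}
```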

View File

@@ -98,10 +98,23 @@ bool IsAccent(char32_t c)
return c >= 0x300 && c <= 0x36F;
}
// only supports Latin for now
char32_t ToLower(char32_t c) {
  if ((c >= 'A') && (c <= 'Z')) {
    return c + 'a' - 'A';
  }
  // Latin-1 Supplement À..Þ lowercases at a +0x20 offset; exclude ×
  // (U+00D7), which falls in this range but has no lowercase form.
  if ((c >= U'À') && (c <= U'Þ') && (c != U'×')) {
    return c + U'à' - U'À';
  }
  return c;
}
char32_t StripAccent(char32_t c)
{
// "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
-const char* tr = "AAAAAAÆCEEEEIIIIÐNOOOOO×ØUUUUYÞßaaaaaaæceeeeiiiiðnooooo÷øuuuuyþy";
+const char32_t* tr = U"AAAAAAÆCEEEEIIIIÐNOOOOO×ØUUUUYÞßaaaaaaæceeeeiiiiðnooooo÷øuuuuyþy";
if (c < 192 || c > 255) {
return c;
}
@@ -177,4 +190,5 @@ uint64_t Hash64Fast(const char* data, size_t n) {
return static_cast<int64_t>(util::Fingerprint64(data, n));
}
#endif // ENABLE_TF_STRING
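
For reference, the new ToLower lowercases ASCII and the Latin-1 Supplement block À..Þ (both at a +0x20 offset) and passes everything else through. A quick sanity check, assuming the function above is linked in:

```cpp
#include <cassert>

char32_t ToLower(char32_t c);  // from the string-utils file above

int main() {
  assert(ToLower(U'A') == U'a');    // ASCII: +0x20
  assert(ToLower(U'À') == U'à');    // Latin-1 Supplement: same offset
  assert(ToLower(U'×') == U'×');    // multiplication sign has no lowercase form
  assert(ToLower(U'ß') == U'ß');    // already lowercase, unchanged
  assert(ToLower(U'再') == U'再');  // non-Latin code points pass through
  return 0;
}
```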

View File

@@ -65,6 +65,8 @@ bool IsPunct(char32_t c);
bool IsControl(char32_t c);
char32_t ToLower(char32_t c);
char32_t StripAccent(char32_t c);
uint64_t Hash64(const char* data, size_t n, uint64_t seed);

View File

@@ -17,7 +17,7 @@ void KernelStringLower::Compute(OrtKernelContext* context) {
GetTensorMutableDataString(api_, ort_, context, input_X, X);
for (int64_t i = 0; i < (int64_t)X.size(); ++i) {
-std::transform(X[i].begin(), X[i].end(), X[i].begin(), ::tolower);
+std::transform(X[i].begin(), X[i].end(), X[i].begin(), ToLower);
}
OrtTensorDimensions dimensions(ort_, input_X);
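
Note that `X[i]` is a UTF-8 `std::string`, so `ToLower` now runs per byte rather than per code point. Where `char` is signed (the common default), bytes at or above 0x80 convert to huge `char32_t` values that miss both ranges, so this kernel lowercases ASCII and leaves multibyte UTF-8 sequences intact; full Latin-1 handling happens where `ToLower` sees decoded code points, as in BasicTokenizer below. A sketch of the per-byte behavior, assuming signed `char`:

```cpp
#include <algorithm>
#include <string>

char32_t ToLower(char32_t c);  // from string-utils

int main() {
  std::string s = "H\xC3\xA9llo";  // the UTF-8 bytes of "Héllo"
  // Same call shape as the kernel. With signed char, 0xC3 and 0xA9 are
  // negative, convert to very large char32_t values, and fall through.
  std::transform(s.begin(), s.end(), s.begin(), ToLower);
  // s == "h\xC3\xA9llo": only the ASCII 'H' changed.
  return 0;
}
```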

View File

@@ -38,7 +38,7 @@ std::vector<ustring> BasicTokenizer::Tokenize(ustring text) {
if (do_lower_case_) {
for (auto& c : text) {
-c = ::tolower(c);
+c = ToLower(c);
}
}
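
Here, by contrast, `text` has already been decoded into a `ustring` of `char32_t` code points, so the Latin-1 branch takes effect and the old `::tolower(c)` call, undefined for such values, is gone. Roughly, assuming only the `ToLower` above (with `std::u32string` standing in for `ustring`):

```cpp
#include <string>

char32_t ToLower(char32_t c);  // from string-utils

int main() {
  std::u32string text = U"Àpple";  // stand-in for the decoded ustring
  for (auto& c : text) {
    c = ToLower(c);  // locale-independent, safe for any code point
  }
  // text == U"àpple"
  return 0;
}
```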

View File

@@ -276,16 +276,11 @@ void KernelBertTokenizer::Compute(OrtKernelContext* context) {
std::vector<int64_t> input_ids;
std::vector<int64_t> token_type_ids;
-if (input_data.size() == 1 || input_data[1].empty()) {
+if (input_data.size() == 1) {
std::vector<int64_t> encode = tokenizer_->Encode(tokenizer_->Tokenize(ustring(input_data[0])));
truncate_->Truncate(encode, (max_length_ > 0 && max_length_ <= 2) ? 0 : max_length_ - 2);
input_ids = tokenizer_->AddSpecialToken(encode);
token_type_ids = tokenizer_->GenerateTypeId(encode);
-} else if (input_data[0].empty()) {
-std::vector<int64_t> encode = tokenizer_->Encode(tokenizer_->Tokenize(ustring(input_data[1])));
-truncate_->Truncate(encode, (max_length_ > 0 && max_length_ <= 2) ? 0 : max_length_ - 2);
-input_ids = tokenizer_->AddSpecialToken(encode);
-token_type_ids = tokenizer_->GenerateTypeId(encode);
} else {
std::vector<int64_t> encode1 = tokenizer_->Encode(tokenizer_->Tokenize(ustring(input_data[0])));
std::vector<int64_t> encode2 = tokenizer_->Encode(tokenizer_->Tokenize(ustring(input_data[1])));
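
The empty-string special cases are removed because the pair branch already covers them: an empty side contributes no tokens between the separators. The expectations added to the tests below encode exactly that:

```cpp
// For inputs {"a", ""} the pair branch produces
//   input_ids      = [CLS] 'a' [SEP] [SEP] = {101, 170, 102, 102}
//   token_type_ids =   0    0    0     1   = {0, 0, 0, 1}
// and for {"", "a"}
//   input_ids      = [CLS] [SEP] 'a' [SEP] = {101, 102, 170, 102}
//   token_type_ids =   0     0    1    1   = {0, 0, 1, 1}
```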

View File

File diff suppressed because it is too large.

View File

@@ -8,7 +8,7 @@
#include "text/string_lower.hpp"
-TEST(utils, test_string_lower) {
+TEST(string_operator, test_string_lower) {
auto ort_env = std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_WARNING, "Default");
std::vector<TestValue> inputs(1);
@@ -32,7 +32,7 @@ TEST(utils, test_string_lower) {
}
-TEST(utils, test_regex_split_with_offsets) {
+TEST(string_operator, test_regex_split_with_offsets) {
auto ort_env = std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_WARNING, "Default");
std::vector<TestValue> inputs(1);

View File

@@ -2,11 +2,12 @@
// Licensed under the MIT License.
#include <filesystem>
+#include <locale>
#include "gtest/gtest.h"
#include "ocos.h"
#include "test_kernel.hpp"
-TEST(utils, test_bert_tokenizer) {
+TEST(tokenizer_operators, test_bert_tokenizer) {
auto ort_env = std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_WARNING, "Default");
std::vector<TestValue> inputs(1);
@@ -35,10 +36,9 @@ TEST(utils, test_bert_tokenizer) {
model_path = model_path.parent_path();
model_path /= "..";
model_path /= "data";
model_path /= "test_bert_tokenizer1.onnx";
model_path /= "test_bert_tokenizer.onnx";
TestInference(*ort_env, model_path.c_str(), inputs, outputs, GetLibraryPath());
inputs[0].name = "text";
inputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING;
inputs[0].dims = {1};
@@ -80,4 +80,193 @@ TEST(utils, test_bert_tokenizer) {
outputs[2].dims = {71};
outputs[2].values_int64 = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
TestInference(*ort_env, model_path.c_str(), inputs, outputs, GetLibraryPath());
inputs[0].name = "text";
inputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING;
inputs[0].dims = {1};
inputs[0].values_string = {""};
outputs[0].name = "input_ids";
outputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[0].dims = {2};
outputs[0].values_int64 = {101, 102};
outputs[1].name = "token_type_ids";
outputs[1].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[1].dims = {2};
outputs[1].values_int64 = {0, 0};
outputs[2].name = "attention_mask";
outputs[2].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[2].dims = {2};
outputs[2].values_int64 = {1, 1};
TestInference(*ort_env, model_path.c_str(), inputs, outputs, GetLibraryPath());
inputs[0].name = "text";
inputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING;
inputs[0].dims = {2};
inputs[0].values_string = {"M1 Pro and M1 Max scale the amazing M1 architecture to new heights — and for the first time, they bring a system on a chip (SoC) architecture to a pro notebook.",
"Both have more CPU cores, more GPU cores, and more unified memory than M1. Along with a powerful Neural Engine for supercharged machine learning and upgraded media engines with ProRes support, M1 Pro and M1 Max allow pros to do things they never could before."};
outputs[0].name = "input_ids";
outputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[0].dims = {99};
outputs[0].values_int64 = {101, 26528, 5096, 1105, 26528, 3405, 3418, 1103, 6929, 26528, 4220, 1106, 1207, 16291, 100, 1105, 1111, 1103, 1148, 1159, 117, 1152, 2498, 170, 1449, 1113, 170, 11451, 113, 1573, 1658, 114, 4220, 1106, 170, 5250, 17189, 119, 102, 2695, 1138, 1167, 18701, 4160, 1116, 117, 1167, 15175, 2591, 4160, 1116, 117, 1105, 1167, 13943, 2962, 1190, 26528, 119, 6364, 1114, 170, 3110, 151, 8816, 1348, 13451, 1111, 7688, 23131, 3395, 3776, 1105, 9554, 2394, 4540, 1114, 5096, 2069, 1279, 1619, 117, 26528, 5096, 1105, 26528, 3405, 2621, 5250, 1116, 1106, 1202, 1614, 1152, 1309, 1180, 1196, 119, 102};
outputs[1].name = "token_type_ids";
outputs[1].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[1].dims = {99};
outputs[1].values_int64 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
outputs[2].name = "attention_mask";
outputs[2].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[2].dims = {99};
outputs[2].values_int64 = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
TestInference(*ort_env, model_path.c_str(), inputs, outputs, GetLibraryPath());
inputs[0].name = "text";
inputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING;
inputs[0].dims = {2};
inputs[0].values_string = {"a", ""};
outputs[0].name = "input_ids";
outputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[0].dims = {4};
outputs[0].values_int64 = {101, 170, 102, 102};
outputs[1].name = "token_type_ids";
outputs[1].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[1].dims = {4};
outputs[1].values_int64 = {0, 0, 0, 1};
outputs[2].name = "attention_mask";
outputs[2].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[2].dims = {4};
outputs[2].values_int64 = {1, 1, 1, 1};
TestInference(*ort_env, model_path.c_str(), inputs, outputs, GetLibraryPath());
inputs[0].name = "text";
inputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING;
inputs[0].dims = {1};
inputs[0].values_string = {""};
outputs[0].name = "input_ids";
outputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[0].dims = {2};
outputs[0].values_int64 = {101, 102};
outputs[1].name = "token_type_ids";
outputs[1].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[1].dims = {2};
outputs[1].values_int64 = {0, 0};
outputs[2].name = "attention_mask";
outputs[2].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[2].dims = {2};
outputs[2].values_int64 = {1, 1};
TestInference(*ort_env, model_path.c_str(), inputs, outputs, GetLibraryPath());
inputs[0].name = "text";
inputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING;
inputs[0].dims = {2};
inputs[0].values_string = {"M1 Pro and M1 Max scale the amazing M1 architecture to new heights — and for the first time, they bring a system on a chip (SoC) architecture to a pro notebook.",
"Both have more CPU cores, more GPU cores, and more unified memory than M1. Along with a powerful Neural Engine for supercharged machine learning and upgraded media engines with ProRes support, M1 Pro and M1 Max allow pros to do things they never could before."};
outputs[0].name = "input_ids";
outputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[0].dims = {99};
outputs[0].values_int64 = {101, 26528, 5096, 1105, 26528, 3405, 3418, 1103, 6929, 26528, 4220, 1106, 1207, 16291, 100, 1105, 1111, 1103, 1148, 1159, 117, 1152, 2498, 170, 1449, 1113, 170, 11451, 113, 1573, 1658, 114, 4220, 1106, 170, 5250, 17189, 119, 102, 2695, 1138, 1167, 18701, 4160, 1116, 117, 1167, 15175, 2591, 4160, 1116, 117, 1105, 1167, 13943, 2962, 1190, 26528, 119, 6364, 1114, 170, 3110, 151, 8816, 1348, 13451, 1111, 7688, 23131, 3395, 3776, 1105, 9554, 2394, 4540, 1114, 5096, 2069, 1279, 1619, 117, 26528, 5096, 1105, 26528, 3405, 2621, 5250, 1116, 1106, 1202, 1614, 1152, 1309, 1180, 1196, 119, 102};
outputs[1].name = "token_type_ids";
outputs[1].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[1].dims = {99};
outputs[1].values_int64 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
outputs[2].name = "attention_mask";
outputs[2].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[2].dims = {99};
outputs[2].values_int64 = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
TestInference(*ort_env, model_path.c_str(), inputs, outputs, GetLibraryPath());
inputs[0].name = "text";
inputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING;
inputs[0].dims = {2};
inputs[0].values_string = {"", "a"};
outputs[0].name = "input_ids";
outputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[0].dims = {4};
outputs[0].values_int64 = {101, 102, 170, 102};
outputs[1].name = "token_type_ids";
outputs[1].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[1].dims = {4};
outputs[1].values_int64 = {0, 0, 1, 1};
outputs[2].name = "attention_mask";
outputs[2].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[2].dims = {4};
outputs[2].values_int64 = {1, 1, 1, 1};
TestInference(*ort_env, model_path.c_str(), inputs, outputs, GetLibraryPath());
}
TEST(tokenizer_operators, test_bert_tokenizer_scalar) {
auto ort_env = std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_WARNING, "Default");
std::vector<TestValue> inputs(1);
inputs[0].name = "text";
inputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING;
inputs[0].dims = {};
inputs[0].values_string = {"We look forward to welcoming you to our stores. Whether you shop in a store or shop online, our Specialists can help you buy the products you love."};
std::vector<TestValue> outputs(3);
outputs[0].name = "input_ids";
outputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[0].dims = {33};
outputs[0].values_int64 = {101, 1195, 1440, 1977, 1106, 20028, 1128, 1106, 1412, 4822, 119, 2480, 1128, 4130, 1107, 170, 2984, 1137, 4130, 3294, 117, 1412, 18137, 1169, 1494, 1128, 4417, 1103, 2982, 1128, 1567, 119, 102};
outputs[1].name = "token_type_ids";
outputs[1].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[1].dims = {33};
outputs[1].values_int64 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
outputs[2].name = "attention_mask";
outputs[2].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[2].dims = {33};
outputs[2].values_int64 = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
std::filesystem::path model_path = __FILE__;
model_path = model_path.parent_path();
model_path /= "..";
model_path /= "data";
model_path /= "test_bert_tokenizer_scalar.onnx";
TestInference(*ort_env, model_path.c_str(), inputs, outputs, GetLibraryPath());
// switch the global locale from the default "C" to the user-preferred system locale
std::locale::global(std::locale(""));
inputs[0].name = "text";
inputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING;
inputs[0].dims = {};
inputs[0].values_string = {
"再见我的爱\n"
"I wanna say goodbye\n"
"再见我的过去\n"
"I want a new life"};
outputs[0].name = "input_ids";
outputs[0].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[0].dims = {22};
outputs[0].values_int64 = {101, 100, 100, 100, 100, 100, 178, 16445, 1474, 12903, 100, 100, 100, 100, 100, 100, 178, 1328, 170, 1207, 1297, 102};
outputs[1].name = "token_type_ids";
outputs[1].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[1].dims = {22};
outputs[1].values_int64 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
outputs[2].name = "attention_mask";
outputs[2].element_type = ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
outputs[2].dims = {22};
outputs[2].values_int64 = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
TestInference(*ort_env, model_path.c_str(), inputs, outputs, GetLibraryPath());
}
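
One caveat with `std::locale::global`: it changes the locale for the whole process, so tests that run after this one inherit the system locale. A sketch of an RAII guard that would scope the change (`ScopedLocale` is a hypothetical helper, not part of this commit):

```cpp
#include <locale>

// Hypothetical helper, not in this commit.
class ScopedLocale {
 public:
  explicit ScopedLocale(const std::locale& loc)
      : previous_(std::locale::global(loc)) {}  // global() returns the old locale
  ~ScopedLocale() { std::locale::global(previous_); }

 private:
  std::locale previous_;
};

// Usage inside a test body:
//   ScopedLocale guard{std::locale("")};  // system locale for this scope only
```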

View File

@@ -40,6 +40,14 @@ def _is_punctuation(char):
    return False

def _is_upper_case(char):
    cat = unicodedata.category(char)
    return cat == "Lu" and char.lower() != char

def find_expect_char_in_range(judge_fun, start, end):
    result = []
    for c in range(start, end):