maro/tests/test_binary_convert_read.py

167 lines
5.2 KiB
Python

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT licence
import os
import shutil
import tempfile
import unittest

from maro.data_lib import BinaryConverter, BinaryReader
from maro.data_lib.item_meta import BinaryMeta


class TestBinaryConverter(unittest.TestCase):
    """Round-trip tests: convert CSV data with BinaryConverter, then read it back with BinaryReader.

    All fixture paths are relative ("tests/data/data_lib/..."), so the tests
    must be run from a working directory where those paths resolve.
    """

    def _make_out_bin(self):
        """Return the path of the output binary inside a fresh temporary directory.

        The directory is registered with ``addCleanup`` so it is removed when
        the test finishes, whether it passes or fails.
        """
        out_dir = tempfile.mkdtemp()
        # Best-effort removal: ignore errors so that a cleanup problem (for
        # example a file still held open) does not turn a passing test into a failure.
        self.addCleanup(shutil.rmtree, out_dir, ignore_errors=True)

        return os.path.join(out_dir, "trips.bin")

    @staticmethod
    def _data_path(*parts):
        """Join ``parts`` onto the relative test-data directory ``tests/data/data_lib``."""
        return os.path.join("tests", "data", "data_lib", *parts)

    def test_convert_with_events(self):
        """Convert the same CSV twice with the case_1 meta and check header, items and time filters."""
        out_bin = self._make_out_bin()
        meta_file = self._data_path("case_1", "meta.yml")
        csv_file = self._data_path("trips.csv")

        bct = BinaryConverter(out_bin, meta_file)

        # add and convert the 1st csv file
        bct.add_csv(csv_file)

        # adding again appends to the end, ignoring the order
        bct.add_csv(csv_file)

        # flush will close the file, cannot add again
        bct.flush()

        # check that the output exists
        self.assertTrue(os.path.exists(out_bin))

        # check content
        reader = BinaryReader(out_bin)

        # start tick should be the smallest one
        start_date = reader.start_datetime

        self.assertEqual(start_date.year, 2019)
        self.assertEqual(start_date.month, 1)
        self.assertEqual(start_date.day, 1)
        self.assertEqual(start_date.hour, 0)
        self.assertEqual(start_date.minute, 0)
        self.assertEqual(start_date.second, 0)

        end_date = reader.end_datetime

        self.assertEqual(end_date.year, 2019)
        self.assertEqual(end_date.month, 1)
        self.assertEqual(end_date.day, 1)
        self.assertEqual(end_date.hour, 0)
        self.assertEqual(end_date.minute, 5)
        self.assertEqual(end_date.second, 0)

        # the csv was added twice, so there should be double the items of trips.csv
        self.assertEqual(4 * 2, reader.header.item_count)

        # 20 bytes per item
        self.assertEqual(20, reader.header.item_size)

        expected_fields = ('timestamp', 'durations', 'src_station', 'dest_station')
        # expected src_station per row of one pass over trips.csv; the pattern
        # repeats (hence the modulo below) because the csv was added twice
        start_station_index = [0, 0, 1, 0]

        # check the iterating interface
        for idx, item in enumerate(reader.items()):
            # check that the fields are the same as in the meta
            self.assertTupleEqual(expected_fields, item._fields)

            # check the item's start station index
            self.assertEqual(start_station_index[idx % len(start_station_index)], item.src_station)

        # check that the filter works as expected
        matched = len(list(reader.items(end_time_offset=0, time_unit="m")))

        # although 2 items match the condition, the items are not sorted: the reader does not
        # read to the end, it stops at the first item which does not match the condition
        self.assertEqual(1, matched)

        matched = len(list(reader.items(start_time_offset=1, time_unit='m')))

        # with a start offset of 1 minute, 6 items are expected
        self.assertEqual(6, matched)

    def test_convert_without_events(self):
        """Convert with the case_2 meta and check the event definitions exposed by the reader's meta."""
        out_bin = self._make_out_bin()
        meta_file = self._data_path("case_2", "meta.yml")
        csv_file = self._data_path("trips.csv")

        bct = BinaryConverter(out_bin, meta_file)

        bct.add_csv(csv_file)

        # flush will close the file, cannot add again
        bct.flush()

        reader = BinaryReader(out_bin)

        meta: BinaryMeta = reader.meta

        self.assertIsNotNone(meta)

        # check events
        # NOTE(review): judging by the test name, the case_2 meta presumably defines no events
        # and these are the library defaults -- confirm against the fixture.
        self.assertListEqual(
            ["require_bike", "return_bike", "rebalance_bike", "deliver_bike"],
            [event.display_name for event in meta.events],
        )
        self.assertListEqual(
            ["RequireBike", "ReturnBike", "RebalanceBike", "DeliverBike"],
            [event.type_name for event in meta.events],
        )

        self.assertEqual("RequireBike", meta.default_event_name)
        self.assertIsNone(meta.event_attr_name)

    def test_convert_with_starttimestamp(self):
        """Convert with an explicit ``utc_start_timestamp`` and check that it shifts the tick offsets."""
        out_bin = self._make_out_bin()
        meta_file = self._data_path("case_2", "meta.yml")
        csv_file = self._data_path("trips.csv")

        # 12/31/2018 @ 11:59pm (UTC)
        bct = BinaryConverter(out_bin, meta_file, utc_start_timestamp=1546300740)

        bct.add_csv(csv_file)

        # flush will close the file, cannot add again
        bct.flush()

        reader = BinaryReader(out_bin)

        # check header
        self.assertEqual(1546300740, reader.header.starttime)

        # then tick 0 will not be 2019/01/01 00:00:00
        matched = len(list(reader.items(end_time_offset=0, time_unit='m')))

        self.assertEqual(0, matched)

        # it should be tick 1 for now
        matched = len(list(reader.items(end_time_offset=1, time_unit='m')))

        self.assertEqual(1, matched)

    def test_convert_without_meta_timestamp(self):
        """Creating a converter with the case_3 meta must raise."""
        out_bin = self._make_out_bin()
        meta_file = self._data_path("case_3", "meta.yml")

        # NOTE(review): judging by the test name, the case_3 meta presumably lacks a timestamp
        # definition. The concrete exception type is not visible from this file, so the broad
        # Exception check is kept -- narrow it once the type is confirmed.
        with self.assertRaises(Exception):
            BinaryConverter(out_bin, meta_file)
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()