Merge pull request #13 from sigorbor/nuget0.1.10

Promote NuGet package version + cargo fmt
This commit is contained in:
sigorbor 2021-07-05 16:12:52 +03:00 коммит произвёл GitHub
Родитель ed1e7ecffd 8e0b51486f
Коммит 23a6f6ad2b
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 46 добавлений и 40 удалений

Просмотреть файл

@@ -2,14 +2,14 @@
<package >
<metadata>
<id>pq2json</id>
<version>0.1.9</version>
<version>0.1.10</version>
<authors>Evgeney Ryzhyk</authors>
<owners>Evgeney Ryzhyk</owners>
<license type="expression">MIT</license>
<projectUrl>https://github.com/Azure/azure-kusto-parquet-conv</projectUrl>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>Parquet to JSON (line delimited) converter tool.</description>
<releaseNotes>Added configuration option that enables implicit Parquet to Kusto types conversion.</releaseNotes>
<releaseNotes>Added support for producing empty values for columns missing from the file</releaseNotes>
<copyright>Copyright 2020</copyright>
<tags></tags>
<dependencies></dependencies>

Просмотреть файл

@@ -75,8 +75,12 @@ fn projected_schema(
let res = schema_fields.get_mut(c);
match res {
Some(ptr) => { projected_fields.push(ptr.clone()); }
None => { missing_columns.insert(c.clone()); }
Some(ptr) => {
projected_fields.push(ptr.clone());
}
None => {
missing_columns.insert(c.clone());
}
}
}
@@ -84,7 +88,7 @@ fn projected_schema(
SchemaType::group_type_builder(&file_meta.schema().get_basic_info().name())
.with_fields(&mut projected_fields)
.build()
.unwrap()
.unwrap(),
)
}
@@ -150,24 +154,23 @@ fn top_level_rows_to_csv(
.from_writer(vec![]);
let mut column_idx = 0;
let columns = settings.columns.as_ref();
match columns {
Some(cols) => {
// Produce empty values for columns specified by --columns argument, but missing in the file
Some(cols) => {
// Produce empty values for columns specified by --columns argument, but missing in the file
for col in cols {
let value =
if missing_columns.contains(col) {
Value::Null
} else {
let field_type = row.get_field_type(column_idx);
let val = element_to_value!(field_type, row, column_idx, settings);
column_idx = column_idx +1;
val
};
let value = if missing_columns.contains(col) {
Value::Null
} else {
let field_type = row.get_field_type(column_idx);
let val = element_to_value!(field_type, row, column_idx, settings);
column_idx += 1;
val
};
csv_writer.write_field(value_to_csv(&value))?;
}
},
}
None => {
// No columns specified by --columns argument
for i in 0..row.len() {

Просмотреть файл

@@ -34,9 +34,7 @@ fn main() {
Arg::with_name("convert-types")
.short("r")
.long("convert-types")
.help(
"Implicit Parquet to Kusto types conversion (e.g. U64 into long)"
)
.help("Implicit Parquet to Kusto types conversion (e.g. U64 into long)")
.takes_value(false)
.required(false),
)

Просмотреть файл

@@ -49,22 +49,23 @@ pub fn print_csl_schema(input_file: &str) -> Result<(), Box<dyn Error>> {
let schema_desc = file_meta.schema_descr();
let fields = match schema_desc.root_schema() {
&Type::GroupType { ref fields, .. } => {
fields
.iter()
.map(|field| field_csl_schema(field))
.collect::<Vec<(&str, &str)>>()
}
&Type::GroupType { ref fields, .. } => fields
.iter()
.map(|field| field_csl_schema(field))
.collect::<Vec<(&str, &str)>>(),
_ => panic!("root schema is expected to be of group type!"),
};
let json_arr = Value::Array(
fields.iter().map(|(field_name, field_type)| {
let mut map = serde_json::Map::with_capacity(2);
map.insert(String::from("name"), Value::String(field_name.to_string()));
map.insert(String::from("type"), Value::String(field_type.to_string()));
Value::Object(map)
}).collect_vec()
fields
.iter()
.map(|(field_name, field_type)| {
let mut map = serde_json::Map::with_capacity(2);
map.insert(String::from("name"), Value::String(field_name.to_string()));
map.insert(String::from("type"), Value::String(field_type.to_string()));
Value::Object(map)
})
.collect_vec(),
);
println!("{}", serde_json::to_string(&json_arr)?);
Ok(())
@@ -72,7 +73,11 @@ pub fn print_csl_schema(input_file: &str) -> Result<(), Box<dyn Error>> {
fn field_csl_schema(field_type: &Type) -> (&str, &str) {
match field_type {
Type::PrimitiveType { ref basic_info, physical_type, .. } => {
Type::PrimitiveType {
ref basic_info,
physical_type,
..
} => {
let csl_type = match physical_type {
PhysicalType::BOOLEAN => "bool",
PhysicalType::BYTE_ARRAY => match basic_info.logical_type() {
@@ -83,22 +88,22 @@ fn field_csl_schema(field_type: &Type) -> (&str, &str) {
PhysicalType::FIXED_LEN_BYTE_ARRAY => match basic_info.logical_type() {
LogicalType::DECIMAL => "real",
_ => "dynamic",
}
},
PhysicalType::DOUBLE | PhysicalType::FLOAT => "real",
PhysicalType::INT32 => match basic_info.logical_type() {
LogicalType::DATE => "datetime",
LogicalType::DECIMAL => "real",
_ => "long",
}
},
PhysicalType::INT64 => match basic_info.logical_type() {
LogicalType::TIMESTAMP_MILLIS | LogicalType::TIMESTAMP_MICROS => "datetime",
LogicalType::DECIMAL => "real",
_ => "long"
}
_ => "long",
},
PhysicalType::INT96 => "datetime",
};
(basic_info.name(), csl_type)
}
Type::GroupType { ref basic_info, .. } => (basic_info.name(), "dynamic"),
}
}
}