Merge pull request #13 from sigorbor/nuget0.1.10
Promote NuGet package version + cargo fmt
This commit is contained in:
Commit
23a6f6ad2b
|
@ -2,14 +2,14 @@
|
|||
<package >
|
||||
<metadata>
|
||||
<id>pq2json</id>
|
||||
<version>0.1.9</version>
|
||||
<version>0.1.10</version>
|
||||
<authors>Evgeney Ryzhyk</authors>
|
||||
<owners>Evgeney Ryzhyk</owners>
|
||||
<license type="expression">MIT</license>
|
||||
<projectUrl>https://github.com/Azure/azure-kusto-parquet-conv</projectUrl>
|
||||
<requireLicenseAcceptance>false</requireLicenseAcceptance>
|
||||
<description>Parquet to JSON (line delimited) converter tool.</description>
|
||||
<releaseNotes>Added configuration option that enables implicit Parquet to Kusto types conversion.</releaseNotes>
|
||||
<releaseNotes>Added support for producing empty values for columns missing from the file</releaseNotes>
|
||||
<copyright>Copyright 2020</copyright>
|
||||
<tags></tags>
|
||||
<dependencies></dependencies>
|
||||
|
|
|
@ -75,8 +75,12 @@ fn projected_schema(
|
|||
let res = schema_fields.get_mut(c);
|
||||
|
||||
match res {
|
||||
Some(ptr) => { projected_fields.push(ptr.clone()); }
|
||||
None => { missing_columns.insert(c.clone()); }
|
||||
Some(ptr) => {
|
||||
projected_fields.push(ptr.clone());
|
||||
}
|
||||
None => {
|
||||
missing_columns.insert(c.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -84,7 +88,7 @@ fn projected_schema(
|
|||
SchemaType::group_type_builder(&file_meta.schema().get_basic_info().name())
|
||||
.with_fields(&mut projected_fields)
|
||||
.build()
|
||||
.unwrap()
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -150,24 +154,23 @@ fn top_level_rows_to_csv(
|
|||
.from_writer(vec![]);
|
||||
let mut column_idx = 0;
|
||||
let columns = settings.columns.as_ref();
|
||||
|
||||
|
||||
match columns {
|
||||
Some(cols) => {
|
||||
// Produce empty values for columns specified by --columns argument, but missing in the file
|
||||
Some(cols) => {
|
||||
// Produce empty values for columns specified by --columns argument, but missing in the file
|
||||
for col in cols {
|
||||
let value =
|
||||
if missing_columns.contains(col) {
|
||||
Value::Null
|
||||
} else {
|
||||
let field_type = row.get_field_type(column_idx);
|
||||
let val = element_to_value!(field_type, row, column_idx, settings);
|
||||
column_idx = column_idx +1;
|
||||
val
|
||||
};
|
||||
|
||||
let value = if missing_columns.contains(col) {
|
||||
Value::Null
|
||||
} else {
|
||||
let field_type = row.get_field_type(column_idx);
|
||||
let val = element_to_value!(field_type, row, column_idx, settings);
|
||||
column_idx += 1;
|
||||
val
|
||||
};
|
||||
|
||||
csv_writer.write_field(value_to_csv(&value))?;
|
||||
}
|
||||
},
|
||||
}
|
||||
None => {
|
||||
// No columns specified by --columns argument
|
||||
for i in 0..row.len() {
|
||||
|
|
|
@ -34,9 +34,7 @@ fn main() {
|
|||
Arg::with_name("convert-types")
|
||||
.short("r")
|
||||
.long("convert-types")
|
||||
.help(
|
||||
"Implicit Parquet to Kusto types conversion (e.g. U64 into long)"
|
||||
)
|
||||
.help("Implicit Parquet to Kusto types conversion (e.g. U64 into long)")
|
||||
.takes_value(false)
|
||||
.required(false),
|
||||
)
|
||||
|
|
|
@ -49,22 +49,23 @@ pub fn print_csl_schema(input_file: &str) -> Result<(), Box<dyn Error>> {
|
|||
let schema_desc = file_meta.schema_descr();
|
||||
|
||||
let fields = match schema_desc.root_schema() {
|
||||
&Type::GroupType { ref fields, .. } => {
|
||||
fields
|
||||
.iter()
|
||||
.map(|field| field_csl_schema(field))
|
||||
.collect::<Vec<(&str, &str)>>()
|
||||
}
|
||||
&Type::GroupType { ref fields, .. } => fields
|
||||
.iter()
|
||||
.map(|field| field_csl_schema(field))
|
||||
.collect::<Vec<(&str, &str)>>(),
|
||||
_ => panic!("root schema is expected to be of group type!"),
|
||||
};
|
||||
|
||||
let json_arr = Value::Array(
|
||||
fields.iter().map(|(field_name, field_type)| {
|
||||
let mut map = serde_json::Map::with_capacity(2);
|
||||
map.insert(String::from("name"), Value::String(field_name.to_string()));
|
||||
map.insert(String::from("type"), Value::String(field_type.to_string()));
|
||||
Value::Object(map)
|
||||
}).collect_vec()
|
||||
fields
|
||||
.iter()
|
||||
.map(|(field_name, field_type)| {
|
||||
let mut map = serde_json::Map::with_capacity(2);
|
||||
map.insert(String::from("name"), Value::String(field_name.to_string()));
|
||||
map.insert(String::from("type"), Value::String(field_type.to_string()));
|
||||
Value::Object(map)
|
||||
})
|
||||
.collect_vec(),
|
||||
);
|
||||
println!("{}", serde_json::to_string(&json_arr)?);
|
||||
Ok(())
|
||||
|
@ -72,7 +73,11 @@ pub fn print_csl_schema(input_file: &str) -> Result<(), Box<dyn Error>> {
|
|||
|
||||
fn field_csl_schema(field_type: &Type) -> (&str, &str) {
|
||||
match field_type {
|
||||
Type::PrimitiveType { ref basic_info, physical_type, .. } => {
|
||||
Type::PrimitiveType {
|
||||
ref basic_info,
|
||||
physical_type,
|
||||
..
|
||||
} => {
|
||||
let csl_type = match physical_type {
|
||||
PhysicalType::BOOLEAN => "bool",
|
||||
PhysicalType::BYTE_ARRAY => match basic_info.logical_type() {
|
||||
|
@ -83,22 +88,22 @@ fn field_csl_schema(field_type: &Type) -> (&str, &str) {
|
|||
PhysicalType::FIXED_LEN_BYTE_ARRAY => match basic_info.logical_type() {
|
||||
LogicalType::DECIMAL => "real",
|
||||
_ => "dynamic",
|
||||
}
|
||||
},
|
||||
PhysicalType::DOUBLE | PhysicalType::FLOAT => "real",
|
||||
PhysicalType::INT32 => match basic_info.logical_type() {
|
||||
LogicalType::DATE => "datetime",
|
||||
LogicalType::DECIMAL => "real",
|
||||
_ => "long",
|
||||
}
|
||||
},
|
||||
PhysicalType::INT64 => match basic_info.logical_type() {
|
||||
LogicalType::TIMESTAMP_MILLIS | LogicalType::TIMESTAMP_MICROS => "datetime",
|
||||
LogicalType::DECIMAL => "real",
|
||||
_ => "long"
|
||||
}
|
||||
_ => "long",
|
||||
},
|
||||
PhysicalType::INT96 => "datetime",
|
||||
};
|
||||
(basic_info.name(), csl_type)
|
||||
}
|
||||
Type::GroupType { ref basic_info, .. } => (basic_info.name(), "dynamic"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in new issue