Merge pull request #13 from sigorbor/nuget0.1.10

Promote NuGet package version + cargo fmt
2021-07-05 16:12:52 +03:00 · 2021-07-05 16:12:52 +03:00 · 23a6f6ad2b
--- a/Package.nuspec
+++ b/Package.nuspec
@@ -2,14 +2,14 @@
 <package >
  <metadata>
    <id>pq2json</id>
-    <version>0.1.9</version>
+    <version>0.1.10</version>
    <authors>Evgeney Ryzhyk</authors>
    <owners>Evgeney Ryzhyk</owners>
    <license type="expression">MIT</license>
    <projectUrl>https://github.com/Azure/azure-kusto-parquet-conv</projectUrl>
    <requireLicenseAcceptance>false</requireLicenseAcceptance>
    <description>Parquet to JSON (line delimited) converter tool.</description>
-    <releaseNotes>Added configuration option that enables implicit Parquet to Kusto types conversion.</releaseNotes>
+    <releaseNotes>Added support for producing empty values for columns missing from the file</releaseNotes>
    <copyright>Copyright 2020</copyright>
    <tags></tags>
    <dependencies></dependencies>
--- a/pq2json/src/converter.rs
+++ b/pq2json/src/converter.rs
@@ -75,8 +75,12 @@ fn projected_schema(
        let res = schema_fields.get_mut(c);

        match res {
-            Some(ptr) => { projected_fields.push(ptr.clone());  }
-            None => { missing_columns.insert(c.clone()); }
+            Some(ptr) => {
+                projected_fields.push(ptr.clone());
+            }
+            None => {
+                missing_columns.insert(c.clone());
+            }
        }
    }

@@ -84,7 +88,7 @@ fn projected_schema(
        SchemaType::group_type_builder(&file_meta.schema().get_basic_info().name())
            .with_fields(&mut projected_fields)
            .build()
-            .unwrap()
+            .unwrap(),
    )
 }

@@ -150,24 +154,23 @@ fn top_level_rows_to_csv(
            .from_writer(vec![]);
        let mut column_idx = 0;
        let columns = settings.columns.as_ref();
-        
+
        match columns {
-            Some(cols) => {      
-                // Produce empty values for columns specified by --columns argument, but missing in the file  
+            Some(cols) => {
+                // Produce empty values for columns specified by --columns argument, but missing in the file
                for col in cols {
-                    let value = 
-                        if missing_columns.contains(col) { 
-                                Value::Null
-                        } else {           
-                            let field_type = row.get_field_type(column_idx);
-                            let val = element_to_value!(field_type, row, column_idx, settings);
-                            column_idx = column_idx +1;
-                            val
-                        };
-    
+                    let value = if missing_columns.contains(col) {
+                        Value::Null
+                    } else {
+                        let field_type = row.get_field_type(column_idx);
+                        let val = element_to_value!(field_type, row, column_idx, settings);
+                        column_idx += 1;
+                        val
+                    };
+
                    csv_writer.write_field(value_to_csv(&value))?;
                }
-            },
+            }
            None => {
                // No columns specified by --columns argument
                for i in 0..row.len() {
--- a/pq2json/src/main.rs
+++ b/pq2json/src/main.rs
@@ -34,9 +34,7 @@ fn main() {
            Arg::with_name("convert-types")
                .short("r")
                .long("convert-types")
-                .help(
-                    "Implicit Parquet to Kusto types conversion (e.g. U64 into long)"
-                )
+                .help("Implicit Parquet to Kusto types conversion (e.g. U64 into long)")
                .takes_value(false)
                .required(false),
        )
--- a/pq2json/src/schema.rs
+++ b/pq2json/src/schema.rs
@@ -49,22 +49,23 @@ pub fn print_csl_schema(input_file: &str) -> Result<(), Box<dyn Error>> {
    let schema_desc = file_meta.schema_descr();

    let fields = match schema_desc.root_schema() {
-        &Type::GroupType { ref fields, .. } => {
-            fields
-                .iter()
-                .map(|field| field_csl_schema(field))
-                .collect::<Vec<(&str, &str)>>()
-        }
+        &Type::GroupType { ref fields, .. } => fields
+            .iter()
+            .map(|field| field_csl_schema(field))
+            .collect::<Vec<(&str, &str)>>(),
        _ => panic!("root schema is expected to be of group type!"),
    };

    let json_arr = Value::Array(
-        fields.iter().map(|(field_name, field_type)| {
-            let mut map = serde_json::Map::with_capacity(2);
-            map.insert(String::from("name"), Value::String(field_name.to_string()));
-            map.insert(String::from("type"), Value::String(field_type.to_string()));
-            Value::Object(map)
-        }).collect_vec()
+        fields
+            .iter()
+            .map(|(field_name, field_type)| {
+                let mut map = serde_json::Map::with_capacity(2);
+                map.insert(String::from("name"), Value::String(field_name.to_string()));
+                map.insert(String::from("type"), Value::String(field_type.to_string()));
+                Value::Object(map)
+            })
+            .collect_vec(),
    );
    println!("{}", serde_json::to_string(&json_arr)?);
    Ok(())
@@ -72,7 +73,11 @@ pub fn print_csl_schema(input_file: &str) -> Result<(), Box<dyn Error>> {

 fn field_csl_schema(field_type: &Type) -> (&str, &str) {
    match field_type {
-        Type::PrimitiveType { ref basic_info, physical_type, .. } => {
+        Type::PrimitiveType {
+            ref basic_info,
+            physical_type,
+            ..
+        } => {
            let csl_type = match physical_type {
                PhysicalType::BOOLEAN => "bool",
                PhysicalType::BYTE_ARRAY => match basic_info.logical_type() {
@@ -83,22 +88,22 @@ fn field_csl_schema(field_type: &Type) -> (&str, &str) {
                PhysicalType::FIXED_LEN_BYTE_ARRAY => match basic_info.logical_type() {
                    LogicalType::DECIMAL => "real",
                    _ => "dynamic",
-                }
+                },
                PhysicalType::DOUBLE | PhysicalType::FLOAT => "real",
                PhysicalType::INT32 => match basic_info.logical_type() {
                    LogicalType::DATE => "datetime",
                    LogicalType::DECIMAL => "real",
                    _ => "long",
-                }
+                },
                PhysicalType::INT64 => match basic_info.logical_type() {
                    LogicalType::TIMESTAMP_MILLIS | LogicalType::TIMESTAMP_MICROS => "datetime",
                    LogicalType::DECIMAL => "real",
-                    _ => "long"
-                }
+                    _ => "long",
+                },
                PhysicalType::INT96 => "datetime",
            };
            (basic_info.name(), csl_type)
        }
        Type::GroupType { ref basic_info, .. } => (basic_info.name(), "dynamic"),
    }
-}
+}