Merge pull request #14 from Azure/fixCompressedDataAndTests

Fix compressed data
ohad bitton 2020-02-02 17:37:53 +02:00, committed by GitHub
Parents aff2a7089f 35a0c13f0b
Commit e9a78c9d68
No key matching this signature was found
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 182 additions and 91 deletions

pom.xml (13 changed lines)

@@ -32,6 +32,11 @@
<target>8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.21.0</version>
</plugin>
</plugins>
<resources>
<resource>
@@ -113,7 +118,7 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.4</version>
<version>4.7</version>
<scope>test</scope>
</dependency>
<dependency>
@@ -133,6 +138,12 @@
<version>RELEASE</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.9.5</version>
<scope>test</scope>
</dependency>
</dependencies>
<repositories>
<repository>

FileWriter.java

@@ -19,7 +19,7 @@ import java.util.zip.GZIPOutputStream;
public class FileWriter implements Closeable {
private static final Logger log = LoggerFactory.getLogger(KustoSinkTask.class);
public FileDescriptor currentFile;
FileDescriptor currentFile;
private Timer timer;
private Consumer<FileDescriptor> onRollCallback;
private final long flushInterval;
@@ -27,7 +27,7 @@ public class FileWriter implements Closeable {
private Supplier<String> getFilePath;
private OutputStream outputStream;
private String basePath;
private CountingOutputStream fileStream;
private CountingOutputStream countingStream;
private long fileThreshold;
/**
@@ -51,7 +51,7 @@
this.shouldCompressData = shouldCompressData;
}
public boolean isDirty() {
boolean isDirty() {
return this.currentFile != null && this.currentFile.rawBytes > 0;
}
@@ -66,10 +66,10 @@
outputStream.write(data);
currentFile.rawBytes += data.length;
currentFile.zippedBytes += fileStream.numBytes;
currentFile.zippedBytes += countingStream.numBytes;
currentFile.numRecords++;
if (this.flushInterval == 0 || (currentFile.rawBytes + data.length) > fileThreshold) {
if (this.flushInterval == 0 || currentFile.rawBytes > fileThreshold) {
rotate();
resetFlushTimer(true);
}
@@ -93,9 +93,8 @@
FileOutputStream fos = new FileOutputStream(file);
fos.getChannel().truncate(0);
fileStream = new CountingOutputStream(fos);
outputStream = shouldCompressData ? new GZIPOutputStream(fileStream) : fileStream;
countingStream = new CountingOutputStream(fos);
outputStream = shouldCompressData ? new GZIPOutputStream(countingStream) : countingStream;
fileDescriptor.file = file;
currentFile = fileDescriptor;
}
@@ -105,13 +104,20 @@
openFile();
}
private void finishFile() throws IOException {
if (isDirty()) {
outputStream.close();
void finishFile() throws IOException {
if(isDirty()){
if(shouldCompressData){
GZIPOutputStream gzip = (GZIPOutputStream) outputStream;
gzip.finish();
} else {
outputStream.flush();
}
onRollCallback.accept(currentFile);
}
currentFile.file.delete();
// closing late so that the success callback will have a chance to use the file. This is a real thing on debug?!
outputStream.close();
}
public void rollback() throws IOException {
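
Side note (a minimal sketch, not part of this diff): the rewritten finishFile() above relies on GZIPOutputStream.finish(), which flushes the deflater and writes the gzip trailer while leaving the underlying stream open, so onRollCallback can read a complete .gz file before the stream is finally closed and the temp file deleted. A standalone illustration of that behaviour, with a made-up class name and payload:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

public class GzipFinishSketch {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        GZIPOutputStream gzip = new GZIPOutputStream(sink);
        gzip.write("some,csv,record\n".getBytes(StandardCharsets.UTF_8));

        // finish() flushes the deflater and writes the gzip trailer,
        // but unlike close() it leaves the underlying stream open.
        gzip.finish();

        // The buffered bytes already form a complete gzip stream and can be read back,
        // which is what the roll callback needs before the file is deleted.
        try (GZIPInputStream reader =
                     new GZIPInputStream(new ByteArrayInputStream(sink.toByteArray()))) {
            byte[] buffer = new byte[64];
            int read = reader.read(buffer); // payload is tiny, one read is enough here
            System.out.println(new String(buffer, 0, read, StandardCharsets.UTF_8));
        }

        gzip.close(); // close only after the consumer has had its chance to read
    }
}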

TopicPartitionWriter.java

@@ -1,6 +1,8 @@
package com.microsoft.azure.kusto.kafka.connect.sink;
import com.microsoft.azure.kusto.data.ConnectionStringBuilder;
import com.microsoft.azure.kusto.ingest.IngestClient;
import com.microsoft.azure.kusto.ingest.IngestClientFactory;
import com.microsoft.azure.kusto.ingest.IngestionProperties;
import com.microsoft.azure.kusto.ingest.source.CompressionType;
import com.microsoft.azure.kusto.ingest.source.FileSourceInfo;
@@ -45,6 +47,7 @@ public class TopicPartitionWriter {
try {
client.ingestFromFile(fileSourceInfo, ingestionProps);
log.info(String.format("Kusto ingestion: file (%s) of size (%s) at current offset (%s)", fileDescriptor.path, fileDescriptor.rawBytes, currentOffset));
this.lastCommittedOffset = currentOffset;
} catch (Exception e) {
@@ -55,9 +58,16 @@
public String getFilePath() {
long nextOffset = fileWriter != null && fileWriter.isDirty() ? currentOffset + 1 : currentOffset;
// Output files are always compressed
String compressionExtension = this.eventDataCompression == null ? "gz" : this.eventDataCompression.toString();
return Paths.get(basePath, String.format("kafka_%s_%s_%d.%s.%s", tp.topic(), tp.partition(), nextOffset, ingestionProps.getDataFormat(), compressionExtension)).toString();
String compressionExtension = "";
if (shouldCompressData(ingestionProps, null) || eventDataCompression != null) {
if(eventDataCompression != null) {
compressionExtension = "." + eventDataCompression.toString();
} else {
compressionExtension = ".gz";
}
}
return Paths.get(basePath, String.format("kafka_%s_%s_%d.%s%s", tp.topic(), tp.partition(), nextOffset, ingestionProps.getDataFormat(), compressionExtension)).toString();
}
public void writeRecord(SinkRecord record) {
@@ -83,9 +93,8 @@
this.currentOffset = record.kafkaOffset();
} else {
try {
fileWriter.write(value);
this.currentOffset = record.kafkaOffset();
fileWriter.write(value);
} catch (IOException e) {
log.error("File write failed", e);
}
@@ -93,18 +102,16 @@
}
public void open() {
boolean flushImmediately = ingestionProps.getDataFormat().equals(IngestionProperties.DATA_FORMAT.avro.toString())
|| ingestionProps.getDataFormat().equals(IngestionProperties.DATA_FORMAT.parquet.toString())
|| ingestionProps.getDataFormat().equals(IngestionProperties.DATA_FORMAT.orc.toString())
|| this.eventDataCompression != null;
// Should compress binary files
boolean shouldCompressData = shouldCompressData(this.ingestionProps, this.eventDataCompression);
fileWriter = new FileWriter(
basePath,
fileThreshold,
this::handleRollFile,
this::getFilePath,
flushImmediately ? 0 : flushInterval,
this.eventDataCompression == null);
!shouldCompressData ? 0 : flushInterval,
shouldCompressData);
}
public void close() {
@@ -115,4 +122,11 @@
e.printStackTrace();
}
}
static boolean shouldCompressData(IngestionProperties ingestionProps, CompressionType eventDataCompression) {
return !(ingestionProps.getDataFormat().equals(IngestionProperties.DATA_FORMAT.avro.toString())
|| ingestionProps.getDataFormat().equals(IngestionProperties.DATA_FORMAT.parquet.toString())
|| ingestionProps.getDataFormat().equals(IngestionProperties.DATA_FORMAT.orc.toString())
|| eventDataCompression != null);
}
}
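
Side note (a minimal sketch, not part of this diff): the new shouldCompressData helper decides whether FileWriter gzips data on disk, and getFilePath uses the same decision plus eventDataCompression to pick the file extension: binary formats (avro, parquet, orc) and already-compressed event data are written as-is, everything else gets ".gz". A reduced standalone version of that predicate, with the Kusto SDK types (IngestionProperties, CompressionType) replaced by plain strings for illustration:

public class CompressionDecisionSketch {

    // Mirrors the predicate added as TopicPartitionWriter.shouldCompressData, with the
    // Kusto SDK's IngestionProperties / CompressionType reduced to plain strings.
    static boolean shouldCompressData(String dataFormat, String eventDataCompression) {
        boolean binaryFormat = dataFormat.equals("avro")
                || dataFormat.equals("parquet")
                || dataFormat.equals("orc");
        // Binary formats and already-compressed event data are written as-is;
        // everything else (csv, json, ...) is gzipped on disk.
        return !(binaryFormat || eventDataCompression != null);
    }

    public static void main(String[] args) {
        System.out.println(shouldCompressData("csv", null));  // true  -> file named like kafka_topic_0_0.csv.gz
        System.out.println(shouldCompressData("avro", null)); // false -> file named like kafka_topic_0_0.avro
        System.out.println(shouldCompressData("csv", "gz"));  // false -> not re-gzipped, extension taken from the source compression
    }
}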

FileWriterTest.java

@@ -62,7 +62,7 @@ public class FileWriterTest {
Assert.assertEquals(Objects.requireNonNull(folder.listFiles()).length, 1);
Assert.assertEquals(fileWriter.currentFile.rawBytes, 0);
Assert.assertEquals(fileWriter.currentFile.path, FILE_PATH + ".gz");
Assert.assertEquals(fileWriter.currentFile.path, FILE_PATH);
Assert.assertTrue(fileWriter.currentFile.file.canWrite());
fileWriter.rollback();
@@ -80,7 +80,7 @@ public class FileWriterTest {
HashMap<String, Long> files = new HashMap<>();
final int MAX_FILE_SIZE = 128;
final int MAX_FILE_SIZE = 100;
Consumer<FileDescriptor> trackFiles = (FileDescriptor f) -> files.put(f.path, f.rawBytes);
@@ -177,11 +177,27 @@
GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream);
String msg = "Message";
Consumer<FileDescriptor> trackFiles = (FileDescriptor f) -> {
try (FileInputStream fileInputStream = new FileInputStream(f.file)){
Consumer<FileDescriptor> trackFiles = getAssertFileConsumer(msg);
Supplier<String> generateFileName = () -> Paths.get(path, java.util.UUID.randomUUID().toString()).toString() + ".csv.gz";
// Expect no files to be ingested as size is small and flushInterval is big
FileWriter fileWriter = new FileWriter(path, MAX_FILE_SIZE, trackFiles, generateFileName, 0, false);
gzipOutputStream.write(msg.getBytes());
gzipOutputStream.finish();
fileWriter.write(byteArrayOutputStream.toByteArray());
fileWriter.close();
Assert.assertEquals(Objects.requireNonNull(folder.listFiles()).length, 0);
}
static Consumer<FileDescriptor> getAssertFileConsumer(String msg) {
return (FileDescriptor f) -> {
try (FileInputStream fileInputStream = new FileInputStream(f.file)) {
byte[] bytes = IOUtils.toByteArray(fileInputStream);
try (ByteArrayInputStream bin = new ByteArrayInputStream(bytes);
GZIPInputStream gzipper = new GZIPInputStream(bin)){
GZIPInputStream gzipper = new GZIPInputStream(bin)) {
byte[] buffer = new byte[1024];
ByteArrayOutputStream out = new ByteArrayOutputStream();
@@ -202,19 +218,5 @@
Assert.fail(e.getMessage());
}
};
Supplier<String> generateFileName = () -> Paths.get(path, java.util.UUID.randomUUID().toString()).toString() + ".csv.gz";
// Expect no files to be ingested as size is small and flushInterval is big
FileWriter fileWriter = new FileWriter(path, MAX_FILE_SIZE, trackFiles, generateFileName, 0, true);
gzipOutputStream.write(msg.getBytes());
gzipOutputStream.finish();
fileWriter.write(byteArrayOutputStream.toByteArray());
fileWriter.close();
Assert.assertEquals(Objects.requireNonNull(folder.listFiles()).length, 0);
}
}

TopicPartitionWriterTest.java

@@ -2,9 +2,11 @@ package com.microsoft.azure.kusto.kafka.connect.sink;
import com.microsoft.azure.kusto.ingest.IngestClient;
import com.microsoft.azure.kusto.ingest.IngestionProperties;
import com.microsoft.azure.kusto.ingest.source.CompressionType;
import com.microsoft.azure.kusto.ingest.source.FileSourceInfo;
import org.apache.commons.io.FileUtils;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.After;
import org.junit.Before;
@@ -12,27 +14,31 @@ import org.junit.Test;
import org.mockito.ArgumentCaptor;
import org.testng.Assert;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import java.util.zip.GZIPOutputStream;
import static org.mockito.Mockito.*;
public class TopicPartitionWriterTest {
// TODO: should probably find a better way to mock internal class (FileWriter)...
File currentDirectory;
private File currentDirectory;
@Before
public final void before() {
currentDirectory = new File(Paths.get(
System.getProperty("java.io.tmpdir"),
FileWriter.class.getSimpleName(),
String.valueOf(Instant.now().toEpochMilli())
).toString());
// currentDirectory = new File(Paths.get(
// System.getProperty("java.io.tmpdir"),
// FileWriter.class.getSimpleName(),
// String.valueOf(Instant.now().toEpochMilli())
// ).toString());
currentDirectory = new File("C:\\Users\\ohbitton\\Desktop");
}
@After
@@ -89,10 +95,12 @@ public class TopicPartitionWriterTest {
long fileThreshold = 100;
long flushInterval = 300000;
TopicIngestionProperties props = new TopicIngestionProperties();
props.ingestionProperties = new IngestionProperties(db, table);
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, basePath, fileThreshold, flushInterval);
Assert.assertEquals(writer.getFilePath(), Paths.get(basePath, "kafka_testTopic_11_0").toString());
Assert.assertEquals(writer.getFilePath(), Paths.get(basePath, "kafka_testTopic_11_0.csv.gz").toString());
}
@Test
@@ -106,6 +114,7 @@
long flushInterval = 300000;
TopicIngestionProperties props = new TopicIngestionProperties();
props.ingestionProperties = new IngestionProperties(db, table);
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, basePath, fileThreshold, flushInterval);
writer.open();
List<SinkRecord> records = new ArrayList<>();
@@ -117,7 +126,7 @@
writer.writeRecord(record);
}
Assert.assertEquals(writer.getFilePath(), Paths.get(basePath, "kafka_testTopic_11_5").toString());
Assert.assertEquals(writer.getFilePath(), Paths.get(basePath, "kafka_testTopic_11_5.csv.gz").toString());
}
@Test
@@ -131,6 +140,7 @@
long flushInterval = 300000;
TopicIngestionProperties props = new TopicIngestionProperties();
props.ingestionProperties = new IngestionProperties(db, table);
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, basePath, fileThreshold, flushInterval);
writer.open();
writer.close();
@@ -162,63 +172,111 @@
// Assert.assertEquals(writer.getFilePath(), "kafka_testPartition_11_0");
}
@Test
public void testWriteStringyValuesAndOffset() throws Exception {
TopicPartition tp = new TopicPartition("testTopic", 2);
IngestClient mockClient = mock(IngestClient.class);
String db = "testdb1";
String table = "testtable1";
String basePath = Paths.get(currentDirectory.getPath(), "testWriteStringyValuesAndOffset").toString();
long fileThreshold = 100;
long flushInterval = 300000;
TopicIngestionProperties props = new TopicIngestionProperties();
props.ingestionProperties = new IngestionProperties(db, table);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, basePath, fileThreshold, flushInterval);
writer.open();
List<SinkRecord> records = new ArrayList<SinkRecord>();
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, "another,stringy,message", 3));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, "{'also':'stringy','sortof':'message'}", 4));
for (SinkRecord record : records) {
writer.writeRecord(record);
}
Assert.assertEquals(writer.fileWriter.currentFile.path, Paths.get(basePath, String.format("kafka_%s_%d_%d.gz", tp.topic(), tp.partition(), 0)).toString());
}
// @Test
// public void testWriteStringyValuesAndOffset() throws Exception {
// TopicPartition tp = new TopicPartition("testTopic", 2);
// IngestClient mockClient = mock(IngestClient.class);
// String db = "testdb1";
// String table = "testtable1";
// String basePath = Paths.get(currentDirectory.getPath(), "testWriteStringyValuesAndOffset").toString();
// long fileThreshold = 100;
// long flushInterval = 300000;
// TopicIngestionProperties props = new TopicIngestionProperties();
//
// props.ingestionProperties = new IngestionProperties(db, table);
// props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
// TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, basePath, fileThreshold, flushInterval);
//
//
// writer.open();
// List<SinkRecord> records = new ArrayList<SinkRecord>();
//
// records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, "another,stringy,message", 3));
// records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, "{'also':'stringy','sortof':'message'}", 4));
//
// for (SinkRecord record : records) {
// writer.writeRecord(record);
// }
//
// Assert.assertEquals(writer.fileWriter.currentFile.path, Paths.get(basePath, String.format("kafka_%s_%d_%d.%s.gz", tp.topic(), tp.partition(), 3, IngestionProperties.DATA_FORMAT.csv.name())).toString());
// }
@Test
public void testWriteBytesValuesAndOffset() throws Exception {
public void testWriteStringValuesAndOffset() throws IOException {
TopicPartition tp = new TopicPartition("testPartition", 11);
IngestClient mockClient = mock(IngestClient.class);
String db = "testdb1";
String table = "testtable1";
String basePath = Paths.get(currentDirectory.getPath(), "testWriteStringyValuesAndOffset").toString();
long fileThreshold = 50;
String[] messages = new String[]{ "stringy message", "another,stringy,message", "{'also':'stringy','sortof':'message'}"};
// Expect to finish file after writing forth message cause of fileThreshold
long fileThreshold = messages[0].length() + messages[1].length() + messages[2].length() + messages[2].length() - 1;
long flushInterval = 300000;
TopicIngestionProperties props = new TopicIngestionProperties();
props.ingestionProperties = new IngestionProperties(db, table);
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.csv);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, basePath, fileThreshold, flushInterval);
writer.open();
List<SinkRecord> records = new ArrayList<SinkRecord>();
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, "stringy message".getBytes(StandardCharsets.UTF_8), 10));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, "another,stringy,message".getBytes(StandardCharsets.UTF_8), 13));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, "{'also':'stringy','sortof':'message'}".getBytes(StandardCharsets.UTF_8), 14));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, "{'also':'stringy','sortof':'message'}".getBytes(StandardCharsets.UTF_8), 15));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, "{'also':'stringy','sortof':'message'}".getBytes(StandardCharsets.UTF_8), 16));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, "{'also':'stringy','sortof':'message'}".getBytes(StandardCharsets.UTF_8), 17));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, messages[0], 10));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, messages[1], 13));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, messages[2], 14));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, messages[2], 15));
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, null, messages[2], 16));
for (SinkRecord record : records) {
writer.writeRecord(record);
}
//TODO : file threshold ignored?
Assert.assertTrue(writer.lastCommittedOffset.equals((long) 15));
Assert.assertEquals(writer.currentOffset, 17);
Assert.assertEquals(writer.fileWriter.currentFile.path, Paths.get(basePath, String.format("kafka_%s_%d_%d.gz", tp.topic(), tp.partition(), 16)).toString());
Assert.assertEquals((long) writer.lastCommittedOffset, (long) 15);
Assert.assertEquals(writer.currentOffset, 16);
String currentFileName = writer.fileWriter.currentFile.path;
Assert.assertEquals(currentFileName, Paths.get(basePath, String.format("kafka_%s_%d_%d.%s.gz", tp.topic(), tp.partition(), 16, IngestionProperties.DATA_FORMAT.csv.name())).toString());
// Read
writer.fileWriter.finishFile();
Consumer<FileDescriptor> assertFileConsumer = FileWriterTest.getAssertFileConsumer(messages[2] + "\n");
assertFileConsumer.accept(writer.fileWriter.currentFile);
}
@Test
public void testWriteBytesValuesAndOffset() throws IOException {
TopicPartition tp = new TopicPartition("testPartition", 11);
IngestClient mockClient = mock(IngestClient.class);
String db = "testdb1";
String table = "testtable1";
String basePath = Paths.get(currentDirectory.getPath(), "testWriteStringyValuesAndOffset").toString();
FileInputStream fis = new FileInputStream("C:\\Users\\ohbitton\\source\\Workspaces\\Kusto\\Main\\Test\\UT\\Kusto.Engine.UT\\Common\\Kusto.Common.Svc\\Stream\\dataset3.avro");
ByteArrayOutputStream o = new ByteArrayOutputStream();
int content;
while ((content = fis.read()) != -1) {
// convert to char and display it
o.write(content);
}
// Expect to finish file with one record although fileThreshold is high
long fileThreshold = 128;
long flushInterval = 300000;
TopicIngestionProperties props = new TopicIngestionProperties();
props.ingestionProperties = new IngestionProperties(db, table);
props.ingestionProperties.setDataFormat(IngestionProperties.DATA_FORMAT.avro);
TopicPartitionWriter writer = new TopicPartitionWriter(tp, mockClient, props, basePath, fileThreshold, flushInterval);
writer.open();
List<SinkRecord> records = new ArrayList<SinkRecord>();
records.add(new SinkRecord(tp.topic(), tp.partition(), null, null, Schema.BYTES_SCHEMA, o.toByteArray(), 10));
for (SinkRecord record : records) {
writer.writeRecord(record);
}
Assert.assertEquals((long) writer.lastCommittedOffset, (long) 10);
Assert.assertEquals(writer.currentOffset, 10);
String currentFileName = writer.fileWriter.currentFile.path;
Assert.assertEquals(currentFileName, Paths.get(basePath, String.format("kafka_%s_%d_%d.%s.gz", tp.topic(), tp.partition(), 11, IngestionProperties.DATA_FORMAT.csv.name())).toString());
}
}