From 2923e77408055566b590629c468110a4671aeab0 Mon Sep 17 00:00:00 2001
From: Apoorve Dave <66283785+apoorvedave1@users.noreply.github.com>
Date: Fri, 26 Mar 2021 17:39:42 -0700
Subject: [PATCH] add distinct on file ids

Co-authored-by: EJ Song <51077614+sezruby@users.noreply.github.com>
---
 .../com/microsoft/hyperspace/index/rules/PEFilterIndexRule.scala | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/main/scala/com/microsoft/hyperspace/index/rules/PEFilterIndexRule.scala b/src/main/scala/com/microsoft/hyperspace/index/rules/PEFilterIndexRule.scala
index 5d47ce22..5186a5f3 100644
--- a/src/main/scala/com/microsoft/hyperspace/index/rules/PEFilterIndexRule.scala
+++ b/src/main/scala/com/microsoft/hyperspace/index/rules/PEFilterIndexRule.scala
@@ -100,6 +100,7 @@ object PEFilterIndexRule
         .parquet(index.content.files.map(_.toString): _*)
         .where(condition.sql)
         .select(IndexConstants.DATA_FILE_NAME_ID)
+        .distinct
     val fileIds = filteredDf.rdd.map(r => r(0)).collect.toSet
 
     index.fileIdTracker.getFileToIdMap