Remove throwIfNotExists param in fromDirectory util (#453)

This commit is contained in:
EJ Song 2021-06-07 21:21:12 -07:00 коммит произвёл GitHub
Родитель 4bd323f893
Коммит 1a1f4f15a5
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 31 добавлений и 56 удалений

Просмотреть файл

@ -16,8 +16,6 @@
package com.microsoft.hyperspace.index
import java.io.FileNotFoundException
import scala.annotation.tailrec
import scala.collection.mutable.{HashMap, ListBuffer}
import scala.collection.mutable
@ -80,19 +78,21 @@ object Content {
* @param hadoopConfiguration Hadoop configuration.
* @param pathFilter Filter for accepting paths. The default filter is picked from spark
* codebase, which filters out files like _SUCCESS.
* @param throwIfNotExists Throws FileNotFoundException if path is not found. Else creates a
* blank directory tree with no files.
* @return Content object with Directory tree starting at root, and containing all leaf files
* from "path" argument.
* from "path" argument. If the given path does not exist, return Content object with
* empty Directory object that represents the path.
*/
def fromDirectory(
path: Path,
fileIdTracker: FileIdTracker,
hadoopConfiguration: Configuration,
pathFilter: PathFilter = PathUtils.DataPathFilter,
throwIfNotExists: Boolean = false): Content =
Content(Directory.fromDirectory(path, fileIdTracker, pathFilter, hadoopConfiguration,
throwIfNotExists))
pathFilter: PathFilter = PathUtils.DataPathFilter): Content = {
if (path.getFileSystem(hadoopConfiguration).exists(path)) {
Content(Directory.fromDirectory(path, fileIdTracker, pathFilter, hadoopConfiguration))
} else {
Content(Directory.createEmptyDirectory(path))
}
}
/**
* Create a Content object from a specified list of leaf files. Any files not listed here will
@ -178,27 +178,20 @@ object Directory {
* Create a Directory object from a directory path by recursively listing its leaf files. All
* files from the directory tree will be part of the Directory.
*
* If the directory doesn't exist on file system, it will either throw an exception if
* throwIfNotExists flag is set. Otherwise, this will create an empty Directory object
* starting at the root, ending at the directory path specified.
*
* @param path Starting directory path under which the files will be considered part of the
* Directory object.
* @param fileIdTracker FileIdTracker to keep mapping of file properties to assigned file ids.
* @param pathFilter Filter for accepting paths. The default filter is picked from spark
* codebase, which filters out files like _SUCCESS.
* @param throwIfNotExists If true, throw FileNotFoundException if path is not found. If set to
* false, create a blank directory tree with no files.
* @return Directory tree starting at root, and containing the files from "path" argument.
*/
def fromDirectory(
path: Path,
fileIdTracker: FileIdTracker,
pathFilter: PathFilter = PathUtils.DataPathFilter,
hadoopConfiguration: Configuration = new Configuration,
throwIfNotExists: Boolean = false): Directory = {
hadoopConfiguration: Configuration = new Configuration): Directory = {
val fs = path.getFileSystem(hadoopConfiguration)
val leafFiles = listLeafFiles(path, pathFilter, throwIfNotExists, fs)
val leafFiles = listLeafFiles(path, pathFilter, fs)
if (leafFiles.nonEmpty) {
fromLeafFiles(leafFiles, fileIdTracker)
@ -211,7 +204,9 @@ object Directory {
}
@tailrec
private def createEmptyDirectory(path: Path, subDirs: Seq[Directory] = Seq()): Directory = {
private[hyperspace] def createEmptyDirectory(
path: Path,
subDirs: Seq[Directory] = Seq()): Directory = {
if (path.isRoot) {
Directory(path.toString, subDirs = subDirs)
} else {
@ -302,17 +297,11 @@ object Directory {
private def listLeafFiles(
path: Path,
pathFilter: PathFilter,
throwIfNotExists: Boolean,
fs: FileSystem): Seq[FileStatus] = {
try {
val (files, directories) = fs.listStatus(path).partition(_.isFile)
// TODO: explore fs.listFiles(recursive = true) for better performance of file listing.
files.filter(s => pathFilter.accept(s.getPath)) ++
directories.flatMap(d => listLeafFiles(d.getPath, pathFilter, throwIfNotExists, fs))
} catch {
case _: FileNotFoundException if !throwIfNotExists => Seq()
case e: Throwable => throw e
}
val (files, directories) = fs.listStatus(path).partition(_.isFile)
// TODO: explore fs.listFiles(recursive = true) for better performance of file listing.
files.filter(s => pathFilter.accept(s.getPath)) ++
directories.flatMap(d => listLeafFiles(d.getPath, pathFilter, fs))
}
}

Просмотреть файл

@ -362,40 +362,26 @@ class IndexLogEntryTest extends HyperspaceSuite with SQLHelper {
assert(directoryEquals(actual, expected))
}
test("Directory.fromLeafFiles: throwIfNotExist flag throws exception for non-existent" +
"directory, otherwise works as expected.") {
val testDirPath = toPath(testDir)
val nonExistentDir = new Path(testDirPath, "nonexistent")
// Try create Directory object with throwIfNotExists to true. This should throw exception.
intercept[FileNotFoundException] {
Directory.fromDirectory(nonExistentDir, fileIdTracker, throwIfNotExists = true)
}
// Try create Directory object with throwIfNotExists to false. This should create empty
// Directory.
val expected = {
val nonExistentDirDirectory = Directory(nonExistentDir.getName)
createDirectory(nonExistentDir, nonExistentDirDirectory)
}
val actual = Directory.fromDirectory(nonExistentDir, fileIdTracker, throwIfNotExists = false)
assert(directoryEquals(actual, expected))
}
test("Directory.fromDirectory where the directory is empty.") {
test("Directory.fromDirectory where the directory is empty or nonexistent.") {
val testDirPath = toPath(testDir)
val emptyDirPath = new Path(testDirPath, "empty")
// Try create Directory object with throwifNotExists to false. This should create empt
// Directory.
val expected = {
val emptyDirDirectory = Directory(emptyDirPath.getName)
createDirectory(emptyDirPath, emptyDirDirectory)
}
val actual = Directory.fromDirectory(emptyDirPath, fileIdTracker)
assert(directoryEquals(actual, expected))
{
// Test non-existent directory.
val actual = Directory.fromDirectory(emptyDirPath, fileIdTracker)
assert(directoryEquals(actual, expected))
}
{
// Test empty directory.
emptyDirPath.getFileSystem(new Configuration).mkdirs(emptyDirPath)
val actual = Directory.fromDirectory(emptyDirPath, fileIdTracker)
assert(directoryEquals(actual, expected))
}
}
test("Directory Test: pathfilter adds only valid files to Directory object.") {