From 610ef3af8a3fcf38e7221e1e26d3b1ffccb75f6b Mon Sep 17 00:00:00 2001 From: Sourav Gupta <98318303+souravgupta-msft@users.noreply.github.com> Date: Tue, 7 Nov 2023 15:56:28 +0530 Subject: [PATCH] Symlink fix for ADLS accounts (#1283) --- CHANGELOG.md | 1 + azure-pipeline-templates/e2e-tests-spcl.yml | 4 + azure-pipeline-templates/e2e-tests.yml | 5 +- azure-pipeline-templates/verbose-tests.yml | 19 +++++ component/azstorage/block_blob.go | 8 +- component/azstorage/config.go | 17 +++- component/azstorage/connection.go | 5 +- component/azstorage/datalake.go | 77 ++++++++++-------- component/libfuse/libfuse_handler.go | 1 - setup/baseConfig.yaml | 2 +- test/e2e_tests/dir_test.go | 89 +++++++++++++++++++-- test/e2e_tests/file_test.go | 65 +++++++++++++-- testdata/config/azure_key_symlink.yaml | 38 +++++++++ 13 files changed, 275 insertions(+), 56 deletions(-) create mode 100644 testdata/config/azure_key_symlink.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b40c8c0..8b42f71b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## 2.1.2 (WIP) **Bug Fixes** +- [#1243](https://github.com/Azure/azure-storage-fuse/issues/1243) Fixed issue where symlink was not working for ADLS accounts. ## 2.1.1 (2023-10-31) **Bug Fixes** diff --git a/azure-pipeline-templates/e2e-tests-spcl.yml b/azure-pipeline-templates/e2e-tests-spcl.yml index 3aca5768..e48e72ed 100644 --- a/azure-pipeline-templates/e2e-tests-spcl.yml +++ b/azure-pipeline-templates/e2e-tests-spcl.yml @@ -34,6 +34,9 @@ parameters: - name: stream_direct_test type: boolean default: false + - name: enable_symlink_adls + type: boolean + default: false steps: - script: | @@ -65,6 +68,7 @@ steps: verbose_log: ${{ parameters.verbose_log }} clone: ${{ parameters.clone }} stream_direct_test: ${{ parameters.stream_direct_test }} + enable_symlink_adls: ${{ parameters.enable_symlink_adls }} mountStep: script: | $(WORK_DIR)/blobfuse2 mount ${{ parameters.mount_dir }} --config-file=${{ parameters.config_file }} --default-working-dir=$(System.DefaultWorkingDirectory) diff --git a/azure-pipeline-templates/e2e-tests.yml b/azure-pipeline-templates/e2e-tests.yml index 3bddab12..2dd90c4e 100755 --- a/azure-pipeline-templates/e2e-tests.yml +++ b/azure-pipeline-templates/e2e-tests.yml @@ -22,6 +22,9 @@ parameters: - name: stream_direct_test type: boolean default: false + - name: enable_symlink_adls + type: boolean + default: false - name: artifact_name type: string - name: verbose_log @@ -66,7 +69,7 @@ steps: - task: Go@0 inputs: command: 'test' - arguments: '-v -timeout=2h ./... -args -mnt-path=${{ parameters.mount_dir }} -adls=${{parameters.adls}} -clone=${{parameters.clone}} -tmp-path=${{parameters.temp_dir}} -quick-test=${{parameters.quick_test}} -stream-direct-test=${{parameters.stream_direct_test}} -distro-name="${{parameters.distro_name}}"' + arguments: '-v -timeout=2h ./... -args -mnt-path=${{ parameters.mount_dir }} -adls=${{parameters.adls}} -clone=${{parameters.clone}} -tmp-path=${{parameters.temp_dir}} -quick-test=${{parameters.quick_test}} -stream-direct-test=${{parameters.stream_direct_test}} -enable-symlink-adls=${{parameters.enable_symlink_adls}} -distro-name="${{parameters.distro_name}}"' workingDirectory: ${{ parameters.working_dir }}/test/e2e_tests displayName: 'E2E Test: ${{ parameters.idstring }}' timeoutInMinutes: 120 diff --git a/azure-pipeline-templates/verbose-tests.yml b/azure-pipeline-templates/verbose-tests.yml index bc515b5e..5faa31b2 100644 --- a/azure-pipeline-templates/verbose-tests.yml +++ b/azure-pipeline-templates/verbose-tests.yml @@ -384,6 +384,25 @@ steps: verbose_log: ${{ parameters.verbose_log }} + - template: e2e-tests-spcl.yml + parameters: + conf_template: azure_key_symlink.yaml + config_file: ${{ parameters.config }} + container: ${{ parameters.container }} + temp_dir: ${{ parameters.temp_dir }} + mount_dir: ${{ parameters.mount_dir }} + adls: ${{ parameters.adls }} + account_name: ${{ parameters.account_name }} + account_key: ${{ parameters.account_key }} + account_type: ${{ parameters.account_type }} + account_endpoint: ${{ parameters.account_endpoint }} + idstring: "${{ parameters.service }} Symlink config tests" + distro_name: ${{ parameters.distro_name }} + quick_test: ${{ parameters.quick_test }} + verbose_log: ${{ parameters.verbose_log }} + enable_symlink_adls: true + + #--------------------------------------- Setup: End to end tests with different File Cache configurations ------------------------------------------ - script: | cd ${{ parameters.working_dir }} diff --git a/component/azstorage/block_blob.go b/component/azstorage/block_blob.go index 832cab7b..b69f3900 100644 --- a/component/azstorage/block_blob.go +++ b/component/azstorage/block_blob.go @@ -729,19 +729,17 @@ func (bb *BlockBlob) ReadToFile(name string, offset int64, count int64, fi *os.F // ReadBuffer : Download a specific range from a blob to a buffer func (bb *BlockBlob) ReadBuffer(name string, offset int64, len int64) ([]byte, error) { - log.Trace("BlockBlob::ReadBuffer : name %s", name) + log.Trace("BlockBlob::ReadBuffer : name %s, offset %v, len %v", name, offset, len) var buff []byte if len == 0 { - len = azblob.CountToEnd attr, err := bb.GetAttr(name) if err != nil { return buff, err } - buff = make([]byte, attr.Size) - } else { - buff = make([]byte, len) + len = attr.Size - offset } + buff = make([]byte, len) blobURL := bb.Container.NewBlobURL(filepath.Join(bb.Config.prefixPath, name)) err := azblob.DownloadBlobToBuffer(context.Background(), blobURL, offset, len, buff, bb.downloadOptions) diff --git a/component/azstorage/config.go b/component/azstorage/config.go index ee5042e5..bc8227aa 100644 --- a/component/azstorage/config.go +++ b/component/azstorage/config.go @@ -489,7 +489,7 @@ func ParseAndValidateConfig(az *AzStorage, opt AzStorageOptions) error { log.Info("ParseAndValidateConfig : Retry Config: Retry count %d, Max Timeout %d, BackOff Time %d, Max Delay %d", az.stConfig.maxRetries, az.stConfig.maxTimeout, az.stConfig.backoffTime, az.stConfig.maxRetryDelay) - log.Info("ParseAndValidateConfig : Telemetry : %s", az.stConfig.telemetry) + log.Info("ParseAndValidateConfig : Telemetry : %s, Honour ACL: %v, disable symlink: %v", az.stConfig.telemetry, az.stConfig.honourACL, az.stConfig.disableSymlink) return nil } @@ -536,9 +536,20 @@ func ParseAndReadDynamicConfig(az *AzStorage, opt AzStorageOptions, reload bool) } if config.IsSet(compName + ".honour-acl") { - az.stConfig.HonourACL = opt.HonourACL + az.stConfig.honourACL = opt.HonourACL } else { - az.stConfig.HonourACL = false + az.stConfig.honourACL = false + } + + if config.IsSet("attr_cache.no-symlinks") { + err := config.UnmarshalKey("attr_cache.no-symlinks", &az.stConfig.disableSymlink) + if err != nil { + az.stConfig.disableSymlink = true + log.Err("ParseAndReadDynamicConfig : Failed to unmarshal attr_cache.no-symlinks") + } + } else { + // by default symlink will be disabled + az.stConfig.disableSymlink = true } // Auth related reconfig diff --git a/component/azstorage/connection.go b/component/azstorage/connection.go index 185ab723..3e03a327 100644 --- a/component/azstorage/connection.go +++ b/component/azstorage/connection.go @@ -77,8 +77,9 @@ type AzStorageConfig struct { maxResultsForList int32 disableCompression bool - telemetry string - HonourACL bool + telemetry string + honourACL bool + disableSymlink bool } type AzStorageConnection struct { diff --git a/component/azstorage/datalake.go b/component/azstorage/datalake.go index 4c1e4588..c1f581d9 100644 --- a/component/azstorage/datalake.go +++ b/component/azstorage/datalake.go @@ -430,7 +430,7 @@ func (dl *Datalake) GetAttr(name string) (attr *internal.ObjAttr, err error) { } attr.Flags.Set(internal.PropFlagMetadataRetrieved) - if dl.Config.HonourACL && dl.Config.authConfig.ObjectID != "" { + if dl.Config.honourACL && dl.Config.authConfig.ObjectID != "" { acl, err := pathURL.GetAccessControl(context.Background()) if err != nil { // Just ignore the error here as rest of the attributes have been retrieved @@ -495,41 +495,52 @@ func (dl *Datalake) List(prefix string, marker *string, count int32) ([]*interna // Process the paths returned in this result segment (if the segment is empty, the loop body won't execute) for _, pathInfo := range listPath.Paths { - var mode fs.FileMode - if pathInfo.Permissions != nil { - mode, err = getFileMode(*pathInfo.Permissions) + var attr *internal.ObjAttr + + if dl.Config.disableSymlink { + var mode fs.FileMode + if pathInfo.Permissions != nil { + mode, err = getFileMode(*pathInfo.Permissions) + if err != nil { + log.Err("Datalake::List : Failed to get file mode for %s [%s]", *pathInfo.Name, err.Error()) + m := "" + return pathList, &m, err + } + } else { + // This happens when a blob account is mounted with type:adls + log.Err("Datalake::List : Failed to get file permissions for %s", *pathInfo.Name) + } + + var contentLength int64 = 0 + if pathInfo.ContentLength != nil { + contentLength = *pathInfo.ContentLength + } else { + // This happens when a blob account is mounted with type:adls + log.Err("Datalake::List : Failed to get file length for %s", *pathInfo.Name) + } + + attr = &internal.ObjAttr{ + Path: *pathInfo.Name, + Name: filepath.Base(*pathInfo.Name), + Size: contentLength, + Mode: mode, + Mtime: pathInfo.LastModifiedTime(), + Atime: pathInfo.LastModifiedTime(), + Ctime: pathInfo.LastModifiedTime(), + Crtime: pathInfo.LastModifiedTime(), + Flags: internal.NewFileBitMap(), + } + if pathInfo.IsDirectory != nil && *pathInfo.IsDirectory { + attr.Flags = internal.NewDirBitMap() + attr.Mode = attr.Mode | os.ModeDir + } + } else { + attr, err = dl.GetAttr(*pathInfo.Name) if err != nil { - log.Err("Datalake::List : Failed to get file mode for %s [%s]", *pathInfo.Name, err.Error()) + log.Err("Datalake::List : Failed to get properties for %s [%s]", *pathInfo.Name, err.Error()) m := "" return pathList, &m, err } - } else { - // This happens when a blob account is mounted with type:adls - log.Err("Datalake::List : Failed to get file permissions for %s", *pathInfo.Name) - } - - var contentLength int64 = 0 - if pathInfo.ContentLength != nil { - contentLength = *pathInfo.ContentLength - } else { - // This happens when a blob account is mounted with type:adls - log.Err("Datalake::List : Failed to get file length for %s", *pathInfo.Name) - } - - attr := &internal.ObjAttr{ - Path: *pathInfo.Name, - Name: filepath.Base(*pathInfo.Name), - Size: contentLength, - Mode: mode, - Mtime: pathInfo.LastModifiedTime(), - Atime: pathInfo.LastModifiedTime(), - Ctime: pathInfo.LastModifiedTime(), - Crtime: pathInfo.LastModifiedTime(), - Flags: internal.NewFileBitMap(), - } - if pathInfo.IsDirectory != nil && *pathInfo.IsDirectory { - attr.Flags = internal.NewDirBitMap() - attr.Mode = attr.Mode | os.ModeDir } // Note: Datalake list paths does not return metadata/properties. @@ -538,6 +549,8 @@ func (dl *Datalake) List(prefix string, marker *string, count int32) ([]*interna // If this flag is not set the attribute cache will call get attributes // to fetch metadata properties. // Any method that populates the metadata should set the attribute flag. + // Alternatively, if you want Datalake list paths to return metadata/properties as well. + // pass CLI parameter --no-symlinks=false in the mount command. pathList = append(pathList, attr) } diff --git a/component/libfuse/libfuse_handler.go b/component/libfuse/libfuse_handler.go index 33fb84c0..b80c04ef 100644 --- a/component/libfuse/libfuse_handler.go +++ b/component/libfuse/libfuse_handler.go @@ -794,7 +794,6 @@ func libfuse_flush(path *C.char, fi *C.fuse_file_info_t) C.int { handle := (*handlemap.Handle)(unsafe.Pointer(uintptr(fileHandle.obj))) log.Trace("Libfuse::libfuse_flush : %s, handle: %d", handle.Path, handle.ID) - // If the file handle is not dirty, there is no need to flush // If the file handle is not dirty, there is no need to flush if fileHandle.dirty != 0 { handle.Flags.Set(handlemap.HandleFlagDirty) diff --git a/setup/baseConfig.yaml b/setup/baseConfig.yaml index 91bde837..183b56de 100644 --- a/setup/baseConfig.yaml +++ b/setup/baseConfig.yaml @@ -162,7 +162,7 @@ azstorage: auth-resource: update-md5: true|false validate-md5: true|false - virtual-directory: true|false + virtual-directory: true|false disable-compression: true|false max-results-for-list: telemetry : diff --git a/test/e2e_tests/dir_test.go b/test/e2e_tests/dir_test.go index fc48af7b..32037c45 100644 --- a/test/e2e_tests/dir_test.go +++ b/test/e2e_tests/dir_test.go @@ -53,17 +53,20 @@ import ( type dirTestSuite struct { suite.Suite - testPath string - adlsTest bool - minBuff []byte - medBuff []byte - hugeBuff []byte + testPath string + adlsTest bool + testCachePath string + minBuff []byte + medBuff []byte + hugeBuff []byte } var pathPtr string +var tempPathPtr string var adlsPtr string var clonePtr string var streamDirectPtr string +var enableSymlinkADLS string func regDirTestFlag(p *string, name string, value string, usage string) { if flag.Lookup(name) == nil { @@ -78,8 +81,10 @@ func getDirTestFlag(name string) string { func initDirFlags() { pathPtr = getDirTestFlag("mnt-path") adlsPtr = getDirTestFlag("adls") + tempPathPtr = getDirTestFlag("tmp-path") clonePtr = getDirTestFlag("clone") streamDirectPtr = getDirTestFlag("stream-direct-test") + enableSymlinkADLS = getDirTestFlag("enable-symlink-adls") } func getTestDirName(n int) string { @@ -509,6 +514,70 @@ func (suite *dirTestSuite) TestGitStash() { } } +func (suite *dirTestSuite) TestReadDirLink() { + if suite.adlsTest && strings.ToLower(enableSymlinkADLS) != "true" { + fmt.Printf("Skipping this test case for adls : %v, enable-symlink-adls : %v\n", suite.adlsTest, enableSymlinkADLS) + return + } + + dirName := suite.testPath + "/test_hns" + err := os.Mkdir(dirName, 0777) + suite.Nil(err) + + fileName := dirName + "/small_file.txt" + f, err := os.Create(fileName) + suite.Nil(err) + f.Close() + + err = os.WriteFile(fileName, suite.minBuff, 0777) + suite.Nil(err) + + symName := suite.testPath + "/dirlink.lnk" + err = os.Symlink(dirName, symName) + suite.Nil(err) + + dl, err := os.ReadDir(suite.testPath) + suite.Nil(err) + suite.Greater(len(dl), 0) + + // list operation on symlink + dirLinkList, err := os.ReadDir(symName) + suite.Nil(err) + suite.Greater(len(dirLinkList), 0) + + dirList, err := os.ReadDir(dirName) + suite.Nil(err) + suite.Greater(len(dirList), 0) + + suite.Equal(len(dirLinkList), len(dirList)) + + // comparing list values since they are sorted by file name + for i := range dirLinkList { + suite.Equal(dirLinkList[i].Name(), dirList[i].Name()) + } + + // temp cache cleanup + suite.dirTestCleanup([]string{suite.testCachePath + "/test_hns/small_file.txt", suite.testCachePath + "/dirlink.lnk"}) + + data1, err := os.ReadFile(symName + "/small_file.txt") + suite.Nil(err) + suite.Equal(len(data1), len(suite.minBuff)) + + // temp cache cleanup + suite.dirTestCleanup([]string{suite.testCachePath + "/test_hns/small_file.txt", suite.testCachePath + "/dirlink.lnk"}) + + data2, err := os.ReadFile(fileName) + suite.Nil(err) + suite.Equal(len(data2), len(suite.minBuff)) + + // validating data + suite.Equal(data1, data2) + + suite.dirTestCleanup([]string{dirName}) + err = os.Remove(symName) + suite.Equal(nil, err) +} + // -------------- Main Method ------------------- func TestDirTestSuite(t *testing.T) { initDirFlags() @@ -520,9 +589,15 @@ func TestDirTestSuite(t *testing.T) { // Generate random test dir name where our End to End test run is contained testDirName := getTestDirName(10) + fmt.Println(testDirName) // Create directory for testing the End to End test on mount path dirTest.testPath = pathPtr + "/" + testDirName + fmt.Println(dirTest.testPath) + + dirTest.testCachePath = tempPathPtr + "/" + testDirName + fmt.Println(dirTest.testCachePath) + if adlsPtr == "true" || adlsPtr == "True" { fmt.Println("ADLS Testing...") dirTest.adlsTest = true @@ -554,5 +629,7 @@ func TestDirTestSuite(t *testing.T) { func init() { regDirTestFlag(&pathPtr, "mnt-path", "", "Mount Path of Container") regDirTestFlag(&adlsPtr, "adls", "", "Account is ADLS or not") - regFileTestFlag(&fileTestGitClonePtr, "clone", "", "Git clone test is enable or not") + regDirTestFlag(&clonePtr, "clone", "", "Git clone test is enable or not") + regDirTestFlag(&tempPathPtr, "tmp-path", "", "Cache dir path") + regDirTestFlag(&enableSymlinkADLS, "enable-symlink-adls", "false", "Enable symlink support for ADLS accounts") } diff --git a/test/e2e_tests/file_test.go b/test/e2e_tests/file_test.go index 29109c70..372a3a76 100644 --- a/test/e2e_tests/file_test.go +++ b/test/e2e_tests/file_test.go @@ -51,18 +51,21 @@ import ( ) var fileTestPathPtr string +var fileTestTempPathPtr string var fileTestAdlsPtr string var fileTestGitClonePtr string var fileTestStreamDirectPtr string var fileTestDistroName string +var fileTestEnableSymlinkADLS string type fileTestSuite struct { suite.Suite - testPath string - adlsTest bool - minBuff []byte - medBuff []byte - hugeBuff []byte + testPath string + adlsTest bool + testCachePath string + minBuff []byte + medBuff []byte + hugeBuff []byte } func regFileTestFlag(p *string, name string, value string, usage string) { @@ -78,9 +81,11 @@ func getFileTestFlag(name string) string { func initFileFlags() { fileTestPathPtr = getFileTestFlag("mnt-path") fileTestAdlsPtr = getFileTestFlag("adls") + fileTestTempPathPtr = getFileTestFlag("tmp-path") fileTestGitClonePtr = getFileTestFlag("clone") fileTestStreamDirectPtr = getFileTestFlag("stream-direct-test") fileTestDistroName = getFileTestFlag("distro-name") + fileTestEnableSymlinkADLS = getFileTestFlag("enable-symlink-adls") } func getFileTestDirName(n int) string { @@ -515,6 +520,50 @@ func (suite *fileTestSuite) TestLinkDeleteReadTarget() { suite.Equal(nil, err) } +func (suite *fileTestSuite) TestListDirReadLink() { + if suite.adlsTest && strings.ToLower(fileTestEnableSymlinkADLS) != "true" { + fmt.Printf("Skipping this test case for adls : %v, enable-symlink-adls : %v\n", suite.adlsTest, fileTestEnableSymlinkADLS) + return + } + + fileName := suite.testPath + "/small_hns.txt" + f, err := os.Create(fileName) + suite.Nil(err) + f.Close() + + err = os.WriteFile(fileName, suite.minBuff, 0777) + suite.Nil(err) + + symName := suite.testPath + "/small_hns.lnk" + err = os.Symlink(fileName, symName) + suite.Nil(err) + + dl, err := os.ReadDir(suite.testPath) + suite.Nil(err) + suite.Greater(len(dl), 0) + + // temp cache cleanup + suite.fileTestCleanup([]string{suite.testCachePath + "/small_hns.txt", suite.testCachePath + "/small_hns.lnk"}) + + data1, err := os.ReadFile(symName) + suite.Nil(err) + suite.Equal(len(data1), len(suite.minBuff)) + + // temp cache cleanup + suite.fileTestCleanup([]string{suite.testCachePath + "/small_hns.txt", suite.testCachePath + "/small_hns.lnk"}) + + data2, err := os.ReadFile(fileName) + suite.Nil(err) + suite.Equal(len(data2), len(suite.minBuff)) + + // validating data + suite.Equal(data1, data2) + + suite.fileTestCleanup([]string{fileName}) + err = os.Remove(symName) + suite.Equal(nil, err) +} + /* func (suite *fileTestSuite) TestReadOnlyFile() { if suite.adlsTest == true { @@ -588,6 +637,10 @@ func TestFileTestSuite(t *testing.T) { // Create directory for testing the End to End test on mount path fileTest.testPath = fileTestPathPtr + "/" + testDirName fmt.Println(fileTest.testPath) + + fileTest.testCachePath = fileTestTempPathPtr + "/" + testDirName + fmt.Println(fileTest.testCachePath) + if fileTestAdlsPtr == "true" || fileTestAdlsPtr == "True" { fmt.Println("ADLS Testing...") fileTest.adlsTest = true @@ -619,4 +672,6 @@ func init() { regFileTestFlag(&fileTestPathPtr, "mnt-path", "", "Mount Path of Container") regFileTestFlag(&fileTestAdlsPtr, "adls", "", "Account is ADLS or not") regFileTestFlag(&fileTestGitClonePtr, "clone", "", "Git clone test is enable or not") + regFileTestFlag(&fileTestTempPathPtr, "tmp-path", "", "Cache dir path") + regFileTestFlag(&fileTestEnableSymlinkADLS, "enable-symlink-adls", "false", "Enable symlink support for ADLS accounts") } diff --git a/testdata/config/azure_key_symlink.yaml b/testdata/config/azure_key_symlink.yaml new file mode 100644 index 00000000..81d4d9eb --- /dev/null +++ b/testdata/config/azure_key_symlink.yaml @@ -0,0 +1,38 @@ +logging: + level: log_debug + file-path: "blobfuse2-logs.txt" + type: base + +components: + - libfuse + - file_cache + - attr_cache + - azstorage + +libfuse: + attribute-expiration-sec: 0 + entry-expiration-sec: 0 + negative-entry-expiration-sec: 0 + ignore-open-flags: true + +file_cache: + path: { 1 } + timeout-sec: 30 + max-size-mb: 2048 + allow-non-empty-temp: true + cleanup-on-start: true + +attr_cache: + timeout-sec: 3600 + no-symlinks: false + +azstorage: + type: { ACCOUNT_TYPE } + endpoint: { ACCOUNT_ENDPOINT } + use-http: { USE_HTTP } + account-name: { NIGHTLY_STO_ACC_NAME } + account-key: { NIGHTLY_STO_ACC_KEY } + mode: key + container: { 0 } + tier: hot + sdk-trace: { VERBOSE_LOG }