This commit is contained in:
Anubhuti Shruti 2024-06-21 11:17:20 +05:30
Родитель ed3729e030 6be841d32d
Коммит fa04d94847
7 изменённых файлов: 106 добавлений и 29 удалений

2
.github/CODEOWNERS поставляемый
Просмотреть файл

@ -1 +1 @@
* @vibhansa-msft @souravgupta-msft @ashruti-msft
* @vibhansa-msft @souravgupta-msft @ashruti-msft @syeleti-msft

Просмотреть файл

@ -7,6 +7,11 @@
**Other Changes**
- LFU policy in file cache has been deprecated.
- Default values, if not assigned in config, for the following parameters in block-cache are calculated as follows:
- Memory preallocated for Block-Cache is 80% of free memory
- Disk Cache size is 80% of free disk space
- Prefetch is 2 times number of CPU cores
- Parallelism is 3 times the number of CPU cores
## 2.3.0 (2024-05-16)
**Bug Fixes**

Просмотреть файл

@ -138,12 +138,12 @@ To learn about a specific command, just include the name of the command (For exa
* `--block-size-mb=<SIZE IN MB>`: Size of a block to be downloaded during streaming.
- Block-Cache options
* `--block-cache-block-size=<SIZE IN MB>`: Size of a block to be downloaded as a unit.
* `--block-cache-pool-size=<SIZE IN MB>`: Size of pool to be used for caching. This limits total memory used by block-cache.
* `--block-cache-pool-size=<SIZE IN MB>`: Size of pool to be used for caching. This limits total memory used by block-cache. Default - 80% of free memory available.
* `--block-cache-path=<PATH>`: Path where downloaded blocks will be persisted. Not providing this parameter will disable the disk caching.
* `--block-cache-disk-size=<SIZE IN MB>`: Disk space to be used for caching.
* `--block-cache-disk-size=<SIZE IN MB>`: Disk space to be used for caching. Default - 80% of free disk space.
* `--block-cache-disk-timeout=<seconds>`: Timeout for which disk cache is valid.
* `--block-cache-prefetch=<Number of blocks>`: Number of blocks to prefetch at max when sequential reads are in progress.
* `--block-cache-parallelism=<count>`: Number of parallel threads doing upload/download operation.
* `--block-cache-prefetch=<Number of blocks>`: Number of blocks to prefetch at max when sequential reads are in progress. Default - 2 times number of CPU cores.
* `--block-cache-parallelism=<count>`: Number of parallel threads doing upload/download operation. Default - 3 times number of CPU cores.
* `--block-cache-prefetch-on-open=true`: Start prefetching on open system call instead of waiting for first read. Enhances perf if file is read sequentially from offset 0.
- Fuse options
* `--attr-timeout=<TIMEOUT IN SECONDS>`: Time the kernel can cache inode attributes.

Просмотреть файл

@ -41,9 +41,11 @@ import (
"io"
"os"
"path/filepath"
"runtime"
"sort"
"strings"
"sync"
"syscall"
"time"
"github.com/Azure/azure-storage-fuse/v2/common"
@ -206,22 +208,26 @@ func (bc *BlockCache) Configure(_ bool) error {
bc.blockSize = uint64(conf.BlockSize * float64(_1MB))
}
bc.memSize = uint64(4192) * _1MB
if config.IsSet(compName + ".mem-size-mb") {
bc.memSize = conf.MemSize * _1MB
} else {
var sysinfo syscall.Sysinfo_t
err = syscall.Sysinfo(&sysinfo)
if err != nil {
log.Err("BlockCache::Configure : config error %s [%s]. Assigning a pre-defined value of 4GB.", bc.Name(), err.Error())
bc.memSize = uint64(4192) * _1MB
} else {
bc.memSize = uint64(0.8 * (float64)(sysinfo.Freeram) * float64(sysinfo.Unit))
}
}
bc.diskSize = uint64(4192)
if config.IsSet(compName + ".disk-size-mb") {
bc.diskSize = conf.DiskSize
}
bc.diskTimeout = defaultTimeout
if config.IsSet(compName + ".disk-timeout-sec") {
bc.diskTimeout = conf.DiskTimeout
}
bc.prefetchOnOpen = conf.PrefetchOnOpen
bc.prefetch = MIN_PREFETCH
bc.prefetch = uint32(2 * runtime.NumCPU())
bc.noPrefetch = false
err = config.UnmarshalKey("lazy-write", &bc.lazyWrite)
@ -242,7 +248,7 @@ func (bc *BlockCache) Configure(_ bool) error {
bc.maxDiskUsageHit = false
bc.workers = 128
bc.workers = uint32(3 * runtime.NumCPU())
if config.IsSet(compName + ".parallelism") {
bc.workers = conf.Workers
}
@ -261,6 +267,18 @@ func (bc *BlockCache) Configure(_ bool) error {
return fmt.Errorf("config error in %s [%s]", bc.Name(), err.Error())
}
}
var stat syscall.Statfs_t
err = syscall.Statfs(bc.tmpPath, &stat)
if err != nil {
log.Err("BlockCache::Configure : config error %s [%s]. Assigning a default value of 4GB or if any value is assigned to .disk-size-mb in config.", bc.Name(), err.Error())
bc.diskSize = uint64(4192) * _1MB
} else {
bc.diskSize = uint64(0.8 * float64(stat.Bavail) * float64(stat.Bsize))
}
}
if config.IsSet(compName + ".disk-size-mb") {
bc.diskSize = conf.DiskSize * _1MB
}
if (uint64(bc.prefetch) * uint64(bc.blockSize)) > bc.memSize {
@ -268,7 +286,7 @@ func (bc *BlockCache) Configure(_ bool) error {
return fmt.Errorf("config error in %s [memory limit too low for configured prefetch]", bc.Name())
}
log.Info("BlockCache::Configure : block size %v, mem size %v, worker %v, prefetch %v, disk path %v, max size %vMB, disk timeout %v, prefetch-on-open %t, maxDiskUsageHit %v, noPrefetch %v",
log.Info("BlockCache::Configure : block size %v, mem size %v, worker %v, prefetch %v, disk path %v, max size %v, disk timeout %v, prefetch-on-open %t, maxDiskUsageHit %v, noPrefetch %v",
bc.blockSize, bc.memSize, bc.workers, bc.prefetch, bc.tmpPath, bc.diskSize, bc.diskTimeout, bc.prefetchOnOpen, bc.maxDiskUsageHit, bc.noPrefetch)
bc.blockPool = NewBlockPool(bc.blockSize, bc.memSize)
@ -284,7 +302,7 @@ func (bc *BlockCache) Configure(_ bool) error {
}
if bc.tmpPath != "" {
bc.diskPolicy, err = tlru.New(uint32((bc.diskSize*_1MB)/bc.blockSize), bc.diskTimeout, bc.diskEvict, 60, bc.checkDiskUsage)
bc.diskPolicy, err = tlru.New(uint32((bc.diskSize)/bc.blockSize), bc.diskTimeout, bc.diskEvict, 60, bc.checkDiskUsage)
if err != nil {
log.Err("BlockCache::Configure : fail to create LRU for memory nodes [%s]", err.Error())
return fmt.Errorf("config error in %s [%s]", bc.Name(), err.Error())
@ -1258,7 +1276,7 @@ func (bc *BlockCache) diskEvict(node *list.Element) {
// checkDiskUsage : Callback to check usage of disk and decide whether eviction is needed
func (bc *BlockCache) checkDiskUsage() bool {
data, _ := common.GetUsage(bc.tmpPath)
usage := uint32((data * 100) / float64(bc.diskSize))
usage := uint32((data * 100) / float64(bc.diskSize/_1MB))
if bc.maxDiskUsageHit {
if usage >= MIN_POOL_USAGE {

Просмотреть файл

@ -34,12 +34,16 @@
package block_cache
import (
"bytes"
"context"
"fmt"
"io/ioutil"
"math"
"math/rand"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"testing"
"time"
@ -159,16 +163,66 @@ func (suite *blockCacheTestSuite) TestEmpty() {
suite.assert.Nil(err)
suite.assert.Equal(tobj.blockCache.Name(), "block_cache")
suite.assert.EqualValues(tobj.blockCache.blockSize, 16*_1MB)
suite.assert.EqualValues(tobj.blockCache.memSize, 4192*_1MB)
suite.assert.EqualValues(tobj.blockCache.diskSize, 4192)
suite.assert.EqualValues(tobj.blockCache.diskSize, 0)
suite.assert.EqualValues(tobj.blockCache.diskTimeout, defaultTimeout)
suite.assert.EqualValues(tobj.blockCache.workers, 128)
suite.assert.EqualValues(tobj.blockCache.prefetch, MIN_PREFETCH)
cmd := exec.Command("nproc")
output, err := cmd.Output()
suite.assert.Nil(err)
coresStr := strings.TrimSpace(string(output))
cores, err := strconv.Atoi(coresStr)
suite.assert.Nil(err)
suite.assert.EqualValues(tobj.blockCache.workers, uint32(3*cores))
suite.assert.EqualValues(tobj.blockCache.prefetch, uint32(2*cores))
suite.assert.EqualValues(tobj.blockCache.noPrefetch, false)
suite.assert.NotNil(tobj.blockCache.blockPool)
suite.assert.NotNil(tobj.blockCache.threadPool)
}
// TestMemory verifies that when mem-size-mb is not set in config, the
// block cache preallocates roughly 80% of the system's free memory.
func (suite *blockCacheTestSuite) TestMemory() {
// Config deliberately omits mem-size-mb so the default path is exercised.
emptyConfig := "read-only: true\n\nblock_cache:\n block-size-mb: 16\n"
tobj, err := setupPipeline(emptyConfig)
defer tobj.cleanupPipeline()
suite.assert.Nil(err)
suite.assert.Equal(tobj.blockCache.Name(), "block_cache")
// Query free memory independently via `free -b` (bytes) as the reference value.
// NOTE(review): assumes a Linux host with procps installed — confirm CI environment.
cmd := exec.Command("bash", "-c", "free -b | grep Mem | awk '{print $4}'")
var out bytes.Buffer
cmd.Stdout = &out
err = cmd.Run()
suite.assert.Nil(err)
free, err := strconv.Atoi(strings.TrimSpace(out.String()))
suite.assert.Nil(err)
// Expected default is 80% of free memory; allow a 10% relative tolerance
// because free memory drifts between the component's sysinfo call and ours.
expected := uint64(0.8 * float64(free))
actual := tobj.blockCache.memSize
difference := math.Abs(float64(actual) - float64(expected))
tolerance := 0.10 * float64(math.Max(float64(actual), float64(expected)))
suite.assert.LessOrEqual(difference, tolerance)
}
// TestFreeDiskSpace verifies that when disk-size-mb is not set but a cache
// path is provided, the disk cache size defaults to roughly 80% of the free
// disk space on the filesystem backing that path.
func (suite *blockCacheTestSuite) TestFreeDiskSpace() {
disk_cache_path := getFakeStoragePath("fake_storage")
// Config sets a cache path but omits disk-size-mb to exercise the default path.
config := fmt.Sprintf("read-only: true\n\nblock_cache:\n block-size-mb: 1\n path: %s", disk_cache_path)
tobj, err := setupPipeline(config)
defer tobj.cleanupPipeline()
suite.assert.Nil(err)
suite.assert.Equal(tobj.blockCache.Name(), "block_cache")
// Query available space independently via `df -B1` (bytes, 4th column of
// row 2) as the reference value for the same path.
// NOTE(review): assumes a Linux host with coreutils `df` — confirm CI environment.
cmd := exec.Command("bash", "-c", fmt.Sprintf("df -B1 %s | awk 'NR==2{print $4}'", disk_cache_path))
var out bytes.Buffer
cmd.Stdout = &out
err = cmd.Run()
suite.assert.Nil(err)
freeDisk, err := strconv.Atoi(strings.TrimSpace(out.String()))
suite.assert.Nil(err)
// Expected default is 80% of free disk space; 10% relative tolerance covers
// disk usage drift between the component's statfs call and ours.
expected := uint64(0.8 * float64(freeDisk))
actual := tobj.blockCache.diskSize
difference := math.Abs(float64(actual) - float64(expected))
tolerance := 0.10 * float64(math.Max(float64(actual), float64(expected)))
suite.assert.LessOrEqual(difference, tolerance, "mssg:", actual, expected)
}
func (suite *blockCacheTestSuite) TestInvalidPrefetchCount() {
cfg := "read-only: true\n\nblock_cache:\n block-size-mb: 16\n mem-size-mb: 500\n prefetch: 8\n parallelism: 10\n path: abcd\n disk-size-mb: 100\n disk-timeout-sec: 5"
tobj, err := setupPipeline(cfg)
@ -233,7 +287,7 @@ func (suite *blockCacheTestSuite) TestManualConfig() {
suite.assert.EqualValues(tobj.blockCache.blockSize, 16*_1MB)
suite.assert.EqualValues(tobj.blockCache.memSize, 500*_1MB)
suite.assert.EqualValues(tobj.blockCache.workers, 10)
suite.assert.EqualValues(tobj.blockCache.diskSize, 100)
suite.assert.EqualValues(tobj.blockCache.diskSize, 100*_1MB)
suite.assert.EqualValues(tobj.blockCache.diskTimeout, 5)
suite.assert.EqualValues(tobj.blockCache.prefetch, 12)
suite.assert.EqualValues(tobj.blockCache.workers, 10)
@ -256,7 +310,7 @@ func (suite *blockCacheTestSuite) TestOpenFileFail() {
suite.assert.Contains(err.Error(), "no such file or directory")
}
func (suite *blockCacheTestSuite) TestFileOpneClose() {
func (suite *blockCacheTestSuite) TestFileOpenClose() {
tobj, err := setupPipeline("")
defer tobj.cleanupPipeline()

Просмотреть файл

@ -56,12 +56,12 @@ libfuse:
# Block cache related configuration
block_cache:
block-size-mb: <size of each block to be cached in memory (in MB). Default - 16 MB>
mem-size-mb: <total amount of memory to be preallocated for block cache (in MB). Default - 4192 MB>
mem-size-mb: <total amount of memory to be preallocated for block cache (in MB). Default - 80% of free memory>
path: <path to local disk cache where downloaded blocks will be stored>
disk-size-mb: <maximum disk cache size allowed. Default - 4192 MB>
disk-size-mb: <maximum disk cache size allowed. Default - 80% of free disk space>
disk-timeout-sec: <default disk cache eviction timeout (in sec). Default - 120 sec>
prefetch: <number of blocks to be prefetched in serial read case. Min - 11>
parallelism: <number of parallel threads downloading the data and writing to disk cache. Default - 128>
prefetch: <number of blocks to be prefetched in serial read case. Min - 11, Default - 2 times number of CPU cores>
parallelism: <number of parallel threads downloading the data and writing to disk cache. Default - 3 times number of CPU cores>
# Disk cache related configuration
file_cache:

Просмотреть файл

@ -78,12 +78,12 @@ stream:
# Block cache related configuration
block_cache:
block-size-mb: <size of each block to be cached in memory (in MB). Default - 16 MB>
mem-size-mb: <total amount of memory to be preallocated for block cache (in MB). Default - 4192 MB>
mem-size-mb: <total amount of memory to be preallocated for block cache (in MB). Default - 80% of free memory>
path: <path to local disk cache where downloaded blocks will be stored>
disk-size-mb: <maximum disk cache size allowed. Default - 4192 MB>
disk-size-mb: <maximum disk cache size allowed. Default - 80% of free disk space>
disk-timeout-sec: <default disk cache eviction timeout (in sec). Default - 120 sec>
prefetch: <number of blocks to be prefetched in serial read case. Min - 11>
parallelism: <number of parallel threads downloading the data and writing to disk cache. Default - 128>
prefetch: <number of blocks to be prefetched in serial read case. Min - 11, Default - 2 times number of CPU cores>
parallelism: <number of parallel threads downloading the data and writing to disk cache. Default - 3 times number of CPU cores>
# Disk cache related configuration
file_cache: