Using a regular expression brings in the whole regexp and regexp/syntax
packages, which increase the resulting binary size by about 130K
(Go 1.18.1, Linux/amd64).

Besides, regular expressions are generally slow and incur some
initialization overhead (to compile all global regexp.MustComplile
variables). This, unlike the size difference, is not the main motivation
for this commit, but it feels like it should have also been mentioned.

A quick benchmark comparison shows a huge improvement (again, this is
not why this is done, nevertheless it pleases the eye):

name         old time/op    new time/op    delta
ParseSize-4    10.6µs ± 3%     2.6µs ±29%  -75.10%  (p=0.002 n=6+6)

name         old alloc/op   new alloc/op   delta
ParseSize-4    3.26kB ± 0%    0.20kB ± 0%  -93.75%  (p=0.000 n=7+6)

name         old allocs/op  new allocs/op  delta
ParseSize-4      72.0 ± 0%      26.0 ± 0%  -63.89%  (p=0.000 n=7+6)

Compatibility note: As a result, we are now a but more relaxed to the
input, allowing e.g. ".4 Gb", or "-0", or "234. B", following the rules
of strconv.ParseFloat. It seems that those were previously rejected as
a result of a regex being used, not deliberately.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
Kir Kolyshkin 2022-04-27 20:30:02 -07:00
Родитель cec49607f7
Коммит 737572633c
2 изменённых файлов: 74 добавлений и 27 удалений

70
size.go
Просмотреть файл

@ -2,7 +2,6 @@ package units
import (
"fmt"
"regexp"
"strconv"
"strings"
)
@ -26,16 +25,17 @@ const (
PiB = 1024 * TiB
)
type unitMap map[string]int64
type unitMap map[byte]int64
var (
decimalMap = unitMap{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB}
binaryMap = unitMap{"k": KiB, "m": MiB, "g": GiB, "t": TiB, "p": PiB}
sizeRegex = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[iI]?[bB]?$`)
decimalMap = unitMap{'k': KB, 'm': MB, 'g': GB, 't': TB, 'p': PB}
binaryMap = unitMap{'k': KiB, 'm': MiB, 'g': GiB, 't': TiB, 'p': PiB}
)
var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
var binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
var (
decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
)
func getSizeAndUnit(size float64, base float64, _map []string) (float64, string) {
i := 0
@ -89,20 +89,66 @@ func RAMInBytes(size string) (int64, error) {
// Parses the human-readable size string into the amount it represents.
func parseSize(sizeStr string, uMap unitMap) (int64, error) {
matches := sizeRegex.FindStringSubmatch(sizeStr)
if len(matches) != 4 {
// TODO: rewrite to use strings.Cut if there's a space
// once Go < 1.18 is deprecated.
sep := strings.LastIndexAny(sizeStr, "01234567890. ")
if sep == -1 {
// There should be at least a digit.
return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
}
var num, sfx string
if sizeStr[sep] != ' ' {
num = sizeStr[:sep+1]
sfx = sizeStr[sep+1:]
} else {
// Omit the space separator.
num = sizeStr[:sep]
sfx = sizeStr[sep+1:]
}
size, err := strconv.ParseFloat(matches[1], 64)
size, err := strconv.ParseFloat(num, 64)
if err != nil {
return -1, err
}
// Backward compatibility: reject negative sizes.
if size < 0 {
return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
}
unitPrefix := strings.ToLower(matches[3])
if mul, ok := uMap[unitPrefix]; ok {
if len(sfx) == 0 {
return int64(size), nil
}
// Process the suffix.
if len(sfx) > 3 { // Too long.
goto badSuffix
}
sfx = strings.ToLower(sfx)
// Trivial case: b suffix.
if sfx[0] == 'b' {
if len(sfx) > 1 { // no extra characters allowed after b.
goto badSuffix
}
return int64(size), nil
}
// A suffix from the map.
if mul, ok := uMap[sfx[0]]; ok {
size *= float64(mul)
} else {
goto badSuffix
}
// The suffix may have extra "b" or "ib" (e.g. KiB or MB).
switch {
case len(sfx) == 2 && sfx[1] != 'b':
goto badSuffix
case len(sfx) == 3 && sfx[1:] != "ib":
goto badSuffix
}
return int64(size), nil
badSuffix:
return -1, fmt.Errorf("invalid suffix: '%s'", sfx)
}

Просмотреть файл

@ -103,6 +103,22 @@ func TestFromHumanSize(t *testing.T) {
assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5 kB")
assertSuccessEquals(t, 32, FromHumanSize, "32.5 B")
assertSuccessEquals(t, 300, FromHumanSize, "0.3 K")
assertSuccessEquals(t, 300, FromHumanSize, ".3kB")
assertSuccessEquals(t, 0, FromHumanSize, "0.")
assertSuccessEquals(t, 0, FromHumanSize, "0. ")
assertSuccessEquals(t, 0, FromHumanSize, "0.b")
assertSuccessEquals(t, 0, FromHumanSize, "0.B")
assertSuccessEquals(t, 0, FromHumanSize, "-0")
assertSuccessEquals(t, 0, FromHumanSize, "-0b")
assertSuccessEquals(t, 0, FromHumanSize, "-0B")
assertSuccessEquals(t, 0, FromHumanSize, "-0 b")
assertSuccessEquals(t, 0, FromHumanSize, "-0 B")
assertSuccessEquals(t, 32, FromHumanSize, "32.")
assertSuccessEquals(t, 32, FromHumanSize, "32.b")
assertSuccessEquals(t, 32, FromHumanSize, "32.B")
assertSuccessEquals(t, 32, FromHumanSize, "32. b")
assertSuccessEquals(t, 32, FromHumanSize, "32. B")
// We do not tolerate extra leading or trailing spaces
// (except for a space after the number and a missing suffix).
@ -124,26 +140,11 @@ func TestFromHumanSize(t *testing.T) {
assertError(t, FromHumanSize, " ")
assertError(t, FromHumanSize, " .")
assertError(t, FromHumanSize, " . ")
assertError(t, FromHumanSize, "0.")
assertError(t, FromHumanSize, "0. ")
assertError(t, FromHumanSize, "0.b")
assertError(t, FromHumanSize, "0.B")
assertError(t, FromHumanSize, "-0")
assertError(t, FromHumanSize, "-0b")
assertError(t, FromHumanSize, "-0B")
assertError(t, FromHumanSize, "-0 b")
assertError(t, FromHumanSize, "-0 B")
assertError(t, FromHumanSize, "-32")
assertError(t, FromHumanSize, ".3kB")
assertError(t, FromHumanSize, "-32b")
assertError(t, FromHumanSize, "-32B")
assertError(t, FromHumanSize, "-32 b")
assertError(t, FromHumanSize, "-32 B")
assertError(t, FromHumanSize, "32.")
assertError(t, FromHumanSize, "32.b")
assertError(t, FromHumanSize, "32.B")
assertError(t, FromHumanSize, "32. b")
assertError(t, FromHumanSize, "32. B")
assertError(t, FromHumanSize, "32b.")
assertError(t, FromHumanSize, "32B.")
assertError(t, FromHumanSize, "32 b.")