This commit is contained in:
softlandia 2020-02-13 02:10:26 +04:00
Родитель 0d40555dad
Коммит 38c7955ad0
3 изменённых файлов: 43 добавлений и 11 удалений

10
HIST.md
Просмотреть файл

@ -52,3 +52,13 @@ _____________________________
- string UTF32 w/o bom and w/o russian char detect as UTF16
_____________________________
## ver 0.5.1 // 2020.02.13 ##
* rename function NewReaderCP() to NewReaderTo()
* add tests
* add samples
### todo ###
- string UTF32 w/o bom and w/o russian char detect as UTF16

Просмотреть файл

@ -42,28 +42,31 @@ IDCodePage uint16 - index of code page, support String() interface, you can fmt.
## variables ##
ReadBufSize int = 1024 // count of byte to read from input reader by default
ReadBufSize int = 1024 // default number of bytes to read from the input reader for detection
## functions ##
1. CodePageDetect(r io.Reader) (IDCodePage, error)
2. FileCodePageDetect(fn string, stopStr ...string) (IDCodePage, error)
1. CodepageDetect(r io.Reader) (IDCodePage, error)
2. FileCodepageDetect(fn string, stopStr ...string) (IDCodePage, error)
3. DecodeUTF16be(s string) string
4. DecodeUTF16le(s string) string
5. NewReader(r io.Reader, cpn ...string) (io.Reader, error)
6. NewReaderTo(r io.Reader, cpn string) (io.Reader, error)
7. CodepageAutoDetect(content []byte) (result IDCodePage)
## description ##
func CodePageAutoDetect(content []byte) (result IDCodePage)
func CodepageAutoDetect(content []byte) (result IDCodePage)
autodetect code page from input slice of byte
use this function instead of golang.org/x/net/html/charset.DetermineEncoding()
CodePageDetect(r io.Reader) (IDCodePage, error)
CodepageDetect(r io.Reader) (IDCodePage, error)
detect code page of ascii data from reader 'r'
use library 'reflect' to check input reader
default read only first 1024 byte from 'r' (var ReadBufSize to change this setting)
input parameter stopStr not using
FileCodePageDetect(fn string, stopStr ...string) (IDCodePage, error)
FileCodepageDetect(fn string, stopStr ...string) (IDCodePage, error)
detect code page of text file "fn", read first 1024 byte (var ReadBufSize to change this setting)
return error if problem with file "fn"
return cpd.ASCII if code page not detected
@ -71,17 +74,35 @@ ReadBufSize int = 1024 // count of byte to read from input reader by default
file must contain characters of the Russian alphabet
string stopStr is currently not used
func StrConvertCodePage(s string, fromCP, toCP IDCodePage) (string, error) //convert string from one code page to another, support Windows1251 & IBM866
func StrConvertCodePage(s string, fromCP, toCP IDCodePage) (string, error)
convert string from one code page to another, support Windows1251 & IBM866
func FileConvertCodePage(fileName string, fromCP, toCP IDCodePage) error //convert code page file with "fileName", support Windows1251 & IBM866
func FileConvertCodePage(fileName string, fromCP, toCP IDCodePage) error
convert code page file with "fileName", support Windows1251 & IBM866
func DecodeUTF16be(s string) string // convert input string from UTF-16BE to Utf-8
func DecodeUTF16be(s string) string
convert input string from UTF-16BE to Utf-8
func DecodeUTF16le(s string) string // convert input string from UTF-16LE to Utf-8
func DecodeUTF16le(s string) string
convert input string from UTF-16LE to Utf-8
NewReader(r io.Reader, cpn ...string) (io.Reader, error)
decoding input reader in UTF-8
cpn may contain the name of the encoding of the input data,
cpn may be omitted; the encoding of the input data is then determined automatically
NewReaderTo(r io.Reader, cpn string) (io.Reader, error)
encode input reader to specified encoding
input data ONLY in UTF-8
## tests and static analysis ##
coverage: 88% of statements
coverage: 89.8%
folder "test_files" contains files for testing; do not remove, change, or add files there if you want the tests to keep working
folder sample contain:
1. tohex -- encode the input string to the specified encoding and return the string from the hexadecimal code of the received runes
2. detect-all-files -- displays the encoding of all files in the current folder
3. cpname -- work with encoding names
file linter.md contains the report from __golangci-lint__

Просмотреть файл

@ -57,6 +57,7 @@ func (i IDCodePage) ReaderHasBom(r io.Reader) bool {
// DeleteBomFromReader - return reader after removing BOM from it
func (i IDCodePage) DeleteBomFromReader(r io.Reader) io.Reader {
if i.ReaderHasBom(r) {
//ошибку не обрабатываем, если мы здесь, то эти байты мы уже читали
r.Read(make([]byte, UTF8.BomLen())) // считываем в никуда количество байт занимаемых BOM этой кодировки
}
return r