v0.3.1 exclude reflect

2020-01-10 02:09:09 +04:00 · 2020-01-10 02:09:09 +04:00 · 5166d2704f
--- a/cpd.go
+++ b/cpd.go
@ -7,7 +7,6 @@ import (
 	"fmt"
 	"io"
 	"os"
-	"reflect"
 	"unicode"

 	"golang.org/x/text/encoding/charmap"
@ -35,13 +34,13 @@ func FileCodePageDetect(fn string, stopStr ...string) (IDCodePage, error) {
 //CodePageDetect - detect code page of ascii data from reader 'r'
 func CodePageDetect(r io.Reader, stopStr ...string) (IDCodePage, error) {
 	//test input interfase
-	if !reflect.ValueOf(r).IsValid() {
-		return ASCII, fmt.Errorf("input reader is nil")
+	if r == nil {
+		return ASCII, nil
 	}
-
 	//make slice of byte from input reader
 	buf, err := bufio.NewReader(r).Peek(ReadBufSize)
-	if (err != nil) && (err.Error() != "EOF") {
+	//if (err != nil) && (err.Error() != "EOF") {
+	if (err != nil) && (err != io.EOF) {
 		return ASCII, err
 	}

--- a/cpd_test.go
+++ b/cpd_test.go
@ -35,28 +35,28 @@ type tFileCodePageDetectTest struct {
 }

 var dFileCodePageDetect = []tFileCodePageDetectTest{
-	{"test_files\\866&1251.txt", "", nil, CP1251},            //file contain more 1251 then 866
-	{"test_files\\empty_file.txt", "", nil, UTF8},            //file exist but empty, no error, return ASCII
-	{"test_files\\IBM866.txt", "", nil, CP866},               //file contain IBM866
-	{"test_files\\ISO8859-5.txt", "", nil, ISOLatinCyrillic}, //file contain ISO8859-5
-	{"test_files\\KOI8-r.txt", "", nil, KOI8R},               //file contain KOI8
-	{"test_files\\KOI8-r2.txt", "", nil, KOI8R},              //file contain KOI8
-	{"test_files\\noCodePage.txt", "", nil, UTF8},            //file contain rune only ASCII
-	{"test_files\\rune_encode_error.txt", "", nil, ASCII},    //file contain special rune -> encode error, but detect NO error
-	{"test_files\\rune_error_1251.txt", "", nil, CP1251},     //file contain 1251 and special rune -> encode error, but detect NO error
-	{"test_files\\utf8.txt", "", nil, UTF8},                  //file contain utf8 with out bom rune at start
-	{"test_files\\utf8-wbom.txt", "", nil, UTF8},             //file contain utf8 with bom prefix
-	{"test_files\\utf8-woBOM.txt", "", nil, UTF8},            //file contain utf8 with out bom rune at start
-	{"test_files\\utf16be-wBOM.txt", "", nil, UTF16BE},       //file contain utf16 big endian with bom
-	{"test_files\\utf16be-woBOM.txt", "", nil, UTF16BE},      //file contain utf16 big endian without bom
-	{"test_files\\utf16le-wBOM.txt", "", nil, UTF16LE},       //file contain utf16 little endian with bom
-	{"test_files\\utf16le-woBOM.txt", "", nil, UTF16LE},      //file contain utf16 little endian without bom
-	{"test_files\\utf32be-wBOM.txt", "", nil, UTF32BE},       //file contain utf32 big endian with bom
-	{"test_files\\utf32be-woBOM.txt", "", nil, UTF32BE},      //file contain utf32 big endian without bom
-	{"test_files\\utf32le-wBOM.txt", "", nil, UTF32LE},       //file contain utf32 little endian with bom
-	{"test_files\\utf32le-woBOM.txt", "", nil, UTF32LE},      //file contain utf32 little endian without bom
-	{"test_files\\Win1251.txt", "", nil, CP1251},             //file contain Windows1251
-	{"test_files\\win1251_upper.txt", "", nil, CP1251},       //file contain Windows1251
+	{"test_files\\866&1251.txt", "", nil, CP1251},                    //file contain more 1251 than 866
+	{"test_files\\empty_file.txt", "", nil, UTF8},                    //file exist but empty, no error, return UTF8
+	{"test_files\\IBM866.txt", "", nil, CP866},                       //file contain IBM866
+	{"test_files\\ISO8859-5.txt", "", nil, ISOLatinCyrillic},         //file contain ISO8859-5
+	{"test_files\\KOI8-r.txt", "", nil, KOI8R},                       //file contain KOI8
+	{"test_files\\KOI8-r2.txt", "", nil, KOI8R},                      //file contain KOI8
+	{"test_files\\noCodePage.txt", "", nil, UTF8},                    //file contain rune only ASCII
+	{"test_files\\rune_encode_error.txt", "", nil, ISOLatinCyrillic}, //file contain special rune -> encode error, but detect NO error
+	{"test_files\\rune_error_1251.txt", "", nil, CP1251},             //file contain 1251 and special rune -> encode error, but detect NO error
+	{"test_files\\utf8.txt", "", nil, UTF8},                          //file contain utf8 with out bom rune at start
+	{"test_files\\utf8-wbom.txt", "", nil, UTF8},                     //file contain utf8 with bom prefix
+	{"test_files\\utf8-woBOM.txt", "", nil, UTF8},                    //file contain utf8 with out bom rune at start
+	{"test_files\\utf16be-wBOM.txt", "", nil, UTF16BE},               //file contain utf16 big endian with bom
+	{"test_files\\utf16be-woBOM.txt", "", nil, UTF16BE},              //file contain utf16 big endian without bom
+	{"test_files\\utf16le-wBOM.txt", "", nil, UTF16LE},               //file contain utf16 little endian with bom
+	{"test_files\\utf16le-woBOM.txt", "", nil, UTF16LE},              //file contain utf16 little endian without bom
+	{"test_files\\utf32be-wBOM.txt", "", nil, UTF32BE},               //file contain utf32 big endian with bom
+	{"test_files\\utf32be-woBOM.txt", "", nil, UTF32BE},              //file contain utf32 big endian without bom
+	{"test_files\\utf32le-wBOM.txt", "", nil, UTF32LE},               //file contain utf32 little endian with bom
+	{"test_files\\utf32le-woBOM.txt", "", nil, UTF32LE},              //file contain utf32 little endian without bom
+	{"test_files\\Win1251.txt", "", nil, CP1251},                     //file contain Windows1251
+	{"test_files\\win1251_upper.txt", "", nil, CP1251},               //file contain Windows1251
 }

 //FileCodePageDetect
@ -84,7 +84,6 @@ func TestFileCodePageDetect(t *testing.T) {
 	if err == nil {
 		t.Errorf("<FileCodePageDetect> on file '' must return error, but return nil")
 	}
-
 }

 //TestCodePageDetect - тестирование метода CodePageDetect
@ -92,15 +91,15 @@ func TestFileCodePageDetect(t *testing.T) {
 // 1. nil		входящий поток явный nil, параметр останова отсутствует
 // 2. nil, "~"	входящий поток явный nil, параметр останова присутствует
 // 3. входящий поток не инициализированный объект, проверка на передачу пустого интерфейса
-// проверка работы осуществляется через FileCodePageDetect()
+// проверка самой работы осуществляется через FileCodePageDetect()
 func TestCodePageDetect(t *testing.T) {
-	_, err := CodePageDetect(nil)
-	if err == nil {
-		t.Errorf("<CodePageDetect> on input nil return error == nil, expect error != nil\n")
+	tmp, err := CodePageDetect(nil)
+	if (err != nil) && (tmp != ASCII) {
+		t.Errorf("<CodePageDetect> on input nil return error != nil or code page != ASCII\n")
 	}
-	_, err = CodePageDetect(nil, "~")
-	if err == nil {
-		t.Errorf("<CodePageDetect> on input nil return error == nil, expect error != nil\n")
+	tmp, err = CodePageDetect(nil, "~")
+	if (err != nil) && (tmp != ASCII) {
+		t.Errorf("<CodePageDetect> on input nil return error != nil or code page != ASCII\n")
 	}

 	var data *os.File
--- a/ibm866.go
+++ b/ibm866.go
@ -16,7 +16,7 @@ func match866(data []byte, tbl *codePageTable) MatchRes {
 }

 const (
-	cp866BeginUpperChar  = 0x80
+	cp866StartUpperChar  = 0x80
 	cp866StopUpperChar   = 0x9F
 	cp866BeginLowerChar1 = 0xA0
 	cp866StopLowerChar1  = 0xAF
@ -25,7 +25,7 @@ const (
 )

 func isUpper866(r byte) bool {
-	return (r >= cp866BeginUpperChar) && (r <= cp866StopUpperChar)
+	return (r >= cp866StartUpperChar) && (r <= cp866StopUpperChar)
 }

 func isLower866(r byte) bool {
--- a/iso-8859-5.go
+++ b/iso-8859-5.go
@ -1,7 +1,5 @@
 package cpd

-import "unicode"
-
 //unit for ISO-8859-5

 func matchISO88595(d []byte, tbl *codePageTable) MatchRes {
@ -21,31 +19,6 @@ func matchISO88595(d []byte, tbl *codePageTable) MatchRes {
 	return MatchRes{tbl.founded(), 0}
 }

-func runesMatchISO88595_2(data []byte, tbl *codePageTable) (counts int) {
-	for i := range data {
-		if i < 2 {
-			continue
-		}
-		//case " Us" - separator_UPPER_symbol
-		if unicode.IsPunct(rune(data[i-2])) && isUpperISO88595(rune(data[i-1])) {
-			j := tbl.index(rune(data[i]))
-			if j > 0 {
-				(*tbl)[j].count++
-				counts++
-				continue
-			}
-		}
-		if isISO88595(rune(data[i-1])) {
-			j := tbl.index(rune(data[i]))
-			if j > 0 {
-				(*tbl)[j].count++
-				counts++
-			}
-		}
-	}
-	return
-}
-
 const (
 	cpISO88595BeginUpperChar = 0xB0
 	cpISO88595StopUpperChar  = 0xCF
@ -61,11 +34,11 @@ func lu88595(r byte) (res int) {
 }

 func isUpperISO88595(r rune) bool {
-	return (r >= cpKOI8BeginUpperChar) && (r <= cpKOI8StopUpperChar)
+	return (r >= cpISO88595BeginUpperChar) && (r <= cpISO88595StopUpperChar)
 }

 func isLowerISO88595(r rune) bool {
-	return (r >= cpKOI8BeginLowerChar) && (r <= cpKOI8StopLowerChar)
+	return (r >= cpISO88595BeginLowerChar) && (r <= cpISO88595StopLowerChar)
 }

 func isISO88595(r rune) bool {
--- a/koi8.go
+++ b/koi8.go
@ -1,7 +1,5 @@
 package cpd

-import "unicode"
-
 //unit for koi-8

 var consonansKOI8 = [256]byte{
@ -77,31 +75,6 @@ func matchRuneKOI8(d []byte, tbl *codePageTable) int {
 	return tbl.founded()
 }

-func runesMatchKOI8_2(data []byte, tbl *codePageTable) (counts int) {
-	for i := range data {
-		if i < 2 {
-			continue
-		}
-		//case " Us" - separator_UPPER_symbol
-		if unicode.IsPunct(rune(data[i-2])) && isUpperKOI8(data[i-1]) {
-			j := tbl.index(rune(data[i]))
-			if j > 0 {
-				(*tbl)[j].count++
-				counts++
-				continue
-			}
-		}
-		if isKOI8(data[i-1]) {
-			j := tbl.index(rune(data[i]))
-			if j > 0 {
-				(*tbl)[j].count++
-				counts++
-			}
-		}
-	}
-	return
-}
-
 const (
 	cpKOI8BeginUpperChar = 0xE0
 	cpKOI8StopUpperChar  = 0xFF
--- a/utf8.go
+++ b/utf8.go
@ -87,10 +87,7 @@ func ValidUTF8(data []byte) bool {
 			return false
 		}
 	}
-	if float64(zerroByteCount)/float64(m) > 0.05 {
-		return false
-	}
-	return true
+	return float64(zerroByteCount)/float64(m) < 0.05
 }

 const (
--- a/win1251.go
+++ b/win1251.go
@ -77,36 +77,6 @@ func match1251(d []byte, tbl *codePageTable) MatchRes {
 	return MatchRes{matchRune1251(d, tbl), cvPairs1251(d)}
 }

-func runesMatch1251_1(d []byte, tbl *codePageTable) (counts int) {
-	for i := range d {
-		if i < 1 {
-			continue
-		}
-		//case "ab" - counts only if symbols are arranged in pairs
-		if is1251(d[i-1]) {
-			j := tbl.index(rune(d[i]))
-			if j > 0 {
-				(*tbl)[j].count++
-				counts++
-			}
-			continue
-		}
-		if i < 2 {
-			continue
-		}
-		//case " Us" separator_UPPER_lower
-		if IsSeparator(rune(d[i-2])) && isUpper1251(d[i-1]) {
-			j := tbl.index(rune(d[i]))
-			if (j > 0) && (isLower1251(d[i])) {
-				(*tbl)[j].count++
-				counts++
-				continue
-			}
-		}
-	}
-	return
-}
-
 const (
 	cp1251BeginUpperChar = 0xC0
 	cp1251StopUpperChar  = 0xDF