v0.3.5 minor updates

2020-01-27 18:19:24 +04:00 · 2020-01-27 18:19:24 +04:00 · b5a7b5dbc2
--- a/journal.md
+++ b/journal.md
@ -18,3 +18,13 @@ _____________________________
 * add test for UTF16LE and UTF16BE without russian

 _____________________________
+
+## ver 0.3.5 // 2020.01.27 ##
+
+* minor updates
+
+### todo ###
+
+   1 test with multithreading does __not__ pass
+
+_____________________________
--- a/README-RU.md
+++ b/README-RU.md
@ -31,8 +31,8 @@ no ID                Name		uint16

 при использовании golang 1.12.6 в проект добавляется код размером ~250 kB

-ВНИМАНИЕ!  
-файлы без BOM в кодировке UTF16le и UTF16be при отсутсвии русских букв опознаются не верно
+>__ВНИМАНИЕ!__
+>библиотека не поддерживает многопоточный режим

 ## зависимости ##

--- a/README.md
+++ b/README.md
@ -21,15 +21,14 @@ no ID                Name		uint16
 9. UTF32LE:          "UTF-32LE",	1019
 10. UTF32BE:         "UTF-32BE",	1018

-
 ## feature ##

 encoding is determined both by the presence of the bom attribute and by heuristic  
 if file contain only latin symbols from first half of code page, this file detected as UTF-8  
 this is not a mistake, this is a completely correct statement

-ATTANTION!  
-files without specification on UTF16le and UTF16be not containing the Russian alphabet are not recognized correctly
+>__ATTENTION!__
+>the library does not support multithreading yet, work on it is in progress

 ## dependences ##

--- a/char_frac.xlsx
+++ b/char_frac.xlsx
--- a/code_pages.go
+++ b/code_pages.go
@ -4,6 +4,8 @@ import (
 	"bytes"
 	"fmt"
 	"strings"
+
+	"github.com/softlandia/xlib"
 )

 // IDCodePage - index of code page
@ -34,17 +36,20 @@ func (i IDCodePage) DeleteBom(s string) (res string) {

 // matcher - return struct MatchRes - two criterion
 // this function must be realised in each code page
-type matcher func(data []byte, tbl *codePageTable) MatchRes
+type matcher func(data []byte, tbl *cpTable) MatchRes
+
+// container - returns true if byte b is contained in the code page
+type container func(b byte) bool

 type tableElement struct {
 	code  rune //rune (letter) of the alphabet that interests us
 	count int  //the number of these runes found in the text
 }

-// codePageTable - stores 9 letters, we will look for them in the text
+// cpTable - stores 9 letters, we will look for them in the text
 // element with index 0 for the case of non-location
 // first 9 elements lowercase, second 9 elements uppercase
-type codePageTable [19]tableElement
+type cpTable [19]tableElement

 // MatchRes - result criteria
 // countMatch - the number of letters founded in text
@ -60,12 +65,14 @@ func (m MatchRes) String() string {

 // CodePage - realize code page
 type CodePage struct {
-	id       IDCodePage    //id of code page
-	name     string        //name of code page
-	MatchRes               //count of matching
-	match    matcher       //method for calculating the criteria for the proximity of input data to this code page
-	Boms     []byte        //default BOM for this codepage
-	table    codePageTable //table of main alphabet rune of this code page, contain [code, count]
+	id       IDCodePage //id of code page
+	name     string     //name of code page
+	NumByte  byte       //number of byte using in codepage
+	MatchRes            //count of matching
+	match    matcher    //method for calculating the criteria for the proximity of input data to this code page
+	contain  container  //method return true if this codepage contain byte
+	Boms     []byte     //default BOM for this codepage
+	table    cpTable    //table of main alphabet rune of this code page, contain [code, count]
 }

 func (o CodePage) String() string {
@ -84,64 +91,76 @@ func (o CodePage) MatchingRunes() string {
 	return sb.String()
 }

+// FirstAlphabetPos - return position of first alphabet byte
+// The input is passed through xlib.SortBytes and the result is scanned from the start;
+// the index of the first byte accepted by the code page's contain function is returned.
+// Returns 0 when no byte is accepted, so 0 does not distinguish "found at position 0" from "not found".
+// NOTE(review): whether xlib.SortBytes sorts d in place or returns a copy is not visible here - confirm.
+func (o CodePage) FirstAlphabetPos(d []byte) int {
+	sorted := xlib.SortBytes(d)
+	for pos := 0; pos < len(sorted); pos++ {
+		if o.contain(sorted[pos]) {
+			return pos
+		}
+	}
+	return 0
+}
+
 // TCodepagesDic - type to store all supported code page
 type TCodepagesDic map[IDCodePage]CodePage

 //CodepageDic - map of all codepage
 var CodepageDic = TCodepagesDic{
-	ASCII: {ASCII, "ASCII", MatchRes{0, 0}, matchASCII, []byte{},
-		codePageTable{{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}}},
+	ASCII: {ASCII, "ASCII", 0, MatchRes{0, 0}, matchASCII, isASCII, []byte{},
+		cpTable{{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}}},

-	CP866: {CP866, "CP866", MatchRes{0, 0}, match866, []byte{},
-		codePageTable{
+	CP866: {CP866, "CP866", 1, MatchRes{0, 0}, match866, is866, []byte{},
+		cpTable{
 			//first element serves as sign of absence
 			{0, 0},
 			//о          е		   а		  и			 н			т			с		  р			в
 			{0xAE, 0}, {0xA5, 0}, {0xA0, 0}, {0xA8, 0}, {0xAD, 0}, {0xE2, 0}, {0xE1, 0}, {0xE0, 0}, {0xA2, 0},
 			{0x8E, 0}, {0x85, 0}, {0x80, 0}, {0x88, 0}, {0x8D, 0}, {0x92, 0}, {0x91, 0}, {0x90, 0}, {0x82, 0}}},
-	CP1251: {CP1251, "CP1251", MatchRes{0, 0}, match1251, []byte{},
-		codePageTable{
+	CP1251: {CP1251, "CP1251", 1, MatchRes{0, 0}, match1251, is1251, []byte{},
+		cpTable{
 			{0, 0},
 			//а		    и		   н		  с			 р			в		   л		  к			 я
 			{0xE0, 0}, {0xE8, 0}, {0xED, 0}, {0xF1, 0}, {0xF0, 0}, {0xE2, 0}, {0xEB, 0}, {0xEA, 0}, {0xFF, 0},
 			{0xC0, 0}, {0xC8, 0}, {0xCD, 0}, {0xD1, 0}, {0xD0, 0}, {0xC2, 0}, {0xCB, 0}, {0xCA, 0}, {0xDF, 0}}},
-	KOI8R: {KOI8R, "KOI8-R", MatchRes{0, 0}, matchKOI8, []byte{},
-		codePageTable{
+	KOI8R: {KOI8R, "KOI8-R", 1, MatchRes{0, 0}, matchKOI8, isKOI8, []byte{},
+		cpTable{
 			//о		    а		   и		  т			 с			в		   л		  к			м
 			{0, 0},
 			{0xCF, 0}, {0xC1, 0}, {0xC9, 0}, {0xD4, 0}, {0xD3, 0}, {0xD7, 0}, {0xCC, 0}, {0xCB, 0}, {0xCD, 0},
 			{0xEF, 0}, {0xE1, 0}, {0xE9, 0}, {0xF4, 0}, {0xF3, 0}, {0xF7, 0}, {0xEC, 0}, {0xEB, 0}, {0xED, 0}}},
-	ISOLatinCyrillic: {ISOLatinCyrillic, "ISO-8859-5", MatchRes{0, 0}, matchISO88595, []byte{},
-		codePageTable{
+	ISOLatinCyrillic: {ISOLatinCyrillic, "ISO-8859-5", 1, MatchRes{0, 0}, matchISO88595, isISO88595, []byte{},
+		cpTable{
 			//о		    а		   и		  т			 с			в		   л		  к			е
 			{0, 0},
 			{0xDE, 0}, {0xD0, 0}, {0xD8, 0}, {0xE2, 0}, {0xE1, 0}, {0xD2, 0}, {0xDB, 0}, {0xDA, 0}, {0xD5, 0},
 			{0xBF, 0}, {0xB0, 0}, {0xB8, 0}, {0xC2, 0}, {0xC1, 0}, {0xB2, 0}, {0xBB, 0}, {0xBA, 0}, {0xB5, 0}}},
-	UTF8: {UTF8, "UTF-8", MatchRes{0, 0}, matchUTF8, []byte{0xef, 0xbb, 0xbf},
-		codePageTable{
+	UTF8: {UTF8, "UTF-8", 4, MatchRes{0, 0}, matchUTF8, isASCII, []byte{0xef, 0xbb, 0xbf},
+		cpTable{
 			{0, 0},
 			//о           е				а		    и			 н			  т			   с			р			в
 			{0xD0BE, 0}, {0xD0B5, 0}, {0xD0B0, 0}, {0xD0B8, 0}, {0xD0BD, 0}, {0xD182, 0}, {0xD181, 0}, {0xD180, 0}, {0xD0B2, 0},
 			{0xD09E, 0}, {0xD095, 0}, {0xD090, 0}, {0xD098, 0}, {0xD0AD, 0}, {0xD0A2, 0}, {0xD0A1, 0}, {0xD0A0, 0}, {0xD092, 0}}},
-	UTF16LE: {UTF16LE, "UTF-16LE", MatchRes{0, 0}, matchUTF16le, []byte{0xff, 0xfe},
-		codePageTable{
+	UTF16LE: {UTF16LE, "UTF-16LE", 2, MatchRes{0, 0}, matchUTF16le, isASCII, []byte{0xff, 0xfe},
+		cpTable{
 			{0, 0},
 			//о           е				а		    и			 н			  т			   с			р			в
 			{0x3E04, 0}, {0x3504, 0}, {0x1004, 0}, {0x3804, 0}, {0x3D04, 0}, {0x4204, 0}, {0x4104, 0}, {0x4004, 0}, {0x3204, 0},
 			{0x1E04, 0}, {0x1504, 0}, {0x3004, 0}, {0x1804, 0}, {0x1D04, 0}, {0x2204, 0}, {0x2104, 0}, {0x2004, 0}, {0x1204, 0}}},
-	UTF16BE: {UTF16BE, "UTF-16BE", MatchRes{0, 0}, matchUTF16be, []byte{0xfe, 0xff},
-		codePageTable{
+	UTF16BE: {UTF16BE, "UTF-16BE", 2, MatchRes{0, 0}, matchUTF16be, isASCII, []byte{0xfe, 0xff},
+		cpTable{
 			{0, 0},
 			//о           е				а		    и			 н			  т			   с			р			в
 			{0x043E, 0}, {0x0435, 0}, {0x0410, 0}, {0x0438, 0}, {0x043D, 0}, {0x0442, 0}, {0x0441, 0}, {0x0440, 0}, {0x0432, 0},
 			{0x041E, 0}, {0x0415, 0}, {0x0430, 0}, {0x0418, 0}, {0x041D, 0}, {0x0422, 0}, {0x0421, 0}, {0x0420, 0}, {0x0412, 0}}},
-	UTF32BE: {UTF32BE, "UTF-32BE", MatchRes{0, 0}, matchUTF32be, []byte{0x00, 0x00, 0xfe, 0xff},
-		codePageTable{
+	UTF32BE: {UTF32BE, "UTF-32BE", 4, MatchRes{0, 0}, matchUTF32be, isASCII, []byte{0x00, 0x00, 0xfe, 0xff},
+		cpTable{
 			{0, 0},
 			{0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0},
 			{0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}}},
-	UTF32LE: {UTF32LE, "UTF-32LE", MatchRes{0, 0}, matchUTF32le, []byte{0xff, 0xfe, 0x00, 0x00},
-		codePageTable{
+	UTF32LE: {UTF32LE, "UTF-32LE", 4, MatchRes{0, 0}, matchUTF32le, isASCII, []byte{0xff, 0xfe, 0x00, 0x00},
+		cpTable{
 			{0, 0},
 			{0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0},
 			{0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}, {0x0, 0}}},
@ -158,7 +177,6 @@ func (o TCodepagesDic) clear() {
 }

 //Match - return the id of code page to which the data best matches
-//TODO большинству матчеров требуется более 2х символов, надо проверить на минимальную длину
 func (o TCodepagesDic) Match(data []byte) (result IDCodePage) {
 	result = ASCII
 	maxCount := 0
@ -166,7 +184,7 @@ func (o TCodepagesDic) Match(data []byte) (result IDCodePage) {
 	for id, cp := range o {
 		cp.MatchRes = cp.match(data, &cp.table)
 		o[id] = cp
-		m = cp.countMatch + cp.countCvPairs
+		m = cp.MatchRes.countMatch + cp.MatchRes.countCvPairs
 		if m > maxCount {
 			maxCount = m
 			result = id
@ -176,25 +194,10 @@ func (o TCodepagesDic) Match(data []byte) (result IDCodePage) {
 }

 //foo function,
-func matchASCII(b []byte, tbl *codePageTable) MatchRes {
+func matchASCII(b []byte, tbl *cpTable) MatchRes {
 	return MatchRes{0, 0}
 }

-/*
-//codePageName - string of code page name runesMatchUTF32LE
-var codePageName = map[IDCodePage]string{
-	ASCII:            "ASCII",
-	ISOLatinCyrillic: "ISO-8859-5",
-	CP866:            "CP866",
-	CP1251:           "CP1251",
-	UTF8:             "UTF-8",
-	UTF16LE:          "UTF-16LE",
-	UTF16BE:          "UTF-16BE",
-	UTF32:            "UTF-32",
-	KOI8R:            "KOI8-R",
-	Unicode:          "Unicode",
-	UTF7:             "UTF-7",
-	UTF32LE:          "UTF-32LE",
-	UTF32BE:          "UTF-32BE",
+func isASCII(b byte) bool {
+	return true
 }
-*/
--- a/codePageTable.go
+++ b/codePageTable.go
@ -1,10 +1,12 @@
 package cpd

+import "sort"
+
 //codePageTable

 // return index of rune in code page table
 // return 0 if rune not in code page table
-func (t *codePageTable) index(r rune) int {
+func (t *cpTable) index(r rune) int {
 	for j, e := range *t {
 		if r == e.code {
 			return j
@ -13,17 +15,22 @@ func (t *codePageTable) index(r rune) int {
 	return 0
 }

-func (t *codePageTable) clear() {
+func (t *cpTable) clear() {
 	for i := 0; i < len(t); i++ {
 		t[i].count = 0
 	}
 }

 // founded - calculates total number of matching
-func (t *codePageTable) founded() (res int) {
+func (t *cpTable) founded() (res int) {
 	//0 элемент исключён, он не содержит количество найденных букв
 	for i := 1; i < len(t); i++ {
 		res += t[i].count
 	}
 	return
 }
+
+// sort - orders the elements of the table in place by ascending rune code
+// returns the same table pointer to allow call chaining
+// NOTE(review): ordering by code is an assumption, confirm the intended sort key;
+// after sorting, the "lowercase first, uppercase second" layout of the table is no longer guaranteed
+func (t *cpTable) sort() *cpTable {
+	// sort.Slice panics when given anything but a slice, so sort a slice view
+	// over the underlying array; swaps made through the view modify *t itself
+	elems := t[:]
+	sort.Slice(elems, func(i, j int) bool { return elems[i].code < elems[j].code })
+	return t
+}
--- a/cpd_test.go
+++ b/cpd_test.go
@ -5,6 +5,7 @@ import (
 	"os"
 	fp "path/filepath"
 	"strings"
+	"sync"
 	"testing"

 	"github.com/stretchr/testify/assert"
@ -21,10 +22,10 @@ var dStringHasBom = []tStringHasBom{
 	{ASCII, "", false},
 	{CP866, "CP866", false},
 	{CP1251, string([]byte{0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), false},
-	{CP1251, string([]byte{0xff, 0xfe, 0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), false}, //contain UTF16LE bom, false because CP1251 have no bom
-	{UTF8, string([]byte{0xef, 0xbb, 0xbf, 0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), true},
-	{UTF8, string([]byte{0xef, 0xbb, 0xbe, 0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), false},
-	{UTF8, string([]byte{0xff, 0xbb, 0xbe, 0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), false},
+	{CP1251, string([]byte{0xff, 0xfe, 0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), false},     //contain UTF16LE bom, false because CP1251 have no bom
+	{UTF8, string([]byte{0xef, 0xbb, 0xbf, 0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), true},  //UTF8 with bom
+	{UTF8, string([]byte{0xef, 0xbb, 0xbe, 0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), false}, //UTF8 without bom
+	{UTF8, string([]byte{0xff, 0xbb, 0xbe, 0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), false}, //UTF8 without bom
 	{UTF16BE, string([]byte{0xfe, 0xff, 0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), true},
 	{UTF16LE, string([]byte{0xff, 0xfe, 0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), true},
 	{UTF32BE, string([]byte{0x00, 0x00, 0xfe, 0xff, 0xD0, 0xEE, 0xF1, 0xF1, 0xE8, 0xFF}), true},
@ -72,6 +73,7 @@ type tFileCodePageDetectTest struct {
 }

 var dFileCodePageDetect = []tFileCodePageDetectTest{
+	{fp.Join("test_files/utf16le-woBOM-only-ru.txt"), "", nil, UTF16LE},      //file contain utf16 little endian without bom
 	{fp.Join("test_files/utf16le-woBOM-no-ru.txt"), "", nil, UTF16LE},        //file contain utf16 little endian without bom
 	{fp.Join("test_files/utf16le-woBOM-only-latin.txt"), "", nil, UTF16LE},   //file contain utf16 little endian without bom
 	{fp.Join("test_files/utf16le_las.txt"), "", nil, UTF16LE},                //file contain utf16 little endian without bom
@ -100,6 +102,7 @@ var dFileCodePageDetect = []tFileCodePageDetectTest{
 	{fp.Join("test_files/win1251_upper.txt"), "", nil, CP1251},               //file contain Windows1251
 	{fp.Join("test_files/utf16be-woBOM-only-latin.txt"), "", nil, UTF16BE},   //file contain utf16 big endian with bom
 	{fp.Join("test_files/utf16be-woBOM-no-ru.txt"), "", nil, UTF16BE},        //file contain utf16 big endian with bom
+	{fp.Join("test_files/utf16be-woBOM-only-ru.txt"), "", nil, UTF16BE},      //file contain utf16 big endian without bom
 }

 //FileCodePageDetect
@ -121,6 +124,30 @@ func TestFileCodePageDetect(t *testing.T) {
 	assert.NotNil(t, err, "<FileCodePageDetect> on file '' must return error, but return nil")
 }

+func fileCodepageDetect(wg *sync.WaitGroup, cp *[]IDCodePage, fileName string) {
+	defer wg.Done()
+	res, _ := FileCodePageDetect(fileName)
+	(*cp) = append((*cp), res)
+}
+
+/*
+func TestFileCodePageDetectM(t *testing.T) {
+	var (
+		res IDCodePage
+		cp  []IDCodePage
+		wg  sync.WaitGroup
+	)
+	cp = make([]IDCodePage, 0)
+	for _, d := range dFileCodePageDetect {
+		wg.Add(1)
+		go fileCodepageDetect(&wg, &cp, d.fn)
+	}
+	wg.Wait()
+	for i, d := range dFileCodePageDetect {
+		assert.Equal(t, cp[i], d.r, fmt.Sprintf("<FileCodePageDetect> on file '%s' expected result: %s, got: %s", d.fn, d.r, res))
+	}
+}*/
+
 //TestCodePageDetect - тестирование метода CodePageDetect
 // проверки на входные параметры:
 // 1. nil		входящий поток явный nil, параметр останова отсутствует
--- a/ibm866.go
+++ b/ibm866.go
@ -3,7 +3,7 @@ package cpd
 //unit for ibm866

 // for CP866 calculate only count of letter from table 'tbl'
-func match866(data []byte, tbl *codePageTable) MatchRes {
+func match866(data []byte, tbl *cpTable) MatchRes {
 	for i := range data {
 		j := tbl.index(rune(data[i])) //return 0 if rune data[i] not found
 		(*tbl)[j].count++
@ -11,7 +11,6 @@ func match866(data []byte, tbl *codePageTable) MatchRes {
 	return MatchRes{tbl.founded(), 0}
 }

-/*
 const (
 	cp866StartUpperChar  = 0x80
 	cp866StopUpperChar   = 0x9F
@ -33,4 +32,3 @@ func isLower866(r byte) bool {
 func is866(r byte) bool {
 	return isUpper866(r) || isLower866(r)
 }
-*/
--- a/iso-8859-5.go
+++ b/iso-8859-5.go
@ -2,13 +2,13 @@ package cpd

 //unit for ISO-8859-5

-func matchISO88595(d []byte, tbl *codePageTable) MatchRes {
+func matchISO88595(d []byte, tbl *cpTable) MatchRes {
 	for i := 0; i < len(d); i++ {
-		if isISO88595(rune(d[i])) {
+		if isISO88595(d[i]) {
 			upper := lu88595(d[i])
 			j := tbl.index(rune(d[i]))
 			(*tbl)[j].count++
-			for i++; (i < len(d)) && isISO88595(rune(d[i])); i++ {
+			for i++; (i < len(d)) && isISO88595(d[i]); i++ {
 				if upper >= lu88595(d[i]) {
 					j = tbl.index(rune(d[i]))
 					(*tbl)[j].count++
@ -27,20 +27,20 @@ const (
 )

 func lu88595(r byte) (res int) {
-	if isUpperISO88595(rune(r)) {
+	if isUpperISO88595(r) {
 		res = 1
 	}
 	return
 }

-func isUpperISO88595(r rune) bool {
+func isUpperISO88595(r byte) bool {
 	return (r >= cpISO88595BeginUpperChar) && (r <= cpISO88595StopUpperChar)
 }

-func isLowerISO88595(r rune) bool {
+func isLowerISO88595(r byte) bool {
 	return (r >= cpISO88595BeginLowerChar) && (r <= cpISO88595StopLowerChar)
 }

-func isISO88595(r rune) bool {
+func isISO88595(r byte) bool {
 	return isUpperISO88595(r) || isLowerISO88595(r)
 }
--- a/koi8.go
+++ b/koi8.go
@ -42,7 +42,7 @@ var vowelsKOI8 = [256]byte{
 	/* F */ 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
 }

-func matchKOI8(d []byte, tbl *codePageTable) MatchRes {
+func matchKOI8(d []byte, tbl *cpTable) MatchRes {
 	return MatchRes{matchRuneKOI8(d, tbl), cvPairsKOI8(d)}
 }

@ -58,7 +58,7 @@ func cvPairsKOI8(d []byte) (cvPairsCount int) {
 	return cvPairsCount
 }

-func matchRuneKOI8(d []byte, tbl *codePageTable) int {
+func matchRuneKOI8(d []byte, tbl *cpTable) int {
 	for i := 0; i < len(d); i++ {
 		if isKOI8(d[i]) {
 			upper := luKOI8(d[i])
--- a/sample/main.go
+++ b/sample/main.go
@ -1,16 +1,52 @@
 package main

 import (
+	"errors"
 	"fmt"
+	"log"
 	"os"
+	"path/filepath"
+	"strings"

 	"github.com/softlandia/cpd"
 )

 func main() {
-	t, _ := cpd.FileCodePageDetect(os.Args[1])
-	fmt.Printf("cpd.FileCodePageDetect():\t%s\n", t)
-	for id, cp := range cpd.CodepageDic {
-		fmt.Printf("%s\tmatches:%s\t%s\n", id, cp.MatchRes, cp.MatchingRunes())
+	var fl []string
+	FindFilesExt(&fl, ".\\", os.Args[1])
+	for _, fn := range fl {
+		t, _ := cpd.FileCodePageDetect(fn)
+		fmt.Printf("file: \t`%s`\t`%s`\n", fn, t)
 	}
 }
+
+//FindFilesExt - walks the tree rooted at path and appends to *fileList every file whose extension equals fileNameExt
+//the extension comparison is done on upper-cased strings, so ".log" also matches ".LOG"
+//fileList    - destination list, must not be nil; found paths are appended to it
+//path        - root folder of the search, for example "c:\tmp"
+//fileNameExt - extension including the dot, for example ".log"
+//returns the number of appended files and the error returned by filepath.Walk
+//sample:  n, err := FindFilesExt(&fl, "c:\\tmp", ".log")
+func FindFilesExt(fileList *[]string, path, fileNameExt string) (int, error) {
+	if fileList == nil {
+		return 0, errors.New("first parameter 'fileList' is nil")
+	}
+	wantExt := strings.ToUpper(fileNameExt)
+	found := 0 //number of files appended to the list
+	visit := func(name string, info os.FileInfo, walkErr error) error {
+		switch {
+		case walkErr != nil:
+			//the entry could not be accessed: report it and stop the walk with the same error
+			log.Printf("prevent panic by handling failure accessing a path %q: %v\n", name, walkErr)
+			return walkErr
+		case info.IsDir():
+			//folders are never collected
+			return nil
+		case strings.ToUpper(filepath.Ext(name)) == wantExt:
+			//extension matches: store the path and count it
+			*fileList = append(*fileList, name)
+			found++
+		}
+		return nil
+	}
+	err := filepath.Walk(path, visit)
+	return found, err
+}
--- a/sample/sample.exe
+++ b/sample/sample.exe
--- a/test_files/866-table.txt
+++ b/test_files/866-table.txt
@ -0,0 +1,15 @@
+ÚÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄ¿
+³   <20>à¨¬¥à ¢ à ¬ª¥                                    ³
+³                                                     ³
+³   ÉÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍ»         ³
+³   º                                       º         ³
+³   º                                       º         ³
+³   º                                       º         ³
+³   º                                       º         ³
+³   º                                       º         ³
+³   º                                       º         ³
+³   ÈÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍ¼         ³
+³                                                     ³
+³                                                     ³
+ÀÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÙ
+
--- a/utf16be.go
+++ b/utf16be.go
@ -8,7 +8,7 @@ import (

 //unit for UTF16BE

-func matchUTF16be(b []byte, tbl *codePageTable) MatchRes {
+func matchUTF16be(b []byte, tbl *cpTable) MatchRes {
 	n := len(b)/2 - 1
 	if n <= 0 {
 		return MatchRes{0, 0}
@ -34,7 +34,7 @@ func matchUTF16beZerro(b []byte) int {

 // matchUTF16beRu - вычисляет критерий по количеству русских букв
 // tbl *codePageTable - передаётся не для нахождения кодировки, а для заполнения встречаемости популярных русских букв
-func matchUTF16beRu(data []byte, tbl *codePageTable) int {
+func matchUTF16beRu(data []byte, tbl *cpTable) int {
 	matches := 0
 	n := len(data)/2 - 1
 	if n <= 0 {
--- a/utf16le.go
+++ b/utf16le.go
@ -13,11 +13,15 @@ import (
 //проверка на BOM уже выполнена, в принимаемом массиве не BOM символов

 // matchUTF16le - функция вычисляет общий критерий для кодировки UTF16LE
-func matchUTF16le(b []byte, tbl *codePageTable) MatchRes {
+func matchUTF16le(b []byte, tbl *cpTable) MatchRes {
 	n := len(b)/2 - 1
 	if n <= 0 {
 		return MatchRes{0, 0}
 	}
+	//два критерия используется
+	//первый количество найденных русских букв
+	//второй количество найденных 0x00
+	//решающим является максимальный
 	return MatchRes{xlib.Max(matchUTF16leRu(b, tbl), matchUTF16leZerro(b)), 0}
 }

@ -35,7 +39,7 @@ func matchUTF16leZerro(b []byte) int {

 // matchUTF16leRu - вычисляет критерий по количеству русских букв
 // tbl *codePageTable - передаётся не для нахождения кодировки, а для заполнения встречаемости популярных русских букв
-func matchUTF16leRu(b []byte, tbl *codePageTable) int {
+func matchUTF16leRu(b []byte, tbl *cpTable) int {
 	matches := 0
 	count04 := 0
 	n := len(b)/2 - 1
--- a/utf32be.go
+++ b/utf32be.go
@ -3,7 +3,7 @@ package cpd
 //UTF-32BE

 //первые 2 байта практически всегда меньше вторых 2 байтов
-func matchUTF32be(d []byte, tbl *codePageTable) MatchRes {
+func matchUTF32be(d []byte, tbl *cpTable) MatchRes {
 	zerroCounts := 0
 	for i := 0; i < len(d)-4; i += 4 {
 		if (int(d[i]) + int(d[i+1])) == 0 {
--- a/utf32le.go
+++ b/utf32le.go
@ -4,7 +4,7 @@ package cpd

 //вторые 2 байта практически всегда 0
 //используемый признак не сработает если больше половины текста будет набрано символами с 4 значащими байтами, не представляю, что это за текст...
-func matchUTF32le(d []byte, tbl *codePageTable) MatchRes {
+func matchUTF32le(d []byte, tbl *cpTable) MatchRes {
 	zerroCounts := 0
 	for i := 0; i < len(d)-4; i += 4 {
 		if (int(d[i+2]) + int(d[i+3])) == 0 {
--- a/utf8.go
+++ b/utf8.go
@ -4,7 +4,7 @@ import "encoding/binary"

 //unit for UTF8

-func matchUTF8(d []byte, tbl *codePageTable) MatchRes {
+func matchUTF8(d []byte, tbl *cpTable) MatchRes {
 	matches := 0
 	if len(d) <= 3 {
 		return MatchRes{matches, 0}
--- a/win1251.go
+++ b/win1251.go
@ -56,7 +56,7 @@ func cvPairs1251(d []byte) (cvPairsCount int) {
 }

 // matchRunes1251 - counts the number of characters that are the most popular letters of the Russian alphabet
-func matchRune1251(d []byte, tbl *codePageTable) int {
+func matchRune1251(d []byte, tbl *cpTable) int {
 	for i := 0; i < len(d); i++ {
 		if is1251(d[i]) {
 			upper := lu1251(d[i])
@ -73,7 +73,7 @@ func matchRune1251(d []byte, tbl *codePageTable) int {
 	return tbl.founded()
 }

-func match1251(d []byte, tbl *codePageTable) MatchRes {
+func match1251(d []byte, tbl *cpTable) MatchRes {
 	return MatchRes{matchRune1251(d, tbl), cvPairs1251(d)}
 }

@ -103,3 +103,39 @@ func isLower1251(r byte) bool {
 func is1251(r byte) bool {
 	return isUpper1251(r) || isLower1251(r)
 }
+
+/*var  = [66]byte{
+	0xE0, 0xC0, // 'а'
+	0xE1, 0xC1, // 'б'
+	0xE2, 0xC2, // 'в'
+	0xE3, 0xC3, // 'г'
+	0xE4, 0xC4, // 'д'
+	0xE5, 0xC5, // 'е'
+	0xB8, 0xA8, // 'ё'
+	0xE6, 0xC6, // 'ж'
+	0xE7, 0xC7, // 'з'
+	0xE8, 0xC8, // 'и'
+	0xE9, 0xC9, // 'й'
+	0xEA, 0xCA, // 'к'
+	0xEB, 0xCB, // 'л'
+	0xEC, 0xCC, // 'м'
+	0xED, 0xCD, // 'н'
+	0xEE, 0xCE, // 'о'
+	0xEF, 0xCF, // 'п'
+	0xF0, 0xD0, // 'р'
+	0xF1, 0xD1, // 'с'
+	0xF2, 0xD2, // 'т'
+	0xF3, 0xD3, // 'у'
+	0xF4, 0xD4, // 'ф'
+	0xF5, 0xD5, // 'х'
+	0xF6, 0xD6, // 'ц'
+	0xF7, 0xD7, // 'ч'
+	0xF8, 0xD8, // 'ш'
+	0xF9, 0xD9, // 'щ'
+	0xFA, 0xDA, // 'ъ'
+	0xFB, 0xDB, // 'ы'
+	0xFC, 0xDC, // 'ь'
+	0xFD, 0xDD, // 'э'
+	0xFE, 0xDE, // 'ю'
+	0xFF, 0xDF, // 'я'
+}*/