зеркало из https://github.com/softlandia/cpd.git
v0.2.0
This commit is contained in:
Родитель
54bfa0d068
Коммит
4387137abc
|
@ -1,3 +1,4 @@
|
||||||
*.zip
|
*.zip
|
||||||
*.7z
|
*.7z
|
||||||
.idea/*
|
.idea/*
|
||||||
|
tmp*
|
|
@ -0,0 +1,17 @@
|
||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Launch",
|
||||||
|
"type": "go",
|
||||||
|
"request": "launch",
|
||||||
|
"mode": "auto",
|
||||||
|
"program": "${fileDirname}",
|
||||||
|
"env": {},
|
||||||
|
"args": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
Двоичный файл не отображается.
|
@ -0,0 +1,23 @@
|
||||||
|
package cpd
|
||||||
|
|
||||||
|
//codePageTable
|
||||||
|
|
||||||
|
//return index of rune in code page table
|
||||||
|
//return 0 if rune not in code page table
|
||||||
|
func (t *codePageTable) containsRune(r rune) int {
|
||||||
|
for j, e := range *t {
|
||||||
|
if r == e.code {
|
||||||
|
return j
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *codePageTable) isUpper(r rune) bool {
|
||||||
|
for i := 10; i < len(t); i++ {
|
||||||
|
if r == (*t)[i].code {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
|
@ -0,0 +1,134 @@
|
||||||
|
// file from "golang.org\x\text\encoding\internal\identifier" (c) golang authors
|
||||||
|
// contain identifier of code page
|
||||||
|
// IDCodePage implements interface String()
|
||||||
|
|
||||||
|
package cpd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
//IDCodePage - index of code page
|
||||||
|
type IDCodePage uint16
|
||||||
|
|
||||||
|
func (i IDCodePage) String() string {
|
||||||
|
return codePageName[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
// itRuneMatch is the signature of a single-rune matcher: per the original
// note it returns 1 if the rune belongs to the code page and 0 otherwise.
type itRuneMatch func(r rune, tbl *codePageTable) int

// runesMatch is the signature of a scoring function: it returns the count of
// elements of data that match the code page described by tbl.
type runesMatch func(data []byte, tbl *codePageTable) int
|
||||||
|
|
||||||
|
// tableElement is one slot of a codePageTable: a character code together
// with a counter of how many times it was seen.
type tableElement struct {
	code  rune // the rune of interest; it is present in this code table as a letter of the alphabet
	count int  // number of occurrences of this rune
}
|
||||||
|
|
||||||
|
// codePageTable holds the main (most frequently encountered) alphabet
// characters of a given encoding.
// NOTE(review): the original note described two groups of eight letters, but
// the array has 19 slots — confirm the intended layout against the tables
// that populate it.
type codePageTable [19]tableElement
|
||||||
|
|
||||||
|
// MatchRes is the final criterion of how well a data array matches a code
// page.
type MatchRes struct {
	countMatch int // match counter for the code page
}
|
||||||
|
|
||||||
|
// CodePage holds the data for one specific code page.
type CodePage struct {
	id       IDCodePage    // id of code page
	name     string        // name of code page
	MatchRes               // count of matching
	match    runesMatch    // calculates from the input data the count of entries belonging to the code page
	table    codePageTable // table of the main alphabet runes of this code page, used to calculate frequency
}
|
||||||
|
|
||||||
|
// String formats the code page as "id: <id>, countMatch: <n>"; the id is
// printed with %s and countMatch with %d.
func (o CodePage) String() string {
	return fmt.Sprintf("id: %s, countMatch: %d", o.id, o.countMatch)
}
|
||||||
|
|
||||||
|
//MatchingRunes - return string with rune/counts
|
||||||
|
func (o CodePage) MatchingRunes() string {
|
||||||
|
var sb strings.Builder
|
||||||
|
fmt.Fprint(&sb, "rune/counts: ")
|
||||||
|
for i, e := range o.table {
|
||||||
|
if i != 0 {
|
||||||
|
fmt.Fprintf(&sb, "%x/%d, ", e.code, e.count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// TCodePages is the slice type used to store all code pages.
type TCodePages []CodePage
|
||||||
|
|
||||||
|
// DeepMach is a stub: it ignores data and always returns ASCII.
func (o *TCodePages) DeepMach(data []byte) IDCodePage {
	return ASCII
}
|
||||||
|
|
||||||
|
//Match - return IDCodePage
|
||||||
|
//simple calculate count entry data runes in standart code page table
|
||||||
|
func (o TCodePages) Match(data []byte) (result IDCodePage) {
|
||||||
|
result = ASCII
|
||||||
|
maxCount := 0
|
||||||
|
for i, cp := range o {
|
||||||
|
o[i].countMatch = cp.match(data, &o[i].table)
|
||||||
|
if o[i].countMatch > maxCount {
|
||||||
|
maxCount = o[i].countMatch
|
||||||
|
result = cp.id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
//CodePages - slice of code pages
|
||||||
|
var CodePages = TCodePages{
|
||||||
|
{ASCII, "ASCII", MatchRes{0}, runesMatchASCII,
|
||||||
|
codePageTable{{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}}},
|
||||||
|
{IBM866, "IBM866", MatchRes{0}, runesMatch866,
|
||||||
|
codePageTable{
|
||||||
|
//first element serves as sign of absence
|
||||||
|
{0, 0},
|
||||||
|
//о е а и н т с р в
|
||||||
|
{0xAE, 0}, {0xA5, 0}, {0xA0, 0}, {0xA8, 0}, {0xAD, 0}, {0xE2, 0}, {0xE1, 0}, {0xE0, 0}, {0xA2, 0},
|
||||||
|
{0x8E, 0}, {0x85, 0}, {0x80, 0}, {0x88, 0}, {0x8D, 0}, {0x92, 0}, {0x91, 0}, {0x90, 0}, {0x82, 0}}},
|
||||||
|
{UTF8, "UTF8", MatchRes{0}, runesMatchUTF8,
|
||||||
|
codePageTable{
|
||||||
|
{0, 0},
|
||||||
|
//о е а и н т с р в
|
||||||
|
{0xD0BE, 0}, {0xD0B5, 0}, {0xD0B0, 0}, {0xD0B8, 0}, {0xD0BD, 0}, {0xD182, 0}, {0xD181, 0}, {0xD180, 0}, {0xD0B2, 0},
|
||||||
|
{0xD09E, 0}, {0xD095, 0}, {0xD090, 0}, {0xD098, 0}, {0xD0AD, 0}, {0xD0A2, 0}, {0xD0A1, 0}, {0xD0A0, 0}, {0xD092, 0}}},
|
||||||
|
{Windows1251, "Windows1251", MatchRes{0}, runesMatch1251,
|
||||||
|
codePageTable{
|
||||||
|
{0, 0},
|
||||||
|
//а и н с р в л к в
|
||||||
|
{0xE0, 0}, {0xE8, 0}, {0xED, 0}, {0xF1, 0}, {0xF0, 0}, {0xE2, 0}, {0xEB, 0}, {0xEA, 0}, {0xE2, 0},
|
||||||
|
{0xC0, 0}, {0xC8, 0}, {0xCD, 0}, {0xD1, 0}, {0xD0, 0}, {0xC2, 0}, {0xCB, 0}, {0xCA, 0}, {0xC2, 0}}},
|
||||||
|
{KOI8R, "KOI8R", MatchRes{0}, runesMatchKOI8,
|
||||||
|
codePageTable{
|
||||||
|
//о а и т с в л к в
|
||||||
|
{0, 0},
|
||||||
|
{0xCF, 0}, {0xC1, 0}, {0xC9, 0}, {0xD4, 0}, {0xD3, 0}, {0xD7, 0}, {0xCC, 0}, {0xCB, 0}, {0xD7, 0},
|
||||||
|
{0xEF, 0}, {0xE1, 0}, {0xE9, 0}, {0xF4, 0}, {0xF3, 0}, {0xF7, 0}, {0xEC, 0}, {0xEB, 0}, {0xF7, 0}}},
|
||||||
|
}
|
||||||
|
|
||||||
|
//codePageName - string of code page name
|
||||||
|
var codePageName = map[IDCodePage]string{
|
||||||
|
ASCII: "ASCII",
|
||||||
|
IBM866: "IBM866",
|
||||||
|
Windows1251: "Windows1251",
|
||||||
|
UTF8: "UTF8",
|
||||||
|
UTF16: "UTF16",
|
||||||
|
UTF16LE: "UTF16LE",
|
||||||
|
UTF16BE: "UTF16BE",
|
||||||
|
UTF32: "UTF32",
|
||||||
|
KOI8R: "KOI8R",
|
||||||
|
ISO5427Cyrillic: "ISO5427Cyrillic",
|
||||||
|
ISO51INISCyrillic: "ISO51INISCyrillic",
|
||||||
|
ISO111ECMACyrillic: "ISO111ECMACyrillic",
|
||||||
|
ISO153GOST1976874: "ISO153GOST1976874",
|
||||||
|
Unicode: "Unicode",
|
||||||
|
}
|
|
@ -0,0 +1,111 @@
|
||||||
|
package cpd
|
||||||
|
|
||||||
|
// Identifiers of the code pages known to the package. Each constant is an
// IDCodePage; the comment on each one gives the IANA name it stands for.
const (
	// ASCII is the uint16 identifier with IANA name US-ASCII (MIME: US-ASCII).
	// ANSI X3.4-1986
	// Reference: RFC2046
	ASCII IDCodePage = 3

	// ISO5427Cyrillic is the uint16 identifier with IANA name ISO_5427.
	// ISO-IR: International Register of Escape Sequences
	// Note: The current registration authority is IPSJ/ITSCJ, Japan.
	// Reference: RFC1345
	ISO5427Cyrillic IDCodePage = 48

	// ISO51INISCyrillic is the uint16 identifier with IANA name INIS-cyrillic.
	// ISO-IR: International Register of Escape Sequences
	// Note: The current registration authority is IPSJ/ITSCJ, Japan.
	// Reference: RFC1345
	ISO51INISCyrillic IDCodePage = 53

	// ISO111ECMACyrillic is the uint16 identifier with IANA name ECMA-cyrillic.
	// ISO registry
	// (formerly ECMA registry )
	ISO111ECMACyrillic IDCodePage = 77

	// ISO153GOST1976874 is the uint16 identifier with IANA name GOST_19768-74.
	// ISO-IR: International Register of Escape Sequences
	// Note: The current registration authority is IPSJ/ITSCJ, Japan.
	// Reference: RFC1345
	ISO153GOST1976874 IDCodePage = 94

	// UTF8 is the uint16 identifier with IANA name UTF-8.
	//
	// rfc3629
	// Reference: RFC3629
	UTF8 IDCodePage = 106

	// Unicode is the uint16 identifier with IANA name ISO-10646-UCS-2.
	//
	// the 2-octet Basic Multilingual Plane, aka Unicode
	// this needs to specify network byte order: the standard
	// does not specify (it is a 16-bit integer space)
	Unicode IDCodePage = 1000

	// UnicodeASCII is the uint16 identifier with IANA name ISO-10646-UCS-Basic.
	//
	// ASCII subset of Unicode. Basic Latin = collection 1
	// See ISO 10646, Appendix A
	UnicodeASCII IDCodePage = 1002

	// UTF7 is the uint16 identifier with IANA name UTF-7.
	//
	// rfc2152
	// Reference: RFC2152
	UTF7 IDCodePage = 1012

	// UTF16BE is the uint16 identifier with IANA name UTF-16BE.
	//
	// rfc2781
	// Reference: RFC2781
	UTF16BE IDCodePage = 1013

	// UTF16LE is the uint16 identifier with IANA name UTF-16LE.
	//
	// rfc2781
	// Reference: RFC2781
	UTF16LE IDCodePage = 1014

	// UTF16 is the uint16 identifier with IANA name UTF-16.
	//
	// rfc2781
	// Reference: RFC2781
	UTF16 IDCodePage = 1015

	// UTF32 is the uint16 identifier with IANA name UTF-32.
	//
	// https://www.unicode.org/unicode/reports/tr19/
	UTF32 IDCodePage = 1017

	// UTF32BE is the uint16 identifier with IANA name UTF-32BE.
	//
	// https://www.unicode.org/unicode/reports/tr19/
	UTF32BE IDCodePage = 1018

	// UTF32LE is the uint16 identifier with IANA name UTF-32LE.
	//
	// https://www.unicode.org/unicode/reports/tr19/
	UTF32LE IDCodePage = 1019

	// KOI8R is the uint16 identifier with IANA name KOI8-R (MIME: KOI8-R).
	//
	// rfc1489 , based on GOST-19768-74, ISO-6937/8,
	// INIS-Cyrillic, ISO-5427.
	// Reference: RFC1489
	KOI8R IDCodePage = 2084

	// IBM866 is the uint16 identifier with IANA name IBM866.
	//
	// IBM NLDG Volume 2 (SE09-8002-03) August 1994
	IBM866 IDCodePage = 2086

	// Windows1251 is the uint16 identifier with IANA name windows-1251.
	//
	// Microsoft http://www.iana.org/assignments/charset-reg/windows-1251
	Windows1251 IDCodePage = 2251

	// Windows1252 is the uint16 identifier with IANA name windows-1252.
	//
	// Microsoft http://www.iana.org/assignments/charset-reg/windows-1252
	Windows1252 IDCodePage = 2252
)
|
15
const.go
15
const.go
|
@ -1,15 +0,0 @@
|
||||||
package cpd
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/softlandia/cpd/internal/cp"
|
|
||||||
)
|
|
||||||
|
|
||||||
//numbers of code page
|
|
||||||
const (
|
|
||||||
CpASCII = cp.ASCII
|
|
||||||
CpWindows1251 = cp.Windows1251
|
|
||||||
CpIBM866 = cp.IBM866
|
|
||||||
CpUTF8 = cp.UTF8
|
|
||||||
CpUTF16 = cp.UTF16
|
|
||||||
CpUTF32 = cp.UTF32
|
|
||||||
)
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
package cpd
|
||||||
|
|
||||||
|
//checkHeader - check buffer for match to utf-8, utf-16le or utf-16be BOM
|
||||||
|
func checkHeader(b []byte) (id IDCodePage, res bool) {
|
||||||
|
if bomUTF8(b) {
|
||||||
|
return UTF8, true
|
||||||
|
}
|
||||||
|
if bomUTF16le(b) {
|
||||||
|
return UTF16LE, true
|
||||||
|
}
|
||||||
|
if bomUTF16be(b) {
|
||||||
|
return UTF16BE, true
|
||||||
|
}
|
||||||
|
return ASCII, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// bomUTF8 reports whether b starts with the three bytes EF BB BF.
// Buffers shorter than three bytes never match.
func bomUTF8(b []byte) bool {
	return len(b) >= 3 && b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF
}
|
||||||
|
|
||||||
|
// bomUTF16le reports whether b starts with the two bytes FF FE.
// Buffers shorter than two bytes never match.
func bomUTF16le(b []byte) bool {
	return len(b) >= 2 && b[0] == 0xFF && b[1] == 0xFE
}
|
||||||
|
|
||||||
|
// bomUTF16be reports whether b starts with the two bytes FE FF.
// Buffers shorter than two bytes never match.
func bomUTF16be(b []byte) bool {
	return len(b) >= 2 && b[0] == 0xFE && b[1] == 0xFF
}
|
||||||
|
|
||||||
|
//ASCII block

// itASCII has the itRuneMatch signature; it ignores both arguments and
// always returns 0.
func itASCII(r rune, tbl *codePageTable) int {
	return 0
}
|
||||||
|
|
||||||
|
// runesMatchASCII has the runesMatch signature; it ignores both arguments
// and always returns 0.
func runesMatchASCII(b []byte, tbl *codePageTable) int {
	return 0
}
|
152
cpd.go
152
cpd.go
|
@ -7,90 +7,57 @@ package cpd
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"reflect"
|
||||||
|
|
||||||
"github.com/softlandia/cpd/internal/cp"
|
|
||||||
"golang.org/x/text/encoding/charmap"
|
"golang.org/x/text/encoding/charmap"
|
||||||
"golang.org/x/text/transform"
|
"golang.org/x/text/transform"
|
||||||
)
|
)
|
||||||
|
|
||||||
//StrConvertCodePage - convert string from one code page to another
|
//CodePageAutoDetect - auto detect code page of input content
|
||||||
func StrConvertCodePage(s string, fromCP, toCP uint16) (string, error) {
|
func CodePageAutoDetect(content []byte) (result IDCodePage) {
|
||||||
if len(s) == 0 {
|
return CodePages.Match(content)
|
||||||
return "", nil
|
|
||||||
}
|
|
||||||
if fromCP == toCP {
|
|
||||||
return s, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
var err error
|
|
||||||
|
|
||||||
switch fromCP {
|
|
||||||
case cp.IBM866:
|
|
||||||
s, _, err = transform.String(charmap.CodePage866.NewDecoder(), s)
|
|
||||||
case cp.Windows1251:
|
|
||||||
s, _, err = transform.String(charmap.Windows1251.NewDecoder(), s)
|
|
||||||
}
|
|
||||||
switch toCP {
|
|
||||||
case cp.IBM866:
|
|
||||||
s, _, err = transform.String(charmap.CodePage866.NewEncoder(), s)
|
|
||||||
case cp.Windows1251:
|
|
||||||
s, _, err = transform.String(charmap.Windows1251.NewEncoder(), s)
|
|
||||||
}
|
|
||||||
return s, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// CodePageAsString - return name of char set with id codepage
|
//CodePageDetect - detect code page of ascii data from reader 'r'
|
||||||
// if codepage not exist - return ""
|
func CodePageDetect(r io.Reader, stopStr ...string) (IDCodePage, error) {
|
||||||
func CodePageAsString(codepage uint16) string {
|
//initial test
|
||||||
return cp.Name[codepage]
|
//test input interfase
|
||||||
|
if !reflect.ValueOf(r).IsValid() {
|
||||||
|
return ASCII, fmt.Errorf("input reader is nil")
|
||||||
|
}
|
||||||
|
|
||||||
|
//make slice of byte from input reader
|
||||||
|
buf, err := bufio.NewReader(r).Peek(1024)
|
||||||
|
if (err != nil) && (err.Error() != "EOF") {
|
||||||
|
return ASCII, err
|
||||||
|
}
|
||||||
|
|
||||||
|
//check file header // utf-8, utf-16 with BOM
|
||||||
|
if idHeader, ok := checkHeader(buf); ok {
|
||||||
|
return idHeader, nil
|
||||||
|
}
|
||||||
|
return CodePageAutoDetect(buf), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
//CodePageDetect - detect code page of file
|
//FileCodePageDetect - detect code page of text file
|
||||||
//return 0 if code page can not be detected
|
func FileCodePageDetect(fn string, stopStr ...string) (IDCodePage, error) {
|
||||||
//return const cpd.CpWindows1251 for Windows code page 1251
|
|
||||||
//return const cdp.CpIBM866 for IBM 866 code page
|
|
||||||
//return conts cdp.CpASCII by default or on error
|
|
||||||
//EF-BB-BF utf8 bom
|
|
||||||
func CodePageDetect(fn string, stopStr ...string) (uint16, error) {
|
|
||||||
var (
|
|
||||||
count1251 int //счётчик символов в кодировке 1251
|
|
||||||
count866 int //счётчик символов в кодировке 866
|
|
||||||
)
|
|
||||||
|
|
||||||
iFile, err := os.Open(fn)
|
iFile, err := os.Open(fn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return CpASCII, err
|
return ASCII, err
|
||||||
}
|
}
|
||||||
defer iFile.Close()
|
defer iFile.Close()
|
||||||
|
|
||||||
iScanner := bufio.NewScanner(iFile)
|
if len(stopStr) > 0 {
|
||||||
for i := 0; iScanner.Scan(); i++ {
|
return CodePageDetect(iFile, stopStr[0])
|
||||||
s := iScanner.Text()
|
|
||||||
if (len(stopStr) > 0) && strings.Contains(s, stopStr[0]) { //stopStr[0] - строка при обнаружении которой останавливаемся, stopStr - слайс строк
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
for j := range s {
|
return CodePageDetect(iFile)
|
||||||
if isRune1251(rune(s[j])) { //проверка принадлежности символа позициям алфавитных символов в кодовой таблице 1251
|
|
||||||
count1251++
|
|
||||||
}
|
|
||||||
if isRune866(rune(s[j])) { //проверка принадлежности символа позициям алфавитных символов в кодовой таблице 866
|
|
||||||
count866++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
switch {
|
|
||||||
case count1251 > count866:
|
|
||||||
return CpWindows1251, nil
|
|
||||||
case count1251 < count866:
|
|
||||||
return CpIBM866, nil
|
|
||||||
}
|
|
||||||
return CpASCII, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//FileConvertCodePage - replace code page text file from one to another
|
//FileConvertCodePage - replace code page text file from one to another
|
||||||
func FileConvertCodePage(fileName string, fromCP, toCP uint16) error {
|
func FileConvertCodePage(fileName string, fromCP, toCP IDCodePage) error {
|
||||||
if fromCP == toCP {
|
if fromCP == toCP {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -126,37 +93,34 @@ func FileConvertCodePage(fileName string, fromCP, toCP uint16) error {
|
||||||
return os.Rename(tmpFileName, fileName)
|
return os.Rename(tmpFileName, fileName)
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
//StrConvertCodePage - convert string from one code page to another
|
||||||
cp866r1Min = 0x80 //заглавная буква А
|
func StrConvertCodePage(s string, fromCP, toCP IDCodePage) (string, error) {
|
||||||
cp866r1Max = 0xAF //строчная буква п - в этом интервале в 866 раскладке лежит большинство русских букв
|
if len(s) == 0 {
|
||||||
cp866r2Min = 0xE0 //строчная р
|
return "", nil
|
||||||
cp866r2Max = 0xF1 //строчна ё - в этом интервале лежат остальные русские буквы
|
|
||||||
cp1251s1 = 0xA8 //Ё
|
|
||||||
cp1251s2 = 0xB8 //ё в этой позиции в 866 лежит псевдографика
|
|
||||||
cp1251r1Min = 0xC0 //с этой позиции начинается весь алфавит
|
|
||||||
cp1251r1Max = 0xFF //заканчивается
|
|
||||||
cpKOI8RMin = 0xC0 //начало интервала
|
|
||||||
cpKOI8RMax = 0xFF //конец интервала
|
|
||||||
)
|
|
||||||
|
|
||||||
func isRune1251(r rune) bool {
|
|
||||||
switch {
|
|
||||||
case r == cp1251s1:
|
|
||||||
return true
|
|
||||||
case r == cp1251s2:
|
|
||||||
return true
|
|
||||||
case (r >= cp1251r1Min) && (r <= cp1251r1Max):
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
return false
|
if fromCP == toCP {
|
||||||
|
return s, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var err error
|
||||||
|
|
||||||
|
switch fromCP {
|
||||||
|
case IBM866:
|
||||||
|
s, _, err = transform.String(charmap.CodePage866.NewDecoder(), s)
|
||||||
|
case Windows1251:
|
||||||
|
s, _, err = transform.String(charmap.Windows1251.NewDecoder(), s)
|
||||||
|
}
|
||||||
|
switch toCP {
|
||||||
|
case IBM866:
|
||||||
|
s, _, err = transform.String(charmap.CodePage866.NewEncoder(), s)
|
||||||
|
case Windows1251:
|
||||||
|
s, _, err = transform.String(charmap.Windows1251.NewEncoder(), s)
|
||||||
|
}
|
||||||
|
return s, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func isRune866(r rune) bool {
|
// CodePageAsString - return name of char set with id codepage
|
||||||
switch {
|
// if codepage not exist - return ""
|
||||||
case (r >= cp866r1Min) && (r <= cp866r1Max):
|
func CodePageAsString(codepage IDCodePage) string {
|
||||||
return true
|
return codePageName[codepage]
|
||||||
case (r >= cp866r2Min) && (r <= cp866r2Max):
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
|
143
cpd_test.go
143
cpd_test.go
|
@ -3,20 +3,18 @@
|
||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/softlandia/cpd/internal/cp"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type tCodePageAsString struct {
|
type tCodePageAsString struct {
|
||||||
id uint16
|
id IDCodePage
|
||||||
s string
|
s string
|
||||||
}
|
}
|
||||||
|
|
||||||
var dCodePageAsString = []tCodePageAsString{
|
var dCodePageAsString = []tCodePageAsString{
|
||||||
{0, ""},
|
{0, ""},
|
||||||
{3, "ASCII"},
|
{3, "ASCII"},
|
||||||
{cp.IBM866, "IBM866"},
|
{IBM866, "IBM866"},
|
||||||
{cp.Windows1251, "Windows1251"},
|
{Windows1251, "Windows1251"},
|
||||||
{60000, ""},
|
{60000, ""},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,44 +27,107 @@ func TestCodePageAsString(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//CodePageDetect
|
//TestCodePageDetect - тестирование метода CodePageDetect
|
||||||
|
// проверки на входные параметры:
|
||||||
|
// 1. nil входящий поток явный nil, параметр останова отсутствует
|
||||||
|
// 2. nil, "~" входящий поток явный nil, параметр останова присутствует
|
||||||
|
// 3. входящий поток не инициализированный объект, проверка на передачу пустого интерфейса
|
||||||
|
// проверка работы осуществляется через FileCodePageDetect()
|
||||||
func TestCodePageDetect(t *testing.T) {
|
func TestCodePageDetect(t *testing.T) {
|
||||||
res, err := CodePageDetect("test_files\\866&1251.txt", "~X~") //befor ~X~ file contain 866, after 1251
|
_, err := CodePageDetect(nil)
|
||||||
if err != nil {
|
|
||||||
t.Errorf("<CodePageDetect> on file '%s' return error: %v", "866&1251.txt", err)
|
|
||||||
}
|
|
||||||
if res != cp.IBM866 {
|
|
||||||
t.Errorf("<CodePageDetect> on file '%s' expected 866 got: %s", "866&1251.txt", CodePageAsString(res))
|
|
||||||
}
|
|
||||||
|
|
||||||
res, err = CodePageDetect("test_files\\866&1251.txt") //file contain more 1251 then 866
|
|
||||||
if res != cp.Windows1251 {
|
|
||||||
t.Errorf("<CodePageDetect> on file '%s' expected 1251 got: %s", "866&1251.txt", CodePageAsString(res))
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = CodePageDetect("-.-") //file "-.-" not exist
|
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Errorf("<CodePageDetect> on file '-.-' must return error, but return nil")
|
t.Errorf("<CodePageDetect> on input nil return error == nil, expect error != nil\n")
|
||||||
|
}
|
||||||
|
_, err = CodePageDetect(nil, "~")
|
||||||
|
if err == nil {
|
||||||
|
t.Errorf("<CodePageDetect> on input nil return error == nil, expect error != nil\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
res, _ = CodePageDetect("test_files\\noCodePage.txt") //file contain rune only ASCII
|
var data *os.File
|
||||||
if res != cp.ASCII {
|
res, err := CodePageDetect(data, "~")
|
||||||
t.Errorf("<CodePageDetect> on file 'noCodePage.txt' expect ASCII got: %s", CodePageAsString(res))
|
if err == nil {
|
||||||
|
t.Errorf("<CodePageDetect> on input nil return error != nil, data: %+v, res: %d, code page: %s\n", data, res, CodePageAsString(res))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestFileCodePageDetectSimple calls FileCodePageDetect on two fixture files
// and expects no error in both cases: '866to1251.txt' must be reported as
// IBM866 and '866&1251.txt' as Windows1251.
// NOTE(review): the fixture paths are written with backslash separators, so
// they presumably resolve only on Windows — confirm.
func TestFileCodePageDetectSimple(t *testing.T) {
	res, err := FileCodePageDetect("test_files\\866to1251.txt")
	if err != nil {
		t.Errorf("<FileCodePageDetect()> on file '866to1251.txt' err expected: nil, got: %s\n", err)
	}
	if res != IBM866 {
		t.Errorf("<FileCodePageDetect()> on file '866to1251.txt' expected: %s, got: %s\n", IBM866, res)
	}
	res, err = FileCodePageDetect("test_files\\866&1251.txt")
	if err != nil {
		t.Errorf("<FileCodePageDetect()> on file '866&1251.txt' err expected: nil, got: %s\n", err)
	}
	if res != Windows1251 {
		t.Errorf("<FileCodePageDetect()> on file '866&1251.txt' expected: %s, got: %s\n", Windows1251, res)
	}
}
|
||||||
|
|
||||||
|
// TestFileCodePageDetectUtf8Bom calls FileCodePageDetect on the fixture
// 'utf8wbom.txt' and expects no error and the result UTF8.
// NOTE(review): the fixture path is written with a backslash separator, so
// it presumably resolves only on Windows — confirm.
func TestFileCodePageDetectUtf8Bom(t *testing.T) {
	res, err := FileCodePageDetect("test_files\\utf8wbom.txt")
	if err != nil {
		t.Errorf("<FileCodePageDetect()> on file 'utf8wbom.txt' err expected: nil, got: %s\n", err)
	}
	if res != UTF8 {
		t.Errorf("<FileCodePageDetect()> on file 'utf8wbom.txt' expected: %s, got: %s\n", UTF8, res)
	}
}
|
||||||
|
|
||||||
|
// tFileCodePageDetectTest is one row of the FileCodePageDetect test table.
type tFileCodePageDetectTest struct {
	fn string     //filename
	st string     //stop string
	e  error      //error value stored with the row
	r  IDCodePage //expected result
}
|
||||||
|
|
||||||
|
// dFileCodePageDetect lists fixture files together with an optional stop
// string, an error value and a code page id for each.
var dFileCodePageDetect = []tFileCodePageDetectTest{
	{"test_files\\utf16BEwbom.txt", "", nil, UTF16BE},          //file contains utf16 big endian with bom rune at start
	{"test_files\\utf16be-woBOM.txt", "", nil, UTF16BE},        //file contains utf16 big endian without bom rune at start
	{"test_files\\utf16le-wBOM.txt", "", nil, UTF16LE},         //file contains utf16 little endian with bom rune at start
	{"test_files\\utf16le-woBOM.txt", "", nil, UTF16LE},        //file contains utf16 little endian without bom rune at start
	{"test_files\\utf8-woBOM.txt", "", nil, UTF8},              //file contains utf8 without bom rune at start
	{"test_files\\866&1251.txt", "~X~", nil, Windows1251},      //before ~X~ the file contains 866, after it 1251
	{"test_files\\866&1251.txt", "", nil, Windows1251},         //file contains more 1251 than 866
	{"test_files\\noCodePage.txt", "", nil, ASCII},             //file contains only ASCII runes
	{"test_files\\empty_file.txt", "", nil, ASCII},             //file exists but is empty, no error, return ASCII
	{"test_files\\rune_encode_error.txt", "", nil, ASCII},      //file contains a special rune -> encode error, but detection returns NO error
	{"test_files\\rune_error_1251.txt", "", nil, Windows1251},  //file contains 1251 and a special rune -> encode error, but detection returns NO error
	{"test_files\\utf8wbom.txt", "", nil, UTF8},                //file contains utf8 with bom rune at start
	{"test_files\\utf16LEwbom.txt", "", nil, UTF16LE},          //file contains utf16 little endian with bom rune at start
}
|
||||||
|
|
||||||
|
//FileCodePageDetect
|
||||||
|
func TestFileCodePageDetect(t *testing.T) {
|
||||||
|
var (
|
||||||
|
err error
|
||||||
|
res IDCodePage
|
||||||
|
)
|
||||||
|
for _, d := range dFileCodePageDetect {
|
||||||
|
if len(d.st) == 0 {
|
||||||
|
res, err = FileCodePageDetect(d.fn)
|
||||||
|
} else {
|
||||||
|
res, err = FileCodePageDetect(d.fn, d.st)
|
||||||
|
}
|
||||||
|
if err != d.e {
|
||||||
|
t.Errorf("<FileCodePageDetect> on file '%s' expected error: '%v', got: '%v', ", d.fn, d.e, err)
|
||||||
|
}
|
||||||
|
if res != d.r {
|
||||||
|
t.Errorf("<FileCodePageDetect> on file '%s' expected result: %s, got: %s", d.fn, d.r, res)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
res, err = CodePageDetect("test_files\\empty_file.txt")
|
_, err = FileCodePageDetect("-.-") //file "-.-" not exist
|
||||||
if (res != cp.ASCII) || (err != nil) {
|
if err == nil {
|
||||||
t.Errorf("<CodePageDetect> on file 'empty_file.txt' expect ASCII and no error got: %s and %v", CodePageAsString(res), err)
|
t.Errorf("<FileCodePageDetect> on file '-.-' must return error, but return nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
res, err = CodePageDetect("test_files\\rune_encode_error.txt")
|
_, err = FileCodePageDetect("") //file "" not exist
|
||||||
if (res != cp.ASCII) || (err != nil) {
|
if err == nil {
|
||||||
t.Errorf("<CodePageDetect> on file 'rune_encode_error.txt' expect ASCII and no error got: %s and %v", CodePageAsString(res), err)
|
t.Errorf("<FileCodePageDetect> on file '' must return error, but return nil")
|
||||||
}
|
|
||||||
|
|
||||||
res, err = CodePageDetect("test_files\\rune_error_1251.txt")
|
|
||||||
if res != cp.Windows1251 {
|
|
||||||
t.Errorf("<CodePageDetect> on file 'rune_error_1251.txt' expect 1251 and no error got: %s and %v", CodePageAsString(res), err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -83,13 +144,13 @@ func TestFileConvertCodePage(t *testing.T) {
|
||||||
t.Errorf("<FileConvertCodePage> on fromCp == toCp expected error==nil, got: %v", err)
|
t.Errorf("<FileConvertCodePage> on fromCp == toCp expected error==nil, got: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = FileConvertCodePage("test_files\\rune_encode_error.txt", cp.IBM866, cp.Windows1251)
|
err = FileConvertCodePage("test_files\\rune_encode_error.txt", IBM866, Windows1251)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Errorf("<FileConvertCodePage> expected error, got: %v", err)
|
t.Errorf("<FileConvertCodePage> expected error, got: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
os.Link("test_files\\866to1251.txt", "test_files\\866to1251.tmp")
|
os.Link("test_files\\866to1251.txt", "test_files\\866to1251.tmp")
|
||||||
err = FileConvertCodePage("test_files\\866to1251.tmp", cp.IBM866, cp.Windows1251)
|
err = FileConvertCodePage("test_files\\866to1251.tmp", IBM866, Windows1251)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("<FileConvertCodePage> expect no err, got: %v", err)
|
t.Errorf("<FileConvertCodePage> expect no err, got: %v", err)
|
||||||
}
|
}
|
||||||
|
@ -98,19 +159,19 @@ func TestFileConvertCodePage(t *testing.T) {
|
||||||
|
|
||||||
//ConvertCodePage
|
//ConvertCodePage
|
||||||
func TestStrConvertCodePage(t *testing.T) {
|
func TestStrConvertCodePage(t *testing.T) {
|
||||||
_, err := StrConvertCodePage("1234", cp.IBM866, cp.Windows1251)
|
_, err := StrConvertCodePage("1234", IBM866, Windows1251)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("<StrConvertCodePage> on test 1 return unexpected err: %v", err)
|
t.Errorf("<StrConvertCodePage> on test 1 return unexpected err: %v", err)
|
||||||
}
|
}
|
||||||
_, err = StrConvertCodePage("1234", cp.Windows1251, cp.IBM866)
|
_, err = StrConvertCodePage("1234", Windows1251, IBM866)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("<StrConvertCodePage> on test 2 return unexpected err: %v", err)
|
t.Errorf("<StrConvertCodePage> on test 2 return unexpected err: %v", err)
|
||||||
}
|
}
|
||||||
_, err = StrConvertCodePage("", cp.IBM866, cp.Windows1251)
|
_, err = StrConvertCodePage("", IBM866, Windows1251)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("<StrConvertCodePage> with empty string must return ERROR, but retrurn: %v", err)
|
t.Errorf("<StrConvertCodePage> with empty string must return ERROR, but retrurn: %v", err)
|
||||||
}
|
}
|
||||||
_, err = StrConvertCodePage("1234", cp.IBM866, cp.IBM866)
|
_, err = StrConvertCodePage("1234", IBM866, IBM866)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("<StrConvertCodePage> with equal fromCP and toCp must return nil, but retrurn: %v", err)
|
t.Errorf("<StrConvertCodePage> with equal fromCP and toCp must return nil, but retrurn: %v", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,19 @@
|
||||||
|
package cpd
|
||||||
|
|
||||||
|
//unit for ibm866
|
||||||
|
|
||||||
|
func runesMatch866(data []byte, tbl *codePageTable) (counts int) {
|
||||||
|
for i := range data {
|
||||||
|
if i == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if tbl.containsRune(rune(data[i-1])) > 0 {
|
||||||
|
j := tbl.containsRune(rune(data[i]))
|
||||||
|
if j > 0 {
|
||||||
|
(*tbl)[j].count++
|
||||||
|
counts++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
1643
internal/cp/mib.go
1643
internal/cp/mib.go
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,49 @@
|
||||||
|
package cpd
|
||||||
|
|
||||||
|
import "unicode"
|
||||||
|
|
||||||
|
//unit for koi-8
|
||||||
|
|
||||||
|
func runesMatchKOI8(data []byte, tbl *codePageTable) (counts int) {
|
||||||
|
for i := range data {
|
||||||
|
if i < 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
//case " Us" - separator_UPPER_symbol
|
||||||
|
if unicode.IsPunct(rune(data[i-2])) && isUpperKOI8(rune(data[i-1])) {
|
||||||
|
j := tbl.containsRune(rune(data[i]))
|
||||||
|
if j > 0 {
|
||||||
|
(*tbl)[j].count++
|
||||||
|
counts++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if isKOI8(rune(data[i-1])) {
|
||||||
|
j := tbl.containsRune(rune(data[i]))
|
||||||
|
if j > 0 {
|
||||||
|
(*tbl)[j].count++
|
||||||
|
counts++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inclusive bounds of the byte ranges that the KOI8 matcher treats as
// uppercase and lowercase letters.
const (
	cpKOI8StartUpperChar = 0xE0
	cpKOI8StopUpperChar  = 0xFF
	cpKOI8StartLowerChar = 0xC0
	cpKOI8StopLowerChar  = 0xDF
)

// isUpperKOI8 reports whether r lies in the uppercase range
// [cpKOI8StartUpperChar, cpKOI8StopUpperChar].
func isUpperKOI8(r rune) bool {
	if r < cpKOI8StartUpperChar {
		return false
	}
	return r <= cpKOI8StopUpperChar
}

// isLowerKOI8 reports whether r lies in the lowercase range
// [cpKOI8StartLowerChar, cpKOI8StopLowerChar].
func isLowerKOI8(r rune) bool {
	if r < cpKOI8StartLowerChar {
		return false
	}
	return r <= cpKOI8StopLowerChar
}

// isKOI8 reports whether r lies in either the uppercase or the lowercase
// KOI8 letter range.
func isKOI8(r rune) bool {
	if isUpperKOI8(r) {
		return true
	}
	return isLowerKOI8(r)
}
|
|
@ -0,0 +1 @@
|
||||||
|
Русский в кодировке IBM866
|
|
@ -0,0 +1 @@
|
||||||
|
Русский в кодировке KOI8r
|
|
@ -0,0 +1 @@
|
||||||
|
Русский в кодировке Windows1251
|
|
@ -1 +1 @@
|
||||||
<EFBFBD>
|
0
|
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
|
@ -0,0 +1 @@
|
||||||
|
Русский в кодировке UTF8
|
|
@ -0,0 +1 @@
|
||||||
|
Русский в кодировке UTF8
|
|
@ -0,0 +1 @@
|
||||||
|
Utf8 w/o bom Русский
|
|
@ -0,0 +1,2 @@
|
||||||
|
code page UTF8
|
||||||
|
Русский
|
|
@ -0,0 +1,22 @@
|
||||||
|
package cpd
|
||||||
|
|
||||||
|
import "encoding/binary"
|
||||||
|
|
||||||
|
//unit for UTF8
|
||||||
|
|
||||||
|
func runesMatchUTF8(data []byte, tbl *codePageTable) (counts int) {
|
||||||
|
n := len(data)/2 - 1
|
||||||
|
if n <= 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for i := 0; i < n; i += 2 {
|
||||||
|
t := data[i : i+2]
|
||||||
|
d := binary.BigEndian.Uint16(t)
|
||||||
|
j := tbl.containsRune(rune(d))
|
||||||
|
if j > 0 {
|
||||||
|
(*tbl)[j].count++
|
||||||
|
counts++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
|
@ -0,0 +1,51 @@
|
||||||
|
package cpd
|
||||||
|
|
||||||
|
import "unicode"
|
||||||
|
|
||||||
|
//unit for windows1251
|
||||||
|
|
||||||
|
//TODO: нужно отличить от KOI-8r
|
||||||
|
func runesMatch1251(data []byte, tbl *codePageTable) (counts int) {
|
||||||
|
for i := range data {
|
||||||
|
if i < 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
//case " Us" - separator_UPPER_symbol
|
||||||
|
if unicode.IsPunct(rune(data[i-2])) && isUpper1251(rune(data[i-1])) {
|
||||||
|
j := tbl.containsRune(rune(data[i]))
|
||||||
|
if j > 0 {
|
||||||
|
(*tbl)[j].count++
|
||||||
|
counts++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//case "ab" - counts only if symbols are arranged in pairs
|
||||||
|
if is1251(rune(data[i-1])) {
|
||||||
|
j := tbl.containsRune(rune(data[i]))
|
||||||
|
if j > 0 {
|
||||||
|
(*tbl)[j].count++
|
||||||
|
counts++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inclusive bounds of the byte ranges that the Windows-1251 matcher treats as
// uppercase and lowercase letters.
const (
	cp1251StartUpperChar = 0xC0
	cp1251StopUpperChar  = 0xDF
	cp1251StartLowerChar = 0xE0
	cp1251StopLowerChar  = 0xFF
)

// isUpper1251 reports whether r lies in the uppercase range
// [cp1251StartUpperChar, cp1251StopUpperChar].
func isUpper1251(r rune) bool {
	if r < cp1251StartUpperChar {
		return false
	}
	return r <= cp1251StopUpperChar
}

// isLower1251 reports whether r lies in the lowercase range
// [cp1251StartLowerChar, cp1251StopLowerChar].
func isLower1251(r rune) bool {
	if r < cp1251StartLowerChar {
		return false
	}
	return r <= cp1251StopLowerChar
}

// is1251 reports whether r lies in either the uppercase or the lowercase
// Windows-1251 letter range.
func is1251(r rune) bool {
	if isUpper1251(r) {
		return true
	}
	return isLower1251(r)
}
|
Двоичные данные
частотность букв.xlsx
Двоичные данные
частотность букв.xlsx
Двоичный файл не отображается.
Загрузка…
Ссылка в новой задаче