From 4387137abc7cbdff5eb56f5283fe7284d56c5b27 Mon Sep 17 00:00:00 2001 From: softlandia Date: Thu, 7 Nov 2019 18:28:00 +0400 Subject: [PATCH] v0.2.0 --- .gitignore | 3 +- .vscode/launch.json | 17 + char_frac.xlsx | Bin 0 -> 12302 bytes codePageTable.go | 23 + code_pages.go | 134 +++ code_pages_id.go | 111 +++ const.go | 15 - cp_deep_maching.go | 45 + cpd.go | 152 ++-- cpd_test.go | 143 ++- ibm866.go | 19 + internal/cp/mib.go | 1643 ---------------------------------- koi8.go | 49 + test_files/IBM866.txt | 1 + test_files/KOI8-r.txt | 1 + test_files/Win1251.txt | 1 + test_files/noCodePage.txt | 2 +- test_files/utf16BEwbom.txt | Bin 0 -> 44 bytes test_files/utf16LEwbom.txt | Bin 0 -> 42 bytes test_files/utf16be-wBOM.txt | Bin 0 -> 64 bytes test_files/utf16be-woBOM.txt | Bin 0 -> 62 bytes test_files/utf16le-wBOM.txt | Bin 0 -> 64 bytes test_files/utf16le-woBOM.txt | Bin 0 -> 62 bytes test_files/utf8-wBOM.txt | 1 + test_files/utf8-woBOM.txt | 1 + test_files/utf8.txt | 1 + test_files/utf8wbom.txt | 2 + utf8.go | 22 + win1251.go | 51 ++ частотность букв.xlsx | Bin 10449 -> 0 bytes 30 files changed, 642 insertions(+), 1795 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 char_frac.xlsx create mode 100644 codePageTable.go create mode 100644 code_pages.go create mode 100644 code_pages_id.go delete mode 100644 const.go create mode 100644 cp_deep_maching.go create mode 100644 ibm866.go delete mode 100644 internal/cp/mib.go create mode 100644 koi8.go create mode 100644 test_files/IBM866.txt create mode 100644 test_files/KOI8-r.txt create mode 100644 test_files/Win1251.txt create mode 100644 test_files/utf16BEwbom.txt create mode 100644 test_files/utf16LEwbom.txt create mode 100644 test_files/utf16be-wBOM.txt create mode 100644 test_files/utf16be-woBOM.txt create mode 100644 test_files/utf16le-wBOM.txt create mode 100644 test_files/utf16le-woBOM.txt create mode 100644 test_files/utf8-wBOM.txt create mode 100644 test_files/utf8-woBOM.txt create mode 100644 test_files/utf8.txt create mode 100644 test_files/utf8wbom.txt create mode 100644 utf8.go create mode 100644 win1251.go delete mode 100644 частотность букв.xlsx diff --git a/.gitignore b/.gitignore index 3936230..2ac8efe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.zip *.7z -.idea/* \ No newline at end of file +.idea/* +tmp* \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..c23774c --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,17 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Launch", + "type": "go", + "request": "launch", + "mode": "auto", + "program": "${fileDirname}", + "env": {}, + "args": [] + } + ] +} \ No newline at end of file diff --git a/char_frac.xlsx b/char_frac.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..4515e44d36b5fd51259cf3ad16608662679e77c6 GIT binary patch literal 12302 zcmeHtWmp_r)^6k0xVsaY;I080cZc8-+}$O(YmgvGaCg_>?!kh42o_v}+|D^O-@R^ z>|M?5T@BT}9L-$xnLX`nDRN<<>9YaQkn8`u{4Z{Sa`j>R9#)JFtpj24&zg}r;iaXB zB&{U6G~W^4dnUc6Y1}2GfAD)@uA)(c*iEZqJE==|AxKcMM5hfF*|ufA*H)r2LMe)^ za6S`yl>Q_UlwYnHTbB^cl}&(pasofofFR$zWWsDs^-M*J$5oHavDbaBxDPIi5Ei5D zVE9%FefRZHXt+lkS`Oq!V8ZJetja_ryQbCNuENftNmG^SLaM=>d=m@8sZnmJzbBVD z82N&etzTnF$S+1NVpR@RY{yOx5JZ?i#MS+B= zp|hE-3k&nF?SI4be{oFyi|ge{3QE1Kz_1hPhw!1Bxzz*=u$+gebQ`6*|67?A^oH0x z8sgQ?7bF(r7nXW}&&lgwH~NZ17=VR%C6fjR>U+G{4al~8kF zwd0Dg>L#A-gLjje0Sl>x`>4VZeDX(M(s74eOe_{FeTQr*ukMI6)GYa|s!cvR^HX^l zn%Z_t!LFz3VIvoy*CNExtWUT)T zlE7EVX%~it18I#LArF5 zr*Kw}Om|$&MOPN8BaK6J#Nm3-G94v{|K&O{L#zJlc{vsshcK^`@j>?+F0?He?5pmV z#Zg$W-q^M$0&ouF7vnI9CdU_rP6}GC8tRE+OAASBmf#C&IXaBdy)r2msL*3x};!VtP-zybj1kSidR`B$=(tINgDuws0s zJ|kybUlz=8pDEtA{wY!(GGz^FOViUElqu(TuO?lx?hmlY!l1vbMPyDtqh25LU5~+U zD$>cWk|(Y-jS8-b$9$*0tCtFTYWLxZVq;Go%0i^oj+z|XSl(UciO%~H-l2*B3;2@X zT-zukI5`zH*B7JFOOu5oyU^F8n)cC(le(pZpswK*y#C3HYZL1RG14D3i~(73QF6M` z{L3{(29gP-pW4{~(2~m!RZrnE$4*jx4pro+W#f7!&_sP^=X0{`yN3{f@SrBsw#tc-*XFN#KR1s=%Yg?75dK4@Gvg;1d`X}NS&;GTUj?`_;R zO`gI>{ZqwCiAR)gNNzZuU80{XPM(Jcoi|(WBkS_+`pb25XE@SY_zMZlcYHU=+m0anf04VvRO^cpi@DM`*v+8aXO=4Vox~FfvUS#t; zNJeI7H*u`?YxAPhF~bWiq7S&^6@LTyBT+s782BVb9Uzb%m+MIuw`GYHH$P3cqUg|= zd2D@u{Ory{{}G5DGFnTu^n)`8!Bdk=yM_wBwSxum9F8)qykoO!*W+)wmPGlJr2@h= zCo}*6=TEM=SQ?%4MoP}jI$ z@r~BscEDU?o43Fm!BxNL`q)G05a{5xwslh31O4EY$f}spWZ){oHR4weBac8K z_-l5fv{7qSLkCj_-Abv8u@<3*iM|o@L)*n44q#(EoTwC+sj0BBp9yBmU9hYfa1-O+ zr>i2cH6y@TE2C;DcjN=hrgzFsFpO~X2#-cQ=xtt8lf8^&=4Tt>2**O>2tJ-uBf-}n z&!M$1Hd+wYfuSUVwbfN{kBnC^!i+~YqCO}~x~E@)3i0ZjToFVqP;~gd_I5L(xjI#6 z!h9ed+v;*bXyrW}3M7wPqnjEDEJX8l$xcC=Smm(LAXl8;z1Y7ub1=W- zeES4;f8~pt#aiFQnxDbClasytYNJqlO$62VPsVBT5k{2az3rNk^MpfP| zin!=~T{1TwsF0=qDF3^86xyQ&$`|?k{HiJ5bV)Z(`*J6P*b8fQcHi|@LB#QkEqkJr zS=YLd!Xw03mcW5Dk!_}srl#esbsmO&fA+YizooZ;9lKmN+RjL1^_ph$h#nQ~o z)#V?F_>Tj&|4G9k@!e59tT+-kFi&EhKJVs3fnsVrRLvTDFedj)@XKilH#m+oGP@l<)KwcWWPLOqLO!wKV#pH?uxkgE36 zQJf#SpnYEFg!K7jhzwtX(W_i(z9EU@^S36VhaT4-ZT!$+3+={6FRqdPE}6K!O1F9k z4*qI38hW@+TGh+lADK zB}aYRuv2$rhBq(<1}j|6Wa9f^47o|Iysv;C9FlE}4e%SKM{?1pj)`MOABs>&J+O1A zb=N6nV_N+VH0^j1Iz#AeTs@wIOt4XQ%;EHf-d#5d z5w6S8?-r1hjIQWF)0yXg_0U_B|;l?f~*7Xw;Y?Fwdo2*3O(2MBVq|wp$ z=_lJsikO-VFiQ^7-ttKtSC7fHKc8c2m$~jqI9Gq%>d&^n!#Wgj+f5F0=zT;;X$!|2 zMg_8rejB?IZ5Bps`q_#9!|vq#ZqzMjiejx_N55iay8`6~lyfP<2wK~=KAtc($N*-m z*fGpbICmswN;#%GU;|eeB}N8h(r;rTzEgl*UTms5B*5 zmJ?B;R8i$fx-&EC;NdCk`=^v@;4S0;tPjh(dWHjcHJ7hCjz|EF>JeL!V`}a+)gzVi z6D{W6x~wGzP}NbdHAE8kQ+Hb3Je^&&OFASAQL%Jd(rN11W2RvD=1R!3vMeffC1g>( z*>TF6aolqaM7mu;=Zc8zDiM=eao;^%m=nP+pp4ROfDbyYqFsR2zH3Sh&d-z? zm;sTY2p#K0$GEzqAb+w7y((c3@TsmhMUlP{<>YghbWL0nS2pp_V}RkMB#8?s)#iKw zJHfF4)|_20i3FwTKZ>Z#DbL4!7X;tM0+x*s<3h(ry! z9Kji;?Vw)Ox(m&BOMJ|T_0q7qTa6#7%M(49)5(t%wJNeGG!JDpZV!6HpCIh}5#)Kf zf#WF(r&SWdf({CjFm#X08isGDx)J{@8S>I1F{#m~bIYT>`dfH`?9548d#e_R>rt^| z_UzHS;Y90Yhy5{>JR?<9ng8{)82~}WH9f#v<(T-B;ntk?awuE$sJx#6=;hYIgWL*Nw~q3Lu!yU zL27$8)jp4IL6}yVTf1b@;tfCgxfw7<@Ljwh9)>}`&5gs~X7Q`ahQvk4%TP3Ago^rS z4s&t!vNii-bUOQ5*I|nn$KTNES!~B60|g0AoB-D{KheA_aVh0`jt1@ZcHa;i>HE2- z8@|RjNnf08JX-Y=D4cvJ?j|1l@{}@z_~OOYlhfGFs|0rv*mnt;@Y(J!DEBWD)nxR{ z-=8J*S;{e7T0Q)9$dk1vY%#>5A~jOcWYdvtA!@ZxVU`Ijd-<7)R&cn5zB)v=#<47g zg0~DurF*68!rMO3DiRoOocOUfziVsRdrroyrYC6vl;>RLMOh>r{t2HJ-*WwDOdi4P{efz0*V^E}m7Tz`(fSg3f;y~uE%*9Ko+d;Jz+Gw1sSkcHURoZX{#q;$S zHcLT4FFB>Duc~cp-Vf3T#%ml3LvMItvwU&)a{!!}J+_=rqpUTNHMe|B@4DxLcXUez zGpY=@vyqFAm4%)mV(DP1&^d0;E}$>tikl%d42mbMd6?XQH!8@*_psHorsZ>9%P+S$ z#*)ZrXnJcc1b1hj+}uAtQ5@YH7h@Y4I24&la>HWbo~!Sg%Aq!M7%7RYNf|w2(wsgv zrGs-7iV)JRyXHJO_o#wD8@+sYo}^6_PcTeN&+y%(`y@*aY1c(?gx=s3i^B|A zZn`kUAG(5C(iy2>J@u2$=ApPB7dLHwsuNx4K;CAS44{kY2L~u7$zjF`tN4VLzma}o zrbfcCFpeoo&TZ_L=>B-(ohOsms?97%XN zS`GyI$#ByJMQgVjjxwqxMVzZSXfm*DnpB7mG(A99@mwNI=65HJ#}?&_?Df>hsJ%)Q=xA4=z)DtsdkZfp`qrvDDKb?OViPS7>P};%X zek(SS0!-B@aJcRlseV}e?qpj8Y!;;EGvK(Z_94joc8{Jk?FN;5pl`TOot7aHlX1v$ zK-+&(;zUgJkcPYl+E~{{AY!4jWo5-@{~AljLB zvd}+j{_H;UydaGKsqk^FX5^Y>vGMC3&*S=C&GX=~;j{CjO7eRAiI>5ZddTBh;j@88 zc-iJ@v;U;WqLVVod55(Z(}=!iiSTIC=9%)hMY&b0CQ?aJR4ON)V*p62$gAUtT`0UI&B+oWsK{$;ZoiVGtd5E@e$QR40r$f-YoA`U}1;Ww)g?WdnS6B^Q?<5@mLzhW;@Uw(DX)Uw)LswsAH#U zG2ZSferua~y}d(=p@v9dd}?m=gj2XOMUml*qEvXL^=m1{!E?u|q!Mqx%XQh)U5!`C zn!|lwwvGK=@mp2JG17C?imAhC28u+Z#erD#}#I4Y@iA`ubpr&(IyS#U}G20K4z z(5z08cFL+jTECPMlLYR*=g?&jT9tYunnVf>(St@FuIg}3{i{-$>kICWdguYAC3Tw5 zCGQoCLR%=RCB-&aEvXk4@eQV$WekH;VO7Zs2u%p&X}?drO*TCql&0BP6TwMmGNqx6 zXQly~M0(W{bbR3z@+SN^{?j7%^Zp>$X3iKZkDd9dW#Z?Ec;;@91B$7xJ|PLJIaYnP ztuK6t3@i}_eaRp(u*ZlS>8tQ9tOtB0o}wHVV&l1p$gK~Iid=pqI}{rk&~$);n&3V! zu$Xq(99=&41$p5IVz4<^0CR$NUd0$|b&J&-_KOmvjpU}BF&t@n0%*%k7+R6NBoWDP z`xJyuiNR+JO04AqGl3|vY8Ja-x@LAz#ZphbEX=n+)=~p50{JUTwSD3^174-0APkjt z&I!XZB0CA#fp3Mek@T_{^kjgV9JDrOuyO>ivjivUEpm5cjWZ>7 z@rN7u9<4*1uxBF*-v&BIDbM;yBBVt)%A5=reCgt8P4I?|{nGmo6aL_kc2wGY5moSO zHWYGtDCRU+b}U7BC-&`U!NVz646L3(y_N(dJUtjpte#VHpg4|Rd*qUMY{*eif->q2 zG!oa6EFx&HFji(L9pE^!tPDn`p$*~cJPjOZMn*EFAwlmuW=F*Gz|BAdg9+|NLgj0c zRdozx-z3}xvbHA=IQsZ=zIW;QHalSUp%OmPHT!2%&*k5Vcv9oGY$@=BzYq;bw8I2z6Tlourn8hk>5i=*>c0xdZrG1ZM>_MpO4RX zHI!MG1OROtB4yE5(EhbSC%a?-4Ts@ZV%V?wB6)*{Wy;%uVR65cH*8m+*;#ZkFxlo@TTS;q%Y&60gen*J!c7Cy)nSO}}$7g!r+)eZJIPgoZsn65n3VosqGrey{wNd)UA3wf71Pg8(8q622_wg!GJp`d`5A$!Uamg zmcf&a#Z^yXG8MR6S0`CJ?TiAsujjHt?%fA;jD?Y?TxX!JOYMWc`$6s}$@*RNN&VuK znK30Ta90u0xl$63i zq%+PC9ln~C35r*9xArD}MWmQykzbRYpK|f*=s5!Nx;ZZ~Y|bIIgXO3#M3ZH`q}O)d z$Edm*#LvBK{I(AyMA7mV_nzXS6#3s053uL2%YPI37`EUZiDv zZ4Z{~y|w$@gd}1LIx6;Y#!phU<#zjbjKtV-jN61@>9`N8;LzBCH?XlM=IKE)sb>`3 zg@SB8pj0!y({Y1f%!XMI3bXmV2~E|~%a4;~PN{t_@9z`zIJZ;Q{hV$fZN^iDBYz(C zZLLtSW;y?L*n;E6aymi8`$|uGvEe>fJCtx=;K+yT;am(W*NbfQ58Ms4x!G0q0pGRy5*s4Ya+N(AIfT9qkbfeTBieO}7@D?v}h3k-0v@=yDRBx#C4CcT{$j zl4|DKnLB5^xaUv3*WHL7>N8tF`ntEaxcxJhs8Am&+Q`#LaC$wt%7odm%>*-MqLXN$ z_y)79VudgBMS8@o`nG^v2rAtkJCQ020~B8xo|!h;Xi>&Tu)GlL=q+!5aC)PQMD3Wu z)_%*F5S2Z64^z9Q5##VBYJ7x}L>Wh#9~m1s-coJE4z^E6C)v!4Q=lO>n{3NH+-)(~ zSP7SQbg3dsIE|~-i#?u>nf=#IjknOa=Wc(r$aljAux0ROs@5LOnnjsuD+;0~(I*x6 zHOHPBnI8kw+MSo(y|0}Y!UVo}JvOG6_pPK+{-TC~UnP^|g1MTk0aQah{iw}O88X>I zn9_H8v_1c&hWL%jW6r9TdI`%Vs&nmf21ng6h>vYvSLuxe3YiKZuTbQUdivf&AbD`< z3rokF);uzk>)g-_BXiQKLveiVW};PfxO&Igkuqo=D@+|;2niM~HcjdsxeH7Q*C%S} zM{5Pe7tk|FQ}t282OK3-`t^P+Rp^^;QrT}({j$^kMu&P2nROiU1^M`v!)FGj2 z4raVQ?ysRPx+1gRY}nbNw1+>QmD4ir#e&v+pHwM@~+iG zNHdLtt$ZJKw%e?@OLk^le9zmfLA2wZL=GyAtfNAb*=tUL`vFoyeWsFad)*f#5yanbo&wTsxvKny$)jCoK{ziV(GrR4(o>`ky1x^{OQg+ z^=@Q>(r5Hj?YUi@@O9%|85JEt@dq!f&+!`12Dst(rLxa@>IXI&*Hs=@DeZb4J7mCF zejlc&k*6Fohr{Oe^Mk=0k0CU#JG+abd;jDmI^11Jzml@2iB*MXkH=@ z7A=-5ZvyUhjP^(Jzn$W+K=8#?g$(?iAhkG5h)lrL;f=DhgQE+}8wY2ze+b_Gs}6uP zT}YC?O)x7#=n2A4^dz6yn!4p6ov)Hrngr!PjXWs5Y{a-^ljiPE@EaQ9(BNBSCWha+ z7$h;GnQb165^I;>BvOcz(HkI@oa1y;;sJF`7qpP7KvkUW-%H`` zXbC10lXKb?r{|Y4Ol)uQWX+-XloeOhTTSQf+~RBoJQxv3LN`g$jM9x#H_^T?P-BuN z2E`BE>*5QEI0E64v&*|_C~l7$vgI!JLJiV+<*b&M-1rAR)WVVXP5ZGrb8;(SbJo2I z8&x{pJGg78I*^+ikAfM&>IN-z*%s7w)R$Wd!SJVLyK4l!FyR>rB5ZT!SeM2?x4V9I zYdHBN7r%Yu7A7U)$xK0dkd++2xINOQ*qcAEC^K#Yv`MFgz$IuB6jkK7*$9+e}}9uAS*K{BL+bn(QA-R+6a0GAl6j1m>O`xJt~ostgP)H<8x5tTu*9YP3)&e3Mg46H%Uc zv=HAoJ-g_A5WC~!c`q)rO2Th=nK0e29J!l|0|!fNgMXdk`E^y_9~UkOso{)Ggf{N& zmZYJtY9W!4F=&1chBg5Lf zJm!?FCTpA6a4v+y0Z&-kb@&6avFiF*C&GpUtGVEU_b9Fbo_1Qfw{{rtS+bXO0@x8X z@XNC?4NB75!u=01On{su+xKB__--a_ZX{TwVDR?_D2fBq!pZQMTf`fMbsrCsnNAo9rp`e){l8V3AH2?E) z{d4~}3TI`xe+T$?wdFqzf9;DPG4U_j%ij(EU4izmhGUSr@c&(p_B+n+vZTL|o*}aB z-$Y8k8~?5|`ODY_(qItd|4^O$j_`Xa|1Shth<4>4xBq_?_J0Tby)yn6pgcr%1p)lM zPX0T maxCount { + maxCount = o[i].countMatch + result = cp.id + } + } + return result +} + +//CodePages - slice of code pages +var CodePages = TCodePages{ + {ASCII, "ASCII", MatchRes{0}, runesMatchASCII, + codePageTable{{0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}}}, + {IBM866, "IBM866", MatchRes{0}, runesMatch866, + codePageTable{ + //first element serves as sign of absence + {0, 0}, + //о е а и н т с р в + {0xAE, 0}, {0xA5, 0}, {0xA0, 0}, {0xA8, 0}, {0xAD, 0}, {0xE2, 0}, {0xE1, 0}, {0xE0, 0}, {0xA2, 0}, + {0x8E, 0}, {0x85, 0}, {0x80, 0}, {0x88, 0}, {0x8D, 0}, {0x92, 0}, {0x91, 0}, {0x90, 0}, {0x82, 0}}}, + {UTF8, "UTF8", MatchRes{0}, runesMatchUTF8, + codePageTable{ + {0, 0}, + //о е а и н т с р в + {0xD0BE, 0}, {0xD0B5, 0}, {0xD0B0, 0}, {0xD0B8, 0}, {0xD0BD, 0}, {0xD182, 0}, {0xD181, 0}, {0xD180, 0}, {0xD0B2, 0}, + {0xD09E, 0}, {0xD095, 0}, {0xD090, 0}, {0xD098, 0}, {0xD0AD, 0}, {0xD0A2, 0}, {0xD0A1, 0}, {0xD0A0, 0}, {0xD092, 0}}}, + {Windows1251, "Windows1251", MatchRes{0}, runesMatch1251, + codePageTable{ + {0, 0}, + //а и н с р в л к в + {0xE0, 0}, {0xE8, 0}, {0xED, 0}, {0xF1, 0}, {0xF0, 0}, {0xE2, 0}, {0xEB, 0}, {0xEA, 0}, {0xE2, 0}, + {0xC0, 0}, {0xC8, 0}, {0xCD, 0}, {0xD1, 0}, {0xD0, 0}, {0xC2, 0}, {0xCB, 0}, {0xCA, 0}, {0xC2, 0}}}, + {KOI8R, "KOI8R", MatchRes{0}, runesMatchKOI8, + codePageTable{ + //о а и т с в л к в + {0, 0}, + {0xCF, 0}, {0xC1, 0}, {0xC9, 0}, {0xD4, 0}, {0xD3, 0}, {0xD7, 0}, {0xCC, 0}, {0xCB, 0}, {0xD7, 0}, + {0xEF, 0}, {0xE1, 0}, {0xE9, 0}, {0xF4, 0}, {0xF3, 0}, {0xF7, 0}, {0xEC, 0}, {0xEB, 0}, {0xF7, 0}}}, +} + +//codePageName - string of code page name +var codePageName = map[IDCodePage]string{ + ASCII: "ASCII", + IBM866: "IBM866", + Windows1251: "Windows1251", + UTF8: "UTF8", + UTF16: "UTF16", + UTF16LE: "UTF16LE", + UTF16BE: "UTF16BE", + UTF32: "UTF32", + KOI8R: "KOI8R", + ISO5427Cyrillic: "ISO5427Cyrillic", + ISO51INISCyrillic: "ISO51INISCyrillic", + ISO111ECMACyrillic: "ISO111ECMACyrillic", + ISO153GOST1976874: "ISO153GOST1976874", + Unicode: "Unicode", +} diff --git a/code_pages_id.go b/code_pages_id.go new file mode 100644 index 0000000..19e4295 --- /dev/null +++ b/code_pages_id.go @@ -0,0 +1,111 @@ +package cpd + +const ( + // ASCII is the uint16 identifier with IANA name US-ASCII (MIME: US-ASCII). + // ANSI X3.4-1986 + // Reference: RFC2046 + ASCII IDCodePage = 3 + + // ISO5427Cyrillic is the uint16 identifier with IANA name ISO_5427. + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO5427Cyrillic IDCodePage = 48 + + // ISO51INISCyrillic is the uint16 identifier with IANA name INIS-cyrillic. + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO51INISCyrillic IDCodePage = 53 + + // ISO111ECMACyrillic is the uint16 identifier with IANA name ECMA-cyrillic. + // ISO registry + // (formerly ECMA registry ) + ISO111ECMACyrillic IDCodePage = 77 + + // ISO153GOST1976874 is the uint16 identifier with IANA name GOST_19768-74. + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO153GOST1976874 IDCodePage = 94 + + // UTF8 is the uint16 identifier with IANA name UTF-8. + // + // rfc3629 + // Reference: RFC3629 + UTF8 IDCodePage = 106 + + // Unicode is the uint16 identifier with IANA name ISO-10646-UCS-2. + // + // the 2-octet Basic Multilingual Plane, aka Unicode + // this needs to specify network byte order: the standard + // does not specify (it is a 16-bit integer space) + Unicode IDCodePage = 1000 + + // UnicodeASCII is the uint16 identifier with IANA name ISO-10646-UCS-Basic. + // + // ASCII subset of Unicode. Basic Latin = collection 1 + // See ISO 10646, Appendix A + UnicodeASCII IDCodePage = 1002 + + // UTF7 is the uint16 identifier with IANA name UTF-7. + // + // rfc2152 + // Reference: RFC2152 + UTF7 IDCodePage = 1012 + + // UTF16BE is the uint16 identifier with IANA name UTF-16BE. + // + // rfc2781 + // Reference: RFC2781 + UTF16BE IDCodePage = 1013 + + // UTF16LE is the uint16 identifier with IANA name UTF-16LE. + // + // rfc2781 + // Reference: RFC2781 + UTF16LE IDCodePage = 1014 + + // UTF16 is the uint16 identifier with IANA name UTF-16. + // + // rfc2781 + // Reference: RFC2781 + UTF16 IDCodePage = 1015 + + // UTF32 is the uint16 identifier with IANA name UTF-32. + // + // https://www.unicode.org/unicode/reports/tr19/ + UTF32 IDCodePage = 1017 + + // UTF32BE is the uint16 identifier with IANA name UTF-32BE. + // + // https://www.unicode.org/unicode/reports/tr19/ + UTF32BE IDCodePage = 1018 + + // UTF32LE is the uint16 identifier with IANA name UTF-32LE. + // + // https://www.unicode.org/unicode/reports/tr19/ + UTF32LE IDCodePage = 1019 + + // KOI8R is the uint16 identifier with IANA name KOI8-R (MIME: KOI8-R). + // + // rfc1489 , based on GOST-19768-74, ISO-6937/8, + // INIS-Cyrillic, ISO-5427. + // Reference: RFC1489 + KOI8R IDCodePage = 2084 + + // IBM866 is the uint16 identifier with IANA name IBM866. + // + // IBM NLDG Volume 2 (SE09-8002-03) August 1994 + IBM866 IDCodePage = 2086 + + // Windows1251 is the uint16 identifier with IANA name windows-1251. + // + // Microsoft http://www.iana.org/assignments/charset-reg/windows-1251 + Windows1251 IDCodePage = 2251 + + // Windows1252 is the uint16 identifier with IANA name windows-1252. + // + // Microsoft http://www.iana.org/assignments/charset-reg/windows-1252 + Windows1252 IDCodePage = 2252 +) diff --git a/const.go b/const.go deleted file mode 100644 index 73c4cae..0000000 --- a/const.go +++ /dev/null @@ -1,15 +0,0 @@ -package cpd - -import ( - "github.com/softlandia/cpd/internal/cp" -) - -//numbers of code page -const ( - CpASCII = cp.ASCII - CpWindows1251 = cp.Windows1251 - CpIBM866 = cp.IBM866 - CpUTF8 = cp.UTF8 - CpUTF16 = cp.UTF16 - CpUTF32 = cp.UTF32 -) diff --git a/cp_deep_maching.go b/cp_deep_maching.go new file mode 100644 index 0000000..a7fc119 --- /dev/null +++ b/cp_deep_maching.go @@ -0,0 +1,45 @@ +package cpd + +//checkHeader - check buffer for match to utf-8, utf-16le or utf-16be BOM +func checkHeader(b []byte) (id IDCodePage, res bool) { + if bomUTF8(b) { + return UTF8, true + } + if bomUTF16le(b) { + return UTF16LE, true + } + if bomUTF16be(b) { + return UTF16BE, true + } + return ASCII, false +} + +func bomUTF8(b []byte) bool { + if len(b) < 3 { + return false + } + return (b[0] == 0xEF) && (b[1] == 0xBB) && (b[2] == 0xBF) +} + +func bomUTF16le(b []byte) bool { + if len(b) < 2 { + return false + } + return (b[0] == 0xFF) && (b[1] == 0xFE) +} + +func bomUTF16be(b []byte) bool { + if len(b) < 2 { + return false + } + return (b[0] == 0xFE) && (b[1] == 0xFF) +} + +//ASCII block +func itASCII(r rune, tbl *codePageTable) int { + return 0 +} + +func runesMatchASCII(b []byte, tbl *codePageTable) int { + return 0 +} diff --git a/cpd.go b/cpd.go index d7e401f..5394ace 100644 --- a/cpd.go +++ b/cpd.go @@ -7,90 +7,57 @@ package cpd import ( "bufio" "fmt" + "io" "os" - "strings" + "reflect" - "github.com/softlandia/cpd/internal/cp" "golang.org/x/text/encoding/charmap" "golang.org/x/text/transform" ) -//StrConvertCodePage - convert string from one code page to another -func StrConvertCodePage(s string, fromCP, toCP uint16) (string, error) { - if len(s) == 0 { - return "", nil - } - if fromCP == toCP { - return s, nil - } - - var err error - - switch fromCP { - case cp.IBM866: - s, _, err = transform.String(charmap.CodePage866.NewDecoder(), s) - case cp.Windows1251: - s, _, err = transform.String(charmap.Windows1251.NewDecoder(), s) - } - switch toCP { - case cp.IBM866: - s, _, err = transform.String(charmap.CodePage866.NewEncoder(), s) - case cp.Windows1251: - s, _, err = transform.String(charmap.Windows1251.NewEncoder(), s) - } - return s, err +//CodePageAutoDetect - auto detect code page of input content +func CodePageAutoDetect(content []byte) (result IDCodePage) { + return CodePages.Match(content) } -// CodePageAsString - return name of char set with id codepage -// if codepage not exist - return "" -func CodePageAsString(codepage uint16) string { - return cp.Name[codepage] +//CodePageDetect - detect code page of ascii data from reader 'r' +func CodePageDetect(r io.Reader, stopStr ...string) (IDCodePage, error) { + //initial test + //test input interfase + if !reflect.ValueOf(r).IsValid() { + return ASCII, fmt.Errorf("input reader is nil") + } + + //make slice of byte from input reader + buf, err := bufio.NewReader(r).Peek(1024) + if (err != nil) && (err.Error() != "EOF") { + return ASCII, err + } + + //check file header // utf-8, utf-16 with BOM + if idHeader, ok := checkHeader(buf); ok { + return idHeader, nil + } + return CodePageAutoDetect(buf), nil } -//CodePageDetect - detect code page of file -//return 0 if code page can not be detected -//return const cpd.CpWindows1251 for Windows code page 1251 -//return const cdp.CpIBM866 for IBM 866 code page -//return conts cdp.CpASCII by default or on error -//EF-BB-BF utf8 bom -func CodePageDetect(fn string, stopStr ...string) (uint16, error) { - var ( - count1251 int //счётчик символов в кодировке 1251 - count866 int //счётчик символов в кодировке 866 - ) +//FileCodePageDetect - detect code page of text file +func FileCodePageDetect(fn string, stopStr ...string) (IDCodePage, error) { iFile, err := os.Open(fn) if err != nil { - return CpASCII, err + return ASCII, err } defer iFile.Close() - iScanner := bufio.NewScanner(iFile) - for i := 0; iScanner.Scan(); i++ { - s := iScanner.Text() - if (len(stopStr) > 0) && strings.Contains(s, stopStr[0]) { //stopStr[0] - строка при обнаружении которой останавливаемся, stopStr - слайс строк - break - } - for j := range s { - if isRune1251(rune(s[j])) { //проверка принадлежности символа позициям алфавитных символов в кодовой таблице 1251 - count1251++ - } - if isRune866(rune(s[j])) { //проверка принадлежности символа позициям алфавитных символов в кодовой таблице 866 - count866++ - } - } + if len(stopStr) > 0 { + return CodePageDetect(iFile, stopStr[0]) } - switch { - case count1251 > count866: - return CpWindows1251, nil - case count1251 < count866: - return CpIBM866, nil - } - return CpASCII, nil + return CodePageDetect(iFile) } //FileConvertCodePage - replace code page text file from one to another -func FileConvertCodePage(fileName string, fromCP, toCP uint16) error { +func FileConvertCodePage(fileName string, fromCP, toCP IDCodePage) error { if fromCP == toCP { return nil } @@ -126,37 +93,34 @@ func FileConvertCodePage(fileName string, fromCP, toCP uint16) error { return os.Rename(tmpFileName, fileName) } -const ( - cp866r1Min = 0x80 //заглавная буква А - cp866r1Max = 0xAF //строчная буква п - в этом интервале в 866 раскладке лежит большинство русских букв - cp866r2Min = 0xE0 //строчная р - cp866r2Max = 0xF1 //строчна ё - в этом интервале лежат остальные русские буквы - cp1251s1 = 0xA8 //Ё - cp1251s2 = 0xB8 //ё в этой позиции в 866 лежит псевдографика - cp1251r1Min = 0xC0 //с этой позиции начинается весь алфавит - cp1251r1Max = 0xFF //заканчивается - cpKOI8RMin = 0xC0 //начало интервала - cpKOI8RMax = 0xFF //конец интервала -) - -func isRune1251(r rune) bool { - switch { - case r == cp1251s1: - return true - case r == cp1251s2: - return true - case (r >= cp1251r1Min) && (r <= cp1251r1Max): - return true +//StrConvertCodePage - convert string from one code page to another +func StrConvertCodePage(s string, fromCP, toCP IDCodePage) (string, error) { + if len(s) == 0 { + return "", nil } - return false + if fromCP == toCP { + return s, nil + } + + var err error + + switch fromCP { + case IBM866: + s, _, err = transform.String(charmap.CodePage866.NewDecoder(), s) + case Windows1251: + s, _, err = transform.String(charmap.Windows1251.NewDecoder(), s) + } + switch toCP { + case IBM866: + s, _, err = transform.String(charmap.CodePage866.NewEncoder(), s) + case Windows1251: + s, _, err = transform.String(charmap.Windows1251.NewEncoder(), s) + } + return s, err } -func isRune866(r rune) bool { - switch { - case (r >= cp866r1Min) && (r <= cp866r1Max): - return true - case (r >= cp866r2Min) && (r <= cp866r2Max): - return true - } - return false +// CodePageAsString - return name of char set with id codepage +// if codepage not exist - return "" +func CodePageAsString(codepage IDCodePage) string { + return codePageName[codepage] } diff --git a/cpd_test.go b/cpd_test.go index 91ec61a..9f15b4c 100644 --- a/cpd_test.go +++ b/cpd_test.go @@ -3,20 +3,18 @@ import ( "os" "testing" - - "github.com/softlandia/cpd/internal/cp" ) type tCodePageAsString struct { - id uint16 + id IDCodePage s string } var dCodePageAsString = []tCodePageAsString{ {0, ""}, {3, "ASCII"}, - {cp.IBM866, "IBM866"}, - {cp.Windows1251, "Windows1251"}, + {IBM866, "IBM866"}, + {Windows1251, "Windows1251"}, {60000, ""}, } @@ -29,44 +27,107 @@ func TestCodePageAsString(t *testing.T) { } } -//CodePageDetect +//TestCodePageDetect - тестирование метода CodePageDetect +// проверки на входные параметры: +// 1. nil входящий поток явный nil, параметр останова отсутствует +// 2. nil, "~" входящий поток явный nil, параметр останова присутствует +// 3. входящий поток не инициализированный объект, проверка на передачу пустого интерфейса +// проверка работы осуществляется через FileCodePageDetect() func TestCodePageDetect(t *testing.T) { - res, err := CodePageDetect("test_files\\866&1251.txt", "~X~") //befor ~X~ file contain 866, after 1251 - if err != nil { - t.Errorf(" on file '%s' return error: %v", "866&1251.txt", err) - } - if res != cp.IBM866 { - t.Errorf(" on file '%s' expected 866 got: %s", "866&1251.txt", CodePageAsString(res)) - } - - res, err = CodePageDetect("test_files\\866&1251.txt") //file contain more 1251 then 866 - if res != cp.Windows1251 { - t.Errorf(" on file '%s' expected 1251 got: %s", "866&1251.txt", CodePageAsString(res)) - } - - _, err = CodePageDetect("-.-") //file "-.-" not exist + _, err := CodePageDetect(nil) if err == nil { - t.Errorf(" on file '-.-' must return error, but return nil") + t.Errorf(" on input nil return error == nil, expect error != nil\n") + } + _, err = CodePageDetect(nil, "~") + if err == nil { + t.Errorf(" on input nil return error == nil, expect error != nil\n") } - res, _ = CodePageDetect("test_files\\noCodePage.txt") //file contain rune only ASCII - if res != cp.ASCII { - t.Errorf(" on file 'noCodePage.txt' expect ASCII got: %s", CodePageAsString(res)) + var data *os.File + res, err := CodePageDetect(data, "~") + if err == nil { + t.Errorf(" on input nil return error != nil, data: %+v, res: %d, code page: %s\n", data, res, CodePageAsString(res)) + } +} + +func TestFileCodePageDetectSimple(t *testing.T) { + res, err := FileCodePageDetect("test_files\\866to1251.txt") + if err != nil { + t.Errorf(" on file '866to1251.txt' err expected: nil, got: %s\n", err) + } + if res != IBM866 { + t.Errorf(" on file '866to1251.txt' expected: %s, got: %s\n", IBM866, res) + } + res, err = FileCodePageDetect("test_files\\866&1251.txt") + if err != nil { + t.Errorf(" on file '866&1251.txt' err expected: nil, got: %s\n", err) + } + if res != Windows1251 { + t.Errorf(" on file '866&1251.txt' expected: %s, got: %s\n", Windows1251, res) + } +} + +func TestFileCodePageDetectUtf8Bom(t *testing.T) { + res, err := FileCodePageDetect("test_files\\utf8wbom.txt") + if err != nil { + t.Errorf(" on file 'utf8wbom.txt' err expected: nil, got: %s\n", err) + } + if res != UTF8 { + t.Errorf(" on file 'utf8wbom.txt' expected: %s, got: %s\n", UTF8, res) + } +} + +type tFileCodePageDetectTest struct { + fn string //filename + st string //stop string + e error // + r IDCodePage //expected result +} + +var dFileCodePageDetect = []tFileCodePageDetectTest{ + {"test_files\\utf16BEwbom.txt", "", nil, UTF16BE}, //file contain utf16 big endian with bom rune at start + {"test_files\\utf16be-woBOM.txt", "", nil, UTF16BE}, //file contain utf16 big endian with out bom rune at start + {"test_files\\utf16le-wBOM.txt", "", nil, UTF16LE}, //file contain utf16 liitle endian with bom rune at start + {"test_files\\utf16le-woBOM.txt", "", nil, UTF16LE}, //file contain utf16 liitle endian with out bom rune at start + {"test_files\\utf8-woBOM.txt", "", nil, UTF8}, //file contain utf8 with out bom rune at start + {"test_files\\866&1251.txt", "~X~", nil, Windows1251}, //befor ~X~ file contain 866, after 1251 + {"test_files\\866&1251.txt", "", nil, Windows1251}, //file contain more 1251 then 866 + {"test_files\\noCodePage.txt", "", nil, ASCII}, //file contain rune only ASCII + {"test_files\\empty_file.txt", "", nil, ASCII}, //file exist but empty, no error, return ASCII + {"test_files\\rune_encode_error.txt", "", nil, ASCII}, //file contain special rune -> encode error, but detect NO error + {"test_files\\rune_error_1251.txt", "", nil, Windows1251}, //file contain 1251 and special rune -> encode error, but detect NO error + {"test_files\\utf8wbom.txt", "", nil, UTF8}, //file contain utf8 with bom rune at start + {"test_files\\utf16LEwbom.txt", "", nil, UTF16LE}, //file contain utf16 little endian with bom rune at start +} + +//FileCodePageDetect +func TestFileCodePageDetect(t *testing.T) { + var ( + err error + res IDCodePage + ) + for _, d := range dFileCodePageDetect { + if len(d.st) == 0 { + res, err = FileCodePageDetect(d.fn) + } else { + res, err = FileCodePageDetect(d.fn, d.st) + } + if err != d.e { + t.Errorf(" on file '%s' expected error: '%v', got: '%v', ", d.fn, d.e, err) + } + if res != d.r { + t.Errorf(" on file '%s' expected result: %s, got: %s", d.fn, d.r, res) + } } - res, err = CodePageDetect("test_files\\empty_file.txt") - if (res != cp.ASCII) || (err != nil) { - t.Errorf(" on file 'empty_file.txt' expect ASCII and no error got: %s and %v", CodePageAsString(res), err) + _, err = FileCodePageDetect("-.-") //file "-.-" not exist + if err == nil { + t.Errorf(" on file '-.-' must return error, but return nil") } - res, err = CodePageDetect("test_files\\rune_encode_error.txt") - if (res != cp.ASCII) || (err != nil) { - t.Errorf(" on file 'rune_encode_error.txt' expect ASCII and no error got: %s and %v", CodePageAsString(res), err) - } - - res, err = CodePageDetect("test_files\\rune_error_1251.txt") - if res != cp.Windows1251 { - t.Errorf(" on file 'rune_error_1251.txt' expect 1251 and no error got: %s and %v", CodePageAsString(res), err) + _, err = FileCodePageDetect("") //file "" not exist + if err == nil { + t.Errorf(" on file '' must return error, but return nil") } } @@ -83,13 +144,13 @@ func TestFileConvertCodePage(t *testing.T) { t.Errorf(" on fromCp == toCp expected error==nil, got: %v", err) } - err = FileConvertCodePage("test_files\\rune_encode_error.txt", cp.IBM866, cp.Windows1251) + err = FileConvertCodePage("test_files\\rune_encode_error.txt", IBM866, Windows1251) if err == nil { t.Errorf(" expected error, got: %v", err) } os.Link("test_files\\866to1251.txt", "test_files\\866to1251.tmp") - err = FileConvertCodePage("test_files\\866to1251.tmp", cp.IBM866, cp.Windows1251) + err = FileConvertCodePage("test_files\\866to1251.tmp", IBM866, Windows1251) if err != nil { t.Errorf(" expect no err, got: %v", err) } @@ -98,19 +159,19 @@ func TestFileConvertCodePage(t *testing.T) { //ConvertCodePage func TestStrConvertCodePage(t *testing.T) { - _, err := StrConvertCodePage("1234", cp.IBM866, cp.Windows1251) + _, err := StrConvertCodePage("1234", IBM866, Windows1251) if err != nil { t.Errorf(" on test 1 return unexpected err: %v", err) } - _, err = StrConvertCodePage("1234", cp.Windows1251, cp.IBM866) + _, err = StrConvertCodePage("1234", Windows1251, IBM866) if err != nil { t.Errorf(" on test 2 return unexpected err: %v", err) } - _, err = StrConvertCodePage("", cp.IBM866, cp.Windows1251) + _, err = StrConvertCodePage("", IBM866, Windows1251) if err != nil { t.Errorf(" with empty string must return ERROR, but retrurn: %v", err) } - _, err = StrConvertCodePage("1234", cp.IBM866, cp.IBM866) + _, err = StrConvertCodePage("1234", IBM866, IBM866) if err != nil { t.Errorf(" with equal fromCP and toCp must return nil, but retrurn: %v", err) } diff --git a/ibm866.go b/ibm866.go new file mode 100644 index 0000000..aedb87a --- /dev/null +++ b/ibm866.go @@ -0,0 +1,19 @@ +package cpd + +//unit for ibm866 + +func runesMatch866(data []byte, tbl *codePageTable) (counts int) { + for i := range data { + if i == 0 { + continue + } + if tbl.containsRune(rune(data[i-1])) > 0 { + j := tbl.containsRune(rune(data[i])) + if j > 0 { + (*tbl)[j].count++ + counts++ + } + } + } + return +} diff --git a/internal/cp/mib.go b/internal/cp/mib.go deleted file mode 100644 index 93cf306..0000000 --- a/internal/cp/mib.go +++ /dev/null @@ -1,1643 +0,0 @@ -// file from "golang.org\x\text\encoding\internal\identifier" (c) golang autors -// contain identifier of code page - -package cp - -const ( - // ASCII is the uint16 identifier with IANA name US-ASCII (MIME: US-ASCII). - // - // ANSI X3.4-1986 - // Reference: RFC2046 - ASCII uint16 = 3 - - // ISOLatin1 is the uint16 identifier with IANA name ISO_8859-1:1987 (MIME: ISO-8859-1). - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISOLatin1 uint16 = 4 - - // ISOLatin2 is the uint16 identifier with IANA name ISO_8859-2:1987 (MIME: ISO-8859-2). - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISOLatin2 uint16 = 5 - - // ISOLatin3 is the uint16 identifier with IANA name ISO_8859-3:1988 (MIME: ISO-8859-3). - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISOLatin3 uint16 = 6 - - // ISOLatin4 is the uint16 identifier with IANA name ISO_8859-4:1988 (MIME: ISO-8859-4). - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISOLatin4 uint16 = 7 - - // ISOLatinCyrillic is the uint16 identifier with IANA name ISO_8859-5:1988 (MIME: ISO-8859-5). - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISOLatinCyrillic uint16 = 8 - - // ISOLatinArabic is the uint16 identifier with IANA name ISO_8859-6:1987 (MIME: ISO-8859-6). - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISOLatinArabic uint16 = 9 - - // ISOLatinGreek is the uint16 identifier with IANA name ISO_8859-7:1987 (MIME: ISO-8859-7). - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1947 - // Reference: RFC1345 - ISOLatinGreek uint16 = 10 - - // ISOLatinHebrew is the uint16 identifier with IANA name ISO_8859-8:1988 (MIME: ISO-8859-8). - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISOLatinHebrew uint16 = 11 - - // ISOLatin5 is the uint16 identifier with IANA name ISO_8859-9:1989 (MIME: ISO-8859-9). - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISOLatin5 uint16 = 12 - - // ISOLatin6 is the uint16 identifier with IANA name ISO-8859-10 (MIME: ISO-8859-10). - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISOLatin6 uint16 = 13 - - // ISOTextComm is the uint16 identifier with IANA name ISO_6937-2-add. - // - // ISO-IR: International Register of Escape Sequences and ISO 6937-2:1983 - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISOTextComm uint16 = 14 - - // HalfWidthKatakana is the uint16 identifier with IANA name JIS_X0201. - // - // JIS X 0201-1976. One byte only, this is equivalent to - // JIS/Roman (similar to ASCII) plus eight-bit half-width - // Katakana - // Reference: RFC1345 - HalfWidthKatakana uint16 = 15 - - // JISEncoding is the uint16 identifier with IANA name JIS_Encoding. - // - // JIS X 0202-1991. Uses ISO 2022 escape sequences to - // shift code sets as documented in JIS X 0202-1991. - JISEncoding uint16 = 16 - - // ShiftJIS is the uint16 identifier with IANA name Shift_JIS (MIME: Shift_JIS). - // - // This charset is an extension of csHalfWidthKatakana by - // adding graphic characters in JIS X 0208. The CCS's are - // JIS X0201:1997 and JIS X0208:1997. The - // complete definition is shown in Appendix 1 of JIS - // X0208:1997. - // This charset can be used for the top-level media type "text". - ShiftJIS uint16 = 17 - - // EUCPkdFmtJapanese is the uint16 identifier with IANA name Extended_UNIX_Code_Packed_Format_for_Japanese (MIME: EUC-JP). - // - // Standardized by OSF, UNIX International, and UNIX Systems - // Laboratories Pacific. Uses ISO 2022 rules to select - // code set 0: US-ASCII (a single 7-bit byte set) - // code set 1: JIS X0208-1990 (a double 8-bit byte set) - // restricted to A0-FF in both bytes - // code set 2: Half Width Katakana (a single 7-bit byte set) - // requiring SS2 as the character prefix - // code set 3: JIS X0212-1990 (a double 7-bit byte set) - // restricted to A0-FF in both bytes - // requiring SS3 as the character prefix - EUCPkdFmtJapanese uint16 = 18 - - // EUCFixWidJapanese is the uint16 identifier with IANA name Extended_UNIX_Code_Fixed_Width_for_Japanese. - // - // Used in Japan. Each character is 2 octets. - // code set 0: US-ASCII (a single 7-bit byte set) - // 1st byte = 00 - // 2nd byte = 20-7E - // code set 1: JIS X0208-1990 (a double 7-bit byte set) - // restricted to A0-FF in both bytes - // code set 2: Half Width Katakana (a single 7-bit byte set) - // 1st byte = 00 - // 2nd byte = A0-FF - // code set 3: JIS X0212-1990 (a double 7-bit byte set) - // restricted to A0-FF in - // the first byte - // and 21-7E in the second byte - EUCFixWidJapanese uint16 = 19 - - // ISO4UnitedKingdom is the uint16 identifier with IANA name BS_4730. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO4UnitedKingdom uint16 = 20 - - // ISO11SwedishForNames is the uint16 identifier with IANA name SEN_850200_C. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO11SwedishForNames uint16 = 21 - - // ISO15Italian is the uint16 identifier with IANA name IT. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO15Italian uint16 = 22 - - // ISO17Spanish is the uint16 identifier with IANA name ES. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO17Spanish uint16 = 23 - - // ISO21German is the uint16 identifier with IANA name DIN_66003. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO21German uint16 = 24 - - // ISO60Norwegian1 is the uint16 identifier with IANA name NS_4551-1. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO60Norwegian1 uint16 = 25 - - // ISO69French is the uint16 identifier with IANA name NF_Z_62-010. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO69French uint16 = 26 - - // ISO10646UTF1 is the uint16 identifier with IANA name ISO-10646-UTF-1. - // - // Universal Transfer Format (1), this is the multibyte - // encoding, that subsets ASCII-7. It does not have byte - // ordering issues. - ISO10646UTF1 uint16 = 27 - - // ISO646basic1983 is the uint16 identifier with IANA name ISO_646.basic:1983. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO646basic1983 uint16 = 28 - - // INVARIANT is the uint16 identifier with IANA name INVARIANT. - // - // Reference: RFC1345 - INVARIANT uint16 = 29 - - // ISO2IntlRefVersion is the uint16 identifier with IANA name ISO_646.irv:1983. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO2IntlRefVersion uint16 = 30 - - // NATSSEFI is the uint16 identifier with IANA name NATS-SEFI. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - NATSSEFI uint16 = 31 - - // NATSSEFIADD is the uint16 identifier with IANA name NATS-SEFI-ADD. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - NATSSEFIADD uint16 = 32 - - // NATSDANO is the uint16 identifier with IANA name NATS-DANO. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - NATSDANO uint16 = 33 - - // NATSDANOADD is the uint16 identifier with IANA name NATS-DANO-ADD. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - NATSDANOADD uint16 = 34 - - // ISO10Swedish is the uint16 identifier with IANA name SEN_850200_B. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO10Swedish uint16 = 35 - - // KSC56011987 is the uint16 identifier with IANA name KS_C_5601-1987. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - KSC56011987 uint16 = 36 - - // ISO2022KR is the uint16 identifier with IANA name ISO-2022-KR (MIME: ISO-2022-KR). - // - // rfc1557 (see also KS_C_5601-1987) - // Reference: RFC1557 - ISO2022KR uint16 = 37 - - // EUCKR is the uint16 identifier with IANA name EUC-KR (MIME: EUC-KR). - // - // rfc1557 (see also KS_C_5861-1992) - // Reference: RFC1557 - EUCKR uint16 = 38 - - // ISO2022JP is the uint16 identifier with IANA name ISO-2022-JP (MIME: ISO-2022-JP). - // - // rfc1468 (see also rfc2237 ) - // Reference: RFC1468 - ISO2022JP uint16 = 39 - - // ISO2022JP2 is the uint16 identifier with IANA name ISO-2022-JP-2 (MIME: ISO-2022-JP-2). - // - // rfc1554 - // Reference: RFC1554 - ISO2022JP2 uint16 = 40 - - // ISO13JISC6220jp is the uint16 identifier with IANA name JIS_C6220-1969-jp. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO13JISC6220jp uint16 = 41 - - // ISO14JISC6220ro is the uint16 identifier with IANA name JIS_C6220-1969-ro. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO14JISC6220ro uint16 = 42 - - // ISO16Portuguese is the uint16 identifier with IANA name PT. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO16Portuguese uint16 = 43 - - // ISO18Greek7Old is the uint16 identifier with IANA name greek7-old. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO18Greek7Old uint16 = 44 - - // ISO19LatinGreek is the uint16 identifier with IANA name latin-greek. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO19LatinGreek uint16 = 45 - - // ISO25French is the uint16 identifier with IANA name NF_Z_62-010_(1973). - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO25French uint16 = 46 - - // ISO27LatinGreek1 is the uint16 identifier with IANA name Latin-greek-1. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO27LatinGreek1 uint16 = 47 - - // ISO5427Cyrillic is the uint16 identifier with IANA name ISO_5427. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO5427Cyrillic uint16 = 48 - - // ISO42JISC62261978 is the uint16 identifier with IANA name JIS_C6226-1978. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO42JISC62261978 uint16 = 49 - - // ISO47BSViewdata is the uint16 identifier with IANA name BS_viewdata. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO47BSViewdata uint16 = 50 - - // ISO49INIS is the uint16 identifier with IANA name INIS. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO49INIS uint16 = 51 - - // ISO50INIS8 is the uint16 identifier with IANA name INIS-8. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO50INIS8 uint16 = 52 - - // ISO51INISCyrillic is the uint16 identifier with IANA name INIS-cyrillic. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO51INISCyrillic uint16 = 53 - - // ISO54271981 is the uint16 identifier with IANA name ISO_5427:1981. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO54271981 uint16 = 54 - - // ISO5428Greek is the uint16 identifier with IANA name ISO_5428:1980. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO5428Greek uint16 = 55 - - // ISO57GB1988 is the uint16 identifier with IANA name GB_1988-80. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO57GB1988 uint16 = 56 - - // ISO58GB231280 is the uint16 identifier with IANA name GB_2312-80. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO58GB231280 uint16 = 57 - - // ISO61Norwegian2 is the uint16 identifier with IANA name NS_4551-2. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO61Norwegian2 uint16 = 58 - - // ISO70VideotexSupp1 is the uint16 identifier with IANA name videotex-suppl. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO70VideotexSupp1 uint16 = 59 - - // ISO84Portuguese2 is the uint16 identifier with IANA name PT2. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO84Portuguese2 uint16 = 60 - - // ISO85Spanish2 is the uint16 identifier with IANA name ES2. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO85Spanish2 uint16 = 61 - - // ISO86Hungarian is the uint16 identifier with IANA name MSZ_7795.3. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO86Hungarian uint16 = 62 - - // ISO87JISX0208 is the uint16 identifier with IANA name JIS_C6226-1983. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO87JISX0208 uint16 = 63 - - // ISO88Greek7 is the uint16 identifier with IANA name greek7. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO88Greek7 uint16 = 64 - - // ISO89ASMO449 is the uint16 identifier with IANA name ASMO_449. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO89ASMO449 uint16 = 65 - - // ISO90 is the uint16 identifier with IANA name iso-ir-90. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO90 uint16 = 66 - - // ISO91JISC62291984a is the uint16 identifier with IANA name JIS_C6229-1984-a. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO91JISC62291984a uint16 = 67 - - // ISO92JISC62991984b is the uint16 identifier with IANA name JIS_C6229-1984-b. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO92JISC62991984b uint16 = 68 - - // ISO93JIS62291984badd is the uint16 identifier with IANA name JIS_C6229-1984-b-add. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO93JIS62291984badd uint16 = 69 - - // ISO94JIS62291984hand is the uint16 identifier with IANA name JIS_C6229-1984-hand. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO94JIS62291984hand uint16 = 70 - - // ISO95JIS62291984handadd is the uint16 identifier with IANA name JIS_C6229-1984-hand-add. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO95JIS62291984handadd uint16 = 71 - - // ISO96JISC62291984kana is the uint16 identifier with IANA name JIS_C6229-1984-kana. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO96JISC62291984kana uint16 = 72 - - // ISO2033 is the uint16 identifier with IANA name ISO_2033-1983. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO2033 uint16 = 73 - - // ISO99NAPLPS is the uint16 identifier with IANA name ANSI_X3.110-1983. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO99NAPLPS uint16 = 74 - - // ISO102T617bit is the uint16 identifier with IANA name T.61-7bit. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO102T617bit uint16 = 75 - - // ISO103T618bit is the uint16 identifier with IANA name T.61-8bit. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO103T618bit uint16 = 76 - - // ISO111ECMACyrillic is the uint16 identifier with IANA name ECMA-cyrillic. - // - // ISO registry - // (formerly ECMA - // registry ) - ISO111ECMACyrillic uint16 = 77 - - // ISO121Canadian1 is the uint16 identifier with IANA name CSA_Z243.4-1985-1. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO121Canadian1 uint16 = 78 - - // ISO122Canadian2 is the uint16 identifier with IANA name CSA_Z243.4-1985-2. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO122Canadian2 uint16 = 79 - - // ISO123CSAZ24341985gr is the uint16 identifier with IANA name CSA_Z243.4-1985-gr. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO123CSAZ24341985gr uint16 = 80 - - // ISO88596E is the uint16 identifier with IANA name ISO_8859-6-E (MIME: ISO-8859-6-E). - // - // rfc1556 - // Reference: RFC1556 - ISO88596E uint16 = 81 - - // ISO88596I is the uint16 identifier with IANA name ISO_8859-6-I (MIME: ISO-8859-6-I). - // - // rfc1556 - // Reference: RFC1556 - ISO88596I uint16 = 82 - - // ISO128T101G2 is the uint16 identifier with IANA name T.101-G2. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO128T101G2 uint16 = 83 - - // ISO88598E is the uint16 identifier with IANA name ISO_8859-8-E (MIME: ISO-8859-8-E). - // - // rfc1556 - // Reference: RFC1556 - ISO88598E uint16 = 84 - - // ISO88598I is the uint16 identifier with IANA name ISO_8859-8-I (MIME: ISO-8859-8-I). - // - // rfc1556 - // Reference: RFC1556 - ISO88598I uint16 = 85 - - // ISO139CSN369103 is the uint16 identifier with IANA name CSN_369103. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO139CSN369103 uint16 = 86 - - // ISO141JUSIB1002 is the uint16 identifier with IANA name JUS_I.B1.002. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO141JUSIB1002 uint16 = 87 - - // ISO143IECP271 is the uint16 identifier with IANA name IEC_P27-1. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO143IECP271 uint16 = 88 - - // ISO146Serbian is the uint16 identifier with IANA name JUS_I.B1.003-serb. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO146Serbian uint16 = 89 - - // ISO147Macedonian is the uint16 identifier with IANA name JUS_I.B1.003-mac. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO147Macedonian uint16 = 90 - - // ISO150GreekCCITT is the uint16 identifier with IANA name greek-ccitt. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO150GreekCCITT uint16 = 91 - - // ISO151Cuba is the uint16 identifier with IANA name NC_NC00-10:81. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO151Cuba uint16 = 92 - - // ISO6937Add is the uint16 identifier with IANA name ISO_6937-2-25. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO6937Add uint16 = 93 - - // ISO153GOST1976874 is the uint16 identifier with IANA name GOST_19768-74. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO153GOST1976874 uint16 = 94 - - // ISO8859Supp is the uint16 identifier with IANA name ISO_8859-supp. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO8859Supp uint16 = 95 - - // ISO10367Box is the uint16 identifier with IANA name ISO_10367-box. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO10367Box uint16 = 96 - - // ISO158Lap is the uint16 identifier with IANA name latin-lap. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO158Lap uint16 = 97 - - // ISO159JISX02121990 is the uint16 identifier with IANA name JIS_X0212-1990. - // - // ISO-IR: International Register of Escape Sequences - // Note: The current registration authority is IPSJ/ITSCJ, Japan. - // Reference: RFC1345 - ISO159JISX02121990 uint16 = 98 - - // ISO646Danish is the uint16 identifier with IANA name DS_2089. - // - // Danish Standard, DS 2089, February 1974 - // Reference: RFC1345 - ISO646Danish uint16 = 99 - - // USDK is the uint16 identifier with IANA name us-dk. - // - // Reference: RFC1345 - USDK uint16 = 100 - - // DKUS is the uint16 identifier with IANA name dk-us. - // - // Reference: RFC1345 - DKUS uint16 = 101 - - // KSC5636 is the uint16 identifier with IANA name KSC5636. - // - // Reference: RFC1345 - KSC5636 uint16 = 102 - - // Unicode11UTF7 is the uint16 identifier with IANA name UNICODE-1-1-UTF-7. - // - // rfc1642 - // Reference: RFC1642 - Unicode11UTF7 uint16 = 103 - - // ISO2022CN is the uint16 identifier with IANA name ISO-2022-CN. - // - // rfc1922 - // Reference: RFC1922 - ISO2022CN uint16 = 104 - - // ISO2022CNEXT is the uint16 identifier with IANA name ISO-2022-CN-EXT. - // - // rfc1922 - // Reference: RFC1922 - ISO2022CNEXT uint16 = 105 - - // UTF8 is the uint16 identifier with IANA name UTF-8. - // - // rfc3629 - // Reference: RFC3629 - UTF8 uint16 = 106 - - // ISO885913 is the uint16 identifier with IANA name ISO-8859-13. - // - // ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-13 http://www.iana.org/assignments/charset-reg/ISO-8859-13 - ISO885913 uint16 = 109 - - // ISO885914 is the uint16 identifier with IANA name ISO-8859-14. - // - // ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-14 - ISO885914 uint16 = 110 - - // ISO885915 is the uint16 identifier with IANA name ISO-8859-15. - // - // ISO - // Please see: http://www.iana.org/assignments/charset-reg/ISO-8859-15 - ISO885915 uint16 = 111 - - // ISO885916 is the uint16 identifier with IANA name ISO-8859-16. - // - // ISO - ISO885916 uint16 = 112 - - // GBK is the uint16 identifier with IANA name GBK. - // - // Chinese IT Standardization Technical Committee - // Please see: http://www.iana.org/assignments/charset-reg/GBK - GBK uint16 = 113 - - // GB18030 is the uint16 identifier with IANA name GB18030. - // - // Chinese IT Standardization Technical Committee - // Please see: http://www.iana.org/assignments/charset-reg/GB18030 - GB18030 uint16 = 114 - - // OSDEBCDICDF0415 is the uint16 identifier with IANA name OSD_EBCDIC_DF04_15. - // - // Fujitsu-Siemens standard mainframe EBCDIC encoding - // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15 - OSDEBCDICDF0415 uint16 = 115 - - // OSDEBCDICDF03IRV is the uint16 identifier with IANA name OSD_EBCDIC_DF03_IRV. - // - // Fujitsu-Siemens standard mainframe EBCDIC encoding - // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV - OSDEBCDICDF03IRV uint16 = 116 - - // OSDEBCDICDF041 is the uint16 identifier with IANA name OSD_EBCDIC_DF04_1. - // - // Fujitsu-Siemens standard mainframe EBCDIC encoding - // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1 - OSDEBCDICDF041 uint16 = 117 - - // ISO115481 is the uint16 identifier with IANA name ISO-11548-1. - // - // See http://www.iana.org/assignments/charset-reg/ISO-11548-1 - ISO115481 uint16 = 118 - - // KZ1048 is the uint16 identifier with IANA name KZ-1048. - // - // See http://www.iana.org/assignments/charset-reg/KZ-1048 - KZ1048 uint16 = 119 - - // Unicode is the uint16 identifier with IANA name ISO-10646-UCS-2. - // - // the 2-octet Basic Multilingual Plane, aka Unicode - // this needs to specify network byte order: the standard - // does not specify (it is a 16-bit integer space) - Unicode uint16 = 1000 - - // UCS4 is the uint16 identifier with IANA name ISO-10646-UCS-4. - // - // the full code space. (same comment about byte order, - // these are 31-bit numbers. - UCS4 uint16 = 1001 - - // UnicodeASCII is the uint16 identifier with IANA name ISO-10646-UCS-Basic. - // - // ASCII subset of Unicode. Basic Latin = collection 1 - // See ISO 10646, Appendix A - UnicodeASCII uint16 = 1002 - - // UnicodeLatin1 is the uint16 identifier with IANA name ISO-10646-Unicode-Latin1. - // - // ISO Latin-1 subset of Unicode. Basic Latin and Latin-1 - // Supplement = collections 1 and 2. See ISO 10646, - // Appendix A. See rfc1815 . - UnicodeLatin1 uint16 = 1003 - - // UnicodeJapanese is the uint16 identifier with IANA name ISO-10646-J-1. - // - // ISO 10646 Japanese, see rfc1815 . - UnicodeJapanese uint16 = 1004 - - // UnicodeIBM1261 is the uint16 identifier with IANA name ISO-Unicode-IBM-1261. - // - // IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261 - UnicodeIBM1261 uint16 = 1005 - - // UnicodeIBM1268 is the uint16 identifier with IANA name ISO-Unicode-IBM-1268. - // - // IBM Latin-4 Extended Presentation Set, GCSGID: 1268 - UnicodeIBM1268 uint16 = 1006 - - // UnicodeIBM1276 is the uint16 identifier with IANA name ISO-Unicode-IBM-1276. - // - // IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276 - UnicodeIBM1276 uint16 = 1007 - - // UnicodeIBM1264 is the uint16 identifier with IANA name ISO-Unicode-IBM-1264. - // - // IBM Arabic Presentation Set, GCSGID: 1264 - UnicodeIBM1264 uint16 = 1008 - - // UnicodeIBM1265 is the uint16 identifier with IANA name ISO-Unicode-IBM-1265. - // - // IBM Hebrew Presentation Set, GCSGID: 1265 - UnicodeIBM1265 uint16 = 1009 - - // Unicode11 is the uint16 identifier with IANA name UNICODE-1-1. - // - // rfc1641 - // Reference: RFC1641 - Unicode11 uint16 = 1010 - - // SCSU is the uint16 identifier with IANA name SCSU. - // - // SCSU See http://www.iana.org/assignments/charset-reg/SCSU - SCSU uint16 = 1011 - - // UTF7 is the uint16 identifier with IANA name UTF-7. - // - // rfc2152 - // Reference: RFC2152 - UTF7 uint16 = 1012 - - // UTF16BE is the uint16 identifier with IANA name UTF-16BE. - // - // rfc2781 - // Reference: RFC2781 - UTF16BE uint16 = 1013 - - // UTF16LE is the uint16 identifier with IANA name UTF-16LE. - // - // rfc2781 - // Reference: RFC2781 - UTF16LE uint16 = 1014 - - // UTF16 is the uint16 identifier with IANA name UTF-16. - // - // rfc2781 - // Reference: RFC2781 - UTF16 uint16 = 1015 - - // CESU8 is the uint16 identifier with IANA name CESU-8. - // - // https://www.unicode.org/unicode/reports/tr26 - CESU8 uint16 = 1016 - - // UTF32 is the uint16 identifier with IANA name UTF-32. - // - // https://www.unicode.org/unicode/reports/tr19/ - UTF32 uint16 = 1017 - - // UTF32BE is the uint16 identifier with IANA name UTF-32BE. - // - // https://www.unicode.org/unicode/reports/tr19/ - UTF32BE uint16 = 1018 - - // UTF32LE is the uint16 identifier with IANA name UTF-32LE. - // - // https://www.unicode.org/unicode/reports/tr19/ - UTF32LE uint16 = 1019 - - // BOCU1 is the uint16 identifier with IANA name BOCU-1. - // - // https://www.unicode.org/notes/tn6/ - BOCU1 uint16 = 1020 - - // Windows30Latin1 is the uint16 identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1. - // - // Extended ISO 8859-1 Latin-1 for Windows 3.0. - // PCL Symbol Set id: 9U - Windows30Latin1 uint16 = 2000 - - // Windows31Latin1 is the uint16 identifier with IANA name ISO-8859-1-Windows-3.1-Latin-1. - // - // Extended ISO 8859-1 Latin-1 for Windows 3.1. - // PCL Symbol Set id: 19U - Windows31Latin1 uint16 = 2001 - - // Windows31Latin2 is the uint16 identifier with IANA name ISO-8859-2-Windows-Latin-2. - // - // Extended ISO 8859-2. Latin-2 for Windows 3.1. - // PCL Symbol Set id: 9E - Windows31Latin2 uint16 = 2002 - - // Windows31Latin5 is the uint16 identifier with IANA name ISO-8859-9-Windows-Latin-5. - // - // Extended ISO 8859-9. Latin-5 for Windows 3.1 - // PCL Symbol Set id: 5T - Windows31Latin5 uint16 = 2003 - - // HPRoman8 is the uint16 identifier with IANA name hp-roman8. - // - // LaserJet IIP Printer User's Manual, - // HP part no 33471-90901, Hewlet-Packard, June 1989. - // Reference: RFC1345 - HPRoman8 uint16 = 2004 - - // AdobeStandardEncoding is the uint16 identifier with IANA name Adobe-Standard-Encoding. - // - // PostScript Language Reference Manual - // PCL Symbol Set id: 10J - AdobeStandardEncoding uint16 = 2005 - - // VenturaUS is the uint16 identifier with IANA name Ventura-US. - // - // Ventura US. ASCII plus characters typically used in - // publishing, like pilcrow, copyright, registered, trade mark, - // section, dagger, and double dagger in the range A0 (hex) - // to FF (hex). - // PCL Symbol Set id: 14J - VenturaUS uint16 = 2006 - - // VenturaInternational is the uint16 identifier with IANA name Ventura-International. - // - // Ventura International. ASCII plus coded characters similar - // to Roman8. - // PCL Symbol Set id: 13J - VenturaInternational uint16 = 2007 - - // DECMCS is the uint16 identifier with IANA name DEC-MCS. - // - // VAX/VMS User's Manual, - // Order Number: AI-Y517A-TE, April 1986. - // Reference: RFC1345 - DECMCS uint16 = 2008 - - // PC850Multilingual is the uint16 identifier with IANA name IBM850. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - PC850Multilingual uint16 = 2009 - - // PC8DanishNorwegian is the uint16 identifier with IANA name PC8-Danish-Norwegian. - // - // PC Danish Norwegian - // 8-bit PC set for Danish Norwegian - // PCL Symbol Set id: 11U - PC8DanishNorwegian uint16 = 2012 - - // PC862LatinHebrew is the uint16 identifier with IANA name IBM862. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - PC862LatinHebrew uint16 = 2013 - - // PC8Turkish is the uint16 identifier with IANA name PC8-Turkish. - // - // PC Latin Turkish. PCL Symbol Set id: 9T - PC8Turkish uint16 = 2014 - - // IBMSymbols is the uint16 identifier with IANA name IBM-Symbols. - // - // Presentation Set, CPGID: 259 - IBMSymbols uint16 = 2015 - - // IBMThai is the uint16 identifier with IANA name IBM-Thai. - // - // Presentation Set, CPGID: 838 - IBMThai uint16 = 2016 - - // HPLegal is the uint16 identifier with IANA name HP-Legal. - // - // PCL 5 Comparison Guide, Hewlett-Packard, - // HP part number 5961-0510, October 1992 - // PCL Symbol Set id: 1U - HPLegal uint16 = 2017 - - // HPPiFont is the uint16 identifier with IANA name HP-Pi-font. - // - // PCL 5 Comparison Guide, Hewlett-Packard, - // HP part number 5961-0510, October 1992 - // PCL Symbol Set id: 15U - HPPiFont uint16 = 2018 - - // HPMath8 is the uint16 identifier with IANA name HP-Math8. - // - // PCL 5 Comparison Guide, Hewlett-Packard, - // HP part number 5961-0510, October 1992 - // PCL Symbol Set id: 8M - HPMath8 uint16 = 2019 - - // HPPSMath is the uint16 identifier with IANA name Adobe-Symbol-Encoding. - // - // PostScript Language Reference Manual - // PCL Symbol Set id: 5M - HPPSMath uint16 = 2020 - - // HPDesktop is the uint16 identifier with IANA name HP-DeskTop. - // - // PCL 5 Comparison Guide, Hewlett-Packard, - // HP part number 5961-0510, October 1992 - // PCL Symbol Set id: 7J - HPDesktop uint16 = 2021 - - // VenturaMath is the uint16 identifier with IANA name Ventura-Math. - // - // PCL 5 Comparison Guide, Hewlett-Packard, - // HP part number 5961-0510, October 1992 - // PCL Symbol Set id: 6M - VenturaMath uint16 = 2022 - - // MicrosoftPublishing is the uint16 identifier with IANA name Microsoft-Publishing. - // - // PCL 5 Comparison Guide, Hewlett-Packard, - // HP part number 5961-0510, October 1992 - // PCL Symbol Set id: 6J - MicrosoftPublishing uint16 = 2023 - - // Windows31J is the uint16 identifier with IANA name Windows-31J. - // - // Windows Japanese. A further extension of Shift_JIS - // to include NEC special characters (Row 13), NEC - // selection of IBM extensions (Rows 89 to 92), and IBM - // extensions (Rows 115 to 119). The CCS's are - // JIS X0201:1997, JIS X0208:1997, and these extensions. - // This charset can be used for the top-level media type "text", - // but it is of limited or specialized use (see rfc2278 ). - // PCL Symbol Set id: 19K - Windows31J uint16 = 2024 - - // GB2312 is the uint16 identifier with IANA name GB2312 (MIME: GB2312). - // - // Chinese for People's Republic of China (PRC) mixed one byte, - // two byte set: - // 20-7E = one byte ASCII - // A1-FE = two byte PRC Kanji - // See GB 2312-80 - // PCL Symbol Set Id: 18C - GB2312 uint16 = 2025 - - // Big5 is the uint16 identifier with IANA name Big5 (MIME: Big5). - // - // Chinese for Taiwan Multi-byte set. - // PCL Symbol Set Id: 18T - Big5 uint16 = 2026 - - // Macintosh is the uint16 identifier with IANA name macintosh. - // - // The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991 - // Reference: RFC1345 - Macintosh uint16 = 2027 - - // IBM037 is the uint16 identifier with IANA name IBM037. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM037 uint16 = 2028 - - // IBM038 is the uint16 identifier with IANA name IBM038. - // - // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 - // Reference: RFC1345 - IBM038 uint16 = 2029 - - // IBM273 is the uint16 identifier with IANA name IBM273. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM273 uint16 = 2030 - - // IBM274 is the uint16 identifier with IANA name IBM274. - // - // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 - // Reference: RFC1345 - IBM274 uint16 = 2031 - - // IBM275 is the uint16 identifier with IANA name IBM275. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM275 uint16 = 2032 - - // IBM277 is the uint16 identifier with IANA name IBM277. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM277 uint16 = 2033 - - // IBM278 is the uint16 identifier with IANA name IBM278. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM278 uint16 = 2034 - - // IBM280 is the uint16 identifier with IANA name IBM280. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM280 uint16 = 2035 - - // IBM281 is the uint16 identifier with IANA name IBM281. - // - // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 - // Reference: RFC1345 - IBM281 uint16 = 2036 - - // IBM284 is the uint16 identifier with IANA name IBM284. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM284 uint16 = 2037 - - // IBM285 is the uint16 identifier with IANA name IBM285. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM285 uint16 = 2038 - - // IBM290 is the uint16 identifier with IANA name IBM290. - // - // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 - // Reference: RFC1345 - IBM290 uint16 = 2039 - - // IBM297 is the uint16 identifier with IANA name IBM297. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM297 uint16 = 2040 - - // IBM420 is the uint16 identifier with IANA name IBM420. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990, - // IBM NLS RM p 11-11 - // Reference: RFC1345 - IBM420 uint16 = 2041 - - // IBM423 is the uint16 identifier with IANA name IBM423. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM423 uint16 = 2042 - - // IBM424 is the uint16 identifier with IANA name IBM424. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM424 uint16 = 2043 - - // PC8CodePage437 is the uint16 identifier with IANA name IBM437. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - PC8CodePage437 uint16 = 2011 - - // IBM500 is the uint16 identifier with IANA name IBM500. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM500 uint16 = 2044 - - // IBM851 is the uint16 identifier with IANA name IBM851. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM851 uint16 = 2045 - - // PCp852 is the uint16 identifier with IANA name IBM852. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - PCp852 uint16 = 2010 - - // IBM855 is the uint16 identifier with IANA name IBM855. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM855 uint16 = 2046 - - // IBM857 is the uint16 identifier with IANA name IBM857. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM857 uint16 = 2047 - - // IBM860 is the uint16 identifier with IANA name IBM860. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM860 uint16 = 2048 - - // IBM861 is the uint16 identifier with IANA name IBM861. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM861 uint16 = 2049 - - // IBM863 is the uint16 identifier with IANA name IBM863. - // - // IBM Keyboard layouts and code pages, PN 07G4586 June 1991 - // Reference: RFC1345 - IBM863 uint16 = 2050 - - // IBM864 is the uint16 identifier with IANA name IBM864. - // - // IBM Keyboard layouts and code pages, PN 07G4586 June 1991 - // Reference: RFC1345 - IBM864 uint16 = 2051 - - // IBM865 is the uint16 identifier with IANA name IBM865. - // - // IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987) - // Reference: RFC1345 - IBM865 uint16 = 2052 - - // IBM868 is the uint16 identifier with IANA name IBM868. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM868 uint16 = 2053 - - // IBM869 is the uint16 identifier with IANA name IBM869. - // - // IBM Keyboard layouts and code pages, PN 07G4586 June 1991 - // Reference: RFC1345 - IBM869 uint16 = 2054 - - // IBM870 is the uint16 identifier with IANA name IBM870. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM870 uint16 = 2055 - - // IBM871 is the uint16 identifier with IANA name IBM871. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM871 uint16 = 2056 - - // IBM880 is the uint16 identifier with IANA name IBM880. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM880 uint16 = 2057 - - // IBM891 is the uint16 identifier with IANA name IBM891. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM891 uint16 = 2058 - - // IBM903 is the uint16 identifier with IANA name IBM903. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM903 uint16 = 2059 - - // IBBM904 is the uint16 identifier with IANA name IBM904. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBBM904 uint16 = 2060 - - // IBM905 is the uint16 identifier with IANA name IBM905. - // - // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 - // Reference: RFC1345 - IBM905 uint16 = 2061 - - // IBM918 is the uint16 identifier with IANA name IBM918. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM918 uint16 = 2062 - - // IBM1026 is the uint16 identifier with IANA name IBM1026. - // - // IBM NLS RM Vol2 SE09-8002-01, March 1990 - // Reference: RFC1345 - IBM1026 uint16 = 2063 - - // IBMEBCDICATDE is the uint16 identifier with IANA name EBCDIC-AT-DE. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - IBMEBCDICATDE uint16 = 2064 - - // EBCDICATDEA is the uint16 identifier with IANA name EBCDIC-AT-DE-A. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICATDEA uint16 = 2065 - - // EBCDICCAFR is the uint16 identifier with IANA name EBCDIC-CA-FR. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICCAFR uint16 = 2066 - - // EBCDICDKNO is the uint16 identifier with IANA name EBCDIC-DK-NO. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICDKNO uint16 = 2067 - - // EBCDICDKNOA is the uint16 identifier with IANA name EBCDIC-DK-NO-A. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICDKNOA uint16 = 2068 - - // EBCDICFISE is the uint16 identifier with IANA name EBCDIC-FI-SE. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICFISE uint16 = 2069 - - // EBCDICFISEA is the uint16 identifier with IANA name EBCDIC-FI-SE-A. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICFISEA uint16 = 2070 - - // EBCDICFR is the uint16 identifier with IANA name EBCDIC-FR. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICFR uint16 = 2071 - - // EBCDICIT is the uint16 identifier with IANA name EBCDIC-IT. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICIT uint16 = 2072 - - // EBCDICPT is the uint16 identifier with IANA name EBCDIC-PT. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICPT uint16 = 2073 - - // EBCDICES is the uint16 identifier with IANA name EBCDIC-ES. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICES uint16 = 2074 - - // EBCDICESA is the uint16 identifier with IANA name EBCDIC-ES-A. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICESA uint16 = 2075 - - // EBCDICESS is the uint16 identifier with IANA name EBCDIC-ES-S. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICESS uint16 = 2076 - - // EBCDICUK is the uint16 identifier with IANA name EBCDIC-UK. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICUK uint16 = 2077 - - // EBCDICUS is the uint16 identifier with IANA name EBCDIC-US. - // - // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 - // Reference: RFC1345 - EBCDICUS uint16 = 2078 - - // Unknown8BiT is the uint16 identifier with IANA name UNKNOWN-8BIT. - // - // Reference: RFC1428 - Unknown8BiT uint16 = 2079 - - // Mnemonic is the uint16 identifier with IANA name MNEMONIC. - // - // rfc1345 , also known as "mnemonic+ascii+38" - // Reference: RFC1345 - Mnemonic uint16 = 2080 - - // Mnem is the uint16 identifier with IANA name MNEM. - // - // rfc1345 , also known as "mnemonic+ascii+8200" - // Reference: RFC1345 - Mnem uint16 = 2081 - - // VISCII is the uint16 identifier with IANA name VISCII. - // - // rfc1456 - // Reference: RFC1456 - VISCII uint16 = 2082 - - // VIQR is the uint16 identifier with IANA name VIQR. - // - // rfc1456 - // Reference: RFC1456 - VIQR uint16 = 2083 - - // KOI8R is the uint16 identifier with IANA name KOI8-R (MIME: KOI8-R). - // - // rfc1489 , based on GOST-19768-74, ISO-6937/8, - // INIS-Cyrillic, ISO-5427. - // Reference: RFC1489 - KOI8R uint16 = 2084 - - // HZGB2312 is the uint16 identifier with IANA name HZ-GB-2312. - // - // rfc1842 , rfc1843 rfc1843 rfc1842 - HZGB2312 uint16 = 2085 - - // IBM866 is the uint16 identifier with IANA name IBM866. - // - // IBM NLDG Volume 2 (SE09-8002-03) August 1994 - IBM866 uint16 = 2086 - - // PC775Baltic is the uint16 identifier with IANA name IBM775. - // - // HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996 - PC775Baltic uint16 = 2087 - - // KOI8U is the uint16 identifier with IANA name KOI8-U. - // - // rfc2319 - // Reference: RFC2319 - KOI8U uint16 = 2088 - - // IBM00858 is the uint16 identifier with IANA name IBM00858. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM00858 - IBM00858 uint16 = 2089 - - // IBM00924 is the uint16 identifier with IANA name IBM00924. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM00924 - IBM00924 uint16 = 2090 - - // IBM01140 is the uint16 identifier with IANA name IBM01140. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01140 - IBM01140 uint16 = 2091 - - // IBM01141 is the uint16 identifier with IANA name IBM01141. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01141 - IBM01141 uint16 = 2092 - - // IBM01142 is the uint16 identifier with IANA name IBM01142. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01142 - IBM01142 uint16 = 2093 - - // IBM01143 is the uint16 identifier with IANA name IBM01143. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01143 - IBM01143 uint16 = 2094 - - // IBM01144 is the uint16 identifier with IANA name IBM01144. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01144 - IBM01144 uint16 = 2095 - - // IBM01145 is the uint16 identifier with IANA name IBM01145. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01145 - IBM01145 uint16 = 2096 - - // IBM01146 is the uint16 identifier with IANA name IBM01146. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01146 - IBM01146 uint16 = 2097 - - // IBM01147 is the uint16 identifier with IANA name IBM01147. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01147 - IBM01147 uint16 = 2098 - - // IBM01148 is the uint16 identifier with IANA name IBM01148. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01148 - IBM01148 uint16 = 2099 - - // IBM01149 is the uint16 identifier with IANA name IBM01149. - // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01149 - IBM01149 uint16 = 2100 - - // Big5HKSCS is the uint16 identifier with IANA name Big5-HKSCS. - // - // See http://www.iana.org/assignments/charset-reg/Big5-HKSCS - Big5HKSCS uint16 = 2101 - - // IBM1047 is the uint16 identifier with IANA name IBM1047. - // - // IBM1047 (EBCDIC Latin 1/Open Systems) http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf - IBM1047 uint16 = 2102 - - // PTCP154 is the uint16 identifier with IANA name PTCP154. - // - // See http://www.iana.org/assignments/charset-reg/PTCP154 - PTCP154 uint16 = 2103 - - // Amiga1251 is the uint16 identifier with IANA name Amiga-1251. - // - // See http://www.amiga.ultranet.ru/Amiga-1251.html - Amiga1251 uint16 = 2104 - - // KOI7switched is the uint16 identifier with IANA name KOI7-switched. - // - // See http://www.iana.org/assignments/charset-reg/KOI7-switched - KOI7switched uint16 = 2105 - - // BRF is the uint16 identifier with IANA name BRF. - // - // See http://www.iana.org/assignments/charset-reg/BRF - BRF uint16 = 2106 - - // TSCII is the uint16 identifier with IANA name TSCII. - // - // See http://www.iana.org/assignments/charset-reg/TSCII - TSCII uint16 = 2107 - - // CP51932 is the uint16 identifier with IANA name CP51932. - // - // See http://www.iana.org/assignments/charset-reg/CP51932 - CP51932 uint16 = 2108 - - // Windows874 is the uint16 identifier with IANA name windows-874. - // - // See http://www.iana.org/assignments/charset-reg/windows-874 - Windows874 uint16 = 2109 - - // Windows1250 is the uint16 identifier with IANA name windows-1250. - // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1250 - Windows1250 uint16 = 2250 - - // Windows1251 is the uint16 identifier with IANA name windows-1251. - // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1251 - Windows1251 uint16 = 2251 - - // Windows1252 is the uint16 identifier with IANA name windows-1252. - // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1252 - Windows1252 uint16 = 2252 - - // Windows1253 is the uint16 identifier with IANA name windows-1253. - // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1253 - Windows1253 uint16 = 2253 - - // Windows1254 is the uint16 identifier with IANA name windows-1254. - // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1254 - Windows1254 uint16 = 2254 - - // Windows1255 is the uint16 identifier with IANA name windows-1255. - // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1255 - Windows1255 uint16 = 2255 - - // Windows1256 is the uint16 identifier with IANA name windows-1256. - // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1256 - Windows1256 uint16 = 2256 - - // Windows1257 is the uint16 identifier with IANA name windows-1257. - // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1257 - Windows1257 uint16 = 2257 - - // Windows1258 is the uint16 identifier with IANA name windows-1258. - // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1258 - Windows1258 uint16 = 2258 - - // TIS620 is the uint16 identifier with IANA name TIS-620. - // - // Thai Industrial Standards Institute (TISI) - TIS620 uint16 = 2259 - - // CP50220 is the uint16 identifier with IANA name CP50220. - // - // See http://www.iana.org/assignments/charset-reg/CP50220 - CP50220 uint16 = 2260 -) - -//Name - string of code page name -var Name = map[uint16]string{ - ASCII: "ASCII", - ISOLatin1: "ISOLatin1", - ISOLatin2: "ISOLatin2", - ISOLatin3: "ISOLatin3", - ISOLatin4: "ISOLatin4", - ISOLatinCyrillic: "ISOLatinCyrillic", - ISOLatinArabic: "ISOLatinArabic", - ISOLatinGreek: "ISOLatinGreek", - ISOLatinHebrew: "ISOLatinHebrew", - ISOLatin5: "ISOLatin5", - ISOLatin6: "ISOLatin6", - IBM866: "IBM866", - Windows1251: "Windows1251", - UTF8: "UTF8", - UTF16: "UTF16", - UTF32: "UTF32", - KOI8R: "KOI8R", -} diff --git a/koi8.go b/koi8.go new file mode 100644 index 0000000..7639dae --- /dev/null +++ b/koi8.go @@ -0,0 +1,49 @@ +package cpd + +import "unicode" + +//unit for koi-8 + +func runesMatchKOI8(data []byte, tbl *codePageTable) (counts int) { + for i := range data { + if i < 2 { + continue + } + //case " Us" - separator_UPPER_symbol + if unicode.IsPunct(rune(data[i-2])) && isUpperKOI8(rune(data[i-1])) { + j := tbl.containsRune(rune(data[i])) + if j > 0 { + (*tbl)[j].count++ + counts++ + continue + } + } + if isKOI8(rune(data[i-1])) { + j := tbl.containsRune(rune(data[i])) + if j > 0 { + (*tbl)[j].count++ + counts++ + } + } + } + return +} + +const ( + cpKOI8StartUpperChar = 0xE0 + cpKOI8StopUpperChar = 0xFF + cpKOI8StartLowerChar = 0xC0 + cpKOI8StopLowerChar = 0xDF +) + +func isUpperKOI8(r rune) bool { + return (r >= cpKOI8StartUpperChar) && (r <= cpKOI8StopUpperChar) +} + +func isLowerKOI8(r rune) bool { + return (r >= cpKOI8StartLowerChar) && (r <= cpKOI8StopLowerChar) +} + +func isKOI8(r rune) bool { + return isUpperKOI8(r) || isLowerKOI8(r) +} diff --git a/test_files/IBM866.txt b/test_files/IBM866.txt new file mode 100644 index 0000000..0c8184a --- /dev/null +++ b/test_files/IBM866.txt @@ -0,0 +1 @@ +᪨ ஢ IBM866 \ No newline at end of file diff --git a/test_files/KOI8-r.txt b/test_files/KOI8-r.txt new file mode 100644 index 0000000..e2c8ffa --- /dev/null +++ b/test_files/KOI8-r.txt @@ -0,0 +1 @@ + KOI8r \ No newline at end of file diff --git a/test_files/Win1251.txt b/test_files/Win1251.txt new file mode 100644 index 0000000..5bb590b --- /dev/null +++ b/test_files/Win1251.txt @@ -0,0 +1 @@ + Windows1251 \ No newline at end of file diff --git a/test_files/noCodePage.txt b/test_files/noCodePage.txt index e494352..c227083 100644 --- a/test_files/noCodePage.txt +++ b/test_files/noCodePage.txt @@ -1 +1 @@ - \ No newline at end of file +0 \ No newline at end of file diff --git a/test_files/utf16BEwbom.txt b/test_files/utf16BEwbom.txt new file mode 100644 index 0000000000000000000000000000000000000000..28a9dc606e36382e637740a6eed1eff6a95bfa12 GIT binary patch literal 44 wcmezOpP`hYgdvT=kim?>iNTejoFR!J9|&_9lvxy5oLL-!(2B)^#gc&w0O`93H~;_u literal 0 HcmV?d00001 diff --git a/test_files/utf16LEwbom.txt b/test_files/utf16LEwbom.txt new file mode 100644 index 0000000000000000000000000000000000000000..c3775bf4c2db60fa98965d24b73139ef9bbf6be1 GIT binary patch literal 42 ucmezWuau#LA&tS1!HmI&!Ihz$A&DU$2y+>f85CHYSsa1Tip7G(k_7WT1r{S9wqmhkF#+-&fV2^iXUd=ehoKB13~mgD3}y^T45QDXcL35xKp9h@7z~CogfO@<7&4eK 0 { + (*tbl)[j].count++ + counts++ + } + } + return +} diff --git a/win1251.go b/win1251.go new file mode 100644 index 0000000..0468931 --- /dev/null +++ b/win1251.go @@ -0,0 +1,51 @@ +package cpd + +import "unicode" + +//unit for windows1251 + +//TODO: нужно отличить от KOI-8r +func runesMatch1251(data []byte, tbl *codePageTable) (counts int) { + for i := range data { + if i < 2 { + continue + } + //case " Us" - separator_UPPER_symbol + if unicode.IsPunct(rune(data[i-2])) && isUpper1251(rune(data[i-1])) { + j := tbl.containsRune(rune(data[i])) + if j > 0 { + (*tbl)[j].count++ + counts++ + continue + } + } + //case "ab" - counts only if symbols are arranged in pairs + if is1251(rune(data[i-1])) { + j := tbl.containsRune(rune(data[i])) + if j > 0 { + (*tbl)[j].count++ + counts++ + } + } + } + return +} + +const ( + cp1251StartUpperChar = 0xC0 + cp1251StopUpperChar = 0xDF + cp1251StartLowerChar = 0xE0 + cp1251StopLowerChar = 0xFF +) + +func isUpper1251(r rune) bool { + return (r >= cp1251StartUpperChar) && (r <= cp1251StopUpperChar) +} + +func isLower1251(r rune) bool { + return (r >= cp1251StartLowerChar) && (r <= cp1251StopLowerChar) +} + +func is1251(r rune) bool { + return isUpper1251(r) || isLower1251(r) +} diff --git a/частотность букв.xlsx b/частотность букв.xlsx deleted file mode 100644 index eaef2a69eaa93982d0897bdb7a1f6658004f41a9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10449 zcmeHtg#A0fhlardzyW{&0Du%A${jFf^8^5Zfdv3C0YE5Saa$WF zQyV7(b$2^cM?EGtYb%mZFiy;n8>CT(~Y(=g&|@=RGGuE z;JwU8$$;Ea&8X_w2+k~A*(T0+zb;dR3aSMFGh%dLQ)zYHM#;!^6InD9822;Xi6fIVzN@L9pvFzU9 zBvzJQr8yp-II+mv(kF%1Y{W?H!3b!TtXL-`W`1zYiJ)+lYg0dX}qFx7YkDGq0C)K|K-d|ESi|StEfyfsk)D!>@sR?R1O8f zN;@?nnmT?Uw6t%l&)wkCia^v}Kk3B=M`;8G7C(8NYgusejlDBG9hF0plzr*fZgiKK z^O=hjX*qWqm)01%l7_-;`GGZZsi|X$O0+R1EqnybJi=gX!4!X;UPbLy_J@`W=nU7RtT)tw_(W@io-UdEZtTXFBkaxf_^R zb(~71wPW6UF)F3^zaZnlxMCZV?xV~)_tmN4I3CJy@8(A6D(OEO2#6pqT!nb7e=3Q; zP(t!4gd$vtN^k* zU}9&Ql>Vwy@&b6$a`#5jebllu=LTt;utc7ym{ZUz*oh>hUz*sLHr{djQ%bYO;a4ZI z+daaq~mOB>@HiK!rR35t+XVOR2hi%oGb+8~HIY z!`hNSs>@X2uEniLY0$U@Z8M>VJt1}F-h8KvIeoXUSvnfcSrw2e<(PbJ*lR5kr@lZZ zi&=r7+$21(G6wy<`u3|N+J^>D_Hb6Vr2cdurFQt(@cPm=NGl>|GDM*W8Vd0C#VhS4 zicB-}9t-2%Vj~Qtr+JpfMn&>t9I0{3>X8dNZrO)pEva^4o&eo20;0xuE@k(!Z`RI+t!? zs&3Q=*glulpCP!?M?0fr#(~8F7Zp5@%Xe3{z>zeh2(4U7*G(#LgOhhQX}8{~)2@Bh z@fJ-oJEnD8nU;uSP+L^T$d%*^02Rk9ju_^E3d@HrUr@$N2Pg}%oC=ax$d*3z)?)^m`HuiZ2s1lt@2fakIx3!+{b<|Y3FR%v^<(Nc$dfEs?X*&<$Il+{Z1ci z`lY8W9y5K~hC88h^E}S@3XDkTDA@w-k4{BNK+ZRCD<%$khMnD+_p0o7 zevMLaL8qxUy0vTSIJ5vwOzBH>Xzx`VnCk#E@!8d*xfA|>m9yK0e5eOV&lyD4uzr^_ zM{^?wQxi2OhqpFnjz2Av$)v0t3@aAgdh7agu?rVD0tpIPW*Frxc%Vily^3rxC{Mz5 z^wf{rK2!H|7Izhl>XAX);r;a*l|#ih`Z@~&2UZ%Pl_8#L(wKLM;yua9*ahEsa!kaG zig8OfKuK33%1Sbyx|{0#v6$70zrV|BsaaKFp5<~%OPqIm-&BeBw3xv1DP5Uj8wR^J z7k!Jpf3-jzDedZ^<18Gv#r_v&b6KUKT;bJR8J+OMUFic6CQ%~M-XGhJh$fgO&>rQY z`XZU4`l2?Xt6y)p`g!|V`w=+S`~>`7iH-@GWRg@Prg-)f`lb7W{Rsoo1HgWS{;%%B zc6`eIJ>KEBW|QxP@JS3}qRD^f*H684G&ePMa{P}~|Ewu@Hs>@8y@{SSte(pl(+BU0DwsRpBuB^ER&PDskJHdZ{NR78?3Dx zu_T4%!+a(TY~^p0t!nKiuv$=B1XGcMa<}ZQV#)@hIi%QEqY(V#1t~~RM3aRgds1a zkx&Ce$MlYBTKU%$>s%R$)UAQ$%_~;3FD8uXBG7#q1XAgS6)%pOU+0fyonsI{zI?nr)M50`fcfl9Bc8wAjzTgHgGTqT~v` zb0W1sNhVN)5bT5#Pj}6j&&AT_Xq6r%9U|k`F1z~&SiICq1{)7zRF=&eJ0<5q17t`f z11O?jjvix0WrvGP4A4-H5~WD0xH%@Upo}Eyn5q0UpAh8XI^Xf zx^tdAe=9I|%+xbD@&Kf*qv9tB2A!iuwgL@JY+vbj=PM7 zPq0$Fv<`@J`p5`VFY?M+PSU@hI$}D0Xy`-A;by!Nx+Wd;qOFXFLOlEVeL~gkkI1Se z&Kp9Gm4hq2>6Z5xAb#iVgkamQd-%lW5bObDB<7)S!{?$6!od35cANw2!;|YF=j?Hk z)gB$avgOS(#7o-jGZ6;b)lG~2Q6v3Sq%2iyy6I7e*0?ljyA)eYq-6%l!Cu*vtH{tD z5;4b@>sm8hPQ8wE><7bmA)5D0~aMT#mi}LHag>I7U>_Fo?qaT ziNi-3qt>~6cBy5r)zU|gt+N#q0}WHC*2kKc;*%nNCd-y`VPeq1##Y?*Ni5dDp34SU zfJ!^Q4ES$rE}gf62?2HLp&MbtYAzHNgXIdNjc+`3S&HvB$PoS|VIMdGr{ zF55?QGa{IIq~W@?Z~;f3Dd(WHuj}IibJJvdr)Y@~1^0C#BAr|ipMHG%?!1W2*R!I= z1X1Qvl!Mnr$|-JDLdDo8hYp&DlrY-2Set_yW|VzSOmljzD2z5)?_NZ8MrAg7M?ma4 z@>7I6oEBMAe$K%x>-fP5tGEcaI8W*T!bOJJN_Bw+9H|v!Ab#y9_P|t=7TT^C)u)>87kQb0H8L=|8+DoF zrEzXcDa2qUt+I3yO^}?%)m~S~14Q}`0^H8lvD`#qwTgn6QE3At4P2tr2jE)BE+yKe zg6PfS;_E!yH(XmPzJ=t;O&un;G-=Ut-Yd0EAK!b_9%|9I+U!43U?2-G@wu2V1;DF1 zrTBWN?h|}8*qG5?ddC_uq~NVjOTV#qDqmZ7e;Oe!;&cFCD_XL;2bZVW%1-0nYEwP^ zG|;99Cn#z)3+HBJcHJLDNLw*i6rw+Fn-pMyo79p;w#%)X7pzs{+#*%5aLI>yVu}d#FD{Ji%)y~@XqDI3jU zrDN2&6$Y?E%$`W8V;7q^q-~6WmeY);amnPrB<9izjl&t39IjDD@!EN`=S-9M#VUro z_D&_ZLTX+{+f7}zC|B)`;PiY zI5~{Sxy}af&)`)H2Vjm{!g@1wwMmW>f)ufVGtMA}5qrqyeNh4n&@6X+9`-OcO`1Ex= z*V({I`QZUe{@8qS!u9zR^O(l5oVRv(A^yo9;j_6@;#0WOGW+z2LMANuwDtSk1vZF& zyzWB1VmQ zd2hzL2i=JqVpriGHST3;hx?5*e5knP`?JEXE7q+#wbzY>Ra}L(Jh;e5}1PRY~)xY&*kA5uKSVX2!v>)Kx@%3b~Ei*1T@!Tmc zy?*ezjh&l8rXd=VUuZqgZPVHYF)PnC4G=MB-NHZ9E^oLcX1^&OPHQSt)&n?|Wal`J z?9%BynX++Y5cT)k*H=rgd!TL-vODkBOpYTjx~LaFXk9W7}do0?VqPr3~694X8tDa zR=*0bNWsDn@EO*gFqP<@)y}w2=_CSVZsr=!7sds$D5AoSQ^x6cT9Z0nQOH!pcf`Ux zOQXno*dLh7 zO(#KzmIx!6#z1EZ_j}^>!!X{EQY=fMaGnBcU^%VKT|rz4cG4Y}Brud`os3g>C?%Ht z`vD$lr<_En#2*jwD!auGzv%|xNl5r}J_n*lq=debkg32xCq<{@3KLQ!&|iY*sbra=sQFnw%rD z(mDor<~E4wa&@41yUrtZe7@9qdPlI%GR>bIz1L~hbE-lc5IC~f$z*BoU}cfY)RS43 z6O0XBQ7}1fAIzdT; z^vA~bqT>gKpnu8@+RZ4zaxAGWGxpT99viaeDydh5Yt+r#EnVe8w80PEP7USNc8-~K z@6f1=#&_?~v}TpSLNO%+W0Om4`?s=(rl+T-D2-I8feI(w!PD4*egl9U;76wqurA@2 zYBV4!rQ4R?fx6rG3>olNOMDhGopsi%p|~#P^Q@@kCjH|0^UvNfR43>1G;*=)r5Vl5 zs)|2#v5xSUw#RU6xh5bD#NxU5z}O)FBAp?90hV1zCJkr#<-w})dZcJZ$R#nkLIwiOJvelfyZ>NLaH5d_#l4lyD(0!YYcMfr&75hIO0*i}a!!J$qCFd>+w(H&fHL zM7?7py)%n<14i@q36GvR_)0ZUNR1g+wLshC9?2B&LyPSd9`Yr7J-FzK(K2(;5~LTd z<_ilmWWm0S9tUW1Jy+C6>RB&{ah{t=ym1cZ?|@>mk^u1M9HjY`ueTp+9CHoBdINZ`5y7#LcfSj&U0x< z^2Q#L@Ir^=;!SK{t2o%&IWoVtbuj%;Rr!A@c!+n8jMua5BEfxk2!D&(c`GqhdK*je z&ic&aT_+!x3-t|GB9+uM=ScfU*+qt&R$RB7?^kZ45xW~IiJe4|iM4|&gy>j)8?~{; z{o}VDc&j-beEBf+mauphou4Fqyb~Q1-L2PWrtu@+8$v6;NOH)?E^|yZwz|fqdqaMx zqQ5+yZ#-(N=+C*yuL;ORcZi_3Q()<{gGV}-k&w<^Cx79aRs|1?&JKE-P|Xuc?j=u~!*5p7>!)d_F!L|jT3r8p&_IN5h}{raxa!yQx-{KC0e7 zMZA|SpLsq**IRnfubA zHcfTt&ya82_b^(aG%*jU>1T(^wM(+`uHxp@lBV`N;y^!+R5%Q!+i<$e=24+}Tv}C` zH3sXBE8-g{D*AU`xTC@~Qr@Chl&uZF)pPyq06>u)5B zfEET>e{_g+p#9bQ7}?qV&-g%C_Ma;~UetDp1?k-(^bIl63IXqm3X*@Rv5eMPh0&Av zIy1%m*V$DRp`~%)xtO|%=>?Cys4Y*o8wuGJLOz4D*ohvMuUwi zYcUxGYM`X5CCQEi$T^+7i$#;)t)WbeT$c0v>d4+z0jYGRKB1*L_$uk9pqkg%s_@FE zVm&cEB7C>b(4Zjp!)2De8vTu~Eck5{|*b|x_I1EQ0!o3+-b zD{C~kbh$Gse$3EHxTWdHT4fn+;hwulMnHDF)ra8MyqBYvmy*oV&^X_FNe!Wlz7pDh z?}vV{;G&x1?;hek?q@kx;#u;y<#Qb6)=yEsq4s>KBIIV~w4FuZVe_o({KP}mhv{0+jGc1rq3YpJ=lKMR z5mFiY_hVUqEY~0Re>tS3BL8=QzaJR+gYf6Q0HPCrIX?23@b5+RzY-2Z_N4#slKNwu z$CcGzNIxJa&mPrT9}_<=m;E9(gD?z2{Ey<Aq+bZhNPoWm|5r|W4EVSn^9wKq zlG=g{>2Y=DF~H+w*e?J*NJa4Hoc>IWJtlpe5%@*=9_R67e&q)qqx?N2`-K4jyd(ku z{t=)(CV%Wxe*w&r{5FV39`#>6>|@Zs8|q(>0KhESU&izwv#lZz3z0zp015J;f{1Y< I<@~