diff --git a/src/modules/PowerOCR/PowerOCR/Helpers/ImageMethods.cs b/src/modules/PowerOCR/PowerOCR/Helpers/ImageMethods.cs index 8383f33a15..01cd8a319f 100644 --- a/src/modules/PowerOCR/PowerOCR/Helpers/ImageMethods.cs +++ b/src/modules/PowerOCR/PowerOCR/Helpers/ImageMethods.cs @@ -10,6 +10,7 @@ using System.Globalization; using System.IO; using System.Linq; using System.Text; +using System.Text.RegularExpressions; using System.Threading.Tasks; using System.Windows; using System.Windows.Input; @@ -146,11 +147,25 @@ internal class ImageMethods } else { + var cjkRegex = new Regex(@"\p{IsCJKUnifiedIdeographs}"); + foreach (OcrLine ocrLine in ocrResult.Lines) { + bool isBeginning = true; + bool isCJKPrev = false; foreach (OcrWord ocrWord in ocrLine.Words) { + bool isCJK = cjkRegex.IsMatch(ocrWord.Text); + + // Use spaces to separate non-CJK words. + if (!isBeginning && (!isCJK || !isCJKPrev)) + { + _ = text.Append(' '); + } + _ = text.Append(ocrWord.Text); + isCJKPrev = isCJK; + isBeginning = false; } text.Append(Environment.NewLine);