diff --git a/src/preamble.js b/src/preamble.js
index 227b30433..465e47a3c 100644
--- a/src/preamble.js
+++ b/src/preamble.js
@@ -569,6 +569,51 @@ function Pointer_stringify(ptr, /* optional */ length) {
 }
 Module['Pointer_stringify'] = Pointer_stringify;
 
+// Given a pointer 'ptr' to a null-terminated UTF32LE-encoded string in the emscripten HEAP, returns
+// a copy of that string as a Javascript String object. Code points above U+FFFF are converted to
+// UTF-16 surrogate pairs, since Javascript strings are sequences of UTF-16 code units.
+function utf32_to_jsstring(ptr) {
+  var i = 0;
+
+  var str = '';
+  while (1) {
+    var utf32 = {{{ makeGetValue('ptr', 'i*4', 'i32') }}};
+    if (utf32 == 0)
+      return str;
+    ++i;
+    // String.fromCharCode() takes UTF-16 code units, not code points, so anything outside the
+    // Basic Multilingual Plane has to be split into a lead/trail surrogate pair.
+    if (utf32 >= 0x10000) {
+      var offset = utf32 - 0x10000;
+      str += String.fromCharCode(0xD800 | (offset >> 10), 0xDC00 | (offset & 0x3FF));
+    } else {
+      str += String.fromCharCode(utf32);
+    }
+  }
+}
+Module['utf32_to_jsstring'] = utf32_to_jsstring;
+
+// Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr',
+// null-terminated and encoded in UTF32LE form. A valid UTF-16 surrogate pair in 'str' is written as
+// one code point, so the copy requires at most (str.length+1)*4 bytes of space in the HEAP.
+function jsstring_to_utf32(str, outPtr) {
+  var outIdx = 0; // Index of the next 32-bit element to write; can lag behind i.
+  for(var i = 0; i < str.length; ++i, ++outIdx) {
+    // charCodeAt() returns a UTF-16 code unit, which may be only half of a surrogate pair.
+    var utf32 = str.charCodeAt(i);
+    if (utf32 >= 0xD800 && utf32 <= 0xDBFF && i + 1 < str.length) {
+      var trail = str.charCodeAt(i + 1);
+      if (trail >= 0xDC00 && trail <= 0xDFFF) {
+        utf32 = 0x10000 + ((utf32 & 0x3FF) << 10) + (trail & 0x3FF);
+        ++i; // The trail surrogate has been consumed as well.
+      }
+    }
+    {{{ makeSetValue('outPtr', 'outIdx*4', 'utf32', 'i32') }}}
+  }
+  {{{ makeSetValue('outPtr', 'outIdx*4', 0, 'i32') }}}
+}
+Module['jsstring_to_utf32'] = jsstring_to_utf32;
+
 // Memory management
 
 var PAGE_SIZE = 4096;
diff --git a/tests/test_core.py b/tests/test_core.py
index 5d4f35e82..7ceeebd21 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -7525,6 +7525,9 @@ def process(filename):
     '''
     self.do_run(src, '206 188 226 128 μ†ℱ ╋ℯ╳╋ 😇\nμ†ℱ ╋ℯ╳╋ 😇,206,188,226,128\n');
 
+  def test_utf32(self):
+    self.do_run(open(path_from_root('tests', 'utf32.cpp')).read(), 'OK.')
+
   def test_direct_string_constant_usage(self):
     if self.emcc_args is None: return self.skip('requires libcxx')
 
diff --git a/tests/utf32.cpp b/tests/utf32.cpp
new file mode 100644
index 000000000..a3e660ee6
--- /dev/null
+++ b/tests/utf32.cpp
@@ -0,0 +1,27 @@
+#include <stdio.h>
+#include <string>
+#include <emscripten.h>
+#include <cassert>
+#include <wchar.h>
+// NOTE(review): header names were lost in extraction; stdio/string/cassert follow from usage, the other two are assumed - confirm.
+
+// This code tests that utf32-encoded std::wstrings can be marshalled between C++ and JS.
+int main() {
+  std::wstring wstr = L"abc\u2603\u20AC123"; // U+2603 is snowman, U+20AC is the Euro sign.
+  const int len = (wstr.length()+1)*4; // Bytes incl. null terminator; assumes 4-byte wchar_t - TODO confirm for target.
+  char *memory = new char[len]; // Destination buffer that the JS side writes the round-tripped string into.
+
+  asm("var str = Module.utf32_to_jsstring(%0);"
+      "Module.print(str);"
+      "Module.jsstring_to_utf32(str, %1);"
+      :
+      : "r"(wstr.c_str()), "r"(memory));
+
+  // Compare memory to confirm that the string is intact after taking a route through JS side.
+  const char *srcPtr = reinterpret_cast<const char *>(wstr.c_str()); // Byte view of the source string.
+  for(int i = 0; i < len; ++i) {
+    assert(memory[i] == srcPtr[i]);
+  }
+  printf("OK.\n");
+  delete[] memory;
+}