From 6831800684c0f4e8e380482c234da55baf88b9a6 Mon Sep 17 00:00:00 2001 From: Gurzau Raul Date: Thu, 14 Nov 2019 01:24:59 +0200 Subject: [PATCH] Backed out changeset f0968dabe1ff (bug 1590167) for build bustage at force-cargo-library-build on a CLOSED TREE. --- .cargo/config.in | 5 - Cargo.lock | 13 +- intl/hyphenation/glue/hnjalloc.h | 46 + intl/hyphenation/glue/hnjstdio.cpp | 133 + intl/hyphenation/glue/moz.build | 20 +- .../hyphenation/glue/nsHyphenationManager.cpp | 49 +- intl/hyphenation/glue/nsHyphenator.cpp | 232 +- intl/hyphenation/glue/nsHyphenator.h | 9 +- intl/hyphenation/hyphen/AUTHORS | 17 + intl/hyphenation/hyphen/COPYING | 17 + intl/hyphenation/hyphen/COPYING.LGPL | 515 ++ intl/hyphenation/hyphen/COPYING.MPL | 470 ++ intl/hyphenation/hyphen/NEWS | 106 + intl/hyphenation/hyphen/README | 134 + intl/hyphenation/hyphen/README.compound | 87 + intl/hyphenation/hyphen/README.hyphen | 108 + intl/hyphenation/hyphen/README.nonstandard | 122 + intl/hyphenation/hyphen/hyphen.c | 1201 +++++ intl/hyphenation/hyphen/hyphen.h | 175 + intl/hyphenation/hyphen/moz.build | 19 + intl/locales/af/hyphenation/hyph_af.hyf | 0 intl/locales/bg/hyphenation/hyph_bg.hyf | 0 intl/locales/ca/hyphenation/hyph_ca.hyf | 0 intl/locales/cy/hyphenation/hyph_cy.hyf | 0 intl/locales/da/hyphenation/hyph_da.hyf | 0 .../de-1901/hyphenation/hyph_de-1901.hyf | 0 .../de-1996/hyphenation/hyph_de-1996.hyf | 0 intl/locales/de-CH/hyphenation/hyph_de-CH.hyf | 0 intl/locales/en-US/hyphenation/hyph_en_US.hyf | 0 intl/locales/eo/hyphenation/hyph_eo.hyf | 0 intl/locales/es/hyphenation/hyph_es.hyf | 0 intl/locales/et/hyphenation/hyph_et.hyf | 0 intl/locales/fi/hyphenation/hyph_fi.hyf | 0 intl/locales/fr/hyphenation/hyph_fr.hyf | 0 intl/locales/gl/hyphenation/hyph_gl.hyf | 0 intl/locales/hr/hyphenation/hyph_hr.hyf | 0 intl/locales/hsb/hyphenation/hyph_hsb.dic | 4 +- intl/locales/hsb/hyphenation/hyph_hsb.hyf | 0 intl/locales/hu/hyphenation/hyph_hu.hyf | 0 intl/locales/ia/hyphenation/hyph_ia.hyf | 0 
intl/locales/is/hyphenation/hyph_is.hyf | 0 intl/locales/it/hyphenation/hyph_it.hyf | 0 intl/locales/kmr/hyphenation/hyph_kmr.hyf | 0 intl/locales/la/hyphenation/hyph_la.hyf | 0 intl/locales/lt/hyphenation/hyph_lt.hyf | 0 intl/locales/mn/hyphenation/hyph_mn.hyf | 0 intl/locales/moz.build | 4 +- intl/locales/nb/hyphenation/hyph_nb.hyf | 0 intl/locales/nl/hyphenation/hyph_nl.hyf | 0 intl/locales/nn/hyphenation/hyph_nn.hyf | 0 intl/locales/pl/hyphenation/hyph_pl.hyf | 0 intl/locales/pt/hyphenation/hyph_pt.hyf | 0 intl/locales/ru/hyphenation/hyph_ru.hyf | 0 intl/locales/sh/hyphenation/hyph_sh.hyf | 0 intl/locales/sl/hyphenation/hyph_sl.hyf | 0 intl/locales/sv/hyphenation/hyph_sv.hyf | 0 intl/locales/tr/hyphenation/hyph_tr.hyf | 0 intl/locales/uk/hyphenation/hyph_uk.hyf | 0 intl/moz.build | 1 + layout/style/RunCbindgen.py | 3 +- testing/testsuite-targets.mk | 2 +- .../rust/mapped_hyph/.cargo-checksum.json | 1 - third_party/rust/mapped_hyph/COPYRIGHT | 12 - third_party/rust/mapped_hyph/Cargo.toml | 18 - third_party/rust/mapped_hyph/LICENSE-APACHE | 202 - third_party/rust/mapped_hyph/LICENSE-MIT | 25 - third_party/rust/mapped_hyph/README.md | 75 - third_party/rust/mapped_hyph/benches/bench.rs | 50 - third_party/rust/mapped_hyph/cbindgen.toml | 114 - .../mapped_hyph/doc/mapped_hyph_format.md | 98 - third_party/rust/mapped_hyph/hyph_en_US.hyf | 0 .../rust/mapped_hyph/src/bin/hyf_compile.rs | 25 - third_party/rust/mapped_hyph/src/builder.rs | 473 -- third_party/rust/mapped_hyph/src/ffi.rs | 165 - third_party/rust/mapped_hyph/src/lib.rs | 640 --- third_party/rust/mapped_hyph/src/main.rs | 67 - third_party/rust/mapped_hyph/tests/base.hyf | 0 third_party/rust/mapped_hyph/tests/base.hyph | 4543 ----------------- third_party/rust/mapped_hyph/tests/base.word | 4543 ----------------- .../rust/mapped_hyph/tests/compound.hyf | 0 .../rust/mapped_hyph/tests/compound4.hyf | 0 .../rust/mapped_hyph/tests/compound5.hyf | 0 .../rust/mapped_hyph/tests/compound6.hyf | 0 
third_party/rust/mapped_hyph/tests/hyphen.hyf | 0 third_party/rust/mapped_hyph/tests/lhmin.hyf | 0 third_party/rust/mapped_hyph/tests/num.hyf | 0 third_party/rust/mapped_hyph/tests/rhmin.hyf | 0 .../rust/mapped_hyph/tests/settings2.hyf | 0 .../rust/mapped_hyph/tests/settings3.hyf | 0 third_party/rust/mapped_hyph/tests/test.rs | 169 - toolkit/library/rust/shared/Cargo.toml | 1 - toolkit/library/rust/shared/lib.rs | 1 - 92 files changed, 3260 insertions(+), 11454 deletions(-) create mode 100644 intl/hyphenation/glue/hnjalloc.h create mode 100644 intl/hyphenation/glue/hnjstdio.cpp create mode 100644 intl/hyphenation/hyphen/AUTHORS create mode 100644 intl/hyphenation/hyphen/COPYING create mode 100644 intl/hyphenation/hyphen/COPYING.LGPL create mode 100644 intl/hyphenation/hyphen/COPYING.MPL create mode 100755 intl/hyphenation/hyphen/NEWS create mode 100644 intl/hyphenation/hyphen/README create mode 100644 intl/hyphenation/hyphen/README.compound create mode 100644 intl/hyphenation/hyphen/README.hyphen create mode 100644 intl/hyphenation/hyphen/README.nonstandard create mode 100644 intl/hyphenation/hyphen/hyphen.c create mode 100644 intl/hyphenation/hyphen/hyphen.h create mode 100644 intl/hyphenation/hyphen/moz.build delete mode 100644 intl/locales/af/hyphenation/hyph_af.hyf delete mode 100644 intl/locales/bg/hyphenation/hyph_bg.hyf delete mode 100644 intl/locales/ca/hyphenation/hyph_ca.hyf delete mode 100644 intl/locales/cy/hyphenation/hyph_cy.hyf delete mode 100644 intl/locales/da/hyphenation/hyph_da.hyf delete mode 100644 intl/locales/de-1901/hyphenation/hyph_de-1901.hyf delete mode 100644 intl/locales/de-1996/hyphenation/hyph_de-1996.hyf delete mode 100644 intl/locales/de-CH/hyphenation/hyph_de-CH.hyf delete mode 100644 intl/locales/en-US/hyphenation/hyph_en_US.hyf delete mode 100644 intl/locales/eo/hyphenation/hyph_eo.hyf delete mode 100644 intl/locales/es/hyphenation/hyph_es.hyf delete mode 100644 intl/locales/et/hyphenation/hyph_et.hyf delete mode 100644 
intl/locales/fi/hyphenation/hyph_fi.hyf delete mode 100644 intl/locales/fr/hyphenation/hyph_fr.hyf delete mode 100644 intl/locales/gl/hyphenation/hyph_gl.hyf delete mode 100644 intl/locales/hr/hyphenation/hyph_hr.hyf delete mode 100644 intl/locales/hsb/hyphenation/hyph_hsb.hyf delete mode 100644 intl/locales/hu/hyphenation/hyph_hu.hyf delete mode 100644 intl/locales/ia/hyphenation/hyph_ia.hyf delete mode 100644 intl/locales/is/hyphenation/hyph_is.hyf delete mode 100644 intl/locales/it/hyphenation/hyph_it.hyf delete mode 100644 intl/locales/kmr/hyphenation/hyph_kmr.hyf delete mode 100644 intl/locales/la/hyphenation/hyph_la.hyf delete mode 100644 intl/locales/lt/hyphenation/hyph_lt.hyf delete mode 100644 intl/locales/mn/hyphenation/hyph_mn.hyf delete mode 100644 intl/locales/nb/hyphenation/hyph_nb.hyf delete mode 100644 intl/locales/nl/hyphenation/hyph_nl.hyf delete mode 100644 intl/locales/nn/hyphenation/hyph_nn.hyf delete mode 100644 intl/locales/pl/hyphenation/hyph_pl.hyf delete mode 100644 intl/locales/pt/hyphenation/hyph_pt.hyf delete mode 100644 intl/locales/ru/hyphenation/hyph_ru.hyf delete mode 100644 intl/locales/sh/hyphenation/hyph_sh.hyf delete mode 100644 intl/locales/sl/hyphenation/hyph_sl.hyf delete mode 100644 intl/locales/sv/hyphenation/hyph_sv.hyf delete mode 100644 intl/locales/tr/hyphenation/hyph_tr.hyf delete mode 100644 intl/locales/uk/hyphenation/hyph_uk.hyf delete mode 100644 third_party/rust/mapped_hyph/.cargo-checksum.json delete mode 100644 third_party/rust/mapped_hyph/COPYRIGHT delete mode 100644 third_party/rust/mapped_hyph/Cargo.toml delete mode 100644 third_party/rust/mapped_hyph/LICENSE-APACHE delete mode 100644 third_party/rust/mapped_hyph/LICENSE-MIT delete mode 100644 third_party/rust/mapped_hyph/README.md delete mode 100644 third_party/rust/mapped_hyph/benches/bench.rs delete mode 100644 third_party/rust/mapped_hyph/cbindgen.toml delete mode 100644 third_party/rust/mapped_hyph/doc/mapped_hyph_format.md delete mode 100644 
third_party/rust/mapped_hyph/hyph_en_US.hyf delete mode 100644 third_party/rust/mapped_hyph/src/bin/hyf_compile.rs delete mode 100644 third_party/rust/mapped_hyph/src/builder.rs delete mode 100644 third_party/rust/mapped_hyph/src/ffi.rs delete mode 100644 third_party/rust/mapped_hyph/src/lib.rs delete mode 100644 third_party/rust/mapped_hyph/src/main.rs delete mode 100644 third_party/rust/mapped_hyph/tests/base.hyf delete mode 100644 third_party/rust/mapped_hyph/tests/base.hyph delete mode 100644 third_party/rust/mapped_hyph/tests/base.word delete mode 100644 third_party/rust/mapped_hyph/tests/compound.hyf delete mode 100644 third_party/rust/mapped_hyph/tests/compound4.hyf delete mode 100644 third_party/rust/mapped_hyph/tests/compound5.hyf delete mode 100644 third_party/rust/mapped_hyph/tests/compound6.hyf delete mode 100644 third_party/rust/mapped_hyph/tests/hyphen.hyf delete mode 100644 third_party/rust/mapped_hyph/tests/lhmin.hyf delete mode 100644 third_party/rust/mapped_hyph/tests/num.hyf delete mode 100644 third_party/rust/mapped_hyph/tests/rhmin.hyf delete mode 100644 third_party/rust/mapped_hyph/tests/settings2.hyf delete mode 100644 third_party/rust/mapped_hyph/tests/settings3.hyf delete mode 100644 third_party/rust/mapped_hyph/tests/test.rs diff --git a/.cargo/config.in b/.cargo/config.in index 36032c98b8a7..50bf1f063287 100644 --- a/.cargo/config.in +++ b/.cargo/config.in @@ -17,11 +17,6 @@ git = "https://github.com/mozilla/neqo" replace-with = "vendored-sources" rev = "a17c1e83" -[source."https://github.com/jfkthame/mapped_hyph.git"] -git = "https://github.com/jfkthame/mapped_hyph.git" -replace-with = "vendored-sources" -tag = "v0.3.0" - [source."https://github.com/hsivonen/packed_simd"] branch = "rust_1_32" git = "https://github.com/hsivonen/packed_simd" diff --git a/Cargo.lock b/Cargo.lock index da73514b4fd6..4063faf29eb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1260,7 +1260,6 @@ dependencies = [ "kvstore 0.1.0", "lmdb-rkv-sys 0.9.5 
(registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "mapped_hyph 0.3.0 (git+https://github.com/jfkthame/mapped_hyph.git?tag=v0.3.0)", "mdns_service 0.1.0", "mozurl 0.0.1", "mp4parse_capi 0.11.2", @@ -1801,15 +1800,6 @@ dependencies = [ "synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "mapped_hyph" -version = "0.3.0" -source = "git+https://github.com/jfkthame/mapped_hyph.git?tag=v0.3.0#3b5fffbe17e8cdcc6814886a9b9170fde3db13bd" -dependencies = [ - "arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "marionette" version = "0.1.0" @@ -2907,7 +2897,7 @@ dependencies = [ "byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "digest 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "murmurhash3 0.0.5 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -4301,7 +4291,6 @@ dependencies = [ "checksum lzw 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7d947cbb889ed21c2a84be6ffbaebf5b4e0f4340638cba0444907e38b56be084" "checksum mach 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" "checksum malloc_size_of_derive 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "35adee9ed962cf7d07d62cb58bc45029f3227f5b5b86246caa8632f06c187bc3" -"checksum mapped_hyph 0.3.0 (git+https://github.com/jfkthame/mapped_hyph.git?tag=v0.3.0)" = "" "checksum matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "100aabe6b8ff4e4a7e32c1c13523379802df0772b82466207ac25b013f193376" "checksum memchr 2.2.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39" "checksum memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" diff --git a/intl/hyphenation/glue/hnjalloc.h b/intl/hyphenation/glue/hnjalloc.h new file mode 100644 index 000000000000..6c88158f1c22 --- /dev/null +++ b/intl/hyphenation/glue/hnjalloc.h @@ -0,0 +1,46 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * To enable us to load hyphenation dictionaries from arbitrary resource URIs, + * not just through file paths using stdio, we override the (few) stdio APIs + * that hyphen.c uses and provide our own reimplementation that calls Gecko + * i/o methods. 
+ */ + +#include <stdio.h> /* ensure stdio.h is loaded before our macros */ + +#undef FILE +#define FILE hnjFile + +#define fopen(path, mode) hnjFopen(path, mode) +#define fclose(file) hnjFclose(file) +#define fgets(buf, count, file) hnjFgets(buf, count, file) +#define feof(file) hnjFeof(file) +#define fgetc(file) hnjFgetc(file) + +typedef struct hnjFile_ hnjFile; + +#ifdef __cplusplus +extern "C" { +#endif + +void* hnj_malloc(size_t size); +void* hnj_realloc(void* ptr, size_t size); +void hnj_free(void* ptr); + +hnjFile* hnjFopen(const char* aURISpec, const char* aMode); + +int hnjFclose(hnjFile* f); + +char* hnjFgets(char* s, int n, hnjFile* f); + +int hnjFeof(hnjFile* f); + +int hnjFgetc(hnjFile* f); + +#ifdef __cplusplus +} +#endif diff --git a/intl/hyphenation/glue/hnjstdio.cpp b/intl/hyphenation/glue/hnjstdio.cpp new file mode 100644 index 000000000000..40e4faa8ca04 --- /dev/null +++ b/intl/hyphenation/glue/hnjstdio.cpp @@ -0,0 +1,133 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This file provides substitutes for the basic stdio routines used by hyphen.c +// to read its dictionary files. We #define the stdio names to these versions +// in hnjalloc.h, so that we can use nsIURI and nsIInputStream to specify and +// access the dictionary resources. 
+ +#include "hnjalloc.h" + +#undef FILE // Undo #defines from hnjalloc.h before #including other headers +#undef fopen +#undef fclose +#undef fgets +#undef feof +#undef fgetc + +#include "nsNetUtil.h" +#include "nsIInputStream.h" +#include "nsIURI.h" +#include "nsContentUtils.h" + +#define BUFSIZE 1024 + +struct hnjFile_ { + nsCOMPtr<nsIInputStream> mStream; + char mBuffer[BUFSIZE]; + uint32_t mCurPos; + uint32_t mLimit; + bool mEOF; +}; + +// replacement for fopen() +// (not a full substitute: only supports read access) +hnjFile* hnjFopen(const char* aURISpec, const char* aMode) { + // this override only needs to support "r" + NS_ASSERTION(!strcmp(aMode, "r"), "unsupported fopen() mode in hnjFopen"); + + nsCOMPtr<nsIURI> uri; + nsresult rv = NS_NewURI(getter_AddRefs(uri), aURISpec); + if (NS_FAILED(rv)) { + return nullptr; + } + + nsCOMPtr<nsIChannel> channel; + rv = NS_NewChannel(getter_AddRefs(channel), uri, + nsContentUtils::GetSystemPrincipal(), + nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_DATA_IS_NULL, + nsIContentPolicy::TYPE_OTHER); + if (NS_FAILED(rv)) { + return nullptr; + } + + nsCOMPtr<nsIInputStream> instream; + rv = channel->Open(getter_AddRefs(instream)); + if (NS_FAILED(rv)) { + return nullptr; + } + + hnjFile* f = new hnjFile; + f->mStream = instream; + f->mCurPos = 0; + f->mLimit = 0; + f->mEOF = false; + + return f; +} + +// replacement for fclose() +int hnjFclose(hnjFile* f) { + NS_ASSERTION(f && f->mStream, "bad argument to hnjFclose"); + + int result = 0; + nsresult rv = f->mStream->Close(); + if (NS_FAILED(rv)) { + result = EOF; + } + f->mStream = nullptr; + + delete f; + return result; +} + +// replacement for fgetc() +int hnjFgetc(hnjFile* f) { + if (f->mCurPos >= f->mLimit) { + f->mCurPos = 0; + + nsresult rv = f->mStream->Read(f->mBuffer, BUFSIZE, &f->mLimit); + if (NS_FAILED(rv)) { + f->mLimit = 0; + } + + if (f->mLimit == 0) { + f->mEOF = true; + return EOF; + } + } + + return f->mBuffer[f->mCurPos++]; +} + +// replacement for fgets() +// (not a full reimplementation, but sufficient for 
libhyphen's needs) +char* hnjFgets(char* s, int n, hnjFile* f) { + NS_ASSERTION(s && f, "bad argument to hnjFgets"); + + int i = 0; + while (i < n - 1) { + int c = hnjFgetc(f); + + if (c == EOF) { + break; + } + + s[i++] = c; + + if (c == '\n' || c == '\r') { + break; + } + } + + if (i == 0) { + return nullptr; // end of file + } + + s[i] = '\0'; // null-terminate the returned string + return s; +} + +int hnjFeof(hnjFile* f) { return f->mEOF ? EOF : 0; } diff --git a/intl/hyphenation/glue/moz.build b/intl/hyphenation/glue/moz.build index a66d08c26662..92e8ad11aeca 100644 --- a/intl/hyphenation/glue/moz.build +++ b/intl/hyphenation/glue/moz.build @@ -14,18 +14,16 @@ UNIFIED_SOURCES += [ 'nsHyphenator.cpp', ] +# These files cannot be built in unified mode because they include hnjalloc.h. +SOURCES += [ + 'hnjstdio.cpp', +] + +LOCAL_INCLUDES += [ + '../hyphen', +] + FINAL_LIBRARY = 'xul' if CONFIG['CC_TYPE'] in ('clang', 'gcc'): CXXFLAGS += ['-Wno-error=shadow'] - -if CONFIG['COMPILE_ENVIRONMENT']: - GENERATED_FILES += [ - 'mapped_hyph.h' - ] - - generated = GENERATED_FILES['mapped_hyph.h'] - generated.script = '/layout/style/RunCbindgen.py:generate' - generated.inputs = [ - '/third_party/rust/mapped_hyph' - ] diff --git a/intl/hyphenation/glue/nsHyphenationManager.cpp b/intl/hyphenation/glue/nsHyphenationManager.cpp index 68953a23f12c..b20e6b754579 100644 --- a/intl/hyphenation/glue/nsHyphenationManager.cpp +++ b/intl/hyphenation/glue/nsHyphenationManager.cpp @@ -37,7 +37,8 @@ static const char kMemoryPressureNotification[] = "memory-pressure"; static const char kParentShuttingDownNotification[] = "profile-before-change"; static const char kChildShuttingDownNotification[] = "content-child-shutdown"; -class HyphenReporter final : public nsIMemoryReporter { +class HyphenReporter final : public nsIMemoryReporter, + public CountingAllocatorBase<HyphenReporter> { private: ~HyphenReporter() = default; @@ -46,19 +47,14 @@ class HyphenReporter final : public nsIMemoryReporter { // For 
telemetry, we report the memory rounded up to the nearest KB. static uint32_t MemoryAllocatedInKB() { - size_t total = 0; - if (nsHyphenationManager::Instance()) { - total = nsHyphenationManager::Instance()->SizeOfIncludingThis( - moz_malloc_size_of); - } - return (total + 1023) / 1024; + return (MemoryAllocated() + 1023) / 1024; } NS_IMETHOD CollectReports(nsIHandleReportCallback* aHandleReport, nsISupports* aData, bool aAnonymize) override { - size_t total = 0; + size_t total = MemoryAllocated(); if (nsHyphenationManager::Instance()) { - total = nsHyphenationManager::Instance()->SizeOfIncludingThis( + total += nsHyphenationManager::Instance()->SizeOfIncludingThis( moz_malloc_size_of); } MOZ_COLLECT_REPORT("explicit/hyphenation", KIND_HEAP, UNITS_BYTES, total, @@ -69,6 +65,30 @@ class HyphenReporter final : public nsIMemoryReporter { NS_IMPL_ISUPPORTS(HyphenReporter, nsIMemoryReporter) +template <> +CountingAllocatorBase<HyphenReporter>::AmountType + CountingAllocatorBase<HyphenReporter>::sAmount(0); + +/** + * Allocation wrappers to track the amount of memory allocated by libhyphen. + * Note that libhyphen assumes its malloc/realloc functions are infallible! 
+ */ +extern "C" { +void* hnj_malloc(size_t aSize); +void* hnj_realloc(void* aPtr, size_t aSize); +void hnj_free(void* aPtr); +}; + +void* hnj_malloc(size_t aSize) { + return HyphenReporter::InfallibleCountingMalloc(aSize); +} + +void* hnj_realloc(void* aPtr, size_t aSize) { + return HyphenReporter::InfallibleCountingRealloc(aPtr, aSize); +} + +void hnj_free(void* aPtr) { HyphenReporter::CountingFree(aPtr); } + nsHyphenationManager* nsHyphenationManager::sInstance = nullptr; NS_IMPL_ISUPPORTS(nsHyphenationManager, nsIObserver) @@ -237,7 +257,7 @@ void nsHyphenationManager::LoadPatternListFromOmnijar(Omnijar::Type aType) { } nsZipFind* find; - zip->FindInit("hyphenation/hyph_*.hyf", &find); + zip->FindInit("hyphenation/hyph_*.dic", &find); if (!find) { return; } @@ -258,7 +278,7 @@ void nsHyphenationManager::LoadPatternListFromOmnijar(Omnijar::Type aType) { continue; } ToLowerCase(locale); - locale.SetLength(locale.Length() - 4); // strip ".hyf" + locale.SetLength(locale.Length() - 4); // strip ".dic" locale.Cut(0, locale.RFindChar('/') + 1); // strip directory if (StringBeginsWith(locale, NS_LITERAL_CSTRING("hyph_"))) { locale.Cut(0, 5); @@ -303,13 +323,13 @@ void nsHyphenationManager::LoadPatternListFromDir(nsIFile* aDir) { file->GetLeafName(dictName); NS_ConvertUTF16toUTF8 locale(dictName); ToLowerCase(locale); - if (!StringEndsWith(locale, NS_LITERAL_CSTRING(".hyf"))) { + if (!StringEndsWith(locale, NS_LITERAL_CSTRING(".dic"))) { continue; } if (StringBeginsWith(locale, NS_LITERAL_CSTRING("hyph_"))) { locale.Cut(0, 5); } - locale.SetLength(locale.Length() - 4); // strip ".hyf" + locale.SetLength(locale.Length() - 4); // strip ".dic" for (uint32_t i = 0; i < locale.Length(); ++i) { if (locale[i] == '_') { locale.Replace(i, 1, '-'); @@ -363,6 +383,9 @@ size_t nsHyphenationManager::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) { // finds it is worthwhile. 
result += mHyphenators.ShallowSizeOfExcludingThis(aMallocSizeOf); + for (auto i = mHyphenators.ConstIter(); !i.Done(); i.Next()) { + result += aMallocSizeOf(i.Data().get()); + } return result; } diff --git a/intl/hyphenation/glue/nsHyphenator.cpp b/intl/hyphenation/glue/nsHyphenator.cpp index bff3e8265eca..2d116516683e 100644 --- a/intl/hyphenation/glue/nsHyphenator.cpp +++ b/intl/hyphenation/glue/nsHyphenator.cpp @@ -4,147 +4,34 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "nsHyphenator.h" - -#include "mozilla/Telemetry.h" -#include "nsContentUtils.h" -#include "nsIChannel.h" #include "nsIFile.h" -#include "nsIFileURL.h" -#include "nsIInputStream.h" -#include "nsIJARURI.h" -#include "nsIURI.h" -#include "nsNetUtil.h" -#include "nsUnicodeProperties.h" #include "nsUTF8Utils.h" +#include "nsUnicodeProperties.h" +#include "nsIURI.h" +#include "mozilla/Telemetry.h" -#include "mapped_hyph.h" - -static const void* GetItemPtrFromJarURI(nsIJARURI* aJAR, uint32_t* aLength) { - // Try to get the jarfile's nsZipArchive, find the relevant item, and return - // a pointer to its data provided it is stored uncompressed. - nsCOMPtr jarFile; - if (NS_FAILED(aJAR->GetJARFile(getter_AddRefs(jarFile)))) { - return nullptr; - } - nsCOMPtr fileUrl = do_QueryInterface(jarFile); - if (!fileUrl) { - return nullptr; - } - nsCOMPtr file; - fileUrl->GetFile(getter_AddRefs(file)); - if (!file) { - return nullptr; - } - RefPtr archive = mozilla::Omnijar::GetReader(file); - if (archive) { - nsCString path; - aJAR->GetJAREntry(path); - nsZipItem* item = archive->GetItem(path.get()); - if (item && item->Compression() == 0 && item->Size() > 0) { - // We do NOT own this data, but it won't go away until the omnijar - // file is closed during shutdown. 
- const uint8_t* data = archive->GetData(item); - if (data) { - *aLength = item->Size(); - return data; - } - } - } - return nullptr; -} - -static const void* LoadResourceFromURI(nsIURI* aURI, uint32_t* aLength) { - nsCOMPtr channel; - if (NS_FAILED(NS_NewChannel(getter_AddRefs(channel), aURI, - nsContentUtils::GetSystemPrincipal(), - nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_DATA_IS_NULL, - nsIContentPolicy::TYPE_OTHER))) { - return nullptr; - } - nsCOMPtr instream; - if (NS_FAILED(channel->Open(getter_AddRefs(instream)))) { - return nullptr; - } - // Check size, bail out if it is excessively large (the largest of the - // hyphenation files currently shipped with Firefox is around 1MB - // uncompressed). - uint64_t available; - if (NS_FAILED(instream->Available(&available)) || !available || - available > 16 * 1024 * 1024) { - return nullptr; - } - char* buffer = static_cast(malloc(available)); - if (!buffer) { - return nullptr; - } - uint32_t bytesRead = 0; - if (NS_FAILED(instream->Read(buffer, available, &bytesRead)) || - bytesRead != available) { - free(buffer); - return nullptr; - } - *aLength = bytesRead; - return buffer; -} +#include "hyphen.h" nsHyphenator::nsHyphenator(nsIURI* aURI, bool aHyphenateCapitalized) - : mDict(nullptr), - mDictSize(0), - mOwnsDict(false), - mHyphenateCapitalized(aHyphenateCapitalized) { + : mDict(nullptr), mHyphenateCapitalized(aHyphenateCapitalized) { + nsCString uriSpec; + nsresult rv = aURI->GetSpec(uriSpec); + if (NS_FAILED(rv)) { + return; + } Telemetry::AutoTimer telemetry; - - nsCOMPtr jar = do_QueryInterface(aURI); - if (jar) { - // This gives us a raw pointer into the omnijar's data (if uncompressed); - // we do not own it and must not attempt to free it! - mDict = GetItemPtrFromJarURI(jar, &mDictSize); - if (!mDict) { - // Omnijar must be compressed: we need to decompress the item into our - // own buffer. (Currently this is the case on Android.) - // TODO: Allocate in shared memory for all content processes to use. 
- mDict = LoadResourceFromURI(aURI, &mDictSize); - mOwnsDict = true; - } - if (mDict) { - // Reject the resource from omnijar if it fails to validate. (If this - // happens, we will hit the MOZ_ASSERT_UNREACHABLE at the end of the - // constructor, indicating the build is broken in some way.) - if (!mapped_hyph_is_valid_hyphenator(static_cast(mDict), - mDictSize)) { - if (mOwnsDict) { - free(const_cast(mDict)); - } - mDict = nullptr; - mDictSize = 0; - } - } - } else if (mozilla::net::SchemeIsFile(aURI)) { - // Ask the Rust lib to mmap the file. In this case our mDictSize field - // remains zero; mDict is not a pointer to the raw data but an opaque - // reference to a Rust object, and can only be freed by passing it to - // mapped_hyph_free_dictionary(). - nsAutoCString path; - aURI->GetFilePath(path); - mDict = mapped_hyph_load_dictionary(path.get()); - } - - if (!mDict) { - // This should never happen, unless someone has included an invalid - // hyphenation file that fails to load. - MOZ_ASSERT_UNREACHABLE("invalid hyphenation resource?"); + mDict = hnj_hyphen_load(uriSpec.get()); +#ifdef DEBUG + if (mDict) { + printf("loaded hyphenation patterns from %s\n", uriSpec.get()); } +#endif } nsHyphenator::~nsHyphenator() { - if (mDict) { - if (mDictSize) { - if (mOwnsDict) { - free(const_cast(mDict)); - } - } else { - mapped_hyph_free_dictionary((HyphDic*)mDict); - } + if (mDict != nullptr) { + hnj_hyphen_free((HyphenDict*)mDict); + mDict = nullptr; } } @@ -196,12 +83,13 @@ nsresult nsHyphenator::Hyphenate(const nsAString& aString, void nsHyphenator::HyphenateWord(const nsAString& aString, uint32_t aStart, uint32_t aLimit, nsTArray& aHyphens) { - // Convert word from aStart and aLimit in aString to utf-8 for mapped_hyph, + // Convert word from aStart and aLimit in aString to utf-8 for libhyphen, // lowercasing it as we go so that it will match the (lowercased) patterns // (bug 1105644). 
nsAutoCString utf8; - const char16_t* cur = aString.BeginReading() + aStart; - const char16_t* end = aString.BeginReading() + aLimit; + const char16_t* const begin = aString.BeginReading(); + const char16_t* cur = begin + aStart; + const char16_t* end = begin + aLimit; bool firstLetter = true; while (cur < end) { uint32_t ch = *cur++; @@ -210,10 +98,10 @@ void nsHyphenator::HyphenateWord(const nsAString& aString, uint32_t aStart, if (cur < end && NS_IS_LOW_SURROGATE(*cur)) { ch = SURROGATE_TO_UCS4(ch, *cur++); } else { - return; // unpaired surrogate: bail out, don't hyphenate broken text + ch = 0xfffd; // unpaired surrogate, treat as REPLACEMENT CHAR } } else if (NS_IS_LOW_SURROGATE(ch)) { - return; // unpaired surrogate + ch = 0xfffd; // unpaired surrogate } // XXX What about language-specific casing? Consider Turkish I/i... @@ -223,11 +111,15 @@ void nsHyphenator::HyphenateWord(const nsAString& aString, uint32_t aStart, ch = ToLowerCase(ch); if (ch != origCh) { - // Avoid hyphenating capitalized words (bug 1550532) unless explicitly - // allowed by prefs for the language in use. - // Also never auto-hyphenate a word that has internal caps, as it may - // well be an all-caps acronym or a quirky name like iTunes. - if (!mHyphenateCapitalized || !firstLetter) { + if (firstLetter) { + // Avoid hyphenating capitalized words (bug 1550532) unless explicitly + // allowed by prefs for the language in use. + if (!mHyphenateCapitalized) { + return; + } + } else { + // Also never auto-hyphenate a word that has internal caps, as it may + // well be an all-caps acronym or a quirky name like iTunes. 
return; } } @@ -250,43 +142,31 @@ void nsHyphenator::HyphenateWord(const nsAString& aString, uint32_t aStart, } } - AutoTArray hyphenValues; - hyphenValues.SetLength(utf8.Length()); - int32_t result; - if (mDictSize > 0) { - result = mapped_hyph_find_hyphen_values_raw( - static_cast(mDict), mDictSize, utf8.BeginReading(), - utf8.Length(), hyphenValues.Elements(), hyphenValues.Length()); - } else { - result = mapped_hyph_find_hyphen_values_dic( - static_cast(mDict), utf8.BeginReading(), utf8.Length(), - hyphenValues.Elements(), hyphenValues.Length()); - } - if (result > 0) { - // We need to convert UTF-8 indexing as used by the hyphenation lib into - // UTF-16 indexing of the aHyphens[] array for Gecko. - uint32_t utf16index = 0; - for (uint32_t utf8index = 0; utf8index < utf8.Length();) { - // We know utf8 is valid, so we only need to look at the first byte of - // each character to determine its length and the corresponding UTF-16 - // length to add to utf16index. - const uint8_t leadByte = utf8[utf8index]; - if (leadByte < 0x80) { - utf8index += 1; - } else if (leadByte < 0xE0) { - utf8index += 2; - } else if (leadByte < 0xF0) { - utf8index += 3; - } else { - utf8index += 4; + AutoTArray utf8hyphens; + utf8hyphens.SetLength(utf8.Length() + 5); + char** rep = nullptr; + int* pos = nullptr; + int* cut = nullptr; + int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict, utf8.BeginReading(), + utf8.Length(), utf8hyphens.Elements(), + nullptr, &rep, &pos, &cut); + if (!err) { + // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer + // from utf8 code unit indexing (which would match the utf8 input + // string directly) to Unicode character indexing. + // We then need to convert this to utf16 code unit offsets for Gecko. 
+ const char* hyphPtr = utf8hyphens.Elements(); + const char16_t* cur = begin + aStart; + const char16_t* end = begin + aLimit; + while (cur < end) { + if (*hyphPtr & 0x01) { + aHyphens[cur - begin] = true; } - // The hyphenation value of interest is the one for the last code unit - // of the utf-8 character, and is recorded on the last code unit of the - // utf-16 character (in the case of a surrogate pair). - utf16index += leadByte >= 0xF0 ? 2 : 1; - if (utf16index > 0 && (hyphenValues[utf8index - 1] & 0x01)) { - aHyphens[aStart + utf16index - 1] = true; + cur++; + if (cur < end && NS_IS_SURROGATE_PAIR(*(cur - 1), *cur)) { + cur++; } + hyphPtr++; } } } diff --git a/intl/hyphenation/glue/nsHyphenator.h b/intl/hyphenation/glue/nsHyphenator.h index 0eb8a2468ff4..790e3b32e509 100644 --- a/intl/hyphenation/glue/nsHyphenator.h +++ b/intl/hyphenation/glue/nsHyphenator.h @@ -28,14 +28,7 @@ class nsHyphenator { void HyphenateWord(const nsAString& aString, uint32_t aStart, uint32_t aLimit, nsTArray& aHyphens); - const void* mDict; // If mDictSize > 0, this points to a raw byte buffer - // containing the hyphenation dictionary data (in the - // memory-mapped omnijar, or owned by us if mOwnsDict); - // if mDictSize == 0, it's a HyphDic reference created - // by mapped_hyph_load_dictionary() and must be released - // by calling mapped_hyph_free_dictionary(). - uint32_t mDictSize; - bool mOwnsDict; + void* mDict; bool mHyphenateCapitalized; }; diff --git a/intl/hyphenation/hyphen/AUTHORS b/intl/hyphenation/hyphen/AUTHORS new file mode 100644 index 000000000000..e1e0f3c844ec --- /dev/null +++ b/intl/hyphenation/hyphen/AUTHORS @@ -0,0 +1,17 @@ +Libhnj was written by Raph Levien . 
+ +Original Libhnj source with OOo's patches are managed by Rene Engelhard and +Chris Halls at Debian: http://packages.debian.org/stable/libdevel/libhnj-dev +and http://packages.debian.org/unstable/source/libhnj + +This subset of Libhnj was extended by +Peter Novodvorsky (OOo integration), +László Németh (non-standard and compound +hyphenation with Unicode support), +Nanning Buitenhuis (substrings.c) + +Write bug reports to László Németh or in the bug tracker of hunspell.sf.net. + +--- +Please contact Raph Levien for information about licensing for +proprietary applications. diff --git a/intl/hyphenation/hyphen/COPYING b/intl/hyphenation/hyphen/COPYING new file mode 100644 index 000000000000..4c278cb77352 --- /dev/null +++ b/intl/hyphenation/hyphen/COPYING @@ -0,0 +1,17 @@ +GPL 2.0/LGPL 2.1/MPL 1.1 tri-license + +The contents of this software may be used under the terms of +the GNU General Public License Version 2 or later (the "GPL"), or +the GNU Lesser General Public License Version 2.1 or later (the "LGPL", +see COPYING.LGPL) or the Mozilla Public License Version 1.1 or later +(the "MPL", see COPYING.MPL). + +The Plain TeX hyphenation tables "hyphen.tex" by Donald E. Knuth +has a non MPL/LGPL compatible license, but freely redistributable: +"Unlimited copying and redistribution of this file are permitted as long +as this file is not modified. Modifications are permitted, but only if +the resulting file is not named hyphen.tex." + +Software distributed under these licenses is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the licences +for the specific language governing rights and limitations under the licenses. 
diff --git a/intl/hyphenation/hyphen/COPYING.LGPL b/intl/hyphenation/hyphen/COPYING.LGPL new file mode 100644 index 000000000000..c4792dd27a32 --- /dev/null +++ b/intl/hyphenation/hyphen/COPYING.LGPL @@ -0,0 +1,515 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. 
These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. +^L + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. 
+ + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. 
+ + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. +^L + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. 
+ + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. 
+^L + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. 
+Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. +^L + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. 
However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. +^L + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. 
However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. +^L + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. +^L + 14. 
If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. 
+ + END OF TERMS AND CONDITIONS +^L + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. +It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper +mail. + +You should also get your employer (if you work as a programmer) or +your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James +Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it!
+ + diff --git a/intl/hyphenation/hyphen/COPYING.MPL b/intl/hyphenation/hyphen/COPYING.MPL new file mode 100644 index 000000000000..7714141d1542 --- /dev/null +++ b/intl/hyphenation/hyphen/COPYING.MPL @@ -0,0 +1,470 @@ + MOZILLA PUBLIC LICENSE + Version 1.1 + + --------------- + +1. Definitions. + + 1.0.1. "Commercial Use" means distribution or otherwise making the + Covered Code available to a third party. + + 1.1. "Contributor" means each entity that creates or contributes to + the creation of Modifications. + + 1.2. "Contributor Version" means the combination of the Original + Code, prior Modifications used by a Contributor, and the Modifications + made by that particular Contributor. + + 1.3. "Covered Code" means the Original Code or Modifications or the + combination of the Original Code and Modifications, in each case + including portions thereof. + + 1.4. "Electronic Distribution Mechanism" means a mechanism generally + accepted in the software development community for the electronic + transfer of data. + + 1.5. "Executable" means Covered Code in any form other than Source + Code. + + 1.6. "Initial Developer" means the individual or entity identified + as the Initial Developer in the Source Code notice required by Exhibit + A. + + 1.7. "Larger Work" means a work which combines Covered Code or + portions thereof with code not governed by the terms of this License. + + 1.8. "License" means this document. + + 1.8.1. "Licensable" means having the right to grant, to the maximum + extent possible, whether at the time of the initial grant or + subsequently acquired, any and all of the rights conveyed herein. + + 1.9. "Modifications" means any addition to or deletion from the + substance or structure of either the Original Code or any previous + Modifications. When Covered Code is released as a series of files, a + Modification is: + A. Any addition to or deletion from the contents of a file + containing Original Code or previous Modifications. + + B. 
Any new file that contains any part of the Original Code or + previous Modifications. + + 1.10. "Original Code" means Source Code of computer software code + which is described in the Source Code notice required by Exhibit A as + Original Code, and which, at the time of its release under this + License is not already Covered Code governed by this License. + + 1.10.1. "Patent Claims" means any patent claim(s), now owned or + hereafter acquired, including without limitation, method, process, + and apparatus claims, in any patent Licensable by grantor. + + 1.11. "Source Code" means the preferred form of the Covered Code for + making modifications to it, including all modules it contains, plus + any associated interface definition files, scripts used to control + compilation and installation of an Executable, or source code + differential comparisons against either the Original Code or another + well known, available Covered Code of the Contributor's choice. The + Source Code can be in a compressed or archival form, provided the + appropriate decompression or de-archiving software is widely available + for no charge. + + 1.12. "You" (or "Your") means an individual or a legal entity + exercising rights under, and complying with all of the terms of, this + License or a future version of this License issued under Section 6.1. + For legal entities, "You" includes any entity which controls, is + controlled by, or is under common control with You. For purposes of + this definition, "control" means (a) the power, direct or indirect, + to cause the direction or management of such entity, whether by + contract or otherwise, or (b) ownership of more than fifty percent + (50%) of the outstanding shares or beneficial ownership of such + entity. + +2. Source Code License. + + 2.1. The Initial Developer Grant. 
+ The Initial Developer hereby grants You a world-wide, royalty-free, + non-exclusive license, subject to third party intellectual property + claims: + (a) under intellectual property rights (other than patent or + trademark) Licensable by Initial Developer to use, reproduce, + modify, display, perform, sublicense and distribute the Original + Code (or portions thereof) with or without Modifications, and/or + as part of a Larger Work; and + + (b) under Patents Claims infringed by the making, using or + selling of Original Code, to make, have made, use, practice, + sell, and offer for sale, and/or otherwise dispose of the + Original Code (or portions thereof). + + (c) the licenses granted in this Section 2.1(a) and (b) are + effective on the date Initial Developer first distributes + Original Code under the terms of this License. + + (d) Notwithstanding Section 2.1(b) above, no patent license is + granted: 1) for code that You delete from the Original Code; 2) + separate from the Original Code; or 3) for infringements caused + by: i) the modification of the Original Code or ii) the + combination of the Original Code with other software or devices. + + 2.2. Contributor Grant. 
+ Subject to third party intellectual property claims, each Contributor + hereby grants You a world-wide, royalty-free, non-exclusive license + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Contributor, to use, reproduce, modify, + display, perform, sublicense and distribute the Modifications + created by such Contributor (or portions thereof) either on an + unmodified basis, with other Modifications, as Covered Code + and/or as part of a Larger Work; and + + (b) under Patent Claims infringed by the making, using, or + selling of Modifications made by that Contributor either alone + and/or in combination with its Contributor Version (or portions + of such combination), to make, use, sell, offer for sale, have + made, and/or otherwise dispose of: 1) Modifications made by that + Contributor (or portions thereof); and 2) the combination of + Modifications made by that Contributor with its Contributor + Version (or portions of such combination). + + (c) the licenses granted in Sections 2.2(a) and 2.2(b) are + effective on the date Contributor first makes Commercial Use of + the Covered Code. + + (d) Notwithstanding Section 2.2(b) above, no patent license is + granted: 1) for any code that Contributor has deleted from the + Contributor Version; 2) separate from the Contributor Version; + 3) for infringements caused by: i) third party modifications of + Contributor Version or ii) the combination of Modifications made + by that Contributor with other software (except as part of the + Contributor Version) or other devices; or 4) under Patent Claims + infringed by Covered Code in the absence of Modifications made by + that Contributor. + +3. Distribution Obligations. + + 3.1. Application of License. + The Modifications which You create or to which You contribute are + governed by the terms of this License, including without limitation + Section 2.2. 
The Source Code version of Covered Code may be + distributed only under the terms of this License or a future version + of this License released under Section 6.1, and You must include a + copy of this License with every copy of the Source Code You + distribute. You may not offer or impose any terms on any Source Code + version that alters or restricts the applicable version of this + License or the recipients' rights hereunder. However, You may include + an additional document offering the additional rights described in + Section 3.5. + + 3.2. Availability of Source Code. + Any Modification which You create or to which You contribute must be + made available in Source Code form under the terms of this License + either on the same media as an Executable version or via an accepted + Electronic Distribution Mechanism to anyone to whom you made an + Executable version available; and if made available via Electronic + Distribution Mechanism, must remain available for at least twelve (12) + months after the date it initially became available, or at least six + (6) months after a subsequent version of that particular Modification + has been made available to such recipients. You are responsible for + ensuring that the Source Code version remains available even if the + Electronic Distribution Mechanism is maintained by a third party. + + 3.3. Description of Modifications. + You must cause all Covered Code to which You contribute to contain a + file documenting the changes You made to create that Covered Code and + the date of any change. You must include a prominent statement that + the Modification is derived, directly or indirectly, from Original + Code provided by the Initial Developer and including the name of the + Initial Developer in (a) the Source Code, and (b) in any notice in an + Executable version or related documentation in which You describe the + origin or ownership of the Covered Code. + + 3.4. Intellectual Property Matters + (a) Third Party Claims. 
+ If Contributor has knowledge that a license under a third party's + intellectual property rights is required to exercise the rights + granted by such Contributor under Sections 2.1 or 2.2, + Contributor must include a text file with the Source Code + distribution titled "LEGAL" which describes the claim and the + party making the claim in sufficient detail that a recipient will + know whom to contact. If Contributor obtains such knowledge after + the Modification is made available as described in Section 3.2, + Contributor shall promptly modify the LEGAL file in all copies + Contributor makes available thereafter and shall take other steps + (such as notifying appropriate mailing lists or newsgroups) + reasonably calculated to inform those who received the Covered + Code that new knowledge has been obtained. + + (b) Contributor APIs. + If Contributor's Modifications include an application programming + interface and Contributor has knowledge of patent licenses which + are reasonably necessary to implement that API, Contributor must + also include this information in the LEGAL file. + + (c) Representations. + Contributor represents that, except as disclosed pursuant to + Section 3.4(a) above, Contributor believes that Contributor's + Modifications are Contributor's original creation(s) and/or + Contributor has sufficient rights to grant the rights conveyed by + this License. + + 3.5. Required Notices. + You must duplicate the notice in Exhibit A in each file of the Source + Code. If it is not possible to put such notice in a particular Source + Code file due to its structure, then You must include such notice in a + location (such as a relevant directory) where a user would be likely + to look for such a notice. If You created one or more Modification(s) + You may add your name as a Contributor to the notice described in + Exhibit A. 
You must also duplicate this License in any documentation + for the Source Code where You describe recipients' rights or ownership + rights relating to Covered Code. You may choose to offer, and to + charge a fee for, warranty, support, indemnity or liability + obligations to one or more recipients of Covered Code. However, You + may do so only on Your own behalf, and not on behalf of the Initial + Developer or any Contributor. You must make it absolutely clear than + any such warranty, support, indemnity or liability obligation is + offered by You alone, and You hereby agree to indemnify the Initial + Developer and every Contributor for any liability incurred by the + Initial Developer or such Contributor as a result of warranty, + support, indemnity or liability terms You offer. + + 3.6. Distribution of Executable Versions. + You may distribute Covered Code in Executable form only if the + requirements of Section 3.1-3.5 have been met for that Covered Code, + and if You include a notice stating that the Source Code version of + the Covered Code is available under the terms of this License, + including a description of how and where You have fulfilled the + obligations of Section 3.2. The notice must be conspicuously included + in any notice in an Executable version, related documentation or + collateral in which You describe recipients' rights relating to the + Covered Code. You may distribute the Executable version of Covered + Code or ownership rights under a license of Your choice, which may + contain terms different from this License, provided that You are in + compliance with the terms of this License and that the license for the + Executable version does not attempt to limit or alter the recipient's + rights in the Source Code version from the rights set forth in this + License. 
If You distribute the Executable version under a different + license You must make it absolutely clear that any terms which differ + from this License are offered by You alone, not by the Initial + Developer or any Contributor. You hereby agree to indemnify the + Initial Developer and every Contributor for any liability incurred by + the Initial Developer or such Contributor as a result of any such + terms You offer. + + 3.7. Larger Works. + You may create a Larger Work by combining Covered Code with other code + not governed by the terms of this License and distribute the Larger + Work as a single product. In such a case, You must make sure the + requirements of this License are fulfilled for the Covered Code. + +4. Inability to Comply Due to Statute or Regulation. + + If it is impossible for You to comply with any of the terms of this + License with respect to some or all of the Covered Code due to + statute, judicial order, or regulation then You must: (a) comply with + the terms of this License to the maximum extent possible; and (b) + describe the limitations and the code they affect. Such description + must be included in the LEGAL file described in Section 3.4 and must + be included with all distributions of the Source Code. Except to the + extent prohibited by statute or regulation, such description must be + sufficiently detailed for a recipient of ordinary skill to be able to + understand it. + +5. Application of this License. + + This License applies to code to which the Initial Developer has + attached the notice in Exhibit A and to related Covered Code. + +6. Versions of the License. + + 6.1. New Versions. + Netscape Communications Corporation ("Netscape") may publish revised + and/or new versions of the License from time to time. Each version + will be given a distinguishing version number. + + 6.2. Effect of New Versions. 
+ Once Covered Code has been published under a particular version of the + License, You may always continue to use it under the terms of that + version. You may also choose to use such Covered Code under the terms + of any subsequent version of the License published by Netscape. No one + other than Netscape has the right to modify the terms applicable to + Covered Code created under this License. + + 6.3. Derivative Works. + If You create or use a modified version of this License (which you may + only do in order to apply it to code which is not already Covered Code + governed by this License), You must (a) rename Your license so that + the phrases "Mozilla", "MOZILLAPL", "MOZPL", "Netscape", + "MPL", "NPL" or any confusingly similar phrase do not appear in your + license (except to note that your license differs from this License) + and (b) otherwise make it clear that Your version of the license + contains terms which differ from the Mozilla Public License and + Netscape Public License. (Filling in the name of the Initial + Developer, Original Code or Contributor in the notice described in + Exhibit A shall not of themselves be deemed to be modifications of + this License.) + +7. DISCLAIMER OF WARRANTY. + + COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, + WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF + DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. + THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED CODE + IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, + YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE + COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER + OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF + ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +8. TERMINATION. + + 8.1. 
This License and the rights granted hereunder will terminate + automatically if You fail to comply with terms herein and fail to cure + such breach within 30 days of becoming aware of the breach. All + sublicenses to the Covered Code which are properly granted shall + survive any termination of this License. Provisions which, by their + nature, must remain in effect beyond the termination of this License + shall survive. + + 8.2. If You initiate litigation by asserting a patent infringement + claim (excluding declatory judgment actions) against Initial Developer + or a Contributor (the Initial Developer or Contributor against whom + You file such action is referred to as "Participant") alleging that: + + (a) such Participant's Contributor Version directly or indirectly + infringes any patent, then any and all rights granted by such + Participant to You under Sections 2.1 and/or 2.2 of this License + shall, upon 60 days notice from Participant terminate prospectively, + unless if within 60 days after receipt of notice You either: (i) + agree in writing to pay Participant a mutually agreeable reasonable + royalty for Your past and future use of Modifications made by such + Participant, or (ii) withdraw Your litigation claim with respect to + the Contributor Version against such Participant. If within 60 days + of notice, a reasonable royalty and payment arrangement are not + mutually agreed upon in writing by the parties or the litigation claim + is not withdrawn, the rights granted by Participant to You under + Sections 2.1 and/or 2.2 automatically terminate at the expiration of + the 60 day notice period specified above. 
+ + (b) any software, hardware, or device, other than such Participant's + Contributor Version, directly or indirectly infringes any patent, then + any rights granted to You by such Participant under Sections 2.1(b) + and 2.2(b) are revoked effective as of the date You first made, used, + sold, distributed, or had made, Modifications made by that + Participant. + + 8.3. If You assert a patent infringement claim against Participant + alleging that such Participant's Contributor Version directly or + indirectly infringes any patent where such claim is resolved (such as + by license or settlement) prior to the initiation of patent + infringement litigation, then the reasonable value of the licenses + granted by such Participant under Sections 2.1 or 2.2 shall be taken + into account in determining the amount or value of any payment or + license. + + 8.4. In the event of termination under Sections 8.1 or 8.2 above, + all end user license agreements (excluding distributors and resellers) + which have been validly granted by You or any distributor hereunder + prior to termination shall survive termination. + +9. LIMITATION OF LIABILITY. + + UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT + (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL + DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE, + OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR + ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY + CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, + WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER + COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN + INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF + LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY + RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW + PROHIBITS SUCH LIMITATION. 
SOME JURISDICTIONS DO NOT ALLOW THE + EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO + THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +10. U.S. GOVERNMENT END USERS. + + The Covered Code is a "commercial item," as that term is defined in + 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer + software" and "commercial computer software documentation," as such + terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 + C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), + all U.S. Government End Users acquire Covered Code with only those + rights set forth herein. + +11. MISCELLANEOUS. + + This License represents the complete agreement concerning subject + matter hereof. If any provision of this License is held to be + unenforceable, such provision shall be reformed only to the extent + necessary to make it enforceable. This License shall be governed by + California law provisions (except to the extent applicable law, if + any, provides otherwise), excluding its conflict-of-law provisions. + With respect to disputes in which at least one party is a citizen of, + or an entity chartered or registered to do business in the United + States of America, any litigation relating to this License shall be + subject to the jurisdiction of the Federal Courts of the Northern + District of California, with venue lying in Santa Clara County, + California, with the losing party responsible for costs, including + without limitation, court costs and reasonable attorneys' fees and + expenses. The application of the United Nations Convention on + Contracts for the International Sale of Goods is expressly excluded. + Any law or regulation which provides that the language of a contract + shall be construed against the drafter shall not apply to this + License. + +12. RESPONSIBILITY FOR CLAIMS. 
+ + As between Initial Developer and the Contributors, each party is + responsible for claims and damages arising, directly or indirectly, + out of its utilization of rights under this License and You agree to + work with Initial Developer and Contributors to distribute such + responsibility on an equitable basis. Nothing herein is intended or + shall be deemed to constitute any admission of liability. + +13. MULTIPLE-LICENSED CODE. + + Initial Developer may designate portions of the Covered Code as + "Multiple-Licensed". "Multiple-Licensed" means that the Initial + Developer permits you to utilize portions of the Covered Code under + Your choice of the NPL or the alternative licenses, if any, specified + by the Initial Developer in the file described in Exhibit A. + +EXHIBIT A -Mozilla Public License. + + ``The contents of this file are subject to the Mozilla Public License + Version 1.1 (the "License"); you may not use this file except in + compliance with the License. You may obtain a copy of the License at + http://www.mozilla.org/MPL/ + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + License for the specific language governing rights and limitations + under the License. + + The Original Code is ______________________________________. + + The Initial Developer of the Original Code is ________________________. + Portions created by ______________________ are Copyright (C) ______ + _______________________. All Rights Reserved. + + Contributor(s): ______________________________________. + + Alternatively, the contents of this file may be used under the terms + of the _____ license (the "[___] License"), in which case the + provisions of [______] License are applicable instead of those + above. 
If you wish to allow use of your version of this file only + under the terms of the [____] License and not to allow others to use + your version of this file under the MPL, indicate your decision by + deleting the provisions above and replace them with the notice and + other provisions required by the [___] License. If you do not delete + the provisions above, a recipient may use your version of this file + under either the MPL or the [___] License." + + [NOTE: The text of this Exhibit A may differ slightly from the text of + the notices in the Source Code files of the Original Code. You should + use the text of this Exhibit A rather than the text found in the + Original Code Source Code for Your Modifications.] + diff --git a/intl/hyphenation/hyphen/NEWS b/intl/hyphenation/hyphen/NEWS new file mode 100755 index 000000000000..efaa78b24994 --- /dev/null +++ b/intl/hyphenation/hyphen/NEWS @@ -0,0 +1,106 @@ +2014-09-18 Hyphen 2.8.8: + - remove last coverity warning, 0 remaining + +2014-06-27 Hyphen 2.8.7: + - various clang scan-build warning fixes + +2012-09-13 Hyphen 2.8.6: + - righthyphenmin fix for 3-byte or more UTF-8 + multibyte characters by Steven Dickson + - fix for fdo#43931 (removing hard hyphen hyphenation for LibreOffice) + +2012-07-12 Hyphen 2.8.5: + - fix short alloc + +2012-06-29 Hyphen 2.8.4: + - coverity warnings + +2011-10-10 Hyphen 2.8.3: + - fix NOHYPHEN + - fix unbalanced hyphenation of LibreOffice/OOo + - set default COMPOUNDHYPHENMIN=3 at hyphens and apostrophes + - fix VERBOSE in hyphen.c + - new ./example option: -n to print hyphenation vector + +2011-10-07 Hyphen 2.8.2: + - fix for explicite COMPOUNDHYPHENMIN values + +2011-10-06 Hyphen 2.8.1: + - force minimal lefthyphenmin and righthyphenmin values of the dictionary + (eg. righthyphenmin=3 of English dictionaries in LibreOffice/OOo, + also the original TeX hyphenation patterns are correct only with this + righthyphenmin value). 
+ +2011-10-04 Hyphen 2.8: + - Ignore leading and ending numbers (eg. manual/field based indexes + in LibreOffice/OOo) + + - Fix LibreOffice/OpenOffice.org hyphenation errors at apostrophes and + hyphens, n-dashes with default NOHYPHEN separators. + Eg. *o'c=lock -> o'clock. + +2010-12-01 Hyphen 2.7.1 bug fix release + +2010-11-27 Hyphen 2.7 release: + - The new hyphenation problem of OpenOffice.org 3.2, related to its + modified word breaking of words with hyphen characters, can be fixed + with the new NOHYPHEN feature. Also it's possible to solve the similar old + problem with apostrophes. More information: README.compound. + + - improved English dictionaries + +2010-08-10 Hyphen 2.6 release: + - maintenance release, fix all warnings, tidy up + make check with VALGRIND=memcheck, etc. + +2010-02-23 Hyphen 2.5 release: + - add Unicode ligature support for correct hyphenmin calculation + (ff, fi, fl, St, st are 1-character, ffi and ffl are 2-character length for + hyphenation) + - fix lefthyphenmin calculation for UTF-8 encoded input + + - en_US hyphenation dictionary: + - add OpenOffice.org patch to fix apostrophe handling + - add correct hyphenation for words with Unicode f-ligatures + (NOTE: hyphenation within ligatures is not supported yet + because of an implementation problem of OpenOffice.org, + see OOo issue 71608.) + + - small patches from OpenOffice.org + +2008-05-01 Hyphen 2.4 release: + - compound word hyphenation support by recursive pattern matching + based on two hyphenation pattern sets, see README.compound. + Especially useful for languages with arbitrary number of compounds (Danish, + Dutch, Finnish, German, Hungarian, Icelandic, Norwegian, Swedish etc.). + + - new dictionary parameters (minimal character numbers for hyph. distances): + LEFTHYPHENMIN: minimal hyphenation distance from the left end of the word + RIGHTHYPHENMIN: minimal hyphenation distance from the right end of the word + COMPOUNDLEFTHYPHENMIN: min. hyph. dist. 
from the left compound word boundary + COMPOUNDRIGHTHYPHENMIN: min. hyph. dist. from the right comp. word boundary + + - new API function: hnj_hyphen_hyphenate3() (like hyphenate2(), but + with hyphenmin options) + +en_US hyphenation patterns: + + - extended hyph_en_US.dic with TugBoat hyphenation log (fix thousand + incompletely or badly hyphenated words, for example acad-e-my, acro-nym, + acryl-amide, adren-a-line, aero-space, am-phet-a-mine, anom-aly etc.) + + - fixed hyph_en_US.dic: set the right default hyphenation distance of + the original TeX hyphenation patterns: + LEFTHYPHENMIN 2 + RIGHTHYPHENMIN 3 (not 2!) + It is not only a typographical issue. It seems, TeX hyphenation + patterns are right only with these settings, for example, + the bad "anoma-ly" is restricted in TeX only by the default + \righthyphenmin=3 (but not restricted in OpenOffice.org, until now). + + - documentation (README_hyph_en_US.dic) + + - fixes for automake configuration, compiling and checking, see ChangeLog + +2008-02-19: Hyphen 2.3.1 release: + - fix obsolete API function hnj_hyphen_hyphenate() diff --git a/intl/hyphenation/hyphen/README b/intl/hyphenation/hyphen/README new file mode 100644 index 000000000000..82c612724fe7 --- /dev/null +++ b/intl/hyphenation/hyphen/README @@ -0,0 +1,134 @@ +Hyphen - hyphenation library to use converted TeX hyphenation patterns + +(C) 1998 Raph Levien +(C) 2001 ALTLinux, Moscow +(C) 2006, 2007, 2008, 2010, 2011 László Németh + +This was part of libHnj library by Raph Levien. + +Peter Novodvorsky from ALTLinux cut hyphenation part from libHnj +to use it in OpenOffice.org. + +Compound word and non-standard hyphenation support by László Németh. + +License is the original LibHnj license: +LibHnj is dual licensed under LGPL and MPL (see also README.libhnj). + +Because LGPL allows GPL relicensing, COPYING contains now +LGPL/GPL/MPL tri-license for explicit Mozilla source compatibility. 
+ +Original Libhnj source with OOo's patches are managed by Rene Engelhard +and Chris Halls at Debian: + +http://packages.debian.org/stable/libdevel/libhnj-dev +and http://packages.debian.org/unstable/source/libhnj + + +OTHER FILES + +This distribution is the source of the en_US hyphenation patterns +"hyph_en_US.dic", too. See README_hyph_en_US.txt. + +Source files of hyph_en_US.dic in the distribution: + +hyphen.tex (en_US hyphenation patterns from plain TeX) + + Source: http://tug.ctan.org/text-archive/macros/plain/base/hyphen.tex + +tbhyphext.tex: hyphenation exception log from TugBoat archive + + Source of the hyphenation exception list: + http://www.ctan.org/tex-archive/info/digests/tugboat/tb0hyf.tex + + Generated with the hyphenex script + (http://www.ctan.org/tex-archive/info/digests/tugboat/hyphenex.sh) + + sh hyphenex.sh tbhyphext.tex + + +INSTALLATION + +autoreconf -fvi +./configure +make +make install + +UNIT TESTS (WITH VALGRIND DEBUGGER) + +make check +VALGRIND=memcheck make check + +USAGE + +./example hyph_en_US.dic mywords.txt + +or (under Linux) + +echo example | ./example hyph_en_US.dic /dev/stdin + +NOTE: In the case of Unicode encoded input, convert your words +to lowercase before hyphenation (under UTF-8 console environment): + +cat mywords.txt | awk '{print tolower($0)}' >mywordslow.txt + +BUILD DLL USING CROSS-COMPILATION + +./configure --host i586-mingw32 --prefix=/tmp/hyphen-dll +make +make install + +DEVELOPMENT + +See README.hyphen for hyphenation algorithm, README.nonstandard +and doc/tb87nemeth.pdf for non-standard hyphenation, +README.compound for compound word hyphenation, and tests/*. + +Description of the dictionary format: + +First line contains the character encoding (ISO8859-x, UTF-8). + +Possible options in the following lines: + +LEFTHYPHENMIN num minimal hyphenation distance from the left word end +RIGHTHYPHENMIN num minimal hyphenation distance from the right word end +COMPOUNDLEFTHYPHENMIN num min. hyph. dist. 
from the left compound word boundary +COMPOUNDRIGHTHYPHENMIN num min. hyph. dist. from the right comp. word boundary + +hyphenation patterns see README.* files + +NEXTLEVEL separates the two compound sets (see README.compound) + +Default values: +Without explicit declarations, hyphenmin fields of dict struct +are zeroes, but in this case the lefthyphenmin and righthyphenmin +will be the default 2 under the hyphenation (for backward compatibility). + +Comments + +Use percent sign at the beginning of the lines to add comments to your +hyphenation patterns (after the character encoding in the first line): + +% comment + +***************************************************************************** +* Warning! Correct working of Libhnj *needs* prepared hyphenation patterns. * + +For example, generating hyph_en_US.dic from "hyphen.us" TeX patterns: + +perl substrings.pl hyphen.us hyph_en_US.dic ISO8859-1 + +or with default LEFTHYPHENMIN and RIGHTHYPHENMIN values: + +perl substrings.pl hyphen.us hyph_en_US.dic ISO8859-1 2 3 +perl substrings.pl hyphen.gb hyph_en_GB.dic ISO8859-1 3 3 +**************************************************************************** + +OTHERS + +Java hyphenation: Peter B. West (Folio project) implements a hyphenator with +non standard hyphenation facilities based on extended Libhnj. The HyFo module +is released in binary form as jar files and in source form as zip files. +See http://sourceforge.net/project/showfiles.php?group_id=119136 + +László Németh + diff --git a/intl/hyphenation/hyphen/README.compound b/intl/hyphenation/hyphen/README.compound new file mode 100644 index 000000000000..bcb265853df0 --- /dev/null +++ b/intl/hyphenation/hyphen/README.compound @@ -0,0 +1,87 @@ +New option of Libhyphen 2.7: NOHYPHEN + +Hyphen, apostrophe and other characters may be word boundary characters, +but they don't need (extra) hyphenation. With NOHYPHEN option +it's possible to hyphenate the word parts correctly. 
+ +Example: + +ISO8859-1 +NOHYPHEN -,' +1-1 +1'1 +NEXTLEVEL + +Description: + +1-1 and 1'1 declare hyphen and apostrophe as word boundary characters +and NOHYPHEN with the comma separated character (or character sequence) +list forbids the (extra) hyphens at the hyphen and apostrophe characters. + +Implicit NOHYPHEN declaration + +Without explicit NEXTLEVEL declaration, Hyphen 2.8 uses the +previous settings, plus in UTF-8 encoding, endash (U+2013) and +typographical apostrophe (U+2019) are NOHYPHEN characters, too. + +It's possible to enlarge the hyphenation distance from these +NOHYPHEN characters by using COMPOUNDLEFTHYPHENMIN and +COMPOUNDRIGHTHYPHENMIN attributes. + +Compound word hyphenation + +Hyphen library supports better compound word hyphenation and special +rules of compound word hyphenation of Germanic languages and other +languages with arbitrary number of compound words. The new options, +COMPOUNDLEFTHYPHENMIN and COMPOUNDRIGHTHYPHENMIN help to set the right +style for the hyphenation of compound words. + +Algorithm + +The algorithm is an extension of the original pattern based hyphenation +algorithm. It uses two hyphenation pattern sets, defined in the same +pattern file and separated by the NEXTLEVEL keyword. First pattern +set is for hyphenation only at compound word boundaries, the second one +is for hyphenation within words or word parts. + +Recursive compound level hyphenation + +The algorithm is recursive: every word part of a successful +first (compound) level hyphenation will be rehyphenated +by the same (first) pattern set. + +Finally, when first level hyphenation is not possible, Hyphen uses +the second level hyphenation for the word or the word parts. + +Word endings and word parts + +Patterns for word endings (patterns with ellipses) match the +word parts, too. + +Options + +COMPOUNDLEFTHYPHENMIN: min. hyph. dist. from the left compound word boundary +COMPOUNDRIGHTHYPHENMIN: min. hyph. dist. from the right comp. 
word boundary +NEXTLEVEL: marks the start of the second level hyphenation patterns + +Default hyphenmin values + +Default values of COMPOUNDLEFTHYPHENMIN and COMPOUNDRIGHTHYPHENMIN are 0, +and 0 under the hyphenation, too. ("0" values of +LEFTHYPHENMIN and RIGHTHYPHENMIN mean the default "2" under the hyphenation.) + +Examples + +See tests/compound* test files. + +Preparation of hyphenation patterns + +There is no special pattern generator tool for compound hyphenation +patterns yet. It is possible to use PATGEN to generate both +pattern sets, concatenate them manually and set the requested HYPHENMIN values. +(But don't forget the preprocessing steps by substrings.pl before +concatenation.) One disadvantage of this method is that PATGEN +doesn't know about the recursive compound hyphenation of Hyphen. + +László Németh + diff --git a/intl/hyphenation/hyphen/README.hyphen b/intl/hyphenation/hyphen/README.hyphen new file mode 100644 index 000000000000..8aa8c8767922 --- /dev/null +++ b/intl/hyphenation/hyphen/README.hyphen @@ -0,0 +1,108 @@ +Brief explanation of the hyphenation algorithm herein.[1] + +Raph Levien +4 Aug 1998 + + The hyphenation algorithm is basically the same as Knuth's TeX +algorithm. However, the implementation is quite a bit faster. + + The hyphenation files from TeX can almost be used directly. There +is a preprocessing step, however. If you don't do the preprocessing +step, you'll get bad hyphenations (i.e. a silent failure). + + Start with a file such as hyphen.us. This is the TeX ushyph1.tex +file, with the exception dictionary encoded using the same rules as +the main portion of the file. Any line beginning with % is a comment. +Each other line should contain exactly one rule. + + Then, do the preprocessing - "perl substrings.pl hyphen.us". The +resulting file is hyphen.mashed. It's in Perl, and it's fairly slow +(it uses brute force algorithms; about 17 seconds on a P100), but it +could probably be redone in C with clever algorithms. 
This would be +valuable, for example, if it were to handle user-supplied exception +dictionaries by integrating them into the rule table.[2] + + Once the rules are preprocessed, loading them is quite quick - +about 200ms on a P100. It then hyphenates at about 40,000 words per +second on a P100. I haven't benchmarked it against other +implementations (both TeX and groff contain essentially the same +algorithm), but expect that it runs quite a bit faster than any of +them. + +Knuth's algorithm + + This section contains a brief explanation of Knuth's algorithm, in +case you missed it from the TeX books. We'll use the semi-word +"example" as our running example. + + Since the beginning and end of a word are special, the algorithm is +actually run over the prepared word (prep_word in the source) +".example.". Knuth's algorithm basically just does pattern matches from +the rule set, then applies the matches. The patterns in this case that +match are "xa", "xam", "mp", and "pl". These are actually stored as +"x1a", "xam3", "4m1p", and "1p2l2". Whenever numbers appear between +the letters, they are added in. If two (or more) patterns have numbers +in the same place, the highest number wins. Here's the example: + + . e x a m p l e . + x1a + x a m3 + 4m1p + 1p2l2 + ----------------- + . e x1a4m3p2l2e . + + Finally, hyphens are placed wherever odd numbers appear. They are, +however, suppressed after the first letter and before the last letter +of the word (TeX actually suppresses them before the next-to-last, as +well). So, it's "ex-am-ple", which is correct. + + Knuth uses a trie to implement this. I.e. he stores each rule in a +trie structure. For each position in the word, he searches the trie, +searching for a match. Most patterns are short, so efficiency should +be quite good. + +Theory of the algorithm + + The algorithm works as a slightly modified finite state machine. 
+There are two kinds of transitions: those that consume one letter of +input (which work just like your regular finite state machine), and +"fallback" transitions, which don't consume any input. If no +transition matching the next letter is found, the fallback is used. +One way of looking at this is a form of compression of the transition +tables - i.e. it behaves the same as a completely vanilla state +machine in which the actual transition table of a node is made up of +the union of transition tables of the node itself, plus its fallbacks. + + Each state is represented by a string. Thus, if the current state +is "am" and the next letter is "p", then the next state is "amp". +Fallback transitions go to states which chop off one or (sometimes) +more letters from the beginning. For example, if none of the +transitions from "amp" match the next letter, then it will fall back +to "mp". Similarly, if none of the transitions from "mp" match the +next letter, it will fall back to "m". + + Each state is also associated with a (possibly null) "match" +string. This represents the union of all patterns which are +right-justified substrings of the match string. I.e. the pattern "mp" +is a right-justified substring of the state "amp", so its numbers get +added in. The actual calculation of this union is done by the +Perl preprocessing script, but could probably be done in C just about +as easily. + + Because each state transition either consumes one input character +or shortens the state string by one character, the total number of +state transitions is linear in the length of the word. + +[1] Documentation: + +Franklin M. Liang: Word Hy-phen-a-tion by Com-put-er. +Stanford University, 1983. http://www.tug.org/docs/liang. + +László Németh: Automatic non-standard hyphenation in OpenOffice.org, +TUGboat (27), 2006. No. 
2., http://hunspell.sourceforge.net/tb87nemeth.pdf + +[2] There is the C version of pattern converter "substrings.c" +in the distribution written by Nanning Buitenhuis. Unfortunately, +this version hasn't handled the non standard extension of the +algorithm, yet. diff --git a/intl/hyphenation/hyphen/README.nonstandard b/intl/hyphenation/hyphen/README.nonstandard new file mode 100644 index 000000000000..fd80d12c689f --- /dev/null +++ b/intl/hyphenation/hyphen/README.nonstandard @@ -0,0 +1,122 @@ +Non-standard hyphenation +------------------------ + +Some languages use non-standard hyphenation; `discretionary' +character changes at hyphenation points. For example, +Catalan: paral·lel -> paral-lel, +Dutch: omaatje -> oma-tje, +German (before the new orthography): Schiffahrt -> Schiff-fahrt, +Hungarian: asszonnyal -> asz-szony-nyal (multiple occurrence!) +Swedish: tillata -> till-lata. + +Using this extended library, you can define +non-standard hyphenation patterns. For example: + +l·1l/l=l +a1atje./a=t,1,3 +.schif1fahrt/ff=f,5,2 +.as3szon/sz=sz,2,3 +n1nyal./ny=ny,1,3 +.til1lata./ll=l,3,2 + +or with narrow boundaries: + +l·1l/l=,1,2 +a1atje./a=,1,1 +.schif1fahrt/ff=,5,1 +.as3szon/sz=,2,1 +n1nyal./ny=,1,1 +.til1lata./ll=,3,1 + +Note: Libhnj uses modified patterns prepared by substrings.pl. +Unfortunately, now the conversion step can generate bad non-standard +patterns (non-standard -> standard pattern conversion), so using +narrow boundaries may be better for recent Libhnj. For example, +substrings.pl generates a few bad patterns for Hungarian hyphenation +patterns resulting in bad non-standard hyphenation in a few cases. Using narrow +boundaries solves this problem. Java HyFo module can check this problem. 
+ +Syntax of the non-standard hyphenation patterns +------------------------------------------------ + +pat1tern/change[,start,cut] + +If this pattern matches the word, and this pattern wins (see README.hyphen) +in the change region of the pattern, then pattern[start, start + cut - 1] +substring will be replaced with the "change". + +For example, a German ff -> ff-f hyphenation: + +f1f/ff=f + +or with expansion + +f1f/ff=f,1,2 + +will change every "ff" with "ff=f" at hyphenation. + +A more realistic example: + +% simple ff -> f-f hyphenation +f1f +% Schiffahrt -> Schiff-fahrt hyphenation +% +schif3fahrt/ff=f,5,2 + +Specification + +- Pattern: matching patterns of the original Liang's algorithm + - patterns must contain only one hyphenation point at change region + marked with a one-digit odd number (1, 3, 5, 7 or 9). + This point may be at subregion boundaries: schif3fahrt/ff=,5,1 + - only the greater value guarantees the win (don't mix standard and + non-standard patterns with the same value, for example + instead of f3f and schif3fahrt/ff=f,5,2 use f3f and schif5fahrt/ff=f,5,2) + +- Change: new characters. + Arbitrary character sequence. Equal sign (=) marks hyphenation points + for OpenOffice.org (like in the example). (In a possible German LaTeX + preprocessor, ff could be replaced with "ff, for a Hungarian one, ssz + with `ssz, according to the German and Hungarian Babel settings.) + +- Start: starting position of the change region. + - begins with 1 (not 0): schif3fahrt/ff=f,5,2 + - start dot doesn't matter: .schif3fahrt/ff=f,5,2 + - numbers don't matter: .s2c2h2i2f3f2ahrt/ff=f,5,2 + - In UTF-8 encoding, use Unicode character positions: össze/sz=sz,2,3 + ("össze" looks "Ã¶ssze" in an ISO 8859-1 8-bit editor). + +- Cut: length of the removed character sequence in the original word. + - In UTF-8 encoding, use Unicode character length: paral·1lel/l=l,5,3 + ("paral·lel" looks "paralÂ·1lel" in an ISO 8859-1 8-bit editor). 
+ +Dictionary developing +--------------------- + +There is no extended PatGen pattern generator for non-standard +hyphenation patterns yet. + +Fortunately, non-standard hyphenation points are forbidden in the PatGen +generated hyphenation patterns, so with a little patch, non-standard +hyphenation patterns can be developed in this case, too. + +Warning: If you use UTF-8 Unicode encoding in your patterns, call +substrings.pl with UTF-8 parameter to calculate right +character positions for non-standard hyphenation: + +./substrings.pl input output UTF-8 + +Programming +----------- + +Use hyphenate2() or hyphenate3() to handle non-standard hyphenation. +See hyphen.h for the documentation of the hyphenate*() functions. +See example.c for processing the output of the hyphenate*() functions. + +Warning: change characters are lower cased in the source, so you may need +case conversion of the change characters based on input word case detection. +For example, see OpenOffice.org source +(lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx). + +László Németh + diff --git a/intl/hyphenation/hyphen/hyphen.c b/intl/hyphenation/hyphen/hyphen.c new file mode 100644 index 000000000000..bd7e9a790cbc --- /dev/null +++ b/intl/hyphenation/hyphen/hyphen.c @@ -0,0 +1,1201 @@ +/* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both + * licenses follows. + */ + +/* LibHnj - a library for high quality hyphenation and justification + * Copyright (C) 1998 Raph Levien, + * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), + * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) + * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307 USA. +*/ + +/* + * The contents of this file are subject to the Mozilla Public License + * Version 1.0 (the "MPL"); you may not use this file except in + * compliance with the MPL. You may obtain a copy of the MPL at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the MPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL + * for the specific language governing rights and limitations under the + * MPL. + * + */ +#include /* for NULL, malloc */ +#include /* for fprintf */ +#include /* for strdup */ +#include /* for INT_MAX */ + +#ifdef UNX +#include /* for exit */ +#endif + +#define noVERBOSE + +/* calculate hyphenmin values with long ligature length (2 or 3 characters + * instead of 1 or 2) for comparison with hyphenation without ligatures */ +#define noLONG_LIGATURE + +#ifdef LONG_LIGATURE +#define LIG_xx 1 +#define LIG_xxx 2 +#else +#define LIG_xx 0 +#define LIG_xxx 1 +#endif + +#include "hnjalloc.h" +#include "hyphen.h" + +static char * +hnj_strdup (const char *s) +{ + char *newstr; + int l; + + l = strlen (s); + newstr = (char *) hnj_malloc (l + 1); + memcpy (newstr, s, l); + newstr[l] = 0; + return newstr; +} + +/* remove cross-platform text line end characters */ +void hnj_strchomp(char * s) +{ + int k = strlen(s); + if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0'; + if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0'; +} + +/* a little bit of a hash table implementation. 
This simply maps strings + to state numbers */ + +typedef struct _HashTab HashTab; +typedef struct _HashEntry HashEntry; + +/* A cheap, but effective, hack. */ +#define HASH_SIZE 31627 + +struct _HashTab { + HashEntry *entries[HASH_SIZE]; +}; + +struct _HashEntry { + HashEntry *next; + char *key; + int val; +}; + +/* a char* hash function from ASU - adapted from Gtk+ */ +static unsigned int +hnj_string_hash (const char *s) +{ + const char *p; + unsigned int h=0, g; + for(p = s; *p != '\0'; p += 1) { + h = ( h << 4 ) + *p; + if ( ( g = h & 0xf0000000 ) ) { + h = h ^ (g >> 24); + h = h ^ g; + } + } + return h /* % M */; +} + +static HashTab * +hnj_hash_new (void) +{ + HashTab *hashtab; + int i; + + hashtab = (HashTab *) hnj_malloc (sizeof(HashTab)); + for (i = 0; i < HASH_SIZE; i++) + hashtab->entries[i] = NULL; + + return hashtab; +} + +static void +hnj_hash_free (HashTab *hashtab) +{ + int i; + HashEntry *e, *next; + + for (i = 0; i < HASH_SIZE; i++) + for (e = hashtab->entries[i]; e; e = next) + { + next = e->next; + hnj_free (e->key); + hnj_free (e); + } + + hnj_free (hashtab); +} + +/* assumes that key is not already present! */ +static void +hnj_hash_insert (HashTab *hashtab, const char *key, int val) +{ + int i; + HashEntry *e; + + i = hnj_string_hash (key) % HASH_SIZE; + e = (HashEntry *) hnj_malloc (sizeof(HashEntry)); + e->next = hashtab->entries[i]; + e->key = hnj_strdup (key); + e->val = val; + hashtab->entries[i] = e; +} + +/* return val if found, otherwise -1 */ +static int +hnj_hash_lookup (HashTab *hashtab, const char *key) +{ + int i; + HashEntry *e; + i = hnj_string_hash (key) % HASH_SIZE; + for (e = hashtab->entries[i]; e; e = e->next) + if (!strcmp (key, e->key)) + return e->val; + return -1; +} + +/* Get the state number, allocating a new state if necessary. 
*/ +static int +hnj_get_state (HyphenDict *dict, HashTab *hashtab, const char *string) +{ + int state_num; + + state_num = hnj_hash_lookup (hashtab, string); + + if (state_num >= 0) + return state_num; + + hnj_hash_insert (hashtab, string, dict->num_states); + /* predicate is true if dict->num_states is a power of two */ + if (!(dict->num_states & (dict->num_states - 1))) + { + dict->states = (HyphenState *) hnj_realloc (dict->states, + (dict->num_states << 1) * + sizeof(HyphenState)); + } + dict->states[dict->num_states].match = NULL; + dict->states[dict->num_states].repl = NULL; + dict->states[dict->num_states].fallback_state = -1; + dict->states[dict->num_states].num_trans = 0; + dict->states[dict->num_states].trans = NULL; + return dict->num_states++; +} + +/* add a transition from state1 to state2 through ch - assumes that the + transition does not already exist */ +static void +hnj_add_trans (HyphenDict *dict, int state1, int state2, char ch) +{ + int num_trans; + + num_trans = dict->states[state1].num_trans; + if (num_trans == 0) + { + dict->states[state1].trans = (HyphenTrans *) hnj_malloc (sizeof(HyphenTrans)); + } + else if (!(num_trans & (num_trans - 1))) + { + dict->states[state1].trans = (HyphenTrans *) hnj_realloc (dict->states[state1].trans, + (num_trans << 1) * + sizeof(HyphenTrans)); + } + dict->states[state1].trans[num_trans].ch = ch; + dict->states[state1].trans[num_trans].new_state = state2; + dict->states[state1].num_trans++; +} + +#ifdef VERBOSE +HashTab *global[1]; + +static char * +get_state_str (int state, int level) +{ + int i; + HashEntry *e; + + for (i = 0; i < HASH_SIZE; i++) + for (e = global[level]->entries[i]; e; e = e->next) + if (e->val == state) + return e->key; + return NULL; +} +#endif + +void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) { + int i, j; + char word[MAX_CHARS]; + char pattern[MAX_CHARS]; + char * repl; + signed char replindex; + signed char replcut; + int state_num = 0; + int last_state; + 
char ch; + int found; + + if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { + dict->lhmin = atoi(buf + 13); + return; + } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { + dict->rhmin = atoi(buf + 14); + return; + } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) { + dict->clhmin = atoi(buf + 21); + return; + } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) { + dict->crhmin = atoi(buf + 22); + return; + } else if (strncmp(buf, "NOHYPHEN", 8) == 0) { + char * space = buf + 8; + while (*space != '\0' && (*space == ' ' || *space == '\t')) space++; + if (*buf != '\0') dict->nohyphen = hnj_strdup(space); + if (dict->nohyphen) { + char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1; + *nhe = 0; + for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) { + if (*nhe == ',') { + dict->nohyphenl++; + *nhe = 0; + } + } + } + return; + } + j = 0; + pattern[j] = '0'; + repl = strchr(buf, '/'); + replindex = 0; + replcut = 0; + if (repl) { + char * index = strchr(repl + 1, ','); + *repl = '\0'; + if (index) { + char * index2 = strchr(index + 1, ','); + *index = '\0'; + if (index2) { + *index2 = '\0'; + replindex = (signed char) atoi(index + 1) - 1; + replcut = (signed char) atoi(index2 + 1); + } + } else { + hnj_strchomp(repl + 1); + replindex = 0; + replcut = (signed char) strlen(buf); + } + repl = hnj_strdup(repl + 1); + } + for (i = 0; (unsigned char)buf[i] > (unsigned char)' '; i++) + { + if (buf[i] >= '0' && buf[i] <= '9') + pattern[j] = buf[i]; + else + { + word[j] = buf[i]; + pattern[++j] = '0'; + } + } + word[j] = '\0'; + pattern[j + 1] = '\0'; + + i = 0; + if (!repl) { + /* Optimize away leading zeroes */ + for (; pattern[i] == '0'; i++); + } else { + if (*word == '.') i++; + /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */ + if (dict->utf8) { + int pu = -1; /* unicode character position */ + int ps = -1; /* unicode start position (original replindex) */ + size_t pc = (*word == '.') ? 
1: 0; /* 8-bit character position */ + for (; pc < (strlen(word) + 1); pc++) { + /* beginning of an UTF-8 character (not '10' start bits) */ + if ((((unsigned char) word[pc]) >> 6) != 2) pu++; + if ((ps < 0) && (replindex == pu)) { + ps = replindex; + replindex = (signed char) pc; + } + if ((ps >= 0) && ((pu - ps) == replcut)) { + replcut = (signed char) (pc - replindex); + break; + } + } + if (*word == '.') replindex--; + } + } + +#ifdef VERBOSE + printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl); +#endif + found = hnj_hash_lookup (hashtab, word); + state_num = hnj_get_state (dict, hashtab, word); + dict->states[state_num].match = hnj_strdup (pattern + i); + dict->states[state_num].repl = repl; + dict->states[state_num].replindex = replindex; + if (!replcut) { + dict->states[state_num].replcut = (signed char) strlen(word); + } else { + dict->states[state_num].replcut = replcut; + } + + /* now, put in the prefix transitions */ + for (; found < 0 && j > 0; --j) + { + last_state = state_num; + ch = word[j - 1]; + word[j - 1] = '\0'; + found = hnj_hash_lookup (hashtab, word); + state_num = hnj_get_state (dict, hashtab, word); + hnj_add_trans (dict, state_num, last_state, ch); + } +} + +HyphenDict * +hnj_hyphen_load (const char *fn) +{ + HyphenDict *result; + FILE *f; + f = fopen (fn, "r"); + if (f == NULL) + return NULL; + + result = hnj_hyphen_load_file(f); + + fclose(f); + return result; +} + +HyphenDict * +hnj_hyphen_load_file (FILE *f) +{ + HyphenDict *dict[2]; + HashTab *hashtab; + char buf[MAX_CHARS]; + int nextlevel = 0; + int i, j, k; + HashEntry *e; + int state_num = 0; +/* loading one or two dictionaries (separated by NEXTLEVEL keyword) */ +for (k = 0; k < 2; k++) { + hashtab = hnj_hash_new (); +#ifdef VERBOSE + global[k] = hashtab; +#endif + hnj_hash_insert (hashtab, "", 0); + dict[k] = (HyphenDict *) hnj_malloc (sizeof(HyphenDict)); + dict[k]->num_states = 1; + dict[k]->states = (HyphenState *) hnj_malloc (sizeof(HyphenState)); + 
dict[k]->states[0].match = NULL; + dict[k]->states[0].repl = NULL; + dict[k]->states[0].fallback_state = -1; + dict[k]->states[0].num_trans = 0; + dict[k]->states[0].trans = NULL; + dict[k]->nextlevel = NULL; + dict[k]->lhmin = 0; + dict[k]->rhmin = 0; + dict[k]->clhmin = 0; + dict[k]->crhmin = 0; + dict[k]->nohyphen = NULL; + dict[k]->nohyphenl = 0; + + /* read in character set info */ + if (k == 0) { + for (i=0;icset[i]= 0; + if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { + for (i=0;icset[i] == '\r') || (dict[k]->cset[i] == '\n')) + dict[k]->cset[i] = 0; + } else { + dict[k]->cset[0] = 0; + } + dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); + } else { + strncpy(dict[k]->cset, dict[0]->cset, sizeof(dict[k]->cset)-1); + dict[k]->cset[sizeof(dict[k]->cset)-1] = '\0'; + dict[k]->utf8 = dict[0]->utf8; + } + + if (k == 0 || nextlevel) { + while (fgets(buf, sizeof(buf), f) != NULL) { + + /* discard lines that don't fit in buffer */ + if (!feof(f) && strchr(buf, '\n') == NULL) { + int c; + while ((c = fgetc(f)) != '\n' && c != EOF); + /* issue warning if not a comment */ + if (buf[0] != '%') { + fprintf(stderr, "Warning: skipping too long pattern (more than %zu chars)\n", sizeof(buf)); + } + continue; + } + + if (strncmp(buf, "NEXTLEVEL", 9) == 0) { + nextlevel = 1; + break; + } else if (buf[0] != '%') { + hnj_hyphen_load_line(buf, dict[k], hashtab); + } + } + } else if (k == 1) { + /* default first level: hyphen and ASCII apostrophe */ + if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN ',-\n", dict[k], hashtab); + else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99,-\n", dict[k], hashtab); + strncpy(buf, "1-1\n", MAX_CHARS-1); /* buf rewritten by hnj_hyphen_load here */ + buf[MAX_CHARS-1] = '\0'; + hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */ + hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */ + if (dict[0]->utf8) { + hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */ 
+ hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */ + } + } + + /* Could do unioning of matches here (instead of the preprocessor script). + If we did, the pseudocode would look something like this: + + foreach state in the hash table + foreach i = [1..length(state) - 1] + state to check is substr (state, i) + look it up + if found, and if there is a match, union the match in. + + It's also possible to avoid the quadratic blowup by doing the + search in order of increasing state string sizes - then you + can break the loop after finding the first match. + + This step should be optional in any case - if there is a + preprocessed rule table, it's always faster to use that. + +*/ + + /* put in the fallback states */ + for (i = 0; i < HASH_SIZE; i++) + for (e = hashtab->entries[i]; e; e = e->next) + { + if (*(e->key)) for (j = 1; 1; j++) + { + state_num = hnj_hash_lookup (hashtab, e->key + j); + if (state_num >= 0) + break; + } + /* KBH: FIXME state 0 fallback_state should always be -1? */ + if (e->val) + dict[k]->states[e->val].fallback_state = state_num; + } +#ifdef VERBOSE + for (i = 0; i < HASH_SIZE; i++) + for (e = hashtab->entries[i]; e; e = e->next) + { + printf ("%d string %s state %d, fallback=%d\n", i, e->key, e->val, + dict[k]->states[e->val].fallback_state); + for (j = 0; j < dict[k]->states[e->val].num_trans; j++) + printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch, + dict[k]->states[e->val].trans[j].new_state); + } +#endif + +#ifndef VERBOSE + hnj_hash_free (hashtab); +#endif + state_num = 0; +} + if (nextlevel) dict[0]->nextlevel = dict[1]; + else { + dict[1] -> nextlevel = dict[0]; + dict[1]->lhmin = dict[0]->lhmin; + dict[1]->rhmin = dict[0]->rhmin; + dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3); + dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? 
dict[0]->rhmin : 3); +#ifdef VERBOSE + HashTab *r = global[0]; + global[0] = global[1]; + global[1] = r; +#endif + return dict[1]; + } + return dict[0]; +} + +void hnj_hyphen_free (HyphenDict *dict) +{ + int state_num; + HyphenState *hstate; + + for (state_num = 0; state_num < dict->num_states; state_num++) + { + hstate = &dict->states[state_num]; + if (hstate->match) + hnj_free (hstate->match); + if (hstate->repl) + hnj_free (hstate->repl); + if (hstate->trans) + hnj_free (hstate->trans); + } + if (dict->nextlevel) hnj_hyphen_free(dict->nextlevel); + + if (dict->nohyphen) hnj_free(dict->nohyphen); + + hnj_free (dict->states); + + hnj_free (dict); +} + +#define MAX_WORD 256 + +int hnj_hyphen_hyphenate (HyphenDict *dict, + const char *word, int word_size, + char *hyphens) +{ + char *prep_word; + int i, j, k; + int state; + char ch; + HyphenState *hstate; + char *match; + int offset; + + prep_word = (char*) hnj_malloc (word_size + 3); + + j = 0; + prep_word[j++] = '.'; + + for (i = 0; i < word_size; i++) { + if (word[i] <= '9' && word[i] >= '0') { + prep_word[j++] = '.'; + } else { + prep_word[j++] = word[i]; + } + } + + prep_word[j++] = '.'; + prep_word[j] = '\0'; + + for (i = 0; i < word_size + 5; i++) + hyphens[i] = '0'; + +#ifdef VERBOSE + printf ("prep_word = %s\n", prep_word); +#endif + + /* now, run the finite state machine */ + state = 0; + for (i = 0; i < j; i++) + { + ch = prep_word[i]; + for (;;) + { + + if (state == -1) { + /* return 1; */ + /* KBH: FIXME shouldn't this be as follows? 
*/ + state = 0; + goto try_next_letter; + } + +#ifdef VERBOSE + char *state_str; + state_str = get_state_str (state, 0); + + for (k = 0; k < i - strlen (state_str); k++) + putchar (' '); + printf ("%s", state_str); +#endif + + hstate = &dict->states[state]; + for (k = 0; k < hstate->num_trans; k++) + if (hstate->trans[k].ch == ch) + { + state = hstate->trans[k].new_state; + goto found_state; + } + state = hstate->fallback_state; +#ifdef VERBOSE + printf (" falling back, fallback_state %d\n", state); +#endif + } + found_state: +#ifdef VERBOSE + printf ("found state %d\n",state); +#endif + /* Additional optimization is possible here - especially, + elimination of trailing zeroes from the match. Leading zeroes + have already been optimized. */ + match = dict->states[state].match; + /* replacing rules not handled by hyphen_hyphenate() */ + if (match && !dict->states[state].repl) + { + offset = i + 1 - strlen (match); +#ifdef VERBOSE + for (k = 0; k < offset; k++) + putchar (' '); + printf ("%s\n", match); +#endif + /* This is a linear search because I tried a binary search and + found it to be just a teeny bit slower. 
*/ + for (k = 0; match[k]; k++) + if (hyphens[offset + k] < match[k]) + hyphens[offset + k] = match[k]; + } + + /* KBH: we need this to make sure we keep looking in a word */ + /* for patterns even if the current character is not known in state 0 */ + /* since patterns for hyphenation may occur anywhere in the word */ + try_next_letter: ; + + } +#ifdef VERBOSE + for (i = 0; i < j; i++) + putchar (hyphens[i]); + putchar ('\n'); +#endif + + for (i = 0; i < j - 4; i++) +#if 0 + if (hyphens[i + 1] & 1) + hyphens[i] = '-'; +#else + hyphens[i] = hyphens[i + 1]; +#endif + hyphens[0] = '0'; + for (; i < word_size; i++) + hyphens[i] = '0'; + hyphens[word_size] = '\0'; + + hnj_free (prep_word); + + return 0; +} + +/* Unicode ligature length */ +int hnj_ligature(unsigned char c) { + switch (c) { + case 0x80: /* ff */ + case 0x81: /* fi */ + case 0x82: return LIG_xx; /* fl */ + case 0x83: /* ffi */ + case 0x84: return LIG_xxx; /* ffl */ + case 0x85: /* long st */ + case 0x86: return LIG_xx; /* st */ + } + return 0; +} + +/* character length of the first n byte of the input word */ +int hnj_hyphen_strnlen(const char * word, int n, int utf8) +{ + int i = 0; + int j = 0; + while (j < n && word[j] != '\0') { + i++; + /* Unicode ligature support */ + if (utf8 && ((unsigned char) word[j] == 0xEF) && ((unsigned char) word[j + 1] == 0xAC)) { + i += hnj_ligature(word[j + 2]); + } + for (j++; utf8 && (word[j] & 0xc0) == 0x80; j++); + } + return i; +} + +int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens, + char *** rep, int ** pos, int ** cut, int lhmin) +{ + int i = 1, j; + + /* Unicode ligature support */ + if (utf8 && ((unsigned char) word[0] == 0xEF) && ((unsigned char) word[1] == 0xAC)) { + i += hnj_ligature(word[2]); + } + + /* ignore numbers */ + for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--; + + for (j = 0; i < lhmin && word[j] != '\0'; i++) do { + /* check length of the non-standard part */ + if (*rep && *pos && *cut && (*rep)[j]) { + char * 
rh = strchr((*rep)[j], '='); + if (rh && (hnj_hyphen_strnlen(word, j - (*pos)[j] + 1, utf8) + + hnj_hyphen_strnlen((*rep)[j], rh - (*rep)[j], utf8)) < lhmin) { + free((*rep)[j]); + (*rep)[j] = NULL; + hyphens[j] = '0'; + } + } else { + hyphens[j] = '0'; + } + j++; + + /* Unicode ligature support */ + if (utf8 && ((unsigned char) word[j] == 0xEF) && ((unsigned char) word[j + 1] == 0xAC)) { + i += hnj_ligature(word[j + 2]); + } + } while (utf8 && (word[j] & 0xc0) == 0x80); + return 0; +} + +int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens, + char *** rep, int ** pos, int ** cut, int rhmin) +{ + int i = 0; + int j; + + /* ignore numbers */ + for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--; + + for (j = word_size - 1; i < rhmin && j > 0; j--) { + /* check length of the non-standard part */ + if (*rep && *pos && *cut && (*rep)[j]) { + char * rh = strchr((*rep)[j], '='); + if (rh && (hnj_hyphen_strnlen(word + j - (*pos)[j] + (*cut)[j] + 1, 100, utf8) + + hnj_hyphen_strnlen(rh + 1, strlen(rh + 1), utf8)) < rhmin) { + free((*rep)[j]); + (*rep)[j] = NULL; + hyphens[j] = '0'; + } + } else { + hyphens[j] = '0'; + } + if (!utf8 || (word[j] & 0xc0) == 0xc0 || (word[j] & 0x80) != 0x80) i++; + } + return 0; +} + +/* recursive function for compound level hyphenation */ +int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size, + char * hyphens, char *** rep, int ** pos, int ** cut, + int clhmin, int crhmin, int lend, int rend) +{ + char *prep_word; + int i, j, k; + int state; + char ch; + HyphenState *hstate; + char *match; + char *repl; + signed char replindex; + signed char replcut; + int offset; + int * matchlen; + int * matchindex; + char ** matchrepl; + int isrepl = 0; + int nHyphCount; + + size_t prep_word_size = word_size + 3; + prep_word = (char*) hnj_malloc (prep_word_size); + matchlen = (int*) hnj_malloc ((word_size + 3) * sizeof(int)); + matchindex = (int*) hnj_malloc ((word_size + 3) * 
sizeof(int)); + matchrepl = (char**) hnj_malloc ((word_size + 3) * sizeof(char *)); + + j = 0; + prep_word[j++] = '.'; + + for (i = 0; i < word_size; i++) { + if (word[i] <= '9' && word[i] >= '0') { + prep_word[j++] = '.'; + } else { + prep_word[j++] = word[i]; + } + } + + + + prep_word[j++] = '.'; + prep_word[j] = '\0'; + + for (i = 0; i < j; i++) + hyphens[i] = '0'; + +#ifdef VERBOSE + printf ("prep_word = %s\n", prep_word); +#endif + + /* now, run the finite state machine */ + state = 0; + for (i = 0; i < j; i++) + { + ch = prep_word[i]; + for (;;) + { + + if (state == -1) { + /* return 1; */ + /* KBH: FIXME shouldn't this be as follows? */ + state = 0; + goto try_next_letter; + } + +#ifdef VERBOSE + char *state_str; + state_str = get_state_str (state, 1); + + for (k = 0; k < i - strlen (state_str); k++) + putchar (' '); + printf ("%s", state_str); +#endif + + hstate = &dict->states[state]; + for (k = 0; k < hstate->num_trans; k++) + if (hstate->trans[k].ch == ch) + { + state = hstate->trans[k].new_state; + goto found_state; + } + state = hstate->fallback_state; +#ifdef VERBOSE + printf (" falling back, fallback_state %d\n", state); +#endif + } + found_state: +#ifdef VERBOSE + printf ("found state %d\n",state); +#endif + /* Additional optimization is possible here - especially, + elimination of trailing zeroes from the match. Leading zeroes + have already been optimized. 
*/ + match = dict->states[state].match; + repl = dict->states[state].repl; + replindex = dict->states[state].replindex; + replcut = dict->states[state].replcut; + /* replacing rules not handled by hyphen_hyphenate() */ + if (match) + { + offset = i + 1 - strlen (match); +#ifdef VERBOSE + for (k = 0; k < offset; k++) + putchar (' '); + printf ("%s (%s)\n", match, repl); +#endif + if (repl) { + if (!isrepl) for(; isrepl < word_size; isrepl++) { + matchrepl[isrepl] = NULL; + matchindex[isrepl] = -1; + } + matchlen[offset + replindex] = replcut; + } + /* This is a linear search because I tried a binary search and + found it to be just a teeny bit slower. */ + for (k = 0; match[k]; k++) { + if ((hyphens[offset + k] < match[k])) { + hyphens[offset + k] = match[k]; + if (match[k]&1) { + matchrepl[offset + k] = repl; + if (repl && (k >= replindex) && (k <= replindex + replcut)) { + matchindex[offset + replindex] = offset + k; + } + } + } + } + + } + + /* KBH: we need this to make sure we keep looking in a word */ + /* for patterns even if the current character is not known in state 0 */ + /* since patterns for hyphenation may occur anywhere in the word */ + try_next_letter: ; + + } +#ifdef VERBOSE + for (i = 0; i < j; i++) + putchar (hyphens[i]); + putchar ('\n'); +#endif + + for (i = 0; i < j - 3; i++) +#if 0 + if (hyphens[i + 1] & 1) + hyphens[i] = '-'; +#else + hyphens[i] = hyphens[i + 1]; +#endif + for (; i < word_size; i++) + hyphens[i] = '0'; + hyphens[word_size] = '\0'; + + /* now create a new char string showing hyphenation positions */ + /* count the hyphens and allocate space for the new hyphenated string */ + nHyphCount = 0; + for (i = 0; i < word_size; i++) + if (hyphens[i]&1) + nHyphCount++; + j = 0; + for (i = 0; i < word_size; i++) { + if (isrepl && (matchindex[i] >= 0) && matchrepl[matchindex[i]]) { + if (rep && pos && cut) { + if (!*rep) + *rep = (char **) calloc(word_size, sizeof(char *)); + if (!*pos) + *pos = (int *) calloc(word_size, sizeof(int)); + if 
(!*cut) { + *cut = (int *) calloc(word_size, sizeof(int)); + } + (*rep)[matchindex[i] - 1] = hnj_strdup(matchrepl[matchindex[i]]); + (*pos)[matchindex[i] - 1] = matchindex[i] - i; + (*cut)[matchindex[i] - 1] = matchlen[i]; + } + j += strlen(matchrepl[matchindex[i]]); + i += matchlen[i] - 1; + } + } + + hnj_free (matchrepl); + hnj_free (matchlen); + hnj_free (matchindex); + + /* recursive hyphenation of the first (compound) level segments */ + if (dict->nextlevel) { + char ** rep2; + int * pos2; + int * cut2; + char * hyphens2; + int begin = 0; + + rep2 = (char**) hnj_malloc (word_size * sizeof(char *)); + pos2 = (int*) hnj_malloc (word_size * sizeof(int)); + cut2 = (int*) hnj_malloc (word_size * sizeof(int)); + hyphens2 = (char*) hnj_malloc (word_size + 3); + for (i = 0; i < word_size; i++) rep2[i] = NULL; + for (i = 0; i < word_size; i++) if + (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { + if (i - begin > 0) { + int hyph = 0; + prep_word[i + 2] = '\0'; + /* non-standard hyphenation at compound boundary (Schiffahrt) */ + if (rep && *rep && *pos && *cut && (*rep)[i]) { + char * l = strchr((*rep)[i], '='); + size_t offset = 2 + i - (*pos)[i]; + strncpy(prep_word + offset, (*rep)[i], prep_word_size - offset - 1); + prep_word[prep_word_size - 1] = '\0'; + if (l) { + hyph = (l - (*rep)[i]) - (*pos)[i]; + prep_word[2 + i + hyph] = '\0'; + } + } + hnj_hyphen_hyph_(dict, prep_word + begin + 1, i - begin + 1 + hyph, + hyphens2, &rep2, &pos2, &cut2, clhmin, + crhmin, (begin > 0 ? 0 : lend), (hyphens[i]&1 ? 
0 : rend)); + for (j = 0; j < i - begin; j++) { + hyphens[begin + j] = hyphens2[j]; + if (rep2[j] && rep && pos && cut) { + if (!*rep && !*pos && !*cut) { + int k; + *rep = (char **) malloc(sizeof(char *) * word_size); + *pos = (int *) malloc(sizeof(int) * word_size); + *cut = (int *) malloc(sizeof(int) * word_size); + for (k = 0; k < word_size; k++) { + (*rep)[k] = NULL; + (*pos)[k] = 0; + (*cut)[k] = 0; + } + } + (*rep)[begin + j] = rep2[j]; + (*pos)[begin + j] = pos2[j]; + (*cut)[begin + j] = cut2[j]; + } + } + prep_word[i + 2] = word[i + 1]; + if (*rep && *pos && *cut && (*rep)[i]) { + size_t offset = 1; + strncpy(prep_word + offset, word, prep_word_size - offset - 1); + prep_word[prep_word_size - 1] = '\0'; + } + } + begin = i + 1; + for (j = 0; j < word_size; j++) rep2[j] = NULL; + } + + /* non-compound */ + if (begin == 0) { + hnj_hyphen_hyph_(dict->nextlevel, word, word_size, + hyphens, rep, pos, cut, clhmin, crhmin, lend, rend); + if (!lend) hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, + rep, pos, cut, clhmin); + if (!rend) hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, + rep, pos, cut, crhmin); + } + + free(rep2); + free(cut2); + free(pos2); + free(hyphens2); + } + + hnj_free (prep_word); + return 0; +} + +/* UTF-8 normalization of hyphen and non-standard positions */ +int hnj_hyphen_norm(const char *word, int word_size, char * hyphens, + char *** rep, int ** pos, int ** cut) +{ + int i, j, k; + if ((((unsigned char) word[0]) >> 6) == 2) { + fprintf(stderr, "error - bad, non UTF-8 input: %s\n", word); + return 1; + } + + /* calculate UTF-8 character positions */ + for (i = 0, j = -1; i < word_size; i++) { + /* beginning of an UTF-8 character (not '10' start bits) */ + if ((((unsigned char) word[i]) >> 6) != 2) j++; + hyphens[j] = hyphens[i]; + if (rep && pos && cut && *rep && *pos && *cut) { + int l = (*pos)[i]; + (*pos)[j] = 0; + for (k = 0; k < l; k++) { + if ((((unsigned char) word[i - k]) >> 6) != 2) (*pos)[j]++; + } + k = i - l + 1; 
+ l = k + (*cut)[i]; + (*cut)[j] = 0; + for (; k < l; k++) { + if ((((unsigned char) word[k]) >> 6) != 2) (*cut)[j]++; + } + (*rep)[j] = (*rep)[i]; + if (j < i) { + (*rep)[i] = NULL; + (*pos)[i] = 0; + (*cut)[i] = 0; + } + } + } + hyphens[j + 1] = '\0'; +#ifdef VERBOSE + printf ("nums: %s\n", hyphens); +#endif + return 0; +} + +/* get the word with all possible hyphenations (output: hyphword) */ +void hnj_hyphen_hyphword(const char * word, int word_size, const char * hyphens, + char * hyphword, char *** rep, int ** pos, int ** cut) +{ + + if (word_size <= 0 || word_size > INT_MAX / 2) { + hyphword[0] = '\0'; + return; + } + + /* hyphword buffer size must be at least 2 * l */ + int hyphword_size = 2 * word_size - 1; + + int nonstandard = 0; + if (*rep && *pos && *cut) { + nonstandard = 1; + } + + int i; + int j = 0; + for (i = 0; i < word_size && j < hyphword_size; i++) { + hyphword[j++] = word[i]; + if (hyphens[i]&1 && j < hyphword_size) { + if (nonstandard && (*rep)[i] && j >= (*pos)[i]) { + /* non-standard */ + j -= (*pos)[i]; + char *s = (*rep)[i]; + while (*s && j < hyphword_size) { + hyphword[j++] = *s++; + } + i += (*cut)[i] - (*pos)[i]; + } else { + /* standard */ + hyphword[j++] = '='; + } + } + } + hyphword[j] = '\0'; +} + + +/* main api function with default hyphenmin parameters */ +int hnj_hyphen_hyphenate2 (HyphenDict *dict, + const char *word, int word_size, char * hyphens, + char *hyphword, char *** rep, int ** pos, int ** cut) +{ + hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, + dict->clhmin, dict->crhmin, 1, 1); + hnj_hyphen_lhmin(dict->utf8, word, word_size, + hyphens, rep, pos, cut, (dict->lhmin > 0 ? dict->lhmin : 2)); + hnj_hyphen_rhmin(dict->utf8, word, word_size, + hyphens, rep, pos, cut, (dict->rhmin > 0 ? 
dict->rhmin : 2)); + + /* nohyphen */ + if (dict->nohyphen) { + char * nh = dict->nohyphen; + int nhi; + for (nhi = 0; nhi <= dict->nohyphenl; nhi++) { + char * nhy = (char *) strstr(word, nh); + while (nhy) { + hyphens[nhy - word + strlen(nh) - 1] = '0'; + if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = '0'; + nhy = (char *) strstr(nhy + 1, nh); + } + nh = nh + strlen(nh) + 1; + } + } + + if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); + if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); +#ifdef VERBOSE + printf ("nums: %s\n", hyphens); +#endif + return 0; +} + +/* previous main api function with hyphenmin parameters */ +int hnj_hyphen_hyphenate3 (HyphenDict *dict, + const char *word, int word_size, char * hyphens, + char *hyphword, char *** rep, int ** pos, int ** cut, + int lhmin, int rhmin, int clhmin, int crhmin) +{ + lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin; + rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin; + clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin; + crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin; + hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, + clhmin, crhmin, 1, 1); + hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, + rep, pos, cut, (lhmin > 0 ? lhmin : 2)); + hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, + rep, pos, cut, (rhmin > 0 ? 
rhmin : 2)); + if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); + + /* nohyphen */ + if (dict->nohyphen) { + char * nh = dict->nohyphen; + int nhi; + for (nhi = 0; nhi <= dict->nohyphenl; nhi++) { + char * nhy = (char *) strstr(word, nh); + while (nhy) { + hyphens[nhy - word + strlen(nh) - 1] = 0; + if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = 0; + nhy = (char *) strstr(nhy + 1, nh); + } + nh = nh + strlen(nh) + 1; + } + } + + if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); + return 0; +} diff --git a/intl/hyphenation/hyphen/hyphen.h b/intl/hyphenation/hyphen/hyphen.h new file mode 100644 index 000000000000..2b4e146421ff --- /dev/null +++ b/intl/hyphenation/hyphen/hyphen.h @@ -0,0 +1,175 @@ +/* Hyphen - hyphenation library using converted TeX hyphenation patterns + * + * (C) 1998 Raph Levien + * (C) 2001 ALTLinux, Moscow + * (C) 2006, 2007, 2008 László Németh + * + * This was part of libHnj library by Raph Levien. + * + * Peter Novodvorsky from ALTLinux cut hyphenation part from libHnj + * to use it in OpenOffice.org. + * + * Non-standard and compound word hyphenation support by László Németh. + * + * License is the original LibHnj license: + * + * LibHnj is dual licensed under LGPL and MPL. Boilerplate for both + * licenses follows. + */ + +/* LibHnj - a library for high quality hyphenation and justification + * Copyright (C) 1998 Raph Levien + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307 USA. +*/ + +/* + * The contents of this file are subject to the Mozilla Public License + * Version 1.0 (the "MPL"); you may not use this file except in + * compliance with the MPL. You may obtain a copy of the MPL at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the MPL is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL + * for the specific language governing rights and limitations under the + * MPL. + * + */ +#ifndef __HYPHEN_H__ +#define __HYPHEN_H__ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include + +typedef struct _HyphenDict HyphenDict; +typedef struct _HyphenState HyphenState; +typedef struct _HyphenTrans HyphenTrans; +#define MAX_CHARS 100 +#define MAX_NAME 20 + +struct _HyphenDict { + /* user options */ + char lhmin; /* lefthyphenmin: min. hyph. distance from the left side */ + char rhmin; /* righthyphenmin: min. hyph. distance from the right side */ + char clhmin; /* min. hyph. distance from the left compound boundary */ + char crhmin; /* min. hyph. 
distance from the right compound boundary */ + char * nohyphen; /* comma separated list of characters or character + sequences with forbidden hyphenation */ + int nohyphenl; /* count of elements in nohyphen */ + /* system variables */ + int num_states; + char cset[MAX_NAME]; + int utf8; + HyphenState *states; + HyphenDict *nextlevel; +}; + +struct _HyphenState { + char *match; + char *repl; + signed char replindex; + signed char replcut; + int fallback_state; + int num_trans; + HyphenTrans *trans; +}; + +struct _HyphenTrans { + char ch; + int new_state; +}; + +HyphenDict *hnj_hyphen_load (const char *fn); +HyphenDict *hnj_hyphen_load_file (FILE *f); +void hnj_hyphen_free (HyphenDict *dict); + +/* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */ +int hnj_hyphen_hyphenate (HyphenDict *dict, + const char *word, int word_size, + char *hyphens); + +/* + + int hnj_hyphen_hyphenate2(): non-standard hyphenation. + + (It supports Catalan, Dutch, German, Hungarian, Norwegian, Swedish + etc. orthography, see documentation.) + + input data: + word: input word + word_size: byte length of the input word + + hyphens: allocated character buffer (size = word_size + 5) + hyphenated_word: allocated character buffer (size ~ word_size * 2) or NULL + rep, pos, cut: pointers (point to the allocated and _zeroed_ buffers + (size=word_size) or with NULL value) or NULL + + output data: + hyphens: hyphenation vector (hyphenation points signed with odd numbers) + hyphenated_word: hyphenated input word (hyphens signed with `='), + optional (NULL input) + rep: NULL (only standard hyph.), or replacements (hyphenation points + signed with `=' in replacements); + pos: NULL, or difference of the actual position and the beginning + positions of the change in input words; + cut: NULL, or counts of the removed characters of the original words + at hyphenation, + + Note: rep, pos, cut are complementary arrays to the hyphens, indexed with the + character positions of the input word. 
+ + For example: + Schiffahrt -> Schiff=fahrt, + pattern: f1f/ff=f,1,2 + output: rep[5]="ff=f", pos[5] = 1, cut[5] = 2 + + Note: hnj_hyphen_hyphenate2() can allocate rep, pos, cut (word_size + length arrays): + + char ** rep = NULL; + int * pos = NULL; + int * cut = NULL; + char hyphens[MAXWORDLEN]; + hnj_hyphen_hyphenate2(dict, "example", 7, hyphens, NULL, &rep, &pos, &cut); + + See example in the source distribution. + +*/ + +int hnj_hyphen_hyphenate2 (HyphenDict *dict, + const char *word, int word_size, char * hyphens, + char *hyphenated_word, char *** rep, int ** pos, int ** cut); + +/* like hnj_hyphen_hyphenate2, but with hyphenmin parameters */ +/* lhmin: lefthyphenmin + * rhmin: righthyphenmin + * clhmin: compoundlefthyphemin + * crhmin: compoundrighthyphenmin + * (see documentation) */ + +int hnj_hyphen_hyphenate3 (HyphenDict *dict, + const char *word, int word_size, char * hyphens, + char *hyphword, char *** rep, int ** pos, int ** cut, + int lhmin, int rhmin, int clhmin, int crhmin); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __HYPHEN_H__ */ diff --git a/intl/hyphenation/hyphen/moz.build b/intl/hyphenation/hyphen/moz.build new file mode 100644 index 000000000000..a93ab6835e2b --- /dev/null +++ b/intl/hyphenation/hyphen/moz.build @@ -0,0 +1,19 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# These files cannot be built in unified mode because they include hnjalloc.h. +SOURCES += [ + 'hyphen.c', +] + +FINAL_LIBRARY = 'xul' + +LOCAL_INCLUDES += [ + '../glue', +] + +# We allow warnings for third-party code that can be updated from upstream. 
+AllowCompilerWarnings() diff --git a/intl/locales/af/hyphenation/hyph_af.hyf b/intl/locales/af/hyphenation/hyph_af.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/bg/hyphenation/hyph_bg.hyf b/intl/locales/bg/hyphenation/hyph_bg.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/ca/hyphenation/hyph_ca.hyf b/intl/locales/ca/hyphenation/hyph_ca.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/cy/hyphenation/hyph_cy.hyf b/intl/locales/cy/hyphenation/hyph_cy.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/da/hyphenation/hyph_da.hyf b/intl/locales/da/hyphenation/hyph_da.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/de-1901/hyphenation/hyph_de-1901.hyf b/intl/locales/de-1901/hyphenation/hyph_de-1901.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/de-1996/hyphenation/hyph_de-1996.hyf b/intl/locales/de-1996/hyphenation/hyph_de-1996.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/de-CH/hyphenation/hyph_de-CH.hyf b/intl/locales/de-CH/hyphenation/hyph_de-CH.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/en-US/hyphenation/hyph_en_US.hyf b/intl/locales/en-US/hyphenation/hyph_en_US.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/eo/hyphenation/hyph_eo.hyf b/intl/locales/eo/hyphenation/hyph_eo.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/es/hyphenation/hyph_es.hyf b/intl/locales/es/hyphenation/hyph_es.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/et/hyphenation/hyph_et.hyf b/intl/locales/et/hyphenation/hyph_et.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/fi/hyphenation/hyph_fi.hyf 
b/intl/locales/fi/hyphenation/hyph_fi.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/fr/hyphenation/hyph_fr.hyf b/intl/locales/fr/hyphenation/hyph_fr.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/gl/hyphenation/hyph_gl.hyf b/intl/locales/gl/hyphenation/hyph_gl.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/hr/hyphenation/hyph_hr.hyf b/intl/locales/hr/hyphenation/hyph_hr.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/hsb/hyphenation/hyph_hsb.dic b/intl/locales/hsb/hyphenation/hyph_hsb.dic index 98f40b4bb4b4..db038cc7a3d3 100644 --- a/intl/locales/hsb/hyphenation/hyph_hsb.dic +++ b/intl/locales/hsb/hyphenation/hyph_hsb.dic @@ -1589,8 +1589,8 @@ izn4j iz1no 2z1p 2z1s -.w8a8r9s8z8a9w8a. -.warsza3w2a +.W8a8r9s8z8a9w8a. +.Warsza3w2a .d8o9z8n8a. .do1z2na1 .n8j8e8j9s8y8m. diff --git a/intl/locales/hsb/hyphenation/hyph_hsb.hyf b/intl/locales/hsb/hyphenation/hyph_hsb.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/hu/hyphenation/hyph_hu.hyf b/intl/locales/hu/hyphenation/hyph_hu.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/ia/hyphenation/hyph_ia.hyf b/intl/locales/ia/hyphenation/hyph_ia.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/is/hyphenation/hyph_is.hyf b/intl/locales/is/hyphenation/hyph_is.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/it/hyphenation/hyph_it.hyf b/intl/locales/it/hyphenation/hyph_it.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/kmr/hyphenation/hyph_kmr.hyf b/intl/locales/kmr/hyphenation/hyph_kmr.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/la/hyphenation/hyph_la.hyf b/intl/locales/la/hyphenation/hyph_la.hyf deleted file mode 100644 index 
e69de29bb2d1..000000000000 diff --git a/intl/locales/lt/hyphenation/hyph_lt.hyf b/intl/locales/lt/hyphenation/hyph_lt.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/mn/hyphenation/hyph_mn.hyf b/intl/locales/mn/hyphenation/hyph_mn.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/moz.build b/intl/locales/moz.build index 958911c0a2d5..a952f6ebb491 100644 --- a/intl/locales/moz.build +++ b/intl/locales/moz.build @@ -42,7 +42,7 @@ locales = [ 'tr', 'uk', ] -filename = '{locale}/hyphenation/hyph_{locale}.hyf' +filename = '{locale}/hyphenation/hyph_{locale}.dic' FINAL_TARGET_FILES.hyphenation += [filename.format(locale=locale) for locale in locales] # en-US is a special case: the dic file is named like en_US. -FINAL_TARGET_FILES.hyphenation += ['en-US/hyphenation/hyph_en_US.hyf'] +FINAL_TARGET_FILES.hyphenation += ['en-US/hyphenation/hyph_en_US.dic'] diff --git a/intl/locales/nb/hyphenation/hyph_nb.hyf b/intl/locales/nb/hyphenation/hyph_nb.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/nl/hyphenation/hyph_nl.hyf b/intl/locales/nl/hyphenation/hyph_nl.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/nn/hyphenation/hyph_nn.hyf b/intl/locales/nn/hyphenation/hyph_nn.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/pl/hyphenation/hyph_pl.hyf b/intl/locales/pl/hyphenation/hyph_pl.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/pt/hyphenation/hyph_pt.hyf b/intl/locales/pt/hyphenation/hyph_pt.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/ru/hyphenation/hyph_ru.hyf b/intl/locales/ru/hyphenation/hyph_ru.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/sh/hyphenation/hyph_sh.hyf b/intl/locales/sh/hyphenation/hyph_sh.hyf deleted file mode 100644 index 
e69de29bb2d1..000000000000 diff --git a/intl/locales/sl/hyphenation/hyph_sl.hyf b/intl/locales/sl/hyphenation/hyph_sl.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/sv/hyphenation/hyph_sv.hyf b/intl/locales/sv/hyphenation/hyph_sv.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/tr/hyphenation/hyph_tr.hyf b/intl/locales/tr/hyphenation/hyph_tr.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/locales/uk/hyphenation/hyph_uk.hyf b/intl/locales/uk/hyphenation/hyph_uk.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/intl/moz.build b/intl/moz.build index 4d1906f3a45c..05d94899a111 100644 --- a/intl/moz.build +++ b/intl/moz.build @@ -9,6 +9,7 @@ TEST_DIRS += [ ] DIRS += [ + 'hyphenation/hyphen', 'hyphenation/glue', 'locale', 'locales', diff --git a/layout/style/RunCbindgen.py b/layout/style/RunCbindgen.py index 9a88fae37bb7..d49943a36e34 100644 --- a/layout/style/RunCbindgen.py +++ b/layout/style/RunCbindgen.py @@ -29,8 +29,7 @@ def generate(output, cbindgen_crate_path, *in_tree_dependencies): "--lockfile", CARGO_LOCK, "--crate", - _get_crate_name(cbindgen_crate_path), - "--cpp-compat" + _get_crate_name(cbindgen_crate_path) ], env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = p.communicate() diff --git a/testing/testsuite-targets.mk b/testing/testsuite-targets.mk index 5bb9a998e14e..739301972cea 100644 --- a/testing/testsuite-targets.mk +++ b/testing/testsuite-targets.mk @@ -224,7 +224,7 @@ stage-android: make-stage-dir $(NSINSTALL) $(topsrcdir)/mobile/android/fonts $(DEPTH)/_tests/reftest $(NSINSTALL) $(topsrcdir)/mobile/android/fonts $(DEPTH)/_tests/testing/mochitest $(NSINSTALL) -D $(DEPTH)/_tests/reftest/hyphenation - $(NSINSTALL) $(wildcard $(topsrcdir)/intl/locales/*/hyphenation/*.hyf) $(DEPTH)/_tests/reftest/hyphenation + $(NSINSTALL) $(wildcard $(topsrcdir)/intl/locales/*/hyphenation/*.dic) 
$(DEPTH)/_tests/reftest/hyphenation ifdef MOZ_COPY_PDBS CPP_UNIT_TEST_BINS=$(filter-out $(wildcard $(DIST)/cppunittests/*.pdb), $(wildcard $(DIST)/cppunittests/*)) diff --git a/third_party/rust/mapped_hyph/.cargo-checksum.json b/third_party/rust/mapped_hyph/.cargo-checksum.json deleted file mode 100644 index 9c7e639fa3c5..000000000000 --- a/third_party/rust/mapped_hyph/.cargo-checksum.json +++ /dev/null @@ -1 +0,0 @@ -{"files":{"COPYRIGHT":"4df931055b82b96e13ad475c4cee3de5afa69a54a4c611c9d7dc6252d858d9c8","Cargo.toml":"ed3016de5a5dbfb0904cd3a442fa98cb66f8b4d8c1b801bcdcba777b57abe69d","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"4ad721b5b6a3d39ca3e2202f403d897c4a1d42896486dd58963a81f8e64ef61d","README.md":"14cbfed88443a2e7ffb5beb788cae17e19d7329e9ef6c7ebdbd45c67751f4a06","benches/bench.rs":"ed7143e66ecf8bfb12c87d1f9344157d97696b8194de9132d061129bc80d8d52","cbindgen.toml":"07d22767e85ed64cf190038205e189a8fffea8910bbe923d04f425b36b9e9e93","doc/mapped_hyph_format.md":"2f2487cf536fe4b03db6e4b384be06744ec30b3f299519492288306a93127fbb","hyph_en_US.hyf":"6262b4c5118fe277ab4add8689d9524ca72097564652baec67a8fcd5029ec9b0","src/bin/hyf_compile.rs":"8dfcad9c6e6f27bda9eb6ac6493114fdec0187fef144d86e097ffe488d00a49c","src/builder.rs":"7d4bb46ab2e00bb1cad1de8365781102a44817f23518ca617db17c07d44f5f7e","src/ffi.rs":"bdcff084276418788f4c8a1c525d7a6fd0bce900ca1561ff0353029e1171d9f1","src/lib.rs":"0126ba46f1c30a2dea2f72dec9e9639635aaba85f4b0da7b1a6e2f52624243ed","src/main.rs":"666befeb39cb1a7dfb66c6b9218d5f7b6c4ed09dbbbc8cfff6b749a33a99ebcf","tests/base.hyf":"d8bf57c6280cfa1d357d3fdba156ce64afbd9df58e28eeb084dfe3f80972b73f","tests/base.hyph":"a3f1fab24c101701fdf21e8359685d80611ab970304e2bd89ef024768b3700c8","tests/base.word":"1136c9a421b242262661b9a65723f87a5ecf77ae38eabcea057832d036d567fd","tests/compound.hyf":"929c1ba6676e4c43bc649d0abf4275ea9e8b02bffaa5acdf704a710813a7a13c","tests/compound4.hyf":"2093287bc41ee30ff9bdbf278f1f8209cb1
d1a78236b46e9060af2a881572b8e","tests/compound5.hyf":"0942a5dfbb8d0ef3a937ab9da0418abb41300357cde49f4c477a59a11b2cb6bd","tests/compound6.hyf":"ebad958c2692a5b439b31e324020ed27c42dc05bd5b8c6a6dea4669e6ccf76b4","tests/hyphen.hyf":"92b8a5c86aac6a0b9f0eb7330a057065d6985fd047e851cae47039995c682d4d","tests/lhmin.hyf":"23c886704fafee7d9c54b2478029cf69a5fa946c2f2442bd86697bca5933c88d","tests/num.hyf":"4834fabe78b5c81815434d4562ce3322541649e1ea1edc555a498574bc8b237e","tests/rhmin.hyf":"239cb3d4d7f904abb43b57241e12cc1396e636220c3806e64666aca7ca46cc42","tests/settings2.hyf":"9fc4855e0b952a3593db1efef080b93ce7f1c6fe6798db0440e2bf0cc986ffa2","tests/settings3.hyf":"867db207b485a06e7d60ad10735c9111f10516ee3a5afd6306c683ace3454491","tests/test.rs":"5c81ae59b9384b70d9461407999dac1fde9214398876c4433fbbde9571cc1d94"},"package":null} \ No newline at end of file diff --git a/third_party/rust/mapped_hyph/COPYRIGHT b/third_party/rust/mapped_hyph/COPYRIGHT deleted file mode 100644 index a1254361b371..000000000000 --- a/third_party/rust/mapped_hyph/COPYRIGHT +++ /dev/null @@ -1,12 +0,0 @@ -mapped_hyph is copyright 2019 Mozilla Foundation. - -Licensed under the Apache License, Version 2.0 - or the MIT -license , -at your option. All files in the project carrying such -notice may not be copied, modified, or distributed except -according to those terms. - -Code in the subdirectories /test/ and /bench/ is dedicated -to the Public Domain. 
diff --git a/third_party/rust/mapped_hyph/Cargo.toml b/third_party/rust/mapped_hyph/Cargo.toml deleted file mode 100644 index 76380c3cd978..000000000000 --- a/third_party/rust/mapped_hyph/Cargo.toml +++ /dev/null @@ -1,18 +0,0 @@ -[package] -name = "mapped_hyph" -description = "Hyphenation using precompiled memory-mapped tables" -version = "0.3.0" -authors = ["Jonathan Kew "] -license = "MIT/Apache-2.0" -edition = "2018" - -[dependencies] -memmap = "0.7.0" -arrayref = "0.3.5" - -[dev-dependencies] -criterion = "0.3" - -[[bench]] -name = "bench" -harness = false diff --git a/third_party/rust/mapped_hyph/LICENSE-APACHE b/third_party/rust/mapped_hyph/LICENSE-APACHE deleted file mode 100644 index d64569567334..000000000000 --- a/third_party/rust/mapped_hyph/LICENSE-APACHE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/third_party/rust/mapped_hyph/LICENSE-MIT b/third_party/rust/mapped_hyph/LICENSE-MIT deleted file mode 100644 index b4850c952004..000000000000 --- a/third_party/rust/mapped_hyph/LICENSE-MIT +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2019 Mozilla Foundation - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
diff --git a/third_party/rust/mapped_hyph/README.md b/third_party/rust/mapped_hyph/README.md deleted file mode 100644 index 6ea145d18c40..000000000000 --- a/third_party/rust/mapped_hyph/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# mapped_hyph - -mapped_hyph is a reimplementation of the hyphenation algorithm from the -[libhyphen](https://github.com/hunspell/hyphen) library -that is intended to reduce the in-memory footprint of loaded -hyphenation dictionaries, especially when the same dictionary -may be in use by multiple processes. - -To reduce memory footprint, mapped_hyph uses hyphenation dictionaries that are -"precompiled" into a flat, position-independent binary format that is used -directly by the runtime hyphenation functions. -Therefore, dictionaries do not have to be parsed into a dynamic structure in memory; -the files can simply be mmap'd into the address space and immediately used. -In addition, a compiled dictionary mapped into a shared-memory block -can be made available to multiple processes for no added physical memory cost. - -One deliberate simplification compared to libhyphen -is that mapped_hyph only accepts UTF-8 text and hyphenation dictionaries; -legacy non-Unicode encodings are not supported. - -mapped_hyph has been created primarily for use by Gecko, replacing the use of libhyphen, -and so its features (and limitations) are based on this use case. -However, it is hoped that it will also be more generally useful. - -## Functionality - -Currently, mapped_hyph supports only "standard" hyphenation, where spelling does not -change around the hyphenation position. At present this is the only kind of -hyphenation supported in Gecko. - -The compiled hyphenation dictionary format includes provision for replacement -strings and indexes, as used by libhyphen to support non-standard hyphenations -(e.g. German "Schiffahrt" -> "Schiff-fahrt"), but the `find_hyphen_values` function -will ignore any such hyphenation positions it finds. 
-(None of the hyphenation dictionaries shipping with Firefox includes such rules.) - -## Licensing - -mapped_hyph is dual licensed under the Apache-2.0 and MIT licenses; -see the file COPYRIGHT. - -## Documentation - -Use `cargo doc --open` to view (admittedly brief) documentation generated from -comments in the source. - -## C and C++ bindings - -See the `mapped_hyph.h` header for C/C++ APIs that can be used to load hyphenation files -and to locate valid hyphenation positions in a word. - -## Sample programs - -See main.rs for a simple example program. - -## Compiled dictionaries - -The `hyf_compile` tool is used to generate `.hyf` files for mapped_hyph -from standard `.dic` (or `.pat`) files as used by libhyphen, LibreOffice, etc. - -(A compiled version of the `hyph_en_US` dictionary from libhyphen is currently -included here, as it is handy for testing purposes.) - -## Release Notes - -### 0.2.0 - -* Implemented a hyphenation table compiler in the `builder` submodule, - and `hyf_compile` command-line tool. - -* Moved C-callable API functions into an `ffi` submodule. - -### 0.1.0 - -* Initial release. diff --git a/third_party/rust/mapped_hyph/benches/bench.rs b/third_party/rust/mapped_hyph/benches/bench.rs deleted file mode 100644 index cf4ad6cb2fb6..000000000000 --- a/third_party/rust/mapped_hyph/benches/bench.rs +++ /dev/null @@ -1,50 +0,0 @@ -// Any copyright to the test code below is dedicated to the Public Domain. 
-// http://creativecommons.org/publicdomain/zero/1.0/ - -use criterion::black_box; -use criterion::criterion_group; -use criterion::criterion_main; -use criterion::BenchmarkId; -use criterion::Criterion; - -use mapped_hyph::Hyphenator; -use std::fs; - -const SAMPLE_SIZE: usize = 300; -const DIC_PATH: &str = "hyph_en_US.hyf"; - -fn bench_construct(c: &mut Criterion) { - c.bench_function("construct", |b| { - b.iter(|| { - let dic = unsafe { mapped_hyph::load_file(DIC_PATH) } - .expect(&format!("failed to load dictionary {}", DIC_PATH)); - let _ = Hyphenator::new(black_box(&*dic)); - }) - }); -} - -fn bench_find_hyphen_values(c: &mut Criterion) { - // XXX: Should we copy this file to the crate to ensure reproducability? - let data = fs::read_to_string("/usr/share/dict/words").expect("File reading failed."); - let words: Vec<&str> = data.lines().take(SAMPLE_SIZE).collect(); - - let dic = unsafe { mapped_hyph::load_file(DIC_PATH) } - .expect(&format!("failed to load dictionary {}", DIC_PATH)); - let hyph = Hyphenator::new(&*dic); - - c.bench_with_input( - BenchmarkId::new("bench_word", SAMPLE_SIZE), - &words, - |b, words| { - b.iter(|| { - let mut values: Vec = vec![0; 1000]; - for w in words { - hyph.find_hyphen_values(&w, &mut values); - } - }); - }, - ); -} - -criterion_group!(benches, bench_construct, bench_find_hyphen_values,); -criterion_main!(benches); diff --git a/third_party/rust/mapped_hyph/cbindgen.toml b/third_party/rust/mapped_hyph/cbindgen.toml deleted file mode 100644 index 9ad425292c11..000000000000 --- a/third_party/rust/mapped_hyph/cbindgen.toml +++ /dev/null @@ -1,114 +0,0 @@ -# This is a template cbindgen.toml file with all of the default values. -# Some values are commented out because their absence is the real default. -# -# See https://github.com/eqrion/cbindgen/blob/master/docs.md#cbindgentoml -# for detailed documentation of every option here. 
- -language = "C" - -############## Options for Wrapping the Contents of the Header ################# - -header = """/* - * Copyright 2019 Mozilla Foundation. See the COPYRIGHT - * file at the top-level directory of this distribution. - * - * Licensed under the Apache License, Version 2.0 or the MIT license - * , at your - * option. This file may not be copied, modified, or distributed - * except according to those terms. -**/ - -/* clang-format off */ -""" -trailer = "/* clang-format on */" -include_guard = "mapped_hyph_h" -autogen_warning = """/* - * Warning, this file is autogenerated by cbindgen. Don't modify this manually. - */ -""" -include_version = false -# namespace = "my_namespace" -namespaces = [] -# using_namespaces = [] -sys_includes = ["stdbool.h","stdint.h"] -includes = [] -no_includes = true - -############################ Code Style Options ################################ - -braces = "SameLine" -line_length = 100 -tab_width = 2 -documentation_style = "auto" - -############################# Codegen Options ################################## - -style = "both" - -[defines] -# "target_os = freebsd" = "DEFINE_FREEBSD" -# "feature = serde" = "DEFINE_SERDE" - -[export] -include = [] -exclude = [] -# prefix = "CAPI_" -item_types = [] -renaming_overrides_prefixing = false - -[export.rename] - -[export.body] - -[fn] -rename_args = "None" -# must_use = "MUST_USE_FUNC" -# prefix = "START_FUNC" -# postfix = "END_FUNC" -args = "auto" - -[struct] -rename_fields = "None" -# must_use = "MUST_USE_STRUCT" -derive_constructor = false -derive_eq = false -derive_neq = false -derive_lt = false -derive_lte = false -derive_gt = false -derive_gte = false - -[enum] -rename_variants = "None" -# must_use = "MUST_USE_ENUM" -add_sentinel = false -prefix_with_name = false -derive_helper_methods = false -derive_const_casts = false -derive_mut_casts = false -# cast_assert_name = "ASSERT" -derive_tagged_enum_destructor = false -derive_tagged_enum_copy_constructor = false 
-private_default_tagged_enum_constructor = false - -[const] -allow_static_const = true - -[macro_expansion] -bitflags = false - -############## Options for How Your Rust library Should Be Parsed ############## - -[parse] -parse_deps = false -# include = [] -exclude = [] -clean = false -extra_bindings = [] - -[parse.expand] -crates = [] -all_features = false -default_features = true -features = [] diff --git a/third_party/rust/mapped_hyph/doc/mapped_hyph_format.md b/third_party/rust/mapped_hyph/doc/mapped_hyph_format.md deleted file mode 100644 index d98162d7ea11..000000000000 --- a/third_party/rust/mapped_hyph/doc/mapped_hyph_format.md +++ /dev/null @@ -1,98 +0,0 @@ -# Compiled hyphenation table format for mapped_hyph - -The file is a "flattened" representation of the list of `HyphenDict` structs -and descendant objects used by libhyphen -(see [hyphen.h](https://github.com/hunspell/hyphen/blob/master/hyphen.h)). - -Note that multi-byte integer types in the file are stored in _little-endian_ byte order. - -## Overall file header - -The file begins with a 4-byte "signature", followed by a count of the number -of hyphenation levels, and an array of offsets to each hyphenation level. -A "level" is essentially equivalent to libhyphen's `HyphenDict`. - -### Header (size: 8 bytes + 4 * numLevels) -Type | Name | Description ------|------|------------ -uint8[4] | magicNumber | 4-byte file identification code: ['H', 'y', 'f', '0'] -uint32 | numLevels | number of hyphenation levels present -uint32[numLevels] | levelOffset | offset from start of file to each Level - -Currently, there are normally 2 hyphenation levels, as the parser/compiler will -generate a default first level if no NEXTLEVEL keyword is present in the pattern file. - -## Hyphenation Level - -Each level of the hyphenation pattern begins with a Level header, followed by -the data for its states and the strings they refer to. 
-When the hyphenation machine is executed, we always begin at state offset 0 -(from the level's stateDataBase); each transition to a new state represents the -target directly by its offset from stateDataBase. -A state offset of 0xFFFFFF is considered invalid. - -Strings are represented as offsets from the level's stringDataBase; each string -is encoded as a one-byte length followed by `length` bytes of utf-8 data. -(So the maximum string length is 255 utf-8 code units; this is far more than any actual -hyphenation dictionary uses). -A string offset of 0xFFFF is considered invalid and represents an absent string. - -The minimum number of characters that must be kept together at the start/end of a word, -or of a component of a compound (i.e. the `...Min` values) is a count of _Unicode characters_, -not UTF-8 code units. (Note that the presentation-form ligature characters U+FB00 'ff' through U+FB06 'st' -are counted as 2 or 3 characters for this purpose.) - -### Level (size: 16 bytes + state data + string data, padded to a 4-byte boundary) -Type | Name | Description ------|------|------------ -uint32 | stateDataBase | offset from beginning of Level to start of level's State data -uint32 | stringDataBase | offset from beginning of Level to start of level's packed String data -uint16 | noHyphenStringOffset | from level's stringDataBase -uint16 | noHyphenCount | number of (NUL-separated) strings in the nohyphen string -uint8 | leftHyphenMin | minimum number of characters kept together at start of word -uint8 | rightHyphenMin | minimum number of characters kept together at end of word -uint8 | compoundLeftHyphenMin | minimum number of characters kept together at start of second component of a compound -uint8 | compoundRightHyphenMin | minimum number of characters kept together at end of first component of a compound - -## State - -Each state, referred to by its offset from the level's stateDataBase, consists of a header -followed by an array of transitions for input 
bytes that need to be matched in this state. -The state also records a fallback state offset, which is the transition to be taken -if the next input byte does not match any of the transition records. - -If a match string is present (i.e. `matchStringOffset` is not 0xFFFF), it is a string of hyphenation values -(encoded as ASCII digits '0'..'9') to be applied at the current position in the word. - -### StateHeader (size: 8 bytes) -Type | Name | Description ------|------|------------ -uint32 | fallbackStateOffset | (from level's stateDataBase) -uint16 | matchStringOffset | (from level's stringDataBase) -uint8 | numTransitions | count of Transitions that follow the StateHeader and optional StateHeaderExtension -uint8 | isExtended | if non-zero, the StateHeader is immediately followed by a StateHeaderExtension - -If the `isExtended` flag in the state header is set, this state includes a potential spelling change -and there is an extended form of the header present before the array of transitions. -(Note that extended states with spelling-change rules are not yet supported by the mapped_hyph engine; -none of the hyphenation dictionaries shipped with Firefox includes such rules.) - -### StateHeaderExtension (size: 4 bytes) -Type | Name | Description ------|------|------------ -uint16 | replacementStringOffset | (from level's stringDataBase) the replacement string -int8 | replacementIndex | index of the byte position (relative to current position in the word) at which the spelling replacement should happen -int8 | replacementCut | number of bytes to cut from the original word when making the replacement - -## Transitions - -The state's transitions are encoded as an array of Transition records, each corresponding to an input byte -and providing the offset of the new state. The transitions for each state are sorted by ascending value of input byte -(although in practice there are usually only a few valid transitions, and so a binary search does not seem to be -worthwhile). 
- -### Transition (size: 4 bytes) -Type | Name | Description ------|------|------------ -uint24 | newStateOffset | (from level's stateDataBase) -uint8 | inputByte | the input byte (utf-8 code unit) for this transition diff --git a/third_party/rust/mapped_hyph/hyph_en_US.hyf b/third_party/rust/mapped_hyph/hyph_en_US.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/src/bin/hyf_compile.rs b/third_party/rust/mapped_hyph/src/bin/hyf_compile.rs deleted file mode 100644 index 4e1671102c0b..000000000000 --- a/third_party/rust/mapped_hyph/src/bin/hyf_compile.rs +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2019 Mozilla Foundation. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -extern crate mapped_hyph; - -use std::env; -use std::fs::File; - -fn main() -> std::io::Result<()> { - let args: Vec = env::args().collect(); - if args.len() == 3 { - let in_file = File::open(&args[1])?; - let mut out_file = File::create(&args[2])?; - mapped_hyph::builder::write_hyf_file(&mut out_file, mapped_hyph::builder::read_dic_file(&in_file))?; - } else { - println!("usage: hyf_compile "); - } - Ok(()) -} diff --git a/third_party/rust/mapped_hyph/src/builder.rs b/third_party/rust/mapped_hyph/src/builder.rs deleted file mode 100644 index 7a13947aa44b..000000000000 --- a/third_party/rust/mapped_hyph/src/builder.rs +++ /dev/null @@ -1,473 +0,0 @@ -// Copyright 2019 Mozilla Foundation. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -/// Functions to compile human-readable patterns into a mapped_hyph -/// flattened representation of the hyphenation state machine. - -use std::io::{Read,BufRead,BufReader,Write}; -use std::collections::HashMap; -use std::convert::TryInto; -use std::hash::{Hash,Hasher}; - -// Wrap a HashMap so that we can implement the Hash trait. -#[derive(PartialEq, Eq, Clone)] -struct TransitionMap (HashMap); - -impl TransitionMap { - fn new() -> TransitionMap { - TransitionMap(HashMap::::new()) - } -} - -impl Hash for TransitionMap { - fn hash(&self, state: &mut H) { - // We only look at the values here; that's likely to be enough - // for a reasonable hash. - let mut transitions: Vec<&i32> = self.0.values().collect(); - transitions.sort(); - for t in transitions { - t.hash(state); - } - } -} - -#[derive(PartialEq, Eq, Hash, Clone)] -struct State { - match_string: Option>, - #[allow(dead_code)] - repl_string: Option>, - #[allow(dead_code)] - repl_index: i32, - #[allow(dead_code)] - repl_cut: i32, - fallback_state: i32, - transitions: TransitionMap, -} - -impl State { - fn new() -> State { - State { - match_string: None, - repl_string: None, - repl_index: -1, - repl_cut: -1, - fallback_state: -1, - transitions: TransitionMap::new(), - } - } -} - -/// This is only public because the read_dic_file() function returns a Vec -/// of LevelBuilder structs, which can then be passed to write_hyf_file() -/// to create the flattened output. -pub struct LevelBuilder { - states: Vec, - str_to_state: HashMap,i32>, - encoding: Option, - nohyphen: Option, - lh_min: u8, - rh_min: u8, - clh_min: u8, - crh_min: u8, -} - -impl LevelBuilder { - fn new() -> LevelBuilder { - let mut result = LevelBuilder { - states: Vec::::new(), - str_to_state: HashMap::,i32>::new(), - encoding: None, - nohyphen: None, - lh_min: 0, - rh_min: 0, - clh_min: 0, - crh_min: 0, - }; - // Initialize the builder with an empty start state. 
- result.str_to_state.insert(vec![], 0); - result.states.push(State::new()); - result - } - - fn find_state_number_for(&mut self, text: &[u8]) -> i32 { - let count = self.states.len() as i32; - let index = *self.str_to_state.entry(text.to_vec()).or_insert(count); - if index == count { - self.states.push(State::new()); - } - index - } - - fn add_pattern(&mut self, pattern: &str) { - let mut bytes = pattern.as_bytes(); - let mut text = Vec::::with_capacity(bytes.len()); - let mut digits = Vec::::with_capacity(bytes.len() + 1); - let mut repl_str = None; - let mut repl_index = 0; - let mut repl_cut = 0; - - // Check for replacement rule (non-standard hyphenation spelling change). - if let Some(slash) = bytes.iter().position(|x| *x == b'/') { - let parts = bytes.split_at(slash); - bytes = parts.0; - let mut it = parts.1[1 ..].split(|x| *x == b','); - if let Some(repl) = it.next() { - repl_str = Some(repl.to_vec()); - } - if let Some(num) = it.next() { - repl_index = std::str::from_utf8(num).unwrap().parse::().unwrap() - 1; - } - if let Some(num) = it.next() { - repl_cut = std::str::from_utf8(num).unwrap().parse::().unwrap(); - } - } - - // Separate the input pattern into parallel arrays of text (bytes) and digits. - let mut got_digit = false; - for byte in bytes { - if *byte <= b'9' && *byte >= b'0' { - assert!(!got_digit, "invalid pattern \"{}\": consecutive digits", pattern); - digits.push(*byte); - got_digit = true; - } else { - text.push(*byte); - if got_digit { - got_digit = false; - } else { - digits.push(b'0'); - } - } - } - if !got_digit { - digits.push(b'0'); - } - - if repl_str.is_none() { - // Optimize away leading zeroes from the digits array. - while !digits.is_empty() && digits[0] == b'0' { - digits.remove(0); - } - } else { - // Convert repl_index and repl_cut from Unicode char to byte indexing. - let start = if text[0] == b'.' 
{ 1 } else { 0 }; - if start == 1 { - assert_eq!(digits[0], b'0', "unexpected digit before start of word"); - digits.remove(0); - } - let word = std::str::from_utf8(&text[start..]).unwrap(); - let mut chars: Vec<_> = word.char_indices().collect(); - chars.push((word.len(), '.')); - repl_cut = chars[(repl_index + repl_cut) as usize].0 as i32 - chars[repl_index as usize].0 as i32; - repl_index = chars[repl_index as usize].0 as i32; - } - - // Create the new state, or add pattern into an existing state - // (which should not already have a match_string). - let mut state_num = self.find_state_number_for(&text); - let mut state = &mut self.states[state_num as usize]; - assert!(state.match_string.is_none(), "duplicate pattern?"); - if !digits.is_empty() { - state.match_string = Some(digits); - } - if repl_str.is_some() { - state.repl_string = repl_str; - state.repl_index = repl_index; - state.repl_cut = repl_cut; - } - - // Set up prefix transitions, inserting additional states as needed. - while !text.is_empty() { - let last_state = state_num; - let ch = *text.last().unwrap(); - text.truncate(text.len() - 1); - state_num = self.find_state_number_for(&text); - if let Some(exists) = self.states[state_num as usize].transitions.0.insert(ch, last_state) { - assert_eq!(exists, last_state, "overwriting existing transition?"); - break; - } - } - } - - fn merge_duplicate_states(&mut self) { - // We loop here because when we eliminate a duplicate, and update the transitons - // that referenced it, we may thereby create new duplicates that another pass - // will find and compress further. - loop { - let orig_len = self.states.len(); - // Used to map State records to the (first) index at which they occur. - let mut state_to_index = HashMap::<&State,i32>::new(); - // Mapping of old->new state indexes, and whether each old state is - // a duplicate that should be dropped. 
- let mut mappings = Vec::<(i32,bool)>::with_capacity(orig_len); - let mut next_new_index: i32 = 0; - for index in 0 .. self.states.len() { - // Find existing index for this state, or allocate the next new index to it. - let new_index = *state_to_index.entry(&self.states[index]).or_insert(next_new_index); - // Record the mapping, and whether the state was a duplicate. - mappings.push((new_index, new_index != next_new_index)); - // If we used next_new_index for this state, increment it. - if new_index == next_new_index { - next_new_index += 1; - } - } - // If we didn't find any duplicates, next_new_index will have kept pace with - // index, so we know we're finished. - if next_new_index as usize == self.states.len() { - break; - } - // Iterate over all the states, either deleting them or updating indexes - // according to the mapping we created; then repeat the search. - for index in (0 .. self.states.len()).rev() { - if mappings[index].1 { - self.states.remove(index); - } else { - let state = &mut self.states[index]; - if state.fallback_state != -1 { - state.fallback_state = mappings[state.fallback_state as usize].0; - } - for t in state.transitions.0.iter_mut() { - *t.1 = mappings[*t.1 as usize].0; - } - } - } - } - } - - fn flatten(&self) -> Vec { - // Calculate total space needed for state data, and build the state_to_offset table. - let mut state_data_size = 0; - let mut state_to_offset = Vec::::with_capacity(self.states.len()); - for state in &self.states { - state_to_offset.push(state_data_size); - state_data_size += if state.repl_string.is_some() { 12 } else { 8 }; - state_data_size += state.transitions.0.len() * 4; - } - - // Helper to map a state index to its offset in the final data block. 
- let get_state_offset_for = |state_index: i32| -> u32 { - if state_index < 0 { - return super::INVALID_STATE_OFFSET; - } - state_to_offset[state_index as usize] as u32 - }; - - // Helper to map a byte string to its offset in the final data block, and - // store the bytes into string_data unless using an already-existing string. - let mut string_to_offset = HashMap::,usize>::new(); - let mut string_data = Vec::::new(); - let mut get_string_offset_for = |bytes: &Option>| -> u16 { - if bytes.is_none() { - return super::INVALID_STRING_OFFSET; - } - assert!(bytes.as_ref().unwrap().len() < 256); - let new_offset = string_data.len(); - let offset = *string_to_offset.entry(bytes.as_ref().unwrap().clone()).or_insert(new_offset); - if offset == new_offset { - string_data.push(bytes.as_ref().unwrap().len() as u8); - string_data.extend_from_slice(bytes.as_ref().unwrap().as_ref()); - } - offset.try_into().unwrap() - }; - - // Handle nohyphen string list if present, converting comma separators to NULs - // and trimming any surplus whitespace. - let mut nohyphen_string_offset: u16 = super::INVALID_STRING_OFFSET; - let mut nohyphen_count: u16 = 0; - if self.nohyphen.is_some() { - let nohyphen_strings: Vec<_> = self.nohyphen.as_ref().unwrap().split(',').map(|x| x.trim()).collect(); - nohyphen_count = nohyphen_strings.len().try_into().unwrap(); - nohyphen_string_offset = get_string_offset_for(&Some(nohyphen_strings.join("\0").as_bytes().to_vec())); - } - - let mut state_data = Vec::::with_capacity(state_data_size); - for state in &self.states { - state_data.extend(&get_state_offset_for(state.fallback_state).to_le_bytes()); - state_data.extend(&get_string_offset_for(&state.match_string).to_le_bytes()); - state_data.push(state.transitions.0.len() as u8); - // Determine whether to use an extended state record, and if so add the - // replacement string and index fields. 
- if state.repl_string.is_none() { - state_data.push(0); - } else { - state_data.push(1); - state_data.extend(&get_string_offset_for(&state.repl_string).to_le_bytes()); - state_data.push(state.repl_index as u8); - state_data.push(state.repl_cut as u8); - } - // Collect transitions into an array so we can sort them. - let mut transitions = vec![]; - for (key, value) in state.transitions.0.iter() { - transitions.push((*key, get_state_offset_for(*value))) - } - transitions.sort(); - for t in transitions { - // New state offset is stored as a 24-bit value, so we do this manually. - state_data.push((t.1 & 0xff) as u8); - state_data.push(((t.1 >> 8) & 0xff) as u8); - state_data.push(((t.1 >> 16) & 0xff) as u8); - state_data.push(t.0); - } - } - assert_eq!(state_data.len(), state_data_size); - - // Pad string data to a 4-byte boundary - while string_data.len() & 3 != 0 { - string_data.push(0); - } - - let total_size = super::LEVEL_HEADER_SIZE as usize + state_data_size + string_data.len(); - let mut result = Vec::::with_capacity(total_size); - - let state_data_base: u32 = super::LEVEL_HEADER_SIZE as u32; - let string_data_base: u32 = state_data_base + state_data_size as u32; - - result.extend(&state_data_base.to_le_bytes()); - result.extend(&string_data_base.to_le_bytes()); - result.extend(&nohyphen_string_offset.to_le_bytes()); - result.extend(&nohyphen_count.to_le_bytes()); - result.push(self.lh_min); - result.push(self.rh_min); - result.push(self.clh_min); - result.push(self.crh_min); - - result.extend(state_data.iter()); - result.extend(string_data.iter()); - - assert_eq!(result.len(), total_size); - - result - } -} - -/// Read a libhyphen-style pattern file and create the corresponding state -/// machine transitions, etc. -/// The returned Vec can be passed to write_hyf_file() to generate a flattened -/// representation of the state machine in mapped_hyph's binary format. 
-pub fn read_dic_file(dic_file: T) -> Vec { - let reader = BufReader::new(dic_file); - - let mut builders = Vec::::new(); - builders.push(LevelBuilder::new()); - let mut builder = &mut builders[0]; - - for (index, line) in reader.lines().enumerate() { - let mut trimmed = line.unwrap().trim().to_string(); - // Strip comments. - if let Some(i) = trimmed.find('%') { - trimmed = trimmed[..i].trim().to_string(); - } - // Ignore empty lines. - if trimmed.is_empty() { - continue; - } - // Uppercase indicates keyword rather than pattern. - if trimmed.as_bytes()[0] >= b'A' && trimmed.as_bytes()[0] <= b'Z' { - // First line is encoding; we only support UTF-8. - if builder.encoding.is_none() { - assert_eq!(trimmed, "UTF-8", "Only UTF-8 patterns are accepted!"); - builder.encoding = Some(trimmed); - continue; - } - // Check for valid keyword-value pairs. - if trimmed.contains(' ') { - let parts: Vec<&str> = trimmed.split(' ').collect(); - assert!(parts.len() == 2); - let keyword = parts[0]; - let value = parts[1]; - match keyword { - "LEFTHYPHENMIN" => builder.lh_min = value.parse::().unwrap(), - "RIGHTHYPHENMIN" => builder.rh_min = value.parse::().unwrap(), - "COMPOUNDLEFTHYPHENMIN" => builder.clh_min = value.parse::().unwrap(), - "COMPOUNDRIGHTHYPHENMIN" => builder.crh_min = value.parse::().unwrap(), - "NOHYPHEN" => builder.nohyphen = Some(trimmed), - _ => println!("unknown keyword: {}", trimmed), - } - continue; - } - // Start a new hyphenation level? - if trimmed == "NEXTLEVEL" { - builders.push(LevelBuilder::new()); - builder = builders.last_mut().unwrap(); - continue; - } - println!("unknown keyword: {}", trimmed); - continue; - } - // Patterns should always be provided in lowercase; complain if not. - assert_eq!(trimmed, trimmed.to_lowercase(), "pattern \"{}\" not lowercased at line {}", trimmed, index); - builder.add_pattern(&trimmed); - } - - // Create default first (compound-word) level if only one level was provided. - // (Maybe this should be optional? 
Currently just copying libhyphen behavior.) - if builders.len() == 1 { - let (lh_min, rh_min, clh_min, crh_min) = - (builders[0].lh_min, builders[0].rh_min, builders[0].clh_min, builders[0].crh_min); - builders.insert(0, LevelBuilder::new()); - builder = builders.first_mut().unwrap(); - builder.add_pattern("1-1"); - builder.add_pattern("1'1"); - builder.add_pattern("1\u{2013}1"); // en-dash - builder.add_pattern("1\u{2019}1"); // curly apostrophe - builder.nohyphen = Some("',\u{2013},\u{2019},-".to_string()); - builder.lh_min = lh_min; - builder.rh_min = rh_min; - builder.clh_min = if clh_min > 0 { clh_min } else if lh_min > 0 { lh_min } else { 3 }; - builder.crh_min = if crh_min > 0 { crh_min } else if rh_min > 0 { rh_min } else { 3 }; - } - - // Put in fallback states in each builder. - for builder in &mut builders { - for (key, state_index) in builder.str_to_state.iter() { - if key.is_empty() { - continue; - } - let mut fallback_key = key.clone(); - while !fallback_key.is_empty() { - fallback_key.remove(0); - if builder.str_to_state.contains_key(&fallback_key) { - break; - } - } - builder.states[*state_index as usize].fallback_state = builder.str_to_state[&fallback_key]; - } - } - - // Merge duplicate states to reduce size. - for builder in &mut builders { - builder.merge_duplicate_states(); - } - - builders -} - -/// Write out the state machines representing a set of hyphenation rules -/// to the given output stream. -pub fn write_hyf_file(hyf_file: &mut T, levels: Vec) -> std::io::Result<()> { - let mut flattened = vec![]; - for level in levels { - flattened.push(level.flatten()); - } - // Write file header: magic number, count of levels. - hyf_file.write_all(&[b'H', b'y', b'f', b'0'])?; - let level_count: u32 = flattened.len() as u32; - hyf_file.write_all(&level_count.to_le_bytes())?; - // Write array of offsets to each level. First level will begin immediately - // after the array of offsets. 
- let mut offset: u32 = super::FILE_HEADER_SIZE as u32 + 4 * level_count; - for flat in &flattened { - hyf_file.write_all(&offset.to_le_bytes())?; - offset += flat.len() as u32; - } - // Write the flattened data for each level. - for flat in &flattened { - hyf_file.write_all(&flat)?; - } - Ok(()) -} diff --git a/third_party/rust/mapped_hyph/src/ffi.rs b/third_party/rust/mapped_hyph/src/ffi.rs deleted file mode 100644 index 6e37596699cd..000000000000 --- a/third_party/rust/mapped_hyph/src/ffi.rs +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright 2019 Mozilla Foundation. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -use std::slice; -use std::str; -use std::ffi::CStr; -use std::os::raw::c_char; -use std::str::Utf8Error; - -use memmap::Mmap; - -use super::Hyphenator; - -/// Opaque type representing a hyphenation dictionary loaded from a file, -/// for use in FFI function signatures. -pub struct HyphDic; - -// Helper to convert word and hyphen buffer parameters from raw C pointer/length -// pairs to the Rust types expected by mapped_hyph. -unsafe fn params_from_c<'a>(word: *const c_char, word_len: u32, - hyphens: *mut u8, hyphens_len: u32) -> - (Result<&'a str, Utf8Error>, &'a mut [u8]) { - (str::from_utf8(slice::from_raw_parts(word as *const u8, word_len as usize)), - slice::from_raw_parts_mut(hyphens, hyphens_len as usize)) -} - -/// C-callable function to load a hyphenation dictionary from a file at `path`. -/// -/// Returns null on failure. -/// -/// This does not fully validate that the file contains usable hyphenation -/// data, it only opens the file (read-only) and mmap's it into memory, and -/// does some minimal sanity-checking that it *might* be valid. -/// -/// The returned `HyphDic` must be released with `mapped_hyph_free_dictionary`. 
-/// -/// # Safety -/// The given `path` must be a valid pointer to a NUL-terminated (C-style) -/// string. -#[no_mangle] -pub unsafe extern "C" fn mapped_hyph_load_dictionary(path: *const c_char) -> *const HyphDic { - let path_str = match CStr::from_ptr(path).to_str() { - Ok(str) => str, - Err(_) => return std::ptr::null(), - }; - let hyph = Box::new(match super::load_file(path_str) { - Some(dic) => dic, - _ => return std::ptr::null(), - }); - Box::into_raw(hyph) as *const HyphDic -} - -/// C-callable function to free a hyphenation dictionary -/// that was loaded by `mapped_hyph_load_dictionary`. -/// -/// # Safety -/// The `dic` parameter must be a `HyphDic` pointer obtained from -/// `mapped_hyph_load_dictionary`, and not previously freed. -#[no_mangle] -pub unsafe extern "C" fn mapped_hyph_free_dictionary(dic: *mut HyphDic) { - Box::from_raw(dic); -} - -/// C-callable function to find hyphenation values for a given `word`, -/// using a dictionary loaded via `mapped_hyph_load_dictionary`. -/// -/// The `word` must be UTF-8-encoded, and is `word_len` bytes (not characters) -/// long. -/// -/// Caller must supply the `hyphens` output buffer for results; its size is -/// given in `hyphens_len`. -/// It should be at least `word_len` elements long. -/// -/// Returns -1 if `word` is not valid UTF-8, or the output `hyphens` buffer is -/// too small. -/// Otherwise returns the number of potential hyphenation positions found. -/// -/// # Panics -/// This function may panic if the given dictionary is not valid. -/// -/// # Safety -/// The `dic` parameter must be a `HyphDic` pointer obtained from -/// `mapped_hyph_load_dictionary`. -/// -/// The `word` and `hyphens` parameter must be valid pointers to memory buffers -/// of at least the respective sizes `word_len` and `hyphens_len`. 
-#[no_mangle] -pub unsafe extern "C" fn mapped_hyph_find_hyphen_values_dic(dic: *const HyphDic, - word: *const c_char, word_len: u32, - hyphens: *mut u8, hyphens_len: u32) -> i32 { - if word_len > hyphens_len { - return -1; - } - let (word_str, hyphen_buf) = params_from_c(word, word_len, hyphens, hyphens_len); - if word_str.is_err() { - return -1; - } - Hyphenator::new(&*(dic as *const Mmap)) - .find_hyphen_values(word_str.unwrap(), hyphen_buf) as i32 -} - -/// C-callable function to find hyphenation values for a given `word`, -/// using a dictionary loaded and owned by the caller. -/// -/// The dictionary is supplied as a raw memory buffer `dic_buf` of size -/// `dic_len`. -/// -/// The `word` must be UTF-8-encoded, and is `word_len` bytes (not characters) -/// long. -/// -/// Caller must supply the `hyphens` output buffer for results; its size is -/// given in `hyphens_len`. -/// It should be at least `word_len` elements long. -/// -/// Returns -1 if `word` is not valid UTF-8, or the output `hyphens` buffer is -/// too small. -/// Otherwise returns the number of potential hyphenation positions found. -/// -/// # Panics -/// This function may panic if the given dictionary is not valid. -/// -/// # Safety -/// The `dic_buf` parameter must be a valid pointer to a memory block of size -/// at least `dic_len`. -/// -/// The `word` and `hyphens` parameter must be valid pointers to memory buffers -/// of at least the respective sizes `word_len` and `hyphens_len`. 
-#[no_mangle] -pub unsafe extern "C" fn mapped_hyph_find_hyphen_values_raw(dic_buf: *const u8, dic_len: u32, - word: *const c_char, word_len: u32, - hyphens: *mut u8, hyphens_len: u32) -> i32 { - if word_len > hyphens_len { - return -1; - } - let (word_str, hyphen_buf) = params_from_c(word, word_len, hyphens, hyphens_len); - if word_str.is_err() { - return -1; - } - Hyphenator::new(slice::from_raw_parts(dic_buf, dic_len as usize)) - .find_hyphen_values(word_str.unwrap(), hyphen_buf) as i32 -} - -/// C-callable function to check if a given memory buffer `dic_buf` of size -/// `dic_len` is potentially usable as a hyphenation dictionary. -/// -/// Returns `true` if the given memory buffer looks like it may be a valid -/// hyphenation dictionary, `false` if it is clearly not usable. -/// -/// # Safety -/// The `dic_buf` parameter must be a valid pointer to a memory block of size -/// at least `dic_len`. -#[no_mangle] -pub unsafe extern "C" fn mapped_hyph_is_valid_hyphenator(dic_buf: *const u8, dic_len: u32) -> bool { - if dic_buf.is_null() { - return false; - } - let dic = Hyphenator::new(slice::from_raw_parts(dic_buf, dic_len as usize)); - dic.is_valid_hyphenator() -} diff --git a/third_party/rust/mapped_hyph/src/lib.rs b/third_party/rust/mapped_hyph/src/lib.rs deleted file mode 100644 index 6f68da8a64a3..000000000000 --- a/third_party/rust/mapped_hyph/src/lib.rs +++ /dev/null @@ -1,640 +0,0 @@ -// Copyright 2019 Mozilla Foundation. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#[macro_use] -extern crate arrayref; -extern crate memmap; - -use std::slice; -use std::str; -use std::cmp::max; -use std::fs::File; -use std::mem; - -use memmap::Mmap; - -// Make submodules available publicly. 
-pub mod builder; -pub mod ffi; - -// 4-byte identification expected at beginning of a compiled dictionary file. -// (This will be updated if an incompatible change to the format is made in -// some future revision.) -const MAGIC_NUMBER: [u8; 4] = [b'H', b'y', b'f', b'0']; - -const INVALID_STRING_OFFSET: u16 = 0xffff; -const INVALID_STATE_OFFSET: u32 = 0x00ff_ffff; - -const FILE_HEADER_SIZE: usize = 8; // 4-byte magic number, 4-byte count of levels -const LEVEL_HEADER_SIZE: usize = 16; - -// Transition actually holds a 24-bit new state offset and an 8-bit input byte -// to match. We will be interpreting byte ranges as Transition arrays (in the -// State::transitions() method below), so use repr(C) to ensure we have the -// memory layout we expect. -// Transition records do not depend on any specific alignment. -#[repr(C)] -#[derive(Debug,Copy,Clone)] -struct Transition(u8, u8, u8, u8); - -impl Transition { - fn new_state_offset(&self) -> usize { - // Read a 24-bit little-endian number from three bytes. - self.0 as usize + ((self.1 as usize) << 8) + ((self.2 as usize) << 16) - } - fn match_byte(&self) -> u8 { - self.3 - } -} - -// State is an area of the Level's data block that begins with a fixed header, -// followed by an array of transitions. The total size of each State's data -// depends on the number of transitions in the state. Only the basic header -// is defined by the struct here; the rest of the state is accessed via -// pointer magic. -// There are two versions of State, a basic version that supports only simple -// hyphenation (no associated spelling change), and an extended version that -// adds the replacement-string fields to support spelling changes at the -// hyphenation point. Check is_extended() to know which version is present. -// State records are NOT necessarily 4-byte aligned, so multi-byte fields -// should be read with care. 
-#[derive(Debug,Copy,Clone)] -#[repr(C)] -struct State { - fallback_state: [u8; 4], - match_string_offset: [u8; 2], - num_transitions: u8, - is_extended: u8, -} - -#[repr(C)] -struct StateExtended { - state: State, - repl_string_offset: [u8; 2], - repl_index: i8, - repl_cut: i8, -} - -impl State { - // Accessors for the various State header fields; see file format description. - fn fallback_state(&self) -> usize { - u32::from_le_bytes(self.fallback_state) as usize - } - fn match_string_offset(&self) -> usize { - u16::from_le_bytes(self.match_string_offset) as usize - } - fn num_transitions(&self) -> u8 { - self.num_transitions - } - fn is_extended(&self) -> bool { - self.is_extended != 0 - } - // Accessors that are only valid if is_extended() is true. - // These use `unsafe` to dereference a pointer to the relevant field; - // this is OK because Level::get_state always validates the total state size - // before returning a state reference, so these pointers will be valid for - // any extended state it returns. - #[allow(dead_code)] - fn as_extended(&self) -> &StateExtended { - debug_assert!(self.is_extended()); - unsafe { mem::transmute(self) } - } - #[allow(dead_code)] - fn repl_string_offset(&self) -> usize { - u16::from_le_bytes(self.as_extended().repl_string_offset) as usize - } - #[allow(dead_code)] - fn repl_index(&self) -> i8 { - self.as_extended().repl_index - } - #[allow(dead_code)] - fn repl_cut(&self) -> i8 { - self.as_extended().repl_cut - } - // Return the state's Transitions as a slice reference. - fn transitions(&self) -> &[Transition] { - let count = self.num_transitions() as usize; - if count == 0 { - return &[]; - } - let transition_offset = if self.is_extended() { mem::size_of::() } else { mem::size_of::() } as isize; - // We know the `offset` here will not look beyond the valid range of memory - // because Level::get_state() checks the state length (accounting for the - // number of transitions) before returning a State reference. 
- let trans_ptr = unsafe { (self as *const State as *const u8).offset(transition_offset) as *const Transition }; - // Again, because Level::get_state() already checked the state length, we know - // this slice address and count will be valid. - unsafe { slice::from_raw_parts(trans_ptr, count) } - } - // Look up the Transition for a given input byte, or None. - fn transition_for(&self, b: u8) -> Option { - // The transitions array is sorted by match_byte() value, but there are - // usually very few entries; benchmarking showed that using binary_search_by - // here gave no benefit (possibly slightly slower). - self.transitions().iter().copied().find(|t| t.match_byte() == b) - } - // Just for debugging use... - #[allow(dead_code)] - fn deep_show(&self, prefix: &str, dic: &Level) { - if self.match_string_offset() != INVALID_STRING_OFFSET as usize { - let match_string = dic.string_at_offset(self.match_string_offset()); - println!("{}match: {}", prefix, str::from_utf8(match_string).unwrap()); - } - for t in self.transitions() { - println!("{}{} ->", prefix, t.match_byte() as char); - let next_prefix = format!("{} ", prefix); - dic.get_state(t.new_state_offset()).unwrap().deep_show(&next_prefix, &dic); - } - } -} - -// We count the presentation-form ligature characters U+FB00..FB06 as multiple -// chars for the purposes of lefthyphenmin/righthyphenmin. In UTF-8, all these -// ligature characters are 3-byte sequences beginning with <0xEF, 0xAC>; this -// helper returns the "decomposed length" of the ligature given its trailing -// byte. -fn lig_length(trail_byte: u8) -> usize { - // This is only called on valid UTF-8 where we already know trail_byte - // must be >= 0x80. 
- // Ligature lengths: ff fi fl ffi ffl long-st st - const LENGTHS: [u8; 7] = [ 2u8, 2u8, 2u8, 3u8, 3u8, 2u8, 2u8 ]; - if trail_byte > 0x86 { - return 1; - } - LENGTHS[trail_byte as usize - 0x80] as usize -} - -fn is_utf8_trail_byte(byte: u8) -> bool { - (byte & 0xC0) == 0x80 -} - -fn is_ascii_digit(byte: u8) -> bool { - byte <= b'9' && byte >= b'0' -} - -fn is_odd(byte: u8) -> bool { - (byte & 0x01) == 0x01 -} - -// A hyphenation Level has a header followed by State records and packed string -// data. The total size of the slice depends on the number and size of the -// States and Strings it contains. -// Note that the data of the Level may not have any specific alignment! -#[derive(Debug,Copy,Clone)] -struct Level<'a> { - data: &'a [u8], - // Header fields cached by the constructor for faster access: - state_data_base_: usize, - string_data_base_: usize, -} - -impl Level<'_> { - // Constructor that initializes our cache variables. - fn new(data: &[u8]) -> Level { - Level { - data, - state_data_base_: u32::from_le_bytes(*array_ref!(data, 0, 4)) as usize, - string_data_base_: u32::from_le_bytes(*array_ref!(data, 4, 4)) as usize, - } - } - - // Accessors for Level header fields; see file format description. 
- fn state_data_base(&self) -> usize { - self.state_data_base_ // cached by constructor - } - fn string_data_base(&self) -> usize { - self.string_data_base_ // cached by constructor - } - fn nohyphen_string_offset(&self) -> usize { - u16::from_le_bytes(*array_ref!(self.data, 8, 2)) as usize - } - #[allow(dead_code)] - fn nohyphen_count(&self) -> u16 { - u16::from_le_bytes(*array_ref!(self.data, 10, 2)) - } - fn lh_min(&self) -> usize { - max(1, self.data[12] as usize) - } - fn rh_min(&self) -> usize { - max(1, self.data[13] as usize) - } - fn clh_min(&self) -> usize { - max(1, self.data[14] as usize) - } - fn crh_min(&self) -> usize { - max(1, self.data[15] as usize) - } - fn word_boundary_mins(&self) -> (usize, usize, usize, usize) { - (self.lh_min(), self.rh_min(), self.clh_min(), self.crh_min()) - } - // Strings are represented as offsets from the Level's string_data_base. - // This returns a byte slice referencing the string at a given offset, - // or an empty slice if invalid. - fn string_at_offset(&self, offset: usize) -> &'_ [u8] { - if offset == INVALID_STRING_OFFSET as usize { - return &[]; - } - let string_base = self.string_data_base() as usize + offset; - // TODO: move this to the validation function. - debug_assert!(string_base < self.data.len()); - if string_base + 1 > self.data.len() { - return &[]; - } - let len = self.data[string_base] as usize; - // TODO: move this to the validation function. - debug_assert!(string_base + 1 + len <= self.data.len()); - if string_base + 1 + len > self.data.len() { - return &[]; - } - self.data.get(string_base + 1 .. string_base + 1 + len).unwrap() - } - // The nohyphen field actually contains multiple NUL-separated substrings; - // return them as a vector of individual byte slices. 
- fn nohyphen(&self) -> Vec<&[u8]> { - let string_offset = self.nohyphen_string_offset(); - let nohyph_str = self.string_at_offset(string_offset as usize); - if nohyph_str.is_empty() { - return vec![]; - } - nohyph_str.split(|&b| b == 0).collect() - } - // States are represented as an offset from the Level's state_data_base. - // This returns a reference to the State at a given offset, or None if invalid. - fn get_state(&self, offset: usize) -> Option<&State> { - if offset == INVALID_STATE_OFFSET as usize { - return None; - } - debug_assert_eq!(offset & 3, 0); - let state_base = self.state_data_base() + offset; - // TODO: move this to the validation function. - debug_assert!(state_base + mem::size_of::() <= self.string_data_base()); - if state_base + mem::size_of::() > self.string_data_base() { - return None; - } - let state_ptr = &self.data[state_base] as *const u8 as *const State; - // This is safe because we just checked against self.string_data_base() above. - let state = unsafe { state_ptr.as_ref().unwrap() }; - let length = if state.is_extended() { mem::size_of::() } else { mem::size_of::() } - + mem::size_of::() * state.num_transitions() as usize; - // TODO: move this to the validation function. - debug_assert!(state_base + length <= self.string_data_base()); - if state_base + length > self.string_data_base() { - return None; - } - // This is safe because we checked the full state length against self.string_data_base(). - unsafe { state_ptr.as_ref() } - } - // Sets hyphenation values (odd = potential break, even = no break) in values[], - // and returns the change in the number of odd values present, so the caller can - // keep track of the total number of potential breaks in the word. - fn find_hyphen_values(&self, word: &str, values: &mut [u8], lh_min: usize, rh_min: usize) -> isize { - // Bail out immediately if the word is too short to hyphenate. 
- if word.len() < lh_min + rh_min { - return 0; - } - let start_state = self.get_state(0); - let mut st = start_state; - let mut hyph_count = 0; - for i in 0 .. word.len() + 2 { - // Loop over the word by bytes, with a virtual '.' added at each end - // to match word-boundary patterns. - let b = if i == 0 || i == word.len() + 1 { b'.' } else { word.as_bytes()[i - 1] }; - loop { - // Loop to repeatedly fall back if we don't find a matching transition. - // Note that this could infinite-loop if there is a state whose fallback - // points to itself (or a cycle of fallbacks), but this would represent - // a table compilation error. - // (A potential validation function could check for fallback cycles.) - if st.is_none() { - st = start_state; - break; - } - let state = st.unwrap(); - if let Some(tr) = state.transition_for(b) { - // Found a transition for the current byte. Look up the new state; - // if it has a match_string, merge its weights into `values`. - st = self.get_state(tr.new_state_offset()); - if let Some(state) = st { - let match_offset = state.match_string_offset(); - if match_offset != INVALID_STRING_OFFSET as usize { - if state.is_extended() { - debug_assert!(false, "extended hyphenation not supported by this function"); - } else { - let match_str = self.string_at_offset(match_offset); - let offset = i + 1 - match_str.len(); - assert!(offset + match_str.len() <= word.len() + 2); - for (j, ch) in match_str.iter().enumerate() { - let index = offset + j; - if index >= lh_min && index <= word.len() - rh_min { - // lh_min and rh_min are guaranteed to be >= 1, - // so this will not try to access outside values[]. 
- let old_value = values[index - 1]; - let value = ch - b'0'; - if value > old_value { - if is_odd(old_value) != is_odd(value) { - // Adjust hyph_count for the change we're making - hyph_count += if is_odd(value) { 1 } else { -1 }; - } - values[index - 1] = value; - } - } - } - } - } - } - // We have handled the current input byte; leave the fallback loop - // and get next input. - break; - } - // No transition for the current byte; go to fallback state and try again. - st = self.get_state(state.fallback_state()); - } - } - - // If the word was not purely ASCII, or if the word begins/ends with - // digits, the use of lh_min and rh_min above may not have correctly - // excluded enough positions, so we need to fix things up here. - let mut index = 0; - let mut count = 0; - let word_bytes = word.as_bytes(); - let mut clear_hyphen_at = |i| { if is_odd(values[i]) { hyph_count -= 1; } values[i] = 0; }; - // Handle lh_min. - while count < lh_min - 1 && index < word_bytes.len() { - let byte = word_bytes[index]; - clear_hyphen_at(index); - if byte < 0x80 { - index += 1; - if is_ascii_digit(byte) { - continue; // ASCII digits don't count - } - } else if byte == 0xEF && word_bytes[index + 1] == 0xAC { - // Unicode presentation-form ligature characters, which we count as - // multiple chars for the purpose of lh_min/rh_min, all begin with - // 0xEF, 0xAC in UTF-8. - count += lig_length(word_bytes[index + 2]); - clear_hyphen_at(index + 1); - clear_hyphen_at(index + 2); - index += 3; - continue; - } else { - index += 1; - while index < word_bytes.len() && is_utf8_trail_byte(word_bytes[index]) { - clear_hyphen_at(index); - index += 1; - } - } - count += 1; - } - - // Handle rh_min. 
- count = 0; - index = word.len(); - while count < rh_min && index > 0 { - index -= 1; - let byte = word_bytes[index]; - if index < word.len() - 1 { - clear_hyphen_at(index); - } - if byte < 0x80 { - // Only count if not an ASCII digit - if !is_ascii_digit(byte) { - count += 1; - } - continue; - } - if is_utf8_trail_byte(byte) { - continue; - } - if byte == 0xEF && word_bytes[index + 1] == 0xAC { - // Presentation-form ligatures count as multiple chars. - count += lig_length(word_bytes[index + 2]); - continue; - } - count += 1; - } - - hyph_count - } -} - -/// Hyphenation engine encapsulating a language-specific set of patterns (rules) -/// that identify possible break positions within a word. -pub struct Hyphenator<'a>(&'a [u8]); - -impl Hyphenator<'_> { - /// Return a Hyphenator that wraps the given buffer. - /// This does *not* check that the given buffer is in fact a valid hyphenation table. - /// Use is_valid_hyphenator() to determine whether it is usable. - /// (Calling hyphenation methods on a Hyphenator that wraps arbitrary, - /// unvalidated data is not unsafe, but may panic.) - pub fn new(buffer: &[u8]) -> Hyphenator { - Hyphenator(buffer) - } - - // Internal implementation details - fn magic_number(&self) -> &[u8] { - &self.0[0 .. 4] - } - fn num_levels(&self) -> usize { - u32::from_le_bytes(*array_ref!(self.0, 4, 4)) as usize - } - fn level(&self, i: usize) -> Level { - let offset = u32::from_le_bytes(*array_ref!(self.0, FILE_HEADER_SIZE + 4 * i, 4)) as usize; - let limit = if i == self.num_levels() - 1 { - self.0.len() - } else { - u32::from_le_bytes(*array_ref!(self.0, FILE_HEADER_SIZE + 4 * i + 4, 4)) as usize - }; - debug_assert!(offset + LEVEL_HEADER_SIZE <= limit && limit <= self.0.len()); - debug_assert_eq!(offset & 3, 0); - debug_assert_eq!(limit & 3, 0); - Level::new(&self.0[offset .. limit]) - } - - /// Identify acceptable hyphenation positions in the given `word`. 
- /// - /// The caller-supplied `values` must be at least as long as the `word`. - /// - /// On return, any elements with an odd value indicate positions in the word - /// after which a hyphen could be inserted. - /// - /// Returns the number of possible hyphenation positions that were found. - /// - /// # Panics - /// If the given `values` slice is too small to hold the results. - /// - /// If the block of memory represented by `self.0` is not in fact a valid - /// hyphenation dictionary, this function may panic with an overflow or - /// array bounds violation. - pub fn find_hyphen_values(&self, word: &str, values: &mut [u8]) -> isize { - assert!(values.len() >= word.len()); - values.iter_mut().for_each(|x| *x = 0); - let top_level = self.level(0); - let (lh_min, rh_min, clh_min, crh_min) = top_level.word_boundary_mins(); - if word.len() < lh_min + rh_min { - return 0; - } - let mut hyph_count = top_level.find_hyphen_values(word, values, lh_min, rh_min); - let compound = hyph_count > 0; - // Subsequent levels are applied to fragments between potential breaks - // already found: - for l in 1 .. self.num_levels() { - let level = self.level(l); - if hyph_count > 0 { - let mut begin = 0; - let mut lh = lh_min; - // lh_min and rh_min are both guaranteed to be greater than zero, - // so this loop will not reach fully to the end of the word. - for i in lh_min - 1 .. word.len() - rh_min { - if is_odd(values[i]) { - if i > begin { - // We've found a component of a compound; - // clear the corresponding values and apply the new level. - // (These values must be even, so hyph_count is unchanged.) - values[begin .. i].iter_mut().for_each(|x| { - *x = 0; - }); - hyph_count += level.find_hyphen_values(&word[begin ..= i], - &mut values[begin ..= i], - lh, crh_min); - } - begin = i + 1; - lh = clh_min; - } - } - if begin == 0 { - // No compound-word breaks were found, just apply level to the whole word. 
- hyph_count += level.find_hyphen_values(word, values, lh_min, rh_min); - } else if begin < word.len() { - // Handle trailing component of compound. - hyph_count += level.find_hyphen_values(&word[begin .. word.len()], - &mut values[begin .. word.len()], - clh_min, rh_min); - } - } else { - hyph_count += level.find_hyphen_values(word, values, lh_min, rh_min); - } - } - - // Only need to check nohyphen strings if top-level (compound) breaks were found. - if compound && hyph_count > 0 { - let nohyph = top_level.nohyphen(); - if !nohyph.is_empty() { - for i in lh_min ..= word.len() - rh_min { - if is_odd(values[i - 1]) { - for nh in &nohyph { - if i + nh.len() <= word.len() && *nh == &word.as_bytes()[i .. i + nh.len()] { - values[i - 1] = 0; - hyph_count -= 1; - break; - } - if nh.len() <= i && *nh == &word.as_bytes()[i - nh.len() .. i] { - values[i - 1] = 0; - hyph_count -= 1; - break; - } - } - } - } - } - } - - hyph_count - } - - /// Generate the hyphenated form of a `word` by inserting the given `hyphen_char` - /// at each valid break position. - /// - /// # Panics - /// If the block of memory represented by `self` is not in fact a valid - /// hyphenation dictionary, this function may panic with an overflow or - /// array bounds violation. - /// - /// Also panics if the length of the hyphenated word would overflow `usize`. - pub fn hyphenate_word(&self, word: &str, hyphchar: char) -> String { - let mut values = vec![0u8; word.len()]; - let hyph_count = self.find_hyphen_values(word, &mut values); - if hyph_count <= 0 { - return word.to_string(); - } - // We know how long the result will be, so we can preallocate here. 
- let result_len = word.len() + hyph_count as usize * hyphchar.len_utf8(); - let mut result = String::with_capacity(result_len); - let mut n = 0; - for ch in word.char_indices() { - if ch.0 > 0 && is_odd(values[ch.0 - 1]) { - result.push(hyphchar); - n += 1; - } - result.push(ch.1); - } - debug_assert_eq!(n, hyph_count); - debug_assert_eq!(result_len, result.len()); - result - } - - /// Check if the block of memory looks like it could be a valid hyphenation - /// table. - pub fn is_valid_hyphenator(&self) -> bool { - // Size must be at least 4 bytes for magic_number + 4 bytes num_levels; - // smaller than this cannot be safely inspected. - if self.0.len() < FILE_HEADER_SIZE { - return false; - } - if self.magic_number() != MAGIC_NUMBER { - return false; - } - // For each level, there's a 4-byte offset in the header, and the level - // has its own 16-byte header, so we can check a minimum size again here. - let num_levels = self.num_levels(); - if self.0.len() < FILE_HEADER_SIZE + LEVEL_HEADER_SIZE * num_levels { - return false; - } - // Check that state_data_base and string_data_base for each hyphenation - // level are within range. - for l in 0 .. num_levels { - let level = self.level(l); - if level.state_data_base() < LEVEL_HEADER_SIZE || - level.state_data_base() > level.string_data_base() || - level.string_data_base() > level.data.len() { - return false; - } - // TODO: consider doing more extensive validation of states and - // strings within the level? - } - // It's still possible the dic is internally broken, but at least it's - // worth trying to use it! - true - } -} - -/// Load the compiled hyphenation file at `dic_path`, if present. -/// -/// Returns `None` if the specified file cannot be opened or mapped, -/// otherwise returns a `memmap::Mmap` mapping the file. -/// -/// # Safety -/// -/// This is unsafe for the same reason Mmap::map() is unsafe: -/// mapped_hyph does not guarantee safety if the mapped file is modified -/// (e.g. 
by another process) while we're using it. -/// -/// This verifies that the file looks superficially like it may be a -/// compiled hyphenation table, but does *not* fully check the validity -/// of the file contents! Calling hyphenation functions with the returned -/// data is not unsafe, but may panic if the data is invalid. -pub unsafe fn load_file(dic_path: &str) -> Option { - let file = File::open(dic_path).ok()?; - let dic = Mmap::map(&file).ok()?; - let hyph = Hyphenator(&*dic); - if hyph.is_valid_hyphenator() { - return Some(dic); - } - None -} diff --git a/third_party/rust/mapped_hyph/src/main.rs b/third_party/rust/mapped_hyph/src/main.rs deleted file mode 100644 index acc24babee2e..000000000000 --- a/third_party/rust/mapped_hyph/src/main.rs +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2019 Mozilla Foundation. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -extern crate mapped_hyph; - -use mapped_hyph::Hyphenator; - -fn main() { - let dic_path = "hyph_en_US.hyf"; - - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - - println!("{}", hyph.hyphenate_word("haha", '-')); - println!("{}", hyph.hyphenate_word("hahaha", '-')); - println!("{}", hyph.hyphenate_word("photo", '-')); - println!("{}", hyph.hyphenate_word("photograph", '-')); - println!("{}", hyph.hyphenate_word("photographer", '-')); - println!("{}", hyph.hyphenate_word("photographic", '-')); - println!("{}", hyph.hyphenate_word("photographical", '-')); - println!("{}", hyph.hyphenate_word("photographically", '-')); - println!("{}", hyph.hyphenate_word("supercalifragilisticexpialidocious", '-')); - println!("{}", hyph.hyphenate_word("o'dwyer", '=')); - println!("{}", hyph.hyphenate_word("o'callahan", '=')); - println!("{}", hyph.hyphenate_word("o’dwyer", '=')); - println!("{}", hyph.hyphenate_word("o’callahan", '=')); - println!("{}", hyph.hyphenate_word("petti-fogging", '=')); - println!("{}", hyph.hyphenate_word("e-mailing", '=')); - println!("{}", hyph.hyphenate_word("-x-mailing", '=')); - println!("{}", hyph.hyphenate_word("-strikeout-", '=')); - - let dic2 = match unsafe { mapped_hyph::load_file("tests/compound.hyf") } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", "tests/compound.hyf"), - }; - - let h2 = Hyphenator::new(&*dic2); - println!("{}", h2.hyphenate_word("motorcycle", '=')); - - let dic3 = match unsafe { mapped_hyph::load_file("tests/rhmin.hyf") } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let h3 = Hyphenator::new(&*dic3); - println!("{}", h3.hyphenate_word("övéit", '=')); - println!("{}", h3.hyphenate_word("అంగడిధర", '=')); - - let dic4 = match unsafe { mapped_hyph::load_file("tests/num.hyf") } { - Some(dic) => dic, - _ => panic!("failed to load 
dictionary {}", "tests/num.hyf"), - }; - let h4 = Hyphenator::new(&*dic4); - - println!("{}", h4.hyphenate_word("123foobar123", '=')); - println!("{}", h4.hyphenate_word("123foobarfoobar", '=')); - println!("{}", h4.hyphenate_word("foobarfoobar123", '=')); - println!("{}", h4.hyphenate_word("123foobarfoobar123", '=')); -} diff --git a/third_party/rust/mapped_hyph/tests/base.hyf b/third_party/rust/mapped_hyph/tests/base.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/tests/base.hyph b/third_party/rust/mapped_hyph/tests/base.hyph deleted file mode 100644 index 550c57c9ad89..000000000000 --- a/third_party/rust/mapped_hyph/tests/base.hyph +++ /dev/null @@ -1,4543 +0,0 @@ -aarhus -abase -abate -abbeys -abby -abducts -aber=ra=tions -ab=hor=rer -abil=i=ties -ab=jur=ing -ablest -abodes -abo=li=tion=ist -abor=tion -about -abram -abridged -abruptly -ab=sconds -ab=sently -ab=solved -ab=sorp=tion -ab=sti=nence -ab=strac=tor -abun=dance -abuts -abyssinian -aca=pulco -ac=cel=er=a=tor -ac=cen=tu=ated -ac=cepted -ac=ces=si=bil=ity -ac=ci=den=tal -ac=cli=mated -ac=com=mo=dat=ing -ac=com=pa=ny=ing -ac=com=plish=ments -ac=cords -ac=coun=tant -ac=cre=tion -ac=cul=tur=a=tion -ac=cu=racy -ac=cused -aces -achieve -acid -ac=knowl=edge=able -acme -acous=tics -ac=qui=es=cent -ac=quis=i=tive -acres -acrop=o=lis -acti=nome=ters -ac=ti=va=tors -ac=tors -ac=tu=ar=ial -acute -ada=gios -adap=ta=tion -adapts -ad=dict=ing -ad=di=tions -ad=dresser -ad=duc=ing -aden -ad=her=ents -adi=a=bat=i=cally -ad=join=ing -ad=judg=ing -ad=jured -ad=just=ment -ad=min=is=ter -ad=min=is=tra=tively -ad=mire -ad=mis=sions -ad=mixed -ad=mo=ni=tions -adopted -adore -adrian -ad=sorbs -adul=terer -ad=um=brat=ing -ad=van=ta=geous -ad=ven=tur=ers -ad=versely -ad=ver=tises -ad=visees -ad=vo=cacy -aer=ate -aer=obac=ter -aerosols -af=fairs -af=fec=tions -af=fil=i=at=ing -af=firmed -af=flic=tion -af=fords -afghans -afore=thought -african=izes -af=ter=im=age -af=ter=ward 
-age -ager -ag=glu=ti=nated -ag=gra=va=tion -ag=gres=sive -ag=ile -ag=i=ta=tor -ag=o=nies -agree=ably -agri=cul=tur=ally -aide -ail=ing -aims -air=drops -air=foil -air=line -air=planes -air=tight -akin -alamo -alas -al=ba=tross -al=bums -al=co=holism -aldrich -alert=ing -alexan=dria -alga -al=ge=rian -al=go=rithms -ali=cia -aligned -al=is=tair -al=lan -al=leges -al=le=gory -al=ler=gic -al=ley=way -al=lit=er=a=tion -al=lo=ca=tor -al=lots -al=low=ing -al=lure=ment -al=maden -al=nico -aloof=ness -al=pha=bet=ized -al=sa=tian -al=ter=ations -al=ter=nates -al=thaea -al=tru=is=ti=cally -alve=o=lar -amal=ga=mate -amass -amaze -ama=zons -am=bigu=ous -am=bled -am=bushed -amend -amer=ica -amer=i=cans -amide -am=mo=nia -among -amor=tized -amour -am=phib=ians -am=pli=fiers -am=pu=tated -amuse=ment -an=abap=tist -ana=gram -ana=logue -an=a=lyt=ic=i=ties -anaphoric -anas=to=moses -anatomy -an=chorite -an=dalu=sia -an=dover -anec=dote -anes=thetic -an=ge=leno -an=gered -an=glia -an=gola -an=gu=lar -an=i=mated -an=i=mism -anita -an=napo=lis -an=ni=hi=lated -an=no=ta=tion -an=noy -an=nu=ally -an=nuls -an=odes -anoma=lously -anselm -ant -an=tag=o=nizes -an=te=date -an=tholo=gies -an=thro=po=mor=phi=cally -an=tic=i=pates -an=ti=dotes -an=ti=mony -an=ti=quate -an=ti=semitism -an=ti=thet=i=cal -an=to=nio -anx=ious -any=way -ap=a=thy -apha=sia -api=ary -apoc=ryphal -apol=o=gist -apos=tolic -ap=pall -ap=par=ently -ap=pear -ap=pease=ment -ap=pended -ap=per=tains -ap=plauds -ap=pli=ca=ble -ap=plier -ap=pointer -ap=por=tion=ing -ap=prais=ers -ap=pre=ci=a=tion -ap=pre=hen=sively -ap=proach -ap=pro=pri=ate -ap=proval -ap=prox=i=mated -april -aptly -aquifer -ara=bi=ans -aramco -ar=bi=trat=ing -ar=cades -ar=chaism -arche=ol=o=gist -archimedes -ar=chi=tec=tures -arc=ing -ar=dently -are=quipa -ar=gos -ar=gu=ment -arid=ity -aris=to=crat -arith=me=tize -arm -arm=chairs -arm=ing -armpits -arousal -ar=rack -ar=range=ment -ar=rears -ar=rhe=nius -ar=ro=gate -ar=royo -ar=te=rial -arthri=tis 
-ar=tic=u=lately -ar=ti=fact -ar=tillerist -arts -as=cen=dant -as=cent -as=cot -ashamedly -ash=tray -asi=at=ics -ask=ing -as=per=sions -as=pi=ra=tion -ass -as=sas=si=nated -as=say -as=sem=blies -as=serter -as=sess -as=siduity -as=sign=ing -as=sist -as=so=ciate -as=so=ci=a=tor -as=suaged -as=sure -as=syr=i=an=ize -as=ter=oid -as=ton=ish=ingly -astride -as=tro=nom=i=cally -asym=met=ric -asyn=chronously -athe=ism -ath=letes -at=las -at=om=iza=tion -atone=ment -at=ro=phies -at=tach=ing -at=tain -at=tempt -at=ten=dants -at=ten=tion=al=ity -at=ten=u=a=tor -at=tired -at=tracted -at=tributable -at=tune -auburn -au=di=bly -au=diome=ter -au=di=tions -auger -au=gust -au=ral -aus=cul=tated -aus=terely -aus=tri=an=ize -au=then=ti=ca=tor -au=thor=i=ties -au=thors -au=to=cor=re=late -au=todecre=ments -au=toin=dex -au=toma=ton -au=topi=lot -au=tum=nal -availer -avari=cious -av=enues -avers -avian -avionic -avoid=able -avow -awak=ened -awards -aw=ful=ness -awry -ax=i=o=log=i=cal -ax=ioms -ayes -azure -ba=belizes -baby=ing -bac=chus -back=bend -back=fill -back=o=rder -backscat=ters -back=stitch -back=tracks -back=yard -bad=ger -baf=fle -bag=gage -bagro=dia -bailiff -baits -bakes -bal=ancers -bald=win -balka=niza=tion -balks -baller -bal=loon -ballplayer -bal=sam -bam=boo -ban=dage -band=pass -bane -ban=gui -bank -bankrupts -bans -bap=tism -bap=tized -bar=barism -bar=bells -bards -barest -barhop -barks -barn=hard -barom=e=ters -barr -bar=ren -bar=ron -barter -basalt -base=less -bash -ba=sics -bas=ket=ball -bassinets -batavia -bather -bath=tub -bat=ted -bat=ting -bat=tle=ments -baude=laire -bawl=ing -bay=o=net -be -beaded -beaker -bean=bag -bearded -beast -be=at=i=fi=ca=tion -beau -beau=ti=fied -beavers -becker -be=com=ingly -bed=der -bed=post -bed=spread -beecham -beefy -beethoven -be=fell -be=foul -be=fud=dles -beg=gary -be=got=ten -be=guil=ing -be=hav=ior=ism -be=hold -be=ing -be=lay -bel=fry -be=liev=able -be=lit=tles -belles -bel=liger=ents -bells -be=long -belt=ing -be=moans 
-bend=able -bene=dic=tions -ben=e=fi=ciary -ben=gal -bent -be=queath -be=rat=ing -beres=ford -berib=boned -berlin=ers -bernar=dine -bernoulli -bertie -be=sets -be=smirched -be=spoke -best=ing -bet -be=trayed -bette -be=tween -be=wail -be=wil=der=ment -bianco -bibles -bi=car=bon=ate -bi=con=vex -bid=der -bi=en=nial -big=ger -bi=har=monic -bi=l=abial -bilk -bil=let -billings -bimet=allism -bind -bing=ham=ton -bio=chem=istry -bi=o=log=i=cally -biopsy -bipeds -bird=baths -birm=ing=hamize -births -bi=sec=tors -bisques -bites -bit=terly -bi=valves -blab=ber=mouths -black=burn -black=foots -black=mailed -blacks -blaine -blamers -bland -blan=keters -blares -blas=phe=mous=ness -blatz -bleach=ers -bleat=ing -blem=ishes -bless=ings -blind=fold -blinked -bliss=fully -bliz=zard -bloch -block=ers -blond -blood=i=est -bloom -blos=soms -blow=fish -blud=geons -blueprint -bluish -blunted -blur=ring -blush=ing -boarded -boaster -boathouse -boatswain -bobb=sey -bo=den=heim -body=build=ing -bog=art -bo=gus -boil=ers -bold=face -bol=she=vist -bolton -bom=bas=tic -bo=nan=zas -bonds=man -bon=ham -bon=tempo -book=cases -book=keep=ers -book=store -booms -booster -boo=tle -boot=strap=ping -bor=den -bo=re=alis -born -bor=row=ers -bosses -botanist -bother -bot=tler -bo=tulism -bounce -bounden -bou=quet -bou=tique -bowd=ler=iz=ing -bowl -bow=string -box=ing -boyfriend -braced -brad=bury -brag=ger -braille -brain=storm -brakes -branch=ings -bran=dish=ing -brashly -braun -brav=ing -bray=ing -brazil -bread -bread=win=ners -break=fast -break=through -breast=works -breath=lessly -breed=ing -bren=nan -brevet -brew=ery -bribers -brick=lay=ers -bridge -bridge=work -briefed -brig -brighten -brighton -brim=ming -brings -bris=tle -britisher -broaches -broad=casts -broadly -broglie -bro=ken=ness -bronchial -brooch -brook=field -broth -brow=beat -brow=n=ian -bruce -brunette -brush=ing -bru=tal=ized -bryce -buch=wald -buck=ler -bucky -bud=dies -bud=geters -buff -buf=fet=ings -bug=ger -bugs -built -bulging 
-bull=doze -bull=frog -bul=ly=ing -bum=bling -bump=tious -bun=dle -bun=gler -bunkhouse -bunted -buoys -bu=reau=cracy -burgher -bur=glarproof=ing -burke -burn -burn=ings -burnt=ness -bur=row=ing -bursty -busch -bush=whacked -busi=nesslike -bus=tards -butchered -but=ter=cup -but=ter=nut -but=ton=holes -bu=tyrate -buz=zard -bye -by=pass=ing -by=stander -byzan=tinizes -cab=i=net -cache -cac=tus -cae=sarize -cager -ca=jole -calais -cal=cu=late -cal=cu=lus -cal=gary -cal=ico -callaghan -cal=loused -calm=ingly -cal=tech -ca=lypso -camem=bert -camino -cam=paign=ing -camps -cana=di=an=ize -can=celed -can=di=dacy -can=dler -ca=nine -can=nery -can=non -canon=i=cal -canopy -canto -can=vassed -ca=pa=ble -ca=pac=i=tors -capita -cap=i=tal=iz=ers -cap=ping -cap=stone -cap=ti=vates -cap=turer -car=a=vans -car=bon=dale -car=boniz=ing -card=board -car=di=ol=ogy -care=fully -ca=ress=ing -car=i=ca=ture -carls=bad -car=na=tion -car=o=line -car=pen=ters -car=riages -car=ruthers -carter -car=ton -carve -cas=cades -cashed -cas=ings -cas=sette -castes -casts -catalina -cat=a=pult -catches -cat=e=go=rizes -cathe=dral -catholi=cisms -cat=tle -caul=drons -causer -cau=tioner -cav=a=lier=ness -cav=ernous -caw=ing -ce=cil -celanese -celer=ity -cel=list -celti=cizes -cen=sor=ing -cen=taur -cen=time=ter -cen=tral=ized -cen=troid -cere=bral -cer=tain=ties -cer=ti=fies -cezanne -chaf=fey -chair=ing -chal=ices -chal=leng=ing -cham=paign -chan=cel=lor -change=abil=ity -chan=neled -chanter -chapel -chap=ter -char=ac=ter=ize -charge=able -char=i=ta=ble -char=lotte -chars -chart=ings -chas=ing -chas=tis=ers -chat=tel -chauf=feured -cheaply -check=book -check=out -cheek=bone -cheer=i=ness -cheeses -chemise -cher=ishes -cheryl -chests -cheyennes -chi=canos -chides -child=hood -chill -chime -chi=nas -chin=ning -chi=ro=prac=tor -chit -chloro=plasts -choir -choose -chop=ping -chore=o=graph -chou -chris=ten=son -chris=tian=iz=ing -christoph -chron=i=cle -chronol=ogy -chuck=les -church=go=ing -churn 
-ci=cero=ni=an=ize -cin=derella -ci=pher=texts -cir=cuitously -cir=cu=lat=ing -cir=cum=nav=i=gates -cir=cum=stanced -cir=cuses -cities -civet -civ=i=lized -claimed -clam=bers -clams -clap=board -clar=i=fi=ca=tions -clash -classes -clas=si=fiers -clat=tered -claus=tro=pho=bia -cleaned -cleansed -clearer -cleaved -clemente -clerked -cliches -cliffs -climb -clincher -clink -clip=pers -cloaks -clock=ings -clogs -close=ness -clos=ing -cloth=ing -cloud=ing -clowns -clucks -clumsy -clutch=ing -coaches -coali=tion -coastal -coat=ing -coax=ing -cob=web -cock=pit -co=coon -codes -cod=i=fies -co=ef=fi=cient -co=ex=ist -cof=fer -cog=i=tated -cogs -co=her=ing -coils -co=in=cid=ing -colder -col=icky -col=lab=o=ra=tor -col=lared -col=lect=ing -col=leges -collins -colom=bia -colonies -colons -col=or=less -colum=nize -com=bated -com=bi=na=tor -comb=ings -comedic -cometary -com=fort=ing -comma -com=mand=ment -com=mem=o=ra=tive -com=mended -com=ment=ing -com=mis=sion=ers -com=mit=teemen -com=mon=al=i=ties -com=mon=wealth -com=mu=ni=cated -com=mu=nists -com=mut=ing -com=pactors -com=pa=ra=bly -com=par=i=son -com=pas=sion -com=pelling -com=pen=satory -com=pe=ti=tions -com=pil=ers -com=plaint -com=pleted -com=plex=i=ties -com=pli=ca=tions -com=pli=ment=ing -com=pos=edly -com=post -com=pre=hen=si=bil=ity -com=pres=sion -com=pro=mis=ers -com=pul=sory -com=puted -com=radely -con=cate=na=tion -con=cede -con=ceived -con=cen=tra=tors -con=cep=tu=al=ized -con=certed -con=cise=ness -con=coct -con=cretes -con=cur=ring -con=demns -con=di=tional -con=doned -con=duc=tion -con=fec=tionery -con=ferred -con=fes=sions -con=fi=den=tial -con=fig=ure -con=fin=ing -con=fis=cates -con=fo=cal -con=found=ing -con=fu=cian -con=fu=sion -congo -con=gre=gat=ing -con=gress=women -con=joined -con=junc=ture -con=nected -con=nec=tor -con=nors -con=quered -con=rail -con=se=crate -con=sent=ing -con=ser=va=tion -con=served -con=sid=ered -con=sis=tent -con=sol=ers -con=so=nants -con=spir=a=tor -con=stant -con=stituent 
-con=sti=tu=tions -con=structed -con=structs -con=sul=tant -con=sumed -con=sump=tions -con=tain -con=tam=i=nated -con=tem=pla=tive -con=tender -con=tent=ment -con=text -con=ti=nents -con=tin=u=a=tions -con=tor=tions -con=tract=ing -con=tra=dict=ing -con=trap=tions -con=tribute -con=trite -con=trol=la=bil=ity -con=tro=versy -con=vened -con=ven=tion=ally -con=ver=santly -con=ver=sion -con=vex -con=vict -con=vinces -con=voys -cooked -cool=ers -coon -co=op=er=a=tions -co=or=di=nates -copeland -cop=ings -co=pro=ces=sor -co=quette -cords -corinthian -corks -cor=nered -corns -coro=nary -cor=po=rately -cor=rect -cor=rect=ness -cor=re=spond -cor=ri=dors -cor=rob=o=ra=tive -cor=rup=tion -cor=val=lis -cos=mopoli=tan -costs -cots -cotyle=don -coughs -coun=cil=woman -coun=selors -coun=ter=act=ing -coun=ter=feited -coun=ter=part -coun=ter=sunk -coun=try=wide -cou=plings -courser -cour=te=sies -court=rooms -covenant -cov=er=let -cov=etous=ness -cower -cowl -coypu -crack=ers -cra=dles -craftsper=son -cramps -crank -cranny -crater -craw=ford -craze -creaked -creams -cre=ation -cre=dence -cred=i=tor -creeks -cre=mates -cres=cents -cretin -cricket -crim=i=nal -crip=ple -criss=cross -crit=i=cizes -croaks -cro=cus -crop=per -crosser -crosstalk -crowd -crown=ing -cru=ci=fix=ion -cruel -cruis=ing -crum=pled -cru=sade -crushes -crux -cryp=tic -crys=tal=lize -cubans -cu=cum=bers -cuf=flink -cul=mi=nate -cul=tivable -cul=tural -cum=mings -cup=board -curb -cur=ing -curlers -cur=rent -cur=ry=ing -cur=sory -curtly -curv=ing -custer -cus=tomiz=able -cut -cuts -cyanamid -cycli=cally -cygnus -cy=press -cy=to=plasm -dab=bles -dadais=tic -dahl -dairy -dali -dam=ages -damns -damsel -danc=ing -dan=gle -danize -dare -darken -darn -darted -dar=winizes -database -dates -daunted -davy -day=dreams -daz=zled -deaden -deaf -deal=ings -deanna -death -de=bater -de=bil=i=tates -debtor -debu=tante -de=cay -de=ceit -de=cel=er=ate -de=cent -de=cid=abil=ity -dec=i=mate -de=ci=sion -decks -de=clarer -de=clin=ers 
-de=cod=ings -de=com=po=si=tion -dec=o=ra=tive -de=creases -decre=ments -ded=i=cated -deduct -deed=ing -deep -deere -de=feats -de=fen=dant -de=fen=es=trat=ing -de=fer=ments -de=fi=cien=cies -de=fine -def=i=ni=tions -de=for=ma=tion -defy -degra=da=tion -de=ify -de=jected -de=lay=ing -deleter -de=lib=er=ated -del=i=ca=cies -de=light=ful -de=lim=it=ing -deliri=ous -de=liv=er=ies -del=phic -del=uged -de=mand -deme=ter -de=mod=u=late -demons -demon=stra=tor -de=mul=ti=plex -denebola -den=i=grates -de=nom=i=na=tors -de=not=ing -dens=est -den=tists -deny=ing -de=par=ture -de=pen=dent -de=pleted -de=ploy -de=pose -de=pos=i=tors -de=pre=ci=ated -de=pri=va=tions -de=queued -dereg=u=late -de=rive -de=scend -de=scents -de=scrip=tively -de=sert=ers -de=serv=ings -des=ig=na=tor -de=sire -des=o=late -despatched -de=spite -desta=bi=lize -de=stroyed -de=struc=tive=ness -de=tacher -de=tained -de=tec=tive -de=te=ri=o=rated -de=ter=mi=na=tion -de=ter=min=is=tic -de=trac=tor -dev=as=tate -de=vel=op=ment -de=vi=a=tion -de=vised -de=vot=edly -de=vours -dexedrine -di=ag=nose -di=ag=o=nals -dial -di=a=logue -di=a=mond -di=ar=rhea -dick=in=son -dic=ta=to=rial -did=dle -dies -di=et=rich -dif=fer=en=tials -dif=fer=ers -dif=fusely -di=gest -dig=gings -dig=its -di=gress=ing -di=lap=i=date -dili=gence -di=lu=tion -di=men=sions -dimmed -dine -din=ing -dio=genes -diph=thong -dip=per -di=rec=tion -di=rec=torate -dirt -dis=able -dis=af=fec=tion -dis=al=low=ing -dis=ap=pear=ances -dis=ap=prove -dis=as=sem=bles -dis=bands -dis=card=ing -dis=cerns -dis=ci=plines -dis=clo=sure -dis=con=nects -dis=cord -dis=cour=ag=ing -dis=cov=ery -dis=cre=tion -dis=cuss -dis=ease -dis=fig=ure -dis=grun=tle -dis=gust=ingly -dis=hon=estly -dish=wa=ter -dis=joint -disk -dis=lo=cates -dis=may=ing -dis=mis=sers -dis=obe=di=ent -dis=own -dis=patched -dis=pen=sary -dis=persed -dis=plac=ing -dis=pleas=ing -dis=po=si=tion -dis=puter -dis=qui=et=ing -dis=rup=tion -dis=sem=ble -dis=senter -dis=sim=i=lar=i=ties -dis=so=ci=at=ing 
-distaff -dis=tastes -dis=till=ing -dis=tin=guish -dis=torts -dis=tresses -dis=tribu=tiv=ity -dis=turbed -ditty -di=ver=gence -di=ver=si=fies -di=vert=ing -div=i=dend -di=vin=ing -di=vi=sors -dix=ieland -dober=man -doc=toral -doc=u=men=taries -do=dec=a=he=dra -doe -dog=house -dolan -dol=lies -domenico -domi=cile -dom=i=neer=ing -don=ahue -don=key -doo=ley -door=man -dop=ers -doric -dort=mund -doted -dou=ble=header -doubt -doubts -doves -downey -down=load=ing -down=stairs -doyle -dra=co=nian -drafty -dra=gooned -dram -drape -draughts -draw=ings -dreaded -dream=ers -dregs -dress=ing -dries -driller -drip -drive=way -droop -drop=pers -droves -drudgery -drum=mers -drunkly -du=al=ity -dubuque -ducts -dug -dull=ness -dumbly -dun=bar -dun=geons -du=pli=ca=ble -dupont -du=ra=tion -dur=ward -duster -dutch=man -dwarfed -dwelt -dye=ing -dy=namism -dysen=tery -ear -ear=marked -earnest=ness -earth -earth=quakes -eases -east=erner -easy -eaves -eben -echoed -ecol=ogy -econ=o=mize -ecuador -ed=enizes -edict -edi=tion -ed=mon=ton -ed=u=cat=ing -ed=wards -ef=fect=ing -ef=fi=cacy -ef=fort=less=ness -eggshell -egyp=tian=ize -eigen=state -eighthes -eis=ner -eject=ing -elab=o=rately -elapses -el=derly -elec=tions -elec=tri=cally -elec=tro=cute -elec=troen=cephalog=ra=phy -elec=tron=ics -el=e=men=tal -el=e=va=tion -elicited -elim=i=nat=ing -elite -ella -el=lip=soids -elmhurst -else -elu=ci=da=tion -ely -eman=ci=pate -em=bar=rass -em=beds -em=bod=ied -em=brac=ing -emer=ald -emer=i=tus -emil -emits -emo=tion=ally -em=pha=siz=ing -em=ploy=able -em=po=rium -emp=tily -em=u=la=tor -en=acted -en=camp=ing -en=chanter -en=cir=cled -en=coder -en=counter -en=cour=ag=ingly -en=cum=bered -en=dan=gers -en=demic -en=dorse -en=dows -en=dur=ingly -en=fee=ble -en=fran=chise -en=gels -en=gines -en=glish=men -en=gulf -en=join -en=joys -en=light=ened -en=livens -enor=mity -en=quirer -en=riches -en=sem=bles -en=snar=ing -en=sures -en=ter=prise -en=ter=tain=ment -en=ticed -en=ti=tle -en=treat -en=trepreneurs 
-enu=mer=ated -en=veloped -en=v=i=ron -en=vi=sioned -ephemeral -epi=cur=izes -epis=co=palian -epi=taphs -epochs -equal=ize -equates -equi=li=brate -equips -equiv=o=cally -erased -ere -ergo -er=lang -erode -er=ra=tum -errs -es=ca=lates -es=capes -es=corts -es=pe=cially -es=quires -es=sen=tially -es=tates -es=ti=mated -eter=nal -eth=er=nets -etruria -eu=le=rian -eura=sia -eu=ro=peanized -evade -eval=u=a=tive -evap=o=ra=tion -even=hand=ed=ness -events -ev=er=glades -ev=ery=thing -ev=i=dences -evinces -evolve -ex=ac=er=bated -ex=ac=tions -ex=ag=ger=a=tions -ex=am=ined -ex=as=per=ates -ex=ceeded -ex=cel=lently -ex=cep=tions -ex=change=able -ex=ci=sion -ex=cit=ingly -ex=clam=a=tory -ex=clu=sive=ness -ex=cret=ing -ex=cused -ex=e=cu=tional -ex=em=pli=fied -ex=empts -ex=er=tion -ex=haust=edly -ex=hi=bi=tions -ex=ile -ex=is=ten=tial=ist -ex=or=bi=tant -ex=panders -ex=pect -ex=pects -ex=pe=di=tious -ex=pen=di=ture -ex=pe=ri=enc=ing -ex=per=i=ments -ex=pires -ex=pla=na=tions -ex=ploit -ex=plo=rations -ex=plo=sive -ex=po=nen=ti=at=ing -ex=ports -ex=po=sure -ex=press=ibil=ity -ex=pul=sion -ex=tem=po=ra=ne=ous -ex=ten=sive -ex=ter=mi=nate -ex=tin=guished -ex=tract -ex=tra=ne=ous -ex=trap=o=la=tion -ex=tremely -ex=ult -eye=glasses -eye=sight -fa=bles -fa=cade -facile -fac=sim=ile -fac=to=ries -fac=ulty -fa=gin -fail=soft -faint=ness -fair=ing -faith=ful -fakes -fal=la=cious -fal=mouth -fal=si=fy=ing -fa=mil=iar -fam=i=lies -fa=nati=cism -fanci=ness -fan=ning -farad -farewells -farm=ers -far=rell -fas=ci=na=tion -fasted -fas=tid=i=ous -fate -fath=omed -fat=ten -faulkner -fauna -fa=vor=ing -fayette -fear=lessly -feat -feath=er=weight -fed -fee=ble=ness -feeds -feet -fe=line -fel=low=ships -fem=i=nism -fenc=ing -fer=men=ta=tion -fe=ro=ciously -fer=tile -fer=vent -fes=tiv=ity -fet=tered -fever=ish -fiat -fi=brously -fid=dled -fief -fiendish -fif=teenth -fight=ing -fiji -files -filled -film=ing -filthy -fi=nals -finder -fines -fin=ger=print -fin=ishes -finnish -fire=boat -fire=men 
-fire=wall -firm=ing -fis=cally -fishes -fis=sured -fitly -fitz=patrick -fix=a=tion -fix=ture -flagged -flak -flamer -flank=ing -flash -flask -flat=tered -flaunt=ing -flaw=lessly -fledglings -fleetly -flem=ish=ing -flew -flick=ing -flinches -flirt -floated -flood -floors -flo=ren=tine -floss=ing -flour=ished -flow=er=i=ness -fluc=tu=ate -fluffier -flu=o=resce -flut=ing -fly=ing -fo=cal -foes -fogy -fold=ers -folksy -fol=som -font -fooled -foot=ball -foot=ing -for=age -forbes -forcer -fore=arms -fore=fa=thers -for=eign -fore=see=able -fore=stalls -for=ever -forge -for=get=table -for=giv=ing -for=lornly -for=mal=ized -for=ma=tively -formi=cas -for=mu=lated -for=saken -forth=with -for=tiori -for=tu=itously -for=warder -fought -foun=da=tion -founds -four=some -foxes -frag=ile -fra=grantly -fram=ing -fran=cie -fran=coise -frank=ing -fraser -fray -freckle -fred=erico -free=ing -frees -freez=ing -frenchizes -fre=quented -fresh=ened -fresh=ness -freudi=an=ism -fric=tion -friendlier -friezes -fright=ful -frisia -frivolity -from -fronts -froth=ing -frue=hauf -fruits -fuch=sia -fu=jitsu -full -fum=bling -func=tion=ally -fun=da=men=tally -fun=gal -fun=nier -fur=long -fur=ni=ture -fur=ther=more -fuses -fu=tur=is=tic -gabled -gad=getry -gag=ing -gaines -galac=tic -galaxy -gal=lantly -gal=lon -gall=stone -gam=bled -games -gang=plank -gaped -garbed -gard=ner -gar=landed -gar=risoned -gaseous -gaspee -gas=tric -gath=ered -gauche -gaunt -gawky -gaze -gear=ing -gelatin -gemma -gen=er=al=ity -gen=er=als -generic -ge=netic -genre -gen=tler -geodesic -ge=o=log=i=cal -geo=phys=i=cal -geral=dine -ger=mane -ger=mi=nates -gestapo -get=ting -ghosted -gibral=tar -gig -gig=gle -gilds -gilt -ging=hams -gipsy -girl=ish -giver -glad=dest -glance -glar=ing -glazed -gleaner -glenda -glim=mer -glints -gloat -glo=ria -glo=ry=ing -glove -glow=ing -glynn -gnu -goats -gob=lins -god=mother -goethe -gold=enly -gold=s=tine -gon=dola -goode -goodyear -goren -gor=ton -got -goth=i=ciz=ing -goug=ing 
-gov=ern=ment -grab -grace=fully -gra=da=tions -grad=ual -graft -grained -grams -grand=fa=ther -grandpa -grant -gran=u=lates -graph=i=cal -gras=pable -grassi=est -grat=i=fi=ca=tion -gra=tu=itously -graves -grayed -grease -gre=cian=ize -greeks -green=feld -greens -greeter -grenades -greyest -grievances -grif=fith -grimes -grinds -gripped -gritty -gro=cers -grooved -gross=est -gro=ton -group -grov=els -growl=ing -grubs -grum=bling -guano -guard=edly -gu=ber=na=to=rial -guest -guide=line -guiltier -guises -gul=lah -gum=ming -gun=ner -gur=gle -gustafson -guts -guyer -gym=nas=tics -haas -ha=bit=ual -hacks -hag -hail -hairier -hale -hall=mark -halpern -halve -ham=burg=ers -ham=mer=ing -hamp=shire -hand=books -hand=i=cap -hand=ker=chiefs -hand=shake -handy -hang=man -han=nah -hansel -hap=lessly -hap=pily -harbinger -harder -hard=ships -harken -harm=ful=ness -har=mo=niously -har=ness=ing -har=ri=man -harry -har=vardize -har=veys -has=sle -hat -hate=fully -hat=tie -hauler -hausa -havoc -hawthorne -hay=wood -head -head=lands -head=room -heals -healy -hear=ings -heartily -heater -heaved -heav=i=ness -he=brides -hedge=hog -heeds -hegelian=izes -heights -heiresses -he=li=copter -hel=l=enized -hel=met -help=fully -hem -hemp -hen=drick -hen=ri=etta -her=alds -herder -here=ford -here=un=der -her=mit -hero=ically -her=ring -hert=zog -hes=pe=rus -het=eroge=nous -heuser -hexagon -hi=ber=nate -hid=den -hi=er=ar=chic -high=field -high=nesses -hikes -hill=crest -hilt -hin=dered -hin=dus=tan -hint=ing -hired -his -his=tograms -hitch -hither -hit=ting -hoarse=ness -hobby -hoe -hoists -holds -hol=landaise -hol=low=ness -holo=caust -homage -home=o=mor=phism -home=spun -hom=ing -ho=mo=sex=ual -hon=esty -hon=ey=moon=ing -hon=o=raries -hood=lum -hooker -hoosier=ize -hooves -hope=less=ness -ho=race -horn -hor=ri=ble -hor=rors -horse=shoer -hos=pi=tal=ize -hostesses -hotly -hound=ing -house=flies -house=top -hover -howled -hu=bert -huey -hugo -hu=man=i=ties -hum=bling -hu=mid=i=fiers 
-hu=mil=i=a=tion -hu=mor=ers -humpty -hung -hun=gry -hunt=ley -hurl=ing -hur=ry=ing -hus=bands -husks -hutchins -hyde -hy=giene -hy=phen=ate -hy=pothe=ses -hys=ter=i=cal -ib=sen -ici=cle -icosa=he=dron -ide=al=ize -iden=ti=cal -iden=tify -id=iosyn=crasy -idles -ig=nite -ig=nores -il=le=gal=ity -il=log=i=cal -il=lu=sions -il=lus=tra=tive -im=a=gen -imag=ine -im=brium -im=ma=te=rial -im=mensely -im=mi=grat=ing -im=mov=abil=ity -im=pacted -im=pale -im=pa=tiently -im=pedes -im=pen=e=tra=ble -im=per=fectly -im=per=ma=nent -im=per=son=ations -im=pinges -im=ple=mentable -im=pli=cants -im=plied -im=por=tant -im=poses -im=po=tence -im=prac=ti=cally -im=press=ible -im=press=ment -im=pris=on=ments -im=prove=ment -im=pro=vis=ers -im=pul=sion -in=ac=ces=si=ble -in=ad=e=quate -inane -in=audi=ble -inca -in=cas -in=ces=santly -in=ci=den=tally -in=cit=ing -in=closes -in=clu=sive=ness -in=com=pa=ra=ble -in=com=pletely -in=con=gruity -in=con=sis=tent -in=con=ve=nient -in=cor=rect=ness -in=cred=u=lous -in=cu=bate -in=cur=able -in=de=ci=sive -in=dent -in=de=scrib=able -in=dex=ing -in=di=ca=tion -in=dif=fer=ence -in=dig=na=tion -in=di=rectly -in=dis=tinct -in=di=vid=u=ally -in=doc=tri=nat=ing -in=du=bitable -in=duc=tances -in=ducts -in=dus=tri=al=ist -in=dus=try -in=el=e=gant -inertly -in=ex=act -in=ex=pli=ca=ble -in=fantry -in=fec=tion -in=fe=rior -in=fer=tile -in=fi=nite -in=fir=mary -in=flated -in=flict=ing -in=form -in=for=ma=tively -in=fre=quently -in=fu=ri=at=ing -in=ge=nious=ness -in=gra=ti=ate -in=hab=ited -in=her=ently -in=her=itress -in=hibitor -in=im=i=cal -ini=tial=ized -ini=ti=at=ing -in=jec=tion -in=jured -inker -in=let -in=ner -in=nocu=ous=ness -in=oc=u=late -in=quire -in=quis=i=tive -in=scribed -in=se=curely -in=ser=tion -in=sid=i=ous=ness -in=sin=u=ated -in=sis=tently -in=som=nia -in=spi=ra=tion -in=stal=la=tion -in=stances -in=stan=ti=a=tions -in=still -in=sti=tutes -in=struct -in=structs -in=stru=ments -in=su=la=tion -in=sur=ance -in=sur=rec=tion -in=te=grand 
-in=tel=lect -in=tel=li=gi=ble -in=ten=si=fi=ca=tion -in=ten=sively -in=ter -in=ter=cept -in=ter=changed -in=ter=com=mu=ni=cates -in=ter=course -in=ter=ested -in=ter=fered -in=ter=group -in=ter=leaved -in=ter=minable -in=ter=mod=ule -in=ter=na=tion=al=ity -in=ter=per=sonal -in=ter=posed -in=ter=pret=ing -in=ter=re=la=tions -in=ter=rupt -in=ter=sect=ing -in=ter=state -in=ter=ven=ing -in=ter=wo=ven -in=ti=ma=tion -in=tol=er=ance -in=tractabil=ity -in=traof=fice -in=trigued -in=tro=duc=tions -in=truder -in=tu=ba=tion -in=vaders -in=va=lidi=ties -in=vari=ants -in=ven=tively -in=verses -in=vert=ing -in=ves=tiga=tive -in=vet=er=ate -in=vites -in=voked -in=volves -io=ni=ans -ira -irately -irish=man -ironic -ir=ra=tional -ir=reg=u=lar -ir=re=press=ible -ir=re=versibil=ity -ir=ri=ta=ble -irv=ing -is=fa=han -is=land -iso=lated -iso=mor=phisms -is=suance -it -ital=i=cize -item=iza=tions -it=er=a=tion -ito -izves=tia -jack=ets -jacky -ja=cobus -jailer -ja=maican -janet -janus -jar=gon -jaun=ti=ness -jay -jeanne -jef=fer=so=nian -jen=nifer -jeremy -jer=oboam -jest -je=suit=iz=ing -jew=eled -jews -jin=gled -joaquin -joes -john -joiner -jok=ers -jolts -jor=dan -jose=phus -jot=ting -jour=nals -joust=ing -joy=ous -ju=daica -judge -ju=dith -ju=goslavia -julie -jump -junc=tures -ju=niper -juras -jury -jus=ti=fiers -jut=land -kad=dish -kamikazes -kant -karp -ka=tow=ice -keel=ing -keep=ers -kemp -ken=ney -ke=pler -ker=ouac -key -key=pad -khrushchevs -kidde -kid=ney -kil=i=man=jaro -kills -kilo=joule -ki=mono -kin=dling -king=pin -kin=nick=in=nic -kir=choff -kisses -kit=ing -klein -knap=sacks -kneel -knicker=bock=ers -knights -knocked -knots -knowl=edge -knuck=les -ko=dachrome -ko=rea -kraka=toa -kro=necker -kurd -la=bel=ing -la=borer -labyrinths -lac=erta -lacks -ladies -la=goon -laid=law -lamarck -lament -lamp -lanced -land=ings -lands -lange -lan=guish -laos -lapse -largely -lar=son -lash=ing -las=zlo -later -la=tin=ity -lat=i=tudes -laud=able -laugh=lin -laun=dered -lau=rels 
-laven=der -law=fully -law=suit -lay=ers -lazarus -leaded -leafed -lea=guers -le=an=der -leap=ing -leary -leath=ern -leav=ing -lec=tures -leeds -left=ists -le=gal=iza=tion -leger -leg=is=lated -le=git=i=mate -leila -lemon -lends -le=niency -lens -leonardo -les=bian -les=son -let=ter -levee -lev=elly -levin -lewdly -lex=ing=ton -li=belous -lib=er=ated -li=bido -li=cense -lick -lied -lifeboat -life=time -ligget -light=hearted -like -like=ness -lil=ian -li=man -limit -lim=its -lind -lindy -lin=early -lin=gerie -lin=ing -lin=naeus -li=oness -liq=uid -lise -lis=tened -list=ings -lit=er=al=ness -lithua=nia -lit=ter=ing -live -liv=ers -lizzie -loaf -loathing -lob=ster -lo=cally -lo=ca=tor -lock=ian -lock=wood -lodges -log=a=rithm -log=i=cally -logs -loi=ters -lon=doniza=tion -lon=ers -long=ings -look=ers -looms -loose=leaf -loos=ing -lords -lorry -lossi=est -lo=tus -louisa -lour=des -lovelace -loves -low=est -loy=ally -lucerne -luck=ier -lu=di=crous -luke -lu=mi=nously -lunch -lunged -lur=ing -lust -luther -lux=u=ri=antly -lyle -lynx -mac -mac=don=ald -maces -ma=chin=ery -mackey -macro=molecule -mad=den -mad=hya -mad=sen -mag=el=lanic -mag=ill -mag=ne=ti=z=able -mag=nify -maguire -maids -mail=man -main=frames -main=tained -majesty -maker -mal=ady -mal=colm -mal=formed -ma=li=cious=ness -mal=one -mal=ton -man=age -man=ag=ing -manda=tory -manger -man=hole -man=i=cur=ing -manila -ma=nip=u=la=tive -mann -manors -man=tissa -man=u=fac=tured -mao -maps -marched -mardis -margo -mari=nade -mar=itime -mar=ketabil=ity -mark=ings -mar=malade -mar=riott -mar=shal=ing -mar=tial -mar=tyr -mar=vels -mas=cara -mask=able -ma=sonite -mas=sa=cred -mast -mas=ter=piece -mas=tur=ba=tion -match=less -ma=te=ri=al=iz=ing -math=e=mat=i=cally -mat=ings -ma=trix -mat=tered -ma=tured -mauri=cio -max=ima -max=ims -maybe -may=oral -mc=cabe -mc=cluskey -mc=don=nell -mc=gov=ern -mc=kee -mclean -mcpher=son -meal=time -mean=ing=ful -meant -mea=sure=ments -me=chan=i=cally -medal -med=field -me=di=a=tions 
-medicine -med=i=tat=ing -medi=ums -meet=ing -mega=hertz -meis=ter -melcher -melodies -melpomene -mem=ber=ship -mem=o=randa -mem=o=rizes -menagerie -mendelizes -men=non=ite -men=tal=i=ties -men=tor -mer=ce=nar=i=ness -mer=ci=lessly -merged -mer=i=to=ri=ous -mer=rill -mesh -mes=sen=ger -messy -met=al=liza=tion -meta=phys=i=cal -me=te=oritic -me=thod=i=cally -meth=ods -metro -mews -mica -mick -mi=cro=bi=cide -mi=croe=co=nomics -mi=cron -mi=cro=pro=cess=ing -mi=cro=scope -mi=crovaxes -mid=dle=man -mid=night -mid=stream -mid=win=ter -mi=grate -mikoyan -mileage -milk -mill -mil=likan -mil=lionth -mill=stones -mil=tonized -minaret -mind=fully -min=eral -mini -min=ima -min=i=mizes -min=istries -mi=nor -min=strels -minute -mir=a=cle -miriam -mis=car=riage -mis=con=cep=tion -mis=er=ably -mis=giv=ings -mis=led -mis=plac=ing -miss=ing -mis=soula -mis=take -mistle=toe -mis=un=der=stand -mitch -mitres -mix=tures -moats -mocked -modally -mod=er=ated -mod=ern=izer -mod=icum -mod=i=fy=ing -mod=u=lar=iz=ing -mod=ule -moghul -moines -mol=davia -moles -mol=lusk -mo=men=tar=ily -monaco -mon=day -mon=go=lian -mon=keyed -mono=cotyle=don -mono=lithic -monos=table -mon=roe -mon=tague -mont=gomery -mon=u=ment -mooned -moor -moped -morass -more=house -morn -mor=pho=log=i=cal -morsels -mort=gage -mo=saic -mosque -mo=tels -moth=er=land -mo=tion=less=ness -mot=ley -mo=tor=ized -mound -moun=tain=ously -mourn=ers -mousy -mov=able -mov=ing -muck -mud=dled -muf=fin -mugs -mul=lah -mul=ti=com=puter -mul=ti=ple -mul=ti=pli=cand -mul=ti=plies -mul=ti=stage -mum=bles -mun=dane -mu=ni=tions -mur=der=ing -mur=murs -mus=covy -mush=roomed -mu=si=cians -muskrat -mus=sorgsky -mu=ta=bil=ity -mu=ta=tions -mu=ti=lat=ing -mut=ters -myce=naean -mys=te=ri=ous -mytholo=gies -na=gasaki -nair -naked=ness -names -nanook -nap=kin -nar=cotic -nar=row=est -nash -na=tal -na=tion=al=i=ties -na=tions -nat=u=ral=ist -naugh=ti=ness -navel -navona -ne=an=derthal -nears -neb=ula -ne=ces=si=ta=tion -neck=ties -nee=dled -needy 
-neg=a=tives -neg=li=gi=ble -ne=groid -neigh=bor=ing -neo=clas=sic -nero -nest=ing -nets -neu=ral -neu=tral -neva -new=bury=port -new=man -news=man -next -ni=belung -nicholls -nick=name -niel=son -night=fall -ni=hilism -nim=bler -nineties -nip=ponizes -no=bil=ity -noc=tur=nally -noel -nolan -nom=i=nee -non=con=ser=va=tive -non=de=ter=min=ism -non=govern=men=tal -non=lin=ear=ity -nonorthog=o=nal -non=seg=mented -non=ter=mi=nals -nook -nord=hoff -nor=mal=iza=tion -nor=man=iza=tions -north -north=ernly -nor=walk -nos=tradamus -no=ta=rizes -note -no=tice=able -no=ti=fies -not=ting=ham -no=vak -novices -nu=ances -nu=clide -nullary -num=ber -nu=mer=able -nu=mis=matic -nurs=ing -nu=tri=tious -nyquist -oases -obe=di=ent -ob=fus=cate -ob=jec=tively -obliged -oblit=er=at=ing -ob=scene -ob=serv=able -ob=servers -ob=so=letes -ob=struc=tion -ob=vi=ated -oc=ca=sional -oc=ci=den=tal=ize -oc=clu=sions -oc=cu=pied -oc=curs -oc=tag=o=nal -octets -oddly -odi=ous -o'dwyer -of=fended -of=fer -of=fi=cer -of=fi=ciously -oft -oil=cloth -ojibwa -old=en=burg -oleo=mar=garine -olivia -olym=pus -omi=nous=ness -om=nipresent -o'neill -on=looker -onus -opaquely -open=ings -op=er=ate -op=er=a=tor -op=pen=heimer -op=pose -op=pressed -opthalmic -op=ti=mist -op=ti=miz=ing -opts -or=anges -or=bital -or=ches=tral -or=der -or=di=nar=ily -ores -or=ga=ni=za=tion -or=gans -ori=en=tal=ized -ori=fices -orig=i=na=tion -or=leans -or=nate -orr -orville -os=cil=lates -o'shea -os=teopath -oth=ello -otto -ounces -out=burst -out=door -out=grow=ing -out=law=ing -out=live -out=per=forms -out=rages -out=stand=ing -out=vot=ing -out=wit=ting -over=board -over=crowds -over=es=ti=mates -over=hangs -over=joyed -over=load -overnighter -over=pro=duc=tion -over=run=ning -over=shad=ow=ing -over=sized -over=take -overtly -overuse -over=work=ing -owen -own=er=ship -ox=i=dized -ozzie -paci=fi=ca=tion -pack=aged -pack=ers -padding -pageant -pag=i=nat=ing -painful -paint=ing -pa=ja=mas -pale -pales=tine -pal=lia=tive -palo=mar 
-panacea -pan=demic -pan=els -panned -pan=the=ist -panty -pa=per=ers -par -pa=rades -paragon -par=al=lel -par=al=lels -pa=ram=e=ter=ize -para=mus -para=phrases -par=cel -par=doned -paren=the=ses -pares -parisian -park=ers -par=lay -par=ody -par=rots -par=si=fal -par=takes -par=tic=i=pant -par=tic=u=lar -par=ti=tioned -par=tridges -pas=sage=way -pas=sion -pass=port -pas=teur -pas=ture -patchy -patents -patho=gen=e=sis -pa=tients -pa=tri=cians -pa=trolling -pa=trons -pat=tern=ing -paula -paulus -pave=ment -pawn -payer -pay=offs -peace=fully -peaks -pearl -peat -pe=cu=liar -pedant -pe=di=a=tri=cian -peel=ing -peer=ing -peking -pem=broke -pence -pends -pen=e=tra=tion -penin=su=las -penn=syl=va=nia -pen=tagon -peo=pled -pep=pery -per=ceived -per=cents -per=chance -peren=ni=ally -per=fect=ness -per=forms -per=i=he=lion -pe=ri=od=i=cally -per=ish=able -perkins -per=me=at=ing -per=mit -per=ni=cious -per=pe=tra=tion -per=pet=u=a=tion -per=se=cut=ing -per=se=veres -per=sist -per=sonal -per=son=i=fied -per=spi=ra=tion -per=sua=sions -per=turb -pe=ruses -per=va=sive -pester -pe=ters -petri -pet=ting -phae=dra -phaser -phe=nomeno=log=i=cal -philco -philis=tinizes -philoso=phies -phoeni=cia -phon=ing -phos=pho=rus -pho=to=genic -pho=tos -phyla -physi=cist -pi -pick -pick=et=ing -pick=man -pi=co=joule -pic=tur=ing -pied=fort -pies -pig=gy=backed -pig=tail -pil=fer=age -pil=lar -pi=lots -pin=cush=ion -pin=ing -pin=na=cle -pin=scher -pi=o=neers -pipelin=ing -pi=rate -pis=tols -pitch=ing -pithi=ness -piti=less -pi=tu=itary -pix=els -place=ment -pla=gia=rist -plain=field -plain=tive=ness -planeload -plan=ets -planocon=cave -plant=ings -plas=tic=ity -plates -pla=toon -play=boy -play=ing -play=wrights -pleas=ant -pleat -ple=nary -pli=ant -plots -plows -plug=gable -plume -plun=dered -plung=ing -plu=to=nium -poc=a=hon=tas -pod -po=et=i=cal -poincare -pointy -poi=sons -po=laris -po=lice -pol=ish -po=liter -polka -pol=luted -poly=mer -pomera=nia -pompous=ness -ponds -pool -pop -pop=ping 
-pop=u=lar=ized -pop=u=lous -pores -port -por=tend=ing -por=tico -por=tray -posed -po=si=tion -posits -pos=ses=sive -pos=sums -pos=te=ri=ori -post=mas=ters -postscript -pot -po=ten=tates -po=tion -pot=tery -pounces -pourer -poverty -pow=er=ful -prac=ti=ca=ble -prac=ti=tion=ers -praise -prancer -prayer -pre=al=lo=cated -pre=car=i=ously -prece=dents -pre=ciously -pre=cip=i=ta=tion -pre=cludes -pre=con=cep=tion -pre=dat=ing -pre=de=ter=mi=na=tion -pred=i=ca=tion -pre=dic=tive -pre=dom=i=nately -pre=emp=tive -pref=ac=ing -prefers -preini=tial=izes -pre=lim=i=nary -premise -pre=oc=cu=pied -pre=pared -pre=pos=ter=ously -pre=rog=a=tives -pre=scrip=tions -pre=sen=ta=tions -pre=served -pres=i=den=tial -press=ings -pre=ston -pre=sump=tu=ous=ness -pre=tend=ing -pre=texts -pre=vail=ing -pre=vent=ing -pre=vi=ously -pricers -prides -pri=mar=ily -prim=ing -princesses -prin=ci=ples -prior -pris=on=ers -pri=va=tions -prizes -pro=bate -prob=ings -pro=ce=dure -pro=cess=ing -procla=ma=tion -pro=cre=ate -pro=curer -pro=duce -pro=duc=tive -pro=fes=sion -prof=fered -prof=itabil=ity -pro=found -pro=gram -pro=gresses -pro=hi=bi=tions -pro=jec=tions -pro=le=tariat -pro=long -promi=nent -pro=moter -promptest -pro=mul=ga=tion -pro=nounce=ment -proofs -propane -prop=erly -proph=esy -pro=por=tion=ately -pro=poser -pro=pounded -pro=rate -pros=e=cutes -prosodic -prospec=tor -prostate -pro=tect=ing -pro=tege -protes=ta=tions -pro=tons -pro=to=zoan -prouder -prove=nance -prov=i=dence -pro=vi=sion -pro=vokes -prox=i=mal -pruned -prus=sian=ize -pseu=doin=struc=tion -psy=chi=a=trist -psy=cho=log=i=cally -psy=cho=so=matic -pub -pub=licly -puck=ered -puffed -puller -pulls -pulse -pump=kin -punc=tu=ally -pun=ish=able -punt -pup=peteer -pur=chases -purges -pu=rina -pur=pler -pur=posed -purse -pur=su=ing -push=down -put=nam -puz=zle=ment -py=ongyang -pythagore=anizes -quad=ran=gle -qua=dren=nial -quag=mires -quak=er=ess -qual=i=fied -qualm -quan=ti=fiers -quan=tize -quar=reled -quar=ter=ing -quasar 
-qua=ver=ing -queerer -queried -ques=tion=able -ques=tions -quib=ble -quick=lime -qui=et=ing -quince -quit -quiv=ers -quon=set -quo=tient -ra=bin -rach=mani=noff -rack=e=teers -ra=di=ance -ra=di=a=tors -ra=dio=g=ra=phy -rae -rages -raider -rail=roaded -rain=bow -rains -rake -ral=ston -ram=i=fi=ca=tions -rams -rand -randy -rangy -rank=ings -ran=somer -rap -rapids -rap=tur=ous -ras=cally -rasp=ing -rat=for -ra=tion -ra=tio=nal=izes -rat=tler -rav=ager -ravens -rawl=ins -rays -reach -re=acted -re=ac=ti=va=tion -reader -read=justed -re=aligned -re=al=iz=able -realm -reaped -rear -re=ar=rest -rea=son=ings -re=as=signed -reawak=ened -re=bel=lions -re=boot=ing -re=buffed -re=but=ted -re=cal=i=brated -re=ca=pit=u=lates -re=ceded -re=ceives -re=cep=tive -re=cife -re=cip=ro=cat=ing -recita=tions -reck=oned -re=claim=ing -re=clin=ing -rec=og=nize -rec=ol=lect -rec=om=mend -re=com=piles -rec=on=cil=i=a=tion -re=con=nect -re=con=sti=tuted -recorder -re=cover -recre=at=ing -recta -re=cur -re=curs=ing -red -re=de=clared -re=de=fined -re=de=vel=op=ment -re=dis=played -red=ness -re=dress=ing -re=ducibly -reeds -re=elects -reen=force=ment -reestab=lish=ing -re=ex=am=in=ing -ref=er=ences -re=fer=ral -re=fine -re=flect=ing -re=flexes -re=for=ma=tory -re=for=mu=lated -re=frained -re=fresh=ment -refugee -re=futed -re=gally -re=gen=er=at=ing -reg=i=men=ta=tion -regis -re=gressed -re=gret=table -reg=u=larly -reg=u=la=tors -re=hears=ing -re=im=bursable -reined -rein=hold -re=in=stated -rein=tro=duces -re=it=er=a=tion -re=joiced -re=la=beled -re=lat=ing -rel=a=tives -re=laxes -rel=e=gate -re=lents -relic -re=liev=ing -re=lin=quish=ing -reloader -re=luc=tance -re=mains -reme=died -re=mind -rem=i=nis=cently -re=mod=els -re=motely -re=mov=ing -re=names -ren=dezvous -re=new=able -re=nounc=ing -rented -re=open -re=or=ga=nize -re=pair=man -re=pay=ing -re=peat=edly -re=pen=tance -rep=e=ti=tious -re=place=able -re=plays -repli=cate -re=port -repos=ing -rep=re=sentably -rep=re=sent=ing -re=prieved 
-re=proach -re=pro=ducibil=i=ties -re=pro=grams -re=publics -re=pulses -re=puted -re=quired -req=ui=si=tions -re=scind -re=searchers -re=sem=blances -re=sent=ment -reser=voir -res=i=dent -res=ig=na=tion -re=sis=tance -re=sis=tors -re=solver -re=sort=ing -re=spect -re=spec=tive -re=sponded -re=spon=si=ble -restarts -rest=ful -restora=tions -re=strain=ers -re=stric=tive -re=sul=tant -re=sum=ing -res=ur=rec=tors -re=tail=ing -re=tal=ia=tory -re=ten=tive=ness -retina -re=tir=ing -re=tract=ing -re=trans=mis=sion -ret=ri=bu=tion -re=triever -ret=ro=spec=tion -re=type -re=unit=ing -re=vamp=ing -rev=eler -re=vere -rever=i=fies -re=verses -re=viewer -re=viser -re=vival -re=voked -rev=o=lu=tion -re=volvers -rewind=ing -rewrit=ing -rhe=sus -rhode -rhyming -rib=bons -richard -rich=mond -rico -ride -ridiculed -ri=fle -rig=ging -right=ful=ness -rigor -rims -ring=ings -ri=or=dan -ripely -rip=pling -risk -rit=u=ally -river -rivulet -road=sters -roar=ing -rob=beries -roberta -robin=sonville -rochester -rocket -rock=well -rods -roll -ro=mance -ro=man=izes -romper -roof=ing -room=ing -root -rop=ing -rose=bush -rosetta -rot -ro=ta=tions -ro=tund -rough=ness -round=ing -roused -routes -rov=ing -row=ley -roy=alty -rub=bing -rubles -rude=ness -ruf=fian -rugged=ness -rule -ru=ma=ni=ans -rummy -run=away -runoff -rup=tur=ing -rus=sell -rus=tic -rustlers -ruth=less=ness -sab=bathize -sachs -sac=ri=fice -sacro=sanct -sad=dles -sa=fari -safes -sage=brush -said -sails -sal=able -salerno -saline -sally -salters -salu=ta=tions -sal=vages -same -sam=pling -sana=to=rium -sanc=tion=ing -sand=burg -san=dra -san=est -san=skrit -sapling -saran -sari -satchel -satires -sat=isfy -sat=ur=na=lia -saud -sav=aged -saver -sa=vored -saw=fish -sax=onize -say=ings -scala -scal=ing -scam=pers -scan=ners -scape=goat -scared -scat=ter -scenic -schantz -schelling -schemers -schmitt -scholas=tic -school=houses -schroeder -schuylkill -scis=sor -scoffs -scope -score=board -scorner -scotch=gard -scotts=dale -scouted 
-scram=bled -scrapes -scratch=ing -scream=ers -screen=ings -scrib=bled -scripts -scrump=tious -scuf=fle -sculp=tured -scythe -sea=gate -seam -seaquar=ium -search=light -sea=son=able -seat -se=ceded -sec=ondary -sec=re=tar=ial -se=cre=tive -sec=tions -se=cur=ings -sedi=tion -see -seedy -seem=ing -seer -seg=men=ta=tions -se=gundo -seizures -se=lect=man -self=ishly -sells -se=man=tics -semi=con=duc=tor -semiper=ma=nently -sen=ate -seneca -sense -sens=ing -sen=sual -sen=ti=men=tally -sep=a=rately -sept -se=quencers -se=quen=tially -serene -se=ri=al=iz=able -serif -serra -ser=vice -serv=ings -sets -set=tler -sev=en=teens -sev=er=ance -sev=ers -sex -sex=ual -shack=led -shadi=ness -shaf=fer -shak=ers -shale -shame=ful -shang=haied -shape=less -shard -shares -sharp=en=ing -shat=ter=ing -shawano -shear=ing -sheds -sheets -shel=ley -shelves -sheri=dan -shied -shiftier -shilling -shiner -shin=toizes -ship=per -shirk -shiver -shocker -shoe=horn -shooter -shop=pers -short=age -short=ens -shorts -shoul=dered -shoved -showed -shows -shrewd -shrilled -shrink=ing -shrugs -shuf=fled -shut=off -shut=tles -siberia -sicken -side=band -sides -sid=ings -sierra -sighed -sigma -sig=na=ture -sig=ni=fi=ca=tion -sikkim -silent -silken -sills -sil=ver=man -sim=ile -si=mon -sim=plic=i=ties -sim=plis=tic -sim=u=la=tion -sin=bad -sinews -singed -sin=glet -sin=gu=larly -sin=ner -sioux -sirens -sisy=phus -sit=tings -siva -six=ties -skate -skep=ti=cal -sketch=pad -skid=ding -skill=ful=ness -skims -skipped -skir=mishes -skulked -sky -sky=rock=ets -slacks -slang -slash -slaugh=ter -slavic -slavoni=cizes -sledge=ham=mer -sleep=less -sleighs -sliced -slide -slightly -slings -slips -slo=gans -slop=pi=ness -slot=ting -slower -slug=gish=ness -slums -smacked -small=time -smasher -smell -smiles -smith=so=nian -smoked -smol=dered -smooth=ing -smug -smythe -snap -snap=shots -snatched -sneaki=est -sneers -sniffs -snod=grass -snorkel -snow=belt -snows -snuffs -soak -soared -sobers -so=cial=ists 
-so=ci=o=log=i=cal -socks -so=fas -softly -so=journ -sol=dier -solenoid -solid -solids -so=los -sol=vent -somber -som=er=set -son -sonny -soothe -so=phis=ti=ca=tion -sor=did -sor=est -sor=rows -soul -sound=ness -soured -south=bound -south=land -so=vi=ets -spacer -spaded -spaniardiza=tion -spanked -spare -sparked -sparsely -spat -spawned -speak=ers -spe=cial=ists -spe=cialty -spec=i=fied -speckle -spec=ta=tors -spec=trog=ra=phy -spec=u=lates -speech=less -speeds -spellings -spent -spica -spies -spilt -spin=ner -spi=rally -spir=i=tu=als -spit=ing -spleen -splic=ing -splits -spoil=ing -sponged -spon=sor=ship -spool=ers -spores -sportswriter -spot=ter -sprague -spray=ing -sprees -springi=ness -sprint -sprouted -spurn -sput=tered -squadrons -squarer -squat=ting -squeaky -squeez=ing -squirmed -stab -sta=bi=lizes -stacked -staffing -stagers -stags -stair=cases -stale=mate -stalling -stam=mer -stam=ped=ing -stan=dard -stand=ings -stans -star -star=gate -star=ring -star=tles -state -statewide -sta=tion=mas=ter -stat=ues -statu=to=rily -staves -stead=ier -stealer -steamer -steele -steeper -steered -stem -stenog=ra=pher -step=mother -stereo=scopic -ster=il=izer -stetho=scope -stew -stick=ier -stiff=ens -stigma -stillest -stim=u=late -sting=ing -stipends -stir=rer -stitch=ing -stock=holder -stodgy -stom=acher -stood -stop=gap -stor=age -storeyed -stormi=est -stouter -strafe -straight=ened -strained -strand=ing -stran=gler -stratagem -strat=i=fies -straw=berry -streamer -street=car -strengths -stretched -strict -strife -stringed -stringy -striptease -strode -strolling -stron=tium -strug=gle -stu=art -stucco -stu=dious -stuffs -stun -stupid -sturm -styli -styx -sub=com=po=nents -sub=di=rec=tory -sub=dues -sub=graph -sub=jec=tive -sub=lime -sub=merges -sub=mode -sub=or=di=nate -sub=pro=gram -sub=schema -sub=script=ing -sub=se=quent -sub=si=dies -sub=sis=tent -sub=stan=tially -sub=sta=tion -sub=strate -sub=sys=tem -sub=tle=ness -sub=trac=tion -sub=units -sub=vert=ing -suc=cess=ful 
-suc=cinct=ness -suck=ers -sud=den -suf=fer=ance -suf=fi=ciency -suf=fo=cated -sug=ar=ings -sug=gests -suit=ably -suits -sulks -sul=tan -sum=mands -sum=ma=tion -sum=mon -sumter -sun=der -sunken -sun=shine -su=per=com=put=ers -su=per=groups -su=pe=rior -su=per=nat=u=ral -su=per=sede -su=per=vise -sup=pers -sup=ple=ment=ing -sup=port -sup=pose -sup=press=ing -surely -surge -surly -sur=pass -sur=pris=ingly -sur=round -sur=vey=ors -sus -sus=pended -sus=pi=cions -suther=land -swab -swal=low=ing -swan -swaps -swat -sweat -swedes -sweep=stakes -sweet=est -swellings -swifter -swim=suit -swipe -switch=boards -swivel -swords -sykes -sylvia -sym=bol=ize -sym=me=try -sym=pa=thy -syn=a=gogue -syn=chro=nizes -syn=di=ca=tion -syn=ony=mously -syn=the=size -syr=ian -sys=tem=at=i=cally -taber=na=cle -ta=ble=spoon=ful -tab=u=late -tacit -tac=tic -tail -taipei -tale -talker -tallchief -tal=mudiza=tions -tam=ing -tanaka -tan=gle -tan=ta=liz=ing -taos -tapestry -tar -tar=iffs -tasked -taste=fully -tat=tered -taunts -tav=erns -taxi=cabs -tay=lor -teaches -tear=ful -tea=spoon=ful -tech=nique -te=dious -teenaged -tegu=ci=galpa -tele=graph -tele=o=log=i=cally -tele=phony -tele=vise -teller -tem=per=ance -tem=pes=tu=ous -tem=po=raries -tempt=ingly -ten=dency -tenex -tense -tent -tenure -ter=mi=nat=ing -termwise -terre -ter=rify -ter=ror=ize -testable -tes=ti=fiers -tex -tex=tile -thai=land -thank=less -thaw -the=atri=cally -theme -the=ol=ogy -the=o=riza=tion -ther=a=pies -thereof -ther=mome=ter -thes=saly -thickly -thim=bles -think=ing -thirsted -this=tle -thorns -those -thou=sand -thread -threat=ens -thrift -thrived -throne -through=out -thrusters -thumbed -thun=derer -thus -tiburon -tick=les -ti=died -tier -tight=en=ers -tilde -tillich -tim=bered -time=outs -timeta=bles -ti=m=o=nizes -tin=gling -tin=kled -tint -tip=per=ary -tire=lessly -ti=tan -tit=ter -toasts -to=geth=er=ness -toi=lets -tol=er=a=ble -tol=er=a=tion -toma=toes -ton -tonic -tool -tooth=paste -top=most -topsy -tor=ment=ing 
-tor=rent -tor=tur=ing -tossed -to=tallers -touch=able -tough -tourist -tow=el=ing -towns -toys -tracked -trac=tor -trader -traf=ficked -trailed -trainer -tramp -trances -transceivers -tran=scribers -trans=feral -trans=formable -trans=gressed -tran=sis=tor=ized -tran=si=tively -trans=la=tion -trans=mit=tal -trans=par=ent -transpon=der -trans=pose -trape=zoidal -trauma -traver=sal -trays -trea=sure -treat=ing -tree=top -tremor -tres=passed -tri=an=gles -tri=bunals -tricked -tricky -trig=gered -trilled -trim=ming -tripled -tri=umphal -triv=ially -troop=ers -trot=sky -trou=bleshoots -trow=els -truck=ing -truest -trumped -trunk -trust=ingly -try -tubs -tuft -tum=bled -tun=able -tunisia -tur=bu=lent -turk=ize -turn=ing -tur=tle -tu=tankhamen -tut=tle -twenty -twiner -twirling -twitch=ing -tyler -type=writ=ers -typ=ing -tyranny -ugh -ul=cers -um=brage -un=ac=cept=ably -un=aided -unan=i=mous -unattain=abil=ity -un=aware -un=blocked -un=can=celled -un=chang=ing -un=closed -un=con=di=tional -un=con=trol=lable -un=count=able -un=de=cid=able -un=der=brush -un=der=flows -un=der=lies -un=der=mine -un=der=plays -un=der=stand=ings -un=der=tak=ings -un=der=writes -undi=rected -un=done -un=easy -un=equaled -un=event=ful -un=fair=ness -un=fit -un=for=mat=ted -un=grate=fully -un=harmed -uni=di=rec=tion=al=ity -uni=for=mity -unin=dented -un=in=ter=rupted -unions -uni=tar=ian -unity -uni=ver=si=ties -un=kind=ness -un=leashed -un=link=ing -un=lucky -un=mer=ci=ful -un=nec=es=sar=ily -un=ob=tain=able -un=paid -un=prece=dented -un=prov=able -un=rav=el=ing -un=rec=og=nized -un=re=strained -un=safely -un=s=e=lected -un=skilled -un=steady -un=syn=chro=nized -un=tie -un=to=ward -un=used -un=whole=some -un=winds -un=wrap -up=dater -up=holder -up=land -up=rightly -up=sets -up=turns -urge -uri=nates -ur=su=line -us=ages -usenix -usu=ally -uti=liza=tion -utopi=anizes -ut=ters -va=ca=tion -vac=u=umed -va=grantly -va=lence -valiant -valid=ness -valu=ably -valves -van=den=berg -van=ished 
-van=quish=ing -vari=ably -varies -vary=ing -vastly -vau=dois -vax -veer=ing -veg=e=tated -ve=hic=u=lar -ve=lasquez -vene=tian -ven=omous -ven=tri=cles -venus -ver=bal=ized -ver=dure -ver=i=fier -vern -ver=sa=tile -ver=te=brates -vested -vet=eri=nary -via -vi=bra=tions -vi=cious=ness -vic=tim=iz=ers -vic=to=ries -vi=dal -vier -view=ing -vi=gnettes -vil=i=fi=ca=tion -vil=lages -vinci -vine=yard -vi=o=la=tor -vi=o=lins -virgo -virus -vis=i=ble -vis=ited -vi=su=al=ize -vi=tally -vladimir -vo=ca=tions -voided -vo=li=tion -volt=ages -vol=un=teer=ing -voted -vouch=ing -voy=aged -vul=garly -waco -waf=fles -wag=ne=r=ian -wail=ing -waiter -waives -wak=ing -wal=green -wal=len=stein -walls -waltham -wan=dered -wan=ing -wants -ward -ware=hous=ing -warmer -warn=ing -war=ranted -war=saw -wash=burn -wasps -watch -watch=man -wa=ter=ing -wa=tery -wausau -wave=length -wax=ers -we -weak=nesses -wear -weari=somely -weath=er=ford -webs -wed=lock -weekly -wei=d=man -weights -weiss=muller -welder -welles=ley -wenches -wes=leyan -west=hamp=ton -wet -whacked -wharves -wheel -whelp -wher=ever -whims -whip=pany -whirling -whiskers -whis=tled -white=horse -whitens -whit=lock -whit=tling -whole=ness -whoop -wi=chita -widen -wid=owed -wield=ing -wilbur -wile -wilkin=son -william -willis -wilshire -wince -wind=ing -wine=head -win=ing -win=nie -win=sett -wiped -wire=tap=pers -wised -wish=ful -witches -with=drew -with=holds -wit=ness=ing -woe=fully -wom=an=hood -won=der=ful=ness -woo -wooden -wood=stock -woofer -woonsocket -words -work=books -work=man -world=li=ness -wor=rier -wor=shiper -worth=less -wound=ing -wrap=per -wreathes -wrenched -wretch -wring -writ -writ=ing -wrote -wyner -xe=roxed -yamaha -yard -yawner -years -yel=lowed -yelped -yes=ter=days -yok=na=p=ataw=pha -york=shire -young=sters -youth=ful=ness -yukon -zeal -zen -zeus -zion=ism -zoned -zoroaster diff --git a/third_party/rust/mapped_hyph/tests/base.word b/third_party/rust/mapped_hyph/tests/base.word deleted file mode 100644 index 
6d1e60849cfa..000000000000 --- a/third_party/rust/mapped_hyph/tests/base.word +++ /dev/null @@ -1,4543 +0,0 @@ -aarhus -abase -abate -abbeys -abby -abducts -aberrations -abhorrer -abilities -abjuring -ablest -abodes -abolitionist -abortion -about -abram -abridged -abruptly -absconds -absently -absolved -absorption -abstinence -abstractor -abundance -abuts -abyssinian -acapulco -accelerator -accentuated -accepted -accessibility -accidental -acclimated -accommodating -accompanying -accomplishments -accords -accountant -accretion -acculturation -accuracy -accused -aces -achieve -acid -acknowledgeable -acme -acoustics -acquiescent -acquisitive -acres -acropolis -actinometers -activators -actors -actuarial -acute -adagios -adaptation -adapts -addicting -additions -addresser -adducing -aden -adherents -adiabatically -adjoining -adjudging -adjured -adjustment -administer -administratively -admire -admissions -admixed -admonitions -adopted -adore -adrian -adsorbs -adulterer -adumbrating -advantageous -adventurers -adversely -advertises -advisees -advocacy -aerate -aerobacter -aerosols -affairs -affections -affiliating -affirmed -affliction -affords -afghans -aforethought -africanizes -afterimage -afterward -age -ager -agglutinated -aggravation -aggressive -agile -agitator -agonies -agreeably -agriculturally -aide -ailing -aims -airdrops -airfoil -airline -airplanes -airtight -akin -alamo -alas -albatross -albums -alcoholism -aldrich -alerting -alexandria -alga -algerian -algorithms -alicia -aligned -alistair -allan -alleges -allegory -allergic -alleyway -alliteration -allocator -allots -allowing -allurement -almaden -alnico -aloofness -alphabetized -alsatian -alterations -alternates -althaea -altruistically -alveolar -amalgamate -amass -amaze -amazons -ambiguous -ambled -ambushed -amend -america -americans -amide -ammonia -among -amortized -amour -amphibians -amplifiers -amputated -amusement -anabaptist -anagram -analogue -analyticities -anaphoric -anastomoses -anatomy 
-anchorite -andalusia -andover -anecdote -anesthetic -angeleno -angered -anglia -angola -angular -animated -animism -anita -annapolis -annihilated -annotation -annoy -annually -annuls -anodes -anomalously -anselm -ant -antagonizes -antedate -anthologies -anthropomorphically -anticipates -antidotes -antimony -antiquate -antisemitism -antithetical -antonio -anxious -anyway -apathy -aphasia -apiary -apocryphal -apologist -apostolic -appall -apparently -appear -appeasement -appended -appertains -applauds -applicable -applier -appointer -apportioning -appraisers -appreciation -apprehensively -approach -appropriate -approval -approximated -april -aptly -aquifer -arabians -aramco -arbitrating -arcades -archaism -archeologist -archimedes -architectures -arcing -ardently -arequipa -argos -argument -aridity -aristocrat -arithmetize -arm -armchairs -arming -armpits -arousal -arrack -arrangement -arrears -arrhenius -arrogate -arroyo -arterial -arthritis -articulately -artifact -artillerist -arts -ascendant -ascent -ascot -ashamedly -ashtray -asiatics -asking -aspersions -aspiration -ass -assassinated -assay -assemblies -asserter -assess -assiduity -assigning -assist -associate -associator -assuaged -assure -assyrianize -asteroid -astonishingly -astride -astronomically -asymmetric -asynchronously -atheism -athletes -atlas -atomization -atonement -atrophies -attaching -attain -attempt -attendants -attentionality -attenuator -attired -attracted -attributable -attune -auburn -audibly -audiometer -auditions -auger -august -aural -auscultated -austerely -austrianize -authenticator -authorities -authors -autocorrelate -autodecrements -autoindex -automaton -autopilot -autumnal -availer -avaricious -avenues -avers -avian -avionic -avoidable -avow -awakened -awards -awfulness -awry -axiological -axioms -ayes -azure -babelizes -babying -bacchus -backbend -backfill -backorder -backscatters -backstitch -backtracks -backyard -badger -baffle -baggage -bagrodia -bailiff -baits -bakes 
-balancers -baldwin -balkanization -balks -baller -balloon -ballplayer -balsam -bamboo -bandage -bandpass -bane -bangui -bank -bankrupts -bans -baptism -baptized -barbarism -barbells -bards -barest -barhop -barks -barnhard -barometers -barr -barren -barron -barter -basalt -baseless -bash -basics -basketball -bassinets -batavia -bather -bathtub -batted -batting -battlements -baudelaire -bawling -bayonet -be -beaded -beaker -beanbag -bearded -beast -beatification -beau -beautified -beavers -becker -becomingly -bedder -bedpost -bedspread -beecham -beefy -beethoven -befell -befoul -befuddles -beggary -begotten -beguiling -behaviorism -behold -being -belay -belfry -believable -belittles -belles -belligerents -bells -belong -belting -bemoans -bendable -benedictions -beneficiary -bengal -bent -bequeath -berating -beresford -beribboned -berliners -bernardine -bernoulli -bertie -besets -besmirched -bespoke -besting -bet -betrayed -bette -between -bewail -bewilderment -bianco -bibles -bicarbonate -biconvex -bidder -biennial -bigger -biharmonic -bilabial -bilk -billet -billings -bimetallism -bind -binghamton -biochemistry -biologically -biopsy -bipeds -birdbaths -birminghamize -births -bisectors -bisques -bites -bitterly -bivalves -blabbermouths -blackburn -blackfoots -blackmailed -blacks -blaine -blamers -bland -blanketers -blares -blasphemousness -blatz -bleachers -bleating -blemishes -blessings -blindfold -blinked -blissfully -blizzard -bloch -blockers -blond -bloodiest -bloom -blossoms -blowfish -bludgeons -blueprint -bluish -blunted -blurring -blushing -boarded -boaster -boathouse -boatswain -bobbsey -bodenheim -bodybuilding -bogart -bogus -boilers -boldface -bolshevist -bolton -bombastic -bonanzas -bondsman -bonham -bontempo -bookcases -bookkeepers -bookstore -booms -booster -bootle -bootstrapping -borden -borealis -born -borrowers -bosses -botanist -bother -bottler -botulism -bounce -bounden -bouquet -boutique -bowdlerizing -bowl -bowstring -boxing -boyfriend -braced 
-bradbury -bragger -braille -brainstorm -brakes -branchings -brandishing -brashly -braun -braving -braying -brazil -bread -breadwinners -breakfast -breakthrough -breastworks -breathlessly -breeding -brennan -brevet -brewery -bribers -bricklayers -bridge -bridgework -briefed -brig -brighten -brighton -brimming -brings -bristle -britisher -broaches -broadcasts -broadly -broglie -brokenness -bronchial -brooch -brookfield -broth -browbeat -brownian -bruce -brunette -brushing -brutalized -bryce -buchwald -buckler -bucky -buddies -budgeters -buff -buffetings -bugger -bugs -built -bulging -bulldoze -bullfrog -bullying -bumbling -bumptious -bundle -bungler -bunkhouse -bunted -buoys -bureaucracy -burgher -burglarproofing -burke -burn -burnings -burntness -burrowing -bursty -busch -bushwhacked -businesslike -bustards -butchered -buttercup -butternut -buttonholes -butyrate -buzzard -bye -bypassing -bystander -byzantinizes -cabinet -cache -cactus -caesarize -cager -cajole -calais -calculate -calculus -calgary -calico -callaghan -calloused -calmingly -caltech -calypso -camembert -camino -campaigning -camps -canadianize -canceled -candidacy -candler -canine -cannery -cannon -canonical -canopy -canto -canvassed -capable -capacitors -capita -capitalizers -capping -capstone -captivates -capturer -caravans -carbondale -carbonizing -cardboard -cardiology -carefully -caressing -caricature -carlsbad -carnation -caroline -carpenters -carriages -carruthers -carter -carton -carve -cascades -cashed -casings -cassette -castes -casts -catalina -catapult -catches -categorizes -cathedral -catholicisms -cattle -cauldrons -causer -cautioner -cavalierness -cavernous -cawing -cecil -celanese -celerity -cellist -celticizes -censoring -centaur -centimeter -centralized -centroid -cerebral -certainties -certifies -cezanne -chaffey -chairing -chalices -challenging -champaign -chancellor -changeability -channeled -chanter -chapel -chapter -characterize -chargeable -charitable -charlotte -chars 
-chartings -chasing -chastisers -chattel -chauffeured -cheaply -checkbook -checkout -cheekbone -cheeriness -cheeses -chemise -cherishes -cheryl -chests -cheyennes -chicanos -chides -childhood -chill -chime -chinas -chinning -chiropractor -chit -chloroplasts -choir -choose -chopping -choreograph -chou -christenson -christianizing -christoph -chronicle -chronology -chuckles -churchgoing -churn -ciceronianize -cinderella -ciphertexts -circuitously -circulating -circumnavigates -circumstanced -circuses -cities -civet -civilized -claimed -clambers -clams -clapboard -clarifications -clash -classes -classifiers -clattered -claustrophobia -cleaned -cleansed -clearer -cleaved -clemente -clerked -cliches -cliffs -climb -clincher -clink -clippers -cloaks -clockings -clogs -closeness -closing -clothing -clouding -clowns -clucks -clumsy -clutching -coaches -coalition -coastal -coating -coaxing -cobweb -cockpit -cocoon -codes -codifies -coefficient -coexist -coffer -cogitated -cogs -cohering -coils -coinciding -colder -colicky -collaborator -collared -collecting -colleges -collins -colombia -colonies -colons -colorless -columnize -combated -combinator -combings -comedic -cometary -comforting -comma -commandment -commemorative -commended -commenting -commissioners -committeemen -commonalities -commonwealth -communicated -communists -commuting -compactors -comparably -comparison -compassion -compelling -compensatory -competitions -compilers -complaint -completed -complexities -complications -complimenting -composedly -compost -comprehensibility -compression -compromisers -compulsory -computed -comradely -concatenation -concede -conceived -concentrators -conceptualized -concerted -conciseness -concoct -concretes -concurring -condemns -conditional -condoned -conduction -confectionery -conferred -confessions -confidential -configure -confining -confiscates -confocal -confounding -confucian -confusion -congo -congregating -congresswomen -conjoined -conjuncture -connected -connector 
-connors -conquered -conrail -consecrate -consenting -conservation -conserved -considered -consistent -consolers -consonants -conspirator -constant -constituent -constitutions -constructed -constructs -consultant -consumed -consumptions -contain -contaminated -contemplative -contender -contentment -context -continents -continuations -contortions -contracting -contradicting -contraptions -contribute -contrite -controllability -controversy -convened -conventionally -conversantly -conversion -convex -convict -convinces -convoys -cooked -coolers -coon -cooperations -coordinates -copeland -copings -coprocessor -coquette -cords -corinthian -corks -cornered -corns -coronary -corporately -correct -correctness -correspond -corridors -corroborative -corruption -corvallis -cosmopolitan -costs -cots -cotyledon -coughs -councilwoman -counselors -counteracting -counterfeited -counterpart -countersunk -countrywide -couplings -courser -courtesies -courtrooms -covenant -coverlet -covetousness -cower -cowl -coypu -crackers -cradles -craftsperson -cramps -crank -cranny -crater -crawford -craze -creaked -creams -creation -credence -creditor -creeks -cremates -crescents -cretin -cricket -criminal -cripple -crisscross -criticizes -croaks -crocus -cropper -crosser -crosstalk -crowd -crowning -crucifixion -cruel -cruising -crumpled -crusade -crushes -crux -cryptic -crystallize -cubans -cucumbers -cufflink -culminate -cultivable -cultural -cummings -cupboard -curb -curing -curlers -current -currying -cursory -curtly -curving -custer -customizable -cut -cuts -cyanamid -cyclically -cygnus -cypress -cytoplasm -dabbles -dadaistic -dahl -dairy -dali -damages -damns -damsel -dancing -dangle -danize -dare -darken -darn -darted -darwinizes -database -dates -daunted -davy -daydreams -dazzled -deaden -deaf -dealings -deanna -death -debater -debilitates -debtor -debutante -decay -deceit -decelerate -decent -decidability -decimate -decision -decks -declarer -decliners -decodings -decomposition 
-decorative -decreases -decrements -dedicated -deduct -deeding -deep -deere -defeats -defendant -defenestrating -deferments -deficiencies -define -definitions -deformation -defy -degradation -deify -dejected -delaying -deleter -deliberated -delicacies -delightful -delimiting -delirious -deliveries -delphic -deluged -demand -demeter -demodulate -demons -demonstrator -demultiplex -denebola -denigrates -denominators -denoting -densest -dentists -denying -departure -dependent -depleted -deploy -depose -depositors -depreciated -deprivations -dequeued -deregulate -derive -descend -descents -descriptively -deserters -deservings -designator -desire -desolate -despatched -despite -destabilize -destroyed -destructiveness -detacher -detained -detective -deteriorated -determination -deterministic -detractor -devastate -development -deviation -devised -devotedly -devours -dexedrine -diagnose -diagonals -dial -dialogue -diamond -diarrhea -dickinson -dictatorial -diddle -dies -dietrich -differentials -differers -diffusely -digest -diggings -digits -digressing -dilapidate -diligence -dilution -dimensions -dimmed -dine -dining -diogenes -diphthong -dipper -direction -directorate -dirt -disable -disaffection -disallowing -disappearances -disapprove -disassembles -disbands -discarding -discerns -disciplines -disclosure -disconnects -discord -discouraging -discovery -discretion -discuss -disease -disfigure -disgruntle -disgustingly -dishonestly -dishwater -disjoint -disk -dislocates -dismaying -dismissers -disobedient -disown -dispatched -dispensary -dispersed -displacing -displeasing -disposition -disputer -disquieting -disruption -dissemble -dissenter -dissimilarities -dissociating -distaff -distastes -distilling -distinguish -distorts -distresses -distributivity -disturbed -ditty -divergence -diversifies -diverting -dividend -divining -divisors -dixieland -doberman -doctoral -documentaries -dodecahedra -doe -doghouse -dolan -dollies -domenico -domicile -domineering -donahue -donkey 
-dooley -doorman -dopers -doric -dortmund -doted -doubleheader -doubt -doubts -doves -downey -downloading -downstairs -doyle -draconian -drafty -dragooned -dram -drape -draughts -drawings -dreaded -dreamers -dregs -dressing -dries -driller -drip -driveway -droop -droppers -droves -drudgery -drummers -drunkly -duality -dubuque -ducts -dug -dullness -dumbly -dunbar -dungeons -duplicable -dupont -duration -durward -duster -dutchman -dwarfed -dwelt -dyeing -dynamism -dysentery -ear -earmarked -earnestness -earth -earthquakes -eases -easterner -easy -eaves -eben -echoed -ecology -economize -ecuador -edenizes -edict -edition -edmonton -educating -edwards -effecting -efficacy -effortlessness -eggshell -egyptianize -eigenstate -eighthes -eisner -ejecting -elaborately -elapses -elderly -elections -electrically -electrocute -electroencephalography -electronics -elemental -elevation -elicited -eliminating -elite -ella -ellipsoids -elmhurst -else -elucidation -ely -emancipate -embarrass -embeds -embodied -embracing -emerald -emeritus -emil -emits -emotionally -emphasizing -employable -emporium -emptily -emulator -enacted -encamping -enchanter -encircled -encoder -encounter -encouragingly -encumbered -endangers -endemic -endorse -endows -enduringly -enfeeble -enfranchise -engels -engines -englishmen -engulf -enjoin -enjoys -enlightened -enlivens -enormity -enquirer -enriches -ensembles -ensnaring -ensures -enterprise -entertainment -enticed -entitle -entreat -entrepreneurs -enumerated -enveloped -environ -envisioned -ephemeral -epicurizes -episcopalian -epitaphs -epochs -equalize -equates -equilibrate -equips -equivocally -erased -ere -ergo -erlang -erode -erratum -errs -escalates -escapes -escorts -especially -esquires -essentially -estates -estimated -eternal -ethernets -etruria -eulerian -eurasia -europeanized -evade -evaluative -evaporation -evenhandedness -events -everglades -everything -evidences -evinces -evolve -exacerbated -exactions -exaggerations -examined 
-exasperates -exceeded -excellently -exceptions -exchangeable -excision -excitingly -exclamatory -exclusiveness -excreting -excused -executional -exemplified -exempts -exertion -exhaustedly -exhibitions -exile -existentialist -exorbitant -expanders -expect -expects -expeditious -expenditure -experiencing -experiments -expires -explanations -exploit -explorations -explosive -exponentiating -exports -exposure -expressibility -expulsion -extemporaneous -extensive -exterminate -extinguished -extract -extraneous -extrapolation -extremely -exult -eyeglasses -eyesight -fables -facade -facile -facsimile -factories -faculty -fagin -failsoft -faintness -fairing -faithful -fakes -fallacious -falmouth -falsifying -familiar -families -fanaticism -fanciness -fanning -farad -farewells -farmers -farrell -fascination -fasted -fastidious -fate -fathomed -fatten -faulkner -fauna -favoring -fayette -fearlessly -feat -featherweight -fed -feebleness -feeds -feet -feline -fellowships -feminism -fencing -fermentation -ferociously -fertile -fervent -festivity -fettered -feverish -fiat -fibrously -fiddled -fief -fiendish -fifteenth -fighting -fiji -files -filled -filming -filthy -finals -finder -fines -fingerprint -finishes -finnish -fireboat -firemen -firewall -firming -fiscally -fishes -fissured -fitly -fitzpatrick -fixation -fixture -flagged -flak -flamer -flanking -flash -flask -flattered -flaunting -flawlessly -fledglings -fleetly -flemishing -flew -flicking -flinches -flirt -floated -flood -floors -florentine -flossing -flourished -floweriness -fluctuate -fluffier -fluoresce -fluting -flying -focal -foes -fogy -folders -folksy -folsom -font -fooled -football -footing -forage -forbes -forcer -forearms -forefathers -foreign -foreseeable -forestalls -forever -forge -forgettable -forgiving -forlornly -formalized -formatively -formicas -formulated -forsaken -forthwith -fortiori -fortuitously -forwarder -fought -foundation -founds -foursome -foxes -fragile -fragrantly -framing -francie 
-francoise -franking -fraser -fray -freckle -frederico -freeing -frees -freezing -frenchizes -frequented -freshened -freshness -freudianism -friction -friendlier -friezes -frightful -frisia -frivolity -from -fronts -frothing -fruehauf -fruits -fuchsia -fujitsu -full -fumbling -functionally -fundamentally -fungal -funnier -furlong -furniture -furthermore -fuses -futuristic -gabled -gadgetry -gaging -gaines -galactic -galaxy -gallantly -gallon -gallstone -gambled -games -gangplank -gaped -garbed -gardner -garlanded -garrisoned -gaseous -gaspee -gastric -gathered -gauche -gaunt -gawky -gaze -gearing -gelatin -gemma -generality -generals -generic -genetic -genre -gentler -geodesic -geological -geophysical -geraldine -germane -germinates -gestapo -getting -ghosted -gibraltar -gig -giggle -gilds -gilt -ginghams -gipsy -girlish -giver -gladdest -glance -glaring -glazed -gleaner -glenda -glimmer -glints -gloat -gloria -glorying -glove -glowing -glynn -gnu -goats -goblins -godmother -goethe -goldenly -goldstine -gondola -goode -goodyear -goren -gorton -got -gothicizing -gouging -government -grab -gracefully -gradations -gradual -graft -grained -grams -grandfather -grandpa -grant -granulates -graphical -graspable -grassiest -gratification -gratuitously -graves -grayed -grease -grecianize -greeks -greenfeld -greens -greeter -grenades -greyest -grievances -griffith -grimes -grinds -gripped -gritty -grocers -grooved -grossest -groton -group -grovels -growling -grubs -grumbling -guano -guardedly -gubernatorial -guest -guideline -guiltier -guises -gullah -gumming -gunner -gurgle -gustafson -guts -guyer -gymnastics -haas -habitual -hacks -hag -hail -hairier -hale -hallmark -halpern -halve -hamburgers -hammering -hampshire -handbooks -handicap -handkerchiefs -handshake -handy -hangman -hannah -hansel -haplessly -happily -harbinger -harder -hardships -harken -harmfulness -harmoniously -harnessing -harriman -harry -harvardize -harveys -hassle -hat -hatefully -hattie -hauler -hausa 
-havoc -hawthorne -haywood -head -headlands -headroom -heals -healy -hearings -heartily -heater -heaved -heaviness -hebrides -hedgehog -heeds -hegelianizes -heights -heiresses -helicopter -hellenized -helmet -helpfully -hem -hemp -hendrick -henrietta -heralds -herder -hereford -hereunder -hermit -heroically -herring -hertzog -hesperus -heterogenous -heuser -hexagon -hibernate -hidden -hierarchic -highfield -highnesses -hikes -hillcrest -hilt -hindered -hindustan -hinting -hired -his -histograms -hitch -hither -hitting -hoarseness -hobby -hoe -hoists -holds -hollandaise -hollowness -holocaust -homage -homeomorphism -homespun -homing -homosexual -honesty -honeymooning -honoraries -hoodlum -hooker -hoosierize -hooves -hopelessness -horace -horn -horrible -horrors -horseshoer -hospitalize -hostesses -hotly -hounding -houseflies -housetop -hover -howled -hubert -huey -hugo -humanities -humbling -humidifiers -humiliation -humorers -humpty -hung -hungry -huntley -hurling -hurrying -husbands -husks -hutchins -hyde -hygiene -hyphenate -hypotheses -hysterical -ibsen -icicle -icosahedron -idealize -identical -identify -idiosyncrasy -idles -ignite -ignores -illegality -illogical -illusions -illustrative -imagen -imagine -imbrium -immaterial -immensely -immigrating -immovability -impacted -impale -impatiently -impedes -impenetrable -imperfectly -impermanent -impersonations -impinges -implementable -implicants -implied -important -imposes -impotence -impractically -impressible -impressment -imprisonments -improvement -improvisers -impulsion -inaccessible -inadequate -inane -inaudible -inca -incas -incessantly -incidentally -inciting -incloses -inclusiveness -incomparable -incompletely -incongruity -inconsistent -inconvenient -incorrectness -incredulous -incubate -incurable -indecisive -indent -indescribable -indexing -indication -indifference -indignation -indirectly -indistinct -individually -indoctrinating -indubitable -inductances -inducts -industrialist -industry -inelegant 
-inertly -inexact -inexplicable -infantry -infection -inferior -infertile -infinite -infirmary -inflated -inflicting -inform -informatively -infrequently -infuriating -ingeniousness -ingratiate -inhabited -inherently -inheritress -inhibitor -inimical -initialized -initiating -injection -injured -inker -inlet -inner -innocuousness -inoculate -inquire -inquisitive -inscribed -insecurely -insertion -insidiousness -insinuated -insistently -insomnia -inspiration -installation -instances -instantiations -instill -institutes -instruct -instructs -instruments -insulation -insurance -insurrection -integrand -intellect -intelligible -intensification -intensively -inter -intercept -interchanged -intercommunicates -intercourse -interested -interfered -intergroup -interleaved -interminable -intermodule -internationality -interpersonal -interposed -interpreting -interrelations -interrupt -intersecting -interstate -intervening -interwoven -intimation -intolerance -intractability -intraoffice -intrigued -introductions -intruder -intubation -invaders -invalidities -invariants -inventively -inverses -inverting -investigative -inveterate -invites -invoked -involves -ionians -ira -irately -irishman -ironic -irrational -irregular -irrepressible -irreversibility -irritable -irving -isfahan -island -isolated -isomorphisms -issuance -it -italicize -itemizations -iteration -ito -izvestia -jackets -jacky -jacobus -jailer -jamaican -janet -janus -jargon -jauntiness -jay -jeanne -jeffersonian -jennifer -jeremy -jeroboam -jest -jesuitizing -jeweled -jews -jingled -joaquin -joes -john -joiner -jokers -jolts -jordan -josephus -jotting -journals -jousting -joyous -judaica -judge -judith -jugoslavia -julie -jump -junctures -juniper -juras -jury -justifiers -jutland -kaddish -kamikazes -kant -karp -katowice -keeling -keepers -kemp -kenney -kepler -kerouac -key -keypad -khrushchevs -kidde -kidney -kilimanjaro -kills -kilojoule -kimono -kindling -kingpin -kinnickinnic -kirchoff -kisses -kiting -klein 
-knapsacks -kneel -knickerbockers -knights -knocked -knots -knowledge -knuckles -kodachrome -korea -krakatoa -kronecker -kurd -labeling -laborer -labyrinths -lacerta -lacks -ladies -lagoon -laidlaw -lamarck -lament -lamp -lanced -landings -lands -lange -languish -laos -lapse -largely -larson -lashing -laszlo -later -latinity -latitudes -laudable -laughlin -laundered -laurels -lavender -lawfully -lawsuit -layers -lazarus -leaded -leafed -leaguers -leander -leaping -leary -leathern -leaving -lectures -leeds -leftists -legalization -leger -legislated -legitimate -leila -lemon -lends -leniency -lens -leonardo -lesbian -lesson -letter -levee -levelly -levin -lewdly -lexington -libelous -liberated -libido -license -lick -lied -lifeboat -lifetime -ligget -lighthearted -like -likeness -lilian -liman -limit -limits -lind -lindy -linearly -lingerie -lining -linnaeus -lioness -liquid -lise -listened -listings -literalness -lithuania -littering -live -livers -lizzie -loaf -loathing -lobster -locally -locator -lockian -lockwood -lodges -logarithm -logically -logs -loiters -londonization -loners -longings -lookers -looms -looseleaf -loosing -lords -lorry -lossiest -lotus -louisa -lourdes -lovelace -loves -lowest -loyally -lucerne -luckier -ludicrous -luke -luminously -lunch -lunged -luring -lust -luther -luxuriantly -lyle -lynx -mac -macdonald -maces -machinery -mackey -macromolecule -madden -madhya -madsen -magellanic -magill -magnetizable -magnify -maguire -maids -mailman -mainframes -maintained -majesty -maker -malady -malcolm -malformed -maliciousness -malone -malton -manage -managing -mandatory -manger -manhole -manicuring -manila -manipulative -mann -manors -mantissa -manufactured -mao -maps -marched -mardis -margo -marinade -maritime -marketability -markings -marmalade -marriott -marshaling -martial -martyr -marvels -mascara -maskable -masonite -massacred -mast -masterpiece -masturbation -matchless -materializing -mathematically -matings -matrix -mattered -matured 
-mauricio -maxima -maxims -maybe -mayoral -mccabe -mccluskey -mcdonnell -mcgovern -mckee -mclean -mcpherson -mealtime -meaningful -meant -measurements -mechanically -medal -medfield -mediations -medicine -meditating -mediums -meeting -megahertz -meister -melcher -melodies -melpomene -membership -memoranda -memorizes -menagerie -mendelizes -mennonite -mentalities -mentor -mercenariness -mercilessly -merged -meritorious -merrill -mesh -messenger -messy -metallization -metaphysical -meteoritic -methodically -methods -metro -mews -mica -mick -microbicide -microeconomics -micron -microprocessing -microscope -microvaxes -middleman -midnight -midstream -midwinter -migrate -mikoyan -mileage -milk -mill -millikan -millionth -millstones -miltonized -minaret -mindfully -mineral -mini -minima -minimizes -ministries -minor -minstrels -minute -miracle -miriam -miscarriage -misconception -miserably -misgivings -misled -misplacing -missing -missoula -mistake -mistletoe -misunderstand -mitch -mitres -mixtures -moats -mocked -modally -moderated -modernizer -modicum -modifying -modularizing -module -moghul -moines -moldavia -moles -mollusk -momentarily -monaco -monday -mongolian -monkeyed -monocotyledon -monolithic -monostable -monroe -montague -montgomery -monument -mooned -moor -moped -morass -morehouse -morn -morphological -morsels -mortgage -mosaic -mosque -motels -motherland -motionlessness -motley -motorized -mound -mountainously -mourners -mousy -movable -moving -muck -muddled -muffin -mugs -mullah -multicomputer -multiple -multiplicand -multiplies -multistage -mumbles -mundane -munitions -murdering -murmurs -muscovy -mushroomed -musicians -muskrat -mussorgsky -mutability -mutations -mutilating -mutters -mycenaean -mysterious -mythologies -nagasaki -nair -nakedness -names -nanook -napkin -narcotic -narrowest -nash -natal -nationalities -nations -naturalist -naughtiness -navel -navona -neanderthal -nears -nebula -necessitation -neckties -needled -needy -negatives -negligible 
-negroid -neighboring -neoclassic -nero -nesting -nets -neural -neutral -neva -newburyport -newman -newsman -next -nibelung -nicholls -nickname -nielson -nightfall -nihilism -nimbler -nineties -nipponizes -nobility -nocturnally -noel -nolan -nominee -nonconservative -nondeterminism -nongovernmental -nonlinearity -nonorthogonal -nonsegmented -nonterminals -nook -nordhoff -normalization -normanizations -north -northernly -norwalk -nostradamus -notarizes -note -noticeable -notifies -nottingham -novak -novices -nuances -nuclide -nullary -number -numerable -numismatic -nursing -nutritious -nyquist -oases -obedient -obfuscate -objectively -obliged -obliterating -obscene -observable -observers -obsoletes -obstruction -obviated -occasional -occidentalize -occlusions -occupied -occurs -octagonal -octets -oddly -odious -o'dwyer -offended -offer -officer -officiously -oft -oilcloth -ojibwa -oldenburg -oleomargarine -olivia -olympus -ominousness -omnipresent -o'neill -onlooker -onus -opaquely -openings -operate -operator -oppenheimer -oppose -oppressed -opthalmic -optimist -optimizing -opts -oranges -orbital -orchestral -order -ordinarily -ores -organization -organs -orientalized -orifices -origination -orleans -ornate -orr -orville -oscillates -o'shea -osteopath -othello -otto -ounces -outburst -outdoor -outgrowing -outlawing -outlive -outperforms -outrages -outstanding -outvoting -outwitting -overboard -overcrowds -overestimates -overhangs -overjoyed -overload -overnighter -overproduction -overrunning -overshadowing -oversized -overtake -overtly -overuse -overworking -owen -ownership -oxidized -ozzie -pacification -packaged -packers -padding -pageant -paginating -painful -painting -pajamas -pale -palestine -palliative -palomar -panacea -pandemic -panels -panned -pantheist -panty -paperers -par -parades -paragon -parallel -parallels -parameterize -paramus -paraphrases -parcel -pardoned -parentheses -pares -parisian -parkers -parlay -parody -parrots -parsifal -partakes 
-participant -particular -partitioned -partridges -passageway -passion -passport -pasteur -pasture -patchy -patents -pathogenesis -patients -patricians -patrolling -patrons -patterning -paula -paulus -pavement -pawn -payer -payoffs -peacefully -peaks -pearl -peat -peculiar -pedant -pediatrician -peeling -peering -peking -pembroke -pence -pends -penetration -peninsulas -pennsylvania -pentagon -peopled -peppery -perceived -percents -perchance -perennially -perfectness -performs -perihelion -periodically -perishable -perkins -permeating -permit -pernicious -perpetration -perpetuation -persecuting -perseveres -persist -personal -personified -perspiration -persuasions -perturb -peruses -pervasive -pester -peters -petri -petting -phaedra -phaser -phenomenological -philco -philistinizes -philosophies -phoenicia -phoning -phosphorus -photogenic -photos -phyla -physicist -pi -pick -picketing -pickman -picojoule -picturing -piedfort -pies -piggybacked -pigtail -pilferage -pillar -pilots -pincushion -pining -pinnacle -pinscher -pioneers -pipelining -pirate -pistols -pitching -pithiness -pitiless -pituitary -pixels -placement -plagiarist -plainfield -plaintiveness -planeload -planets -planoconcave -plantings -plasticity -plates -platoon -playboy -playing -playwrights -pleasant -pleat -plenary -pliant -plots -plows -pluggable -plume -plundered -plunging -plutonium -pocahontas -pod -poetical -poincare -pointy -poisons -polaris -police -polish -politer -polka -polluted -polymer -pomerania -pompousness -ponds -pool -pop -popping -popularized -populous -pores -port -portending -portico -portray -posed -position -posits -possessive -possums -posteriori -postmasters -postscript -pot -potentates -potion -pottery -pounces -pourer -poverty -powerful -practicable -practitioners -praise -prancer -prayer -preallocated -precariously -precedents -preciously -precipitation -precludes -preconception -predating -predetermination -predication -predictive -predominately -preemptive -prefacing 
-prefers -preinitializes -preliminary -premise -preoccupied -prepared -preposterously -prerogatives -prescriptions -presentations -preserved -presidential -pressings -preston -presumptuousness -pretending -pretexts -prevailing -preventing -previously -pricers -prides -primarily -priming -princesses -principles -prior -prisoners -privations -prizes -probate -probings -procedure -processing -proclamation -procreate -procurer -produce -productive -profession -proffered -profitability -profound -program -progresses -prohibitions -projections -proletariat -prolong -prominent -promoter -promptest -promulgation -pronouncement -proofs -propane -properly -prophesy -proportionately -proposer -propounded -prorate -prosecutes -prosodic -prospector -prostate -protecting -protege -protestations -protons -protozoan -prouder -provenance -providence -provision -provokes -proximal -pruned -prussianize -pseudoinstruction -psychiatrist -psychologically -psychosomatic -pub -publicly -puckered -puffed -puller -pulls -pulse -pumpkin -punctually -punishable -punt -puppeteer -purchases -purges -purina -purpler -purposed -purse -pursuing -pushdown -putnam -puzzlement -pyongyang -pythagoreanizes -quadrangle -quadrennial -quagmires -quakeress -qualified -qualm -quantifiers -quantize -quarreled -quartering -quasar -quavering -queerer -queried -questionable -questions -quibble -quicklime -quieting -quince -quit -quivers -quonset -quotient -rabin -rachmaninoff -racketeers -radiance -radiators -radiography -rae -rages -raider -railroaded -rainbow -rains -rake -ralston -ramifications -rams -rand -randy -rangy -rankings -ransomer -rap -rapids -rapturous -rascally -rasping -ratfor -ration -rationalizes -rattler -ravager -ravens -rawlins -rays -reach -reacted -reactivation -reader -readjusted -realigned -realizable -realm -reaped -rear -rearrest -reasonings -reassigned -reawakened -rebellions -rebooting -rebuffed -rebutted -recalibrated -recapitulates -receded -receives -receptive -recife 
-reciprocating -recitations -reckoned -reclaiming -reclining -recognize -recollect -recommend -recompiles -reconciliation -reconnect -reconstituted -recorder -recover -recreating -recta -recur -recursing -red -redeclared -redefined -redevelopment -redisplayed -redness -redressing -reducibly -reeds -reelects -reenforcement -reestablishing -reexamining -references -referral -refine -reflecting -reflexes -reformatory -reformulated -refrained -refreshment -refugee -refuted -regally -regenerating -regimentation -regis -regressed -regrettable -regularly -regulators -rehearsing -reimbursable -reined -reinhold -reinstated -reintroduces -reiteration -rejoiced -relabeled -relating -relatives -relaxes -relegate -relents -relic -relieving -relinquishing -reloader -reluctance -remains -remedied -remind -reminiscently -remodels -remotely -removing -renames -rendezvous -renewable -renouncing -rented -reopen -reorganize -repairman -repaying -repeatedly -repentance -repetitious -replaceable -replays -replicate -report -reposing -representably -representing -reprieved -reproach -reproducibilities -reprograms -republics -repulses -reputed -required -requisitions -rescind -researchers -resemblances -resentment -reservoir -resident -resignation -resistance -resistors -resolver -resorting -respect -respective -responded -responsible -restarts -restful -restorations -restrainers -restrictive -resultant -resuming -resurrectors -retailing -retaliatory -retentiveness -retina -retiring -retracting -retransmission -retribution -retriever -retrospection -retype -reuniting -revamping -reveler -revere -reverifies -reverses -reviewer -reviser -revival -revoked -revolution -revolvers -rewinding -rewriting -rhesus -rhode -rhyming -ribbons -richard -richmond -rico -ride -ridiculed -rifle -rigging -rightfulness -rigor -rims -ringings -riordan -ripely -rippling -risk -ritually -river -rivulet -roadsters -roaring -robberies -roberta -robinsonville -rochester -rocket -rockwell -rods -roll -romance 
-romanizes -romper -roofing -rooming -root -roping -rosebush -rosetta -rot -rotations -rotund -roughness -rounding -roused -routes -roving -rowley -royalty -rubbing -rubles -rudeness -ruffian -ruggedness -rule -rumanians -rummy -runaway -runoff -rupturing -russell -rustic -rustlers -ruthlessness -sabbathize -sachs -sacrifice -sacrosanct -saddles -safari -safes -sagebrush -said -sails -salable -salerno -saline -sally -salters -salutations -salvages -same -sampling -sanatorium -sanctioning -sandburg -sandra -sanest -sanskrit -sapling -saran -sari -satchel -satires -satisfy -saturnalia -saud -savaged -saver -savored -sawfish -saxonize -sayings -scala -scaling -scampers -scanners -scapegoat -scared -scatter -scenic -schantz -schelling -schemers -schmitt -scholastic -schoolhouses -schroeder -schuylkill -scissor -scoffs -scope -scoreboard -scorner -scotchgard -scottsdale -scouted -scrambled -scrapes -scratching -screamers -screenings -scribbled -scripts -scrumptious -scuffle -sculptured -scythe -seagate -seam -seaquarium -searchlight -seasonable -seat -seceded -secondary -secretarial -secretive -sections -securings -sedition -see -seedy -seeming -seer -segmentations -segundo -seizures -selectman -selfishly -sells -semantics -semiconductor -semipermanently -senate -seneca -sense -sensing -sensual -sentimentally -separately -sept -sequencers -sequentially -serene -serializable -serif -serra -service -servings -sets -settler -seventeens -severance -severs -sex -sexual -shackled -shadiness -shaffer -shakers -shale -shameful -shanghaied -shapeless -shard -shares -sharpening -shattering -shawano -shearing -sheds -sheets -shelley -shelves -sheridan -shied -shiftier -shilling -shiner -shintoizes -shipper -shirk -shiver -shocker -shoehorn -shooter -shoppers -shortage -shortens -shorts -shouldered -shoved -showed -shows -shrewd -shrilled -shrinking -shrugs -shuffled -shutoff -shuttles -siberia -sicken -sideband -sides -sidings -sierra -sighed -sigma -signature -signification 
-sikkim -silent -silken -sills -silverman -simile -simon -simplicities -simplistic -simulation -sinbad -sinews -singed -singlet -singularly -sinner -sioux -sirens -sisyphus -sittings -siva -sixties -skate -skeptical -sketchpad -skidding -skillfulness -skims -skipped -skirmishes -skulked -sky -skyrockets -slacks -slang -slash -slaughter -slavic -slavonicizes -sledgehammer -sleepless -sleighs -sliced -slide -slightly -slings -slips -slogans -sloppiness -slotting -slower -sluggishness -slums -smacked -smalltime -smasher -smell -smiles -smithsonian -smoked -smoldered -smoothing -smug -smythe -snap -snapshots -snatched -sneakiest -sneers -sniffs -snodgrass -snorkel -snowbelt -snows -snuffs -soak -soared -sobers -socialists -sociological -socks -sofas -softly -sojourn -soldier -solenoid -solid -solids -solos -solvent -somber -somerset -son -sonny -soothe -sophistication -sordid -sorest -sorrows -soul -soundness -soured -southbound -southland -soviets -spacer -spaded -spaniardization -spanked -spare -sparked -sparsely -spat -spawned -speakers -specialists -specialty -specified -speckle -spectators -spectrography -speculates -speechless -speeds -spellings -spent -spica -spies -spilt -spinner -spirally -spirituals -spiting -spleen -splicing -splits -spoiling -sponged -sponsorship -spoolers -spores -sportswriter -spotter -sprague -spraying -sprees -springiness -sprint -sprouted -spurn -sputtered -squadrons -squarer -squatting -squeaky -squeezing -squirmed -stab -stabilizes -stacked -staffing -stagers -stags -staircases -stalemate -stalling -stammer -stampeding -standard -standings -stans -star -stargate -starring -startles -state -statewide -stationmaster -statues -statutorily -staves -steadier -stealer -steamer -steele -steeper -steered -stem -stenographer -stepmother -stereoscopic -sterilizer -stethoscope -stew -stickier -stiffens -stigma -stillest -stimulate -stinging -stipends -stirrer -stitching -stockholder -stodgy -stomacher -stood -stopgap -storage -storeyed 
-stormiest -stouter -strafe -straightened -strained -stranding -strangler -stratagem -stratifies -strawberry -streamer -streetcar -strengths -stretched -strict -strife -stringed -stringy -striptease -strode -strolling -strontium -struggle -stuart -stucco -studious -stuffs -stun -stupid -sturm -styli -styx -subcomponents -subdirectory -subdues -subgraph -subjective -sublime -submerges -submode -subordinate -subprogram -subschema -subscripting -subsequent -subsidies -subsistent -substantially -substation -substrate -subsystem -subtleness -subtraction -subunits -subverting -successful -succinctness -suckers -sudden -sufferance -sufficiency -suffocated -sugarings -suggests -suitably -suits -sulks -sultan -summands -summation -summon -sumter -sunder -sunken -sunshine -supercomputers -supergroups -superior -supernatural -supersede -supervise -suppers -supplementing -support -suppose -suppressing -surely -surge -surly -surpass -surprisingly -surround -surveyors -sus -suspended -suspicions -sutherland -swab -swallowing -swan -swaps -swat -sweat -swedes -sweepstakes -sweetest -swellings -swifter -swimsuit -swipe -switchboards -swivel -swords -sykes -sylvia -symbolize -symmetry -sympathy -synagogue -synchronizes -syndication -synonymously -synthesize -syrian -systematically -tabernacle -tablespoonful -tabulate -tacit -tactic -tail -taipei -tale -talker -tallchief -talmudizations -taming -tanaka -tangle -tantalizing -taos -tapestry -tar -tariffs -tasked -tastefully -tattered -taunts -taverns -taxicabs -taylor -teaches -tearful -teaspoonful -technique -tedious -teenaged -tegucigalpa -telegraph -teleologically -telephony -televise -teller -temperance -tempestuous -temporaries -temptingly -tendency -tenex -tense -tent -tenure -terminating -termwise -terre -terrify -terrorize -testable -testifiers -tex -textile -thailand -thankless -thaw -theatrically -theme -theology -theorization -therapies -thereof -thermometer -thessaly -thickly -thimbles -thinking -thirsted -thistle -thorns 
-those -thousand -thread -threatens -thrift -thrived -throne -throughout -thrusters -thumbed -thunderer -thus -tiburon -tickles -tidied -tier -tighteners -tilde -tillich -timbered -timeouts -timetables -timonizes -tingling -tinkled -tint -tipperary -tirelessly -titan -titter -toasts -togetherness -toilets -tolerable -toleration -tomatoes -ton -tonic -tool -toothpaste -topmost -topsy -tormenting -torrent -torturing -tossed -totallers -touchable -tough -tourist -toweling -towns -toys -tracked -tractor -trader -trafficked -trailed -trainer -tramp -trances -transceivers -transcribers -transferal -transformable -transgressed -transistorized -transitively -translation -transmittal -transparent -transponder -transpose -trapezoidal -trauma -traversal -trays -treasure -treating -treetop -tremor -trespassed -triangles -tribunals -tricked -tricky -triggered -trilled -trimming -tripled -triumphal -trivially -troopers -trotsky -troubleshoots -trowels -trucking -truest -trumped -trunk -trustingly -try -tubs -tuft -tumbled -tunable -tunisia -turbulent -turkize -turning -turtle -tutankhamen -tuttle -twenty -twiner -twirling -twitching -tyler -typewriters -typing -tyranny -ugh -ulcers -umbrage -unacceptably -unaided -unanimous -unattainability -unaware -unblocked -uncancelled -unchanging -unclosed -unconditional -uncontrollable -uncountable -undecidable -underbrush -underflows -underlies -undermine -underplays -understandings -undertakings -underwrites -undirected -undone -uneasy -unequaled -uneventful -unfairness -unfit -unformatted -ungratefully -unharmed -unidirectionality -uniformity -unindented -uninterrupted -unions -unitarian -unity -universities -unkindness -unleashed -unlinking -unlucky -unmerciful -unnecessarily -unobtainable -unpaid -unprecedented -unprovable -unraveling -unrecognized -unrestrained -unsafely -unselected -unskilled -unsteady -unsynchronized -untie -untoward -unused -unwholesome -unwinds -unwrap -updater -upholder -upland -uprightly -upsets -upturns -urge 
-urinates -ursuline -usages -usenix -usually -utilization -utopianizes -utters -vacation -vacuumed -vagrantly -valence -valiant -validness -valuably -valves -vandenberg -vanished -vanquishing -variably -varies -varying -vastly -vaudois -vax -veering -vegetated -vehicular -velasquez -venetian -venomous -ventricles -venus -verbalized -verdure -verifier -vern -versatile -vertebrates -vested -veterinary -via -vibrations -viciousness -victimizers -victories -vidal -vier -viewing -vignettes -vilification -villages -vinci -vineyard -violator -violins -virgo -virus -visible -visited -visualize -vitally -vladimir -vocations -voided -volition -voltages -volunteering -voted -vouching -voyaged -vulgarly -waco -waffles -wagnerian -wailing -waiter -waives -waking -walgreen -wallenstein -walls -waltham -wandered -waning -wants -ward -warehousing -warmer -warning -warranted -warsaw -washburn -wasps -watch -watchman -watering -watery -wausau -wavelength -waxers -we -weaknesses -wear -wearisomely -weatherford -webs -wedlock -weekly -weidman -weights -weissmuller -welder -wellesley -wenches -wesleyan -westhampton -wet -whacked -wharves -wheel -whelp -wherever -whims -whippany -whirling -whiskers -whistled -whitehorse -whitens -whitlock -whittling -wholeness -whoop -wichita -widen -widowed -wielding -wilbur -wile -wilkinson -william -willis -wilshire -wince -winding -winehead -wining -winnie -winsett -wiped -wiretappers -wised -wishful -witches -withdrew -withholds -witnessing -woefully -womanhood -wonderfulness -woo -wooden -woodstock -woofer -woonsocket -words -workbooks -workman -worldliness -worrier -worshiper -worthless -wounding -wrapper -wreathes -wrenched -wretch -wring -writ -writing -wrote -wyner -xeroxed -yamaha -yard -yawner -years -yellowed -yelped -yesterdays -yoknapatawpha -yorkshire -youngsters -youthfulness -yukon -zeal -zen -zeus -zionism -zoned -zoroaster diff --git a/third_party/rust/mapped_hyph/tests/compound.hyf b/third_party/rust/mapped_hyph/tests/compound.hyf 
deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/tests/compound4.hyf b/third_party/rust/mapped_hyph/tests/compound4.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/tests/compound5.hyf b/third_party/rust/mapped_hyph/tests/compound5.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/tests/compound6.hyf b/third_party/rust/mapped_hyph/tests/compound6.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/tests/hyphen.hyf b/third_party/rust/mapped_hyph/tests/hyphen.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/tests/lhmin.hyf b/third_party/rust/mapped_hyph/tests/lhmin.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/tests/num.hyf b/third_party/rust/mapped_hyph/tests/num.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/tests/rhmin.hyf b/third_party/rust/mapped_hyph/tests/rhmin.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/tests/settings2.hyf b/third_party/rust/mapped_hyph/tests/settings2.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/tests/settings3.hyf b/third_party/rust/mapped_hyph/tests/settings3.hyf deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/third_party/rust/mapped_hyph/tests/test.rs b/third_party/rust/mapped_hyph/tests/test.rs deleted file mode 100644 index 95eae86f67e2..000000000000 --- a/third_party/rust/mapped_hyph/tests/test.rs +++ /dev/null @@ -1,169 +0,0 @@ -// Any copyright to the test code below is dedicated to the Public Domain. 
-// http://creativecommons.org/publicdomain/zero/1.0/ - -use mapped_hyph::Hyphenator; - -#[test] -fn basic_tests() { - let dic_path = "hyph_en_US.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - assert_eq!(hyph.hyphenate_word("haha", '-'), "haha"); - assert_eq!(hyph.hyphenate_word("hahaha", '-'), "ha-haha"); - assert_eq!(hyph.hyphenate_word("photo", '-'), "photo"); - assert_eq!(hyph.hyphenate_word("photograph", '-'), "pho-to-graph"); - assert_eq!(hyph.hyphenate_word("photographer", '-'), "pho-tog-ra-pher"); - assert_eq!(hyph.hyphenate_word("photographic", '-'), "pho-to-graphic"); - assert_eq!(hyph.hyphenate_word("photographical", '-'), "pho-to-graph-i-cal"); - assert_eq!(hyph.hyphenate_word("photographically", '-'), "pho-to-graph-i-cally"); - assert_eq!(hyph.hyphenate_word("supercalifragilisticexpialidocious", '-'), "su-per-cal-ifrag-ilis-tic-ex-pi-ali-do-cious"); -} - -// Testcases adapted from tests included with libhyphen. -// (Using only the UTF-8 dictionaries/tests, and omitting those that require -// the extended hyphenation algorithm.) - -#[test] -fn base() { - let dic_path = "tests/base.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - use std::fs::File; - use std::io::{BufRead,BufReader}; - let words: Vec = { - let file = File::open("tests/base.word").unwrap(); - BufReader::new(file).lines().map(|l| l.unwrap()).collect() - }; - let hyphs: Vec = { - let file = File::open("tests/base.hyph").unwrap(); - BufReader::new(file).lines().map(|l| l.unwrap()).collect() - }; - for i in 0 .. 
words.len() { - assert_eq!(hyph.hyphenate_word(&words[i], '='), hyphs[i]); - } -} - -#[test] -fn compound() { - let dic_path = "tests/compound.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - assert_eq!(hyph.hyphenate_word("motorcycle", '-'), "mo-tor-cy-cle"); -} - -#[test] -fn compound4() { - let dic_path = "tests/compound4.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - assert_eq!(hyph.hyphenate_word("motorcycle", '-'), "motor-cycle"); -} - -#[test] -fn compound5() { - let dic_path = "tests/compound5.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - assert_eq!(hyph.hyphenate_word("postea", '-'), "post-e-a"); -} - -#[test] -fn compound6() { - let dic_path = "tests/compound6.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - assert_eq!(hyph.hyphenate_word("meaque", '-'), "me-a-que"); -} - -#[test] -fn settings2() { - let dic_path = "tests/settings2.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - assert_eq!(hyph.hyphenate_word("őőőőőőő", '='), "ő=ő=ő=ő=ő=ő=ő"); -} - -#[test] -fn settings3() { - let dic_path = "tests/settings3.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - assert_eq!(hyph.hyphenate_word("őőőőőőő", '='), 
"őő=ő=ő=ő=őő"); -} - -#[test] -fn hyphen() { - let dic_path = "tests/hyphen.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - assert_eq!(hyph.hyphenate_word("foobar'foobar-foobar’foobar", '='), "foobar'foobar-foobar’foobar"); -} - -#[test] -fn lhmin() { - let dic_path = "tests/lhmin.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - assert_eq!(hyph.hyphenate_word("miért", '='), "mi=ért"); -} - -#[test] -fn rhmin() { - let dic_path = "tests/rhmin.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - assert_eq!(hyph.hyphenate_word("övéit", '='), "övéit"); - assert_eq!(hyph.hyphenate_word("అంగడిధర", '='), "అం=గ=డిధర"); -} - -#[test] -fn num() { - let dic_path = "tests/num.hyf"; - let dic = match unsafe { mapped_hyph::load_file(dic_path) } { - Some(dic) => dic, - _ => panic!("failed to load dictionary {}", dic_path), - }; - let hyph = Hyphenator::new(&*dic); - assert_eq!(hyph.hyphenate_word("foobar", '='), "foobar"); - assert_eq!(hyph.hyphenate_word("foobarfoobar", '='), "foobar=foobar"); - assert_eq!(hyph.hyphenate_word("barfoobarfoo", '='), "barfoo=barfoo"); - assert_eq!(hyph.hyphenate_word("123foobarfoobar", '='), "123foobar=foobar"); - assert_eq!(hyph.hyphenate_word("foobarfoobar123", '='), "foobar=foobar123"); - assert_eq!(hyph.hyphenate_word("123foobarfoobar123", '='), "123foobar=foobar123"); - assert_eq!(hyph.hyphenate_word("123barfoobarfoo", '='), "123barfoo=barfoo"); - assert_eq!(hyph.hyphenate_word("barfoobarfoo123", '='), "barfoo=barfoo123"); - assert_eq!(hyph.hyphenate_word("123barfoobarfoo123", '='), "123barfoo=barfoo123"); -} diff --git 
a/toolkit/library/rust/shared/Cargo.toml b/toolkit/library/rust/shared/Cargo.toml index 573dd4cfb8b4..e5fee6db5935 100644 --- a/toolkit/library/rust/shared/Cargo.toml +++ b/toolkit/library/rust/shared/Cargo.toml @@ -43,7 +43,6 @@ audio_thread_priority = "0.20.2" mdns_service = { path="../../../../media/mtransport/mdns_service", optional = true } neqo_glue = { path = "../../../../netwerk/socket/neqo_glue" } rlbox_lucet_sandbox = { version = "0.1.0", optional = true } -mapped_hyph = { git = "https://github.com/jfkthame/mapped_hyph.git", tag = "v0.3.0" } [build-dependencies] rustc_version = "0.2" diff --git a/toolkit/library/rust/shared/lib.rs b/toolkit/library/rust/shared/lib.rs index 3dc53c44b7b4..e6e066a66514 100644 --- a/toolkit/library/rust/shared/lib.rs +++ b/toolkit/library/rust/shared/lib.rs @@ -6,7 +6,6 @@ extern crate geckoservo; -extern crate mapped_hyph; extern crate kvstore; extern crate mp4parse_capi; extern crate nsstring;