From e189ab729db58ce2e9b86649c1fca6b156b5b355 Mon Sep 17 00:00:00 2001 From: Ingmar Steiner Date: Tue, 6 Jan 2015 13:31:14 +0100 Subject: [PATCH] normalize line endings some more --- LICENSE.txt | 148 +- .../src/builder/bin/redstart.bat | 18 +- .../src/builder/bin/transcription.bat | 18 +- .../src/runtime/bin/marytts-client.bat | 20 +- .../bin/marytts-component-installer.bat | 18 +- .../src/runtime/bin/marytts-server.bat | 28 +- .../runtime/doc/examples/client/MaryClient.py | 734 ++-- .../doc/examples/client/MaryClientUser.java | 204 +- .../doc/examples/client/maryclient-http.py | 370 +- .../runtime/doc/examples/client/maryclient.rb | 522 +-- .../doc/examples/client/maryclient.tcl | 1410 +++--- .../tools/voiceimport/importMain.config | 166 +- .../voiceimport/templates/hsmm-voice.config | 218 +- .../templates/unitselection-voice.config | 164 +- .../upgrade/en_US-cmu-slt-hsmm-4.x.config | 222 +- .../tools/upgrade/en_US-cmu-slt-hsmm-5.config | 172 +- .../marytts/tools/emospeak/sampletexts_de.txt | 20 +- .../marytts/tools/emospeak/sampletexts_en.txt | 22 +- .../resources/marytts/util/dom/MaryXML.xsd | 1420 +++--- .../lib/modules/tib/cap/phoneme-list-tib.xml | 248 +- .../modules/tib/cap/tonerule-params-tib.xml | 234 +- .../tib/prosody/tobipredparams-tib.xml | 226 +- .../marytts/client/air/BMLSpeechPsydule.java | 446 +- .../marytts/client/air/MarySpeechPsydule.java | 364 +- .../language/de/infostruct/GerNetQuery.java | 600 +-- .../language/de/infostruct/Stemmer.java | 530 +-- .../language/tib/ContourGenerator.java | 3794 ++++++++--------- .../language/tib/KlattDurationModeller.java | 3128 +++++++------- .../java/marytts/language/tib/Prosody.java | 270 +- .../signalproc/demo/ChangeMyVoiceUI.java | 2430 +++++------ .../demo/LPCCrossSynthesisOnline.java | 158 +- 31 files changed, 9161 insertions(+), 9161 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index d06ec7a2..0910ee3d 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,74 +1,74 @@ -MARY Software User Agreement -11 
April 2011 - -MARY is licensed under the following terms. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation, version 3 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this program. If not, see . - - - - -Applicable Licenses - -MARY is built upon a number of other open source technologies and products. -Here is a list of those products with links to their licenses. - -hts_engine: the HMM-based speech synthesis code in MARY TTS is based on HTS, ported to Java by DFKI. The original HTS can be obtained from -http://hts-engine.sourceforge.net/ -- it is released under the New and -Simplified BSD License. - -freetts: MARY uses code from FreeTTS (http://freetts.sf.net) for various -processing modules and as the source of one method for waveform synthesis. -FreeTTS is licensed under the (BSD-style) FreeTTS license, see -doc/licenses/freetts-license.txt. - -JTok: The JTok tokenizer from http://heartofgold.dfki.de is distributed -under the GNU Lesser General Public License, see http://www.gnu.org or -doc/licenses/LGPL.txt. - -jsresources.jar: A few utility classes from http://www.jsresources.org -are distributed under the terms of the jsresources license, see -doc/licenses/jsresources-license.txt. - -log4j: MARY uses log4j (http://logging.apache.org/log4j) as a logging -mechanism. log4j is distributed under the Apache Software License, see -http://www.apache.org or doc/licenses/apache-software-license.txt - -JUnit: For unit testing of the java source, mary uses JUnit -(http://junit.org). 
JUnit is licensed under the Common Public License, see -http://junit.org or doc/licenses/CPL.txt. - -java-diff: A java diff implementation from http://www.incava.org/projects/java-diff for input-output-comparisons in the -Mary Expert Interface. java-diff is licensed under the GNU Lesser General -Public License, see http://www.gnu.org or doc/licenses/LGPL.txt. - -fast-md5: A fast md5 checksum implementation from http://www.twmacinta.com/myjava/fast_md5.php -used for computing checksums after downloading voices. fast-md5 is licensed under -the GNU Lesser General Public License, see http://www.gnu.org or doc/licenses/LGPL.txt. - -JavaOpenAIR: MARY can optionally be used as an OpenAIR component, -building on the JavaOpenAIR reference implementation from -http://www.mindmakers.org, which is licensed under the -(BSD-style) JavaOpenAIR license, see doc/licenses/JavaOpenAIR-license.txt -(files concerned: JavaOpenAIR.jar) - -mwdumper: A tool for extracting sets of pages from a MediaWiki dump file. -mwdumper is MIT-style like licensed, see http://www.mediawiki.org/wiki/Mwdumper -and for the license http://en.wikipedia.org/wiki/MIT_License. -(files concerned: mwdumper-2008-04-13.jar) - - -sgt: The Scientific Graphics Toolkit (sgt) is provided by the NOAA/PMEL/EPIC group (see http://www.epic.noaa.gov/java/sgt/) under the BSD-style EPIC license, see doc/licenses/epic-license.txt. - -IT IS YOUR OBLIGATION TO READ AND ACCEPT ALL SUCH TERMS -AND CONDITIONS PRIOR TO USE OF THIS CONTENT. \ No newline at end of file +MARY Software User Agreement +11 April 2011 + +MARY is licensed under the following terms. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation, version 3 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see . + + + + +Applicable Licenses + +MARY is built upon a number of other open source technologies and products. +Here is a list of those products with links to their licenses. + +hts_engine: the HMM-based speech synthesis code in MARY TTS is based on HTS, ported to Java by DFKI. The original HTS can be obtained from +http://hts-engine.sourceforge.net/ -- it is released under the New and +Simplified BSD License. + +freetts: MARY uses code from FreeTTS (http://freetts.sf.net) for various +processing modules and as the source of one method for waveform synthesis. +FreeTTS is licensed under the (BSD-style) FreeTTS license, see +doc/licenses/freetts-license.txt. + +JTok: The JTok tokenizer from http://heartofgold.dfki.de is distributed +under the GNU Lesser General Public License, see http://www.gnu.org or +doc/licenses/LGPL.txt. + +jsresources.jar: A few utility classes from http://www.jsresources.org +are distributed under the terms of the jsresources license, see +doc/licenses/jsresources-license.txt. + +log4j: MARY uses log4j (http://logging.apache.org/log4j) as a logging +mechanism. log4j is distributed under the Apache Software License, see +http://www.apache.org or doc/licenses/apache-software-license.txt + +JUnit: For unit testing of the java source, mary uses JUnit +(http://junit.org). JUnit is licensed under the Common Public License, see +http://junit.org or doc/licenses/CPL.txt. + +java-diff: A java diff implementation from http://www.incava.org/projects/java-diff for input-output-comparisons in the +Mary Expert Interface. 
java-diff is licensed under the GNU Lesser General +Public License, see http://www.gnu.org or doc/licenses/LGPL.txt. + +fast-md5: A fast md5 checksum implementation from http://www.twmacinta.com/myjava/fast_md5.php +used for computing checksums after downloading voices. fast-md5 is licensed under +the GNU Lesser General Public License, see http://www.gnu.org or doc/licenses/LGPL.txt. + +JavaOpenAIR: MARY can optionally be used as an OpenAIR component, +building on the JavaOpenAIR reference implementation from +http://www.mindmakers.org, which is licensed under the +(BSD-style) JavaOpenAIR license, see doc/licenses/JavaOpenAIR-license.txt +(files concerned: JavaOpenAIR.jar) + +mwdumper: A tool for extracting sets of pages from a MediaWiki dump file. +mwdumper is MIT-style like licensed, see http://www.mediawiki.org/wiki/Mwdumper +and for the license http://en.wikipedia.org/wiki/MIT_License. +(files concerned: mwdumper-2008-04-13.jar) + + +sgt: The Scientific Graphics Toolkit (sgt) is provided by the NOAA/PMEL/EPIC group (see http://www.epic.noaa.gov/java/sgt/) under the BSD-style EPIC license, see doc/licenses/epic-license.txt. + +IT IS YOUR OBLIGATION TO READ AND ACCEPT ALL SUCH TERMS +AND CONDITIONS PRIOR TO USE OF THIS CONTENT. diff --git a/marytts-assembly/assembly-builder/src/builder/bin/redstart.bat b/marytts-assembly/assembly-builder/src/builder/bin/redstart.bat index 967edb86..71546765 100644 --- a/marytts-assembly/assembly-builder/src/builder/bin/redstart.bat +++ b/marytts-assembly/assembly-builder/src/builder/bin/redstart.bat @@ -1,9 +1,9 @@ -@echo off -set BINDIR=%~dp0 -call :RESOLVE "%BINDIR%\.." MARY_BASE -java -showversion -ea "%*" -cp ".;%MARY_BASE%/lib/*" marytts.tools.redstart.Redstart -goto :EOF - -:RESOLVE -set %2=%~f1 -goto :EOF +@echo off +set BINDIR=%~dp0 +call :RESOLVE "%BINDIR%\.." 
MARY_BASE +java -showversion -ea "%*" -cp ".;%MARY_BASE%/lib/*" marytts.tools.redstart.Redstart +goto :EOF + +:RESOLVE +set %2=%~f1 +goto :EOF diff --git a/marytts-assembly/assembly-builder/src/builder/bin/transcription.bat b/marytts-assembly/assembly-builder/src/builder/bin/transcription.bat index f0c6993a..74cdbebc 100644 --- a/marytts-assembly/assembly-builder/src/builder/bin/transcription.bat +++ b/marytts-assembly/assembly-builder/src/builder/bin/transcription.bat @@ -1,9 +1,9 @@ -@echo off -set BINDIR=%~dp0 -call :RESOLVE "%BINDIR%\.." MARY_BASE -java -showversion -ea "%*" -cp ".;%MARY_BASE%\lib\*" marytts.tools.transcription.TranscriptionGUI -goto :EOF - -:RESOLVE -set %2=%~f1 -goto :EOF +@echo off +set BINDIR=%~dp0 +call :RESOLVE "%BINDIR%\.." MARY_BASE +java -showversion -ea "%*" -cp ".;%MARY_BASE%\lib\*" marytts.tools.transcription.TranscriptionGUI +goto :EOF + +:RESOLVE +set %2=%~f1 +goto :EOF diff --git a/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-client.bat b/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-client.bat index bc7534b4..9df09bf2 100644 --- a/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-client.bat +++ b/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-client.bat @@ -1,10 +1,10 @@ -@echo off -set BINDIR=%~dp0 -call :RESOLVE "%BINDIR%\.." MARY_BASE - -java -showversion -ea -Dserver.host=localhost -Dserver.port=59125 -jar "%MARY_BASE%\lib\marytts-client-${project.version}-jar-with-dependencies.jar" -goto :EOF - -:RESOLVE -set %2=%~f1 -goto :EOF +@echo off +set BINDIR=%~dp0 +call :RESOLVE "%BINDIR%\.." 
MARY_BASE + +java -showversion -ea -Dserver.host=localhost -Dserver.port=59125 -jar "%MARY_BASE%\lib\marytts-client-${project.version}-jar-with-dependencies.jar" +goto :EOF + +:RESOLVE +set %2=%~f1 +goto :EOF diff --git a/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-component-installer.bat b/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-component-installer.bat index 8f4ff657..0a02e2e0 100644 --- a/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-component-installer.bat +++ b/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-component-installer.bat @@ -1,9 +1,9 @@ -@echo off -set BINDIR=%~dp0 -call :RESOLVE "%BINDIR%\.." MARY_BASE -java -showversion -ea -Dmary.base="%MARY_BASE%" -cp ".;%MARY_BASE%\lib\*" marytts.tools.install.InstallerGUI -goto :EOF - -:RESOLVE -set %2=%~f1 -goto :EOF +@echo off +set BINDIR=%~dp0 +call :RESOLVE "%BINDIR%\.." MARY_BASE +java -showversion -ea -Dmary.base="%MARY_BASE%" -cp ".;%MARY_BASE%\lib\*" marytts.tools.install.InstallerGUI +goto :EOF + +:RESOLVE +set %2=%~f1 +goto :EOF diff --git a/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-server.bat b/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-server.bat index f3cc2dad..8c8b99d3 100644 --- a/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-server.bat +++ b/marytts-assembly/assembly-runtime/src/runtime/bin/marytts-server.bat @@ -1,14 +1,14 @@ -@echo off - -rem Set the Mary base installation directory in an environment variable: -set BINDIR=%~dp0 - -call :RESOLVE "%BINDIR%\.." MARY_BASE - -set CLASSPATH=".;%MARY_BASE%\lib\*" -java -showversion -ea -Xms40m -Xmx1g -cp %CLASSPATH% "-Dmary.base=%MARY_BASE%" marytts.server.Mary -goto :EOF - -:RESOLVE -set %2=%~f1 -goto :EOF +@echo off + +rem Set the Mary base installation directory in an environment variable: +set BINDIR=%~dp0 + +call :RESOLVE "%BINDIR%\.." 
MARY_BASE + +set CLASSPATH=".;%MARY_BASE%\lib\*" +java -showversion -ea -Xms40m -Xmx1g -cp %CLASSPATH% "-Dmary.base=%MARY_BASE%" marytts.server.Mary +goto :EOF + +:RESOLVE +set %2=%~f1 +goto :EOF diff --git a/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/MaryClient.py b/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/MaryClient.py index 88f1d913..46569873 100644 --- a/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/MaryClient.py +++ b/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/MaryClient.py @@ -1,367 +1,367 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -import socket, sys, types, getopt - - -languageNames = {'de':'German', - 'en':'English', - 'en_US':'US English', - 'tib':'Tibetan'} - -class MaryClient: - specificationVersion = "0.1" - - """Python implementation of a MARY TTS client""" - def __init__( self, host="cling.dfki.uni-sb.de", port=59125, profile=False, quiet=False ): - self.host = host - self.port = port - self.profile = profile - self.quiet = quiet - self.allVoices = None # array of Voice objects - self.voicesByLocaleMap = {} # Map locale strings to arrays of Voice objects - self.allDataTypes = None # array of DataType objects - self.inputDataTypes = None # array of DataType objects - self.outputDataTypes = None # array of DataType objects - self.serverExampleTexts = {} - self.voiceExampleTexts = {} - self.serverVersionInfo = u'' - - if not self.quiet: - sys.stderr.write( "MARY TTS Python Client %s\n" % ( self.specificationVersion ) ) - try: - info = self.getServerVersionInfo() - except: - sys.stderr.write( "Problem connecting to mary server at %s:%i\n" % ( self.host, self.port ) ) - raise - sys.stderr.write( "Connected to %s:%i, " % ( self.host, self.port ) ) - sys.stderr.write( info ) - sys.stderr.write( '\n' ) - - def __getServerInfo( self, request="", marySocket=None ): - """Get answer to request from mary server. 
Returns a list of unicode strings, - each representing a line without the line break. - """ - closeSocket = False - if marySocket is None: - closeSocket = True - marySocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) - marySocket.connect( ( self.host, self.port ) ) - assert isinstance(marySocket, socket.SocketType) - maryFile = marySocket.makefile( 'rwb', 1 ) # read-write, line-buffered - maryFile.write( unicode( request+"\n" ).encode( 'utf-8' ) ) - result = [] - while True: - got = unicode( maryFile.readline().strip(), 'utf-8' ) - # read until end of file or an empty line is read: - if not got: break - result.append(got) - if closeSocket: - marySocket.close() - return result - - def getServerVersionInfo( self ): - "Get version info from server. Returns a unicode string" - if self.serverVersionInfo == u'': - # need to get it from server - self.serverVersionInfo = u'\n'.join(self.__getServerInfo("MARY VERSION")) - return self.serverVersionInfo - - def getAllDataTypes(self, locale=None): - """Obtain a list of all data types known to the server. If the information is not - yet available, the server is queried. This is optional information - which is not required for the normal operation of the client, but - may help to avoid incompatibilities. - Returns an array of DataType objects - """ - if self.allDataTypes is None: - self.__fillDataTypes() - assert self.allDataTypes is not None and len( self.allDataTypes ) > 0 - if locale is None: - return self.allDataTypes - else: - assert isinstance(locale, types.UnicodeType), "Unexpected type for locale: '%s'" % (type(locale)) - return [d for d in self.allDataTypes if d.locale is None or d.locale == locale] - - def getInputDataTypes(self,locale=None): - """Obtain a list of input data types known to the server. If the information is not - yet available, the server is queried. This is optional information - which is not required for the normal operation of the client, but - may help to avoid incompatibilities. 
- Returns an arry of DataType objects - """ - if self.inputDataTypes is None: - self.__fillDataTypes() - assert self.inputDataTypes is not None and len( self.inputDataTypes ) > 0 - if locale is None: - return self.inputDataTypes - else: - assert isinstance(locale, types.UnicodeType), "Unexpected type for locale: '%s'" % (type(locale)) - return [d for d in self.inputDataTypes if d.locale is None or d.locale == locale] - - def getOutputDataTypes(self, locale=None): - """Obtain a list of output data types known to the server. If the information is not - yet available, the server is queried. This is optional information - which is not required for the normal operation of the client, but - may help to avoid incompatibilities. - Returns an arry of DataType objects - """ - if self.outputDataTypes is None: - self.__fillDataTypes() - assert self.outputDataTypes is not None and len( self.outputDataTypes ) > 0 - if locale is None: - return self.outputDataTypes - else: - assert isinstance(locale, types.UnicodeType), "Unexpected type for locale: '%s'" % (type(locale)) - return [d for d in self.outputDataTypes if d.locale is None or d.locale == locale] - - - def __fillDataTypes( self ): - self.allDataTypes = [] - self.inputDataTypes = [] - self.outputDataTypes = [] - marySocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) - marySocket.connect( ( self.host, self.port ) ) - # Expect a variable number of lines of the kind - # RAWMARYXML INPUT OUTPUT - # TEXT_DE LOCALE=de INPUT - # AUDIO OUTPUT - typeStrings = self.__getServerInfo( "MARY LIST DATATYPES", marySocket ) - if not typeStrings or len(typeStrings) == 0: - raise IOError( "Could not get list of data types from Mary server" ) - marySocket.close() - for typeString in typeStrings: - parts = typeString.split() - if len( parts ) == 0: - continue - name = parts[0] - isInputType = False - isOutputType = False - locale = None - for part in parts[1:]: - if part[:7] == "LOCALE=": - locale = part[7:] - elif part == "INPUT": - 
isInputType = True - elif part == "OUTPUT": - isOutputType = True - dt = DataType( name, locale, isInputType, isOutputType ) - self.allDataTypes.append( dt ) - if dt.isInputType: - self.inputDataTypes.append( dt ) - if dt.isOutputType: - self.outputDataTypes.append( dt ) - - def getVoices( self, locale=None ): - """Obtain a list of voices known to the server. If the information is not - yet available, the server is queried. This is optional information - which is not required for the normal operation of the client, but - may help to avoid incompatibilities. - Returns an array of Voice objects - """ - if self.allVoices is None: - self.__fillVoices() - assert self.allVoices is not None and len( self.allVoices ) > 0 - if locale is None: - return self.allVoices - else: - assert isinstance(locale, types.UnicodeType), "Unexpected type for locale: '%s'" % (type(locale)) - if self.voicesByLocaleMap.has_key(locale): - return self.voicesByLocaleMap[locale] - else: - raise Exception("No voices for locale '%s'" % (locale)) - - def __fillVoices( self ): - self.allVoices = [] - self.voicesByLocaleMap = {} - marySocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) - marySocket.connect( ( self.host, self.port ) ) - # Expect a variable number of lines of the kind - # de7 de female - # us2 en male - # dfki-stadium-emo de male limited - voiceStrings = self.__getServerInfo( "MARY LIST VOICES", marySocket ) - if not voiceStrings or len(voiceStrings) == 0: - raise IOError( "Could not get list of voices from Mary server" ) - marySocket.close() - for voiceString in voiceStrings: - parts = voiceString.split() - if len( parts ) < 3: - continue - name = parts[0] - locale = parts[1] - gender = parts[2] - domain = None - if len( parts ) > 3: - domain = parts[3] - voice = Voice( name, locale, gender, domain ) - self.allVoices.append( voice ) - localeVoices = None - if self.voicesByLocaleMap.has_key( locale ): - localeVoices = self.voicesByLocaleMap[locale] - else: - localeVoices = [] - 
self.voicesByLocaleMap[locale] = localeVoices - localeVoices.append( voice ) - - def getGeneralDomainVoices( self, locale=None ): - """Obtain a list of general domain voices known to the server. If the information is not - yet available, the server is queried. This is optional information - which is not required for the normal operation of the client, but - may help to avoid incompatibilities. - Returns an array of Voice objects - """ - return [v for v in self.getVoices( locale ) if not v.isLimitedDomain] - - def getLimitedDomainVoices( self, locale=None ): - """Obtain a list of limited domain voices known to the server. If the information is not - yet available, the server is queried. This is optional information - which is not required for the normal operation of the client, but - may help to avoid incompatibilities. - Returns an array of Voice objects - """ - return [v for v in self.getVoices( locale ) if v.isLimitedDomain] - - def getAvailableLanguages(self): - """ Check available voices and return a list of tuples (abbrev, name) - representing the available languages -- e.g. [('en', 'English'),('de', 'German')]. - """ - if self.allVoices is None: - self.__fillVoices() - assert self.allVoices is not None and len( self.allVoices ) > 0 - languages = [] - for l in self.voicesByLocaleMap.keys(): - if languageNames.has_key(l): - languages.append((l,languageNames[l])) - else: - languages.append((l, l)) - return languages - - def getServerExampleText( self, dataType ): - """Request an example text for a given data type from the server. - dataType the string representation of the data type, - e.g. "RAWMARYXML". 
This is optional information - which is not required for the normal operation of the client, but - may help to avoid incompatibilities.""" - if not self.serverExampleTexts.has_key( dataType ): - exampleTexts = self.__getServerInfo( "MARY EXAMPLETEXT %s" % ( dataType ) ) - if not exampleTexts or len(exampleTexts) == 0: - raise IOError( "Could not get example text for type '%s' from Mary server" % (dataType)) - exampleText = u'\n'.join(exampleTexts) - self.serverExampleTexts[dataType] = exampleText - return self.serverExampleTexts[dataType] - - def process( self, input, inputType, outputType, audioType=None, defaultVoiceName=None, output=sys.stdout ): - assert type( input ) in types.StringTypes - assert type( inputType ) in types.StringTypes - assert type( outputType ) in types.StringTypes - assert audioType is None or type( audioType ) in types.StringTypes - assert defaultVoiceName is None or type( defaultVoiceName ) in types.StringTypes - assert callable( getattr( output, 'write' ) ) - if type( input ) != types.UnicodeType: - input = unicode( input, 'utf-8' ) - maryInfoSocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) - maryInfoSocket.connect( ( self.host, self.port ) ) - assert type( maryInfoSocket ) is socket.SocketType - maryInfo = maryInfoSocket.makefile( 'rwb', 1 ) # read-write, line-buffered - maryInfo.write( unicode( "MARY IN=%s OUT=%s" % ( inputType, outputType ), 'utf-8' ) ) - if audioType: - maryInfo.write( unicode( " AUDIO=%s" % ( audioType ), 'utf-8' ) ) - if defaultVoiceName: - maryInfo.write( unicode( " VOICE=%s" % ( defaultVoiceName ), 'utf-8' ) ) - maryInfo.write( "\r\n" ) - # Receive a request ID: - id = maryInfo.readline() - maryDataSocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) - maryDataSocket.connect( ( self.host, self.port ) ) - assert type( maryDataSocket ) is socket.SocketType - maryDataSocket.sendall( id ) # includes newline - maryDataSocket.sendall( input.encode( 'utf-8' ) ) - maryDataSocket.shutdown( 1 ) # 
shutdown writing - # Set mary info socket to non-blocking, so we only read somthing - # if there is something to read: - maryInfoSocket.setblocking( 0 ) - while True: - try: - err = maryInfoSocket.recv( 8192 ) - if err: sys.stderr.write( err ) - except: - pass - got = maryDataSocket.recv( 8192 ) - if not got: break - output.write( got ) - maryInfoSocket.setblocking( 1 ) - while True: - err = maryInfoSocket.recv( 8192 ) - if not err: break - sys.stderr.write( err ) - - - -################ data representation classes ################## - -class DataType: - def __init__( self, name, locale=None, isInputType=False, isOutputType=False ): - self.name = name - self.locale = locale - self.isInputType = isInputType - self.isOutputType = isOutputType - - def isTextType( self ): - return self.name != "AUDIO" - -class Voice: - - def __init__( self, name, locale, gender, domain="general" ): - self.name = name - self.locale = locale - self.gender = gender - self.domain = domain - if not domain or domain == "general": - self.isLimitedDomain = False - else: - self.isLimitedDomain = True - - def __str__(self): - if languageNames.has_key(self.locale): - langName = languageNames[self.locale] - else: - langName = self.locale - if self.isLimitedDomain: - return "%s (%s, %s %s)" % (self.name, self.domain, langName, self.gender) - else: - return "%s (%s %s)" % (self.name, langName, self.gender) - -##################### Main ######################### - -if __name__ == '__main__': - - serverHost = "cling.dfki.uni-sb.de" - serverPort = 59125 - inputType = "TEXT" - outputType = "AUDIO" - audioType = "WAVE" - defaultVoice = None - inputEncoding = 'utf-8' - ( options, rest ) = getopt.getopt( sys.argv[1:], '', \ - ['server.host=', 'server.port=', 'input.type=', 'output.type=', \ - 'audio.type=', 'voice.default=', 'input.encoding='] ) - for ( option, value ) in options: - if option == '--server.host': serverHost = value - elif option == '--server.port': serverPort = int( value ) - elif option == 
'--input.type': inputType = value - elif option == '--output.type': outputType = value - elif option == '--audio.type': audioType = value - elif option == '--voice.default': defaultVoice = value - elif option == '--input.encoding': inputEncoding = value - if len( rest )>0: # have input file - inputFile = file( rest[0] ) - else: - inputFile = sys.stdin - input = unicode( ''.join( inputFile.readlines() ), inputEncoding ) - if len( rest )>1: # also have output file - outputFile = file( rest[1] ) - else: - outputFile = sys.stdout - - maryClient = MaryClient( serverHost, serverPort ) - maryClient.process( input, inputType, outputType, audioType, defaultVoice, outputFile ) +#!/usr/bin/python +# -*- coding: utf-8 -*- +import socket, sys, types, getopt + + +languageNames = {'de':'German', + 'en':'English', + 'en_US':'US English', + 'tib':'Tibetan'} + +class MaryClient: + specificationVersion = "0.1" + + """Python implementation of a MARY TTS client""" + def __init__( self, host="cling.dfki.uni-sb.de", port=59125, profile=False, quiet=False ): + self.host = host + self.port = port + self.profile = profile + self.quiet = quiet + self.allVoices = None # array of Voice objects + self.voicesByLocaleMap = {} # Map locale strings to arrays of Voice objects + self.allDataTypes = None # array of DataType objects + self.inputDataTypes = None # array of DataType objects + self.outputDataTypes = None # array of DataType objects + self.serverExampleTexts = {} + self.voiceExampleTexts = {} + self.serverVersionInfo = u'' + + if not self.quiet: + sys.stderr.write( "MARY TTS Python Client %s\n" % ( self.specificationVersion ) ) + try: + info = self.getServerVersionInfo() + except: + sys.stderr.write( "Problem connecting to mary server at %s:%i\n" % ( self.host, self.port ) ) + raise + sys.stderr.write( "Connected to %s:%i, " % ( self.host, self.port ) ) + sys.stderr.write( info ) + sys.stderr.write( '\n' ) + + def __getServerInfo( self, request="", marySocket=None ): + """Get answer to 
request from mary server. Returns a list of unicode strings, + each representing a line without the line break. + """ + closeSocket = False + if marySocket is None: + closeSocket = True + marySocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) + marySocket.connect( ( self.host, self.port ) ) + assert isinstance(marySocket, socket.SocketType) + maryFile = marySocket.makefile( 'rwb', 1 ) # read-write, line-buffered + maryFile.write( unicode( request+"\n" ).encode( 'utf-8' ) ) + result = [] + while True: + got = unicode( maryFile.readline().strip(), 'utf-8' ) + # read until end of file or an empty line is read: + if not got: break + result.append(got) + if closeSocket: + marySocket.close() + return result + + def getServerVersionInfo( self ): + "Get version info from server. Returns a unicode string" + if self.serverVersionInfo == u'': + # need to get it from server + self.serverVersionInfo = u'\n'.join(self.__getServerInfo("MARY VERSION")) + return self.serverVersionInfo + + def getAllDataTypes(self, locale=None): + """Obtain a list of all data types known to the server. If the information is not + yet available, the server is queried. This is optional information + which is not required for the normal operation of the client, but + may help to avoid incompatibilities. + Returns an array of DataType objects + """ + if self.allDataTypes is None: + self.__fillDataTypes() + assert self.allDataTypes is not None and len( self.allDataTypes ) > 0 + if locale is None: + return self.allDataTypes + else: + assert isinstance(locale, types.UnicodeType), "Unexpected type for locale: '%s'" % (type(locale)) + return [d for d in self.allDataTypes if d.locale is None or d.locale == locale] + + def getInputDataTypes(self,locale=None): + """Obtain a list of input data types known to the server. If the information is not + yet available, the server is queried. 
This is optional information + which is not required for the normal operation of the client, but + may help to avoid incompatibilities. + Returns an arry of DataType objects + """ + if self.inputDataTypes is None: + self.__fillDataTypes() + assert self.inputDataTypes is not None and len( self.inputDataTypes ) > 0 + if locale is None: + return self.inputDataTypes + else: + assert isinstance(locale, types.UnicodeType), "Unexpected type for locale: '%s'" % (type(locale)) + return [d for d in self.inputDataTypes if d.locale is None or d.locale == locale] + + def getOutputDataTypes(self, locale=None): + """Obtain a list of output data types known to the server. If the information is not + yet available, the server is queried. This is optional information + which is not required for the normal operation of the client, but + may help to avoid incompatibilities. + Returns an arry of DataType objects + """ + if self.outputDataTypes is None: + self.__fillDataTypes() + assert self.outputDataTypes is not None and len( self.outputDataTypes ) > 0 + if locale is None: + return self.outputDataTypes + else: + assert isinstance(locale, types.UnicodeType), "Unexpected type for locale: '%s'" % (type(locale)) + return [d for d in self.outputDataTypes if d.locale is None or d.locale == locale] + + + def __fillDataTypes( self ): + self.allDataTypes = [] + self.inputDataTypes = [] + self.outputDataTypes = [] + marySocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) + marySocket.connect( ( self.host, self.port ) ) + # Expect a variable number of lines of the kind + # RAWMARYXML INPUT OUTPUT + # TEXT_DE LOCALE=de INPUT + # AUDIO OUTPUT + typeStrings = self.__getServerInfo( "MARY LIST DATATYPES", marySocket ) + if not typeStrings or len(typeStrings) == 0: + raise IOError( "Could not get list of data types from Mary server" ) + marySocket.close() + for typeString in typeStrings: + parts = typeString.split() + if len( parts ) == 0: + continue + name = parts[0] + isInputType = False + 
isOutputType = False + locale = None + for part in parts[1:]: + if part[:7] == "LOCALE=": + locale = part[7:] + elif part == "INPUT": + isInputType = True + elif part == "OUTPUT": + isOutputType = True + dt = DataType( name, locale, isInputType, isOutputType ) + self.allDataTypes.append( dt ) + if dt.isInputType: + self.inputDataTypes.append( dt ) + if dt.isOutputType: + self.outputDataTypes.append( dt ) + + def getVoices( self, locale=None ): + """Obtain a list of voices known to the server. If the information is not + yet available, the server is queried. This is optional information + which is not required for the normal operation of the client, but + may help to avoid incompatibilities. + Returns an array of Voice objects + """ + if self.allVoices is None: + self.__fillVoices() + assert self.allVoices is not None and len( self.allVoices ) > 0 + if locale is None: + return self.allVoices + else: + assert isinstance(locale, types.UnicodeType), "Unexpected type for locale: '%s'" % (type(locale)) + if self.voicesByLocaleMap.has_key(locale): + return self.voicesByLocaleMap[locale] + else: + raise Exception("No voices for locale '%s'" % (locale)) + + def __fillVoices( self ): + self.allVoices = [] + self.voicesByLocaleMap = {} + marySocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) + marySocket.connect( ( self.host, self.port ) ) + # Expect a variable number of lines of the kind + # de7 de female + # us2 en male + # dfki-stadium-emo de male limited + voiceStrings = self.__getServerInfo( "MARY LIST VOICES", marySocket ) + if not voiceStrings or len(voiceStrings) == 0: + raise IOError( "Could not get list of voices from Mary server" ) + marySocket.close() + for voiceString in voiceStrings: + parts = voiceString.split() + if len( parts ) < 3: + continue + name = parts[0] + locale = parts[1] + gender = parts[2] + domain = None + if len( parts ) > 3: + domain = parts[3] + voice = Voice( name, locale, gender, domain ) + self.allVoices.append( voice ) + 
localeVoices = None + if self.voicesByLocaleMap.has_key( locale ): + localeVoices = self.voicesByLocaleMap[locale] + else: + localeVoices = [] + self.voicesByLocaleMap[locale] = localeVoices + localeVoices.append( voice ) + + def getGeneralDomainVoices( self, locale=None ): + """Obtain a list of general domain voices known to the server. If the information is not + yet available, the server is queried. This is optional information + which is not required for the normal operation of the client, but + may help to avoid incompatibilities. + Returns an array of Voice objects + """ + return [v for v in self.getVoices( locale ) if not v.isLimitedDomain] + + def getLimitedDomainVoices( self, locale=None ): + """Obtain a list of limited domain voices known to the server. If the information is not + yet available, the server is queried. This is optional information + which is not required for the normal operation of the client, but + may help to avoid incompatibilities. + Returns an array of Voice objects + """ + return [v for v in self.getVoices( locale ) if v.isLimitedDomain] + + def getAvailableLanguages(self): + """ Check available voices and return a list of tuples (abbrev, name) + representing the available languages -- e.g. [('en', 'English'),('de', 'German')]. + """ + if self.allVoices is None: + self.__fillVoices() + assert self.allVoices is not None and len( self.allVoices ) > 0 + languages = [] + for l in self.voicesByLocaleMap.keys(): + if languageNames.has_key(l): + languages.append((l,languageNames[l])) + else: + languages.append((l, l)) + return languages + + def getServerExampleText( self, dataType ): + """Request an example text for a given data type from the server. + dataType the string representation of the data type, + e.g. "RAWMARYXML". 
This is optional information + which is not required for the normal operation of the client, but + may help to avoid incompatibilities.""" + if not self.serverExampleTexts.has_key( dataType ): + exampleTexts = self.__getServerInfo( "MARY EXAMPLETEXT %s" % ( dataType ) ) + if not exampleTexts or len(exampleTexts) == 0: + raise IOError( "Could not get example text for type '%s' from Mary server" % (dataType)) + exampleText = u'\n'.join(exampleTexts) + self.serverExampleTexts[dataType] = exampleText + return self.serverExampleTexts[dataType] + + def process( self, input, inputType, outputType, audioType=None, defaultVoiceName=None, output=sys.stdout ): + assert type( input ) in types.StringTypes + assert type( inputType ) in types.StringTypes + assert type( outputType ) in types.StringTypes + assert audioType is None or type( audioType ) in types.StringTypes + assert defaultVoiceName is None or type( defaultVoiceName ) in types.StringTypes + assert callable( getattr( output, 'write' ) ) + if type( input ) != types.UnicodeType: + input = unicode( input, 'utf-8' ) + maryInfoSocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) + maryInfoSocket.connect( ( self.host, self.port ) ) + assert type( maryInfoSocket ) is socket.SocketType + maryInfo = maryInfoSocket.makefile( 'rwb', 1 ) # read-write, line-buffered + maryInfo.write( unicode( "MARY IN=%s OUT=%s" % ( inputType, outputType ), 'utf-8' ) ) + if audioType: + maryInfo.write( unicode( " AUDIO=%s" % ( audioType ), 'utf-8' ) ) + if defaultVoiceName: + maryInfo.write( unicode( " VOICE=%s" % ( defaultVoiceName ), 'utf-8' ) ) + maryInfo.write( "\r\n" ) + # Receive a request ID: + id = maryInfo.readline() + maryDataSocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) + maryDataSocket.connect( ( self.host, self.port ) ) + assert type( maryDataSocket ) is socket.SocketType + maryDataSocket.sendall( id ) # includes newline + maryDataSocket.sendall( input.encode( 'utf-8' ) ) + maryDataSocket.shutdown( 1 ) # 
shutdown writing + # Set mary info socket to non-blocking, so we only read somthing + # if there is something to read: + maryInfoSocket.setblocking( 0 ) + while True: + try: + err = maryInfoSocket.recv( 8192 ) + if err: sys.stderr.write( err ) + except: + pass + got = maryDataSocket.recv( 8192 ) + if not got: break + output.write( got ) + maryInfoSocket.setblocking( 1 ) + while True: + err = maryInfoSocket.recv( 8192 ) + if not err: break + sys.stderr.write( err ) + + + +################ data representation classes ################## + +class DataType: + def __init__( self, name, locale=None, isInputType=False, isOutputType=False ): + self.name = name + self.locale = locale + self.isInputType = isInputType + self.isOutputType = isOutputType + + def isTextType( self ): + return self.name != "AUDIO" + +class Voice: + + def __init__( self, name, locale, gender, domain="general" ): + self.name = name + self.locale = locale + self.gender = gender + self.domain = domain + if not domain or domain == "general": + self.isLimitedDomain = False + else: + self.isLimitedDomain = True + + def __str__(self): + if languageNames.has_key(self.locale): + langName = languageNames[self.locale] + else: + langName = self.locale + if self.isLimitedDomain: + return "%s (%s, %s %s)" % (self.name, self.domain, langName, self.gender) + else: + return "%s (%s %s)" % (self.name, langName, self.gender) + +##################### Main ######################### + +if __name__ == '__main__': + + serverHost = "cling.dfki.uni-sb.de" + serverPort = 59125 + inputType = "TEXT" + outputType = "AUDIO" + audioType = "WAVE" + defaultVoice = None + inputEncoding = 'utf-8' + ( options, rest ) = getopt.getopt( sys.argv[1:], '', \ + ['server.host=', 'server.port=', 'input.type=', 'output.type=', \ + 'audio.type=', 'voice.default=', 'input.encoding='] ) + for ( option, value ) in options: + if option == '--server.host': serverHost = value + elif option == '--server.port': serverPort = int( value ) + elif option == 
'--input.type': inputType = value + elif option == '--output.type': outputType = value + elif option == '--audio.type': audioType = value + elif option == '--voice.default': defaultVoice = value + elif option == '--input.encoding': inputEncoding = value + if len( rest )>0: # have input file + inputFile = file( rest[0] ) + else: + inputFile = sys.stdin + input = unicode( ''.join( inputFile.readlines() ), inputEncoding ) + if len( rest )>1: # also have output file + outputFile = file( rest[1] ) + else: + outputFile = sys.stdout + + maryClient = MaryClient( serverHost, serverPort ) + maryClient.process( input, inputType, outputType, audioType, defaultVoice, outputFile ) diff --git a/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/MaryClientUser.java b/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/MaryClientUser.java index ace6d924..a05246cc 100644 --- a/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/MaryClientUser.java +++ b/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/MaryClientUser.java @@ -1,102 +1,102 @@ -/** - * Copyright 2000-2006 DFKI GmbH. - * All Rights Reserved. Use is subject to license terms. - * - * Permission is hereby granted, free of charge, to use and distribute - * this software and its documentation without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of this work, and to - * permit persons to whom this work is furnished to do so, subject to - * the following conditions: - * - * 1. The code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * 2. Any modifications must be clearly marked as such. - * 3. Original authors' names are not deleted. - * 4. The authors' names are not used to endorse or promote products - * derived from this software without specific prior written - * permission. 
- * - * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH - * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE - * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL - * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR - * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF - * THIS SOFTWARE. - */ - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.net.UnknownHostException; -import java.util.Locale; - -import javax.sound.sampled.AudioInputStream; -import javax.sound.sampled.AudioSystem; -import javax.sound.sampled.LineEvent; -import javax.sound.sampled.LineListener; -import javax.sound.sampled.UnsupportedAudioFileException; - -import marytts.util.data.audio.AudioPlayer; -import marytts.client.MaryClient; -import marytts.util.http.Address; - -/** - * A demo class illustrating how to use the MaryClient class. - * This will connect to a MARY server, version 4.x. - * It requires maryclient.jar from MARY 4.0. - * This works transparently with MARY servers in both http and socket server mode. 
- * - * Compile this as follows: - * javac -cp maryclient.jar MaryClientUser.java - * - * And run as: - * java -cp .:maryclient.jar MaryClientUser - * - * @author marc - * - */ - -public class MaryClientUser { - - public static void main(String[] args) - throws IOException, UnknownHostException, UnsupportedAudioFileException, - InterruptedException - { - String serverHost = System.getProperty("server.host", "cling.dfki.uni-sb.de"); - int serverPort = Integer.getInteger("server.port", 59125).intValue(); - MaryClient mary = MaryClient.getMaryClient(new Address(serverHost, serverPort)); - String text = "Willkommen in der Welt der Sprachsynthese!"; - // If the given locale is not supported by the server, it returns - // an ambigous exception: "Problem processing the data." - String locale = "de"; // or US English (en-US), Telugu (te), Turkish (tr), ... - String inputType = "TEXT"; - String outputType = "AUDIO"; - String audioType = "WAVE"; - String defaultVoiceName = null; - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - mary.process(text, inputType, outputType, locale, audioType, defaultVoiceName, baos); - // The byte array constitutes a full wave file, including the headers. - // And now, play the audio data: - AudioInputStream ais = AudioSystem.getAudioInputStream( - new ByteArrayInputStream(baos.toByteArray())); - LineListener lineListener = new LineListener() { - public void update(LineEvent event) { - if (event.getType() == LineEvent.Type.START) { - System.err.println("Audio started playing."); - } else if (event.getType() == LineEvent.Type.STOP) { - System.err.println("Audio stopped playing."); - } else if (event.getType() == LineEvent.Type.OPEN) { - System.err.println("Audio line opened."); - } else if (event.getType() == LineEvent.Type.CLOSE) { - System.err.println("Audio line closed."); - } - } - }; - - AudioPlayer ap = new AudioPlayer(ais, lineListener); - ap.start(); - } -} +/** + * Copyright 2000-2006 DFKI GmbH. + * All Rights Reserved. 
Use is subject to license terms. + * + * Permission is hereby granted, free of charge, to use and distribute + * this software and its documentation without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of this work, and to + * permit persons to whom this work is furnished to do so, subject to + * the following conditions: + * + * 1. The code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * 2. Any modifications must be clearly marked as such. + * 3. Original authors' names are not deleted. + * 4. The authors' names are not used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE + * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL + * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR + * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.net.UnknownHostException; +import java.util.Locale; + +import javax.sound.sampled.AudioInputStream; +import javax.sound.sampled.AudioSystem; +import javax.sound.sampled.LineEvent; +import javax.sound.sampled.LineListener; +import javax.sound.sampled.UnsupportedAudioFileException; + +import marytts.util.data.audio.AudioPlayer; +import marytts.client.MaryClient; +import marytts.util.http.Address; + +/** + * A demo class illustrating how to use the MaryClient class. + * This will connect to a MARY server, version 4.x. 
+ * It requires maryclient.jar from MARY 4.0. + * This works transparently with MARY servers in both http and socket server mode. + * + * Compile this as follows: + * javac -cp maryclient.jar MaryClientUser.java + * + * And run as: + * java -cp .:maryclient.jar MaryClientUser + * + * @author marc + * + */ + +public class MaryClientUser { + + public static void main(String[] args) + throws IOException, UnknownHostException, UnsupportedAudioFileException, + InterruptedException + { + String serverHost = System.getProperty("server.host", "cling.dfki.uni-sb.de"); + int serverPort = Integer.getInteger("server.port", 59125).intValue(); + MaryClient mary = MaryClient.getMaryClient(new Address(serverHost, serverPort)); + String text = "Willkommen in der Welt der Sprachsynthese!"; + // If the given locale is not supported by the server, it returns + // an ambigous exception: "Problem processing the data." + String locale = "de"; // or US English (en-US), Telugu (te), Turkish (tr), ... + String inputType = "TEXT"; + String outputType = "AUDIO"; + String audioType = "WAVE"; + String defaultVoiceName = null; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + mary.process(text, inputType, outputType, locale, audioType, defaultVoiceName, baos); + // The byte array constitutes a full wave file, including the headers. 
+ // And now, play the audio data: + AudioInputStream ais = AudioSystem.getAudioInputStream( + new ByteArrayInputStream(baos.toByteArray())); + LineListener lineListener = new LineListener() { + public void update(LineEvent event) { + if (event.getType() == LineEvent.Type.START) { + System.err.println("Audio started playing."); + } else if (event.getType() == LineEvent.Type.STOP) { + System.err.println("Audio stopped playing."); + } else if (event.getType() == LineEvent.Type.OPEN) { + System.err.println("Audio line opened."); + } else if (event.getType() == LineEvent.Type.CLOSE) { + System.err.println("Audio line closed."); + } + } + }; + + AudioPlayer ap = new AudioPlayer(ais, lineListener); + ap.start(); + } +} diff --git a/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient-http.py b/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient-http.py index 67655cd2..cf9782e8 100755 --- a/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient-http.py +++ b/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient-http.py @@ -1,185 +1,185 @@ -#!/usr/bin/env python -import httplib, urllib - -# A basic mary client in Python, -# kindly donated to the MARY TTS project -# by Hugh Sasse. Thanks Hugh! - -# A very basic Python class for accessing -# the MARY TTS system using the modern -# HTTP server. -# Warning, this is probably ghastly Python, -# most of my time of late has been with -# other languages, so I'm not up to date -# with all the stylistic conventions of -# modern Python. -# This does seem to work OK though. - -class maryclient: - """A basic handler for MARY-TTS HTTP clients - - At present, there is no checking for - allowed voices, locales, and so on. - Most of the useful parameters can be - accessed by get_ and set_ methods. - Relying on winsound, this is Windows - specific. 
- """ - def __init__(self): - """Set up useful defaults (for - people in England, anyway)""" - self.host = "127.0.0.1" - self.port = 59125 - self.input_type = "TEXT" - self.output_type = "AUDIO" - self.audio = "WAVE_FILE" - self.locale = "en_GB" - self.voice = "dfki-prudence-hsmm" - - def set_host(self, a_host): - """Set the host for the TTS server.""" - self.host = a_host - - def get_host(self): - """Get the host for the TTS server.""" - self.host - - def set_port(self, a_port): - """Set the port for the TTS server.""" - self.port = a_port - - def get_port(self): - """Get the port for the TTS server.""" - self.port - - def set_input_type(self, type): - """Set the type of input being - supplied to the TTS server - (such as 'TEXT').""" - self.input_type = type - - def get_input_type(self): - """Get the type of input being - supplied to the TTS server - (such as 'TEXT').""" - self.input_type - - def set_output_type(self, type): - """Set the type of input being - supplied to the TTS server - (such as 'AUDIO').""" - self.output_type = type - - def get_output_type(self): - """Get the type of input being - supplied to the TTS server - (such as "AUDIO").""" - self.output_type - - def set_locale(self, a_locale): - """Set the locale - (such as "en_GB").""" - self.locale = a_locale - - def get_locale(self): - """Get the locale - (such as "en_GB").""" - self.locale - - def set_audio(self, audio_type): - """Set the audio type for playback - (such as "WAVE_FILE").""" - self.audio = audio_type - - def get_audio(self): - """Get the audio type for playback - (such as "WAVE_FILE").""" - self.audio - - def set_voice(self, a_voice): - """Set the voice to speak with - (such as "dfki-prudence-hsmm").""" - self.voice = a_voice - - def get_voice(self): - """Get the voice to speak with - (such as "dfki-prudence-hsmm").""" - self.voice - - def generate(self, message): - """Given a message in message, - return a response in the appropriate - format.""" - raw_params = {"INPUT_TEXT": message, 
- "INPUT_TYPE": self.input_type, - "OUTPUT_TYPE": self.output_type, - "LOCALE": self.locale, - "AUDIO": self.audio, - "VOICE": self.voice, - } - params = urllib.urlencode(raw_params) - headers = {} - - # Open connection to self.host, self.port. - conn = httplib.HTTPConnection(self.host, self.port) - - # conn.set_debuglevel(5) - - conn.request("POST", "/process", params, headers) - response = conn.getresponse() - if response.status != 200: - print response.getheaders() - raise RuntimeError("{0}: {1}".format(response.status, - response.reason)) - return response.read() - -# If this is invoked as a program, just give -# a greeting to show it is working. -# The platform specific code is moved to this -# part so that this file may be imported without -# bringing platform specific code in. -if __name__ == "__main__": - - # For handling command line arguments: - import sys - import platform - - # check we are on Windows: - system = platform.system().lower() - if (system == "windows"): - - import winsound - - class Player: - def __init__(self): - pass - - def play(self, a_sound): - winsound.PlaySound(a_sound, winsound.SND_MEMORY) - - #if ("cygwin" in system): - else: - # Not sure how to do audio on cygwin, - # portably for python. So have a sound - # player class that doesn't play sounds. - # A null object, if you like. - class Player: - def __init__(self): - pass - - def play(self, a_sound): - print("Here I would play a sound if I knew how") - pass - - # Probably want to parse arguments to - # set the voice, etc., here - - client = maryclient() - client.set_audio("WAVE_FILE") # for example - - player = Player() - the_sound = client.generate("hello from Mary Text to Speech, with Python.") - if client.output_type == "AUDIO": - player.play(the_sound) - -# vi:set sw=4 et: +#!/usr/bin/env python +import httplib, urllib + +# A basic mary client in Python, +# kindly donated to the MARY TTS project +# by Hugh Sasse. Thanks Hugh! 
+ +# A very basic Python class for accessing +# the MARY TTS system using the modern +# HTTP server. +# Warning, this is probably ghastly Python, +# most of my time of late has been with +# other languages, so I'm not up to date +# with all the stylistic conventions of +# modern Python. +# This does seem to work OK though. + +class maryclient: + """A basic handler for MARY-TTS HTTP clients + + At present, there is no checking for + allowed voices, locales, and so on. + Most of the useful parameters can be + accessed by get_ and set_ methods. + Relying on winsound, this is Windows + specific. + """ + def __init__(self): + """Set up useful defaults (for + people in England, anyway)""" + self.host = "127.0.0.1" + self.port = 59125 + self.input_type = "TEXT" + self.output_type = "AUDIO" + self.audio = "WAVE_FILE" + self.locale = "en_GB" + self.voice = "dfki-prudence-hsmm" + + def set_host(self, a_host): + """Set the host for the TTS server.""" + self.host = a_host + + def get_host(self): + """Get the host for the TTS server.""" + self.host + + def set_port(self, a_port): + """Set the port for the TTS server.""" + self.port = a_port + + def get_port(self): + """Get the port for the TTS server.""" + self.port + + def set_input_type(self, type): + """Set the type of input being + supplied to the TTS server + (such as 'TEXT').""" + self.input_type = type + + def get_input_type(self): + """Get the type of input being + supplied to the TTS server + (such as 'TEXT').""" + self.input_type + + def set_output_type(self, type): + """Set the type of input being + supplied to the TTS server + (such as 'AUDIO').""" + self.output_type = type + + def get_output_type(self): + """Get the type of input being + supplied to the TTS server + (such as "AUDIO").""" + self.output_type + + def set_locale(self, a_locale): + """Set the locale + (such as "en_GB").""" + self.locale = a_locale + + def get_locale(self): + """Get the locale + (such as "en_GB").""" + self.locale + + def set_audio(self, 
audio_type): + """Set the audio type for playback + (such as "WAVE_FILE").""" + self.audio = audio_type + + def get_audio(self): + """Get the audio type for playback + (such as "WAVE_FILE").""" + self.audio + + def set_voice(self, a_voice): + """Set the voice to speak with + (such as "dfki-prudence-hsmm").""" + self.voice = a_voice + + def get_voice(self): + """Get the voice to speak with + (such as "dfki-prudence-hsmm").""" + self.voice + + def generate(self, message): + """Given a message in message, + return a response in the appropriate + format.""" + raw_params = {"INPUT_TEXT": message, + "INPUT_TYPE": self.input_type, + "OUTPUT_TYPE": self.output_type, + "LOCALE": self.locale, + "AUDIO": self.audio, + "VOICE": self.voice, + } + params = urllib.urlencode(raw_params) + headers = {} + + # Open connection to self.host, self.port. + conn = httplib.HTTPConnection(self.host, self.port) + + # conn.set_debuglevel(5) + + conn.request("POST", "/process", params, headers) + response = conn.getresponse() + if response.status != 200: + print response.getheaders() + raise RuntimeError("{0}: {1}".format(response.status, + response.reason)) + return response.read() + +# If this is invoked as a program, just give +# a greeting to show it is working. +# The platform specific code is moved to this +# part so that this file may be imported without +# bringing platform specific code in. +if __name__ == "__main__": + + # For handling command line arguments: + import sys + import platform + + # check we are on Windows: + system = platform.system().lower() + if (system == "windows"): + + import winsound + + class Player: + def __init__(self): + pass + + def play(self, a_sound): + winsound.PlaySound(a_sound, winsound.SND_MEMORY) + + #if ("cygwin" in system): + else: + # Not sure how to do audio on cygwin, + # portably for python. So have a sound + # player class that doesn't play sounds. + # A null object, if you like. 
+ class Player: + def __init__(self): + pass + + def play(self, a_sound): + print("Here I would play a sound if I knew how") + pass + + # Probably want to parse arguments to + # set the voice, etc., here + + client = maryclient() + client.set_audio("WAVE_FILE") # for example + + player = Player() + the_sound = client.generate("hello from Mary Text to Speech, with Python.") + if client.output_type == "AUDIO": + player.play(the_sound) + +# vi:set sw=4 et: diff --git a/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient.rb b/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient.rb index 6ab1c089..c4156cbb 100755 --- a/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient.rb +++ b/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient.rb @@ -1,261 +1,261 @@ -#!/usr/bin/env ruby -# -# A basic mary client in Ruby, -# kindly donated to the MARY TTS project -# by Hugh Sasse. Thanks Hugh! - - -# Ruby client for the MARY TTS HTTP server. -# This is for Windows only, and relies on -# the Win32-Sound gem to access the audio. -# -# - -require 'rubygems' -require 'net/http' -require 'uri' - -# A fairly minimal client class for the -# MARY TTS system. This uses the modern -# HTTP interface to access the server. -# At present, this doesn't wrap the methods -# which provide documentation or lists of -# voices or features. -class MaryClient - attr_accessor :host, :port - attr_accessor :input_type, :output_type - attr_accessor :locale, :audio, :voice - - # Set up the defaults for the MARY TTS - # server, which is assumed to be running - # on the local host, with British voices - # installed. These may be modified with - # the appropriate methods. 
- # host = 127.0.0.1) - # port = 59125 - # input_type = "TEXT" - # output_type = "AUDIO" - # audio = "WAVE_FILE" - # locale = "en_GB" - # voice = "dfki-prudence-hsmm" - def initialize - @host = "127.0.0.1" # The local machine - @port = 59125 - @input_type = "TEXT" - @output_type = "AUDIO" - @locale = "en_GB" - @audio = "WAVE_FILE" - @voice = "dfki-prudence-hsmm" - end - - # Process a text message, which with a - # new client, will return the audio. - # This is so that platform dependent parts - # are kept separate. - def generate(message) - raw_params = {"INPUT_TEXT" => message, - "INPUT_TYPE" => @input_type, - "OUTPUT_TYPE" => @output_type, - "LOCALE" => @locale, - "AUDIO" => @audio, - "VOICE" => @voice, - } - res = Net::HTTP.post_form(URI.parse("http://#{@host}:#{@port}/process"), raw_params) - res.value # Throw an exception on failure - #puts res.body - return res.body - end -end - - -# If this invoked as a program with no -# argumens, just give a greeting to show -# that it is working. If arguments are -# supplied, process options to work out -# what to do with the arguments. -if __FILE__ == $0 - - # These files are only loaded when this is - # invoked as a program. - require 'rbconfig' - require 'getoptlong' - - # PLATFORM SPECIFIC CODE. - # Needs more work [!] - case Config::CONFIG['host_os'] - when /darwin/i - raise NotImplementedError.new("Don't know how to play audio on a Mac") - when /linux/i - raise NotImplementedError.new("Far too many ways to play audio on Linux, you'll need to choose something") - when /sunos|solaris/i - raise NotImplementedError.new("Have not played audio on Suns for too long to implement this.") - when /java/i - raise NotImplementedError.new("Don't know how to play audio from Java ") - when /win32|cygwin|mingw32/i - # The various things that can use the Win32 - # sound gem - require 'win32/sound' - # Create a player class that will play the - # sound that the Mary TTS system returns - class Player - - # Play the audio passed in. 
- # Possibly this should receive the audio - # type so we can check that we can play it, - # but at the moment that is the - # responsibility of the user. - def self.play(sound) - Win32::Sound.play(sound, Win32::Sound::MEMORY) - end - end - else - raise NotImplementedError.new("Haven't thought how to support this OS yet") - end - - - client = nil - split = "" - - if ARGV.size.zero? - client = MaryClient.new() - sound = client.generate("Hello from Mary Text to Speech with Ruby.") - Player.play(sound) - else - args_mode = :words - stdout_mode = :absorb - opts = GetoptLong::new( - ["--audio", "-a", GetoptLong::REQUIRED_ARGUMENT], - ["--echo", "-e", GetoptLong::NO_ARGUMENT], - ["--help", "-h", GetoptLong::NO_ARGUMENT], - ["--host", "-H", GetoptLong::REQUIRED_ARGUMENT], - ["--input-type", "-i", GetoptLong::REQUIRED_ARGUMENT], - ["--locale", "-l", GetoptLong::REQUIRED_ARGUMENT], - ["--read", "-r", GetoptLong::NO_ARGUMENT], - - ["--split", "-s", GetoptLong::REQUIRED_ARGUMENT], - ["--output-type", "-o", GetoptLong::REQUIRED_ARGUMENT], - ["--port", "-P", GetoptLong::REQUIRED_ARGUMENT], - ["--tee", "-t", GetoptLong::NO_ARGUMENT], - ["--voice", "-v", GetoptLong::REQUIRED_ARGUMENT] - ) - - opts.each do |opt, arg| - unless ["--help", "-h"].include?(opt) - # skip if we are only getting help - client ||= MaryClient.new() - end - case opt - when "--help", "-h" - puts <<-EOHELP -Usage: #{$0} [options] [arguments] ---audio -a - Audio format. Defualt: WAVE_FILE ---echo -e - Act as an echo command and send output - arguments to the synthesizer only (not - to standard output. - Turns off --read|-r ---help -h - Print this help, then exit. ---host -H - The host which is the server. - Default: 127.0.0.1 ---input-type -i - The type of the input supplied to the - TTS system. Default: TEXT ---locale -l - The locale of the input. Default: en_GB ---output-type -o - The output type from the TTS system. 
- Default: AUDIO ---port -P - The port for the TTS server - Default: 59125 ---read -r - Read the files passed as arguments. - Turns off --echo|-e ---split -s (lines|paragraphs) - When reading files, split the input - into lines or paragraphs. Paragraphs - mean reading up to the next double - newline. Note, the argument is literally - "lines" or "paragraphs" (or some - abbreviation of those) without the - quotes. - Default is paragraphs. ---tee -t - Act as tee: send the output to the TTS - system, and to standard output. ---voice -v - The voice to use. - Default: dfki-prudence-hsmm - EOHELP - exit(0) - when "--audio", "-a" - client.audio = arg - when "--echo", "-e" - args_mode = :words - when "--host", "-H" - client.host = arg - when "--input-type", "-i" - client.input_type = arg - when "--locale", "-l" - client.locale = arg - when "--output-type", "-o" - client.output_type = arg - when "--port", "-P" - client.port = arg.to_i - when "--read", "-r" - args_mode = :files - when "--split", "-s" - case arg - when /^p/i - split = "" - when /^l/i - split = $/ - end - when "--tee", "-t" - stdout_mode = :emit - when "--voice", "-v" - client.voice = arg - end - end - - client ||= MaryClient.new() - case args_mode - when :words - input_text = ARGV.join(" ") - unless input_text =~ /\A\s*\Z/m - sound = client.generate(input_text) - if client.output_type == "AUDIO" - Player.play(sound) - end - end - if stdout_mode == :emit - puts input_text - end - when :files - # Slurp in paragraphs so sentences - # don't get broken in stupid places. 
- $/ = split # paragraph mode - ARGF.each do |paragraph| - begin - unless paragraph =~ /\A\s*\Z/m - sound = client.generate(paragraph) - if client.output_type == "AUDIO" - # and client.audio == "WAVE_FILE" - Player.play(sound) - end - end - rescue Exception => e - puts "got error #{e} while trying to say #{paragraph.inspect}" - raise - end - if stdout_mode == :emit - puts paragraph - end # end if - end # end ARGF.each - end # end case - end # if ARGV.size.zero? -end - +#!/usr/bin/env ruby +# +# A basic mary client in Ruby, +# kindly donated to the MARY TTS project +# by Hugh Sasse. Thanks Hugh! + + +# Ruby client for the MARY TTS HTTP server. +# This is for Windows only, and relies on +# the Win32-Sound gem to access the audio. +# +# + +require 'rubygems' +require 'net/http' +require 'uri' + +# A fairly minimal client class for the +# MARY TTS system. This uses the modern +# HTTP interface to access the server. +# At present, this doesn't wrap the methods +# which provide documentation or lists of +# voices or features. +class MaryClient + attr_accessor :host, :port + attr_accessor :input_type, :output_type + attr_accessor :locale, :audio, :voice + + # Set up the defaults for the MARY TTS + # server, which is assumed to be running + # on the local host, with British voices + # installed. These may be modified with + # the appropriate methods. + # host = 127.0.0.1) + # port = 59125 + # input_type = "TEXT" + # output_type = "AUDIO" + # audio = "WAVE_FILE" + # locale = "en_GB" + # voice = "dfki-prudence-hsmm" + def initialize + @host = "127.0.0.1" # The local machine + @port = 59125 + @input_type = "TEXT" + @output_type = "AUDIO" + @locale = "en_GB" + @audio = "WAVE_FILE" + @voice = "dfki-prudence-hsmm" + end + + # Process a text message, which with a + # new client, will return the audio. + # This is so that platform dependent parts + # are kept separate. 
+ def generate(message) + raw_params = {"INPUT_TEXT" => message, + "INPUT_TYPE" => @input_type, + "OUTPUT_TYPE" => @output_type, + "LOCALE" => @locale, + "AUDIO" => @audio, + "VOICE" => @voice, + } + res = Net::HTTP.post_form(URI.parse("http://#{@host}:#{@port}/process"), raw_params) + res.value # Throw an exception on failure + #puts res.body + return res.body + end +end + + +# If this invoked as a program with no +# argumens, just give a greeting to show +# that it is working. If arguments are +# supplied, process options to work out +# what to do with the arguments. +if __FILE__ == $0 + + # These files are only loaded when this is + # invoked as a program. + require 'rbconfig' + require 'getoptlong' + + # PLATFORM SPECIFIC CODE. + # Needs more work [!] + case Config::CONFIG['host_os'] + when /darwin/i + raise NotImplementedError.new("Don't know how to play audio on a Mac") + when /linux/i + raise NotImplementedError.new("Far too many ways to play audio on Linux, you'll need to choose something") + when /sunos|solaris/i + raise NotImplementedError.new("Have not played audio on Suns for too long to implement this.") + when /java/i + raise NotImplementedError.new("Don't know how to play audio from Java ") + when /win32|cygwin|mingw32/i + # The various things that can use the Win32 + # sound gem + require 'win32/sound' + # Create a player class that will play the + # sound that the Mary TTS system returns + class Player + + # Play the audio passed in. + # Possibly this should receive the audio + # type so we can check that we can play it, + # but at the moment that is the + # responsibility of the user. + def self.play(sound) + Win32::Sound.play(sound, Win32::Sound::MEMORY) + end + end + else + raise NotImplementedError.new("Haven't thought how to support this OS yet") + end + + + client = nil + split = "" + + if ARGV.size.zero? 
+ client = MaryClient.new() + sound = client.generate("Hello from Mary Text to Speech with Ruby.") + Player.play(sound) + else + args_mode = :words + stdout_mode = :absorb + opts = GetoptLong::new( + ["--audio", "-a", GetoptLong::REQUIRED_ARGUMENT], + ["--echo", "-e", GetoptLong::NO_ARGUMENT], + ["--help", "-h", GetoptLong::NO_ARGUMENT], + ["--host", "-H", GetoptLong::REQUIRED_ARGUMENT], + ["--input-type", "-i", GetoptLong::REQUIRED_ARGUMENT], + ["--locale", "-l", GetoptLong::REQUIRED_ARGUMENT], + ["--read", "-r", GetoptLong::NO_ARGUMENT], + + ["--split", "-s", GetoptLong::REQUIRED_ARGUMENT], + ["--output-type", "-o", GetoptLong::REQUIRED_ARGUMENT], + ["--port", "-P", GetoptLong::REQUIRED_ARGUMENT], + ["--tee", "-t", GetoptLong::NO_ARGUMENT], + ["--voice", "-v", GetoptLong::REQUIRED_ARGUMENT] + ) + + opts.each do |opt, arg| + unless ["--help", "-h"].include?(opt) + # skip if we are only getting help + client ||= MaryClient.new() + end + case opt + when "--help", "-h" + puts <<-EOHELP +Usage: #{$0} [options] [arguments] +--audio -a + Audio format. Defualt: WAVE_FILE +--echo -e + Act as an echo command and send output + arguments to the synthesizer only (not + to standard output. + Turns off --read|-r +--help -h + Print this help, then exit. +--host -H + The host which is the server. + Default: 127.0.0.1 +--input-type -i + The type of the input supplied to the + TTS system. Default: TEXT +--locale -l + The locale of the input. Default: en_GB +--output-type -o + The output type from the TTS system. + Default: AUDIO +--port -P + The port for the TTS server + Default: 59125 +--read -r + Read the files passed as arguments. + Turns off --echo|-e +--split -s (lines|paragraphs) + When reading files, split the input + into lines or paragraphs. Paragraphs + mean reading up to the next double + newline. Note, the argument is literally + "lines" or "paragraphs" (or some + abbreviation of those) without the + quotes. + Default is paragraphs. 
+--tee -t + Act as tee: send the output to the TTS + system, and to standard output. +--voice -v + The voice to use. + Default: dfki-prudence-hsmm + EOHELP + exit(0) + when "--audio", "-a" + client.audio = arg + when "--echo", "-e" + args_mode = :words + when "--host", "-H" + client.host = arg + when "--input-type", "-i" + client.input_type = arg + when "--locale", "-l" + client.locale = arg + when "--output-type", "-o" + client.output_type = arg + when "--port", "-P" + client.port = arg.to_i + when "--read", "-r" + args_mode = :files + when "--split", "-s" + case arg + when /^p/i + split = "" + when /^l/i + split = $/ + end + when "--tee", "-t" + stdout_mode = :emit + when "--voice", "-v" + client.voice = arg + end + end + + client ||= MaryClient.new() + case args_mode + when :words + input_text = ARGV.join(" ") + unless input_text =~ /\A\s*\Z/m + sound = client.generate(input_text) + if client.output_type == "AUDIO" + Player.play(sound) + end + end + if stdout_mode == :emit + puts input_text + end + when :files + # Slurp in paragraphs so sentences + # don't get broken in stupid places. + $/ = split # paragraph mode + ARGF.each do |paragraph| + begin + unless paragraph =~ /\A\s*\Z/m + sound = client.generate(paragraph) + if client.output_type == "AUDIO" + # and client.audio == "WAVE_FILE" + Player.play(sound) + end + end + rescue Exception => e + puts "got error #{e} while trying to say #{paragraph.inspect}" + raise + end + if stdout_mode == :emit + puts paragraph + end # end if + end # end ARGF.each + end # end case + end # if ARGV.size.zero? 
+end + diff --git a/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient.tcl b/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient.tcl index 155951a6..3a358235 100644 --- a/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient.tcl +++ b/marytts-assembly/assembly-runtime/src/runtime/doc/examples/client/maryclient.tcl @@ -1,705 +1,705 @@ -# Tcl/Tk MARY TTS client. - -# This has been tested on Windows, and because -# of the use of sound there will be portability -# issues. However, there should be enough here -# for a reasonable start at a client, for any -# platform that supports Tcl/Tk. The platform -# specific code has, as far as possible, been -# isolated in the part of the code that detects -# whether this is being run as a program. - -# Notes: -# More work will need to be done with this, -# in order to make the code clean. It should -# probably be wrapped in a package, to solve -# any namespace issues. There are a lot of -# global variables. It seems that some of -# these are necessary for the menus to work. -# Handling of temporary files could be improved. - -# TODO: -# Create modifier sliders, for the effects. -# Extend the query proc to make use of them. -# Turn the Help menu into something more useful. -# Debug the actions for the Edit menu. -# Provide a means of getting example inputs -# from the server. -# Provide a means of re-loading all the -# dynamically collected information when the -# server is changed from the menu. This means -# that we need to delete the existing menu -# entries in order to add them correctly. -# How do we ensure temporary files are removed -# in the event of a problem? if {catch {}} ...? -# Maybe leaving them around is diagnostic info? -# Make that an option? -# Add error handling code for network and disk -# failures likely to beset such clients. 
-# Add sensible defaults for things the user must -# always set at startup, but these will be -# platform spacific. Always default to Audio -# output for example, or is it possible that -# people have no voices installed? - - -# This is a GUI, so: -package require Tk - -# We are communicating with the Mary server -# with HTTP. -package require http - -# Use the local machine in preference to the -# one in Germany. -set mary_tts_default_host "127.0.0.1" -set mary_tts_default_port 59125 - -# Actual host and port, and global old -# copies to allow revert on cancel in the -# dialogues. Apparently upvar #0 is the -# norm for that sort of thing [Tcl Wiki] -set mary_tts_host $mary_tts_default_host -set old_mary_tts_host $mary_tts_host -set mary_tts_port $mary_tts_default_port -set old_mary_tts_port $mary_tts_port - -# Informational URLs -set informational_urls [ list \ -version datatypes voices \ -audioformats audioeffects ] - -####### - -# Obtain a static page from the server, i.e. -# no parameters are needed to get it. -proc get_page { relative_url } { - global mary_tts_host mary_tts_port - set url http://$mary_tts_host:$mary_tts_port/$relative_url - set result [::http::geturl $url] - return [::http::data $result] -} - -proc list_of_lines {str} { - return [ split $str "\n" ] -} - - -# We will need to collect this information -# when we have the server and port chosen. -proc get_audioeffects {} { - return [list_of_lines [get_page audioeffects] ] -} - -proc get_audioformats {} { - return [list_of_lines [get_page audioformats] ] -} - -proc get_datatypes {} { - return [ list_of_lines [get_page datatypes] ] -} - - -proc get_voices {} { - return [list_of_lines [get_page voices] ] -} - -# Handling post queries. - -# Submit the query to the server, using the -# http POST method. 
-proc make_query {url encoded_params} { - set http [::http::geturl $url -query $encoded_params] - set result [::http::data $http] - return $result -} - -# Get the text from the input text area -proc get_input_text {} { - return [.io.inp.input_area get 1.0 end] -} - -# Get the text from the output text area -proc get_output_text {} { - return [.io.out.output_area get 1.0 end] -} - -# Collect the audio data from the server. -proc collect_audio_data {text_to_process} { - global mary_tts_host mary_tts_port - global inputtype outputtype locales - global audioformat voice - set url "http://$mary_tts_host:$mary_tts_port/process" - # ::http::formatQuery converts a list of - # key value pairs into the correct format - # for http POST. - set params [::http::formatQuery INPUT_TEXT $text_to_process INPUT_TYPE $inputtype OUTPUT_TYPE $outputtype LOCALE $locales($voice) AUDIO $audioformat VOICE $voice ] - set result [make_query $url $params] - return $result -} - -# Pushes the query to the server and gets -# the results back, displaying or playing -# them. -proc generate_output {text_to_process} { - global outputtype - set result [collect_audio_data $text_to_process] - if {$outputtype eq "AUDIO"} { - # call the platform dependent implementation. - play $result - } else { - clear_output - add_message $result - } - # Return the result so we can save it if - # the user requires it. - return $result -} - - -# These next procs are for handling the -# lists of data one gets back from the server -# which possibly have several words per line, -# separated by spaces. - -# If the first word of each listed line is -# significant, extract the list of first words. -proc collect_first_words_of_phrase_list {a_list} { - for {set i 0} {$i < [llength $a_list]} {incr i} { - set data [lindex $a_list $i ] - set word [ lindex [split $data " "] 0 ] - lappend words $word - } - return $words -} - - -# If the second word of each listed line is -# significant, extract the list of second words. 
-proc collect_second_words_of_phrase_list {a_list} { - for {set i 0} {$i < [llength $a_list]} {incr i} { - set data [lindex $a_list $i ] - set word [ lindex [split $data " "] 1 ] - lappend words $word - } - return $words -} - - -# The list of datatypes must be separated into -# input data types and output data types so that -# interactions with the server make sense. -# This handles the inputs. -proc collect_first_words_of_input_types {a_list} { - for {set i 0} {$i < [llength $a_list]} {incr i} { - set data [lindex $a_list $i ] - if {[ string match -nocase "*input*" $data ]} { - set word [ lindex [split $data " "] 0 ] - lappend words $word - } - } - return $words -} - - -# The list of datatypes must be separated into -# input data types and output data types so that -# interactions with the server make sense. -# This handles the outputs. -proc collect_first_words_of_output_types {a_list} { - for {set i 0} {$i < [llength $a_list]} {incr i} { - set data [lindex $a_list $i ] - if {[string match -nocase "*output*" $data]} { - set word [ lindex [split $data " "] 0 ] - lappend words $word - } - } - return $words -} - -# setup all the variables to hold voices, -# audio options, etc., based on what the -# server can do. 
-proc setup_globals {} { - global audioeffects audioformats voices - global inputtypes outputtypes audioformat voice - global inputtype outputtype locales - - set audioeffects [get_audioeffects] - set audioformats [get_audioformats] - set audioformat [lindex $audioformats 0 ] - set datatypes_data [get_datatypes] - set inputtypes [collect_first_words_of_input_types $datatypes_data] - set inputtype [lindex $inputtypes 0] - set outputtypes [collect_first_words_of_output_types $datatypes_data] - set outputtype [lindex $outputtypes 0] - set voices_data [get_voices] - set voices [collect_first_words_of_phrase_list $voices_data] - set locales_list [collect_second_words_of_phrase_list $voices_data ] - for {set i 0} {$i < [llength $voices]} {incr i} { - set locales([lindex $voices $i]) [lindex $locales_list $i] - } - set voice [lindex $voices 0] -} - -# A general procedure for filling in the -# elements of a listbox from a list. -# At present this is unused, but it could -# be useful later. [It took a while to -# figure out so I'm not ready to kill it -# with YAGNI.] -proc add_listbox_items {a_var a_widget} { - upvar $a_var var - foreach item $var { - $a_widget insert end $item - } -} - -# Create the menubuttons along the top. -# Usual File, Edit and Help menus plus -# those to set attributes. -proc create_menubuttons {} { - set buttons [ list file File edit Edit \ - server "Server" \ - inputtype "Input type" outputtype "Output type" \ - voice Voice \ - audioformat "Audio format" \ - textstyle "Text style" help Help ] - - set count 1 - foreach { menu_tag string_tag} $buttons { - menubutton .menus.$menu_tag -text $string_tag \ - -menu .menus.${menu_tag}.menu -underline 0 -font ClientFont - menu .menus.${menu_tag}.menu -tearoff true - grid .menus.$menu_tag -in .menus -row 1 -column $count -sticky w - incr count - } -} - -# Get the contents of a text file for reading -# or loading into a text widget, etc. 
-proc text_file_contents {what_for} { - set a_file [tk_getOpenFile -title $what_for ] - set the_text "" - - if {$a_file != ""} { - set a_stream [open $a_file r ] - set the_text [read $a_stream] - close $a_stream - } - - return $the_text -} - - -# Save the_text to a text file specified -# by the user, for the given reason (what_for). -# At the moment there is no error handling -# for this (disk full, write protected, etc). -proc save_text_file {the_text what_for} { - set a_file [tk_getSaveFile -title $what_for -parent .] - if {$a_file != ""} { - set a_stream [open $a_file w ] - puts $a_stream $the_text - close $a_stream - } -} - -# Save the_data to a binary file specified -# by the user, for the given reason (what_for), -# a text string. -# At the moment there is no error handling -# for this (disk full, write protected, etc). -proc save_binary_file {the_data what_for} { - set a_file [tk_getSaveFile -title $what_for -parent .] - if {$a_file != ""} { - set a_stream [open $a_file w ] - fconfigure $a_stream -translation binary - puts -nonewline $a_stream $the_data - close $a_stream - } -} - -# Create the menu for File operations -proc create_menu_file {} { - set fmenu .menus.file.menu - $fmenu add command -label "New" \ - -font ClientFont -command { - .io.inp.input_area delete 1.0 end - } - # Replace the contents of the input text - # widget by the data from the open file. - # YAGNI, but is there any reason - # to allow inserting a file, rather than - # replacing the text with file contents? - # - $fmenu add command -label "Open" \ - -font ClientFont -command { - set the_text [text_file_contents "File to load"] - if {$the_text != ""} { - .io.inp.input_area delete 1.0 end - .io.inp.input_area insert end $the_text - } - } - - $fmenu add command -label "Read" \ - -font ClientFont -command { - generate_output [text_file_contents "File to read"] - } - # How to make these disabled for now? 
- $fmenu add command -label "Save Input" \ - -font ClientFont -command { - set the_text [get_input_text] - save_text_file $the_text "Save Input" - } - $fmenu add command -label "Save Output" \ - -font ClientFont -command { - set the_text [get_output_text] - save_text_file $the_text "Save Output" - } -} - -# Create the menu for edit operations -proc create_menu_edit {} { - set emenu .menus.edit.menu - $emenu add command -label "Select All from Input Area" \ - -font ClientFont -command { - # This code says copy the selection as well. - # May be wrong for some platforms, but is - # it more useful? - .io.inp.input_area tag add sel 1.0 end - event generate .io.inp.input_area <> -} - $emenu add command -label "Select All from Output Area" \ - -font ClientFont -command { - # This code says copy the selection as well. - # May be wrong for some platforms, but is - # it more useful? - .io.out.output_area tag add sel 1.0 end - event generate .io.out.output_area <> -} - $emenu add command -label "Copy from Input Area" \ - -font ClientFont -command { - # this appears not to work. FIXME - event generate .io.inp.input_area <> - } - $emenu add command -label "Copy from Output Area" \ - -font ClientFont -command { - # this appears not to work. FIXME - event generate .io.out.output_area <> - } - $emenu add command -label "Paste into Input Area" \ - -font ClientFont -command { - # this appears not to work. FIXME - event generate .io.inp.input_area <> - } - $emenu add command \ - -font ClientFont -label "Insert example text into Input Area"\ - -command { - } - # Add specific editing commands here later. - # For example, we would like to be able to - # add whole tags to the XML based formats, - # wrap matching tags around selected text. - # Also we need to find out what happens with - # copy cut and paste, given that X Windows - # is different from MS Windows. - # Allow example text to be inserted. 
- # However, my thinking is that this should not - # overwrite as it is in the Java application, - # because this rubs out edits when switching - # voices, and this can be annoying when - # exploring the system. -} - -# Set the server properties, mostly just -# host and port. Maybe later protocol will -# be possible for https connections? -proc create_menu_server {} { - set smenu .menus.server.menu - $smenu add command -label "host" -font ClientFont -command { - create_entry_dialog "MARY TTS server name" "hostname/IP Address" mary_tts_host - } - $smenu add command -label "port" -font ClientFont -command { - create_entry_dialog "MARY TTS server port" "pott number" mary_tts_port - } -} - -# setup the fonts for the various areas on the dipslay. -proc setup_font {family size} { - foreach win {.io .controls .entry.dialogue } { - font configure ClientFont -family $family -size $size - } -} - -# Create the menu for changing the text size. -proc create_menu_textstyle {} { - set tmenu .menus.textstyle.menu - - $tmenu add cascade -label "Courier" -underline 0 -menu \ - $tmenu.courier -font ClientFont - $tmenu add cascade -label "Times" -underline 0 -menu \ - $tmenu.times -font ClientFont - $tmenu add cascade -label "Helvetica" -underline 0 -menu \ - $tmenu.helvetica -font ClientFont - foreach {name family} [list $tmenu.courier Courier \ - $tmenu.times Times $tmenu.helvetica Helvetica ] { - set m1 [menu $name] - foreach pts {6 7 8 9 10 12 14 16 18 20 24 28 32 36} { - $m1 add command -label "$pts" -font ClientFont\ - -command [list setup_font $family $pts ] - } - } -} - - - -# Create the menu for Help -proc create_menu_help {} { - # This is all pretty much "wet paint" - # Is there enough to merit separate menus? - set hmenu .menus.help.menu - $hmenu add command -label "Introduction" -font ClientFont\ - -command { - tk_messageBox -message "This is a basic Tcl/Tk -client for the MARY TTS system. Most of the options -are reached through the menus on the top. 
Some -facilities are presently lacking. - -Most of the interface should be self-explanatory. -In the File menu, Read will read a given file aloud -(or at least take it as input for the present -form of processing), whereas Open will load it -into the input area. Save input and Save output -refer to the contents of the text windows. The -save button next to the play button will save -the output to a file; this is assumed to be a -text file, unless the output is audio, in which -case it is a binary file. - -The Edit menu has cut and paste facilities, -but these don't seem to work reliably. The -default key bindings for text areas should -be useable. - -You will need to set the input and output types -and the audio format before pressing play. -Code does not yet exist to figure out sensible -defaults for your platform. - -This does not have support for the effects, yet. - -Contributions from developers welcome." -type ok - } - $hmenu add command -label "About" -command {} -font ClientFont -} - -# We need to create menus for the available -# voices and audio formats, etc. -# When we have the data for these menus from -# the server, create them by using the global -# lists of information. -proc create_radio_menu_from_list {what} { - global $what - set plural "${what}s" - upvar 1 $plural var - foreach item $var { - .menus.${what}.menu add radiobutton -label $item -variable $what \ - -value $item -font ClientFont - } -} - -proc reset_entry_and_var {a_variable} { - upvar #0 $a_variable var - upvar #0 old_$a_variable old_var - set var $old_var - destroy .entry_dialogue -} -# Create the toplevel for choosing a host -# or port, something taken from an entry. 
-proc create_entry_dialog {a_message a_label a_variable} { - upvar #0 $a_variable var - upvar #0 old_$a_variable old_var - toplevel .entry_dialogue - label .entry_dialogue.the_message -text $a_message \ - -font ClientFont - label .entry_dialogue.the_label -text $a_label -font ClientFont - entry .entry_dialogue.the_entry -textvariable $a_variable \ - -font ClientFont - button .entry_dialogue.ok -text "OK" -font ClientFont -command { - destroy .entry_dialogue - } - button .entry_dialogue.cancel -text "Cancel" -font ClientFont \ - -command "reset_entry_and_var $a_variable" - - grid .entry_dialogue.the_message -row 1 -column 1 - grid .entry_dialogue.the_label -row 2 -column 1 - grid .entry_dialogue.the_entry -row 2 -column 2 - grid .entry_dialogue.ok -row 3 -column 1 - grid .entry_dialogue.cancel -row 3 -column 2 -} - -# Add a message to the end of the output -# text widget. -proc add_message {a_message} { - .io.out.output_area configure -state normal - .io.out.output_area insert end $a_message - .io.out.output_area configure -state disabled -} - - -# Clear the text in the output text widget. -proc clear_output {} { - .io.out.output_area configure -state normal - .io.out.output_area delete 1.0 end - .io.out.output_area configure -state disabled -} - -# Sound generation is platform dependent. -# This provides an "abstract" function to -# be overridden by the platform dependent -# code. In this case it alerts the user -# in the output window that nothing is going -# to happen. -proc play {sound} { - add_message \ - "play sound not implemented on this platform apparently" -} - -# Graphical stuff. - -# In order to be able to scale the font, define a font. -font create ClientFont -family [font actual TkDefaultFont -family] \ - -size [font actual TkDefaultFont -size] - -frame .menus -create_menubuttons -create_menu_file -create_menu_edit -create_menu_server -create_menu_textstyle -create_menu_help -# Fill in the other menus at runtime. 
- -# .io communicates text with the user, -# through an input and output window. -frame .io -frame .io.inp -frame .io.out -# .controls will hold the play button and -# the effects controls. -frame .controls - -# Draw the controls in .io -label .io.inp.input_label -text "Input Area" -font ClientFont -text .io.inp.input_area -height 10 -width 40 \ --xscrollcommand ".io.inp.input_x set" \ --yscrollcommand ".io.inp.input_y set" -font ClientFont -scrollbar .io.inp.input_x -orient horizontal \ --command ".io.inp.input_area xview" -scrollbar .io.inp.input_y -orient vertical \ --command ".io.inp.input_area yview" - -label .io.out.output_label -text "Output Area" -font ClientFont -text .io.out.output_area -height 10 -width 40 -state disabled \ --xscrollcommand ".io.out.output_x set" \ --yscrollcommand ".io.out.output_y set" -font ClientFont -scrollbar .io.out.output_x -orient horizontal \ --command ".io.out.output_area xview" -scrollbar .io.out.output_y -orient vertical \ --command ".io.out.output_area yview" - -grid .io.inp -in .io -row 1 -column 1 -grid .io.out -in .io -row 1 -column 2 -grid .io.inp.input_label -in .io.inp -row 1 -column 1 -grid .io.inp.input_area -in .io.inp -row 2 -column 1 -grid .io.inp.input_y -in .io.inp -row 2 -column 2 -sticky ns -grid .io.inp.input_x -in .io.inp -row 3 -column 1 -sticky ew - -grid .io.out.output_label -in .io.out -row 1 -column 1 -grid .io.out.output_area -in .io.out -row 2 -column 1 -grid .io.out.output_y -in .io.out -row 2 -column 2 -sticky ns -grid .io.out.output_x -in .io.out -row 3 -column 1 -sticky ew - -button .controls.play -text "play" -font ClientFont -command { - generate_output [get_input_text] -} -grid .controls.play -in .controls -row 1 -column 1 - -button .controls.save -text "save" -font ClientFont -command { - global outputtype - set input_text [get_input_text] - if { $outputtype eq "AUDIO" } { - save_binary_file [collect_audio_data $input_text ] "Save audio file" - } else { - save_text_file [collect_audio_data 
$input_text ] "Save output to file" - } -} - -grid .controls.save -in .controls -row 1 -column 2 - -pack .menus .io .controls -in . -side top - - - -# Detect whether this is the main program -# This test was taken from the Tcl Wiki, and -# seems to work OK. - -if {[info exists argv0] && [file tail [info script]] eq [file tail $argv0]} { - - # Try to find the temporary files directory. - catch { set tmpdir "/tmp" } - catch { set tmpdir $::env(TRASH_FOLDER) } - catch { set tmpdir $::env(TMP) } - catch { set tmpdir $::env(TEMP) } - # This needs better handling of - # possible alternatives - # This is needed for Windows sound only. - - # Do the platform dependent things. - if {$tcl_platform(platform) eq "windows"} { - package require twapi - - proc play {sound} { - global tmpdir - # Write sound to a temporary file - set sndfile [file join $tmpdir "MARYTTS_sound.[pid].wav" ] - set stream [open $sndfile w] - # Make sure the file is binary: - fconfigure $stream -translation binary - puts -nonewline $stream $sound - close $stream - # Play the file. - ::twapi::play_sound $sndfile - # Remove the file. - file delete $sndfile - } - } - # Put other platforms here. - - # Setup the globals with reference to the - # server, which is assumed to be working. - # Since we have options to alter this with - # menu items, there probably needs to be - # some way to reload all this. But we need - # to know how to delete the existing menu - # entries to do that. - setup_globals - create_radio_menu_from_list inputtype - create_radio_menu_from_list outputtype - create_radio_menu_from_list voice - create_radio_menu_from_list audioformat - - # Note, at the moment voices holds locales, - # gender, and voice type - - # At the moment this is just diagnostic: - ## add_message [ join $voices "\n" ] - # it tells us we have a basically working - # system and the list of voices has been - # picked up and manipulated correctly. - # So it is commented out now. -} - - +# Tcl/Tk MARY TTS client. 
+ +# This has been tested on Windows, and because +# of the use of sound there will be portability +# issues. However, there should be enough here +# for a reasonable start at a client, for any +# platform that supports Tcl/Tk. The platform +# specific code has, as far as possible, been +# isolated in the part of the code that detects +# whether this is being run as a program. + +# Notes: +# More work will need to be done with this, +# in order to make the code clean. It should +# probably be wrapped in a package, to solve +# any namespace issues. There are a lot of +# global variables. It seems that some of +# these are necessary for the menus to work. +# Handling of temporary files could be improved. + +# TODO: +# Create modifier sliders, for the effects. +# Extend the query proc to make use of them. +# Turn the Help menu into something more useful. +# Debug the actions for the Edit menu. +# Provide a means of getting example inputs +# from the server. +# Provide a means of re-loading all the +# dynamically collected information when the +# server is changed from the menu. This means +# that we need to delete the existing menu +# entries in order to add them correctly. +# How do we ensure temporary files are removed +# in the event of a problem? if {catch {}} ...? +# Maybe leaving them around is diagnostic info? +# Make that an option? +# Add error handling code for network and disk +# failures likely to beset such clients. +# Add sensible defaults for things the user must +# always set at startup, but these will be +# platform spacific. Always default to Audio +# output for example, or is it possible that +# people have no voices installed? + + +# This is a GUI, so: +package require Tk + +# We are communicating with the Mary server +# with HTTP. +package require http + +# Use the local machine in preference to the +# one in Germany. 
+set mary_tts_default_host "127.0.0.1" +set mary_tts_default_port 59125 + +# Actual host and port, and global old +# copies to allow revert on cancel in the +# dialogues. Apparently upvar #0 is the +# norm for that sort of thing [Tcl Wiki] +set mary_tts_host $mary_tts_default_host +set old_mary_tts_host $mary_tts_host +set mary_tts_port $mary_tts_default_port +set old_mary_tts_port $mary_tts_port + +# Informational URLs +set informational_urls [ list \ +version datatypes voices \ +audioformats audioeffects ] + +####### + +# Obtain a static page from the server, i.e. +# no parameters are needed to get it. +proc get_page { relative_url } { + global mary_tts_host mary_tts_port + set url http://$mary_tts_host:$mary_tts_port/$relative_url + set result [::http::geturl $url] + return [::http::data $result] +} + +proc list_of_lines {str} { + return [ split $str "\n" ] +} + + +# We will need to collect this information +# when we have the server and port chosen. +proc get_audioeffects {} { + return [list_of_lines [get_page audioeffects] ] +} + +proc get_audioformats {} { + return [list_of_lines [get_page audioformats] ] +} + +proc get_datatypes {} { + return [ list_of_lines [get_page datatypes] ] +} + + +proc get_voices {} { + return [list_of_lines [get_page voices] ] +} + +# Handling post queries. + +# Submit the query to the server, using the +# http POST method. +proc make_query {url encoded_params} { + set http [::http::geturl $url -query $encoded_params] + set result [::http::data $http] + return $result +} + +# Get the text from the input text area +proc get_input_text {} { + return [.io.inp.input_area get 1.0 end] +} + +# Get the text from the output text area +proc get_output_text {} { + return [.io.out.output_area get 1.0 end] +} + +# Collect the audio data from the server. 
+proc collect_audio_data {text_to_process} { + global mary_tts_host mary_tts_port + global inputtype outputtype locales + global audioformat voice + set url "http://$mary_tts_host:$mary_tts_port/process" + # ::http::formatQuery converts a list of + # key value pairs into the correct format + # for http POST. + set params [::http::formatQuery INPUT_TEXT $text_to_process INPUT_TYPE $inputtype OUTPUT_TYPE $outputtype LOCALE $locales($voice) AUDIO $audioformat VOICE $voice ] + set result [make_query $url $params] + return $result +} + +# Pushes the query to the server and gets +# the results back, displaying or playing +# them. +proc generate_output {text_to_process} { + global outputtype + set result [collect_audio_data $text_to_process] + if {$outputtype eq "AUDIO"} { + # call the platform dependent implementation. + play $result + } else { + clear_output + add_message $result + } + # Return the result so we can save it if + # the user requires it. + return $result +} + + +# These next procs are for handling the +# lists of data one gets back from the server +# which possibly have several words per line, +# separated by spaces. + +# If the first word of each listed line is +# significant, extract the list of first words. +proc collect_first_words_of_phrase_list {a_list} { + for {set i 0} {$i < [llength $a_list]} {incr i} { + set data [lindex $a_list $i ] + set word [ lindex [split $data " "] 0 ] + lappend words $word + } + return $words +} + + +# If the second word of each listed line is +# significant, extract the list of second words. +proc collect_second_words_of_phrase_list {a_list} { + for {set i 0} {$i < [llength $a_list]} {incr i} { + set data [lindex $a_list $i ] + set word [ lindex [split $data " "] 1 ] + lappend words $word + } + return $words +} + + +# The list of datatypes must be separated into +# input data types and output data types so that +# interactions with the server make sense. +# This handles the inputs. 
+proc collect_first_words_of_input_types {a_list} { + for {set i 0} {$i < [llength $a_list]} {incr i} { + set data [lindex $a_list $i ] + if {[ string match -nocase "*input*" $data ]} { + set word [ lindex [split $data " "] 0 ] + lappend words $word + } + } + return $words +} + + +# The list of datatypes must be separated into +# input data types and output data types so that +# interactions with the server make sense. +# This handles the outputs. +proc collect_first_words_of_output_types {a_list} { + for {set i 0} {$i < [llength $a_list]} {incr i} { + set data [lindex $a_list $i ] + if {[string match -nocase "*output*" $data]} { + set word [ lindex [split $data " "] 0 ] + lappend words $word + } + } + return $words +} + +# setup all the variables to hold voices, +# audio options, etc., based on what the +# server can do. +proc setup_globals {} { + global audioeffects audioformats voices + global inputtypes outputtypes audioformat voice + global inputtype outputtype locales + + set audioeffects [get_audioeffects] + set audioformats [get_audioformats] + set audioformat [lindex $audioformats 0 ] + set datatypes_data [get_datatypes] + set inputtypes [collect_first_words_of_input_types $datatypes_data] + set inputtype [lindex $inputtypes 0] + set outputtypes [collect_first_words_of_output_types $datatypes_data] + set outputtype [lindex $outputtypes 0] + set voices_data [get_voices] + set voices [collect_first_words_of_phrase_list $voices_data] + set locales_list [collect_second_words_of_phrase_list $voices_data ] + for {set i 0} {$i < [llength $voices]} {incr i} { + set locales([lindex $voices $i]) [lindex $locales_list $i] + } + set voice [lindex $voices 0] +} + +# A general procedure for filling in the +# elements of a listbox from a list. +# At present this is unused, but it could +# be useful later. [It took a while to +# figure out so I'm not ready to kill it +# with YAGNI.] 
+proc add_listbox_items {a_var a_widget} { + upvar $a_var var + foreach item $var { + $a_widget insert end $item + } +} + +# Create the menubuttons along the top. +# Usual File, Edit and Help menus plus +# those to set attributes. +proc create_menubuttons {} { + set buttons [ list file File edit Edit \ + server "Server" \ + inputtype "Input type" outputtype "Output type" \ + voice Voice \ + audioformat "Audio format" \ + textstyle "Text style" help Help ] + + set count 1 + foreach { menu_tag string_tag} $buttons { + menubutton .menus.$menu_tag -text $string_tag \ + -menu .menus.${menu_tag}.menu -underline 0 -font ClientFont + menu .menus.${menu_tag}.menu -tearoff true + grid .menus.$menu_tag -in .menus -row 1 -column $count -sticky w + incr count + } +} + +# Get the contents of a text file for reading +# or loading into a text widget, etc. +proc text_file_contents {what_for} { + set a_file [tk_getOpenFile -title $what_for ] + set the_text "" + + if {$a_file != ""} { + set a_stream [open $a_file r ] + set the_text [read $a_stream] + close $a_stream + } + + return $the_text +} + + +# Save the_text to a text file specified +# by the user, for the given reason (what_for). +# At the moment there is no error handling +# for this (disk full, write protected, etc). +proc save_text_file {the_text what_for} { + set a_file [tk_getSaveFile -title $what_for -parent .] + if {$a_file != ""} { + set a_stream [open $a_file w ] + puts $a_stream $the_text + close $a_stream + } +} + +# Save the_data to a binary file specified +# by the user, for the given reason (what_for), +# a text string. +# At the moment there is no error handling +# for this (disk full, write protected, etc). +proc save_binary_file {the_data what_for} { + set a_file [tk_getSaveFile -title $what_for -parent .] 
+ if {$a_file != ""} { + set a_stream [open $a_file w ] + fconfigure $a_stream -translation binary + puts -nonewline $a_stream $the_data + close $a_stream + } +} + +# Create the menu for File operations +proc create_menu_file {} { + set fmenu .menus.file.menu + $fmenu add command -label "New" \ + -font ClientFont -command { + .io.inp.input_area delete 1.0 end + } + # Replace the contents of the input text + # widget by the data from the open file. + # YAGNI, but is there any reason + # to allow inserting a file, rather than + # replacing the text with file contents? + # + $fmenu add command -label "Open" \ + -font ClientFont -command { + set the_text [text_file_contents "File to load"] + if {$the_text != ""} { + .io.inp.input_area delete 1.0 end + .io.inp.input_area insert end $the_text + } + } + + $fmenu add command -label "Read" \ + -font ClientFont -command { + generate_output [text_file_contents "File to read"] + } + # How to make these disabled for now? + $fmenu add command -label "Save Input" \ + -font ClientFont -command { + set the_text [get_input_text] + save_text_file $the_text "Save Input" + } + $fmenu add command -label "Save Output" \ + -font ClientFont -command { + set the_text [get_output_text] + save_text_file $the_text "Save Output" + } +} + +# Create the menu for edit operations +proc create_menu_edit {} { + set emenu .menus.edit.menu + $emenu add command -label "Select All from Input Area" \ + -font ClientFont -command { + # This code says copy the selection as well. + # May be wrong for some platforms, but is + # it more useful? + .io.inp.input_area tag add sel 1.0 end + event generate .io.inp.input_area <> +} + $emenu add command -label "Select All from Output Area" \ + -font ClientFont -command { + # This code says copy the selection as well. + # May be wrong for some platforms, but is + # it more useful? 
+ .io.out.output_area tag add sel 1.0 end + event generate .io.out.output_area <> +} + $emenu add command -label "Copy from Input Area" \ + -font ClientFont -command { + # this appears not to work. FIXME + event generate .io.inp.input_area <> + } + $emenu add command -label "Copy from Output Area" \ + -font ClientFont -command { + # this appears not to work. FIXME + event generate .io.out.output_area <> + } + $emenu add command -label "Paste into Input Area" \ + -font ClientFont -command { + # this appears not to work. FIXME + event generate .io.inp.input_area <> + } + $emenu add command \ + -font ClientFont -label "Insert example text into Input Area"\ + -command { + } + # Add specific editing commands here later. + # For example, we would like to be able to + # add whole tags to the XML based formats, + # wrap matching tags around selected text. + # Also we need to find out what happens with + # copy cut and paste, given that X Windows + # is different from MS Windows. + # Allow example text to be inserted. + # However, my thinking is that this should not + # overwrite as it is in the Java application, + # because this rubs out edits when switching + # voices, and this can be annoying when + # exploring the system. +} + +# Set the server properties, mostly just +# host and port. Maybe later protocol will +# be possible for https connections? +proc create_menu_server {} { + set smenu .menus.server.menu + $smenu add command -label "host" -font ClientFont -command { + create_entry_dialog "MARY TTS server name" "hostname/IP Address" mary_tts_host + } + $smenu add command -label "port" -font ClientFont -command { + create_entry_dialog "MARY TTS server port" "pott number" mary_tts_port + } +} + +# setup the fonts for the various areas on the dipslay. +proc setup_font {family size} { + foreach win {.io .controls .entry.dialogue } { + font configure ClientFont -family $family -size $size + } +} + +# Create the menu for changing the text size. 
+proc create_menu_textstyle {} { + set tmenu .menus.textstyle.menu + + $tmenu add cascade -label "Courier" -underline 0 -menu \ + $tmenu.courier -font ClientFont + $tmenu add cascade -label "Times" -underline 0 -menu \ + $tmenu.times -font ClientFont + $tmenu add cascade -label "Helvetica" -underline 0 -menu \ + $tmenu.helvetica -font ClientFont + foreach {name family} [list $tmenu.courier Courier \ + $tmenu.times Times $tmenu.helvetica Helvetica ] { + set m1 [menu $name] + foreach pts {6 7 8 9 10 12 14 16 18 20 24 28 32 36} { + $m1 add command -label "$pts" -font ClientFont\ + -command [list setup_font $family $pts ] + } + } +} + + + +# Create the menu for Help +proc create_menu_help {} { + # This is all pretty much "wet paint" + # Is there enough to merit separate menus? + set hmenu .menus.help.menu + $hmenu add command -label "Introduction" -font ClientFont\ + -command { + tk_messageBox -message "This is a basic Tcl/Tk +client for the MARY TTS system. Most of the options +are reached through the menus on the top. Some +facilities are presently lacking. + +Most of the interface should be self-explanatory. +In the File menu, Read will read a given file aloud +(or at least take it as input for the present +form of processing), whereas Open will load it +into the input area. Save input and Save output +refer to the contents of the text windows. The +save button next to the play button will save +the output to a file; this is assumed to be a +text file, unless the output is audio, in which +case it is a binary file. + +The Edit menu has cut and paste facilities, +but these don't seem to work reliably. The +default key bindings for text areas should +be useable. + +You will need to set the input and output types +and the audio format before pressing play. +Code does not yet exist to figure out sensible +defaults for your platform. + +This does not have support for the effects, yet. + +Contributions from developers welcome." 
-type ok + } + $hmenu add command -label "About" -command {} -font ClientFont +} + +# We need to create menus for the available +# voices and audio formats, etc. +# When we have the data for these menus from +# the server, create them by using the global +# lists of information. +proc create_radio_menu_from_list {what} { + global $what + set plural "${what}s" + upvar 1 $plural var + foreach item $var { + .menus.${what}.menu add radiobutton -label $item -variable $what \ + -value $item -font ClientFont + } +} + +proc reset_entry_and_var {a_variable} { + upvar #0 $a_variable var + upvar #0 old_$a_variable old_var + set var $old_var + destroy .entry_dialogue +} +# Create the toplevel for choosing a host +# or port, something taken from an entry. +proc create_entry_dialog {a_message a_label a_variable} { + upvar #0 $a_variable var + upvar #0 old_$a_variable old_var + toplevel .entry_dialogue + label .entry_dialogue.the_message -text $a_message \ + -font ClientFont + label .entry_dialogue.the_label -text $a_label -font ClientFont + entry .entry_dialogue.the_entry -textvariable $a_variable \ + -font ClientFont + button .entry_dialogue.ok -text "OK" -font ClientFont -command { + destroy .entry_dialogue + } + button .entry_dialogue.cancel -text "Cancel" -font ClientFont \ + -command "reset_entry_and_var $a_variable" + + grid .entry_dialogue.the_message -row 1 -column 1 + grid .entry_dialogue.the_label -row 2 -column 1 + grid .entry_dialogue.the_entry -row 2 -column 2 + grid .entry_dialogue.ok -row 3 -column 1 + grid .entry_dialogue.cancel -row 3 -column 2 +} + +# Add a message to the end of the output +# text widget. +proc add_message {a_message} { + .io.out.output_area configure -state normal + .io.out.output_area insert end $a_message + .io.out.output_area configure -state disabled +} + + +# Clear the text in the output text widget. 
+proc clear_output {} { + .io.out.output_area configure -state normal + .io.out.output_area delete 1.0 end + .io.out.output_area configure -state disabled +} + +# Sound generation is platform dependent. +# This provides an "abstract" function to +# be overridden by the platform dependent +# code. In this case it alerts the user +# in the output window that nothing is going +# to happen. +proc play {sound} { + add_message \ + "play sound not implemented on this platform apparently" +} + +# Graphical stuff. + +# In order to be able to scale the font, define a font. +font create ClientFont -family [font actual TkDefaultFont -family] \ + -size [font actual TkDefaultFont -size] + +frame .menus +create_menubuttons +create_menu_file +create_menu_edit +create_menu_server +create_menu_textstyle +create_menu_help +# Fill in the other menus at runtime. + +# .io communicates text with the user, +# through an input and output window. +frame .io +frame .io.inp +frame .io.out +# .controls will hold the play button and +# the effects controls. 
+frame .controls + +# Draw the controls in .io +label .io.inp.input_label -text "Input Area" -font ClientFont +text .io.inp.input_area -height 10 -width 40 \ +-xscrollcommand ".io.inp.input_x set" \ +-yscrollcommand ".io.inp.input_y set" -font ClientFont +scrollbar .io.inp.input_x -orient horizontal \ +-command ".io.inp.input_area xview" +scrollbar .io.inp.input_y -orient vertical \ +-command ".io.inp.input_area yview" + +label .io.out.output_label -text "Output Area" -font ClientFont +text .io.out.output_area -height 10 -width 40 -state disabled \ +-xscrollcommand ".io.out.output_x set" \ +-yscrollcommand ".io.out.output_y set" -font ClientFont +scrollbar .io.out.output_x -orient horizontal \ +-command ".io.out.output_area xview" +scrollbar .io.out.output_y -orient vertical \ +-command ".io.out.output_area yview" + +grid .io.inp -in .io -row 1 -column 1 +grid .io.out -in .io -row 1 -column 2 +grid .io.inp.input_label -in .io.inp -row 1 -column 1 +grid .io.inp.input_area -in .io.inp -row 2 -column 1 +grid .io.inp.input_y -in .io.inp -row 2 -column 2 -sticky ns +grid .io.inp.input_x -in .io.inp -row 3 -column 1 -sticky ew + +grid .io.out.output_label -in .io.out -row 1 -column 1 +grid .io.out.output_area -in .io.out -row 2 -column 1 +grid .io.out.output_y -in .io.out -row 2 -column 2 -sticky ns +grid .io.out.output_x -in .io.out -row 3 -column 1 -sticky ew + +button .controls.play -text "play" -font ClientFont -command { + generate_output [get_input_text] +} +grid .controls.play -in .controls -row 1 -column 1 + +button .controls.save -text "save" -font ClientFont -command { + global outputtype + set input_text [get_input_text] + if { $outputtype eq "AUDIO" } { + save_binary_file [collect_audio_data $input_text ] "Save audio file" + } else { + save_text_file [collect_audio_data $input_text ] "Save output to file" + } +} + +grid .controls.save -in .controls -row 1 -column 2 + +pack .menus .io .controls -in . 
-side top + + + +# Detect whether this is the main program +# This test was taken from the Tcl Wiki, and +# seems to work OK. + +if {[info exists argv0] && [file tail [info script]] eq [file tail $argv0]} { + + # Try to find the temporary files directory. + catch { set tmpdir "/tmp" } + catch { set tmpdir $::env(TRASH_FOLDER) } + catch { set tmpdir $::env(TMP) } + catch { set tmpdir $::env(TEMP) } + # This needs better handling of + # possible alternatives + # This is needed for Windows sound only. + + # Do the platform dependent things. + if {$tcl_platform(platform) eq "windows"} { + package require twapi + + proc play {sound} { + global tmpdir + # Write sound to a temporary file + set sndfile [file join $tmpdir "MARYTTS_sound.[pid].wav" ] + set stream [open $sndfile w] + # Make sure the file is binary: + fconfigure $stream -translation binary + puts -nonewline $stream $sound + close $stream + # Play the file. + ::twapi::play_sound $sndfile + # Remove the file. + file delete $sndfile + } + } + # Put other platforms here. + + # Setup the globals with reference to the + # server, which is assumed to be working. + # Since we have options to alter this with + # menu items, there probably needs to be + # some way to reload all this. But we need + # to know how to delete the existing menu + # entries to do that. + setup_globals + create_radio_menu_from_list inputtype + create_radio_menu_from_list outputtype + create_radio_menu_from_list voice + create_radio_menu_from_list audioformat + + # Note, at the moment voices holds locales, + # gender, and voice type + + # At the moment this is just diagnostic: + ## add_message [ join $voices "\n" ] + # it tells us we have a basically working + # system and the list of voices has been + # picked up and manipulated correctly. + # So it is commented out now. 
+} + + diff --git a/marytts-builder/src/main/resources/marytts/tools/voiceimport/importMain.config b/marytts-builder/src/main/resources/marytts/tools/voiceimport/importMain.config index d0073a53..631dc574 100644 --- a/marytts-builder/src/main/resources/marytts/tools/voiceimport/importMain.config +++ b/marytts-builder/src/main/resources/marytts/tools/voiceimport/importMain.config @@ -1,83 +1,83 @@ -# Config file for DatabaseImportMain -# Determines which voice import components are loaded - -group raw_acoustics Raw Acoustics - -marytts.tools.voiceimport.PraatPitchmarker raw_acoustics -#marytts.tools.voiceimport.SnackPitchmarker raw_acoustics -#marytts.tools.voiceimport.SnackVoiceQualityProcessor raw_acoustics -#marytts.tools.voiceimport.AutocorrelationPitchmarker raw_acoustics -marytts.tools.voiceimport.MCEPMaker raw_acoustics - -group transcripts Transcripts Conversion - -#marytts.tools.voiceimport.Mary2FestvoxTranscripts transcripts -marytts.tools.voiceimport.Festvox2MaryTranscripts transcripts - -group labeling Automatic Labeling - -#marytts.tools.voiceimport.EndpointDetector labeling -marytts.tools.voiceimport.AllophonesExtractor labeling -marytts.tools.voiceimport.EHMMLabeler labeling -marytts.tools.voiceimport.HTKLabeler labeling -marytts.tools.voiceimport.LabelPauseDeleter labeling -marytts.tools.voiceimport.LabelledFilesInspector labeling - -group labels_align Label-Transcript Alignment - -marytts.tools.voiceimport.PhoneUnitLabelComputer labels_align -marytts.tools.voiceimport.HalfPhoneUnitLabelComputer labels_align -marytts.tools.voiceimport.TranscriptionAligner labels_align - -group text_features Feature Extraction - -marytts.tools.voiceimport.FeatureSelection text_features -marytts.tools.voiceimport.PhoneUnitFeatureComputer text_features -marytts.tools.voiceimport.HalfPhoneUnitFeatureComputer text_features - -group labels_features Verify Alignment - -marytts.tools.voiceimport.PhoneLabelFeatureAligner labels_features 
-marytts.tools.voiceimport.HalfPhoneLabelFeatureAligner labels_features -#marytts.tools.voiceimport.QualityControl labels_features - -group basic_data Basic Data Files - -marytts.tools.voiceimport.WaveTimelineMaker basic_data -marytts.tools.voiceimport.BasenameTimelineMaker basic_data -marytts.tools.voiceimport.MCepTimelineMaker basic_data -#marytts.tools.voiceimport.HnmTimelineMaker basic_data - -group acoustic_models Acoustic Models - -marytts.tools.voiceimport.PhoneUnitfileWriter acoustic_models -marytts.tools.voiceimport.PhoneFeatureFileWriter acoustic_models -marytts.tools.voiceimport.DurationCARTTrainer acoustic_models -#marytts.tools.voiceimport.DurationTreeTrainer acoustic_models -marytts.tools.voiceimport.F0CARTTrainer acoustic_models - -group us_files Unit Selection Files - -marytts.tools.voiceimport.HalfPhoneUnitfileWriter us_files -marytts.tools.voiceimport.HalfPhoneFeatureFileWriter us_files -marytts.tools.voiceimport.F0PolynomialFeatureFileWriter us_files -marytts.tools.voiceimport.AcousticFeatureFileWriter us_files -#marytts.tools.voiceimport.F0PolynomialTreeTrainer us_files -#marytts.tools.voiceimport.F0PolynomialInspector us_files -marytts.tools.voiceimport.JoinCostFileMaker us_files -#marytts.tools.voiceimport.JoinModeller us_files -marytts.tools.voiceimport.CARTBuilder us_files - -group hts_trainer HMM Voice Trainer - -marytts.tools.voiceimport.HMMVoiceDataPreparation hts_trainer -marytts.tools.voiceimport.HMMVoiceConfigure hts_trainer -#marytts.tools.voiceimport.HMMVoiceConfigureAdapt hts_trainer -marytts.tools.voiceimport.HMMVoiceFeatureSelection hts_trainer -marytts.tools.voiceimport.HMMVoiceMakeData hts_trainer -marytts.tools.voiceimport.HMMVoiceMakeVoice hts_trainer - -group install Install Voice - -marytts.tools.voiceimport.VoiceCompiler install -marytts.tools.voiceimport.HMMVoiceCompiler install +# Config file for DatabaseImportMain +# Determines which voice import components are loaded + +group raw_acoustics Raw Acoustics + 
+marytts.tools.voiceimport.PraatPitchmarker raw_acoustics +#marytts.tools.voiceimport.SnackPitchmarker raw_acoustics +#marytts.tools.voiceimport.SnackVoiceQualityProcessor raw_acoustics +#marytts.tools.voiceimport.AutocorrelationPitchmarker raw_acoustics +marytts.tools.voiceimport.MCEPMaker raw_acoustics + +group transcripts Transcripts Conversion + +#marytts.tools.voiceimport.Mary2FestvoxTranscripts transcripts +marytts.tools.voiceimport.Festvox2MaryTranscripts transcripts + +group labeling Automatic Labeling + +#marytts.tools.voiceimport.EndpointDetector labeling +marytts.tools.voiceimport.AllophonesExtractor labeling +marytts.tools.voiceimport.EHMMLabeler labeling +marytts.tools.voiceimport.HTKLabeler labeling +marytts.tools.voiceimport.LabelPauseDeleter labeling +marytts.tools.voiceimport.LabelledFilesInspector labeling + +group labels_align Label-Transcript Alignment + +marytts.tools.voiceimport.PhoneUnitLabelComputer labels_align +marytts.tools.voiceimport.HalfPhoneUnitLabelComputer labels_align +marytts.tools.voiceimport.TranscriptionAligner labels_align + +group text_features Feature Extraction + +marytts.tools.voiceimport.FeatureSelection text_features +marytts.tools.voiceimport.PhoneUnitFeatureComputer text_features +marytts.tools.voiceimport.HalfPhoneUnitFeatureComputer text_features + +group labels_features Verify Alignment + +marytts.tools.voiceimport.PhoneLabelFeatureAligner labels_features +marytts.tools.voiceimport.HalfPhoneLabelFeatureAligner labels_features +#marytts.tools.voiceimport.QualityControl labels_features + +group basic_data Basic Data Files + +marytts.tools.voiceimport.WaveTimelineMaker basic_data +marytts.tools.voiceimport.BasenameTimelineMaker basic_data +marytts.tools.voiceimport.MCepTimelineMaker basic_data +#marytts.tools.voiceimport.HnmTimelineMaker basic_data + +group acoustic_models Acoustic Models + +marytts.tools.voiceimport.PhoneUnitfileWriter acoustic_models +marytts.tools.voiceimport.PhoneFeatureFileWriter acoustic_models 
+marytts.tools.voiceimport.DurationCARTTrainer acoustic_models +#marytts.tools.voiceimport.DurationTreeTrainer acoustic_models +marytts.tools.voiceimport.F0CARTTrainer acoustic_models + +group us_files Unit Selection Files + +marytts.tools.voiceimport.HalfPhoneUnitfileWriter us_files +marytts.tools.voiceimport.HalfPhoneFeatureFileWriter us_files +marytts.tools.voiceimport.F0PolynomialFeatureFileWriter us_files +marytts.tools.voiceimport.AcousticFeatureFileWriter us_files +#marytts.tools.voiceimport.F0PolynomialTreeTrainer us_files +#marytts.tools.voiceimport.F0PolynomialInspector us_files +marytts.tools.voiceimport.JoinCostFileMaker us_files +#marytts.tools.voiceimport.JoinModeller us_files +marytts.tools.voiceimport.CARTBuilder us_files + +group hts_trainer HMM Voice Trainer + +marytts.tools.voiceimport.HMMVoiceDataPreparation hts_trainer +marytts.tools.voiceimport.HMMVoiceConfigure hts_trainer +#marytts.tools.voiceimport.HMMVoiceConfigureAdapt hts_trainer +marytts.tools.voiceimport.HMMVoiceFeatureSelection hts_trainer +marytts.tools.voiceimport.HMMVoiceMakeData hts_trainer +marytts.tools.voiceimport.HMMVoiceMakeVoice hts_trainer + +group install Install Voice + +marytts.tools.voiceimport.VoiceCompiler install +marytts.tools.voiceimport.HMMVoiceCompiler install diff --git a/marytts-builder/src/main/resources/marytts/tools/voiceimport/templates/hsmm-voice.config b/marytts-builder/src/main/resources/marytts/tools/voiceimport/templates/hsmm-voice.config index e6a69153..7d6a144e 100644 --- a/marytts-builder/src/main/resources/marytts/tools/voiceimport/templates/hsmm-voice.config +++ b/marytts-builder/src/main/resources/marytts/tools/voiceimport/templates/hsmm-voice.config @@ -1,109 +1,109 @@ -#Auto-generated config file for voice ${VOICENAME} - -name = ${VOICENAME} -locale = ${LOCALE} - - -#################################################################### -####################### Module settings ########################### 
-#################################################################### -# For keys ending in ".list", values will be appended across config files, -# so that .list keys can occur in several config files. -# For all other keys, values will be copied to the global config, so -# keys should be unique across config files. - -hmm.voices.list = \ - ${VOICENAME} - -# If this setting is not present, a default value of 0 is assumed. -voice.${VOICENAME}.wants.to.be.default = 0 - -# Set your voice specifications -voice.${VOICENAME}.gender = ${GENDER} -voice.${VOICENAME}.locale = ${LOCALE} -voice.${VOICENAME}.domain = ${DOMAIN} -voice.${VOICENAME}.samplingRate = ${SAMPLINGRATE} -voice.${VOICENAME}.framePeriod = ${FRAMEPERIOD} - -# HMM Voice-specific parameters -# parameters used during models training -# MGC: stage=gamma=0 alpha=0.42 16KHz linear gain; alpha=0.55 48Khz log gain (default) -# LSP: gamma>0 -# LSP: gamma=1 alpha=0.0 linear gain/log gain -# Mel-LSP: gamma=1 alpha=0.42 log gain -# MGC-LSP: gamma=3 alpha=0.42 log gain -voice.${VOICENAME}.alpha = ${ALPHA} -voice.${VOICENAME}.gamma = ${GAMMA} -#voice.${VOICENAME}.logGain = ${LOGGAIN} -voice.${VOICENAME}.logGain = true - -# Parameter beta for postfiltering (parameter for tuning) -voice.${VOICENAME}.beta = 0.1 - -# HMM Voice-specific files -# Information about trees -voice.${VOICENAME}.Ftd = jar:/marytts/voice/${PACKAGE}/tree-dur.inf -voice.${VOICENAME}.Ftf = jar:/marytts/voice/${PACKAGE}/tree-lf0.inf -voice.${VOICENAME}.Ftm = jar:/marytts/voice/${PACKAGE}/tree-mgc.inf -voice.${VOICENAME}.Fts = jar:/marytts/voice/${PACKAGE}/tree-str.inf - -# Information about means and variances PDFs -voice.${VOICENAME}.Fmd = jar:/marytts/voice/${PACKAGE}/dur.pdf -voice.${VOICENAME}.Fmf = jar:/marytts/voice/${PACKAGE}/lf0.pdf -voice.${VOICENAME}.Fmm = jar:/marytts/voice/${PACKAGE}/mgc.pdf -voice.${VOICENAME}.Fms = jar:/marytts/voice/${PACKAGE}/str.pdf - -# Information about Global Mean and Variance PDFs -voice.${VOICENAME}.useGV = true - 
-# Variable for allowing context-dependent GV (without sil) -voice.${VOICENAME}.useContextDependentGV = true - -# GV method: gradient or derivative (default gradient) -voice.${VOICENAME}.gvMethod = gradient - -# Max number of GV iterations (parameters for tuning) -voice.${VOICENAME}.maxMgcGvIter = 100 -voice.${VOICENAME}.maxLf0GvIter = 100 -voice.${VOICENAME}.maxStrGvIter = 100 - -# GV weights for each parameter: between 0.0-2.0 -voice.${VOICENAME}.gvWeightMgc = 1.0 -voice.${VOICENAME}.gvWeightLf0 = 1.0 -voice.${VOICENAME}.gvWeightStr = 1.0 - -# Mean and variance files for GV -voice.${VOICENAME}.Fgvf = jar:/marytts/voice/${PACKAGE}/gv-lf0.pdf -voice.${VOICENAME}.Fgvm = jar:/marytts/voice/${PACKAGE}/gv-mgc.pdf -voice.${VOICENAME}.Fgvs = jar:/marytts/voice/${PACKAGE}/gv-str.pdf - -# A context features file example for start-up testing. -voice.${VOICENAME}.FeaFile = jar:/marytts/voice/${PACKAGE}/features_example.pfeats - -# Tricky phones file in case there were problematic phones during training, empty otherwise. 
-voice.${VOICENAME}.trickyPhonesFile = jar:/marytts/voice/${PACKAGE}/trickyPhones.txt - -# Information about Mixed Excitation -voice.${VOICENAME}.useMixExc = true - -# Filters for mixed excitation -# File format: one column with all the taps, where the number of taps per filter = numTaps/numFilters -voice.${VOICENAME}.Fif = jar:/marytts/voice/${PACKAGE}/${MIXEXCFILTERFILE} -# Number of filters in bandpass bank -voice.${VOICENAME}.in = ${NUMMIXEXCFILTERS} - -# Information about acousticModels (if true allows prosody modification specified in MARYXML input) -voice.${VOICENAME}.useAcousticModels = true - -# acoustic models to use (HMM models or carts from other voices can be specified) -#(uncoment to allow prosody modification specified in MARYXML input) -voice.${VOICENAME}.acousticModels = duration F0 - -voice.${VOICENAME}.duration.model = hmm -# voice.${VOICENAME}.duration.data not used for hmm models -voice.${VOICENAME}.duration.attribute = d - -voice.${VOICENAME}.F0.model = hmm -# voice.${VOICENAME}.F0.data not used for hmm models -voice.${VOICENAME}.F0.attribute = f0 - +#Auto-generated config file for voice ${VOICENAME} + +name = ${VOICENAME} +locale = ${LOCALE} + + +#################################################################### +####################### Module settings ########################### +#################################################################### +# For keys ending in ".list", values will be appended across config files, +# so that .list keys can occur in several config files. +# For all other keys, values will be copied to the global config, so +# keys should be unique across config files. + +hmm.voices.list = \ + ${VOICENAME} + +# If this setting is not present, a default value of 0 is assumed. 
+voice.${VOICENAME}.wants.to.be.default = 0 + +# Set your voice specifications +voice.${VOICENAME}.gender = ${GENDER} +voice.${VOICENAME}.locale = ${LOCALE} +voice.${VOICENAME}.domain = ${DOMAIN} +voice.${VOICENAME}.samplingRate = ${SAMPLINGRATE} +voice.${VOICENAME}.framePeriod = ${FRAMEPERIOD} + +# HMM Voice-specific parameters +# parameters used during models training +# MGC: stage=gamma=0 alpha=0.42 16KHz linear gain; alpha=0.55 48Khz log gain (default) +# LSP: gamma>0 +# LSP: gamma=1 alpha=0.0 linear gain/log gain +# Mel-LSP: gamma=1 alpha=0.42 log gain +# MGC-LSP: gamma=3 alpha=0.42 log gain +voice.${VOICENAME}.alpha = ${ALPHA} +voice.${VOICENAME}.gamma = ${GAMMA} +#voice.${VOICENAME}.logGain = ${LOGGAIN} +voice.${VOICENAME}.logGain = true + +# Parameter beta for postfiltering (parameter for tuning) +voice.${VOICENAME}.beta = 0.1 + +# HMM Voice-specific files +# Information about trees +voice.${VOICENAME}.Ftd = jar:/marytts/voice/${PACKAGE}/tree-dur.inf +voice.${VOICENAME}.Ftf = jar:/marytts/voice/${PACKAGE}/tree-lf0.inf +voice.${VOICENAME}.Ftm = jar:/marytts/voice/${PACKAGE}/tree-mgc.inf +voice.${VOICENAME}.Fts = jar:/marytts/voice/${PACKAGE}/tree-str.inf + +# Information about means and variances PDFs +voice.${VOICENAME}.Fmd = jar:/marytts/voice/${PACKAGE}/dur.pdf +voice.${VOICENAME}.Fmf = jar:/marytts/voice/${PACKAGE}/lf0.pdf +voice.${VOICENAME}.Fmm = jar:/marytts/voice/${PACKAGE}/mgc.pdf +voice.${VOICENAME}.Fms = jar:/marytts/voice/${PACKAGE}/str.pdf + +# Information about Global Mean and Variance PDFs +voice.${VOICENAME}.useGV = true + +# Variable for allowing context-dependent GV (without sil) +voice.${VOICENAME}.useContextDependentGV = true + +# GV method: gradient or derivative (default gradient) +voice.${VOICENAME}.gvMethod = gradient + +# Max number of GV iterations (parameters for tuning) +voice.${VOICENAME}.maxMgcGvIter = 100 +voice.${VOICENAME}.maxLf0GvIter = 100 +voice.${VOICENAME}.maxStrGvIter = 100 + +# GV weights for each parameter: between 
0.0-2.0 +voice.${VOICENAME}.gvWeightMgc = 1.0 +voice.${VOICENAME}.gvWeightLf0 = 1.0 +voice.${VOICENAME}.gvWeightStr = 1.0 + +# Mean and variance files for GV +voice.${VOICENAME}.Fgvf = jar:/marytts/voice/${PACKAGE}/gv-lf0.pdf +voice.${VOICENAME}.Fgvm = jar:/marytts/voice/${PACKAGE}/gv-mgc.pdf +voice.${VOICENAME}.Fgvs = jar:/marytts/voice/${PACKAGE}/gv-str.pdf + +# A context features file example for start-up testing. +voice.${VOICENAME}.FeaFile = jar:/marytts/voice/${PACKAGE}/features_example.pfeats + +# Tricky phones file in case there were problematic phones during training, empty otherwise. +voice.${VOICENAME}.trickyPhonesFile = jar:/marytts/voice/${PACKAGE}/trickyPhones.txt + +# Information about Mixed Excitation +voice.${VOICENAME}.useMixExc = true + +# Filters for mixed excitation +# File format: one column with all the taps, where the number of taps per filter = numTaps/numFilters +voice.${VOICENAME}.Fif = jar:/marytts/voice/${PACKAGE}/${MIXEXCFILTERFILE} +# Number of filters in bandpass bank +voice.${VOICENAME}.in = ${NUMMIXEXCFILTERS} + +# Information about acousticModels (if true allows prosody modification specified in MARYXML input) +voice.${VOICENAME}.useAcousticModels = true + +# acoustic models to use (HMM models or carts from other voices can be specified) +#(uncomment to allow prosody modification specified in MARYXML input) +voice.${VOICENAME}.acousticModels = duration F0 + +voice.${VOICENAME}.duration.model = hmm +# voice.${VOICENAME}.duration.data not used for hmm models +voice.${VOICENAME}.duration.attribute = d + +voice.${VOICENAME}.F0.model = hmm +# voice.${VOICENAME}.F0.data not used for hmm models +voice.${VOICENAME}.F0.attribute = f0 + diff --git a/marytts-builder/src/main/resources/marytts/tools/voiceimport/templates/unitselection-voice.config b/marytts-builder/src/main/resources/marytts/tools/voiceimport/templates/unitselection-voice.config index a5d6e2fe..b3a735a3 100644 --- 
a/marytts-builder/src/main/resources/marytts/tools/voiceimport/templates/unitselection-voice.config +++ b/marytts-builder/src/main/resources/marytts/tools/voiceimport/templates/unitselection-voice.config @@ -1,82 +1,82 @@ -# Auto-generated config file for voice ${VOICENAME} - -name = ${VOICENAME} -locale = ${LOCALE} - -#################################################################### -####################### Module settings ########################### -#################################################################### -# For keys ending in ".list", values will be appended across config files, -# so that .list keys can occur in several config files. -# For all other keys, values will be copied to the global config, so -# keys should be unique across config files. - -unitselection.voices.list = \ - ${VOICENAME} - -# If this setting is not present, a default value of 0 is assumed. -# More means higher assumed quality. -voice.${VOICENAME}.wants.to.be.default = 0 - - -# Set your voice specifications -voice.${VOICENAME}.gender = ${GENDER} -voice.${VOICENAME}.locale = ${LOCALE} -voice.${VOICENAME}.domain = ${DOMAIN} -voice.${VOICENAME}.samplingRate = ${SAMPLINGRATE} - -# Relative weight of the target cost function vs. the join cost function -voice.${VOICENAME}.viterbi.wTargetCosts = 0.7 - -# Beam size in dynamic programming: smaller => faster but worse quality. 
-# (set to -1 to disable beam search; very slow but best available quality) -voice.${VOICENAME}.viterbi.beamsize = 100 - -# Java classes to use for the various unit selection components -voice.${VOICENAME}.databaseClass = marytts.unitselection.data.DiphoneUnitDatabase -voice.${VOICENAME}.selectorClass = marytts.unitselection.select.DiphoneUnitSelector -voice.${VOICENAME}.concatenatorClass = marytts.unitselection.concat.OverlapUnitConcatenator -voice.${VOICENAME}.targetCostClass = marytts.unitselection.select.DiphoneFFRTargetCostFunction -voice.${VOICENAME}.joinCostClass = marytts.unitselection.select.JoinCostFeatures -voice.${VOICENAME}.unitReaderClass = marytts.unitselection.data.UnitFileReader -voice.${VOICENAME}.cartReaderClass = marytts.cart.io.MARYCartReader -voice.${VOICENAME}.audioTimelineReaderClass = marytts.unitselection.data.TimelineReader - -# Voice-specific files -voice.${VOICENAME}.featureFile = MARY_BASE/lib/voices/${VOICENAME}/halfphoneFeatures_ac.mry -voice.${VOICENAME}.targetCostWeights = jar:/marytts/voice/${PACKAGE}/halfphoneUnitFeatureDefinition_ac.txt -voice.${VOICENAME}.joinCostFile = MARY_BASE/lib/voices/${VOICENAME}/joinCostFeatures.mry -voice.${VOICENAME}.joinCostWeights = jar:/marytts/voice/${PACKAGE}/joinCostWeights.txt -voice.${VOICENAME}.unitsFile = MARY_BASE/lib/voices/${VOICENAME}/halfphoneUnits.mry -voice.${VOICENAME}.cartFile = jar:/marytts/voice/${PACKAGE}/cart.mry -voice.${VOICENAME}.audioTimelineFile = MARY_BASE/lib/voices/${VOICENAME}/timeline_waveforms.mry -voice.${VOICENAME}.basenameTimeline = MARY_BASE/lib/voices/${VOICENAME}/timeline_basenames.mry - -# Modules to use for predicting acoustic target features for this voice: - -voice.${VOICENAME}.acousticModels = duration F0 midF0 rightF0 - -voice.${VOICENAME}.duration.model = cart -voice.${VOICENAME}.duration.data = jar:/marytts/voice/${PACKAGE}/dur.tree -voice.${VOICENAME}.duration.attribute = d - -voice.${VOICENAME}.F0.model = cart -voice.${VOICENAME}.F0.data = 
jar:/marytts/voice/${PACKAGE}/f0.left.tree -voice.${VOICENAME}.F0.attribute = f0 -voice.${VOICENAME}.F0.attribute.format = (0,%.0f) -voice.${VOICENAME}.F0.predictFrom = firstVowels -voice.${VOICENAME}.F0.applyTo = firstVoicedSegments - -voice.${VOICENAME}.midF0.model = cart -voice.${VOICENAME}.midF0.data = jar:/marytts/voice/${PACKAGE}/f0.mid.tree -voice.${VOICENAME}.midF0.attribute = f0 -voice.${VOICENAME}.midF0.attribute.format = (50,%.0f) -voice.${VOICENAME}.midF0.predictFrom = firstVowels -voice.${VOICENAME}.midF0.applyTo = firstVowels - -voice.${VOICENAME}.rightF0.model = cart -voice.${VOICENAME}.rightF0.data = jar:/marytts/voice/${PACKAGE}/f0.right.tree -voice.${VOICENAME}.rightF0.attribute = f0 -voice.${VOICENAME}.rightF0.attribute.format = (100,%.0f) -voice.${VOICENAME}.rightF0.predictFrom = firstVowels -voice.${VOICENAME}.rightF0.applyTo = lastVoicedSegments +# Auto-generated config file for voice ${VOICENAME} + +name = ${VOICENAME} +locale = ${LOCALE} + +#################################################################### +####################### Module settings ########################### +#################################################################### +# For keys ending in ".list", values will be appended across config files, +# so that .list keys can occur in several config files. +# For all other keys, values will be copied to the global config, so +# keys should be unique across config files. + +unitselection.voices.list = \ + ${VOICENAME} + +# If this setting is not present, a default value of 0 is assumed. +# More means higher assumed quality. +voice.${VOICENAME}.wants.to.be.default = 0 + + +# Set your voice specifications +voice.${VOICENAME}.gender = ${GENDER} +voice.${VOICENAME}.locale = ${LOCALE} +voice.${VOICENAME}.domain = ${DOMAIN} +voice.${VOICENAME}.samplingRate = ${SAMPLINGRATE} + +# Relative weight of the target cost function vs. 
the join cost function +voice.${VOICENAME}.viterbi.wTargetCosts = 0.7 + +# Beam size in dynamic programming: smaller => faster but worse quality. +# (set to -1 to disable beam search; very slow but best available quality) +voice.${VOICENAME}.viterbi.beamsize = 100 + +# Java classes to use for the various unit selection components +voice.${VOICENAME}.databaseClass = marytts.unitselection.data.DiphoneUnitDatabase +voice.${VOICENAME}.selectorClass = marytts.unitselection.select.DiphoneUnitSelector +voice.${VOICENAME}.concatenatorClass = marytts.unitselection.concat.OverlapUnitConcatenator +voice.${VOICENAME}.targetCostClass = marytts.unitselection.select.DiphoneFFRTargetCostFunction +voice.${VOICENAME}.joinCostClass = marytts.unitselection.select.JoinCostFeatures +voice.${VOICENAME}.unitReaderClass = marytts.unitselection.data.UnitFileReader +voice.${VOICENAME}.cartReaderClass = marytts.cart.io.MARYCartReader +voice.${VOICENAME}.audioTimelineReaderClass = marytts.unitselection.data.TimelineReader + +# Voice-specific files +voice.${VOICENAME}.featureFile = MARY_BASE/lib/voices/${VOICENAME}/halfphoneFeatures_ac.mry +voice.${VOICENAME}.targetCostWeights = jar:/marytts/voice/${PACKAGE}/halfphoneUnitFeatureDefinition_ac.txt +voice.${VOICENAME}.joinCostFile = MARY_BASE/lib/voices/${VOICENAME}/joinCostFeatures.mry +voice.${VOICENAME}.joinCostWeights = jar:/marytts/voice/${PACKAGE}/joinCostWeights.txt +voice.${VOICENAME}.unitsFile = MARY_BASE/lib/voices/${VOICENAME}/halfphoneUnits.mry +voice.${VOICENAME}.cartFile = jar:/marytts/voice/${PACKAGE}/cart.mry +voice.${VOICENAME}.audioTimelineFile = MARY_BASE/lib/voices/${VOICENAME}/timeline_waveforms.mry +voice.${VOICENAME}.basenameTimeline = MARY_BASE/lib/voices/${VOICENAME}/timeline_basenames.mry + +# Modules to use for predicting acoustic target features for this voice: + +voice.${VOICENAME}.acousticModels = duration F0 midF0 rightF0 + +voice.${VOICENAME}.duration.model = cart +voice.${VOICENAME}.duration.data = 
jar:/marytts/voice/${PACKAGE}/dur.tree +voice.${VOICENAME}.duration.attribute = d + +voice.${VOICENAME}.F0.model = cart +voice.${VOICENAME}.F0.data = jar:/marytts/voice/${PACKAGE}/f0.left.tree +voice.${VOICENAME}.F0.attribute = f0 +voice.${VOICENAME}.F0.attribute.format = (0,%.0f) +voice.${VOICENAME}.F0.predictFrom = firstVowels +voice.${VOICENAME}.F0.applyTo = firstVoicedSegments + +voice.${VOICENAME}.midF0.model = cart +voice.${VOICENAME}.midF0.data = jar:/marytts/voice/${PACKAGE}/f0.mid.tree +voice.${VOICENAME}.midF0.attribute = f0 +voice.${VOICENAME}.midF0.attribute.format = (50,%.0f) +voice.${VOICENAME}.midF0.predictFrom = firstVowels +voice.${VOICENAME}.midF0.applyTo = firstVowels + +voice.${VOICENAME}.rightF0.model = cart +voice.${VOICENAME}.rightF0.data = jar:/marytts/voice/${PACKAGE}/f0.right.tree +voice.${VOICENAME}.rightF0.attribute = f0 +voice.${VOICENAME}.rightF0.attribute.format = (100,%.0f) +voice.${VOICENAME}.rightF0.predictFrom = firstVowels +voice.${VOICENAME}.rightF0.applyTo = lastVoicedSegments diff --git a/marytts-builder/src/test/resources/marytts/tools/upgrade/en_US-cmu-slt-hsmm-4.x.config b/marytts-builder/src/test/resources/marytts/tools/upgrade/en_US-cmu-slt-hsmm-4.x.config index 7246b045..110e7c78 100644 --- a/marytts-builder/src/test/resources/marytts/tools/upgrade/en_US-cmu-slt-hsmm-4.x.config +++ b/marytts-builder/src/test/resources/marytts/tools/upgrade/en_US-cmu-slt-hsmm-4.x.config @@ -1,111 +1,111 @@ -#Auto-generated config file for voice cmu-slt-hsmm - -name = cmu-slt-hsmm -en_US-voice.version = 4.3.0 - -voice.version = 4.3.0 - -# Declare "group names" as component that other components can require. -# These correspond to abstract "groups" of which this component is an instance. -provides = \ - en_US-voice \ - hmm-voice - -# List the dependencies, as a whitespace-separated list. -# For each required component, an optional minimum version and an optional -# download url can be given. 
-# We can require a component by name or by an abstract "group name" -# as listed under the "provides" element. -requires = \ - en_US \ - marybase - -requires.marybase.version = 4.3.0 -requires.en_US.version = 4.3.0 -requires.en_US.download = http://mary.dfki.de/download/mary-install-4.x.x.jar - -#################################################################### -####################### Module settings ########################### -#################################################################### -# For keys ending in ".list", values will be appended across config files, -# so that .list keys can occur in several config files. -# For all other keys, values will be copied to the global config, so -# keys should be unique across config files. - -hmm.voices.list = \ - cmu-slt-hsmm - -# If this setting is not present, a default value of 0 is assumed. -voice.cmu-slt-hsmm.wants.to.be.default = 0 - -# Set your voice specifications -voice.cmu-slt-hsmm.gender = female -voice.cmu-slt-hsmm.locale = en_US -voice.cmu-slt-hsmm.domain = general -voice.cmu-slt-hsmm.samplingRate = 16000 - -# HMM Voice-specific parameters -# parameters used during models training -# MGC: stage=gamma=0 alpha=0.42 linear gain (default) -# LSP: gamma>0 -# LSP: gamma=1 alpha=0.0 linear gain/log gain -# Mel-LSP: gamma=1 alpha=0.42 log gain -# MGC-LSP: gamma=3 alpha=0.42 log gain -voice.cmu-slt-hsmm.alpha = 0.42 -voice.cmu-slt-hsmm.gamma = 0 -voice.cmu-slt-hsmm.logGain = false - -# Parameter beta for postfiltering -voice.cmu-slt-hsmm.beta = 0.1 - -# HMM Voice-specific files -# Information about trees -voice.cmu-slt-hsmm.Ftd = MARY_BASE/lib/voices/cmu-slt-hsmm/tree-dur.inf -voice.cmu-slt-hsmm.Ftf = MARY_BASE/lib/voices/cmu-slt-hsmm/tree-lf0.inf -voice.cmu-slt-hsmm.Ftm = MARY_BASE/lib/voices/cmu-slt-hsmm/tree-mgc.inf -voice.cmu-slt-hsmm.Fts = MARY_BASE/lib/voices/cmu-slt-hsmm/tree-str.inf - -# Information about means and variances PDFs -voice.cmu-slt-hsmm.Fmd = MARY_BASE/lib/voices/cmu-slt-hsmm/dur.pdf 
-voice.cmu-slt-hsmm.Fmf = MARY_BASE/lib/voices/cmu-slt-hsmm/lf0.pdf -voice.cmu-slt-hsmm.Fmm = MARY_BASE/lib/voices/cmu-slt-hsmm/mgc.pdf -voice.cmu-slt-hsmm.Fms = MARY_BASE/lib/voices/cmu-slt-hsmm/str.pdf - -# Information about Global Mean and Variance PDFs -voice.cmu-slt-hsmm.useGV = true -voice.cmu-slt-hsmm.maxMgcGvIter = 200 -voice.cmu-slt-hsmm.maxLf0GvIter = 200 -voice.cmu-slt-hsmm.Fgvf = MARY_BASE/lib/voices/cmu-slt-hsmm/gv-lf0-littend.pdf -voice.cmu-slt-hsmm.Fgvm = MARY_BASE/lib/voices/cmu-slt-hsmm/gv-mgc-littend.pdf -voice.cmu-slt-hsmm.Fgvs = MARY_BASE/lib/voices/cmu-slt-hsmm/gv-str-littend.pdf - -# A context features file example for start-up testing. -voice.cmu-slt-hsmm.FeaFile = MARY_BASE/lib/voices/cmu-slt-hsmm/arctic_a0422.pfeats - -# Tricky phones file in case there were problematic phones during training, empty otherwise. -voice.cmu-slt-hsmm.trickyPhonesFile = MARY_BASE/lib/voices/cmu-slt-hsmm/trickyPhones.txt - -# Information about Mixed Excitation -voice.cmu-slt-hsmm.useMixExc = true - -# Filters for mixed excitation -# File format: one column with all the taps, where the number of taps per filter = numTaps/numFilters -voice.cmu-slt-hsmm.Fif = MARY_BASE/lib/voices/cmu-slt-hsmm/mix_excitation_filters.txt -# Number of filters in bandpass bank -voice.cmu-slt-hsmm.in = 5 - -# Information about acousticModels (if true allows prosody modification specified in MARYXML input) -voice.cmu-slt-hsmm.useAcousticModels = true - -# acoustic models to use (HMM models or carts from other voices can be specified) -#(uncoment to allow prosody modification specified in MARYXML input) -voice.cmu-slt-hsmm.acousticModels = duration F0 - -voice.cmu-slt-hsmm.duration.model = hmm -voice.cmu-slt-hsmm.duration.data = MARY_BASE/conf/en_US-cmu-slt-hsmm.config -voice.cmu-slt-hsmm.duration.attribute = d - -voice.cmu-slt-hsmm.F0.model = hmm -voice.cmu-slt-hsmm.F0.data = MARY_BASE/conf/en_US-cmu-slt-hsmm.config -voice.cmu-slt-hsmm.F0.attribute = f0 - +#Auto-generated config file for 
voice cmu-slt-hsmm + +name = cmu-slt-hsmm +en_US-voice.version = 4.3.0 + +voice.version = 4.3.0 + +# Declare "group names" as component that other components can require. +# These correspond to abstract "groups" of which this component is an instance. +provides = \ + en_US-voice \ + hmm-voice + +# List the dependencies, as a whitespace-separated list. +# For each required component, an optional minimum version and an optional +# download url can be given. +# We can require a component by name or by an abstract "group name" +# as listed under the "provides" element. +requires = \ + en_US \ + marybase + +requires.marybase.version = 4.3.0 +requires.en_US.version = 4.3.0 +requires.en_US.download = http://mary.dfki.de/download/mary-install-4.x.x.jar + +#################################################################### +####################### Module settings ########################### +#################################################################### +# For keys ending in ".list", values will be appended across config files, +# so that .list keys can occur in several config files. +# For all other keys, values will be copied to the global config, so +# keys should be unique across config files. + +hmm.voices.list = \ + cmu-slt-hsmm + +# If this setting is not present, a default value of 0 is assumed. 
+voice.cmu-slt-hsmm.wants.to.be.default = 0 + +# Set your voice specifications +voice.cmu-slt-hsmm.gender = female +voice.cmu-slt-hsmm.locale = en_US +voice.cmu-slt-hsmm.domain = general +voice.cmu-slt-hsmm.samplingRate = 16000 + +# HMM Voice-specific parameters +# parameters used during models training +# MGC: stage=gamma=0 alpha=0.42 linear gain (default) +# LSP: gamma>0 +# LSP: gamma=1 alpha=0.0 linear gain/log gain +# Mel-LSP: gamma=1 alpha=0.42 log gain +# MGC-LSP: gamma=3 alpha=0.42 log gain +voice.cmu-slt-hsmm.alpha = 0.42 +voice.cmu-slt-hsmm.gamma = 0 +voice.cmu-slt-hsmm.logGain = false + +# Parameter beta for postfiltering +voice.cmu-slt-hsmm.beta = 0.1 + +# HMM Voice-specific files +# Information about trees +voice.cmu-slt-hsmm.Ftd = MARY_BASE/lib/voices/cmu-slt-hsmm/tree-dur.inf +voice.cmu-slt-hsmm.Ftf = MARY_BASE/lib/voices/cmu-slt-hsmm/tree-lf0.inf +voice.cmu-slt-hsmm.Ftm = MARY_BASE/lib/voices/cmu-slt-hsmm/tree-mgc.inf +voice.cmu-slt-hsmm.Fts = MARY_BASE/lib/voices/cmu-slt-hsmm/tree-str.inf + +# Information about means and variances PDFs +voice.cmu-slt-hsmm.Fmd = MARY_BASE/lib/voices/cmu-slt-hsmm/dur.pdf +voice.cmu-slt-hsmm.Fmf = MARY_BASE/lib/voices/cmu-slt-hsmm/lf0.pdf +voice.cmu-slt-hsmm.Fmm = MARY_BASE/lib/voices/cmu-slt-hsmm/mgc.pdf +voice.cmu-slt-hsmm.Fms = MARY_BASE/lib/voices/cmu-slt-hsmm/str.pdf + +# Information about Global Mean and Variance PDFs +voice.cmu-slt-hsmm.useGV = true +voice.cmu-slt-hsmm.maxMgcGvIter = 200 +voice.cmu-slt-hsmm.maxLf0GvIter = 200 +voice.cmu-slt-hsmm.Fgvf = MARY_BASE/lib/voices/cmu-slt-hsmm/gv-lf0-littend.pdf +voice.cmu-slt-hsmm.Fgvm = MARY_BASE/lib/voices/cmu-slt-hsmm/gv-mgc-littend.pdf +voice.cmu-slt-hsmm.Fgvs = MARY_BASE/lib/voices/cmu-slt-hsmm/gv-str-littend.pdf + +# A context features file example for start-up testing. +voice.cmu-slt-hsmm.FeaFile = MARY_BASE/lib/voices/cmu-slt-hsmm/arctic_a0422.pfeats + +# Tricky phones file in case there were problematic phones during training, empty otherwise. 
+voice.cmu-slt-hsmm.trickyPhonesFile = MARY_BASE/lib/voices/cmu-slt-hsmm/trickyPhones.txt + +# Information about Mixed Excitation +voice.cmu-slt-hsmm.useMixExc = true + +# Filters for mixed excitation +# File format: one column with all the taps, where the number of taps per filter = numTaps/numFilters +voice.cmu-slt-hsmm.Fif = MARY_BASE/lib/voices/cmu-slt-hsmm/mix_excitation_filters.txt +# Number of filters in bandpass bank +voice.cmu-slt-hsmm.in = 5 + +# Information about acousticModels (if true allows prosody modification specified in MARYXML input) +voice.cmu-slt-hsmm.useAcousticModels = true + +# acoustic models to use (HMM models or carts from other voices can be specified) +#(uncoment to allow prosody modification specified in MARYXML input) +voice.cmu-slt-hsmm.acousticModels = duration F0 + +voice.cmu-slt-hsmm.duration.model = hmm +voice.cmu-slt-hsmm.duration.data = MARY_BASE/conf/en_US-cmu-slt-hsmm.config +voice.cmu-slt-hsmm.duration.attribute = d + +voice.cmu-slt-hsmm.F0.model = hmm +voice.cmu-slt-hsmm.F0.data = MARY_BASE/conf/en_US-cmu-slt-hsmm.config +voice.cmu-slt-hsmm.F0.attribute = f0 + diff --git a/marytts-builder/src/test/resources/marytts/tools/upgrade/en_US-cmu-slt-hsmm-5.config b/marytts-builder/src/test/resources/marytts/tools/upgrade/en_US-cmu-slt-hsmm-5.config index a197f02e..48e225e2 100644 --- a/marytts-builder/src/test/resources/marytts/tools/upgrade/en_US-cmu-slt-hsmm-5.config +++ b/marytts-builder/src/test/resources/marytts/tools/upgrade/en_US-cmu-slt-hsmm-5.config @@ -1,86 +1,86 @@ -#Auto-generated config file for voice cmu-slt-hsmm - -name = cmu-slt-hsmm -locale = en_US - -#################################################################### -####################### Module settings ########################### -#################################################################### -# For keys ending in ".list", values will be appended across config files, -# so that .list keys can occur in several config files. 
-# For all other keys, values will be copied to the global config, so -# keys should be unique across config files. - -hmm.voices.list = cmu-slt-hsmm - -# If this setting is not present, a default value of 0 is assumed. -voice.cmu-slt-hsmm.wants.to.be.default = 0 - -# Set your voice specifications -voice.cmu-slt-hsmm.gender = female -voice.cmu-slt-hsmm.locale = en_US -voice.cmu-slt-hsmm.domain = general -voice.cmu-slt-hsmm.samplingRate = 16000 - -# HMM Voice-specific parameters -# parameters used during models training -# MGC: stage=gamma=0 alpha=0.42 linear gain (default) -# LSP: gamma>0 -# LSP: gamma=1 alpha=0.0 linear gain/log gain -# Mel-LSP: gamma=1 alpha=0.42 log gain -# MGC-LSP: gamma=3 alpha=0.42 log gain -voice.cmu-slt-hsmm.alpha = 0.42 -voice.cmu-slt-hsmm.gamma = 0 -voice.cmu-slt-hsmm.logGain = false - -# Parameter beta for postfiltering -voice.cmu-slt-hsmm.beta = 0.1 - -# HMM Voice-specific files -# Information about trees -voice.cmu-slt-hsmm.Ftd = jar:/marytts/voice/CmuSltHsmm/tree-dur.inf -voice.cmu-slt-hsmm.Ftf = jar:/marytts/voice/CmuSltHsmm/tree-lf0.inf -voice.cmu-slt-hsmm.Ftm = jar:/marytts/voice/CmuSltHsmm/tree-mgc.inf -voice.cmu-slt-hsmm.Fts = jar:/marytts/voice/CmuSltHsmm/tree-str.inf - -# Information about means and variances PDFs -voice.cmu-slt-hsmm.Fmd = jar:/marytts/voice/CmuSltHsmm/dur.pdf -voice.cmu-slt-hsmm.Fmf = jar:/marytts/voice/CmuSltHsmm/lf0.pdf -voice.cmu-slt-hsmm.Fmm = jar:/marytts/voice/CmuSltHsmm/mgc.pdf -voice.cmu-slt-hsmm.Fms = jar:/marytts/voice/CmuSltHsmm/str.pdf - -# Information about Global Mean and Variance PDFs -voice.cmu-slt-hsmm.useGV = true -voice.cmu-slt-hsmm.maxMgcGvIter = 200 -voice.cmu-slt-hsmm.maxLf0GvIter = 200 -voice.cmu-slt-hsmm.Fgvf = jar:/marytts/voice/CmuSltHsmm/gv-lf0-littend.pdf -voice.cmu-slt-hsmm.Fgvm = jar:/marytts/voice/CmuSltHsmm/gv-mgc-littend.pdf -voice.cmu-slt-hsmm.Fgvs = jar:/marytts/voice/CmuSltHsmm/gv-str-littend.pdf - -# A context features file example for start-up testing. 
-voice.cmu-slt-hsmm.FeaFile = jar:/marytts/voice/CmuSltHsmm/arctic_a0422.pfeats - -# Tricky phones file in case there were problematic phones during training, empty otherwise. -voice.cmu-slt-hsmm.trickyPhonesFile = jar:/marytts/voice/CmuSltHsmm/trickyPhones.txt - -# Information about Mixed Excitation -voice.cmu-slt-hsmm.useMixExc = true - -# Filters for mixed excitation -# File format: one column with all the taps, where the number of taps per filter = numTaps/numFilters -voice.cmu-slt-hsmm.Fif = jar:/marytts/voice/CmuSltHsmm/mix_excitation_filters.txt -# Number of filters in bandpass bank -voice.cmu-slt-hsmm.in = 5 - -# Information about acousticModels (if true allows prosody modification specified in MARYXML input) -voice.cmu-slt-hsmm.useAcousticModels = true - -# acoustic models to use (HMM models or carts from other voices can be specified) -#(uncoment to allow prosody modification specified in MARYXML input) -voice.cmu-slt-hsmm.acousticModels = duration F0 - -voice.cmu-slt-hsmm.duration.model = hmm -voice.cmu-slt-hsmm.duration.attribute = d - -voice.cmu-slt-hsmm.F0.model = hmm -voice.cmu-slt-hsmm.F0.attribute = f0 +#Auto-generated config file for voice cmu-slt-hsmm + +name = cmu-slt-hsmm +locale = en_US + +#################################################################### +####################### Module settings ########################### +#################################################################### +# For keys ending in ".list", values will be appended across config files, +# so that .list keys can occur in several config files. +# For all other keys, values will be copied to the global config, so +# keys should be unique across config files. + +hmm.voices.list = cmu-slt-hsmm + +# If this setting is not present, a default value of 0 is assumed. 
+voice.cmu-slt-hsmm.wants.to.be.default = 0 + +# Set your voice specifications +voice.cmu-slt-hsmm.gender = female +voice.cmu-slt-hsmm.locale = en_US +voice.cmu-slt-hsmm.domain = general +voice.cmu-slt-hsmm.samplingRate = 16000 + +# HMM Voice-specific parameters +# parameters used during models training +# MGC: stage=gamma=0 alpha=0.42 linear gain (default) +# LSP: gamma>0 +# LSP: gamma=1 alpha=0.0 linear gain/log gain +# Mel-LSP: gamma=1 alpha=0.42 log gain +# MGC-LSP: gamma=3 alpha=0.42 log gain +voice.cmu-slt-hsmm.alpha = 0.42 +voice.cmu-slt-hsmm.gamma = 0 +voice.cmu-slt-hsmm.logGain = false + +# Parameter beta for postfiltering +voice.cmu-slt-hsmm.beta = 0.1 + +# HMM Voice-specific files +# Information about trees +voice.cmu-slt-hsmm.Ftd = jar:/marytts/voice/CmuSltHsmm/tree-dur.inf +voice.cmu-slt-hsmm.Ftf = jar:/marytts/voice/CmuSltHsmm/tree-lf0.inf +voice.cmu-slt-hsmm.Ftm = jar:/marytts/voice/CmuSltHsmm/tree-mgc.inf +voice.cmu-slt-hsmm.Fts = jar:/marytts/voice/CmuSltHsmm/tree-str.inf + +# Information about means and variances PDFs +voice.cmu-slt-hsmm.Fmd = jar:/marytts/voice/CmuSltHsmm/dur.pdf +voice.cmu-slt-hsmm.Fmf = jar:/marytts/voice/CmuSltHsmm/lf0.pdf +voice.cmu-slt-hsmm.Fmm = jar:/marytts/voice/CmuSltHsmm/mgc.pdf +voice.cmu-slt-hsmm.Fms = jar:/marytts/voice/CmuSltHsmm/str.pdf + +# Information about Global Mean and Variance PDFs +voice.cmu-slt-hsmm.useGV = true +voice.cmu-slt-hsmm.maxMgcGvIter = 200 +voice.cmu-slt-hsmm.maxLf0GvIter = 200 +voice.cmu-slt-hsmm.Fgvf = jar:/marytts/voice/CmuSltHsmm/gv-lf0-littend.pdf +voice.cmu-slt-hsmm.Fgvm = jar:/marytts/voice/CmuSltHsmm/gv-mgc-littend.pdf +voice.cmu-slt-hsmm.Fgvs = jar:/marytts/voice/CmuSltHsmm/gv-str-littend.pdf + +# A context features file example for start-up testing. +voice.cmu-slt-hsmm.FeaFile = jar:/marytts/voice/CmuSltHsmm/arctic_a0422.pfeats + +# Tricky phones file in case there were problematic phones during training, empty otherwise. 
+voice.cmu-slt-hsmm.trickyPhonesFile = jar:/marytts/voice/CmuSltHsmm/trickyPhones.txt + +# Information about Mixed Excitation +voice.cmu-slt-hsmm.useMixExc = true + +# Filters for mixed excitation +# File format: one column with all the taps, where the number of taps per filter = numTaps/numFilters +voice.cmu-slt-hsmm.Fif = jar:/marytts/voice/CmuSltHsmm/mix_excitation_filters.txt +# Number of filters in bandpass bank +voice.cmu-slt-hsmm.in = 5 + +# Information about acousticModels (if true allows prosody modification specified in MARYXML input) +voice.cmu-slt-hsmm.useAcousticModels = true + +# acoustic models to use (HMM models or carts from other voices can be specified) +#(uncoment to allow prosody modification specified in MARYXML input) +voice.cmu-slt-hsmm.acousticModels = duration F0 + +voice.cmu-slt-hsmm.duration.model = hmm +voice.cmu-slt-hsmm.duration.attribute = d + +voice.cmu-slt-hsmm.F0.model = hmm +voice.cmu-slt-hsmm.F0.attribute = f0 diff --git a/marytts-client/src/main/resources/marytts/tools/emospeak/sampletexts_de.txt b/marytts-client/src/main/resources/marytts/tools/emospeak/sampletexts_de.txt index e29f66c9..57ec3c5a 100644 --- a/marytts-client/src/main/resources/marytts/tools/emospeak/sampletexts_de.txt +++ b/marytts-client/src/main/resources/marytts/tools/emospeak/sampletexts_de.txt @@ -1,11 +1,11 @@ -Das habe ich jetzt schon drei Mal gesagt! -Ach bin ich aufgeregt! Ich kann's kaum erwarten! -Hilfe, da hinten kommt ein Monster! -Hau endlich ab! Ich habe die Nase voll von dir! -Mensch, jetzt lass mich halt in Ruhe! -Ach Mensch, ich freue mich so für dich! -Heute ist wirklich ein schöner Tag! -Und dann lebten sie glücklich und zufrieden, bis an ihr Lebensende. -Ach lass mich doch in Frieden mit diesem Schwachsinn. -Jetzt ist alles zu spät. Ich gebe auf. +Das habe ich jetzt schon drei Mal gesagt! +Ach bin ich aufgeregt! Ich kann's kaum erwarten! +Hilfe, da hinten kommt ein Monster! +Hau endlich ab! Ich habe die Nase voll von dir! 
+Mensch, jetzt lass mich halt in Ruhe! +Ach Mensch, ich freue mich so für dich! +Heute ist wirklich ein schöner Tag! +Und dann lebten sie glücklich und zufrieden, bis an ihr Lebensende. +Ach lass mich doch in Frieden mit diesem Schwachsinn. +Jetzt ist alles zu spät. Ich gebe auf. Mensch, ist das so langweilig hier! \ No newline at end of file diff --git a/marytts-client/src/main/resources/marytts/tools/emospeak/sampletexts_en.txt b/marytts-client/src/main/resources/marytts/tools/emospeak/sampletexts_en.txt index 40e04f50..1370393d 100644 --- a/marytts-client/src/main/resources/marytts/tools/emospeak/sampletexts_en.txt +++ b/marytts-client/src/main/resources/marytts/tools/emospeak/sampletexts_en.txt @@ -1,11 +1,11 @@ -I have said this for the third time now! -Oh, I'm so excited! I can hardly await it! -Help, there's a monster coming behind me! -Go away now! I am fed up with you! -Will you finally leave me alone! -Wow, I am delighted for you! -Today is really a nice day. -And they lived happily ever after. -Ah, stop bugging me with that nonsense. -It's all too late now. I give up. -Man, it's so boring here. +I have said this for the third time now! +Oh, I'm so excited! I can hardly await it! +Help, there's a monster coming behind me! +Go away now! I am fed up with you! +Will you finally leave me alone! +Wow, I am delighted for you! +Today is really a nice day. +And they lived happily ever after. +Ah, stop bugging me with that nonsense. +It's all too late now. I give up. +Man, it's so boring here. diff --git a/marytts-common/src/main/resources/marytts/util/dom/MaryXML.xsd b/marytts-common/src/main/resources/marytts/util/dom/MaryXML.xsd index 9e9d9f82..0507a568 100644 --- a/marytts-common/src/main/resources/marytts/util/dom/MaryXML.xsd +++ b/marytts-common/src/main/resources/marytts/util/dom/MaryXML.xsd @@ -1,713 +1,713 @@ - - - - MaryXML v0.5, 2009-12-18 - - - Portions Copyright 2000-2009 DFKI GmbH. -All Rights Reserved. Use is subject to license terms. 
- -Permission is hereby granted, free of charge, to use and distribute -this software and its documentation without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of this work, and to -permit persons to whom this work is furnished to do so, subject to -the following conditions: - -1. The code must retain the above copyright notice, this list of - conditions and the following disclaimer. -2. Any modifications must be clearly marked as such. -3. Original authors' names are not deleted. -4. The authors' names are not used to endorse or promote products - derived from this software without specific prior written - permission. - -DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH -REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE -CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL -DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR -PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. - - - - This Schema uses parts of the XML Schema - draft, SSML 1.0 Core Schema (20020222), for which the - copyright notice is reproduced here: - Copyright 1998-2002 W3C (MIT, - INRIA, Keio), All Rights Reserved. Permission to use, - copy, modify and distribute the SSML core schema and its - accompanying documentation for any purpose and without - fee is hereby granted in perpetuity, provided that the - above copyright notice and this paragraph appear in all - copies. The copyright holders make no representation - about the suitability of the schema for any purpose. - It is provided "as is" without expressed or implied - warranty. 
- - - - ********************************************* - Importing dependent namespaces - ********************************************* - - - - - - - ********************************************* - General Datatypes - ********************************************* - - - - - Duration follows "Times" in - CCS specification; e.g. "25ms", "3s" - - - - - - - - Relative change: e.g. +10, -5.5, - +15%, -9.0% - - - - - - - - Relative change only in percentages: e.g. - +15%, -9.0% - - - - - - - - Relative change in semi-tones: - e.g. +10st, -5.2st - - - - - - - - Absolute value in semi-tones: - e.g. 10st, 5.2st - - - - - - - - Absolute value in Hertz: - e.g. 10Hz, 80Hz - - - - - - - - The positive-decimal type specifies a positive decimal value. - - - - - - - - pitch labels given by ssml - e.g. x-low, high - - - - - - - - - - - - - rate labels given by ssml - e.g. x-low, high - - - - - - - - - - - - - values for volume - - - - - - - - - - - - - 0.0 - 100.0 - - - - - - - - - - say-as datatypes - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ********************************************* - Elements - ********************************************* - - - - - - - - - - - The 'allowed-within-sentence' - group uses this abstract element. Elements with aws as - their substitution class are then alternatives for - 'allowed-within-sentence'. - - - - - - - - - - - The 'allowed-within-phrase' - group uses this abstract element. Elements with awp as - their substitution class are then alternatives for - 'allowed-within-phrase'. 
- - - - - - - - - - - ********************************************* - Structure Elements - ********************************************* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ********************************************* - Elements allowed within a sentence - (and not within a phrase) - ********************************************* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ********************************************* - Elements allowed within a phrase - ********************************************* - In intermediate processing steps, it is necessary - to allow those elements destinated to be within a phrase - to occur directly under a sentence node (before the - phrase node is created!). Therefore, all elements allowed within a - phrase are also allowed within a sentence. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - The pitch baseline declination during a phrase - (difference between baseline start and end). - Default: ca. -2.5st - - - - - - - - - - - - - - - The pitch range development during a phrase - (difference between range start and end). - Default: ca. -0.5st - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + MaryXML v0.5, 2009-12-18 + + + Portions Copyright 2000-2009 DFKI GmbH. +All Rights Reserved. Use is subject to license terms. - - - - - - - +Permission is hereby granted, free of charge, to use and distribute +this software and its documentation without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of this work, and to +permit persons to whom this work is furnished to do so, subject to +the following conditions: + +1. 
The code must retain the above copyright notice, this list of + conditions and the following disclaimer. +2. Any modifications must be clearly marked as such. +3. Original authors' names are not deleted. +4. The authors' names are not used to endorse or promote products + derived from this software without specific prior written + permission. + +DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE +CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL +DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR +PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. + + + + This Schema uses parts of the XML Schema + draft, SSML 1.0 Core Schema (20020222), for which the + copyright notice is reproduced here: + Copyright 1998-2002 W3C (MIT, + INRIA, Keio), All Rights Reserved. Permission to use, + copy, modify and distribute the SSML core schema and its + accompanying documentation for any purpose and without + fee is hereby granted in perpetuity, provided that the + above copyright notice and this paragraph appear in all + copies. The copyright holders make no representation + about the suitability of the schema for any purpose. + It is provided "as is" without expressed or implied + warranty. + + + + ********************************************* + Importing dependent namespaces + ********************************************* + + + + + + + ********************************************* + General Datatypes + ********************************************* + + + + + Duration follows "Times" in + CCS specification; e.g. "25ms", "3s" + + + + + + + + Relative change: e.g. +10, -5.5, + +15%, -9.0% + + + + + + + + Relative change only in percentages: e.g. 
+ +15%, -9.0% + + + + + + + + Relative change in semi-tones: + e.g. +10st, -5.2st + + + + + + + + Absolute value in semi-tones: + e.g. 10st, 5.2st + + + + + + + + Absolute value in Hertz: + e.g. 10Hz, 80Hz + + + + + + + + The positive-decimal type specifies a positive decimal value. + + + + + + + + pitch labels given by ssml + e.g. x-low, high + + + + + + + + + + + + + rate labels given by ssml + e.g. x-low, high + + + + + + + + + + + + + values for volume + + + + + + + + + + + + + 0.0 - 100.0 + + + + + + + + + + say-as datatypes + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ********************************************* + Elements + ********************************************* + + + + + + + + + + + The 'allowed-within-sentence' + group uses this abstract element. Elements with aws as + their substitution class are then alternatives for + 'allowed-within-sentence'. + + + + + + + + + + + The 'allowed-within-phrase' + group uses this abstract element. Elements with awp as + their substitution class are then alternatives for + 'allowed-within-phrase'. 
+ + + + + + + + + + + ********************************************* + Structure Elements + ********************************************* + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + ********************************************* + Elements allowed within a sentence + (and not within a phrase) + ********************************************* + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ********************************************* + Elements allowed within a phrase + ********************************************* + In intermediate processing steps, it is necessary + to allow those elements destinated to be within a phrase + to occur directly under a sentence node (before the + phrase node is created!). Therefore, all elements allowed within a + phrase are also allowed within a sentence. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The pitch baseline declination during a phrase + (difference between baseline start and end). + Default: ca. -2.5st + + + + + + + + + + + + + + + The pitch range development during a phrase + (difference between range start and end). + Default: ca. 
-0.5st + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/marytts-jungle/lib/modules/tib/cap/phoneme-list-tib.xml b/marytts-jungle/lib/modules/tib/cap/phoneme-list-tib.xml index 5249c0ae..aa4309d6 100644 --- a/marytts-jungle/lib/modules/tib/cap/phoneme-list-tib.xml +++ b/marytts-jungle/lib/modules/tib/cap/phoneme-list-tib.xml @@ -1,124 +1,124 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/marytts-jungle/lib/modules/tib/cap/tonerule-params-tib.xml b/marytts-jungle/lib/modules/tib/cap/tonerule-params-tib.xml index 5c0392f0..eb504893 100644 --- a/marytts-jungle/lib/modules/tib/cap/tonerule-params-tib.xml +++ b/marytts-jungle/lib/modules/tib/cap/tonerule-params-tib.xml @@ -1,117 +1,117 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/marytts-jungle/lib/modules/tib/prosody/tobipredparams-tib.xml b/marytts-jungle/lib/modules/tib/prosody/tobipredparams-tib.xml index fb7bfbb5..1c4bda31 100644 --- a/marytts-jungle/lib/modules/tib/prosody/tobipredparams-tib.xml +++ b/marytts-jungle/lib/modules/tib/prosody/tobipredparams-tib.xml @@ -1,113 +1,113 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/marytts-jungle/src/main/java/marytts/client/air/BMLSpeechPsydule.java b/marytts-jungle/src/main/java/marytts/client/air/BMLSpeechPsydule.java index eff4a6b5..3eae0f2e 100644 --- a/marytts-jungle/src/main/java/marytts/client/air/BMLSpeechPsydule.java +++ b/marytts-jungle/src/main/java/marytts/client/air/BMLSpeechPsydule.java @@ -1,223 +1,223 @@ -/** - * Portions Copyright 2005, Communicative Machines - * Portions Copyright 2006 DFKI GmbH. - * All Rights Reserved. Use is subject to license terms. - * - * Permission is hereby granted, free of charge, to use and distribute - * this software and its documentation without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of this work, and to - * permit persons to whom this work is furnished to do so, subject to - * the following conditions: - * - * 1. The code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * 2. Any modifications must be clearly marked as such. - * 3. Original authors' names are not deleted. - * 4. The authors' names are not used to endorse or promote products - * derived from this software without specific prior written - * permission. - * - * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH - * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE - * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL - * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR - * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF - * THIS SOFTWARE. 
- */ -package marytts.client.air; - -import java.io.ByteArrayOutputStream; -import java.io.StringReader; -import java.io.StringWriter; - -import javax.xml.transform.Source; -import javax.xml.transform.Templates; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.URIResolver; -import javax.xml.transform.stream.StreamResult; -import javax.xml.transform.stream.StreamSource; - -import com.cmlabs.air.DataSample; -import com.cmlabs.air.Message; -import com.cmlabs.air.Time; - - -/** - * A PsyClone / OpenAIR module processing BML ("Behavior Markup Language") - * input and generating audio and enriched-BML output. - * This can be used as part of a "BML realiser" component, - * e.g. together with the Greta agent. - * @author Marc Schröder - * - */ -public class BMLSpeechPsydule extends MarySpeechPsydule -{ - protected String BMLOUTPUTTYPE = "Greta.Data.EnrichedBMLCode"; - private Templates bml2ssmlStylesheet; - private Templates mergeMaryxmlIntoBMLStylesheet; - - public BMLSpeechPsydule(String airhost, int airport) throws Exception - { - super(airhost, airport); - } - - protected void initialize() throws Exception - { - name = "BMLSpeechPsydule"; - WHITEBOARD = "Greta.Whiteboard"; - INPUTTYPE = "Greta.Data.BMLCode"; - AUDIOOUTPUTTYPE = "Greta.Data.Audio"; - DEFAULTVOICE = System.getProperty("voice.default", "kevin16"); - BMLOUTPUTTYPE = "Greta.Data.EnrichedBMLCode"; - - TransformerFactory tFactory = TransformerFactory.newInstance(); - - tFactory.setURIResolver(new URIResolver() { - public Source resolve(String href, String base) { - if (href.endsWith("ssml-to-mary.xsl")) { - return new StreamSource(this.getClass().getResourceAsStream("ssml-to-mary.xsl")); - } else { - return null; - } - } - }); - StreamSource stylesheetStream = - new StreamSource(this.getClass().getResourceAsStream("bml-to-ssml.xsl")); - bml2ssmlStylesheet = tFactory.newTemplates(stylesheetStream); - stylesheetStream = new 
StreamSource(this.getClass().getResourceAsStream("merge-maryxml-into-bml.xsl")); - mergeMaryxmlIntoBMLStylesheet = tFactory.newTemplates(stylesheetStream); - } - - - - protected void processInput(String input) throws Exception - { - System.out.println(new Time().printTime()+" - started processing"); - String bml = input; - String ssml = bml2ssml(bml); - System.out.println(new Time().printTime()+" - converted to SSML"); - String acoustparams = ssml2acoustparams(ssml); - System.out.println(new Time().printTime()+" - created ACOUSTPARAMS"); - String enrichedBML = mergeBmlAndAcoustparams(bml, acoustparams); - System.out.println(new Time().printTime()+" - merged phone times into BML"); - // post enriched BML to whiteboard - plug.postMessage(WHITEBOARD, BMLOUTPUTTYPE, enrichedBML, "", ""); - System.out.println(new Time().printTime()+" - posted enriched BML"); - byte[] audio = acoustparams2audio(acoustparams); - System.out.println(new Time().printTime()+" - created audio ("+audio.length+" bytes)"); - // post audio to whiteboard - DataSample audioData = new DataSample(); - //audioData.fromBinaryBuffer(0, audio, 0, audio.length); - audioData.data = audio; - audioData.size = audio.length; - Message audioMessage = new Message(name, WHITEBOARD, AUDIOOUTPUTTYPE, audioData); - plug.postMessage(WHITEBOARD, audioMessage, ""); - System.out.println(new Time().printTime()+" - posted audio"); - - } - - /** - * Extract the SSML section from the BML data. - * @param bml string representation of a full BML document - * containing SSML. 
- * @return string representation of a standalone SSML document - * corresponding to the embedded SSML tags - */ - private String bml2ssml(String bml) throws Exception - { - // Extract from BML string the SSML string: - StreamSource bmlSource = new StreamSource(new StringReader(bml)); - StringWriter ssmlWriter = new StringWriter(); - StreamResult ssmlResult = new StreamResult(ssmlWriter); - // Transformer is not guaranteed to be thread-safe -- therefore, we - // need one per thread. - Transformer transformer = bml2ssmlStylesheet.newTransformer(); - transformer.transform(bmlSource, ssmlResult); - return ssmlWriter.toString(); - - } - - private String mergeBmlAndAcoustparams(String bml, final String acoustparams) throws Exception - { - StreamSource bmlSource = new StreamSource(new StringReader(bml)); - StringWriter mergedWriter = new StringWriter(); - StreamResult mergedResult = new StreamResult(mergedWriter); - // Transformer is not guaranteed to be thread-safe -- therefore, we - // need one per thread. 
- Transformer mergingTransformer = mergeMaryxmlIntoBMLStylesheet.newTransformer(); - mergingTransformer.setURIResolver(new URIResolver() { - public Source resolve(String href, String base) { - if (href == null) { - return null; - } else if (href.equals("mary.acoustparams")) { - return new StreamSource(new StringReader(acoustparams)); - } else { - return null; - } - } - }); - - mergingTransformer.transform(bmlSource, mergedResult); - return mergedWriter.toString(); - } - - private String ssml2acoustparams(String ssml) throws Exception - { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - mary.process(ssml, "SSML", "ACOUSTPARAMS", "en_US", "WAVE", DEFAULTVOICE, baos); - return new String(baos.toByteArray(), "UTF-8"); - } - - private byte[] acoustparams2audio(String acoustparams) throws Exception - { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - mary.process(acoustparams, "ACOUSTPARAMS", "AUDIO", "en_US", "WAVE", DEFAULTVOICE, baos); - return baos.toByteArray(); - } - - - - - - - - - - - - - - /** - * A standalone program which routes messages between a - * PsyClone/OpenAIR server and a MARY server. - * The server host and port for both servers can be given as - * system properties: Mary server: "server.host" - * (default: cling.dfki.uni-sb.de) and "server.port" (default: 59125); - * OpenAIR server: "airserver.host" (default: localhost) and - * "airserver.port" (default: 10000). - * This server will listen for incoming BML files, and - * generate Enriched BML and Audio data from it. - * In the current version, the name of the OpenAIR whiteboard - * is hard-coded as "Greta.Whiteboard"; the input data type is - * "Greta.Data.BMLCode"; the enriched BML output type is - * "Greta.Data.EnrichedBMLCode"; and the audio output is - * "Greta.Data.Audio". 
- * @param args - * @throws Exception - */ - public static void main(String[] args) throws Exception - { - - String airhost = System.getProperty("airserver.host", "localhost"); - int airport = Integer.getInteger("airserver.port", 10000).intValue(); - - BMLSpeechPsydule reader = new BMLSpeechPsydule(airhost, airport); - reader.listenAndProcess(); - } - - -} +/** + * Portions Copyright 2005, Communicative Machines + * Portions Copyright 2006 DFKI GmbH. + * All Rights Reserved. Use is subject to license terms. + * + * Permission is hereby granted, free of charge, to use and distribute + * this software and its documentation without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of this work, and to + * permit persons to whom this work is furnished to do so, subject to + * the following conditions: + * + * 1. The code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * 2. Any modifications must be clearly marked as such. + * 3. Original authors' names are not deleted. + * 4. The authors' names are not used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE + * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL + * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR + * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. 
+ */ +package marytts.client.air; + +import java.io.ByteArrayOutputStream; +import java.io.StringReader; +import java.io.StringWriter; + +import javax.xml.transform.Source; +import javax.xml.transform.Templates; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.URIResolver; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; + +import com.cmlabs.air.DataSample; +import com.cmlabs.air.Message; +import com.cmlabs.air.Time; + + +/** + * A PsyClone / OpenAIR module processing BML ("Behavior Markup Language") + * input and generating audio and enriched-BML output. + * This can be used as part of a "BML realiser" component, + * e.g. together with the Greta agent. + * @author Marc Schröder + * + */ +public class BMLSpeechPsydule extends MarySpeechPsydule +{ + protected String BMLOUTPUTTYPE = "Greta.Data.EnrichedBMLCode"; + private Templates bml2ssmlStylesheet; + private Templates mergeMaryxmlIntoBMLStylesheet; + + public BMLSpeechPsydule(String airhost, int airport) throws Exception + { + super(airhost, airport); + } + + protected void initialize() throws Exception + { + name = "BMLSpeechPsydule"; + WHITEBOARD = "Greta.Whiteboard"; + INPUTTYPE = "Greta.Data.BMLCode"; + AUDIOOUTPUTTYPE = "Greta.Data.Audio"; + DEFAULTVOICE = System.getProperty("voice.default", "kevin16"); + BMLOUTPUTTYPE = "Greta.Data.EnrichedBMLCode"; + + TransformerFactory tFactory = TransformerFactory.newInstance(); + + tFactory.setURIResolver(new URIResolver() { + public Source resolve(String href, String base) { + if (href.endsWith("ssml-to-mary.xsl")) { + return new StreamSource(this.getClass().getResourceAsStream("ssml-to-mary.xsl")); + } else { + return null; + } + } + }); + StreamSource stylesheetStream = + new StreamSource(this.getClass().getResourceAsStream("bml-to-ssml.xsl")); + bml2ssmlStylesheet = tFactory.newTemplates(stylesheetStream); + stylesheetStream = new 
StreamSource(this.getClass().getResourceAsStream("merge-maryxml-into-bml.xsl")); + mergeMaryxmlIntoBMLStylesheet = tFactory.newTemplates(stylesheetStream); + } + + + + protected void processInput(String input) throws Exception + { + System.out.println(new Time().printTime()+" - started processing"); + String bml = input; + String ssml = bml2ssml(bml); + System.out.println(new Time().printTime()+" - converted to SSML"); + String acoustparams = ssml2acoustparams(ssml); + System.out.println(new Time().printTime()+" - created ACOUSTPARAMS"); + String enrichedBML = mergeBmlAndAcoustparams(bml, acoustparams); + System.out.println(new Time().printTime()+" - merged phone times into BML"); + // post enriched BML to whiteboard + plug.postMessage(WHITEBOARD, BMLOUTPUTTYPE, enrichedBML, "", ""); + System.out.println(new Time().printTime()+" - posted enriched BML"); + byte[] audio = acoustparams2audio(acoustparams); + System.out.println(new Time().printTime()+" - created audio ("+audio.length+" bytes)"); + // post audio to whiteboard + DataSample audioData = new DataSample(); + //audioData.fromBinaryBuffer(0, audio, 0, audio.length); + audioData.data = audio; + audioData.size = audio.length; + Message audioMessage = new Message(name, WHITEBOARD, AUDIOOUTPUTTYPE, audioData); + plug.postMessage(WHITEBOARD, audioMessage, ""); + System.out.println(new Time().printTime()+" - posted audio"); + + } + + /** + * Extract the SSML section from the BML data. + * @param bml string representation of a full BML document + * containing SSML. 
+ * @return string representation of a standalone SSML document + * corresponding to the embedded SSML tags + */ + private String bml2ssml(String bml) throws Exception + { + // Extract from BML string the SSML string: + StreamSource bmlSource = new StreamSource(new StringReader(bml)); + StringWriter ssmlWriter = new StringWriter(); + StreamResult ssmlResult = new StreamResult(ssmlWriter); + // Transformer is not guaranteed to be thread-safe -- therefore, we + // need one per thread. + Transformer transformer = bml2ssmlStylesheet.newTransformer(); + transformer.transform(bmlSource, ssmlResult); + return ssmlWriter.toString(); + + } + + private String mergeBmlAndAcoustparams(String bml, final String acoustparams) throws Exception + { + StreamSource bmlSource = new StreamSource(new StringReader(bml)); + StringWriter mergedWriter = new StringWriter(); + StreamResult mergedResult = new StreamResult(mergedWriter); + // Transformer is not guaranteed to be thread-safe -- therefore, we + // need one per thread. 
+ Transformer mergingTransformer = mergeMaryxmlIntoBMLStylesheet.newTransformer(); + mergingTransformer.setURIResolver(new URIResolver() { + public Source resolve(String href, String base) { + if (href == null) { + return null; + } else if (href.equals("mary.acoustparams")) { + return new StreamSource(new StringReader(acoustparams)); + } else { + return null; + } + } + }); + + mergingTransformer.transform(bmlSource, mergedResult); + return mergedWriter.toString(); + } + + private String ssml2acoustparams(String ssml) throws Exception + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + mary.process(ssml, "SSML", "ACOUSTPARAMS", "en_US", "WAVE", DEFAULTVOICE, baos); + return new String(baos.toByteArray(), "UTF-8"); + } + + private byte[] acoustparams2audio(String acoustparams) throws Exception + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + mary.process(acoustparams, "ACOUSTPARAMS", "AUDIO", "en_US", "WAVE", DEFAULTVOICE, baos); + return baos.toByteArray(); + } + + + + + + + + + + + + + + /** + * A standalone program which routes messages between a + * PsyClone/OpenAIR server and a MARY server. + * The server host and port for both servers can be given as + * system properties: Mary server: "server.host" + * (default: cling.dfki.uni-sb.de) and "server.port" (default: 59125); + * OpenAIR server: "airserver.host" (default: localhost) and + * "airserver.port" (default: 10000). + * This server will listen for incoming BML files, and + * generate Enriched BML and Audio data from it. + * In the current version, the name of the OpenAIR whiteboard + * is hard-coded as "Greta.Whiteboard"; the input data type is + * "Greta.Data.BMLCode"; the enriched BML output type is + * "Greta.Data.EnrichedBMLCode"; and the audio output is + * "Greta.Data.Audio". 
+ * @param args + * @throws Exception + */ + public static void main(String[] args) throws Exception + { + + String airhost = System.getProperty("airserver.host", "localhost"); + int airport = Integer.getInteger("airserver.port", 10000).intValue(); + + BMLSpeechPsydule reader = new BMLSpeechPsydule(airhost, airport); + reader.listenAndProcess(); + } + + +} diff --git a/marytts-jungle/src/main/java/marytts/client/air/MarySpeechPsydule.java b/marytts-jungle/src/main/java/marytts/client/air/MarySpeechPsydule.java index da50bc79..2138b5a8 100644 --- a/marytts-jungle/src/main/java/marytts/client/air/MarySpeechPsydule.java +++ b/marytts-jungle/src/main/java/marytts/client/air/MarySpeechPsydule.java @@ -1,182 +1,182 @@ -/** - * Portions Copyright 2005, Communicative Machines - * Portions Copyright 2006 DFKI GmbH. - * All Rights Reserved. Use is subject to license terms. - * - * Permission is hereby granted, free of charge, to use and distribute - * this software and its documentation without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of this work, and to - * permit persons to whom this work is furnished to do so, subject to - * the following conditions: - * - * 1. The code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * 2. Any modifications must be clearly marked as such. - * 3. Original authors' names are not deleted. - * 4. The authors' names are not used to endorse or promote products - * derived from this software without specific prior written - * permission. 
- * - * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH - * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE - * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL - * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR - * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF - * THIS SOFTWARE. - */ -package marytts.client.air; - -import java.io.ByteArrayOutputStream; - -import marytts.client.MaryClient; - -import com.cmlabs.air.DataSample; -import com.cmlabs.air.JavaAIRPlug; -import com.cmlabs.air.Message; -import com.cmlabs.air.Time; - - -/** - * A PsyClone / OpenAIR module processing SSML - * (Speech Synthesis Markup Language) - * input and generating audio output. - * @author Marc Schröder - * - */ -public class MarySpeechPsydule -{ - protected String WHITEBOARD; - protected String INPUTTYPE; - protected String AUDIOOUTPUTTYPE; - protected String DEFAULTVOICE; - protected String name; - - protected JavaAIRPlug plug; - protected MaryClient mary; - - public MarySpeechPsydule(String airhost, int airport) throws Exception - { - initialize(); - mary = MaryClient.getMaryClient(); - plug = new JavaAIRPlug(name, airhost, airport); - if (!plug.init()) { - System.out.println("Could not connect to the Server on " + airhost + - " on port " + airport + "..."); - System.exit(0); - } - - System.out.println("Connected to the AIR Server on " + airhost + - ":" + airport); - - if (!plug.openTwoWayConnectionTo(WHITEBOARD)) { - System.out.println("Could not open callback connection to "+WHITEBOARD+"..."); - } - - String xml = ""; - - if (!plug.sendRegistration(xml)) { - System.out.println("Could not register for messages of type "+INPUTTYPE+"..."); - } else { - System.out.println("Listening on whiteboard "+WHITEBOARD+" for messages of type 
"+INPUTTYPE+"..."); - } - } - - protected void initialize() throws Exception - { - name = "MarySpeechPsydule"; - WHITEBOARD = System.getProperty("mary.psyclone.whiteboard", "WB1"); - INPUTTYPE = "Mary.Input.SSML"; - AUDIOOUTPUTTYPE = "Mary.Output.Audio"; - DEFAULTVOICE = System.getProperty("voice.default", "kevin16"); - } - - - public void listenAndProcess() - { - Message message; - while (true) { - if ( (message = plug.waitForNewMessage(100)) != null) { - Time start = new Time(); - System.out.println(start.printTime() + ":" + name - + ": received wakeup message from " + - message.from); - try { - String input = message.getContent(); - processInput(input); - Time end = new Time(); - System.out.println("Processing took "+end.difference(start)+" ms"); - } catch (Exception e) { - e.printStackTrace(); - } - } - } - - } - - protected void processInput(String input) throws Exception - { - byte[] audio = ssml2audio(input); - System.out.println("Audio: "+audio.length+" bytes"); - // post audio to whiteboard - DataSample audioData = new DataSample(); - //audioData.fromBinaryBuffer(0, audio, 0, audio.length); - audioData.data = audio; - audioData.size = audio.length; - Message audioMessage = new Message(name, WHITEBOARD, AUDIOOUTPUTTYPE, audioData); - plug.postMessage(WHITEBOARD, audioMessage, ""); - - } - - - private byte[] ssml2audio(String acoustparams) throws Exception - { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - mary.process(acoustparams, "SSML", "AUDIO", "en_US", "WAVE", DEFAULTVOICE, baos); - return baos.toByteArray(); - } - - - - - - - - - - - - - - /** - * A standalone program which routes messages between a - * PsyClone/OpenAIR server and a MARY server. - * The server host and port for both servers can be given as - * system properties: Mary server: "server.host" - * (default: cling.dfki.uni-sb.de) and "server.port" (default: 59125); - * OpenAIR server: "airserver.host" (default: localhost) and - * "airserver.port" (default: 10000). 
- * This server will listen for incoming SSML files, and - * generate Audio data from it. - * In the current version, the name of the OpenAIR whiteboard - * can be set via the system property "mary.psyclone.whiteboard" - * (default: "WB1"); the input data type is - * "Mary.Input.SSML"; and the audio output is - * "Greta.Data.Audio". - * @param args - * @throws Exception - */ - public static void main(String[] args) throws Exception - { - - String airhost = System.getProperty("airserver.host", "localhost"); - int airport = Integer.getInteger("airserver.port", 10000).intValue(); - - MarySpeechPsydule reader = new MarySpeechPsydule(airhost, airport); - reader.listenAndProcess(); - } - - -} +/** + * Portions Copyright 2005, Communicative Machines + * Portions Copyright 2006 DFKI GmbH. + * All Rights Reserved. Use is subject to license terms. + * + * Permission is hereby granted, free of charge, to use and distribute + * this software and its documentation without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of this work, and to + * permit persons to whom this work is furnished to do so, subject to + * the following conditions: + * + * 1. The code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * 2. Any modifications must be clearly marked as such. + * 3. Original authors' names are not deleted. + * 4. The authors' names are not used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE + * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL + * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR + * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ +package marytts.client.air; + +import java.io.ByteArrayOutputStream; + +import marytts.client.MaryClient; + +import com.cmlabs.air.DataSample; +import com.cmlabs.air.JavaAIRPlug; +import com.cmlabs.air.Message; +import com.cmlabs.air.Time; + + +/** + * A PsyClone / OpenAIR module processing SSML + * (Speech Synthesis Markup Language) + * input and generating audio output. + * @author Marc Schröder + * + */ +public class MarySpeechPsydule +{ + protected String WHITEBOARD; + protected String INPUTTYPE; + protected String AUDIOOUTPUTTYPE; + protected String DEFAULTVOICE; + protected String name; + + protected JavaAIRPlug plug; + protected MaryClient mary; + + public MarySpeechPsydule(String airhost, int airport) throws Exception + { + initialize(); + mary = MaryClient.getMaryClient(); + plug = new JavaAIRPlug(name, airhost, airport); + if (!plug.init()) { + System.out.println("Could not connect to the Server on " + airhost + + " on port " + airport + "..."); + System.exit(0); + } + + System.out.println("Connected to the AIR Server on " + airhost + + ":" + airport); + + if (!plug.openTwoWayConnectionTo(WHITEBOARD)) { + System.out.println("Could not open callback connection to "+WHITEBOARD+"..."); + } + + String xml = ""; + + if (!plug.sendRegistration(xml)) { + System.out.println("Could not register for messages of type "+INPUTTYPE+"..."); + } else { + System.out.println("Listening on whiteboard "+WHITEBOARD+" for messages of type 
"+INPUTTYPE+"..."); + } + } + + protected void initialize() throws Exception + { + name = "MarySpeechPsydule"; + WHITEBOARD = System.getProperty("mary.psyclone.whiteboard", "WB1"); + INPUTTYPE = "Mary.Input.SSML"; + AUDIOOUTPUTTYPE = "Mary.Output.Audio"; + DEFAULTVOICE = System.getProperty("voice.default", "kevin16"); + } + + + public void listenAndProcess() + { + Message message; + while (true) { + if ( (message = plug.waitForNewMessage(100)) != null) { + Time start = new Time(); + System.out.println(start.printTime() + ":" + name + + ": received wakeup message from " + + message.from); + try { + String input = message.getContent(); + processInput(input); + Time end = new Time(); + System.out.println("Processing took "+end.difference(start)+" ms"); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + + } + + protected void processInput(String input) throws Exception + { + byte[] audio = ssml2audio(input); + System.out.println("Audio: "+audio.length+" bytes"); + // post audio to whiteboard + DataSample audioData = new DataSample(); + //audioData.fromBinaryBuffer(0, audio, 0, audio.length); + audioData.data = audio; + audioData.size = audio.length; + Message audioMessage = new Message(name, WHITEBOARD, AUDIOOUTPUTTYPE, audioData); + plug.postMessage(WHITEBOARD, audioMessage, ""); + + } + + + private byte[] ssml2audio(String acoustparams) throws Exception + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + mary.process(acoustparams, "SSML", "AUDIO", "en_US", "WAVE", DEFAULTVOICE, baos); + return baos.toByteArray(); + } + + + + + + + + + + + + + + /** + * A standalone program which routes messages between a + * PsyClone/OpenAIR server and a MARY server. + * The server host and port for both servers can be given as + * system properties: Mary server: "server.host" + * (default: cling.dfki.uni-sb.de) and "server.port" (default: 59125); + * OpenAIR server: "airserver.host" (default: localhost) and + * "airserver.port" (default: 10000). 
+ * This server will listen for incoming SSML files, and + * generate Audio data from it. + * In the current version, the name of the OpenAIR whiteboard + * can be set via the system property "mary.psyclone.whiteboard" + * (default: "WB1"); the input data type is + * "Mary.Input.SSML"; and the audio output is + * "Greta.Data.Audio". + * @param args + * @throws Exception + */ + public static void main(String[] args) throws Exception + { + + String airhost = System.getProperty("airserver.host", "localhost"); + int airport = Integer.getInteger("airserver.port", 10000).intValue(); + + MarySpeechPsydule reader = new MarySpeechPsydule(airhost, airport); + reader.listenAndProcess(); + } + + +} diff --git a/marytts-jungle/src/main/java/marytts/language/de/infostruct/GerNetQuery.java b/marytts-jungle/src/main/java/marytts/language/de/infostruct/GerNetQuery.java index a11835bd..096e3c84 100644 --- a/marytts-jungle/src/main/java/marytts/language/de/infostruct/GerNetQuery.java +++ b/marytts-jungle/src/main/java/marytts/language/de/infostruct/GerNetQuery.java @@ -1,300 +1,300 @@ -/** - * Copyright 2000-2008 DFKI GmbH. - * All Rights Reserved. Use is subject to license terms. - * - * This file is part of MARY TTS. - * - * MARY TTS is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, version 3 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see . 
- * - */ -package marytts.language.de.infostruct; - -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.Collection; -import java.util.Vector; - -import marytts.util.MaryUtils; - -import org.apache.log4j.BasicConfigurator; -import org.apache.log4j.Logger; - - -/************************************************ - * Execute a query in the mySql germanet database. - * To run it /opt/jdbc/src/ or similar schould be - * in your classpath. - * - * How to run: /opt/jdk-1.4.1/bin/java -cp ./:/opt/jdbc/lib/mysql-connector-java-3.0.9-stable-bin.jar de.dfki.lt.mary.util.GerNetQuery [Word] [pos] [type] - * Run example: /opt/jdk-1.4.1/bin/java -cp ./:/opt/jdbc/lib/mysql-connector-java-3.0.9-stable-bin.jar de.dfki.lt.mary.util.GerNetQuery Welt n hype - * @author Marilisa Amoia & Massimo Romanelli - ***********************************************/ -public class GerNetQuery { - private Connection con; - private Statement stmt; - private static Logger logger = MaryUtils.getLogger("GerNetQuery"); - - public GerNetQuery(String database, String user, String password) throws ClassNotFoundException, IllegalAccessException, InstantiationException, SQLException { - connect(database, user, password); - } //constructor - - //Connecting - private void connect(String database, String user, String password) - throws ClassNotFoundException, IllegalAccessException, InstantiationException, SQLException { - //Load Driver: - Class.forName("org.gjt.mm.mysql.Driver").newInstance(); - //Connection to Database - con = - DriverManager.getConnection( - "jdbc:mysql://" + database + "?user=" + user + "&password=" + password); - stmt = con.createStatement(); - } //connect - - private void closeResultSet(ResultSet rs){ - if(rs != null) - try { - rs.close(); - } catch (SQLException e) { - logger.warn("Cannot access Germanet:", e); - } - } - - public String getSynString(String lex, String pos) { - 
StringBuilder result = new StringBuilder(); - ResultSet rs = null; - try { - long startTime = System.currentTimeMillis(); - rs = - stmt.executeQuery( - "SELECT DISTINCT(sw2.Word) FROM SynsetWord sw1, SynsetWord sw2 WHERE sw1.Word='" - + lex - + "' AND sw1.Pos='" - + pos - + "' AND sw1.Offset=sw2.Offset and sw1.Pos=sw2.Pos and sw2.Word<>sw1.Word "); - long endTime = System.currentTimeMillis(); - logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); - while (rs.next()) { - result.append("#").append(rs.getString("Word")); - } //while - } catch (SQLException e) { - logger.warn("Cannot access Germanet:", e); - } finally{ - closeResultSet(rs); - } - return result.toString(); - - } //getSynString - - - - public Vector getSynVector(String lex, String pos) { - Vector result = new Vector(); - ResultSet rs = null; - try { - long startTime = System.currentTimeMillis(); - rs = - stmt.executeQuery( - "SELECT DISTINCT(sw2.Word) FROM SynsetWord sw1, SynsetWord sw2 WHERE sw1.Word='" - + lex - + "' AND sw1.Pos='" - + pos - + "' AND sw1.Offset=sw2.Offset and sw1.Pos=sw2.Pos and sw2.Word<>sw1.Word "); - long endTime = System.currentTimeMillis(); - logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); - while (rs.next()) { - result.add(rs.getString("Word")); - } //while - } catch (SQLException e) { - logger.warn("Cannot access Germanet:", e); - } finally{ - closeResultSet(rs); - } - return result; - } //getSynVector - - public String getHyperString(String lex, String pos) { - StringBuilder result = new StringBuilder(); - ResultSet rs = null; - try { - long startTime = System.currentTimeMillis(); - rs = - stmt.executeQuery( - "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='@' AND ptr1.SourcePos=sw1.Pos AND ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" - + lex - + "' AND sw1.Pos='" - + pos - + "' AND sw1.Pos=sw2.Pos"); - long endTime = System.currentTimeMillis(); - 
logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); - while (rs.next()) { - result.append("#").append(rs.getString("Word")); - } //while - } catch (SQLException e) { - logger.warn("Cannot access Germanet:", e); - } finally{ - closeResultSet(rs); - } - return result.toString(); - - } //getHyperString - - public Vector getHyperVector(String lex, String pos) { - Vector result = new Vector(); - ResultSet rs = null; - try { - long startTime = System.currentTimeMillis(); - rs = - stmt.executeQuery( - "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='@' AND ptr1.SourcePos=sw1.Pos AND ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" - + lex - + "' AND sw1.Pos='" - + pos - + "' AND sw1.Pos=sw2.Pos"); - long endTime = System.currentTimeMillis(); - logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); - while (rs.next()) { - result.add(rs.getString("Word")); - } //while - } catch (SQLException e) { - logger.warn("Cannot access Germanet:", e); - } finally{ - closeResultSet(rs); - } - return result; - } //getHyperVector - - public String getHypoString(String lex, String pos) { - StringBuilder result = new StringBuilder(); - ResultSet rs = null; - try { - long startTime = System.currentTimeMillis(); - rs = - stmt.executeQuery( - "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='~' AND ptr1.SourcePos=sw1.Pos AND ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" - + lex - + "' AND sw1.Pos='" - + pos - + "' AND sw1.Pos=sw2.Pos"); - long endTime = System.currentTimeMillis(); - logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); - while (rs.next()) { - result.append("#").append(rs.getString("Word")); - } //while - } catch (SQLException e) { - logger.warn("Cannot access Germanet:", e); - } finally{ - closeResultSet(rs); - } - return result.toString(); - } //getHypoString - - public 
Vector getHypoVector(String lex, String pos) { - Vector result = new Vector(); - ResultSet rs = null; - try { - long startTime = System.currentTimeMillis(); - rs = - stmt.executeQuery( - "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='~' AND ptr1.SourcePos=sw1.Pos AND ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" - + lex - + "' AND sw1.Pos='" - + pos - + "' AND sw1.Pos=sw2.Pos"); - long endTime = System.currentTimeMillis(); - logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); - while (rs.next()) { - result.add(rs.getString("Word")); - } //while - } catch (SQLException e) { - logger.warn("Cannot access Germanet:", e); - } finally{ - closeResultSet(rs); - } - return result; - } //getHypoVector - - public String getAntoString(String lex, String pos) { - StringBuilder result = new StringBuilder(); - ResultSet rs = null; - try { - long startTime = System.currentTimeMillis(); - rs = - stmt.executeQuery( - "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='!' AND ptr1.SourcePos=sw1.Pos AND ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" - + lex - + "' AND sw1.Pos='" - + pos - + "' AND sw1.Pos=sw2.Pos"); - long endTime = System.currentTimeMillis(); - logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); - while (rs.next()) { - result.append("#").append(rs.getString("Word")); - } //while - } catch (SQLException e) { - logger.warn("Cannot access Germanet:", e); - } finally{ - closeResultSet(rs); - } - return result.toString(); - - } //getAntoString - - public Vector getAntoVector(String lex, String pos) { - Vector result = new Vector(); - ResultSet rs = null; - try { - long startTime = System.currentTimeMillis(); - rs = - stmt.executeQuery( - "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='!' 
AND ptr1.SourcePos=sw1.Pos AND ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" - + lex - + "' AND sw1.Pos='" - + pos - + "' AND sw1.Pos=sw2.Pos"); - long endTime = System.currentTimeMillis(); - logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); - while (rs.next()) { - result.add(rs.getString("Word")); - } //while - } catch (SQLException e) { - logger.warn("Cannot access Germanet:", e); - } finally{ - closeResultSet(rs); - } - return result; - } //getAntoVector - - public static void main(java.lang.String argv[]) - throws ClassNotFoundException, IllegalAccessException, InstantiationException, SQLException { - GerNetQuery query = new GerNetQuery(System.getProperty("germanet.database"), - System.getProperty("germanet.user"), System.getProperty("germanet.password")); - BasicConfigurator.configure(); - Vector result = new Vector(); - if (argv[2].equals("hypo")) - result = query.getHypoVector(argv[0], argv[1]); - else if (argv[2].equals("hype")) - result = query.getHyperVector(argv[0], argv[1]); - else if (argv[2].equals("anto")) - result = query.getAntoVector(argv[0], argv[1]); - else if (argv[2].equals("syn")) - result = query.getSynVector(argv[0], argv[1]); - else if (argv[0].equals("intDef")) - result = query.getHypoVector("gepäck", "n"); - - System.out.println("result is: " + (Collection) result); - } //main - -} //GerNetQuery +/** + * Copyright 2000-2008 DFKI GmbH. + * All Rights Reserved. Use is subject to license terms. + * + * This file is part of MARY TTS. + * + * MARY TTS is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + */ +package marytts.language.de.infostruct; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Collection; +import java.util.Vector; + +import marytts.util.MaryUtils; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Logger; + + +/************************************************ + * Execute a query in the mySql germanet database. + * To run it /opt/jdbc/src/ or similar schould be + * in your classpath. + * + * How to run: /opt/jdk-1.4.1/bin/java -cp ./:/opt/jdbc/lib/mysql-connector-java-3.0.9-stable-bin.jar de.dfki.lt.mary.util.GerNetQuery [Word] [pos] [type] + * Run example: /opt/jdk-1.4.1/bin/java -cp ./:/opt/jdbc/lib/mysql-connector-java-3.0.9-stable-bin.jar de.dfki.lt.mary.util.GerNetQuery Welt n hype + * @author Marilisa Amoia & Massimo Romanelli + ***********************************************/ +public class GerNetQuery { + private Connection con; + private Statement stmt; + private static Logger logger = MaryUtils.getLogger("GerNetQuery"); + + public GerNetQuery(String database, String user, String password) throws ClassNotFoundException, IllegalAccessException, InstantiationException, SQLException { + connect(database, user, password); + } //constructor + + //Connecting + private void connect(String database, String user, String password) + throws ClassNotFoundException, IllegalAccessException, InstantiationException, SQLException { + //Load Driver: + Class.forName("org.gjt.mm.mysql.Driver").newInstance(); + //Connection to Database + con = + DriverManager.getConnection( + "jdbc:mysql://" + database + "?user=" + user + "&password=" + password); + stmt = con.createStatement(); + } //connect + + private void closeResultSet(ResultSet 
rs){ + if(rs != null) + try { + rs.close(); + } catch (SQLException e) { + logger.warn("Cannot access Germanet:", e); + } + } + + public String getSynString(String lex, String pos) { + StringBuilder result = new StringBuilder(); + ResultSet rs = null; + try { + long startTime = System.currentTimeMillis(); + rs = + stmt.executeQuery( + "SELECT DISTINCT(sw2.Word) FROM SynsetWord sw1, SynsetWord sw2 WHERE sw1.Word='" + + lex + + "' AND sw1.Pos='" + + pos + + "' AND sw1.Offset=sw2.Offset and sw1.Pos=sw2.Pos and sw2.Word<>sw1.Word "); + long endTime = System.currentTimeMillis(); + logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); + while (rs.next()) { + result.append("#").append(rs.getString("Word")); + } //while + } catch (SQLException e) { + logger.warn("Cannot access Germanet:", e); + } finally{ + closeResultSet(rs); + } + return result.toString(); + + } //getSynString + + + + public Vector getSynVector(String lex, String pos) { + Vector result = new Vector(); + ResultSet rs = null; + try { + long startTime = System.currentTimeMillis(); + rs = + stmt.executeQuery( + "SELECT DISTINCT(sw2.Word) FROM SynsetWord sw1, SynsetWord sw2 WHERE sw1.Word='" + + lex + + "' AND sw1.Pos='" + + pos + + "' AND sw1.Offset=sw2.Offset and sw1.Pos=sw2.Pos and sw2.Word<>sw1.Word "); + long endTime = System.currentTimeMillis(); + logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); + while (rs.next()) { + result.add(rs.getString("Word")); + } //while + } catch (SQLException e) { + logger.warn("Cannot access Germanet:", e); + } finally{ + closeResultSet(rs); + } + return result; + } //getSynVector + + public String getHyperString(String lex, String pos) { + StringBuilder result = new StringBuilder(); + ResultSet rs = null; + try { + long startTime = System.currentTimeMillis(); + rs = + stmt.executeQuery( + "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='@' AND ptr1.SourcePos=sw1.Pos AND 
ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" + + lex + + "' AND sw1.Pos='" + + pos + + "' AND sw1.Pos=sw2.Pos"); + long endTime = System.currentTimeMillis(); + logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); + while (rs.next()) { + result.append("#").append(rs.getString("Word")); + } //while + } catch (SQLException e) { + logger.warn("Cannot access Germanet:", e); + } finally{ + closeResultSet(rs); + } + return result.toString(); + + } //getHyperString + + public Vector getHyperVector(String lex, String pos) { + Vector result = new Vector(); + ResultSet rs = null; + try { + long startTime = System.currentTimeMillis(); + rs = + stmt.executeQuery( + "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='@' AND ptr1.SourcePos=sw1.Pos AND ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" + + lex + + "' AND sw1.Pos='" + + pos + + "' AND sw1.Pos=sw2.Pos"); + long endTime = System.currentTimeMillis(); + logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); + while (rs.next()) { + result.add(rs.getString("Word")); + } //while + } catch (SQLException e) { + logger.warn("Cannot access Germanet:", e); + } finally{ + closeResultSet(rs); + } + return result; + } //getHyperVector + + public String getHypoString(String lex, String pos) { + StringBuilder result = new StringBuilder(); + ResultSet rs = null; + try { + long startTime = System.currentTimeMillis(); + rs = + stmt.executeQuery( + "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='~' AND ptr1.SourcePos=sw1.Pos AND ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" + + lex + + "' AND sw1.Pos='" + + pos + + "' AND sw1.Pos=sw2.Pos"); + long endTime = System.currentTimeMillis(); + logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); + while (rs.next()) { + 
result.append("#").append(rs.getString("Word")); + } //while + } catch (SQLException e) { + logger.warn("Cannot access Germanet:", e); + } finally{ + closeResultSet(rs); + } + return result.toString(); + } //getHypoString + + public Vector getHypoVector(String lex, String pos) { + Vector result = new Vector(); + ResultSet rs = null; + try { + long startTime = System.currentTimeMillis(); + rs = + stmt.executeQuery( + "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='~' AND ptr1.SourcePos=sw1.Pos AND ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" + + lex + + "' AND sw1.Pos='" + + pos + + "' AND sw1.Pos=sw2.Pos"); + long endTime = System.currentTimeMillis(); + logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); + while (rs.next()) { + result.add(rs.getString("Word")); + } //while + } catch (SQLException e) { + logger.warn("Cannot access Germanet:", e); + } finally{ + closeResultSet(rs); + } + return result; + } //getHypoVector + + public String getAntoString(String lex, String pos) { + StringBuilder result = new StringBuilder(); + ResultSet rs = null; + try { + long startTime = System.currentTimeMillis(); + rs = + stmt.executeQuery( + "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='!' 
AND ptr1.SourcePos=sw1.Pos AND ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" + + lex + + "' AND sw1.Pos='" + + pos + + "' AND sw1.Pos=sw2.Pos"); + long endTime = System.currentTimeMillis(); + logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); + while (rs.next()) { + result.append("#").append(rs.getString("Word")); + } //while + } catch (SQLException e) { + logger.warn("Cannot access Germanet:", e); + } finally{ + closeResultSet(rs); + } + return result.toString(); + + } //getAntoString + + public Vector getAntoVector(String lex, String pos) { + Vector result = new Vector(); + ResultSet rs = null; + try { + long startTime = System.currentTimeMillis(); + rs = + stmt.executeQuery( + "SELECT DISTINCT(sw2.Word) FROM SynsetPtr ptr1, SynsetWord sw1, SynsetWord sw2 WHERE ptr1.Ptr='!' AND ptr1.SourcePos=sw1.Pos AND ptr1.SourceOffset=sw1.Offset AND sw2.Offset=ptr1.TargetOffSet AND sw1.Word='" + + lex + + "' AND sw1.Pos='" + + pos + + "' AND sw1.Pos=sw2.Pos"); + long endTime = System.currentTimeMillis(); + logger.debug("Germanet Query took " + (endTime - startTime) + " ms."); + while (rs.next()) { + result.add(rs.getString("Word")); + } //while + } catch (SQLException e) { + logger.warn("Cannot access Germanet:", e); + } finally{ + closeResultSet(rs); + } + return result; + } //getAntoVector + + public static void main(java.lang.String argv[]) + throws ClassNotFoundException, IllegalAccessException, InstantiationException, SQLException { + GerNetQuery query = new GerNetQuery(System.getProperty("germanet.database"), + System.getProperty("germanet.user"), System.getProperty("germanet.password")); + BasicConfigurator.configure(); + Vector result = new Vector(); + if (argv[2].equals("hypo")) + result = query.getHypoVector(argv[0], argv[1]); + else if (argv[2].equals("hype")) + result = query.getHyperVector(argv[0], argv[1]); + else if (argv[2].equals("anto")) + result = query.getAntoVector(argv[0], argv[1]); + else if 
(argv[2].equals("syn")) + result = query.getSynVector(argv[0], argv[1]); + else if (argv[0].equals("intDef")) + result = query.getHypoVector("gepäck", "n"); + + System.out.println("result is: " + (Collection) result); + } //main + +} //GerNetQuery diff --git a/marytts-jungle/src/main/java/marytts/language/de/infostruct/Stemmer.java b/marytts-jungle/src/main/java/marytts/language/de/infostruct/Stemmer.java index a39d5a86..6af097b0 100644 --- a/marytts-jungle/src/main/java/marytts/language/de/infostruct/Stemmer.java +++ b/marytts-jungle/src/main/java/marytts/language/de/infostruct/Stemmer.java @@ -1,265 +1,265 @@ -/** - * Copyright 2000-2008 DFKI GmbH. - * All Rights Reserved. Use is subject to license terms. - * - * This file is part of MARY TTS. - * - * MARY TTS is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, version 3 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see . 
- * - */ -package marytts.language.de.infostruct; - -import marytts.util.MaryUtils; - -import org.apache.log4j.Logger; - - -public class Stemmer { - - private String vowels = "aeiouyäöü"; - private String sEndings = "bdfghklmnrt"; - private String stEndings = "bdfghklmnt"; - private Logger logger; - - public Stemmer() { - logger = MaryUtils.getLogger("Stemmer"); - } - - //extract the stemm of the original word - public String findStem(String word) { - word = prepareWord(word); - // print prepared word - - int posR1 = calculateR(word, 0, 2); //(word, 0); - int posR2 = calculateR(word, posR1, 0); - // R1 is the region after the first non-vowel following a vowel - String r1 = word.substring(posR1, word.length()); - // R2 is the region after the first non-vowel following a vowel in R1 - String r2 = word.substring(posR2, word.length()); - // print R1 and R2 - - String wordWithoutR1 = word.substring(0, posR1); - // print beginn of word - - //begin the cut-endings operation - String r1AfterStep1 = step1(word, r1); - // print cutted word after step1 - - String r1AfterStep2 = step2(wordWithoutR1 + r1AfterStep1, r1AfterStep1); - // print cutted word after step2 - - //result = step3(wordWithoutR1+result,result,r2); - //System.out.println("step3: " +wordWithoutR1 + result); - // return wordWithoutR1+result; - String result = finalAdjust(wordWithoutR1 + r1AfterStep2); - logger.debug(word + " => " + wordWithoutR1 + "|" + r1 + - " => (step1)" + wordWithoutR1 + r1AfterStep1 + - " => (step2)" + wordWithoutR1 + r1AfterStep2 + - " => " + result); - return result; - } //findStemm - - private String prepareWord(String word) { - String result = Character.toString(word.charAt(0)); - for (int i = 1; i < word.length(); i++) { - switch (word.charAt(i)) { - case 'u' : - { - if ((i != (word.length() - 1)) - && (vowels.indexOf(word.charAt(i - 1)) > -1) - && (vowels.indexOf(word.charAt(i + 1)) > -1)) { - result = result + "U"; - } //if - else - result = result + word.charAt(i); - break; - } 
//case - case 'y' : - { - if ((i != (word.length() - 1)) - && (vowels.indexOf(word.charAt(i - 1)) > -1) - && (vowels.indexOf(word.charAt(i + 1)) > -1)) { - result = result + "Y"; - } //if - else - result = result + word.charAt(i); - break; - } //case - case 'ß' : - { - result = result + "ss"; - break; - } //case - default : - { - result = result + word.charAt(i); - break; - } //default - } //switch - } //for - return result; - } //prepareWord - - /** - * Try to cut off a simple suffix, corresponding to adjective/noun inflection endings. - * @param word - * @param r1 - * @return - */ - private String step1(String word, String r1) { - String result = r1; - if (r1.endsWith("ern")) { - result = result.substring(0, result.length() - 3); - } else if (r1.endsWith("er") || r1.endsWith("es") || r1.endsWith("em") || r1.endsWith("en")) { - result = result.substring(0, result.length() - 2); - } else if (r1.endsWith("e") || (r1.endsWith("s") && (sEndings.indexOf(word.charAt(word.length() - 2)) > -1))) { - result = result.substring(0, result.length() - 1); - } //else if - return result; - } //step1 - - /** - * Try to cut off a longer suffix, taking into account Komparativ and Superlativ forms - * of adjectives. - * @param word - * @param r - * @return - */ - private String step2(String word, String r) { - String result = r; - if (r.endsWith("est")) { - result = result.substring(0, result.length() - 3); - } else if ( - r.endsWith("er") - || r.endsWith("en") - || (r.endsWith("st") - && (stEndings.indexOf(word.charAt(word.length() - 3)) > -1) - && (word.length() - 3 > 2))) { - result = result.substring(0, result.length() - 2); - } //else if - return result; - } //step2 - - /** - * Try to cut of adjective- and noun-building suffixes. 
- * @param word - * @param r1 - * @param r2 - * @return - */ - private String step3(String word, String r1, String r2) { - String result = r1; - if (r2.endsWith("isch") && (r1.charAt(r1.length() - 4) != 'e')) { - result = result.substring(0, result.length() - 4); - } else if (r2.endsWith("lich") || r2.endsWith("heit")) { - if (r1.endsWith("erlich") || r1.endsWith("enlich")) { - result = result.substring(0, result.length() - 6); - } else - result = result.substring(0, result.length() - 4); - } else if (r2.endsWith("keit")) { - if (r2.endsWith("lichkeit")) { - result = result.substring(0, result.length() - 8); - } else if (r2.endsWith("igkeit")) { - result = result.substring(0, result.length() - 6); - } else - result = result.substring(0, result.length() - 4); - } else if (r2.endsWith("end") || r2.endsWith("ung")) { - if ((r2.substring(0, r2.length() - 3)).endsWith("ig") && (r1.charAt(r1.length() - 5) != 'e')) { - result = result.substring(0, result.length() - 5); - } else - result = result.substring(0, result.length() - 3); - } else if ((r2.endsWith("ig") || r2.endsWith("ik")) && r1.charAt(r1.length() - 3) != 'e') { - result = result.substring(0, result.length() - 2); - } //else if - return result; - } // step3 - - /** - * In the given word, and starting from a given index, find the first consonant - * after a vowel sequence and return the position after that consonant, or - * constraint, whichever is larger. - * @param word the string in which to search for a V-C pattern - * @param index index in word from which to start - * @param constraint minimum return value - * @return the position after the consonant, or constraint, whichever is larger. 
- */ - private int calculateR(String word, int index, int constraint) { - int result = index; - for (int i = index; i < word.length(); i++) { - if (vowels.indexOf(word.charAt(i)) > -1) { - for (int j = i; j < word.length(); j++) { - if ((vowels.indexOf(word.charAt(j)) == -1) && j > 1) { - result = ++j; - if (result > constraint) { - break; - } else - result = constraint; - } //if - } // for:j - break; - } //if - } // for:i - return result; - } //calculateR - - private String finalAdjust(String word) { - - String result = Character.toString(word.charAt(0)); - - for (int i = 1; i < word.length(); i++) { - - switch (word.charAt(i)) { - case 'U' : - { - result = result + "u"; - break; - } //case - case 'Y' : - { - result = result + "y"; - break; - } //case - case 'ä' : - { - result = result + "a"; - break; - } //case - case 'ö' : - { - result = result + "o"; - break; - } //case - case 'ü' : - { - result = result + "u"; - break; - } //case - - default : - { - result = result + word.charAt(i); - break; - } //default - } //switch - - } //for - //result = result.substring(0,word.length()); - return result; - } //finalAdjust - - public static void main(String args[]) { - Stemmer stemmer = new Stemmer(); - System.out.println("RESULT: " + stemmer.findStem(args[0])); - } //main - -} //Stemmer +/** + * Copyright 2000-2008 DFKI GmbH. + * All Rights Reserved. Use is subject to license terms. + * + * This file is part of MARY TTS. + * + * MARY TTS is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + */ +package marytts.language.de.infostruct; + +import marytts.util.MaryUtils; + +import org.apache.log4j.Logger; + + +public class Stemmer { + + private String vowels = "aeiouyäöü"; + private String sEndings = "bdfghklmnrt"; + private String stEndings = "bdfghklmnt"; + private Logger logger; + + public Stemmer() { + logger = MaryUtils.getLogger("Stemmer"); + } + + //extract the stemm of the original word + public String findStem(String word) { + word = prepareWord(word); + // print prepared word + + int posR1 = calculateR(word, 0, 2); //(word, 0); + int posR2 = calculateR(word, posR1, 0); + // R1 is the region after the first non-vowel following a vowel + String r1 = word.substring(posR1, word.length()); + // R2 is the region after the first non-vowel following a vowel in R1 + String r2 = word.substring(posR2, word.length()); + // print R1 and R2 + + String wordWithoutR1 = word.substring(0, posR1); + // print beginn of word + + //begin the cut-endings operation + String r1AfterStep1 = step1(word, r1); + // print cutted word after step1 + + String r1AfterStep2 = step2(wordWithoutR1 + r1AfterStep1, r1AfterStep1); + // print cutted word after step2 + + //result = step3(wordWithoutR1+result,result,r2); + //System.out.println("step3: " +wordWithoutR1 + result); + // return wordWithoutR1+result; + String result = finalAdjust(wordWithoutR1 + r1AfterStep2); + logger.debug(word + " => " + wordWithoutR1 + "|" + r1 + + " => (step1)" + wordWithoutR1 + r1AfterStep1 + + " => (step2)" + wordWithoutR1 + r1AfterStep2 + + " => " + result); + return result; + } //findStemm + + private String prepareWord(String word) { + String result = Character.toString(word.charAt(0)); + for (int i = 1; i < word.length(); i++) { + switch (word.charAt(i)) { + case 'u' : + { + if ((i != (word.length() - 1)) + && (vowels.indexOf(word.charAt(i - 1)) > -1) + && 
(vowels.indexOf(word.charAt(i + 1)) > -1)) { + result = result + "U"; + } //if + else + result = result + word.charAt(i); + break; + } //case + case 'y' : + { + if ((i != (word.length() - 1)) + && (vowels.indexOf(word.charAt(i - 1)) > -1) + && (vowels.indexOf(word.charAt(i + 1)) > -1)) { + result = result + "Y"; + } //if + else + result = result + word.charAt(i); + break; + } //case + case 'ß' : + { + result = result + "ss"; + break; + } //case + default : + { + result = result + word.charAt(i); + break; + } //default + } //switch + } //for + return result; + } //prepareWord + + /** + * Try to cut off a simple suffix, corresponding to adjective/noun inflection endings. + * @param word + * @param r1 + * @return + */ + private String step1(String word, String r1) { + String result = r1; + if (r1.endsWith("ern")) { + result = result.substring(0, result.length() - 3); + } else if (r1.endsWith("er") || r1.endsWith("es") || r1.endsWith("em") || r1.endsWith("en")) { + result = result.substring(0, result.length() - 2); + } else if (r1.endsWith("e") || (r1.endsWith("s") && (sEndings.indexOf(word.charAt(word.length() - 2)) > -1))) { + result = result.substring(0, result.length() - 1); + } //else if + return result; + } //step1 + + /** + * Try to cut off a longer suffix, taking into account Komparativ and Superlativ forms + * of adjectives. + * @param word + * @param r + * @return + */ + private String step2(String word, String r) { + String result = r; + if (r.endsWith("est")) { + result = result.substring(0, result.length() - 3); + } else if ( + r.endsWith("er") + || r.endsWith("en") + || (r.endsWith("st") + && (stEndings.indexOf(word.charAt(word.length() - 3)) > -1) + && (word.length() - 3 > 2))) { + result = result.substring(0, result.length() - 2); + } //else if + return result; + } //step2 + + /** + * Try to cut of adjective- and noun-building suffixes. 
+ * @param word + * @param r1 + * @param r2 + * @return + */ + private String step3(String word, String r1, String r2) { + String result = r1; + if (r2.endsWith("isch") && (r1.charAt(r1.length() - 4) != 'e')) { + result = result.substring(0, result.length() - 4); + } else if (r2.endsWith("lich") || r2.endsWith("heit")) { + if (r1.endsWith("erlich") || r1.endsWith("enlich")) { + result = result.substring(0, result.length() - 6); + } else + result = result.substring(0, result.length() - 4); + } else if (r2.endsWith("keit")) { + if (r2.endsWith("lichkeit")) { + result = result.substring(0, result.length() - 8); + } else if (r2.endsWith("igkeit")) { + result = result.substring(0, result.length() - 6); + } else + result = result.substring(0, result.length() - 4); + } else if (r2.endsWith("end") || r2.endsWith("ung")) { + if ((r2.substring(0, r2.length() - 3)).endsWith("ig") && (r1.charAt(r1.length() - 5) != 'e')) { + result = result.substring(0, result.length() - 5); + } else + result = result.substring(0, result.length() - 3); + } else if ((r2.endsWith("ig") || r2.endsWith("ik")) && r1.charAt(r1.length() - 3) != 'e') { + result = result.substring(0, result.length() - 2); + } //else if + return result; + } // step3 + + /** + * In the given word, and starting from a given index, find the first consonant + * after a vowel sequence and return the position after that consonant, or + * constraint, whichever is larger. + * @param word the string in which to search for a V-C pattern + * @param index index in word from which to start + * @param constraint minimum return value + * @return the position after the consonant, or constraint, whichever is larger. 
+ */ + private int calculateR(String word, int index, int constraint) { + int result = index; + for (int i = index; i < word.length(); i++) { + if (vowels.indexOf(word.charAt(i)) > -1) { + for (int j = i; j < word.length(); j++) { + if ((vowels.indexOf(word.charAt(j)) == -1) && j > 1) { + result = ++j; + if (result > constraint) { + break; + } else + result = constraint; + } //if + } // for:j + break; + } //if + } // for:i + return result; + } //calculateR + + private String finalAdjust(String word) { + + String result = Character.toString(word.charAt(0)); + + for (int i = 1; i < word.length(); i++) { + + switch (word.charAt(i)) { + case 'U' : + { + result = result + "u"; + break; + } //case + case 'Y' : + { + result = result + "y"; + break; + } //case + case 'ä' : + { + result = result + "a"; + break; + } //case + case 'ö' : + { + result = result + "o"; + break; + } //case + case 'ü' : + { + result = result + "u"; + break; + } //case + + default : + { + result = result + word.charAt(i); + break; + } //default + } //switch + + } //for + //result = result.substring(0,word.length()); + return result; + } //finalAdjust + + public static void main(String args[]) { + Stemmer stemmer = new Stemmer(); + System.out.println("RESULT: " + stemmer.findStem(args[0])); + } //main + +} //Stemmer diff --git a/marytts-jungle/src/main/java/marytts/language/tib/ContourGenerator.java b/marytts-jungle/src/main/java/marytts/language/tib/ContourGenerator.java index 844e1717..ee08c17f 100644 --- a/marytts-jungle/src/main/java/marytts/language/tib/ContourGenerator.java +++ b/marytts-jungle/src/main/java/marytts/language/tib/ContourGenerator.java @@ -1,6 +1,6 @@ -/** - * Copyright 2000-2006 DFKI GmbH. - * All Rights Reserved. Use is subject to license terms. +/** + * Copyright 2000-2006 DFKI GmbH. + * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * @@ -17,1898 +17,1898 @@ * along with this program. If not, see . 
* */ -package marytts.language.tib; - -import java.io.FileInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.ListIterator; -import java.util.Locale; -import java.util.Map; -import java.util.Stack; -import java.util.WeakHashMap; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.FactoryConfigurationError; -import javax.xml.parsers.ParserConfigurationException; - -import marytts.datatypes.MaryData; -import marytts.datatypes.MaryDataType; -import marytts.datatypes.MaryXML; -import marytts.exceptions.NoSuchPropertyException; -import marytts.modules.InternalModule; -import marytts.modules.MaryModule; -import marytts.modules.ModuleRegistry; -import marytts.modules.phonemiser.Allophone; -import marytts.modules.phonemiser.AllophoneSet; -import marytts.modules.synthesis.MbrolaVoice; -import marytts.modules.synthesis.Voice; -import marytts.server.MaryProperties; -import marytts.util.MaryUtils; -import marytts.util.dom.MaryDomUtils; -import marytts.util.dom.NameNodeFilter; - -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; -import org.w3c.dom.traversal.DocumentTraversal; -import org.w3c.dom.traversal.NodeFilter; -import org.w3c.dom.traversal.NodeIterator; -import org.w3c.dom.traversal.TreeWalker; - - -/** - * The Tibetan contour generator module. - * - * todo: Code säubern, kommentieren - * @author Lars Jungjohann - */ - -public class ContourGenerator extends InternalModule { - - /** This map contains the topline-baseline frequency configurations for the - * currently used phrase and sub-phrase prosody elements. As this is a - * WeakHashMap, entries will automatically be deleted when not in regular - * use anymore. 
*/ - private WeakHashMap topBaseConfMap; - /** This map contains the prosodic settings, as ProsodicSettings objects, - * for the currently used prosody elements. As this is a WeakHashMap, - * entries will automatically be deleted when not in regular use - * anymore. */ - private WeakHashMap prosodyMap; - /** This map contains the default voice element for a given document. - * As this is a WeakHashMap, entries will automatically be deleted when not in - * regular use anymore. */ - private WeakHashMap defaultVoiceMap; - /** The allophoneSet used for this language */ - private AllophoneSet allophoneSet; - /** The tone realisation rules for this language */ - private String tonerulefilePropertyName = "tibetan.cap.tonerulefile"; - private Map toneMap; - - - public ContourGenerator() { - super("TibetanContourGenerator", - MaryDataType.DURATIONS, - MaryDataType.ACOUSTPARAMS, - new Locale("tib")); - } - - public void startup() throws Exception { - super.startup(); - // We depend on the Synthesis module: - MaryModule synthesis = ModuleRegistry.getModule(marytts.modules.Synthesis.class); - assert synthesis != null; - if (synthesis.getState() == MaryModule.MODULE_OFFLINE) - synthesis.startup(); - // load phone list - allophoneSet = AllophoneSet.getAllophoneSet(MaryProperties.needFilename("tibetan.cap.phonelistfile")); - // load tone rules - toneMap = new HashMap(); - loadToneRules(); - // instantiate the Map in which settings are associated with elements: - // (when the objects serving as keys are not in ordinary use any more, - // the key-value pairs are deleted from the WeakHashMap earlier or - // later; that means we do not need to keep track of the hashmaps per - // thread) - topBaseConfMap = new WeakHashMap(); - prosodyMap = new WeakHashMap(); - defaultVoiceMap = new WeakHashMap(); - } - - private synchronized void loadToneRules() - throws FactoryConfigurationError, ParserConfigurationException, org.xml.sax.SAXException, IOException, - NoSuchPropertyException { - 
DocumentBuilderFactory f = DocumentBuilderFactory.newInstance(); - f.setValidating(false); - DocumentBuilder b = f.newDocumentBuilder(); - - // load tone rules - Document toneRules = b.parse(new FileInputStream(MaryProperties.needFilename(tonerulefilePropertyName))); - // Now fill the map of tobi symbols: - Element root = toneRules.getDocumentElement(); - for (Element e = MaryDomUtils.getFirstChildElement(root); - e != null; - e = MaryDomUtils.getNextSiblingElement(e)) { - if (e.getTagName().equals("tone") || e.getTagName().equals(MaryXML.BOUNDARY)) { - String name = e.getAttribute("name"); - // We want to be able to find tone labels both in - // uppercase and lowercase form: - toneMap.put(name.toUpperCase(), e); - } - } - } // loadToneRules - - - public MaryData process(MaryData d) throws Exception - { - Document doc = d.getDocument(); - defaultVoiceMap.put(doc, d.getDefaultVoice()); - determineProsodicSettings(doc); - addOrDeleteBoundaries(doc); - - NodeList sentences = doc.getElementsByTagName(MaryXML.SENTENCE); - for (int i=0; i < sentences.getLength(); i++) { - processSentence((Element)sentences.item(i)); - } - MaryData result = new MaryData(outputType(), d.getLocale()); - result.setDocument(doc); - return result; - } - - /** - * For all (possibly nested) prosody elements in the document, - * calculate their (possibly cumulated) prosodic settings - * and save them in a map. - */ - private void determineProsodicSettings(Document doc) { - // Determine the prosodic setting for each prosody element - // Note: It is important that ancestor nodes are processed before - // descendant nodes, because the descendants will inherit the - // ancestors' settings! 
- NodeList prosodies = doc.getElementsByTagName(MaryXML.PROSODY); - for (int i = 0; i < prosodies.getLength(); i++) { - Element prosody = (Element) prosodies.item(i); - determineProsodicSettings(prosody); - } - } - - /** - * For one given prosody element, determine the prosodic settings, - * taking into account its closest prosody ancestor's settings. - * This method needs to be called once when starting to work with a document - * (from determineProsodicSettings(Document) and when a new prosody element - * is created (e.g. for upstep/downstep). - * @param prosody the prosody element for which to save the prosodic settings in - * the map. - */ - private void determineProsodicSettings(Element prosody) { - ProsodicSettings settings = new ProsodicSettings(); - // Neutral default settings: - ProsodicSettings parentSettings = new ProsodicSettings(); - // Obtain parent settings, if any: - Element ancestor = (Element) MaryDomUtils.getAncestor(prosody, MaryXML.PROSODY); - if (ancestor != null) { - ProsodicSettings testSettings = (ProsodicSettings) prosodyMap.get(ancestor); - if (testSettings != null) { - parentSettings = testSettings; - } - } - // Only accept relative changes, i.e. 
percentage delta: - settings.setRate(parentSettings.rate() + MaryUtils.getPercentageDelta(prosody.getAttribute("rate"))); - settings.setAccentProminence( - parentSettings.accentProminence() + MaryUtils.getPercentageDelta(prosody.getAttribute("accent-prominence"))); - settings.setAccentSlope( - parentSettings.accentSlope() + MaryUtils.getPercentageDelta(prosody.getAttribute("accent-slope"))); - settings.setNumberOfPauses( - parentSettings.numberOfPauses() + MaryUtils.getPercentageDelta(prosody.getAttribute("number-of-pauses"))); - settings.setPauseDuration( - parentSettings.pauseDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("pause-duration"))); - settings.setVowelDuration( - parentSettings.vowelDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("vowel-duration"))); - settings.setPlosiveDuration( - parentSettings.plosiveDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("plosive-duration"))); - settings.setFricativeDuration( - parentSettings.fricativeDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("fricative-duration"))); - settings.setNasalDuration( - parentSettings.nasalDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("nasal-duration"))); - settings.setLiquidDuration( - parentSettings.liquidDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("liquid-duration"))); - settings.setGlideDuration( - parentSettings.glideDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("glide-duration"))); - - String sVolume = prosody.getAttribute("volume"); - if (sVolume.equals("")) { - settings.setVolume(parentSettings.volume()); - } else if (MaryUtils.isPercentageDelta(sVolume)) { - int newVolume = parentSettings.volume() + MaryUtils.getPercentageDelta(sVolume); - if (newVolume < 0) - newVolume = 0; - else if (newVolume > 100) - newVolume = 100; - settings.setVolume(newVolume); - } else if (MaryUtils.isUnsignedNumber(sVolume)) { - settings.setVolume(MaryUtils.getUnsignedNumber(sVolume)); - } 
else if (sVolume.equals("silent")) { - settings.setVolume(0); - } else if (sVolume.equals("soft")) { - settings.setVolume(25); - } else if (sVolume.equals("medium")) { - settings.setVolume(50); - } else if (sVolume.equals("loud")) { - settings.setVolume(75); - } - prosodyMap.put(prosody, settings); - } - - /** - * Adjust the number of boundaries according to rate and the - * "number-of-pauses" attribute. - */ - private void addOrDeleteBoundaries(Document doc) { - // TODO: Check if this is needed; for German, this is already done in KlattDurationModeller! - // Go through boundaries. A boundary is deleted if the determined - // minimum breakindex size is larger than this boundary's breakindex. - NodeIterator it = - ((DocumentTraversal) doc).createNodeIterator( - doc, - NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(MaryXML.BOUNDARY), - false); - Element boundary = null; - List bi1prosodyElements = null; - while ((boundary = (Element) it.nextNode()) != null) { - int minBI = 3; - Element prosody = (Element) MaryDomUtils.getAncestor(boundary, MaryXML.PROSODY); - if (prosody != null) { - ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); - assert settings != null; - int rate = settings.rate(); - int numberOfPauses = settings.numberOfPauses(); - if (numberOfPauses <= 50) - minBI = 5; - else if (numberOfPauses <= 75) - minBI = 4; - else if (numberOfPauses > 150) - minBI = 1; - else if (numberOfPauses > 125) - minBI = 2; - // Rate can only shift the number of pauses by one breakindex - if (rate < 90 && minBI > 1) - minBI--; - if (minBI == 1) { - // Remember that the current prosody element wants bi 1 boundaries: - if (bi1prosodyElements == null) - bi1prosodyElements = new ArrayList(); - bi1prosodyElements.add(prosody); - } - } - // This boundary's bi: - int bi = 3; - try { - bi = Integer.parseInt(boundary.getAttribute("breakindex")); - } catch (NumberFormatException e) { - logger.info( - "Unexpected breakindex value `" + 
boundary.getAttribute("breakindex") + "', assuming " + bi); - } - if (bi < minBI) { - if (!boundary.hasAttribute("duration")) - boundary.getParentNode().removeChild(boundary); - else - boundary.removeAttribute("bi"); // but keep duration - } - } - // Do we need to add any boundaries? - if (bi1prosodyElements != null) { - Iterator elIt = bi1prosodyElements.iterator(); - while (elIt.hasNext()) { - Element prosody = (Element) elIt.next(); - NodeIterator nodeIt = - ((DocumentTraversal) doc).createNodeIterator( - prosody, - NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(new String[] { MaryXML.TOKEN, MaryXML.BOUNDARY }), - false); - Element el = null; - Element prevEl = null; - while ((el = (Element) nodeIt.nextNode()) != null) { - if (el.getTagName().equals(MaryXML.TOKEN) && prevEl != null && prevEl.getTagName().equals(MaryXML.TOKEN)) { - // Need to insert a boundary before el: - Element newBoundary = MaryXML.createElement(doc, MaryXML.BOUNDARY); - newBoundary.setAttribute("breakindex", "1"); - el.getParentNode().insertBefore(newBoundary, el); - } - prevEl = el; - } - } - } - } - - private void processSentence(Element sentence) { - NodeList syllables = sentence.getElementsByTagName(MaryXML.SYLLABLE); - if (syllables.getLength() < 1) { - return; // no syllables -- what can we do? - } - - NodeList phrases = sentence.getElementsByTagName(MaryXML.PHRASE); - for (int i = 0; i < phrases.getLength(); i++) { - Element phrase = (Element) phrases.item(i); - // calculate the F0 targets - calculateF0Targets(phrase); - // anchor the F0 targets at individual segments - // calculate frequency values. - } - } - - - ////////////////////////////////////////////////////////////////////// - ///////////////////////////// Tone Rules ///////////////////////////// - ////////////////////////////////////////////////////////////////////// - - /** - * Determine the topline and baseline start and end frequencies for a - * phrase. 
Create an appropriate TopBaseConfiguration object - * and save it in a hash, as a value to which the phrase element is the - * key. - * @see #getToplineFrequency(Element,int) - * @see #getBaselineFrequency(Element,int) - */ - private void determinePhraseTopBaseConf(Element phrase) { - Voice voice = null; - // Determine the settings for the phrase element: - Element voiceElement = (Element) MaryDomUtils.getAncestor(phrase, MaryXML.VOICE); - if (voiceElement != null) - voice = Voice.getVoice(voiceElement); - if (voice == null) - voice = (Voice) defaultVoiceMap.get(phrase.getOwnerDocument()); - // In any case, if we do not have a voice now, - // use the global default voice: - if (voice == null) { - voice = Voice.getDefaultVoice(getLocale()); - } - if (!(voice instanceof MbrolaVoice)) { - throw new IllegalStateException("This contour generator can only be used with an MBROLA voice, but "+voice.getName()+" is a "+voice.getClass()); - } - MbrolaVoice mVoice = (MbrolaVoice) voice; - int topStart = mVoice.topStart(); - int topEnd = mVoice.topEnd(); - int baseStart = mVoice.baseStart(); - int baseEnd = mVoice.baseEnd(); - TopBaseConfiguration tbConf = new TopBaseConfiguration(topStart, topEnd, baseStart, baseEnd); - - // Now see if there are any global modifiers ( elements - // ancestors to this phrase element, but inside the voiceElement if - // there is one; start with the outermost element and - // superpose them one after the other): - Element current = phrase; - Stack prosodyElements = new Stack(); - while (MaryDomUtils.hasAncestor(current, MaryXML.PROSODY)) { - current = (Element) MaryDomUtils.getAncestor(current, MaryXML.PROSODY); - prosodyElements.push(current); - // Ignore prosody elements that are outside the closest voice element: - if (voiceElement != null && !MaryDomUtils.isAncestor(voiceElement, current)) { - // We have gone upwards past the voiceElement, so stop. 
- break; - } - } - while (!prosodyElements.empty()) { - Element prosody = (Element) prosodyElements.pop(); - tbConf = calculateTopBase(prosody, tbConf); - } - - // OK, now tbConf is the best we can do for the prosodic settings of - // this phrase. - // Add timing information: (start is 0 for a phrase, end is the end of - // the last segment in the phrase) - Element lastSegment = MaryDomUtils.getLastElementByTagName(phrase, MaryXML.PHONE); - if (lastSegment != null) { - // There ARE segments in this phrase - int endTime = 0; - try { - endTime = Integer.parseInt(lastSegment.getAttribute("end")); - } catch (NumberFormatException e) { - logger.warn("Unexpected end time `" + lastSegment.getAttribute("end") + "'"); - } - tbConf.setTimes(0, endTime); - } - // Save the TopBaseConfiguration object in a hash, with the phrase - // element as a key: - topBaseConfMap.put(phrase, tbConf); - - //System.err.println("For phrase ranging from " + tbConf.startTime() + " to " + tbConf.endTime() + ", determined topStart " + tbConf.topStart() + ", topEnd " + tbConf.topEnd() + ", baseStart " + tbConf.baseStart() + ", baseEnd " + tbConf.baseEnd()); - } - - /** - * Determine the topline and baseline start and end frequencies for a - * prosody element within a phrase. Create an - * appropriate TopBaseConfiguration object and save it in a hash, as a - * value to which the prosody element is the key. 
- * @see getToplineFrequency(Element,int) - * @see getBaselineFrequency(Element,int) - */ - private void determineProsodyTopBaseConf(Element prosody) { - if (prosody == null) - throw new NullPointerException("Received null argument"); - if (!prosody.getTagName().equals(MaryXML.PROSODY)) - throw new IllegalArgumentException("Expected argument, got <" + prosody.getTagName() + ">"); - // Find closest ancestor phrase or prosody element: - Element phrase = (Element) MaryDomUtils.getAncestor(prosody, MaryXML.PHRASE); - if (phrase == null) { - logger.warn("Trying to determine prosody top base conf for element without a ancestor. Ignoring."); - return; - } - Element confReferenceKey = phrase; - // Now see if there is a prosody element which is our ancestor and which is - // inside the phrase -- then that one is our configuration reference: - Element prosodyAncestor = (Element) MaryDomUtils.getAncestor(prosody, MaryXML.PROSODY); - if (prosodyAncestor != null && MaryDomUtils.isAncestor(phrase, prosodyAncestor)) { - confReferenceKey = prosodyAncestor; - } - TopBaseConfiguration confReference = (TopBaseConfiguration) topBaseConfMap.get(confReferenceKey); - assert confReference != null; - // Now calculate start and end times for this element: - Element firstSegment = MaryDomUtils.getFirstElementByTagName(prosody, MaryXML.PHONE); - int startTime = 0; - try { - startTime = - Integer.parseInt(firstSegment.getAttribute("end")) - Integer.parseInt(firstSegment.getAttribute("d")); - } catch (NumberFormatException e) { - logger.warn( - "Unexpected start time `" - + firstSegment.getAttribute("end") - + "' - `" - + firstSegment.getAttribute("d") - + "'"); - } - - Element lastSegment = MaryDomUtils.getLastElementByTagName(prosody, MaryXML.PHONE); - int endTime = 0; - try { - endTime = Integer.parseInt(lastSegment.getAttribute("end")); - } catch (NumberFormatException e) { - logger.warn("Unexpected end time `" + lastSegment.getAttribute("end") + "'"); - } - // Create a new 
TopBaseConfiguration element reflecting the - // settings in the confReference: - TopBaseConfiguration tbConf = - new TopBaseConfiguration( - confReference.toplineFrequency(startTime), - confReference.toplineFrequency(endTime), - confReference.baselineFrequency(startTime), - confReference.baselineFrequency(endTime), - startTime, - endTime); - // Modify this reference according to this prosody element: - tbConf = calculateTopBase(prosody, tbConf); - // Save the TopBaseConfiguration object in a hash, with the prosody - // element as a key: - topBaseConfMap.put(prosody, tbConf); - - //System.err.println("For prosody ranging from " + tbConf.startTime() + " to " + tbConf.endTime() + ", determined topStart " + tbConf.topStart() + ", topEnd " + tbConf.topEnd() + ", baseStart " + tbConf.baseStart() + ", baseEnd " + tbConf.baseEnd()); - - } - - /** - * Starting from a baseline prosodic configuration and the settings - * requested in the prosody element, a new prosodic - * configuration (topline and baseline start and end frequencies) is - * calculated. - */ - private TopBaseConfiguration calculateTopBase(Element prosody, TopBaseConfiguration origConf) { - int topStart = origConf.topStart(); - int topEnd = origConf.topEnd(); - int baseStart = origConf.baseStart(); - int baseEnd = origConf.baseEnd(); - String pitch = prosody.getAttribute("pitch"); - if (!pitch.equals("")) { - if (MaryUtils.isPercentageDelta(pitch)) { - //System.err.println("Percentage delta: `" + pitch + "'"); - int percentage = MaryUtils.getPercentageDelta(pitch); - baseStart = (baseStart * (100 + percentage)) / 100; - baseEnd = (baseEnd * (100 + percentage)) / 100; - // For the topline we have two possibilities: - // i) we shift by the same number of Hz as the baseline, - // i.e. keep the range constant in the frequency domain; - // ii) we multiply with the same factor, - // i.e. 
keep the range constant in the log frequency domain - // (constant number of semitones range) - // The latter seems more appropriate given the fact that the - // human ear hears frequencies logarithmically. - topStart = (topStart * (100 + percentage)) / 100; - topEnd = (topEnd * (100 + percentage)) / 100; - } else if (MaryUtils.isSemitonesDelta(pitch)) { - //System.err.println("Semitones delta: `" + pitch + "'"); - double semitones = MaryUtils.getSemitonesDelta(pitch); - // Adding one semitone to any frequency corresponds to a - // multiplication with 2^(1/12) = 1.0595. - // Subtracting one semitone corresponds to a division by 1.0595. - // In general: Changing the frequency by x semitones corresponds - // to a multiplication with 1.0595^x. - double factor = Math.pow(1.0595, semitones); - baseStart = (int) (baseStart * factor); - baseEnd = (int) (baseEnd * factor); - topStart = (int) (topStart * factor); - topEnd = (int) (topEnd * factor); - } else if (MaryUtils.isNumberDelta(pitch)) { // +5, -10.2 - //System.err.println("Number delta: `" + pitch + "'"); - int delta = MaryUtils.getNumberDelta(pitch); - baseStart += delta; - baseEnd += delta; - topStart += delta; - topEnd += delta; - } else if (MaryUtils.isUnsignedNumber(pitch)) { // 180, 212.75 - //System.err.println("Unsigned number: `" + pitch + "'"); - // In order to keep the range constant in log frequency domain, - // calculate the ratio of current topMean and baseMean. Notice - // that the spreads are not calculated in the frequency domain, - // i.e. the slope of topline and baseline in the frequency - // domain change during the shift. It is unclear whether this - // is very relevant. 
- int baseMean = (baseStart + baseEnd) / 2; - int topMean = (topStart + topEnd) / 2; - double topBaseRatio = ((double) topMean) / baseMean; - int topSpread = (topEnd - topStart) / 2; - int newBaseMean = MaryUtils.getUnsignedNumber(pitch); - int baseSpread = (baseEnd - baseStart) / 2; - baseStart = newBaseMean - baseSpread; - baseEnd = newBaseMean + baseSpread; - topStart = (int) (newBaseMean * topBaseRatio - topSpread); - topEnd = (int) (newBaseMean * topBaseRatio + topSpread); - } - } - String range = prosody.getAttribute("range"); - if (!range.equals("")) { - // Range leaves the baseline untouched, and moves the topline. - // All relative changes stretch the distance top-base. - if (MaryUtils.isPercentageDelta(range)) { // +25%, -17.2% - //System.err.println("Percentage delta: `" + range + "'"); - int percentage = MaryUtils.getPercentageDelta(range); - topStart = baseStart + ((topStart - baseStart) * (100 + percentage)) / 100; - topEnd = baseEnd + ((topEnd - baseEnd) * (100 + percentage)) / 100; - } else if (MaryUtils.isSemitonesDelta(range)) { // +5.2st, -0.7st - //System.err.println("Semitones delta: `" + range + "'"); - // Change the current range by x semitones - double semitones = MaryUtils.getSemitonesDelta(range); - // for explanations, see pitch section above. 
- double factor = Math.pow(1.0595, semitones); - int deltaStart = (int) ((topStart - baseStart) * factor); - int deltaEnd = (int) ((topEnd - baseEnd) * factor); - topStart = baseStart + deltaStart; - topEnd = baseEnd + deltaEnd; - } else if (MaryUtils.isNumberDelta(range)) { // +15, -27.3 - //System.err.println("Number delta: `" + range + "'"); - int delta = MaryUtils.getNumberDelta(range); - topStart += delta; - topEnd += delta; - } else if (MaryUtils.isUnsignedSemitones(range)) { // 12st, 5.32st - //System.err.println("Unsigned semitones: `" + range + "'"); - // Set the new range to x semitones, discarding the previous - // range - double semitones = MaryUtils.getUnsignedSemitones(range); - // for explanations, see pitch section above. - double factor = Math.pow(1.0595, semitones); - topStart = (int) (baseStart * factor); - topEnd = (int) (baseEnd * factor); - } else if (MaryUtils.isUnsignedNumber(range)) { // 60, 50.4 - //System.err.println("Unsigned number: `" + range + "'"); - // Notice that the spread is not calculated in the frequency - // domain, i.e. the slope of topline and baseline in the - // frequency domain change during the shift. It is unclear - // whether this is very relevant. - int baseMean = (baseStart + baseEnd) / 2; - int topSpread = (topEnd - topStart) / 2; - int newRange = MaryUtils.getUnsignedNumber(range); - topStart = baseMean + newRange - topSpread; - topEnd = baseMean + newRange + topSpread; - } - } - String pitchDynamics = prosody.getAttribute("pitch-dynamics"); - if (!pitchDynamics.equals("")) { - if (MaryUtils.isPercentageDelta(pitchDynamics)) { // +25%, -17.2% - //System.err.println("Percentage delta: `" + pitchDynamics + "'"); - int percentage = MaryUtils.getPercentageDelta(pitchDynamics); - int baseMean = (baseStart + baseEnd) / 2; - // Motivation: m = (a+z)/2, and - // z = (1+p)*a (that is the idea in "pitch-dynamics"!) 
- // => m = (1 + p/2) * a => a = m / (1 + p/2) - baseStart = (200 * baseMean) / (200 + percentage); - baseEnd = (baseStart * (100 + percentage)) / 100; - } else if (MaryUtils.isNumberDelta(pitchDynamics)) { // +15, -27.3 - //System.err.println("Number delta: `" + pitchDynamics + "'"); - int delta = MaryUtils.getNumberDelta(pitchDynamics); - int baseMean = (baseStart + baseEnd) / 2; - baseStart = baseMean + delta / 2; - baseEnd = baseMean - delta / 2; - } else if (MaryUtils.isSemitonesDelta(pitchDynamics)) { // +5.2st, -0.7st - //System.err.println("Semitones delta: `" + pitchDynamics + "'"); - double semitones = MaryUtils.getSemitonesDelta(pitchDynamics); - // for explanations, see pitch section above. - double factor = Math.pow(1.0595, semitones); - int baseMean = (baseStart + baseEnd) / 2; - // Motivation: as for percentage delta above, replacing - // (1+p) with factor: - // m = (a+z)/2 - // z = factor * a (that is the idea in "pitch-dynamics"!) - // => m = (1+factor)*a/2 => a = 2m / (1+factor) - baseStart = (int) ((2 * baseMean) / (1 + factor)); - baseEnd = (int) (factor * baseStart); - } // non-delta values don't make sense for X-dynamics. 
- } - String rangeDynamics = prosody.getAttribute("range-dynamics"); - if (!rangeDynamics.equals("")) { - if (MaryUtils.isPercentageDelta(rangeDynamics)) { // +25%, -17.2% - //System.err.println("Percentage delta: `" + rangeDynamics + "'"); - int percentage = MaryUtils.getPercentageDelta(rangeDynamics); - int baseMean = (baseStart + baseEnd) / 2; - int topMean = (topStart + topEnd) / 2; - int rangeMean = topMean - baseMean; - // Motivation: see "pitch-dynamics" above - int rangeStart = (200 * rangeMean) / (200 + percentage); - int rangeEnd = (rangeStart * (100 + percentage)) / 100; - topStart = baseStart + rangeStart; - topEnd = baseEnd + rangeEnd; - } else if (MaryUtils.isNumberDelta(rangeDynamics)) { // +15, -27.3 - //System.err.println("Number delta: `" + rangeDynamics + "'"); - int delta = MaryUtils.getNumberDelta(rangeDynamics); - int baseMean = (baseStart + baseEnd) / 2; - int topMean = (topStart + topEnd) / 2; - int rangeMean = topMean - baseMean; - int rangeStart = rangeMean + delta / 2; - int rangeEnd = rangeMean - delta / 2; - topStart = baseStart + rangeStart; - topEnd = baseEnd + rangeEnd; - } else if (MaryUtils.isSemitonesDelta(rangeDynamics)) { // +5.2st, -0.7st - //System.err.println("Semitones delta: `" + rangeDynamics + "'"); - double semitones = MaryUtils.getSemitonesDelta(rangeDynamics); - // for explanations, see pitch section above. - double factor = Math.pow(1.0595, semitones); - int baseMean = (baseStart + baseEnd) / 2; - int topMean = (topStart + topEnd) / 2; - int rangeMean = topMean - baseMean; - // Motivation: see pitch-dynamics section above - int rangeStart = (int) ((2 * rangeMean) / (1 + factor)); - int rangeEnd = (int) (factor * rangeStart); - topStart = baseStart + rangeStart; - topEnd = baseEnd + rangeEnd; - } // non-delta values don't make sense for X-dynamics. 
- } - - // Refuse to put topline below baseline: - if (topStart < baseStart) - topStart = baseStart; - if (topEnd < baseEnd) - topEnd = baseEnd; - return new TopBaseConfiguration(topStart, topEnd, baseStart, baseEnd, origConf.startTime(), origConf.endTime()); - } - - - /** - * For a given phrase, calculate the target positions and frequencies - * for each ToBI accent and boundary tone in the phrase. - */ - private void calculateF0Targets(Element phrase) { - // Determine top- / baseline start and end values for each phrase - determinePhraseTopBaseConf(phrase); - // and for all the elements within the phrase. - NodeList prosodies = phrase.getElementsByTagName(MaryXML.PROSODY); - for (int j = 0; j < prosodies.getLength(); j++) { - Element prosody = (Element) prosodies.item(j); - determineProsodyTopBaseConf(prosody); - } - // Some useful memories for assigning the targets: - boolean isFirstInPhrase = true; - Element prevToneSyllable = null; - char prevTone = 0; // valid values: 'H' and 'L' - int lastHFreq = 0; // in Hertz - List allTargetList = new ArrayList(); - // Go through all tokens and boundaries in the phrase, from left to - // right: - TreeWalker tw = - ((DocumentTraversal) phrase.getOwnerDocument()).createTreeWalker( - phrase, - NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(new String[] { MaryXML.SYLLABLE }), - false); - Element e = null; - while ((e = (Element) tw.nextNode()) != null) { - Element referenceSyllable = null; - Element rule = null; - if (e.getTagName().equals(MaryXML.SYLLABLE)) { // a syllable - // tone: - if (e.hasAttribute("tone")) { - String tone = e.getAttribute("tone").toUpperCase(); - rule = (Element) toneMap.get(tone); - if (rule != null) { - // Determine the stressed syllable in the token: - referenceSyllable = e; - } - } - } else { - // Boundary: - if (e.hasAttribute("tone")) { - String tone = e.getAttribute("tone").toUpperCase(); - rule = (Element) toneMap.get(tone); - if (rule != null) { - // The reference syllable is the one preceding 
the - // boundary: - TreeWalker stw = - ((DocumentTraversal) e.getOwnerDocument()).createTreeWalker( - phrase, - NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(MaryXML.SYLLABLE), - false); - stw.setCurrentNode(e); - referenceSyllable = (Element) stw.previousNode(); - } - } - } - if (referenceSyllable != null && rule != null) { - logger.debug( - "Now assigning targets for tone `" - + rule.getAttribute("name") - + "' on syllable [" - + referenceSyllable.getAttribute("ph") - + "]"); - // We have some targets to assign - // For each target in the rule, first determine its location: - List targetList = new ArrayList(); - Target starTarget = null; - TreeWalker rtw = - ((DocumentTraversal) rule.getOwnerDocument()).createTreeWalker( - rule, - NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(new String[] { "target"}), - false); - Element rulePart = null; - while ((rulePart = (Element) rtw.nextNode()) != null) { - if (rulePart.getTagName().equals("target")) { - Target target = - determineInitialTargetLocation( - rulePart, - referenceSyllable, - isFirstInPhrase, - prevTone, - prevToneSyllable); - if (target != null) { - targetList.add(target); - allTargetList.add(target); - logger.debug( - " " - + target.type() - + " target on [" - + target.segment().getAttribute("p") - + "] at " - + target.getTargetTime() - + " ms"); - if (target.type().equals("star")) { - if (starTarget != null) { - logger.info( - "Found more than one star target for tone rule `" - + rule.getAttribute("name") - + "'"); - } - starTarget = target; - } - } - } else { // "prosody": downstep or upstep - // First, identify the syllable which is to be the - // first to be downstepped or upstepped. 
- String tCode = rulePart.getAttribute("t_code"); - Element prosSyllable = null; - if (tCode.equals("21")) { // this syllable - prosSyllable = referenceSyllable; - } else if (tCode.equals("11")) { // previous syllable - prosSyllable = MaryDomUtils.getPreviousOfItsKindIn(referenceSyllable, phrase); - } else if (tCode.equals("31")) { // next syllable - prosSyllable = MaryDomUtils.getNextOfItsKindIn(referenceSyllable, phrase); - } else if (tCode.equals("99")) { - // syllable after last tone - prosSyllable = MaryDomUtils.getNextOfItsKindIn(prevToneSyllable, phrase); - } - if (prosSyllable == null) { - // Unknown tCode setting or no previous or next - // syllable -- Well, then we start with this - // syllable - prosSyllable = referenceSyllable; - } - logger.debug( - " upstep/downstep starting with syllable [" + prosSyllable.getAttribute("ph") + "]"); - // Insert a prosody element into the syllable such - // that it encloses this syllable and the last - // syllable in the syllable. - // adaptProsody(rulePart, prosSyllable); - } - } - // Adjust location of "plus" type targets if necessary: - Iterator it = targetList.iterator(); - while (it.hasNext()) { - Target target = (Target) it.next(); - target.setMyStar(starTarget); - if (target.type().equals("plus")) { - //adjustTargetLocation(target, starTarget); - } - } - // Calculate target frequencies, and write the targets into the - // XML structure: - it = targetList.iterator(); - while (it.hasNext()) { - Target target = (Target) it.next(); - lastHFreq = calculateTargetFrequency(target, lastHFreq); - } - - // Now some useful memories for future rules inside this phrase: - // We have already assigned at least one target: - isFirstInPhrase = false; - prevToneSyllable = referenceSyllable; - String label = rule.getAttribute("name"); - if (label.lastIndexOf('H') > label.lastIndexOf('L')) { - // Remember previous tone was an H tone - prevTone = 'H'; - } else { - // Remember previous tone was an L tone - prevTone = 'L'; - } - } - } 
- // Now verify that targets don't overlap, and that no target is closer - // to another tone's target than to its own "star". - ListIterator it = allTargetList.listIterator(); - Target prev = null; - Target current = null; - Target next = null; - while (it.hasNext()) { - next = (Target) it.next(); - if (current != null) { - // Verify that next comes later than current: - int currentTargetTime = current.getTargetTime(); - int nextTargetTime = next.getTargetTime(); - if (currentTargetTime > nextTargetTime) { - // If one is a star, move the other one: - if (current.type().equals("star") && !next.type().equals("star")) { - Element oldSegment = next.segment(); - // Move next to the segment following current: - Element newSegment = getNextSegment(current.segment()); - int newTiming = 10; // at 10% of following segment - if (newSegment == null) { // no such segment - newSegment = current.segment(); - newTiming = 100; - } - next.setSegment(newSegment); - next.setTiming(newTiming); - // And recalculate the target frequency (trust that - // lastHFreq is not needed for this target) - calculateTargetFrequency(next, 0); - logger.debug( - "Found overlapping targets. Moved " - + "\"plus\" target from " - + nextTargetTime - + "ms [" - + oldSegment.getAttribute("p") - + "] to " - + next.getTargetTime() - + "ms [" - + next.segment().getAttribute("p") - + "]."); - } else if (next.type().equals("star") && !current.type().equals("star")) { - Element oldSegment = current.segment(); - // Move current to the segment preceding next: - Element newSegment = getPreviousSegment(next.segment()); - int newTiming = 90; // at 90% of following segment - if (newSegment == null) { // no such segment - newSegment = next.segment(); - newTiming = 0; - } - current.setSegment(newSegment); - current.setTiming(newTiming); - // And recalculate the target frequency (trust that - // lastHFreq is not needed for this target) - calculateTargetFrequency(current, 0); - logger.debug( - "Found overlapping targets. 
Moved " - + "\"plus\" target from " - + currentTargetTime - + "ms [" - + oldSegment.getAttribute("p") - + "] to " - + current.getTargetTime() - + "ms [" - + current.segment().getAttribute("p") - + "]."); - } else { // none is a star - // If none is a star, calculate the meeting point - // of their respective interpolation lines, and - // replace them with a single target at this point. - int tn = next.getTargetTime(); - int fn = next.f0(); - int tc = current.getTargetTime(); - int fc = current.f0(); - int t1; // new target time - int f1; // new f0 - // Two methods for calculating the new target: - if (next.myStar() != null - && next.myStar() != next - && current.myStar() != null - && current.myStar() != current) { - // The maths: - // next = (tn, fn); next.myStar() = (tns, fns) - // current = (tc, fc); current.myStar() = (tcs, fcs) - // We search for point (t1, f1) where lines meet. - // slope_n = (fns - fn) / (tns - tn) - // slope_c = (fc - fcs) / (tc - tcs) - // f1 = fn + slope_n * (t1 - tn) - // and - // f1 = fc - slope_c * (tc - t1) - // out of which we can conlcude - // t1 = ((fn - slope_n tn) - (fc - slope_c tc)) / - // (slope_c - slope_n) - int tns = next.myStar().getTargetTime(); - int fns = next.myStar().f0(); - int tcs = current.myStar().getTargetTime(); - int fcs = current.myStar().f0(); - double slope_n = ((double) fns - fn) / (tns - tn); - double slope_c = ((double) fc - fcs) / (tc - tcs); - if (slope_n < 0 && slope_c >= 0 || slope_c < 0 && slope_n >= 0) { - t1 = (int) (((fn - slope_n * tn) - (fc - slope_c * tc)) / (slope_c - slope_n)); - f1 = (int) (fn + slope_n * (t1 - tn)); - } else { - t1 = (tn + tc) / 2; - f1 = (fn + fc) / 2; - } - } else { - // One of them has no star (or both are stars - // themselves, which should not happen) - // Calculate a simple time and frequency mean. 
- t1 = (tn + tc) / 2; - f1 = (fn + fc) / 2; - } - // Set the new values for current: - current.setTargetTime(t1); - current.setF0(f1); - - logger.debug( - "Found two overlapping targets, at " - + tc - + "ms, " - + fc - + "Hz and " - + tn - + "ms, " - + fn - + "Hz. Replaced them with a target at " - + t1 - + "ms, " - + f1 - + "Hz."); - // Delete next: - it.remove(); // removes next; - continue; // and re-get a next - } - } - } - // Once we know no targets are inversed, we can check that no - // target is closer to another tone's target than to its own star. - if (prev != null) { - int prevTime = prev.getTargetTime(); - int currentTime = current.getTargetTime(); - int nextTime = next.getTargetTime(); - if (current.myStar() == next - && nextTime - currentTime > currentTime - prevTime - || current.myStar() == prev - && currentTime - prevTime > nextTime - currentTime) { - int newTime = (prevTime + nextTime) / 2; - logger.debug( - "Target at " - + currentTime - + "ms is further from its star than from" - + " a different target -- moving to " - + newTime - + "ms."); - current.setTargetTime(newTime); - // And recalculate the target frequency (trust that - // lastHFreq is not needed for this target) - calculateTargetFrequency(current, 0); - - } - } - prev = current; - current = next; - } - // Finally, insert the targets into MaryXML: - it = allTargetList.listIterator(); - while (it.hasNext()) { - insertTargetIntoMaryXML((Target) it.next()); - } - } - - - - /** - * Determine the initial location of a target point on the time axis, based - * on the rule in Element target relative to the given - * syllable Element. For accents, this is the stressed - * syllable of the accented word; for boundary tones, it is the last - * syllable before the boundary. - * @return a newly created Target object, or null if the conditions for - * applying this rule part were not fulfilled or no suitable target - * location could be found. 
- */ - private Target determineInitialTargetLocation( - Element rulePart, - Element syllable, - boolean isFirstInPhrase, - char prevTone, - Element prevToneSyllable) { - // Essential sanity check: - if (rulePart == null - || !rulePart.getTagName().equals("target") - || syllable == null - || !syllable.getTagName().equals(MaryXML.SYLLABLE)) { - return null; - } - // Verify if rulePart condition applies: - if (rulePart.hasAttribute("condition")) { - String condition = rulePart.getAttribute("condition"); - if (condition.equals("first_in_IP") && !isFirstInPhrase) { - return null; - } else if (condition.equals("prevtone_H") && prevTone != 'H') { - return null; - } else if (condition.equals("prevtone_L") && prevTone != 'L') { - return null; - } - } - // OK, no condition violated. - // Locate target position: - Element segment = null; - // Target timing is in percent of the segment duration, relative to the - // segment start: - int timing = -1; - String tCode = rulePart.getAttribute("t_code"); - if (tCode.equals("0")) { // start of this declination phrase - Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); - segment = MaryDomUtils.getFirstElementByTagName(phrase, MaryXML.PHONE); - timing = 0; - } else if (tCode.equals("12")) { - // start of the nucleus of the preceding syllable - Element prevSyl = getPreviousSyllable(syllable); - if (prevSyl == null) { - // No such syllable -- just ignore this target - } else { - Element nucleus = getNucleus(prevSyl); - if (nucleus == null) // No nucleus -- take first segment then: - segment = MaryDomUtils.getFirstElementByTagName(prevSyl, MaryXML.PHONE); - else - segment = nucleus; - timing = 0; - } - } else if (tCode.equals("21")) { - // start of this syllable - segment = MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE); - timing = 0; - } else if (tCode.equals("22")) { - // start of the nucleus of this syllable - Element nucleus = getNucleus(syllable); - if (nucleus == null) // No nucleus -- 
take first segment then: - segment = MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE); - else - segment = nucleus; - timing = 0; - } else if (tCode.equals("23")) { - // middle of the nucleus of this syllable - // (simplifying assumption: the nucleus contains only one segment) - Element nucleus = getNucleus(syllable); - if (nucleus == null) // No nucleus -- take first segment then: - segment = MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE); - else - segment = nucleus; - timing = 50; - } else if (tCode.equals("24")) { - // end of the nucleus of this syllable - // (simplifying assumption: the nucleus contains only one segment) - Element nucleus = getNucleus(syllable); - if (nucleus == null) // No nucleus -- take last segment then: - segment = MaryDomUtils.getLastElementByTagName(syllable, MaryXML.PHONE); - else - segment = nucleus; - timing = 100; - } else if (tCode.equals("25")) { - // end of this syllable - segment = MaryDomUtils.getLastElementByTagName(syllable, MaryXML.PHONE); - timing = 100; - } else if (tCode.equals("34")) { - // end of the nucleus of the following syllable - // (simplifying assumption: the nucleus contains only one segment) - Element nextSyl = getNextSyllable(syllable); - if (nextSyl == null) { - // No such syllable -- just ignore this target - } else { - Element nucleus = getNucleus(nextSyl); - if (nucleus == null) // No nucleus -- take last segment then: - segment = MaryDomUtils.getLastElementByTagName(nextSyl, MaryXML.PHONE); - else - segment = nucleus; - timing = 100; - } - } else if (tCode.equals("99")) { - // middle of the nucleus of the syllable after the syllable - // associated with the previous tone - Element syl = getNextSyllable(prevToneSyllable); - if (syl == null) { - // No such syllable -- just ignore this target - } else { - Element nucleus = getNucleus(syl); - if (nucleus == null) // No nucleus -- take first segment then: - segment = MaryDomUtils.getFirstElementByTagName(syl, MaryXML.PHONE); - else 
- segment = nucleus; - timing = 50; - } - } else if (tCode.equals("98")) { - // middle of the nucleus of the first 1ary or 2ary stressed - // syllable after the syllable after the syllable associated with - // the previous tone - Element syl = getNextSyllable(prevToneSyllable); - if (syl == null) { - // No such syllable -- just ignore this target - } else { - // OK, skip that first syllable: - syl = getNextSyllable(syl); - if (syl == null) { - // No such syllable -- just ignore this target - } else { - // Now take the first one we get which has 1ary or 2ary stress: - Element fallback = syl; - while (syl != null - && !(syl.getAttribute("stress").equals("1") || syl.getAttribute("stress").equals("2"))) { - syl = getNextSyllable(syl); - } - if (syl == null) - syl = fallback; - // OK, now we have a syllable to work with. - Element nucleus = getNucleus(syl); - if (nucleus == null) // No nucleus -- take first segment then: - segment = MaryDomUtils.getFirstElementByTagName(syl, MaryXML.PHONE); - else - segment = nucleus; - timing = 50; - } - } - } - if (segment == null || timing == -1) { - logger.debug(" Target (" + rulePart.getAttribute("f0") + ") could not be attached. skipping."); - return null; - } - return new Target(rulePart, segment, timing, 0); - } - - - /** - * For the given target, calculate the appropriate F0 by taking into - * account the local topline and baseline frequency. - * @return The last target frequency on the topline, either as passed here - * through parameter lastHFreq or as realised by this target. 
- */ - private int calculateTargetFrequency(Target target, int lastHFreq) { - // sanity check: - if (target == null || target.targetRule() == null || target.segment() == null) - throw new NullPointerException("Null target specification -- cannot calculate Frequency"); - // Calculate target frequency: - String f0descr = target.targetRule().getAttribute("f0"); - int f0 = 0; - TopBaseConfiguration tbConf = null; - // Approximation if we need lastHFreq and don't have it: - if (f0descr.equals("last_H_freq") && lastHFreq == 0) { - f0descr = "1100"; // 10% above top line - } - if (MaryUtils.isNumber(f0descr)) { - int f0promille = MaryUtils.getNumber(f0descr); - Element phrase = (Element) MaryDomUtils.getAncestor(target.segment(), MaryXML.PHRASE); - Element prosody = (Element) MaryDomUtils.getAncestor(target.segment(), MaryXML.PROSODY); - Element topBaseRef = phrase; - if (prosody != null && MaryDomUtils.isAncestor(phrase, prosody)) { - // A local prosody tag -- this is our reference - topBaseRef = prosody; - } - // For accents, realise target overshoot or undershoot - // as a function of the "accent-prominence" attribute: - if (prosody != null - && // inside or outside phrase - target.targetRule().getParentNode().getNodeName().equals("tone") - && target.type().equals("star")) { - ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); - if (settings != null) { - int accentProminence = settings.accentProminence(); - // Stretch the distance of f0promille from 500 - int dist = f0promille - 500; - int newDist = (dist * accentProminence) / 100; - f0promille = 500 + newDist; - } - } - tbConf = (TopBaseConfiguration) topBaseConfMap.get(topBaseRef); - int d = 0; - try { - d = Integer.parseInt(target.segment().getAttribute("d")); - } catch (NumberFormatException e) { - logger.warn("Unexpected duration value `" + target.segment().getAttribute("d") + "'"); - } - int end = 0; - try { - end = Integer.parseInt(target.segment().getAttribute("end")); - } catch 
(NumberFormatException e) { - logger.warn("Unexpected duration value `" + target.segment().getAttribute("end") + "'"); - } - // Remember that timing is expressed as a percentage of d: - int timeMillis = (end - d) + (d * target.timing()) / 100; - if (f0promille == 1000) { // on topline - f0 = tbConf.toplineFrequency(timeMillis); - lastHFreq = f0; - } else if (f0promille == 0) { // on baseline - f0 = tbConf.baselineFrequency(timeMillis); - } else { // somewhere in between or above or below - int base = tbConf.baselineFrequency(timeMillis); - int top = tbConf.toplineFrequency(timeMillis); - int range = top - base; - f0 = base + (f0promille * range) / 1000; - } - } else if (f0descr.equals("last_H_freq")) { - f0 = lastHFreq; - } else { - logger.warn("Unknown f0 specification `" + f0descr + "' in file " + MaryProperties.getFilename(tonerulefilePropertyName)); - } - if (f0 != 0) { - // OK, valid - target.setF0(f0); - logger.debug( - "Target on segment [" - + target.segment().getAttribute("p") - + "] at " - + target.getTargetTime() - + " ms, " - + target.f0() - + " Hz (" - + f0descr - + ")"); - } - return lastHFreq; - } - - private void insertTargetIntoMaryXML(Target target) { - if (target != null && target.segment() != null && target.timing() != -1 && target.f0() != 0) { - String newF0 = "(" + target.timing() + "," + target.f0() + ")"; - if (target.segment().hasAttribute("f0")) { - String oldF0 = target.segment().getAttribute("f0"); - target.segment().setAttribute("f0", oldF0 + " " + newF0); - } else { - target.segment().setAttribute("f0", newF0); - } - } - } - - /** - * Add a prosody element in the current phrase, realising the - * upstep or downstep formulated in prosodyRule, and enclosing - * the given syllable and all other syllables until the end of - * the phrase. 
- */ - private void adaptProsody(Element prosodyRule, Element syllable) { - Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); - Element first = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.TOKEN); - if (MaryDomUtils.hasAncestor(first, MaryXML.MTU)) { - first = (Element) MaryDomUtils.getHighestLevelAncestor(first, MaryXML.MTU); - } - Element lastSyl = MaryDomUtils.getLastElementByTagName(phrase, MaryXML.SYLLABLE); - Element last = (Element) MaryDomUtils.getAncestor(lastSyl, MaryXML.TOKEN); - if (MaryDomUtils.hasAncestor(last, MaryXML.MTU)) { - last = (Element) MaryDomUtils.getHighestLevelAncestor(last, MaryXML.MTU); - } - Element newProsody = MaryDomUtils.encloseNodesWithNewElement(first, last, MaryXML.PROSODY); - newProsody.setAttribute("range", prosodyRule.getAttribute("range")); - determineProsodicSettings(newProsody); - determineProsodyTopBaseConf(newProsody); - // And now, if there are any prosody tags enclosed by the new - // prosody tag, their topbaseconf needs to be calculated again - TreeWalker tw = - ((DocumentTraversal) newProsody.getOwnerDocument()).createTreeWalker( - newProsody, - NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(MaryXML.PROSODY), - false); - Element p = null; - while ((p = (Element) tw.nextNode()) != null) { - determineProsodyTopBaseConf(p); - } - } - - - - ////////////////////////////////////////////////////////////////////// - ////////////////////////////// Helpers /////////////////////////////// - ////////////////////////////////////////////////////////////////////// - - private Element getToken(Element segmentOrSyllable) { - return (Element) MaryDomUtils.getAncestor(segmentOrSyllable, MaryXML.TOKEN); - } - - private Element getSyllable(Element segment) { - return (Element) MaryDomUtils.getAncestor(segment, MaryXML.SYLLABLE); - } - - /** - * Find the segment preceding this segment within the same - * phrase. - * @return that segment, or null if there is no such segment. 
- */ - private static Element getPreviousSegment(Element segment) { - Element phrase = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PHRASE); - return MaryDomUtils.getPreviousOfItsKindIn(segment, phrase); - } - - /** - * Find the segment following this segment within the same - * phrase. - * @return that segment, or null if there is no such segment. - */ - private static Element getNextSegment(Element segment) { - Element phrase = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PHRASE); - return MaryDomUtils.getNextOfItsKindIn(segment, phrase); - } - - /** - * Find the syllable preceding this syllable within the same - * phrase. - * @return that syllable, or null if there is no such - * syllable. - */ - private static Element getPreviousSyllable(Element syllable) { - Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); - return MaryDomUtils.getPreviousOfItsKindIn(syllable, phrase); - } - - /** - * Find the syllable following this syllable within the same - * phrase. - * @return that syllable, or null if there is no such - * syllable. - */ - private static Element getNextSyllable(Element syllable) { - if (syllable == null) - return null; - Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); - return MaryDomUtils.getNextOfItsKindIn(syllable, phrase); - } - - private boolean hasTone(Element token) { - String tone = token.getAttribute("tone").toUpperCase(); - // Is it a known / valid accent: - return toneMap.containsKey(tone); - } - - /** - * Search for boundary and syllable elements following the given syllable. - * If the next matching element found is a boundary with breakindex - * minBreakindex or larger, return true; otherwise, - * return false. - * If there is no next node, return true. 
- */ - private boolean isLastBeforeBoundary(Element syllable, int minBreakindex) { - Document doc = syllable.getOwnerDocument(); - Element sentence = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.SENTENCE); - TreeWalker tw = - ((DocumentTraversal) doc).createTreeWalker( - sentence, - NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(new String[] { MaryXML.SYLLABLE, MaryXML.BOUNDARY }), - false); - tw.setCurrentNode(syllable); - Element next = (Element) tw.nextNode(); - if (next == null) { - // no matching node after syllable -- - // we must be in a final position. - return true; - } - if (next.getNodeName().equals(MaryXML.BOUNDARY)) { - if (getBreakindex(next) >= minBreakindex) - return true; - } - // This syllable is either followed by another syllable or - // by a boundary with breakindex < minBreakindex - return false; - } - - private boolean isMajIPFinal(Element syllable) { - // If this syllable is followed by a boundary with breakindex - // 4 or above, return true. - return isLastBeforeBoundary(syllable, 4); - } - - private boolean isMinipFinal(Element syllable) { - // If this syllable is followed by a boundary with breakindex - // 3 or above, return true. 
- return isLastBeforeBoundary(syllable, 3); - } - - private boolean isWordFinal(Element syllable) { - Element e = syllable; - while (e != null) { - e = MaryDomUtils.getNextSiblingElement(e); - if (e != null && e.getNodeName().equals(MaryXML.SYLLABLE)) - return false; - } - return true; - } - - private boolean isWordMedial(Element syllable) { - return !(isWordFinal(syllable) || isWordInitial(syllable)); - - } - - private boolean isWordInitial(Element syllable) { - Element e = syllable; - while (e != null) { - e = MaryDomUtils.getPreviousSiblingElement(e); - if (e != null && e.getNodeName().equals(MaryXML.SYLLABLE)) - return false; - } - return true; - } - - private boolean isInOnset(Element segment) { - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - assert ph != null; - if (ph.isSyllabic()) { - return false; - } - // OK, segment is not syllabic. See if it is followed by a syllabic - // segment: - for (Element e = MaryDomUtils.getNextSiblingElement(segment); - e != null; - e = MaryDomUtils.getNextSiblingElement(e)) { - ph = allophoneSet.getAllophone(e.getAttribute("p")); - assert ph != null; - if (ph.isSyllabic()) { - return true; - } - } - return false; - } - - private boolean isInNucleus(Element segment) { - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - assert ph != null; - return ph.isSyllabic(); - } - - private boolean isInCoda(Element segment) { - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - assert ph != null; - if (ph.isSyllabic()) { - return false; - } - // OK, segment is not syllabic. 
See if it is preceded by a syllabic - // segment: - for (Element e = MaryDomUtils.getPreviousSiblingElement(segment); - e != null; - e = MaryDomUtils.getPreviousSiblingElement(e)) { - ph = allophoneSet.getAllophone(e.getAttribute("p")); - assert ph != null; - if (ph.isSyllabic()) { - return true; - } - } - return false; - } - - - private int getBreakindex(Element boundary) { - int breakindex = 0; - try { - breakindex = Integer.parseInt(boundary.getAttribute("breakindex")); - } catch (NumberFormatException e) { - logger.warn("Unexpected breakindex value `" + boundary.getAttribute("breakindex") + "'"); - } - return breakindex; - } - - - /** - * For a syllable, return the first child segment which is a nucleus - * segment. Return null if there is no such segment. - */ - private Element getNucleus(Element syllable) { - if (syllable == null || !syllable.getTagName().equals(MaryXML.SYLLABLE)) - return null; - Element seg = MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE); - while (seg != null && !isInNucleus(seg)) { - seg = MaryDomUtils.getNextSiblingElementByTagName(seg, MaryXML.PHONE); - } - return seg; - } - - ////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////// - /////////////////////////// Helper Classes /////////////////////////// - ////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////// - - static class ProsodicSettings { - // Relative settings: 100 = 100% = no change - int rate; - int accentProminence; - int accentSlope; - int numberOfPauses; - int pauseDuration; - int vowelDuration; - int plosiveDuration; - int fricativeDuration; - int nasalDuration; - int liquidDuration; - int glideDuration; - int volume; - - ProsodicSettings() { - this.rate = 100; - this.accentProminence = 100; - this.accentSlope = 100; - this.numberOfPauses = 100; - this.pauseDuration = 100; - 
this.vowelDuration = 100; - this.plosiveDuration = 100; - this.fricativeDuration = 100; - this.nasalDuration = 100; - this.liquidDuration = 100; - this.glideDuration = 100; - this.volume = 50; - } - - ProsodicSettings( - int rate, - int accentProminence, - int accentSlope, - int numberOfPauses, - int pauseDuration, - int vowelDuration, - int plosiveDuration, - int fricativeDuration, - int nasalDuration, - int liquidDuration, - int glideDuration, - int volume) { - this.rate = rate; - this.accentProminence = accentProminence; - this.accentSlope = accentSlope; - this.numberOfPauses = numberOfPauses; - this.pauseDuration = pauseDuration; - this.vowelDuration = vowelDuration; - this.plosiveDuration = plosiveDuration; - this.fricativeDuration = fricativeDuration; - this.nasalDuration = nasalDuration; - this.liquidDuration = liquidDuration; - this.glideDuration = glideDuration; - this.volume = volume; - } - - int rate() { - return rate; - } - int accentProminence() { - return accentProminence; - } - int accentSlope() { - return accentSlope; - } - int numberOfPauses() { - return numberOfPauses; - } - int pauseDuration() { - return pauseDuration; - } - int vowelDuration() { - return vowelDuration; - } - int plosiveDuration() { - return plosiveDuration; - } - int fricativeDuration() { - return fricativeDuration; - } - int nasalDuration() { - return nasalDuration; - } - int liquidDuration() { - return liquidDuration; - } - int glideDuration() { - return glideDuration; - } - int volume() { - return volume; - } - - void setRate(int value) { - rate = value; - } - void setAccentProminence(int value) { - accentProminence = value; - } - void setAccentSlope(int value) { - accentSlope = value; - } - void setNumberOfPauses(int value) { - numberOfPauses = value; - } - void setPauseDuration(int value) { - pauseDuration = value; - } - void setVowelDuration(int value) { - vowelDuration = value; - } - void setPlosiveDuration(int value) { - plosiveDuration = value; - } - void 
setFricativeDuration(int value) { - fricativeDuration = value; - } - void setNasalDuration(int value) { - nasalDuration = value; - } - void setLiquidDuration(int value) { - liquidDuration = value; - } - void setGlideDuration(int value) { - glideDuration = value; - } - void setVolume(int value) { - volume = value; - } - - } - - static class TopBaseConfiguration { - int topStart; - int topEnd; - int baseStart; - int baseEnd; - int startTime; - int endTime; - double topSlope; - double baseSlope; - TopBaseConfiguration(int topStart, int topEnd, int baseStart, int baseEnd) { - this(topStart, topEnd, baseStart, baseEnd, 0, 0); - } - - TopBaseConfiguration(int topStart, int topEnd, int baseStart, int baseEnd, int startTime, int endTime) { - this.topStart = topStart; - this.topEnd = topEnd; - this.baseStart = baseStart; - this.baseEnd = baseEnd; - this.startTime = startTime; - this.endTime = endTime; - if (startTime != endTime) { // can calculate slope - topSlope = ((double) topEnd - topStart) / (endTime - startTime); - baseSlope = ((double) baseEnd - baseStart) / (endTime - startTime); - } else { - topSlope = 0; - baseSlope = 0; - } - } - - int topStart() { - return topStart; - } - int topEnd() { - return topEnd; - } - int baseStart() { - return baseStart; - } - int baseEnd() { - return baseEnd; - } - int startTime() { - return startTime; - } - int endTime() { - return endTime; - } - - void setTimes(int startTime, int endTime) { - this.startTime = startTime; - this.endTime = endTime; - if (startTime != endTime) { // can calculate slope - topSlope = ((double) topEnd - topStart) / (endTime - startTime); - baseSlope = ((double) baseEnd - baseStart) / (endTime - startTime); - } else { - topSlope = 0; - baseSlope = 0; - } - } - - /** - * Calculate the frequency of the topline at time time. - * This is calculated as a linear function of topStart, topEnd and - * time. 
- */ - int toplineFrequency(int time) { - if (time < startTime || time > endTime) { - throw new RuntimeException( - "Invalid time " + time + " (startTime " + startTime + ", endTime " + endTime + ")"); - } - return topStart + (int) (topSlope * (time - startTime)); - } - - /** - * Calculate the frequency of the baseline at time time. - * This is calculated as a linear function of baseStart, baseEnd and - * time. - */ - int baselineFrequency(int time) { - if (time < startTime || time > endTime) { - throw new RuntimeException( - "Invalid time " + time + "(startTime " + startTime + ", endTime " + endTime + ")"); - } - return baseStart + (int) (baseSlope * (time - startTime)); - } - } - - /** A class representing an F0-time target. */ - static class Target { - Element targetRule; - Element segment; - int timing; - int f0; - Target myStar; - - Target() { - targetRule = null; - segment = null; - timing = -1; - f0 = 0; - myStar = null; - } - - Target(Element targetRule, Element segment, int timing, int f0) { - this.targetRule = targetRule; - this.segment = segment; - this.timing = timing; - this.f0 = f0; - myStar = null; - } - Element targetRule() { - return targetRule; - } - Element segment() { - return segment; - } - int timing() { - return timing; - } - int f0() { - return f0; - } - Target myStar() { - return myStar; - } - - void setTargetRule(Element targetRule) { - this.targetRule = targetRule; - } - void setSegment(Element segment) { - this.segment = segment; - } - void setTiming(int timing) { - this.timing = timing; - } - void setF0(int f0) { - this.f0 = f0; - } - void setMyStar(Target star) { - this.myStar = star; - } - - String type() { - if (targetRule != null) - return targetRule.getAttribute("type"); - else - return ""; - } - - /** - * Get the target time relative to the beginning of the phrase, on the - * same scale as that used by the segment "end" attributes. - * @return the target time, or -1 if the time cannot be determined. 
- */ - int getTargetTime() { - if (segment == null || timing == -1) - return -1; - int end = -1; - try { - end = Integer.parseInt(segment.getAttribute("end")); - } catch (NumberFormatException e) { - return -1; - } - int d = -1; - try { - d = Integer.parseInt(segment.getAttribute("d")); - } catch (NumberFormatException e) { - return -1; - } - // The target time is: - // t = end - d + (timing/100 * d) = end - (1 - timing/100) * d - return end - (100 - timing) * d / 100; - } - - /** - * Set the target time relative to the beginning of the phrase, on the - * same scale as that used by the segment "end" attributes. Adjust this - * target's segment and timing accordingly. This is done as possible -- - * in particular in the presence of pauses, the target is only shifted towards - * the border of the pause but not into or beyond it. - * @return true on success, false on failure. - */ - boolean setTargetTime(int targetTime) { - if (targetTime < 0) - return false; - int currentTargetTime = getTargetTime(); - Element seg = segment; - try { - if (targetTime < currentTargetTime) { - while (seg != null - && Integer.parseInt(seg.getAttribute("end")) - Integer.parseInt(seg.getAttribute("d")) - > targetTime) { - Element s = getPreviousSegment(seg); - // Check for "holes": If last start time (end-d) is too large but - // next end time is too small, there is a small pause between the - // two segments - // => stay at the side of the pause closer to the original - if (s != null && Integer.parseInt(s.getAttribute("end")) < targetTime) { - targetTime = - Integer.parseInt(seg.getAttribute("end")) - Integer.parseInt(seg.getAttribute("d")); - break; // keep seg, forget about s - } else { - seg = s; - } - } - } else { // targetTime > currentTargetTime - while (seg != null && Integer.parseInt(seg.getAttribute("end")) < targetTime) { - Element s = getNextSegment(seg); - // Check for "holes": If last end time is too small but - // next start time (end-d) is too large, there is a small pause 
- // between the two segments - // => stay at the side of the pause closer to the original - if (s != null - && Integer.parseInt(s.getAttribute("end")) - Integer.parseInt(s.getAttribute("d")) - > targetTime) { - targetTime = Integer.parseInt(seg.getAttribute("end")); - break; // keep seg, forget about s - } else { - seg = s; - } - } - } - if (seg != null) { - // newTiming = (1 - (end - targetTime) / d) * 100 - int newTiming = - 100 - - (100 * (Integer.parseInt(seg.getAttribute("end")) - targetTime)) - / Integer.parseInt(seg.getAttribute("d")); - segment = seg; - timing = newTiming; - assert timing >= 0 && timing <= 100; - } - } catch (NumberFormatException e) { - return false; - } - return true; - } - - } - - - - -} - +package marytts.language.tib; + +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.Locale; +import java.util.Map; +import java.util.Stack; +import java.util.WeakHashMap; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.FactoryConfigurationError; +import javax.xml.parsers.ParserConfigurationException; + +import marytts.datatypes.MaryData; +import marytts.datatypes.MaryDataType; +import marytts.datatypes.MaryXML; +import marytts.exceptions.NoSuchPropertyException; +import marytts.modules.InternalModule; +import marytts.modules.MaryModule; +import marytts.modules.ModuleRegistry; +import marytts.modules.phonemiser.Allophone; +import marytts.modules.phonemiser.AllophoneSet; +import marytts.modules.synthesis.MbrolaVoice; +import marytts.modules.synthesis.Voice; +import marytts.server.MaryProperties; +import marytts.util.MaryUtils; +import marytts.util.dom.MaryDomUtils; +import marytts.util.dom.NameNodeFilter; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import 
org.w3c.dom.traversal.DocumentTraversal; +import org.w3c.dom.traversal.NodeFilter; +import org.w3c.dom.traversal.NodeIterator; +import org.w3c.dom.traversal.TreeWalker; + + +/** + * The Tibetan contour generator module. + * + * todo: Code säubern, kommentieren + * @author Lars Jungjohann + */ + +public class ContourGenerator extends InternalModule { + + /** This map contains the topline-baseline frequency configurations for the + * currently used phrase and sub-phrase prosody elements. As this is a + * WeakHashMap, entries will automatically be deleted when not in regular + * use anymore. */ + private WeakHashMap topBaseConfMap; + /** This map contains the prosodic settings, as ProsodicSettings objects, + * for the currently used prosody elements. As this is a WeakHashMap, + * entries will automatically be deleted when not in regular use + * anymore. */ + private WeakHashMap prosodyMap; + /** This map contains the default voice element for a given document. + * As this is a WeakHashMap, entries will automatically be deleted when not in + * regular use anymore. 
*/ + private WeakHashMap defaultVoiceMap; + /** The allophoneSet used for this language */ + private AllophoneSet allophoneSet; + /** The tone realisation rules for this language */ + private String tonerulefilePropertyName = "tibetan.cap.tonerulefile"; + private Map toneMap; + + + public ContourGenerator() { + super("TibetanContourGenerator", + MaryDataType.DURATIONS, + MaryDataType.ACOUSTPARAMS, + new Locale("tib")); + } + + public void startup() throws Exception { + super.startup(); + // We depend on the Synthesis module: + MaryModule synthesis = ModuleRegistry.getModule(marytts.modules.Synthesis.class); + assert synthesis != null; + if (synthesis.getState() == MaryModule.MODULE_OFFLINE) + synthesis.startup(); + // load phone list + allophoneSet = AllophoneSet.getAllophoneSet(MaryProperties.needFilename("tibetan.cap.phonelistfile")); + // load tone rules + toneMap = new HashMap(); + loadToneRules(); + // instantiate the Map in which settings are associated with elements: + // (when the objects serving as keys are not in ordinary use any more, + // the key-value pairs are deleted from the WeakHashMap earlier or + // later; that means we do not need to keep track of the hashmaps per + // thread) + topBaseConfMap = new WeakHashMap(); + prosodyMap = new WeakHashMap(); + defaultVoiceMap = new WeakHashMap(); + } + + private synchronized void loadToneRules() + throws FactoryConfigurationError, ParserConfigurationException, org.xml.sax.SAXException, IOException, + NoSuchPropertyException { + DocumentBuilderFactory f = DocumentBuilderFactory.newInstance(); + f.setValidating(false); + DocumentBuilder b = f.newDocumentBuilder(); + + // load tone rules + Document toneRules = b.parse(new FileInputStream(MaryProperties.needFilename(tonerulefilePropertyName))); + // Now fill the map of tobi symbols: + Element root = toneRules.getDocumentElement(); + for (Element e = MaryDomUtils.getFirstChildElement(root); + e != null; + e = MaryDomUtils.getNextSiblingElement(e)) { + if 
(e.getTagName().equals("tone") || e.getTagName().equals(MaryXML.BOUNDARY)) { + String name = e.getAttribute("name"); + // We want to be able to find tone labels both in + // uppercase and lowercase form: + toneMap.put(name.toUpperCase(), e); + } + } + } // loadToneRules + + + public MaryData process(MaryData d) throws Exception + { + Document doc = d.getDocument(); + defaultVoiceMap.put(doc, d.getDefaultVoice()); + determineProsodicSettings(doc); + addOrDeleteBoundaries(doc); + + NodeList sentences = doc.getElementsByTagName(MaryXML.SENTENCE); + for (int i=0; i < sentences.getLength(); i++) { + processSentence((Element)sentences.item(i)); + } + MaryData result = new MaryData(outputType(), d.getLocale()); + result.setDocument(doc); + return result; + } + + /** + * For all (possibly nested) prosody elements in the document, + * calculate their (possibly cumulated) prosodic settings + * and save them in a map. + */ + private void determineProsodicSettings(Document doc) { + // Determine the prosodic setting for each prosody element + // Note: It is important that ancestor nodes are processed before + // descendant nodes, because the descendants will inherit the + // ancestors' settings! + NodeList prosodies = doc.getElementsByTagName(MaryXML.PROSODY); + for (int i = 0; i < prosodies.getLength(); i++) { + Element prosody = (Element) prosodies.item(i); + determineProsodicSettings(prosody); + } + } + + /** + * For one given prosody element, determine the prosodic settings, + * taking into account its closest prosody ancestor's settings. + * This method needs to be called once when starting to work with a document + * (from determineProsodicSettings(Document) and when a new prosody element + * is created (e.g. for upstep/downstep). + * @param prosody the prosody element for which to save the prosodic settings in + * the map. 
+ */ + private void determineProsodicSettings(Element prosody) { + ProsodicSettings settings = new ProsodicSettings(); + // Neutral default settings: + ProsodicSettings parentSettings = new ProsodicSettings(); + // Obtain parent settings, if any: + Element ancestor = (Element) MaryDomUtils.getAncestor(prosody, MaryXML.PROSODY); + if (ancestor != null) { + ProsodicSettings testSettings = (ProsodicSettings) prosodyMap.get(ancestor); + if (testSettings != null) { + parentSettings = testSettings; + } + } + // Only accept relative changes, i.e. percentage delta: + settings.setRate(parentSettings.rate() + MaryUtils.getPercentageDelta(prosody.getAttribute("rate"))); + settings.setAccentProminence( + parentSettings.accentProminence() + MaryUtils.getPercentageDelta(prosody.getAttribute("accent-prominence"))); + settings.setAccentSlope( + parentSettings.accentSlope() + MaryUtils.getPercentageDelta(prosody.getAttribute("accent-slope"))); + settings.setNumberOfPauses( + parentSettings.numberOfPauses() + MaryUtils.getPercentageDelta(prosody.getAttribute("number-of-pauses"))); + settings.setPauseDuration( + parentSettings.pauseDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("pause-duration"))); + settings.setVowelDuration( + parentSettings.vowelDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("vowel-duration"))); + settings.setPlosiveDuration( + parentSettings.plosiveDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("plosive-duration"))); + settings.setFricativeDuration( + parentSettings.fricativeDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("fricative-duration"))); + settings.setNasalDuration( + parentSettings.nasalDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("nasal-duration"))); + settings.setLiquidDuration( + parentSettings.liquidDuration() + MaryUtils.getPercentageDelta(prosody.getAttribute("liquid-duration"))); + settings.setGlideDuration( + parentSettings.glideDuration() + 
MaryUtils.getPercentageDelta(prosody.getAttribute("glide-duration"))); + + String sVolume = prosody.getAttribute("volume"); + if (sVolume.equals("")) { + settings.setVolume(parentSettings.volume()); + } else if (MaryUtils.isPercentageDelta(sVolume)) { + int newVolume = parentSettings.volume() + MaryUtils.getPercentageDelta(sVolume); + if (newVolume < 0) + newVolume = 0; + else if (newVolume > 100) + newVolume = 100; + settings.setVolume(newVolume); + } else if (MaryUtils.isUnsignedNumber(sVolume)) { + settings.setVolume(MaryUtils.getUnsignedNumber(sVolume)); + } else if (sVolume.equals("silent")) { + settings.setVolume(0); + } else if (sVolume.equals("soft")) { + settings.setVolume(25); + } else if (sVolume.equals("medium")) { + settings.setVolume(50); + } else if (sVolume.equals("loud")) { + settings.setVolume(75); + } + prosodyMap.put(prosody, settings); + } + + /** + * Adjust the number of boundaries according to rate and the + * "number-of-pauses" attribute. + */ + private void addOrDeleteBoundaries(Document doc) { + // TODO: Check if this is needed; for German, this is already done in KlattDurationModeller! + // Go through boundaries. A boundary is deleted if the determined + // minimum breakindex size is larger than this boundary's breakindex. 
+ NodeIterator it = + ((DocumentTraversal) doc).createNodeIterator( + doc, + NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(MaryXML.BOUNDARY), + false); + Element boundary = null; + List bi1prosodyElements = null; + while ((boundary = (Element) it.nextNode()) != null) { + int minBI = 3; + Element prosody = (Element) MaryDomUtils.getAncestor(boundary, MaryXML.PROSODY); + if (prosody != null) { + ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); + assert settings != null; + int rate = settings.rate(); + int numberOfPauses = settings.numberOfPauses(); + if (numberOfPauses <= 50) + minBI = 5; + else if (numberOfPauses <= 75) + minBI = 4; + else if (numberOfPauses > 150) + minBI = 1; + else if (numberOfPauses > 125) + minBI = 2; + // Rate can only shift the number of pauses by one breakindex + if (rate < 90 && minBI > 1) + minBI--; + if (minBI == 1) { + // Remember that the current prosody element wants bi 1 boundaries: + if (bi1prosodyElements == null) + bi1prosodyElements = new ArrayList(); + bi1prosodyElements.add(prosody); + } + } + // This boundary's bi: + int bi = 3; + try { + bi = Integer.parseInt(boundary.getAttribute("breakindex")); + } catch (NumberFormatException e) { + logger.info( + "Unexpected breakindex value `" + boundary.getAttribute("breakindex") + "', assuming " + bi); + } + if (bi < minBI) { + if (!boundary.hasAttribute("duration")) + boundary.getParentNode().removeChild(boundary); + else + boundary.removeAttribute("bi"); // but keep duration + } + } + // Do we need to add any boundaries? 
+ if (bi1prosodyElements != null) { + Iterator elIt = bi1prosodyElements.iterator(); + while (elIt.hasNext()) { + Element prosody = (Element) elIt.next(); + NodeIterator nodeIt = + ((DocumentTraversal) doc).createNodeIterator( + prosody, + NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(new String[] { MaryXML.TOKEN, MaryXML.BOUNDARY }), + false); + Element el = null; + Element prevEl = null; + while ((el = (Element) nodeIt.nextNode()) != null) { + if (el.getTagName().equals(MaryXML.TOKEN) && prevEl != null && prevEl.getTagName().equals(MaryXML.TOKEN)) { + // Need to insert a boundary before el: + Element newBoundary = MaryXML.createElement(doc, MaryXML.BOUNDARY); + newBoundary.setAttribute("breakindex", "1"); + el.getParentNode().insertBefore(newBoundary, el); + } + prevEl = el; + } + } + } + } + + private void processSentence(Element sentence) { + NodeList syllables = sentence.getElementsByTagName(MaryXML.SYLLABLE); + if (syllables.getLength() < 1) { + return; // no syllables -- what can we do? + } + + NodeList phrases = sentence.getElementsByTagName(MaryXML.PHRASE); + for (int i = 0; i < phrases.getLength(); i++) { + Element phrase = (Element) phrases.item(i); + // calculate the F0 targets + calculateF0Targets(phrase); + // anchor the F0 targets at individual segments + // calculate frequency values. + } + } + + + ////////////////////////////////////////////////////////////////////// + ///////////////////////////// Tone Rules ///////////////////////////// + ////////////////////////////////////////////////////////////////////// + + /** + * Determine the topline and baseline start and end frequencies for a + * phrase. Create an appropriate TopBaseConfiguration object + * and save it in a hash, as a value to which the phrase element is the + * key. 
+ * @see #getToplineFrequency(Element,int) + * @see #getBaselineFrequency(Element,int) + */ + private void determinePhraseTopBaseConf(Element phrase) { + Voice voice = null; + // Determine the settings for the phrase element: + Element voiceElement = (Element) MaryDomUtils.getAncestor(phrase, MaryXML.VOICE); + if (voiceElement != null) + voice = Voice.getVoice(voiceElement); + if (voice == null) + voice = (Voice) defaultVoiceMap.get(phrase.getOwnerDocument()); + // In any case, if we do not have a voice now, + // use the global default voice: + if (voice == null) { + voice = Voice.getDefaultVoice(getLocale()); + } + if (!(voice instanceof MbrolaVoice)) { + throw new IllegalStateException("This contour generator can only be used with an MBROLA voice, but "+voice.getName()+" is a "+voice.getClass()); + } + MbrolaVoice mVoice = (MbrolaVoice) voice; + int topStart = mVoice.topStart(); + int topEnd = mVoice.topEnd(); + int baseStart = mVoice.baseStart(); + int baseEnd = mVoice.baseEnd(); + TopBaseConfiguration tbConf = new TopBaseConfiguration(topStart, topEnd, baseStart, baseEnd); + + // Now see if there are any global modifiers ( elements + // ancestors to this phrase element, but inside the voiceElement if + // there is one; start with the outermost element and + // superpose them one after the other): + Element current = phrase; + Stack prosodyElements = new Stack(); + while (MaryDomUtils.hasAncestor(current, MaryXML.PROSODY)) { + current = (Element) MaryDomUtils.getAncestor(current, MaryXML.PROSODY); + prosodyElements.push(current); + // Ignore prosody elements that are outside the closest voice element: + if (voiceElement != null && !MaryDomUtils.isAncestor(voiceElement, current)) { + // We have gone upwards past the voiceElement, so stop. 
+ break; + } + } + while (!prosodyElements.empty()) { + Element prosody = (Element) prosodyElements.pop(); + tbConf = calculateTopBase(prosody, tbConf); + } + + // OK, now tbConf is the best we can do for the prosodic settings of + // this phrase. + // Add timing information: (start is 0 for a phrase, end is the end of + // the last segment in the phrase) + Element lastSegment = MaryDomUtils.getLastElementByTagName(phrase, MaryXML.PHONE); + if (lastSegment != null) { + // There ARE segments in this phrase + int endTime = 0; + try { + endTime = Integer.parseInt(lastSegment.getAttribute("end")); + } catch (NumberFormatException e) { + logger.warn("Unexpected end time `" + lastSegment.getAttribute("end") + "'"); + } + tbConf.setTimes(0, endTime); + } + // Save the TopBaseConfiguration object in a hash, with the phrase + // element as a key: + topBaseConfMap.put(phrase, tbConf); + + //System.err.println("For phrase ranging from " + tbConf.startTime() + " to " + tbConf.endTime() + ", determined topStart " + tbConf.topStart() + ", topEnd " + tbConf.topEnd() + ", baseStart " + tbConf.baseStart() + ", baseEnd " + tbConf.baseEnd()); + } + + /** + * Determine the topline and baseline start and end frequencies for a + * prosody element within a phrase. Create an + * appropriate TopBaseConfiguration object and save it in a hash, as a + * value to which the prosody element is the key. 
+ * @see getToplineFrequency(Element,int) + * @see getBaselineFrequency(Element,int) + */ + private void determineProsodyTopBaseConf(Element prosody) { + if (prosody == null) + throw new NullPointerException("Received null argument"); + if (!prosody.getTagName().equals(MaryXML.PROSODY)) + throw new IllegalArgumentException("Expected argument, got <" + prosody.getTagName() + ">"); + // Find closest ancestor phrase or prosody element: + Element phrase = (Element) MaryDomUtils.getAncestor(prosody, MaryXML.PHRASE); + if (phrase == null) { + logger.warn("Trying to determine prosody top base conf for element without a ancestor. Ignoring."); + return; + } + Element confReferenceKey = phrase; + // Now see if there is a prosody element which is our ancestor and which is + // inside the phrase -- then that one is our configuration reference: + Element prosodyAncestor = (Element) MaryDomUtils.getAncestor(prosody, MaryXML.PROSODY); + if (prosodyAncestor != null && MaryDomUtils.isAncestor(phrase, prosodyAncestor)) { + confReferenceKey = prosodyAncestor; + } + TopBaseConfiguration confReference = (TopBaseConfiguration) topBaseConfMap.get(confReferenceKey); + assert confReference != null; + // Now calculate start and end times for this element: + Element firstSegment = MaryDomUtils.getFirstElementByTagName(prosody, MaryXML.PHONE); + int startTime = 0; + try { + startTime = + Integer.parseInt(firstSegment.getAttribute("end")) - Integer.parseInt(firstSegment.getAttribute("d")); + } catch (NumberFormatException e) { + logger.warn( + "Unexpected start time `" + + firstSegment.getAttribute("end") + + "' - `" + + firstSegment.getAttribute("d") + + "'"); + } + + Element lastSegment = MaryDomUtils.getLastElementByTagName(prosody, MaryXML.PHONE); + int endTime = 0; + try { + endTime = Integer.parseInt(lastSegment.getAttribute("end")); + } catch (NumberFormatException e) { + logger.warn("Unexpected end time `" + lastSegment.getAttribute("end") + "'"); + } + // Create a new 
TopBaseConfiguration element reflecting the + // settings in the confReference: + TopBaseConfiguration tbConf = + new TopBaseConfiguration( + confReference.toplineFrequency(startTime), + confReference.toplineFrequency(endTime), + confReference.baselineFrequency(startTime), + confReference.baselineFrequency(endTime), + startTime, + endTime); + // Modify this reference according to this prosody element: + tbConf = calculateTopBase(prosody, tbConf); + // Save the TopBaseConfiguration object in a hash, with the prosody + // element as a key: + topBaseConfMap.put(prosody, tbConf); + + //System.err.println("For prosody ranging from " + tbConf.startTime() + " to " + tbConf.endTime() + ", determined topStart " + tbConf.topStart() + ", topEnd " + tbConf.topEnd() + ", baseStart " + tbConf.baseStart() + ", baseEnd " + tbConf.baseEnd()); + + } + + /** + * Starting from a baseline prosodic configuration and the settings + * requested in the prosody element, a new prosodic + * configuration (topline and baseline start and end frequencies) is + * calculated. + */ + private TopBaseConfiguration calculateTopBase(Element prosody, TopBaseConfiguration origConf) { + int topStart = origConf.topStart(); + int topEnd = origConf.topEnd(); + int baseStart = origConf.baseStart(); + int baseEnd = origConf.baseEnd(); + String pitch = prosody.getAttribute("pitch"); + if (!pitch.equals("")) { + if (MaryUtils.isPercentageDelta(pitch)) { + //System.err.println("Percentage delta: `" + pitch + "'"); + int percentage = MaryUtils.getPercentageDelta(pitch); + baseStart = (baseStart * (100 + percentage)) / 100; + baseEnd = (baseEnd * (100 + percentage)) / 100; + // For the topline we have two possibilities: + // i) we shift by the same number of Hz as the baseline, + // i.e. keep the range constant in the frequency domain; + // ii) we multiply with the same factor, + // i.e. 
keep the range constant in the log frequency domain + // (constant number of semitones range) + // The latter seems more appropriate given the fact that the + // human ear hears frequencies logarithmically. + topStart = (topStart * (100 + percentage)) / 100; + topEnd = (topEnd * (100 + percentage)) / 100; + } else if (MaryUtils.isSemitonesDelta(pitch)) { + //System.err.println("Semitones delta: `" + pitch + "'"); + double semitones = MaryUtils.getSemitonesDelta(pitch); + // Adding one semitone to any frequency corresponds to a + // multiplication with 2^(1/12) = 1.0595. + // Subtracting one semitone corresponds to a division by 1.0595. + // In general: Changing the frequency by x semitones corresponds + // to a multiplication with 1.0595^x. + double factor = Math.pow(1.0595, semitones); + baseStart = (int) (baseStart * factor); + baseEnd = (int) (baseEnd * factor); + topStart = (int) (topStart * factor); + topEnd = (int) (topEnd * factor); + } else if (MaryUtils.isNumberDelta(pitch)) { // +5, -10.2 + //System.err.println("Number delta: `" + pitch + "'"); + int delta = MaryUtils.getNumberDelta(pitch); + baseStart += delta; + baseEnd += delta; + topStart += delta; + topEnd += delta; + } else if (MaryUtils.isUnsignedNumber(pitch)) { // 180, 212.75 + //System.err.println("Unsigned number: `" + pitch + "'"); + // In order to keep the range constant in log frequency domain, + // calculate the ratio of current topMean and baseMean. Notice + // that the spreads are not calculated in the frequency domain, + // i.e. the slope of topline and baseline in the frequency + // domain change during the shift. It is unclear whether this + // is very relevant. 
+ int baseMean = (baseStart + baseEnd) / 2; + int topMean = (topStart + topEnd) / 2; + double topBaseRatio = ((double) topMean) / baseMean; + int topSpread = (topEnd - topStart) / 2; + int newBaseMean = MaryUtils.getUnsignedNumber(pitch); + int baseSpread = (baseEnd - baseStart) / 2; + baseStart = newBaseMean - baseSpread; + baseEnd = newBaseMean + baseSpread; + topStart = (int) (newBaseMean * topBaseRatio - topSpread); + topEnd = (int) (newBaseMean * topBaseRatio + topSpread); + } + } + String range = prosody.getAttribute("range"); + if (!range.equals("")) { + // Range leaves the baseline untouched, and moves the topline. + // All relative changes stretch the distance top-base. + if (MaryUtils.isPercentageDelta(range)) { // +25%, -17.2% + //System.err.println("Percentage delta: `" + range + "'"); + int percentage = MaryUtils.getPercentageDelta(range); + topStart = baseStart + ((topStart - baseStart) * (100 + percentage)) / 100; + topEnd = baseEnd + ((topEnd - baseEnd) * (100 + percentage)) / 100; + } else if (MaryUtils.isSemitonesDelta(range)) { // +5.2st, -0.7st + //System.err.println("Semitones delta: `" + range + "'"); + // Change the current range by x semitones + double semitones = MaryUtils.getSemitonesDelta(range); + // for explanations, see pitch section above. 
+ double factor = Math.pow(1.0595, semitones); + int deltaStart = (int) ((topStart - baseStart) * factor); + int deltaEnd = (int) ((topEnd - baseEnd) * factor); + topStart = baseStart + deltaStart; + topEnd = baseEnd + deltaEnd; + } else if (MaryUtils.isNumberDelta(range)) { // +15, -27.3 + //System.err.println("Number delta: `" + range + "'"); + int delta = MaryUtils.getNumberDelta(range); + topStart += delta; + topEnd += delta; + } else if (MaryUtils.isUnsignedSemitones(range)) { // 12st, 5.32st + //System.err.println("Unsigned semitones: `" + range + "'"); + // Set the new range to x semitones, discarding the previous + // range + double semitones = MaryUtils.getUnsignedSemitones(range); + // for explanations, see pitch section above. + double factor = Math.pow(1.0595, semitones); + topStart = (int) (baseStart * factor); + topEnd = (int) (baseEnd * factor); + } else if (MaryUtils.isUnsignedNumber(range)) { // 60, 50.4 + //System.err.println("Unsigned number: `" + range + "'"); + // Notice that the spread is not calculated in the frequency + // domain, i.e. the slope of topline and baseline in the + // frequency domain change during the shift. It is unclear + // whether this is very relevant. + int baseMean = (baseStart + baseEnd) / 2; + int topSpread = (topEnd - topStart) / 2; + int newRange = MaryUtils.getUnsignedNumber(range); + topStart = baseMean + newRange - topSpread; + topEnd = baseMean + newRange + topSpread; + } + } + String pitchDynamics = prosody.getAttribute("pitch-dynamics"); + if (!pitchDynamics.equals("")) { + if (MaryUtils.isPercentageDelta(pitchDynamics)) { // +25%, -17.2% + //System.err.println("Percentage delta: `" + pitchDynamics + "'"); + int percentage = MaryUtils.getPercentageDelta(pitchDynamics); + int baseMean = (baseStart + baseEnd) / 2; + // Motivation: m = (a+z)/2, and + // z = (1+p)*a (that is the idea in "pitch-dynamics"!) 
+ // => m = (1 + p/2) * a => a = m / (1 + p/2) + baseStart = (200 * baseMean) / (200 + percentage); + baseEnd = (baseStart * (100 + percentage)) / 100; + } else if (MaryUtils.isNumberDelta(pitchDynamics)) { // +15, -27.3 + //System.err.println("Number delta: `" + pitchDynamics + "'"); + int delta = MaryUtils.getNumberDelta(pitchDynamics); + int baseMean = (baseStart + baseEnd) / 2; + baseStart = baseMean + delta / 2; + baseEnd = baseMean - delta / 2; + } else if (MaryUtils.isSemitonesDelta(pitchDynamics)) { // +5.2st, -0.7st + //System.err.println("Semitones delta: `" + pitchDynamics + "'"); + double semitones = MaryUtils.getSemitonesDelta(pitchDynamics); + // for explanations, see pitch section above. + double factor = Math.pow(1.0595, semitones); + int baseMean = (baseStart + baseEnd) / 2; + // Motivation: as for percentage delta above, replacing + // (1+p) with factor: + // m = (a+z)/2 + // z = factor * a (that is the idea in "pitch-dynamics"!) + // => m = (1+factor)*a/2 => a = 2m / (1+factor) + baseStart = (int) ((2 * baseMean) / (1 + factor)); + baseEnd = (int) (factor * baseStart); + } // non-delta values don't make sense for X-dynamics. 
+ } + String rangeDynamics = prosody.getAttribute("range-dynamics"); + if (!rangeDynamics.equals("")) { + if (MaryUtils.isPercentageDelta(rangeDynamics)) { // +25%, -17.2% + //System.err.println("Percentage delta: `" + rangeDynamics + "'"); + int percentage = MaryUtils.getPercentageDelta(rangeDynamics); + int baseMean = (baseStart + baseEnd) / 2; + int topMean = (topStart + topEnd) / 2; + int rangeMean = topMean - baseMean; + // Motivation: see "pitch-dynamics" above + int rangeStart = (200 * rangeMean) / (200 + percentage); + int rangeEnd = (rangeStart * (100 + percentage)) / 100; + topStart = baseStart + rangeStart; + topEnd = baseEnd + rangeEnd; + } else if (MaryUtils.isNumberDelta(rangeDynamics)) { // +15, -27.3 + //System.err.println("Number delta: `" + rangeDynamics + "'"); + int delta = MaryUtils.getNumberDelta(rangeDynamics); + int baseMean = (baseStart + baseEnd) / 2; + int topMean = (topStart + topEnd) / 2; + int rangeMean = topMean - baseMean; + int rangeStart = rangeMean + delta / 2; + int rangeEnd = rangeMean - delta / 2; + topStart = baseStart + rangeStart; + topEnd = baseEnd + rangeEnd; + } else if (MaryUtils.isSemitonesDelta(rangeDynamics)) { // +5.2st, -0.7st + //System.err.println("Semitones delta: `" + rangeDynamics + "'"); + double semitones = MaryUtils.getSemitonesDelta(rangeDynamics); + // for explanations, see pitch section above. + double factor = Math.pow(1.0595, semitones); + int baseMean = (baseStart + baseEnd) / 2; + int topMean = (topStart + topEnd) / 2; + int rangeMean = topMean - baseMean; + // Motivation: see pitch-dynamics section above + int rangeStart = (int) ((2 * rangeMean) / (1 + factor)); + int rangeEnd = (int) (factor * rangeStart); + topStart = baseStart + rangeStart; + topEnd = baseEnd + rangeEnd; + } // non-delta values don't make sense for X-dynamics. 
+ } + + // Refuse to put topline below baseline: + if (topStart < baseStart) + topStart = baseStart; + if (topEnd < baseEnd) + topEnd = baseEnd; + return new TopBaseConfiguration(topStart, topEnd, baseStart, baseEnd, origConf.startTime(), origConf.endTime()); + } + + + /** + * For a given phrase, calculate the target positions and frequencies + * for each ToBI accent and boundary tone in the phrase. + */ + private void calculateF0Targets(Element phrase) { + // Determine top- / baseline start and end values for each phrase + determinePhraseTopBaseConf(phrase); + // and for all the elements within the phrase. + NodeList prosodies = phrase.getElementsByTagName(MaryXML.PROSODY); + for (int j = 0; j < prosodies.getLength(); j++) { + Element prosody = (Element) prosodies.item(j); + determineProsodyTopBaseConf(prosody); + } + // Some useful memories for assigning the targets: + boolean isFirstInPhrase = true; + Element prevToneSyllable = null; + char prevTone = 0; // valid values: 'H' and 'L' + int lastHFreq = 0; // in Hertz + List allTargetList = new ArrayList(); + // Go through all tokens and boundaries in the phrase, from left to + // right: + TreeWalker tw = + ((DocumentTraversal) phrase.getOwnerDocument()).createTreeWalker( + phrase, + NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(new String[] { MaryXML.SYLLABLE }), + false); + Element e = null; + while ((e = (Element) tw.nextNode()) != null) { + Element referenceSyllable = null; + Element rule = null; + if (e.getTagName().equals(MaryXML.SYLLABLE)) { // a syllable + // tone: + if (e.hasAttribute("tone")) { + String tone = e.getAttribute("tone").toUpperCase(); + rule = (Element) toneMap.get(tone); + if (rule != null) { + // Determine the stressed syllable in the token: + referenceSyllable = e; + } + } + } else { + // Boundary: + if (e.hasAttribute("tone")) { + String tone = e.getAttribute("tone").toUpperCase(); + rule = (Element) toneMap.get(tone); + if (rule != null) { + // The reference syllable is the one preceding 
the + // boundary: + TreeWalker stw = + ((DocumentTraversal) e.getOwnerDocument()).createTreeWalker( + phrase, + NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(MaryXML.SYLLABLE), + false); + stw.setCurrentNode(e); + referenceSyllable = (Element) stw.previousNode(); + } + } + } + if (referenceSyllable != null && rule != null) { + logger.debug( + "Now assigning targets for tone `" + + rule.getAttribute("name") + + "' on syllable [" + + referenceSyllable.getAttribute("ph") + + "]"); + // We have some targets to assign + // For each target in the rule, first determine its location: + List targetList = new ArrayList(); + Target starTarget = null; + TreeWalker rtw = + ((DocumentTraversal) rule.getOwnerDocument()).createTreeWalker( + rule, + NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(new String[] { "target"}), + false); + Element rulePart = null; + while ((rulePart = (Element) rtw.nextNode()) != null) { + if (rulePart.getTagName().equals("target")) { + Target target = + determineInitialTargetLocation( + rulePart, + referenceSyllable, + isFirstInPhrase, + prevTone, + prevToneSyllable); + if (target != null) { + targetList.add(target); + allTargetList.add(target); + logger.debug( + " " + + target.type() + + " target on [" + + target.segment().getAttribute("p") + + "] at " + + target.getTargetTime() + + " ms"); + if (target.type().equals("star")) { + if (starTarget != null) { + logger.info( + "Found more than one star target for tone rule `" + + rule.getAttribute("name") + + "'"); + } + starTarget = target; + } + } + } else { // "prosody": downstep or upstep + // First, identify the syllable which is to be the + // first to be downstepped or upstepped. 
+ String tCode = rulePart.getAttribute("t_code"); + Element prosSyllable = null; + if (tCode.equals("21")) { // this syllable + prosSyllable = referenceSyllable; + } else if (tCode.equals("11")) { // previous syllable + prosSyllable = MaryDomUtils.getPreviousOfItsKindIn(referenceSyllable, phrase); + } else if (tCode.equals("31")) { // next syllable + prosSyllable = MaryDomUtils.getNextOfItsKindIn(referenceSyllable, phrase); + } else if (tCode.equals("99")) { + // syllable after last tone + prosSyllable = MaryDomUtils.getNextOfItsKindIn(prevToneSyllable, phrase); + } + if (prosSyllable == null) { + // Unknown tCode setting or no previous or next + // syllable -- Well, then we start with this + // syllable + prosSyllable = referenceSyllable; + } + logger.debug( + " upstep/downstep starting with syllable [" + prosSyllable.getAttribute("ph") + "]"); + // Insert a prosody element into the syllable such + // that it encloses this syllable and the last + // syllable in the syllable. + // adaptProsody(rulePart, prosSyllable); + } + } + // Adjust location of "plus" type targets if necessary: + Iterator it = targetList.iterator(); + while (it.hasNext()) { + Target target = (Target) it.next(); + target.setMyStar(starTarget); + if (target.type().equals("plus")) { + //adjustTargetLocation(target, starTarget); + } + } + // Calculate target frequencies, and write the targets into the + // XML structure: + it = targetList.iterator(); + while (it.hasNext()) { + Target target = (Target) it.next(); + lastHFreq = calculateTargetFrequency(target, lastHFreq); + } + + // Now some useful memories for future rules inside this phrase: + // We have already assigned at least one target: + isFirstInPhrase = false; + prevToneSyllable = referenceSyllable; + String label = rule.getAttribute("name"); + if (label.lastIndexOf('H') > label.lastIndexOf('L')) { + // Remember previous tone was an H tone + prevTone = 'H'; + } else { + // Remember previous tone was an L tone + prevTone = 'L'; + } + } + } 
+ // Now verify that targets don't overlap, and that no target is closer + // to another tone's target than to its own "star". + ListIterator it = allTargetList.listIterator(); + Target prev = null; + Target current = null; + Target next = null; + while (it.hasNext()) { + next = (Target) it.next(); + if (current != null) { + // Verify that next comes later than current: + int currentTargetTime = current.getTargetTime(); + int nextTargetTime = next.getTargetTime(); + if (currentTargetTime > nextTargetTime) { + // If one is a star, move the other one: + if (current.type().equals("star") && !next.type().equals("star")) { + Element oldSegment = next.segment(); + // Move next to the segment following current: + Element newSegment = getNextSegment(current.segment()); + int newTiming = 10; // at 10% of following segment + if (newSegment == null) { // no such segment + newSegment = current.segment(); + newTiming = 100; + } + next.setSegment(newSegment); + next.setTiming(newTiming); + // And recalculate the target frequency (trust that + // lastHFreq is not needed for this target) + calculateTargetFrequency(next, 0); + logger.debug( + "Found overlapping targets. Moved " + + "\"plus\" target from " + + nextTargetTime + + "ms [" + + oldSegment.getAttribute("p") + + "] to " + + next.getTargetTime() + + "ms [" + + next.segment().getAttribute("p") + + "]."); + } else if (next.type().equals("star") && !current.type().equals("star")) { + Element oldSegment = current.segment(); + // Move current to the segment preceding next: + Element newSegment = getPreviousSegment(next.segment()); + int newTiming = 90; // at 90% of following segment + if (newSegment == null) { // no such segment + newSegment = next.segment(); + newTiming = 0; + } + current.setSegment(newSegment); + current.setTiming(newTiming); + // And recalculate the target frequency (trust that + // lastHFreq is not needed for this target) + calculateTargetFrequency(current, 0); + logger.debug( + "Found overlapping targets. 
Moved " + + "\"plus\" target from " + + currentTargetTime + + "ms [" + + oldSegment.getAttribute("p") + + "] to " + + current.getTargetTime() + + "ms [" + + current.segment().getAttribute("p") + + "]."); + } else { // none is a star + // If none is a star, calculate the meeting point + // of their respective interpolation lines, and + // replace them with a single target at this point. + int tn = next.getTargetTime(); + int fn = next.f0(); + int tc = current.getTargetTime(); + int fc = current.f0(); + int t1; // new target time + int f1; // new f0 + // Two methods for calculating the new target: + if (next.myStar() != null + && next.myStar() != next + && current.myStar() != null + && current.myStar() != current) { + // The maths: + // next = (tn, fn); next.myStar() = (tns, fns) + // current = (tc, fc); current.myStar() = (tcs, fcs) + // We search for point (t1, f1) where lines meet. + // slope_n = (fns - fn) / (tns - tn) + // slope_c = (fc - fcs) / (tc - tcs) + // f1 = fn + slope_n * (t1 - tn) + // and + // f1 = fc - slope_c * (tc - t1) + // out of which we can conlcude + // t1 = ((fn - slope_n tn) - (fc - slope_c tc)) / + // (slope_c - slope_n) + int tns = next.myStar().getTargetTime(); + int fns = next.myStar().f0(); + int tcs = current.myStar().getTargetTime(); + int fcs = current.myStar().f0(); + double slope_n = ((double) fns - fn) / (tns - tn); + double slope_c = ((double) fc - fcs) / (tc - tcs); + if (slope_n < 0 && slope_c >= 0 || slope_c < 0 && slope_n >= 0) { + t1 = (int) (((fn - slope_n * tn) - (fc - slope_c * tc)) / (slope_c - slope_n)); + f1 = (int) (fn + slope_n * (t1 - tn)); + } else { + t1 = (tn + tc) / 2; + f1 = (fn + fc) / 2; + } + } else { + // One of them has no star (or both are stars + // themselves, which should not happen) + // Calculate a simple time and frequency mean. 
+ t1 = (tn + tc) / 2; + f1 = (fn + fc) / 2; + } + // Set the new values for current: + current.setTargetTime(t1); + current.setF0(f1); + + logger.debug( + "Found two overlapping targets, at " + + tc + + "ms, " + + fc + + "Hz and " + + tn + + "ms, " + + fn + + "Hz. Replaced them with a target at " + + t1 + + "ms, " + + f1 + + "Hz."); + // Delete next: + it.remove(); // removes next; + continue; // and re-get a next + } + } + } + // Once we know no targets are inversed, we can check that no + // target is closer to another tone's target than to its own star. + if (prev != null) { + int prevTime = prev.getTargetTime(); + int currentTime = current.getTargetTime(); + int nextTime = next.getTargetTime(); + if (current.myStar() == next + && nextTime - currentTime > currentTime - prevTime + || current.myStar() == prev + && currentTime - prevTime > nextTime - currentTime) { + int newTime = (prevTime + nextTime) / 2; + logger.debug( + "Target at " + + currentTime + + "ms is further from its star than from" + + " a different target -- moving to " + + newTime + + "ms."); + current.setTargetTime(newTime); + // And recalculate the target frequency (trust that + // lastHFreq is not needed for this target) + calculateTargetFrequency(current, 0); + + } + } + prev = current; + current = next; + } + // Finally, insert the targets into MaryXML: + it = allTargetList.listIterator(); + while (it.hasNext()) { + insertTargetIntoMaryXML((Target) it.next()); + } + } + + + + /** + * Determine the initial location of a target point on the time axis, based + * on the rule in Element target relative to the given + * syllable Element. For accents, this is the stressed + * syllable of the accented word; for boundary tones, it is the last + * syllable before the boundary. + * @return a newly created Target object, or null if the conditions for + * applying this rule part were not fulfilled or no suitable target + * location could be found. 
+ */ + private Target determineInitialTargetLocation( + Element rulePart, + Element syllable, + boolean isFirstInPhrase, + char prevTone, + Element prevToneSyllable) { + // Essential sanity check: + if (rulePart == null + || !rulePart.getTagName().equals("target") + || syllable == null + || !syllable.getTagName().equals(MaryXML.SYLLABLE)) { + return null; + } + // Verify if rulePart condition applies: + if (rulePart.hasAttribute("condition")) { + String condition = rulePart.getAttribute("condition"); + if (condition.equals("first_in_IP") && !isFirstInPhrase) { + return null; + } else if (condition.equals("prevtone_H") && prevTone != 'H') { + return null; + } else if (condition.equals("prevtone_L") && prevTone != 'L') { + return null; + } + } + // OK, no condition violated. + // Locate target position: + Element segment = null; + // Target timing is in percent of the segment duration, relative to the + // segment start: + int timing = -1; + String tCode = rulePart.getAttribute("t_code"); + if (tCode.equals("0")) { // start of this declination phrase + Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); + segment = MaryDomUtils.getFirstElementByTagName(phrase, MaryXML.PHONE); + timing = 0; + } else if (tCode.equals("12")) { + // start of the nucleus of the preceding syllable + Element prevSyl = getPreviousSyllable(syllable); + if (prevSyl == null) { + // No such syllable -- just ignore this target + } else { + Element nucleus = getNucleus(prevSyl); + if (nucleus == null) // No nucleus -- take first segment then: + segment = MaryDomUtils.getFirstElementByTagName(prevSyl, MaryXML.PHONE); + else + segment = nucleus; + timing = 0; + } + } else if (tCode.equals("21")) { + // start of this syllable + segment = MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE); + timing = 0; + } else if (tCode.equals("22")) { + // start of the nucleus of this syllable + Element nucleus = getNucleus(syllable); + if (nucleus == null) // No nucleus -- 
take first segment then: + segment = MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE); + else + segment = nucleus; + timing = 0; + } else if (tCode.equals("23")) { + // middle of the nucleus of this syllable + // (simplifying assumption: the nucleus contains only one segment) + Element nucleus = getNucleus(syllable); + if (nucleus == null) // No nucleus -- take first segment then: + segment = MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE); + else + segment = nucleus; + timing = 50; + } else if (tCode.equals("24")) { + // end of the nucleus of this syllable + // (simplifying assumption: the nucleus contains only one segment) + Element nucleus = getNucleus(syllable); + if (nucleus == null) // No nucleus -- take last segment then: + segment = MaryDomUtils.getLastElementByTagName(syllable, MaryXML.PHONE); + else + segment = nucleus; + timing = 100; + } else if (tCode.equals("25")) { + // end of this syllable + segment = MaryDomUtils.getLastElementByTagName(syllable, MaryXML.PHONE); + timing = 100; + } else if (tCode.equals("34")) { + // end of the nucleus of the following syllable + // (simplifying assumption: the nucleus contains only one segment) + Element nextSyl = getNextSyllable(syllable); + if (nextSyl == null) { + // No such syllable -- just ignore this target + } else { + Element nucleus = getNucleus(nextSyl); + if (nucleus == null) // No nucleus -- take last segment then: + segment = MaryDomUtils.getLastElementByTagName(nextSyl, MaryXML.PHONE); + else + segment = nucleus; + timing = 100; + } + } else if (tCode.equals("99")) { + // middle of the nucleus of the syllable after the syllable + // associated with the previous tone + Element syl = getNextSyllable(prevToneSyllable); + if (syl == null) { + // No such syllable -- just ignore this target + } else { + Element nucleus = getNucleus(syl); + if (nucleus == null) // No nucleus -- take first segment then: + segment = MaryDomUtils.getFirstElementByTagName(syl, MaryXML.PHONE); + else 
+ segment = nucleus; + timing = 50; + } + } else if (tCode.equals("98")) { + // middle of the nucleus of the first 1ary or 2ary stressed + // syllable after the syllable after the syllable associated with + // the previous tone + Element syl = getNextSyllable(prevToneSyllable); + if (syl == null) { + // No such syllable -- just ignore this target + } else { + // OK, skip that first syllable: + syl = getNextSyllable(syl); + if (syl == null) { + // No such syllable -- just ignore this target + } else { + // Now take the first one we get which has 1ary or 2ary stress: + Element fallback = syl; + while (syl != null + && !(syl.getAttribute("stress").equals("1") || syl.getAttribute("stress").equals("2"))) { + syl = getNextSyllable(syl); + } + if (syl == null) + syl = fallback; + // OK, now we have a syllable to work with. + Element nucleus = getNucleus(syl); + if (nucleus == null) // No nucleus -- take first segment then: + segment = MaryDomUtils.getFirstElementByTagName(syl, MaryXML.PHONE); + else + segment = nucleus; + timing = 50; + } + } + } + if (segment == null || timing == -1) { + logger.debug(" Target (" + rulePart.getAttribute("f0") + ") could not be attached. skipping."); + return null; + } + return new Target(rulePart, segment, timing, 0); + } + + + /** + * For the given target, calculate the appropriate F0 by taking into + * account the local topline and baseline frequency. + * @return The last target frequency on the topline, either as passed here + * through parameter lastHFreq or as realised by this target. 
+ */ + private int calculateTargetFrequency(Target target, int lastHFreq) { + // sanity check: + if (target == null || target.targetRule() == null || target.segment() == null) + throw new NullPointerException("Null target specification -- cannot calculate Frequency"); + // Calculate target frequency: + String f0descr = target.targetRule().getAttribute("f0"); + int f0 = 0; + TopBaseConfiguration tbConf = null; + // Approximation if we need lastHFreq and don't have it: + if (f0descr.equals("last_H_freq") && lastHFreq == 0) { + f0descr = "1100"; // 10% above top line + } + if (MaryUtils.isNumber(f0descr)) { + int f0promille = MaryUtils.getNumber(f0descr); + Element phrase = (Element) MaryDomUtils.getAncestor(target.segment(), MaryXML.PHRASE); + Element prosody = (Element) MaryDomUtils.getAncestor(target.segment(), MaryXML.PROSODY); + Element topBaseRef = phrase; + if (prosody != null && MaryDomUtils.isAncestor(phrase, prosody)) { + // A local prosody tag -- this is our reference + topBaseRef = prosody; + } + // For accents, realise target overshoot or undershoot + // as a function of the "accent-prominence" attribute: + if (prosody != null + && // inside or outside phrase + target.targetRule().getParentNode().getNodeName().equals("tone") + && target.type().equals("star")) { + ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); + if (settings != null) { + int accentProminence = settings.accentProminence(); + // Stretch the distance of f0promille from 500 + int dist = f0promille - 500; + int newDist = (dist * accentProminence) / 100; + f0promille = 500 + newDist; + } + } + tbConf = (TopBaseConfiguration) topBaseConfMap.get(topBaseRef); + int d = 0; + try { + d = Integer.parseInt(target.segment().getAttribute("d")); + } catch (NumberFormatException e) { + logger.warn("Unexpected duration value `" + target.segment().getAttribute("d") + "'"); + } + int end = 0; + try { + end = Integer.parseInt(target.segment().getAttribute("end")); + } catch 
(NumberFormatException e) { + logger.warn("Unexpected duration value `" + target.segment().getAttribute("end") + "'"); + } + // Remember that timing is expressed as a percentage of d: + int timeMillis = (end - d) + (d * target.timing()) / 100; + if (f0promille == 1000) { // on topline + f0 = tbConf.toplineFrequency(timeMillis); + lastHFreq = f0; + } else if (f0promille == 0) { // on baseline + f0 = tbConf.baselineFrequency(timeMillis); + } else { // somewhere in between or above or below + int base = tbConf.baselineFrequency(timeMillis); + int top = tbConf.toplineFrequency(timeMillis); + int range = top - base; + f0 = base + (f0promille * range) / 1000; + } + } else if (f0descr.equals("last_H_freq")) { + f0 = lastHFreq; + } else { + logger.warn("Unknown f0 specification `" + f0descr + "' in file " + MaryProperties.getFilename(tonerulefilePropertyName)); + } + if (f0 != 0) { + // OK, valid + target.setF0(f0); + logger.debug( + "Target on segment [" + + target.segment().getAttribute("p") + + "] at " + + target.getTargetTime() + + " ms, " + + target.f0() + + " Hz (" + + f0descr + + ")"); + } + return lastHFreq; + } + + private void insertTargetIntoMaryXML(Target target) { + if (target != null && target.segment() != null && target.timing() != -1 && target.f0() != 0) { + String newF0 = "(" + target.timing() + "," + target.f0() + ")"; + if (target.segment().hasAttribute("f0")) { + String oldF0 = target.segment().getAttribute("f0"); + target.segment().setAttribute("f0", oldF0 + " " + newF0); + } else { + target.segment().setAttribute("f0", newF0); + } + } + } + + /** + * Add a prosody element in the current phrase, realising the + * upstep or downstep formulated in prosodyRule, and enclosing + * the given syllable and all other syllables until the end of + * the phrase. 
+ */ + private void adaptProsody(Element prosodyRule, Element syllable) { + Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); + Element first = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.TOKEN); + if (MaryDomUtils.hasAncestor(first, MaryXML.MTU)) { + first = (Element) MaryDomUtils.getHighestLevelAncestor(first, MaryXML.MTU); + } + Element lastSyl = MaryDomUtils.getLastElementByTagName(phrase, MaryXML.SYLLABLE); + Element last = (Element) MaryDomUtils.getAncestor(lastSyl, MaryXML.TOKEN); + if (MaryDomUtils.hasAncestor(last, MaryXML.MTU)) { + last = (Element) MaryDomUtils.getHighestLevelAncestor(last, MaryXML.MTU); + } + Element newProsody = MaryDomUtils.encloseNodesWithNewElement(first, last, MaryXML.PROSODY); + newProsody.setAttribute("range", prosodyRule.getAttribute("range")); + determineProsodicSettings(newProsody); + determineProsodyTopBaseConf(newProsody); + // And now, if there are any prosody tags enclosed by the new + // prosody tag, their topbaseconf needs to be calculated again + TreeWalker tw = + ((DocumentTraversal) newProsody.getOwnerDocument()).createTreeWalker( + newProsody, + NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(MaryXML.PROSODY), + false); + Element p = null; + while ((p = (Element) tw.nextNode()) != null) { + determineProsodyTopBaseConf(p); + } + } + + + + ////////////////////////////////////////////////////////////////////// + ////////////////////////////// Helpers /////////////////////////////// + ////////////////////////////////////////////////////////////////////// + + private Element getToken(Element segmentOrSyllable) { + return (Element) MaryDomUtils.getAncestor(segmentOrSyllable, MaryXML.TOKEN); + } + + private Element getSyllable(Element segment) { + return (Element) MaryDomUtils.getAncestor(segment, MaryXML.SYLLABLE); + } + + /** + * Find the segment preceding this segment within the same + * phrase. + * @return that segment, or null if there is no such segment. 
+ */ + private static Element getPreviousSegment(Element segment) { + Element phrase = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PHRASE); + return MaryDomUtils.getPreviousOfItsKindIn(segment, phrase); + } + + /** + * Find the segment following this segment within the same + * phrase. + * @return that segment, or null if there is no such segment. + */ + private static Element getNextSegment(Element segment) { + Element phrase = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PHRASE); + return MaryDomUtils.getNextOfItsKindIn(segment, phrase); + } + + /** + * Find the syllable preceding this syllable within the same + * phrase. + * @return that syllable, or null if there is no such + * syllable. + */ + private static Element getPreviousSyllable(Element syllable) { + Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); + return MaryDomUtils.getPreviousOfItsKindIn(syllable, phrase); + } + + /** + * Find the syllable following this syllable within the same + * phrase. + * @return that syllable, or null if there is no such + * syllable. + */ + private static Element getNextSyllable(Element syllable) { + if (syllable == null) + return null; + Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); + return MaryDomUtils.getNextOfItsKindIn(syllable, phrase); + } + + private boolean hasTone(Element token) { + String tone = token.getAttribute("tone").toUpperCase(); + // Is it a known / valid accent: + return toneMap.containsKey(tone); + } + + /** + * Search for boundary and syllable elements following the given syllable. + * If the next matching element found is a boundary with breakindex + * minBreakindex or larger, return true; otherwise, + * return false. + * If there is no next node, return true. 
+ */ + private boolean isLastBeforeBoundary(Element syllable, int minBreakindex) { + Document doc = syllable.getOwnerDocument(); + Element sentence = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.SENTENCE); + TreeWalker tw = + ((DocumentTraversal) doc).createTreeWalker( + sentence, + NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(new String[] { MaryXML.SYLLABLE, MaryXML.BOUNDARY }), + false); + tw.setCurrentNode(syllable); + Element next = (Element) tw.nextNode(); + if (next == null) { + // no matching node after syllable -- + // we must be in a final position. + return true; + } + if (next.getNodeName().equals(MaryXML.BOUNDARY)) { + if (getBreakindex(next) >= minBreakindex) + return true; + } + // This syllable is either followed by another syllable or + // by a boundary with breakindex < minBreakindex + return false; + } + + private boolean isMajIPFinal(Element syllable) { + // If this syllable is followed by a boundary with breakindex + // 4 or above, return true. + return isLastBeforeBoundary(syllable, 4); + } + + private boolean isMinipFinal(Element syllable) { + // If this syllable is followed by a boundary with breakindex + // 3 or above, return true. 
+ return isLastBeforeBoundary(syllable, 3); + } + + private boolean isWordFinal(Element syllable) { + Element e = syllable; + while (e != null) { + e = MaryDomUtils.getNextSiblingElement(e); + if (e != null && e.getNodeName().equals(MaryXML.SYLLABLE)) + return false; + } + return true; + } + + private boolean isWordMedial(Element syllable) { + return !(isWordFinal(syllable) || isWordInitial(syllable)); + + } + + private boolean isWordInitial(Element syllable) { + Element e = syllable; + while (e != null) { + e = MaryDomUtils.getPreviousSiblingElement(e); + if (e != null && e.getNodeName().equals(MaryXML.SYLLABLE)) + return false; + } + return true; + } + + private boolean isInOnset(Element segment) { + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + assert ph != null; + if (ph.isSyllabic()) { + return false; + } + // OK, segment is not syllabic. See if it is followed by a syllabic + // segment: + for (Element e = MaryDomUtils.getNextSiblingElement(segment); + e != null; + e = MaryDomUtils.getNextSiblingElement(e)) { + ph = allophoneSet.getAllophone(e.getAttribute("p")); + assert ph != null; + if (ph.isSyllabic()) { + return true; + } + } + return false; + } + + private boolean isInNucleus(Element segment) { + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + assert ph != null; + return ph.isSyllabic(); + } + + private boolean isInCoda(Element segment) { + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + assert ph != null; + if (ph.isSyllabic()) { + return false; + } + // OK, segment is not syllabic. 
See if it is preceded by a syllabic + // segment: + for (Element e = MaryDomUtils.getPreviousSiblingElement(segment); + e != null; + e = MaryDomUtils.getPreviousSiblingElement(e)) { + ph = allophoneSet.getAllophone(e.getAttribute("p")); + assert ph != null; + if (ph.isSyllabic()) { + return true; + } + } + return false; + } + + + private int getBreakindex(Element boundary) { + int breakindex = 0; + try { + breakindex = Integer.parseInt(boundary.getAttribute("breakindex")); + } catch (NumberFormatException e) { + logger.warn("Unexpected breakindex value `" + boundary.getAttribute("breakindex") + "'"); + } + return breakindex; + } + + + /** + * For a syllable, return the first child segment which is a nucleus + * segment. Return null if there is no such segment. + */ + private Element getNucleus(Element syllable) { + if (syllable == null || !syllable.getTagName().equals(MaryXML.SYLLABLE)) + return null; + Element seg = MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE); + while (seg != null && !isInNucleus(seg)) { + seg = MaryDomUtils.getNextSiblingElementByTagName(seg, MaryXML.PHONE); + } + return seg; + } + + ////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////// + /////////////////////////// Helper Classes /////////////////////////// + ////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////// + + static class ProsodicSettings { + // Relative settings: 100 = 100% = no change + int rate; + int accentProminence; + int accentSlope; + int numberOfPauses; + int pauseDuration; + int vowelDuration; + int plosiveDuration; + int fricativeDuration; + int nasalDuration; + int liquidDuration; + int glideDuration; + int volume; + + ProsodicSettings() { + this.rate = 100; + this.accentProminence = 100; + this.accentSlope = 100; + this.numberOfPauses = 100; + this.pauseDuration = 100; + 
this.vowelDuration = 100; + this.plosiveDuration = 100; + this.fricativeDuration = 100; + this.nasalDuration = 100; + this.liquidDuration = 100; + this.glideDuration = 100; + this.volume = 50; + } + + ProsodicSettings( + int rate, + int accentProminence, + int accentSlope, + int numberOfPauses, + int pauseDuration, + int vowelDuration, + int plosiveDuration, + int fricativeDuration, + int nasalDuration, + int liquidDuration, + int glideDuration, + int volume) { + this.rate = rate; + this.accentProminence = accentProminence; + this.accentSlope = accentSlope; + this.numberOfPauses = numberOfPauses; + this.pauseDuration = pauseDuration; + this.vowelDuration = vowelDuration; + this.plosiveDuration = plosiveDuration; + this.fricativeDuration = fricativeDuration; + this.nasalDuration = nasalDuration; + this.liquidDuration = liquidDuration; + this.glideDuration = glideDuration; + this.volume = volume; + } + + int rate() { + return rate; + } + int accentProminence() { + return accentProminence; + } + int accentSlope() { + return accentSlope; + } + int numberOfPauses() { + return numberOfPauses; + } + int pauseDuration() { + return pauseDuration; + } + int vowelDuration() { + return vowelDuration; + } + int plosiveDuration() { + return plosiveDuration; + } + int fricativeDuration() { + return fricativeDuration; + } + int nasalDuration() { + return nasalDuration; + } + int liquidDuration() { + return liquidDuration; + } + int glideDuration() { + return glideDuration; + } + int volume() { + return volume; + } + + void setRate(int value) { + rate = value; + } + void setAccentProminence(int value) { + accentProminence = value; + } + void setAccentSlope(int value) { + accentSlope = value; + } + void setNumberOfPauses(int value) { + numberOfPauses = value; + } + void setPauseDuration(int value) { + pauseDuration = value; + } + void setVowelDuration(int value) { + vowelDuration = value; + } + void setPlosiveDuration(int value) { + plosiveDuration = value; + } + void 
setFricativeDuration(int value) { + fricativeDuration = value; + } + void setNasalDuration(int value) { + nasalDuration = value; + } + void setLiquidDuration(int value) { + liquidDuration = value; + } + void setGlideDuration(int value) { + glideDuration = value; + } + void setVolume(int value) { + volume = value; + } + + } + + static class TopBaseConfiguration { + int topStart; + int topEnd; + int baseStart; + int baseEnd; + int startTime; + int endTime; + double topSlope; + double baseSlope; + TopBaseConfiguration(int topStart, int topEnd, int baseStart, int baseEnd) { + this(topStart, topEnd, baseStart, baseEnd, 0, 0); + } + + TopBaseConfiguration(int topStart, int topEnd, int baseStart, int baseEnd, int startTime, int endTime) { + this.topStart = topStart; + this.topEnd = topEnd; + this.baseStart = baseStart; + this.baseEnd = baseEnd; + this.startTime = startTime; + this.endTime = endTime; + if (startTime != endTime) { // can calculate slope + topSlope = ((double) topEnd - topStart) / (endTime - startTime); + baseSlope = ((double) baseEnd - baseStart) / (endTime - startTime); + } else { + topSlope = 0; + baseSlope = 0; + } + } + + int topStart() { + return topStart; + } + int topEnd() { + return topEnd; + } + int baseStart() { + return baseStart; + } + int baseEnd() { + return baseEnd; + } + int startTime() { + return startTime; + } + int endTime() { + return endTime; + } + + void setTimes(int startTime, int endTime) { + this.startTime = startTime; + this.endTime = endTime; + if (startTime != endTime) { // can calculate slope + topSlope = ((double) topEnd - topStart) / (endTime - startTime); + baseSlope = ((double) baseEnd - baseStart) / (endTime - startTime); + } else { + topSlope = 0; + baseSlope = 0; + } + } + + /** + * Calculate the frequency of the topline at time time. + * This is calculated as a linear function of topStart, topEnd and + * time. 
+ */ + int toplineFrequency(int time) { + if (time < startTime || time > endTime) { + throw new RuntimeException( + "Invalid time " + time + " (startTime " + startTime + ", endTime " + endTime + ")"); + } + return topStart + (int) (topSlope * (time - startTime)); + } + + /** + * Calculate the frequency of the baseline at time time. + * This is calculated as a linear function of baseStart, baseEnd and + * time. + */ + int baselineFrequency(int time) { + if (time < startTime || time > endTime) { + throw new RuntimeException( + "Invalid time " + time + "(startTime " + startTime + ", endTime " + endTime + ")"); + } + return baseStart + (int) (baseSlope * (time - startTime)); + } + } + + /** A class representing an F0-time target. */ + static class Target { + Element targetRule; + Element segment; + int timing; + int f0; + Target myStar; + + Target() { + targetRule = null; + segment = null; + timing = -1; + f0 = 0; + myStar = null; + } + + Target(Element targetRule, Element segment, int timing, int f0) { + this.targetRule = targetRule; + this.segment = segment; + this.timing = timing; + this.f0 = f0; + myStar = null; + } + Element targetRule() { + return targetRule; + } + Element segment() { + return segment; + } + int timing() { + return timing; + } + int f0() { + return f0; + } + Target myStar() { + return myStar; + } + + void setTargetRule(Element targetRule) { + this.targetRule = targetRule; + } + void setSegment(Element segment) { + this.segment = segment; + } + void setTiming(int timing) { + this.timing = timing; + } + void setF0(int f0) { + this.f0 = f0; + } + void setMyStar(Target star) { + this.myStar = star; + } + + String type() { + if (targetRule != null) + return targetRule.getAttribute("type"); + else + return ""; + } + + /** + * Get the target time relative to the beginning of the phrase, on the + * same scale as that used by the segment "end" attributes. + * @return the target time, or -1 if the time cannot be determined. 
+ */ + int getTargetTime() { + if (segment == null || timing == -1) + return -1; + int end = -1; + try { + end = Integer.parseInt(segment.getAttribute("end")); + } catch (NumberFormatException e) { + return -1; + } + int d = -1; + try { + d = Integer.parseInt(segment.getAttribute("d")); + } catch (NumberFormatException e) { + return -1; + } + // The target time is: + // t = end - d + (timing/100 * d) = end - (1 - timing/100) * d + return end - (100 - timing) * d / 100; + } + + /** + * Set the target time relative to the beginning of the phrase, on the + * same scale as that used by the segment "end" attributes. Adjust this + * target's segment and timing accordingly. This is done as possible -- + * in particular in the presence of pauses, the target is only shifted towards + * the border of the pause but not into or beyond it. + * @return true on success, false on failure. + */ + boolean setTargetTime(int targetTime) { + if (targetTime < 0) + return false; + int currentTargetTime = getTargetTime(); + Element seg = segment; + try { + if (targetTime < currentTargetTime) { + while (seg != null + && Integer.parseInt(seg.getAttribute("end")) - Integer.parseInt(seg.getAttribute("d")) + > targetTime) { + Element s = getPreviousSegment(seg); + // Check for "holes": If last start time (end-d) is too large but + // next end time is too small, there is a small pause between the + // two segments + // => stay at the side of the pause closer to the original + if (s != null && Integer.parseInt(s.getAttribute("end")) < targetTime) { + targetTime = + Integer.parseInt(seg.getAttribute("end")) - Integer.parseInt(seg.getAttribute("d")); + break; // keep seg, forget about s + } else { + seg = s; + } + } + } else { // targetTime > currentTargetTime + while (seg != null && Integer.parseInt(seg.getAttribute("end")) < targetTime) { + Element s = getNextSegment(seg); + // Check for "holes": If last end time is too small but + // next start time (end-d) is too large, there is a small pause 
+ // between the two segments + // => stay at the side of the pause closer to the original + if (s != null + && Integer.parseInt(s.getAttribute("end")) - Integer.parseInt(s.getAttribute("d")) + > targetTime) { + targetTime = Integer.parseInt(seg.getAttribute("end")); + break; // keep seg, forget about s + } else { + seg = s; + } + } + } + if (seg != null) { + // newTiming = (1 - (end - targetTime) / d) * 100 + int newTiming = + 100 + - (100 * (Integer.parseInt(seg.getAttribute("end")) - targetTime)) + / Integer.parseInt(seg.getAttribute("d")); + segment = seg; + timing = newTiming; + assert timing >= 0 && timing <= 100; + } + } catch (NumberFormatException e) { + return false; + } + return true; + } + + } + + + + +} + diff --git a/marytts-jungle/src/main/java/marytts/language/tib/KlattDurationModeller.java b/marytts-jungle/src/main/java/marytts/language/tib/KlattDurationModeller.java index 815b16c7..1ad3a198 100644 --- a/marytts-jungle/src/main/java/marytts/language/tib/KlattDurationModeller.java +++ b/marytts-jungle/src/main/java/marytts/language/tib/KlattDurationModeller.java @@ -1,1564 +1,1564 @@ -/** - * Copyright 2000-2006 DFKI GmbH. - * All Rights Reserved. Use is subject to license terms. - * - * This file is part of MARY TTS. - * - * MARY TTS is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, version 3 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see . 
- * - */ -package marytts.language.tib; - -import java.io.FileInputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Locale; -import java.util.Properties; -import java.util.StringTokenizer; -import java.util.WeakHashMap; - -import marytts.datatypes.MaryData; -import marytts.datatypes.MaryDataType; -import marytts.datatypes.MaryXML; -import marytts.language.tib.datatypes.TibetanDataTypes; -import marytts.modules.InternalModule; -import marytts.modules.MaryModule; -import marytts.modules.ModuleRegistry; -import marytts.modules.KlattDurationModeller.KlattDurationParams; -import marytts.modules.phonemiser.Allophone; -import marytts.modules.phonemiser.AllophoneSet; -import marytts.server.MaryProperties; -import marytts.util.dom.MaryDomUtils; -import marytts.util.dom.NameNodeFilter; - -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; -import org.w3c.dom.traversal.DocumentTraversal; -import org.w3c.dom.traversal.NodeFilter; -import org.w3c.dom.traversal.NodeIterator; -import org.w3c.dom.traversal.TreeWalker; - - -/** - * The calculation of acoustic parameters module. - * - * @author Marc Schröder - */ - -public class KlattDurationModeller extends InternalModule { - private AllophoneSet allophoneSet; - private KlattDurationParams klattDurationParams; - private Properties klattRuleParams; - /** This map contains the topline-baseline frequency configurations for the - * currently used phrase and sub-phrase prosody elements. As this is a - * WeakHashMap, entries will automatically be deleted when not in regular - * use anymore. */ - private WeakHashMap topBaseConfMap; - /** This map contains the prosodic settings, as ProsodicSettings objects, - * for the currently used prosody elements. As this is a WeakHashMap, - * entries will automatically be deleted when not in regular use - * anymore. 
*/ - private WeakHashMap prosodyMap; - - public KlattDurationModeller() - { - super("KlattDurationModeller", - TibetanDataTypes.PHRASES_TIB, - MaryDataType.DURATIONS, - new Locale("tib")); - } - - public void startup() throws Exception { - super.startup(); - // We depend on the Synthesis module: - MaryModule synthesis = ModuleRegistry.getModule(marytts.modules.Synthesis.class); - assert synthesis != null; - if (synthesis.getState() == MaryModule.MODULE_OFFLINE) - synthesis.startup(); - // load klatt rules - klattRuleParams = new Properties(); - klattRuleParams.load(new FileInputStream(MaryProperties.needFilename("tibetan.cap.klattrulefile"))); // load phone list - allophoneSet = AllophoneSet.getAllophoneSet(MaryProperties.needFilename("tibetan.allophoneset")); - klattDurationParams = new KlattDurationParams(MaryProperties.needFilename("tibetan.cap.klattdurfile")); - // instantiate the Map in which settings are associated with elements: - // (when the objects serving as keys are not in ordinary use any more, - // the key-value pairs are deleted from the WeakHashMap earlier or - // later; that means we do not need to keep track of the hashmaps per - // thread) - prosodyMap = new WeakHashMap(); - } - - public MaryData process(MaryData d) throws Exception { - Document doc = d.getDocument(); - determineProsodicSettings(doc); - addOrDeleteBoundaries(doc); - - NodeList sentences = doc.getElementsByTagName(MaryXML.SENTENCE); - for (int i = 0; i < sentences.getLength(); i++) { - Element sentence = (Element) sentences.item(i); - processSentence(sentence); - } - MaryData result = new MaryData(outputType(), d.getLocale()); - result.setDocument(doc); - return result; - } - - /** - * For all (possibly nested) prosody elements in the document, - * calculate their (possibly cumulated) prosodic settings - * and save them in a map. 
- */ - private void determineProsodicSettings(Document doc) { - // Determine the prosodic setting for each prosody element - NodeList prosodies = doc.getElementsByTagName(MaryXML.PROSODY); - for (int i = 0; i < prosodies.getLength(); i++) { - Element prosody = (Element) prosodies.item(i); - ProsodicSettings settings = new ProsodicSettings(); - // Neutral default settings: - ProsodicSettings parentSettings = new ProsodicSettings(); - // Obtain parent settings, if any: - Element ancestor = (Element) MaryDomUtils.getAncestor(prosody, MaryXML.PROSODY); - if (ancestor != null) { - ProsodicSettings testSettings = (ProsodicSettings) prosodyMap.get(ancestor); - if (testSettings != null) { - parentSettings = testSettings; - } - } - // Only accept relative changes, i.e. percentage delta: - settings.setRate(parentSettings.rate() + getPercentageDelta(prosody.getAttribute("rate"))); - settings.setAccentProminence( - parentSettings.accentProminence() + getPercentageDelta(prosody.getAttribute("accent-prominence"))); - settings.setAccentSlope( - parentSettings.accentSlope() + getPercentageDelta(prosody.getAttribute("accent-slope"))); - settings.setNumberOfPauses( - parentSettings.numberOfPauses() + getPercentageDelta(prosody.getAttribute("number-of-pauses"))); - settings.setPauseDuration( - parentSettings.pauseDuration() + getPercentageDelta(prosody.getAttribute("pause-duration"))); - settings.setVowelDuration( - parentSettings.vowelDuration() + getPercentageDelta(prosody.getAttribute("vowel-duration"))); - settings.setPlosiveDuration( - parentSettings.plosiveDuration() + getPercentageDelta(prosody.getAttribute("plosive-duration"))); - settings.setFricativeDuration( - parentSettings.fricativeDuration() + getPercentageDelta(prosody.getAttribute("fricative-duration"))); - settings.setNasalDuration( - parentSettings.nasalDuration() + getPercentageDelta(prosody.getAttribute("nasal-duration"))); - settings.setLiquidDuration( - parentSettings.liquidDuration() + 
getPercentageDelta(prosody.getAttribute("liquid-duration"))); - settings.setGlideDuration( - parentSettings.glideDuration() + getPercentageDelta(prosody.getAttribute("glide-duration"))); - - String sVolume = prosody.getAttribute("volume"); - if (sVolume.equals("")) { - settings.setVolume(parentSettings.volume()); - } else if (isPercentageDelta(sVolume)) { - int newVolume = parentSettings.volume() + getPercentageDelta(sVolume); - if (newVolume < 0) - newVolume = 0; - else if (newVolume > 100) - newVolume = 100; - settings.setVolume(newVolume); - } else if (isUnsignedNumber(sVolume)) { - settings.setVolume(getUnsignedNumber(sVolume)); - } else if (sVolume.equals("silent")) { - settings.setVolume(0); - } else if (sVolume.equals("soft")) { - settings.setVolume(25); - } else if (sVolume.equals("medium")) { - settings.setVolume(50); - } else if (sVolume.equals("loud")) { - settings.setVolume(75); - } - prosodyMap.put(prosody, settings); - } - } - - /** - * Adjust the number of boundaries according to rate and the - * "number-of-pauses" attribute. - */ - private void addOrDeleteBoundaries(Document doc) { - // Go through boundaries. A boundary is deleted if the determined - // minimum breakindex size is larger than this boundary's breakindex. 
- NodeIterator it = - ((DocumentTraversal) doc).createNodeIterator( - doc, - NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(MaryXML.BOUNDARY), - false); - Element boundary = null; - List bi1prosodyElements = null; - while ((boundary = (Element) it.nextNode()) != null) { - int minBI = 3; - Element prosody = (Element) MaryDomUtils.getAncestor(boundary, MaryXML.PROSODY); - if (prosody != null) { - ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); - assert settings != null; - int rate = settings.rate(); - int numberOfPauses = settings.numberOfPauses(); - if (numberOfPauses <= 50) - minBI = 5; - else if (numberOfPauses <= 75) - minBI = 4; - else if (numberOfPauses > 150) - minBI = 1; - else if (numberOfPauses > 125) - minBI = 2; - // Rate can only shift the number of pauses by one breakindex - if (rate < 90 && minBI > 1) - minBI--; - if (minBI == 1) { - // Remember that the current prosody element wants bi 1 boundaries: - if (bi1prosodyElements == null) - bi1prosodyElements = new ArrayList(); - bi1prosodyElements.add(prosody); - } - } - // This boundary's bi: - int bi = 3; - try { - bi = Integer.parseInt(boundary.getAttribute("breakindex")); - } catch (NumberFormatException e) { - logger.info( - "Unexpected breakindex value `" + boundary.getAttribute("breakindex") + "', assuming " + bi); - } - if (bi < minBI) { - if (!boundary.hasAttribute("duration")) - boundary.getParentNode().removeChild(boundary); - else - boundary.removeAttribute("bi"); // but keep duration - } - } - // Do we need to add any boundaries? 
- if (bi1prosodyElements != null) { - Iterator elIt = bi1prosodyElements.iterator(); - while (elIt.hasNext()) { - Element prosody = (Element) elIt.next(); - NodeIterator nodeIt = - ((DocumentTraversal) doc).createNodeIterator( - prosody, - NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(new String[] { MaryXML.TOKEN, MaryXML.BOUNDARY }), - false); - Element el = null; - Element prevEl = null; - while ((el = (Element) nodeIt.nextNode()) != null) { - if (el.getTagName().equals(MaryXML.TOKEN) && prevEl != null && prevEl.getTagName().equals(MaryXML.TOKEN)) { - // Need to insert a boundary before el: - Element newBoundary = - MaryXML.createElement(doc, MaryXML.BOUNDARY); - newBoundary.setAttribute("breakindex", "1"); - el.getParentNode().insertBefore(newBoundary, el); - } - prevEl = el; - } - } - } - } - - private void processSentence(Element sentence) { - NodeList tokens = sentence.getElementsByTagName(MaryXML.TOKEN); - if (tokens.getLength() < 1) { - return; // no tokens -- what can we do? - } - - // Create the substructure of each token - for (int i = 0; i < tokens.getLength(); i++) { - Element token = (Element) tokens.item(i); - createSubStructure(token); - } - - // apply Klatt rules to each segment - NodeList segments = sentence.getElementsByTagName(MaryXML.PHONE); - for (int i = 0; i < segments.getLength(); i++) { - Element segment = (Element) segments.item(i); - int factor = 100; - int klatt0 = klattRule0(segment); - int klatt2 = klattRule2(segment); - int klatt2a = klattRule2a(segment); - int klatt3 = klattRule3(segment); - int klatt4 = klattRule4(segment); - int klatt5 = klattRule5(segment); - int klatt6 = klattRule6(segment); - int klatt7 = klattRule7(segment); - int klatt8 = klattRule8(segment); - int klatt10 = klattRule10(segment); - int accentProminence = accentProminenceRule(segment); - factor = (factor * klatt0) / 100; - factor = (factor * klatt2) / 100; - factor = (factor * klatt2a) / 100; - factor = (factor * klatt3) / 100; - factor = (factor * klatt4) / 
100; - factor = (factor * klatt5) / 100; - factor = (factor * klatt6) / 100; - factor = (factor * klatt7) / 100; - factor = (factor * klatt8) / 100; - factor = (factor * klatt10) / 100; - factor = (factor * accentProminence) / 100; - - // and determine the actual length: - int inhDuration = getInhDuration(segment); - int minDuration = getMinDuration(segment); - int normalDuration = minDuration + ((inhDuration - minDuration) * factor) / 100; - - // Tempo operates on the entire duration, not just on - // the stretchable part: - int tempo = tempoRule(segment); - int duration = (normalDuration * tempo) / 100; - - segment.setAttribute("d", String.valueOf(duration)); - logger.debug( - segment.getAttribute("p") - + " " - + duration - + "ms (tempoFactor " - + tempo - + "%, normal " - + normalDuration - + ", min " - + minDuration - + ", inh " - + inhDuration - + ") " - + factor - + "% (" - + klatt0 - + "*" - + klatt2 - + "*" - + klatt2a - + "*" - + klatt3 - + "*" - + klatt4 - + "*" - + klatt5 - + "*" - + klatt6 - + "*" - + klatt7 - + "*" - + klatt8 - + "*" - + klatt10 - + ")"); - } - - // apply Klatt rule 1 to boundaries: - NodeList boundaries = sentence.getElementsByTagName(MaryXML.BOUNDARY); - for (int i = 0; i < boundaries.getLength(); i++) { - Element boundary = (Element) boundaries.item(i); - if (!boundary.hasAttribute("duration")) { - int duration = klattRule1(boundary); - boundary.setAttribute("duration", String.valueOf(duration)); - } - } - - NodeList phrases = sentence.getElementsByTagName(MaryXML.PHRASE); - for (int i = 0; i < phrases.getLength(); i++) { - Element phrase = (Element) phrases.item(i); - // Now save the accumulated duration in every segment - // of all segments and boundaries in the phrase. 
- calculateAccumulatedDurations(phrase); - } - } - - private void createSubStructure(Element token) { - Document document = token.getOwnerDocument(); - Element prosody = (Element) MaryDomUtils.getAncestor(token, MaryXML.PROSODY); - String vq = null; // voice quality - if (prosody != null) { - ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); - int volume = settings.volume(); - if (volume >= 60) { - vq = "loud"; - } else if (volume <= 40) { - vq = "soft"; - } else { - vq = null; - } - } - - // Create syllables within this token only if it does not have any yet: - NodeList syls = token.getElementsByTagName(MaryXML.SYLLABLE); - if (syls.getLength() > 0) { - for (int i=0; i= 2, - * if it is part of an accented word, gets additional lengthening. - * @return A percentage value as a factor for duration - * (100 corresponds to no change). - */ - private int klattRule2a(Element segment) { - Element syllable = getSyllable(segment); - Element token = getToken(syllable); - if (isLastBeforeBoundary(syllable, 2) && hasAccent(token)) { - if (isInNucleus(segment)) { - return getPropertyAsInteger("rule2a.nucleus"); - } else if (isInCoda(segment) && isNasal(segment)) { - return getPropertyAsInteger("rule2a.coda"); - } - } - // default: Rule not applicable - return 100; - } - - /** - * Klatt Rule 3: Non-phrase-final shortening. - * @return A percentage value as a factor for duration - * (100 corresponds to no change). - */ - private int klattRule3(Element segment) { - Element syllable = getSyllable(segment); - if (!isMajIPFinal(syllable)) { - if (isInNucleus(segment)) { - return getPropertyAsInteger("rule3.nucleus"); - } - } else if (isInCoda(segment) && (isLiquid(segment) || isNasal(segment))) { - return getPropertyAsInteger("rule3.coda"); - } - // default: Rule not applicable - return 100; - } - - /** - * Klatt Rule 4: Non-word-final shortening. - * @return A percentage value as a factor for duration - * (100 corresponds to no change). 
- */ - private int klattRule4(Element segment) { - Element syllable = getSyllable(segment); - if (!isWordFinal(syllable)) { - if (isInNucleus(segment)) { - return getPropertyAsInteger("rule4.nucleus"); - } - } - // default: Rule not applicable - return 100; - } - - /** - * Klatt Rule 5: Polysyllabic shortening. - * @return A percentage value as a factor for duration - * (100 corresponds to no change). - */ - private int klattRule5(Element segment) { - Element token = getToken(segment); - if (isPolysyllabic(token)) { - if (isInNucleus(segment)) { - return getPropertyAsInteger("rule5.nucleus"); - } - } - // default: Rule not applicable - return 100; - } - - /** - * Klatt Rule 6: Non-initial consonant shortening. - * @return A percentage value as a factor for duration - * (100 corresponds to no change). - */ - private int klattRule6(Element segment) { - Element syllable = getSyllable(segment); - if (isInOnset(segment) && !isWordInitial(syllable)) { - return getPropertyAsInteger("rule6.onset"); - } else if (isInCoda(segment)) { - return getPropertyAsInteger("rule6.coda"); - } - // default: Rule not applicable - return 100; - } - - /** - * Klatt Rule 7: Unstressed shortening - * @return A percentage value as a factor for duration - * (100 corresponds to no change). - */ - private int klattRule7(Element segment) { - // The stress reduction formulated by Klatt as part of rule 7 - // is relocated to getStress(syllable). - // The min. duration reduction is relocated to getMinDuration(segment). 
- - Element token = getToken(segment); - Element syllable = getSyllable(segment); - int stress = getStress(syllable); - - if (stress == 2 || stress == 0) { - if (isInOnset(segment)) { - if (isLiquid(segment) || isGlide(segment)) { - return (getPropertyAsInteger("rule7.onset.liquids")); - } else { - return (getPropertyAsInteger("rule7.others")); - } - } else if (isInNucleus(segment)) { - if (isWordMedial(syllable)) { - return (getPropertyAsInteger("rule7.nucleus.medial")); - } else { - return (getPropertyAsInteger("rule7.nucleus.others")); - } - } else { // segment is in coda - return (getPropertyAsInteger("rule7.others")); - } - } - // default: Rule not applicable - return 100; - } - - /** - * Klatt Rule 8: Lengthening for emphasis - * @return A percentage value as a factor for duration - * (100 corresponds to no change). - */ - private int klattRule8(Element segment) { - Element syllable = getSyllable(segment); - if (hasAccent(syllable)) { - if (isInNucleus(segment)) { - return getPropertyAsInteger("rule8.accent"); - } - } - // default: Rule not applicable - return 100; - } - - // Klatt Rule 9 (postvocalic context of vowels) - // is not needed for German. - - /** - * Klatt Rule 10: Shortening in consonant clusters - * @return A percentage value as a factor for duration - * (100 corresponds to no change). 
- */ - private int klattRule10(Element segment) { - boolean hasPrecedingConsonant = false; - boolean hasFollowingConsonant = false; - if (isConsonant(segment)) { - Element preceding = getPreviousSegment(segment); - if (preceding != null && isConsonant(preceding)) { - hasPrecedingConsonant = true; - } - Element following = getNextSegment(segment); - if (following != null && isConsonant(following)) { - hasFollowingConsonant = true; - } - if (hasPrecedingConsonant && hasFollowingConsonant) { - return getPropertyAsInteger("rule10.surrounded"); - } else if (hasPrecedingConsonant) { - return getPropertyAsInteger("rule10.preceded"); - } else if (hasFollowingConsonant) { - return getPropertyAsInteger("rule10.followed"); - } - } - // default: Rule not applicable - return 100; - } - - // Klatt Rule 11 (lengthening due to plosive aspiration) - // is not needed for German. - - /** - * Klatt Rule 1: Pause duration. The pause duration depends on the break - * index, on the speech rate, and on the "pause-duration" attribute. This - * rule assumes that every boundary it gets as input is to be realised, - * i.e. not-to-be-realised boundaries are already deleted at this stage. - * @return A pause duration, in milliseconds. - */ - private int klattRule1(Element boundary) { - int breakindex = getBreakindex(boundary); - if (breakindex >= 1 && breakindex <= 6) { - int durationMeasure = 100; - Element prosody = (Element) MaryDomUtils.getAncestor(boundary, MaryXML.PROSODY); - if (prosody != null) { - ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); - assert settings != null; - // Calculate duration measure as a sum of rate and pauseDur. - int deltaRate = settings.rate() - 100; - int deltaPauseDur = settings.pauseDuration() - 100; - durationMeasure = 100 - deltaRate + deltaPauseDur; - } - // Now factor is a measure of how long the pauses are to be: - // 100 medium, 120 long, 140 very long - // 80 short, 60 very short - // Intermediate values are interpolated. 
- if (durationMeasure == 100) { // probably the most common - return getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".medium"); - } else { - // We could treat 120, 140, 80, and 60 as special cases, - // but they are probably so rare that it doesn't harm - // getting them with the interpolation code below. - int longer; - int shorter; - int dist; - // dist is distance from shorter; our duration value is - // shorter + dist/20 * (longer - shorter) - if (durationMeasure > 100) { - if (durationMeasure > 120) { - // 120 < durationMeasure -- need 120 (long) and 140 (verylong) - longer = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".verylong"); - shorter = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".long"); - dist = durationMeasure - 120; - } else { - // 100 < durationMeasure <= 120 -- need 100 (medium) and 120 - // (long) - longer = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".long"); - shorter = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".medium"); - dist = durationMeasure - 100; - } - } else { - if (durationMeasure < 80) { - // durationMeasure < 80 -- need 80 (short) and 60 (veryshort) - longer = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".short"); - shorter = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".veryshort"); - dist = durationMeasure - 60; - } else { - // 80 <= durationMeasure < 100 -- need 80 (short) and 100 (medium) - longer = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".medium"); - shorter = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".short"); - dist = durationMeasure - 80; - } - } - int result = shorter + (dist * (longer - shorter)) / 20; - if (result < 10) - result = 10; - return result; - } - } - // Not a valid break index: - return 0; - } - - /** - * Tempo rule: Take into account the prosody settings - * for modifying the segment durations, realising speech tempo. 
- */ - private int tempoRule(Element segment) { - Element prosody = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PROSODY); - if (prosody != null) { - ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); - assert settings != null; - int rate = settings.rate(); - // Duration is the inverse of rate: - int durFactor = 10000 / rate; - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - if (ph != null) { - if (ph.isVowel()) - durFactor = (durFactor * settings.vowelDuration()) / 100; - else if (ph.isPlosive()) - durFactor = (durFactor * settings.plosiveDuration()) / 100; - else if (ph.isFricative()) - durFactor = (durFactor * settings.fricativeDuration()) / 100; - else if (ph.isNasal()) - durFactor = (durFactor * settings.nasalDuration()) / 100; - else if (ph.isLiquid()) - durFactor = (durFactor * settings.liquidDuration()) / 100; - else if (ph.isGlide()) - durFactor = (durFactor * settings.glideDuration()) / 100; - } - return durFactor; - } - // default: Rule not applicable - return 100; - } - - /** - * Accent prominence rule: The "accent-prominence" attribute influences - * nucleus duration for accented syllables (in addition to Klatt rule 8), - * and affects voice quality for accented syllables. In - * addition, but not here, the "accent-prominence" attribute causes a - * topline/baseline overshoot / undershoot. - * @return A percentage value as a factor for duration - * (100 corresponds to no change). 
- * @see #calculateTargetFrequency() - */ - private int accentProminenceRule(Element segment) { - // In addition to Klatt rule 8, take into account the - // "accent-prominence" attribute: - int returnValue = 100; // default value - Element syllable = getSyllable(segment); - if (hasAccent(syllable)) { - Element prosody = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PROSODY); - if (prosody != null) { - ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); - if (settings != null) { - int accentProminence = settings.accentProminence(); - if (accentProminence != 100) { - if (isInNucleus(segment)) { - returnValue = accentProminence; - } - // And affect voice quality: - String vq = segment.getAttribute("vq"); - if (accentProminence >= 150) { - if (vq.equals("soft") || vq.equals("modal") || vq.equals("")) - vq = "loud"; - } else if (accentProminence >= 125) { - if (vq.equals("soft")) { - vq = "modal"; - } else if (vq.equals("modal") || vq.equals("")) { - vq = "loud"; - } - } - if (!vq.equals(segment.getAttribute("vq"))) { - segment.setAttribute("vq", vq); - } - } - } - } - } - return returnValue; - - } - - /** - * For each segment in the given phrase, calculate the accumulated duration - * since the beginning of the phrase, including this segment's duration, - * and save it in the segment's end attribute. (This value is - * then comparable to the end feature in FreeTTS, but we use - * milliseconds, they use seconds.) 
- */ - private void calculateAccumulatedDurations(Element phrase) { - TreeWalker tw = - ((DocumentTraversal) phrase.getOwnerDocument()).createTreeWalker( - phrase, - NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(new String[] { MaryXML.PHONE, MaryXML.BOUNDARY }), - false); - int totalDuration = 0; - Element element; - while ((element = (Element) tw.nextNode()) != null) { - if (element.getTagName().equals(MaryXML.PHONE)) { - // A segment - int d = 0; - try { - d = Integer.parseInt(element.getAttribute("d")); - } catch (NumberFormatException e) { - logger.warn("Unexpected duration value `" + element.getAttribute("d") + "'"); - } - totalDuration += d; - element.setAttribute("end", String.valueOf(totalDuration)); - } else { - // A boundary - int d = 0; - try { - d = Integer.parseInt(element.getAttribute("duration")); - } catch (NumberFormatException e) { - logger.warn("Unexpected duration value `" + element.getAttribute("duration") + "'"); - } - totalDuration += d; - } - } - } - - - ////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////// - ////////////////////////////// Helpers /////////////////////////////// - ////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////// - - private int getPropertyAsInteger(String prop) { - int value = 100; - try { - value = Integer.parseInt(klattRuleParams.getProperty(prop)); - } catch (NumberFormatException e) { - logger.warn("Cannot read property " + prop + " in klattrule parameter file. 
Using default."); - } - return value; - } - - private Element getToken(Element segmentOrSyllable) { - return (Element) MaryDomUtils.getAncestor(segmentOrSyllable, MaryXML.TOKEN); - } - - private Element getSyllable(Element segment) { - return (Element) MaryDomUtils.getAncestor(segment, MaryXML.SYLLABLE); - } - - private int getStress(Element syllable) { - // Klatt's usage of 1ary and 2ary stress (Klatt, 1979): - // primary lexical stress is reserved for vowels in open-class content - // words, only one 1ary stress per word; - // 2ary lexical stress is used in some content words, in compounds, - // in the strongest syllable of polysyllabic function words, and for - // pronouns (excluding personal pronouns). - // Approximately adapt our input to Klatt's input: - // * accented prosodic words (have a tobi accent) can stay as they are - // * for each unaccented prosodic word (no tobi accent) - // - if it is monosyllabic and not a pronoun, remove any stress sign - // - if it is polysyllabic, remove 2ary stress, - // and reduce 1ary to 2ary. - - int stress = 0; - - if (syllable.hasAttribute("stress")) { - String helper = syllable.getAttribute("stress"); - if (helper.equals("1")) - stress = 1; - else if (helper.equals("2")) - stress = 2; - } - - if (stress != 0) { - // it is worth thinking about stress reduction - Element token = getToken(syllable); - // stress reduction: - if (!hasAccent(token)) { - // unaccented word - if (isPolysyllabic(token)) { - // polysyllabic: - // reduce 1ary to 2ary, 2ary to no stress: - if (stress == 1) - stress = 2; - else if (stress == 2) - stress = 0; - } else { - // monosyllabic: - if (!isPronoun(token)) { - // not a pronoun - // remove any stress: - stress = 0; - } - } - } - } - - return stress; - } - - /** - * Find the segment preceding this segment within the same - * phrase. - * @return that segment, or null if there is no such segment. 
- */ - private static Element getPreviousSegment(Element segment) { - Element phrase = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PHRASE); - return MaryDomUtils.getPreviousOfItsKindIn(segment, phrase); - } - - /** - * Find the segment following this segment within the same - * phrase. - * @return that segment, or null if there is no such segment. - */ - private static Element getNextSegment(Element segment) { - Element phrase = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PHRASE); - return MaryDomUtils.getNextOfItsKindIn(segment, phrase); - } - - /** - * Find the syllable preceding this syllable within the same - * phrase. - * @return that syllable, or null if there is no such - * syllable. - */ - private static Element getPreviousSyllable(Element syllable) { - Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); - return MaryDomUtils.getPreviousOfItsKindIn(syllable, phrase); - } - - /** - * Find the syllable following this syllable within the same - * phrase. - * @return that syllable, or null if there is no such - * syllable. - */ - private static Element getNextSyllable(Element syllable) { - if (syllable == null) - return null; - Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); - return MaryDomUtils.getNextOfItsKindIn(syllable, phrase); - } - - private int getMinDuration(Element segment) { - int minDuration = klattDurationParams.getMinDuration(segment.getAttribute("p")); - - // additional reduction for unstressed segments: - // (this comes from klatt's original rule no. 
7) - if (getStress(getSyllable(segment)) == 0) { - // For unstressed segments, - // increase stretchability by reducing minimum duration: - return (minDuration * getPropertyAsInteger("rule7.mindur")) / 100; - } else { // default - return minDuration; - } - } - - private int getInhDuration(Element segment) { - return klattDurationParams.getInhDuration(segment.getAttribute("p")); - } - - private boolean isPronoun(Element token) { - String pos = token.getAttribute("pos"); - return pos.equals("PDS") - || pos.equals("PDAT") - || pos.equals("PIS") - || pos.equals("PIAT") - || pos.equals("PIDAT") - || pos.equals("PPER") - || pos.equals("PPOSS") - || pos.equals("PPOSAT") - || pos.equals("PRELS") - || pos.equals("PRELAT") - || pos.equals("PRF") - || pos.equals("PWS") - || pos.equals("PWAT") - || pos.equals("PWAV"); - } - - private boolean isPolysyllabic(Element token) { - return token.getElementsByTagName(MaryXML.SYLLABLE).getLength() > 1; - } - - private boolean hasAccent(Element token) { - String accent = token.getAttribute("accent"); - return !accent.equals(""); - } - - /** - * Search for boundary and syllable elements following the given syllable. - * If the next matching element found is a boundary with breakindex - * minBreakindex or larger, return true; otherwise, - * return false. - * If there is no next node, return true. - */ - private boolean isLastBeforeBoundary(Element syllable, int minBreakindex) { - Document doc = syllable.getOwnerDocument(); - Element sentence = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.SENTENCE); - TreeWalker tw = - ((DocumentTraversal) doc).createTreeWalker( - sentence, - NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(new String[] { MaryXML.SYLLABLE, MaryXML.BOUNDARY }), - false); - tw.setCurrentNode(syllable); - Element next = (Element) tw.nextNode(); - if (next == null) { - // no matching node after syllable -- - // we must be in a final position. 
- return true; - } - if (next.getNodeName().equals(MaryXML.BOUNDARY)) { - if (getBreakindex(next) >= minBreakindex) - return true; - } - // This syllable is either followed by another syllable or - // by a boundary with breakindex < minBreakindex - return false; - } - - private boolean isMajIPFinal(Element syllable) { - // If this syllable is followed by a boundary with breakindex - // 4 or above, return true. - return isLastBeforeBoundary(syllable, 4); - } - - private boolean isMinipFinal(Element syllable) { - // If this syllable is followed by a boundary with breakindex - // 3 or above, return true. - return isLastBeforeBoundary(syllable, 3); - } - - private boolean isWordFinal(Element syllable) { - Element e = syllable; - while (e != null) { - e = MaryDomUtils.getNextSiblingElement(e); - if (e != null && e.getNodeName().equals(MaryXML.SYLLABLE)) - return false; - } - return true; - } - - private boolean isWordMedial(Element syllable) { - return !(isWordFinal(syllable) || isWordInitial(syllable)); - - } - - private boolean isWordInitial(Element syllable) { - Element e = syllable; - while (e != null) { - e = MaryDomUtils.getPreviousSiblingElement(e); - if (e != null && e.getNodeName().equals(MaryXML.SYLLABLE)) - return false; - } - return true; - } - - private boolean isInOnset(Element segment) { - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - assert ph != null; - if (ph.isSyllabic()) { - return false; - } - // OK, segment is not syllabic. 
See if it is followed by a syllabic - // segment: - for (Element e = MaryDomUtils.getNextSiblingElement(segment); - e != null; - e = MaryDomUtils.getNextSiblingElement(e)) { - ph = allophoneSet.getAllophone(e.getAttribute("p")); - assert ph != null; - if (ph.isSyllabic()) { - return true; - } - } - return false; - } - - private boolean isInNucleus(Element segment) { - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - assert ph != null; - return ph.isSyllabic(); - } - - private boolean isInCoda(Element segment) { - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - assert ph != null; - if (ph.isSyllabic()) { - return false; - } - // OK, segment is not syllabic. See if it is preceded by a syllabic - // segment: - for (Element e = MaryDomUtils.getPreviousSiblingElement(segment); - e != null; - e = MaryDomUtils.getPreviousSiblingElement(e)) { - ph = allophoneSet.getAllophone(e.getAttribute("p")); - assert ph != null; - if (ph.isSyllabic()) { - return true; - } - } - return false; - } - - private boolean isConsonant(Element segment) { - return !isVowel(segment); - } - - private boolean isVowel(Element segment) { - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - assert ph != null; - return ph.isVowel(); - } - - private boolean isLiquid(Element segment) { - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - assert ph != null; - return ph.isLiquid(); - } - - private boolean isGlide(Element segment) { - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - assert ph != null; - return ph.isGlide(); - } - - private boolean isNasal(Element segment) { - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - assert ph != null; - return ph.isNasal(); - } - - private boolean isFricative(Element segment) { - Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); - assert ph != null; - return ph.isFricative(); - } - - private int getBreakindex(Element 
boundary) { - int breakindex = 0; - try { - breakindex = Integer.parseInt(boundary.getAttribute("breakindex")); - } catch (NumberFormatException e) { - logger.warn("Unexpected breakindex value `" + boundary.getAttribute("breakindex") + "'"); - } - return breakindex; - } - - /** - * Tell whether the string contains a positive or negative percentage - * delta, i.e., a percentage number with an obligatory + or - sign. - */ - private boolean isPercentageDelta(String string) { - String s = string.trim(); - if (s.length() < 3) - return false; - return s.substring(s.length() - 1).equals("%") && isNumberDelta(s.substring(0, s.length() - 1)); - } - - /** - * For a string containing a percentage delta as judged by - * isPercentageDelta(), return the numerical value, rounded to - * an integer. - * @return the numeric part of the percentage, rounded to an integer, or 0 - * if the string is not a valid percentage delta. - */ - private int getPercentageDelta(String string) { - String s = string.trim(); - if (!isPercentageDelta(s)) - return 0; - return getNumberDelta(s.substring(0, s.length() - 1)); - } - - /** - * Tell whether the string contains a positive or negative semitones delta, - * i.e., a semitones number with an obligatory + or - sign, such as - * "+3.2st" or "-13.2st". - */ - private boolean isSemitonesDelta(String string) { - String s = string.trim(); - if (s.length() < 4) - return false; - return s.substring(s.length() - 2).equals("st") && isNumberDelta(s.substring(0, s.length() - 2)); - } - - /** - * For a string containing a semitones delta as judged by - * isSemitonesDelta(), return the numerical value, as a - * double. - * @return the numeric part of the semitones delta, or 0 - * if the string is not a valid semitones delta. 
- */ - private double getSemitonesDelta(String string) { - String s = string.trim(); - if (!isSemitonesDelta(s)) - return 0; - String num = s.substring(0, s.length() - 2); - double value = 0; - try { - value = Double.parseDouble(num); - } catch (NumberFormatException e) { - logger.warn("Unexpected number value `" + num + "'"); - } - return value; - } - - /** - * Tell whether the string contains a positive or negative number - * delta, i.e., a number with an obligatory + or - sign. - */ - private boolean isNumberDelta(String string) { - String s = string.trim(); - if (s.length() < 2) - return false; - return (s.charAt(0) == '+' || s.charAt(0) == '-') && isUnsignedNumber(s.substring(1)); - } - - /** - * For a string containing a number delta as judged by - * isNumberDelta(), return the numerical value, rounded to - * an integer. - * @return the numeric value, rounded to an integer, or 0 - * if the string is not a valid number delta. - */ - private int getNumberDelta(String string) { - String s = string.trim(); - if (!isNumberDelta(s)) - return 0; - double value = 0; - try { - value = Double.parseDouble(s); - } catch (NumberFormatException e) { - logger.warn("Unexpected number value `" + s + "'"); - } - return (int) Math.round(value); - - } - - /** - * Tell whether the string contains an unsigned semitones expression, such - * as "12st" or "5.4st". - */ - private boolean isUnsignedSemitones(String string) { - String s = string.trim(); - if (s.length() < 3) - return false; - return s.substring(s.length() - 2).equals("st") && isUnsignedNumber(s.substring(0, s.length() - 2)); - } - - /** - * For a string containing an unsigned semitones expression as judged by - * isUnsignedSemitones(), return the numerical value as a - * double. - * @return the numeric part of the semitones expression, or 0 if the string - * is not a valid unsigned semitones expression. 
- */ - private double getUnsignedSemitones(String string) { - String s = string.trim(); - if (!isUnsignedSemitones(s)) - return 0; - String num = s.substring(0, s.length() - 2); - double value = 0; - try { - value = Double.parseDouble(num); - } catch (NumberFormatException e) { - logger.warn("Unexpected number value `" + num + "'"); - } - return value; - } - - /** - * Tell whether the string contains an unsigned number. - */ - private boolean isUnsignedNumber(String string) { - String s = string.trim(); - if (s.length() < 1) - return false; - if (s.charAt(0) != '+' && s.charAt(0) != '-') { - double value = 0; - try { - value = Double.parseDouble(s); - } catch (NumberFormatException e) { - return false; - } - return true; - } - return false; - } - - /** - * For a string containing an unsigned number as judged by - * isUnsignedNumber(), return the numerical value, rounded to - * an integer. - * @return the numeric value, rounded to an integer, or 0 - * if the string is not a valid unsigned number. - */ - private int getUnsignedNumber(String string) { - String s = string.trim(); - if (!isUnsignedNumber(s)) - return 0; - double value = 0; - try { - value = Double.parseDouble(s); - } catch (NumberFormatException e) { - logger.warn("Unexpected number value `" + s + "'"); - } - return (int) Math.round(value); - } - - /** - * Tell whether the string contains a number. - */ - private boolean isNumber(String string) { - String s = string.trim(); - if (s.length() < 1) - return false; - double value = 0; - try { - value = Double.parseDouble(s); - } catch (NumberFormatException e) { - return false; - } - return true; - } - - /** - * For a string containing a number as judged by - * isNumber(), return the numerical value, rounded to - * an integer. - * @return the numeric value, rounded to an integer, or 0 - * if the string is not a valid number. 
- */ - private int getNumber(String string) { - String s = string.trim(); - if (!isNumber(s)) - return 0; - double value = 0; - try { - value = Double.parseDouble(s); - } catch (NumberFormatException e) { - logger.warn("Unexpected number value `" + s + "'"); - } - return (int) Math.round(value); - } - - /** - * For a given token, find the stressed syllable. If no syllable has - * primary stress, return the first syllable with secondary stress. If none - * has secondary stress, return the first syllable in the token. If there - * is no syllable in the token or the element given in the argument is not - * a token element, return null. - */ - private Element getStressedSyllable(Element token) { - if (token == null || !token.getTagName().equals(MaryXML.TOKEN)) - return null; - Element syl = MaryDomUtils.getFirstElementByTagName(token, MaryXML.SYLLABLE); - while (syl != null && !syl.getAttribute("stress").equals("1")) { - syl = MaryDomUtils.getNextSiblingElementByTagName(syl, MaryXML.SYLLABLE); - } - if (syl != null) { - return syl; - } - // If we get here, there is no stressed syllable. As a fallback, use - // the first syllable with secondary stress, or the first syllable if - // none has secondary stress. - Element first = MaryDomUtils.getFirstElementByTagName(token, MaryXML.SYLLABLE); - Element secondary = first; - while (secondary != null && !secondary.getAttribute("stress").equals("2")) { - secondary = MaryDomUtils.getNextSiblingElementByTagName(secondary, MaryXML.SYLLABLE); - } - if (secondary != null) { - return secondary; - } - return first; - } - - /** - * For a syllable, return the first child segment which is a nucleus - * segment. Return null if there is no such segment. 
- */ - private Element getNucleus(Element syllable) { - if (syllable == null || !syllable.getTagName().equals(MaryXML.SYLLABLE)) - return null; - Element seg = MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE); - while (seg != null && !isInNucleus(seg)) { - seg = MaryDomUtils.getNextSiblingElementByTagName(seg, MaryXML.PHONE); - } - return seg; - } - - ////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////// - /////////////////////////// Helper Classes /////////////////////////// - ////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////// - - static class ProsodicSettings { - // Relative settings: 100 = 100% = no change - int rate; - int accentProminence; - int accentSlope; - int numberOfPauses; - int pauseDuration; - int vowelDuration; - int plosiveDuration; - int fricativeDuration; - int nasalDuration; - int liquidDuration; - int glideDuration; - int volume; - - ProsodicSettings() { - this.rate = 100; - this.accentProminence = 100; - this.accentSlope = 100; - this.numberOfPauses = 100; - this.pauseDuration = 100; - this.vowelDuration = 100; - this.plosiveDuration = 100; - this.fricativeDuration = 100; - this.nasalDuration = 100; - this.liquidDuration = 100; - this.glideDuration = 100; - this.volume = 50; - } - - ProsodicSettings( - int rate, - int accentProminence, - int accentSlope, - int numberOfPauses, - int pauseDuration, - int vowelDuration, - int plosiveDuration, - int fricativeDuration, - int nasalDuration, - int liquidDuration, - int glideDuration, - int volume) { - this.rate = rate; - this.accentProminence = accentProminence; - this.accentSlope = accentSlope; - this.numberOfPauses = numberOfPauses; - this.pauseDuration = pauseDuration; - this.vowelDuration = vowelDuration; - this.plosiveDuration = plosiveDuration; - this.fricativeDuration = fricativeDuration; - 
this.nasalDuration = nasalDuration; - this.liquidDuration = liquidDuration; - this.glideDuration = glideDuration; - this.volume = volume; - } - - int rate() { - return rate; - } - int accentProminence() { - return accentProminence; - } - int accentSlope() { - return accentSlope; - } - int numberOfPauses() { - return numberOfPauses; - } - int pauseDuration() { - return pauseDuration; - } - int vowelDuration() { - return vowelDuration; - } - int plosiveDuration() { - return plosiveDuration; - } - int fricativeDuration() { - return fricativeDuration; - } - int nasalDuration() { - return nasalDuration; - } - int liquidDuration() { - return liquidDuration; - } - int glideDuration() { - return glideDuration; - } - int volume() { - return volume; - } - - void setRate(int value) { - rate = value; - } - void setAccentProminence(int value) { - accentProminence = value; - } - void setAccentSlope(int value) { - accentSlope = value; - } - void setNumberOfPauses(int value) { - numberOfPauses = value; - } - void setPauseDuration(int value) { - pauseDuration = value; - } - void setVowelDuration(int value) { - vowelDuration = value; - } - void setPlosiveDuration(int value) { - plosiveDuration = value; - } - void setFricativeDuration(int value) { - fricativeDuration = value; - } - void setNasalDuration(int value) { - nasalDuration = value; - } - void setLiquidDuration(int value) { - liquidDuration = value; - } - void setGlideDuration(int value) { - glideDuration = value; - } - void setVolume(int value) { - volume = value; - } - - } -} - +/** + * Copyright 2000-2006 DFKI GmbH. + * All Rights Reserved. Use is subject to license terms. + * + * This file is part of MARY TTS. + * + * MARY TTS is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, version 3 of the License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + */ +package marytts.language.tib; + +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Properties; +import java.util.StringTokenizer; +import java.util.WeakHashMap; + +import marytts.datatypes.MaryData; +import marytts.datatypes.MaryDataType; +import marytts.datatypes.MaryXML; +import marytts.language.tib.datatypes.TibetanDataTypes; +import marytts.modules.InternalModule; +import marytts.modules.MaryModule; +import marytts.modules.ModuleRegistry; +import marytts.modules.KlattDurationModeller.KlattDurationParams; +import marytts.modules.phonemiser.Allophone; +import marytts.modules.phonemiser.AllophoneSet; +import marytts.server.MaryProperties; +import marytts.util.dom.MaryDomUtils; +import marytts.util.dom.NameNodeFilter; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import org.w3c.dom.traversal.DocumentTraversal; +import org.w3c.dom.traversal.NodeFilter; +import org.w3c.dom.traversal.NodeIterator; +import org.w3c.dom.traversal.TreeWalker; + + +/** + * The calculation of acoustic parameters module. + * + * @author Marc Schröder + */ + +public class KlattDurationModeller extends InternalModule { + private AllophoneSet allophoneSet; + private KlattDurationParams klattDurationParams; + private Properties klattRuleParams; + /** This map contains the topline-baseline frequency configurations for the + * currently used phrase and sub-phrase prosody elements. 
As this is a + * WeakHashMap, entries will automatically be deleted when not in regular + * use anymore. */ + private WeakHashMap topBaseConfMap; + /** This map contains the prosodic settings, as ProsodicSettings objects, + * for the currently used prosody elements. As this is a WeakHashMap, + * entries will automatically be deleted when not in regular use + * anymore. */ + private WeakHashMap prosodyMap; + + public KlattDurationModeller() + { + super("KlattDurationModeller", + TibetanDataTypes.PHRASES_TIB, + MaryDataType.DURATIONS, + new Locale("tib")); + } + + public void startup() throws Exception { + super.startup(); + // We depend on the Synthesis module: + MaryModule synthesis = ModuleRegistry.getModule(marytts.modules.Synthesis.class); + assert synthesis != null; + if (synthesis.getState() == MaryModule.MODULE_OFFLINE) + synthesis.startup(); + // load klatt rules + klattRuleParams = new Properties(); + klattRuleParams.load(new FileInputStream(MaryProperties.needFilename("tibetan.cap.klattrulefile"))); // load phone list + allophoneSet = AllophoneSet.getAllophoneSet(MaryProperties.needFilename("tibetan.allophoneset")); + klattDurationParams = new KlattDurationParams(MaryProperties.needFilename("tibetan.cap.klattdurfile")); + // instantiate the Map in which settings are associated with elements: + // (when the objects serving as keys are not in ordinary use any more, + // the key-value pairs are deleted from the WeakHashMap earlier or + // later; that means we do not need to keep track of the hashmaps per + // thread) + prosodyMap = new WeakHashMap(); + } + + public MaryData process(MaryData d) throws Exception { + Document doc = d.getDocument(); + determineProsodicSettings(doc); + addOrDeleteBoundaries(doc); + + NodeList sentences = doc.getElementsByTagName(MaryXML.SENTENCE); + for (int i = 0; i < sentences.getLength(); i++) { + Element sentence = (Element) sentences.item(i); + processSentence(sentence); + } + MaryData result = new MaryData(outputType(), 
d.getLocale()); + result.setDocument(doc); + return result; + } + + /** + * For all (possibly nested) prosody elements in the document, + * calculate their (possibly cumulated) prosodic settings + * and save them in a map. + */ + private void determineProsodicSettings(Document doc) { + // Determine the prosodic setting for each prosody element + NodeList prosodies = doc.getElementsByTagName(MaryXML.PROSODY); + for (int i = 0; i < prosodies.getLength(); i++) { + Element prosody = (Element) prosodies.item(i); + ProsodicSettings settings = new ProsodicSettings(); + // Neutral default settings: + ProsodicSettings parentSettings = new ProsodicSettings(); + // Obtain parent settings, if any: + Element ancestor = (Element) MaryDomUtils.getAncestor(prosody, MaryXML.PROSODY); + if (ancestor != null) { + ProsodicSettings testSettings = (ProsodicSettings) prosodyMap.get(ancestor); + if (testSettings != null) { + parentSettings = testSettings; + } + } + // Only accept relative changes, i.e. percentage delta: + settings.setRate(parentSettings.rate() + getPercentageDelta(prosody.getAttribute("rate"))); + settings.setAccentProminence( + parentSettings.accentProminence() + getPercentageDelta(prosody.getAttribute("accent-prominence"))); + settings.setAccentSlope( + parentSettings.accentSlope() + getPercentageDelta(prosody.getAttribute("accent-slope"))); + settings.setNumberOfPauses( + parentSettings.numberOfPauses() + getPercentageDelta(prosody.getAttribute("number-of-pauses"))); + settings.setPauseDuration( + parentSettings.pauseDuration() + getPercentageDelta(prosody.getAttribute("pause-duration"))); + settings.setVowelDuration( + parentSettings.vowelDuration() + getPercentageDelta(prosody.getAttribute("vowel-duration"))); + settings.setPlosiveDuration( + parentSettings.plosiveDuration() + getPercentageDelta(prosody.getAttribute("plosive-duration"))); + settings.setFricativeDuration( + parentSettings.fricativeDuration() + 
getPercentageDelta(prosody.getAttribute("fricative-duration"))); + settings.setNasalDuration( + parentSettings.nasalDuration() + getPercentageDelta(prosody.getAttribute("nasal-duration"))); + settings.setLiquidDuration( + parentSettings.liquidDuration() + getPercentageDelta(prosody.getAttribute("liquid-duration"))); + settings.setGlideDuration( + parentSettings.glideDuration() + getPercentageDelta(prosody.getAttribute("glide-duration"))); + + String sVolume = prosody.getAttribute("volume"); + if (sVolume.equals("")) { + settings.setVolume(parentSettings.volume()); + } else if (isPercentageDelta(sVolume)) { + int newVolume = parentSettings.volume() + getPercentageDelta(sVolume); + if (newVolume < 0) + newVolume = 0; + else if (newVolume > 100) + newVolume = 100; + settings.setVolume(newVolume); + } else if (isUnsignedNumber(sVolume)) { + settings.setVolume(getUnsignedNumber(sVolume)); + } else if (sVolume.equals("silent")) { + settings.setVolume(0); + } else if (sVolume.equals("soft")) { + settings.setVolume(25); + } else if (sVolume.equals("medium")) { + settings.setVolume(50); + } else if (sVolume.equals("loud")) { + settings.setVolume(75); + } + prosodyMap.put(prosody, settings); + } + } + + /** + * Adjust the number of boundaries according to rate and the + * "number-of-pauses" attribute. + */ + private void addOrDeleteBoundaries(Document doc) { + // Go through boundaries. A boundary is deleted if the determined + // minimum breakindex size is larger than this boundary's breakindex. 
+ NodeIterator it = + ((DocumentTraversal) doc).createNodeIterator( + doc, + NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(MaryXML.BOUNDARY), + false); + Element boundary = null; + List bi1prosodyElements = null; + while ((boundary = (Element) it.nextNode()) != null) { + int minBI = 3; + Element prosody = (Element) MaryDomUtils.getAncestor(boundary, MaryXML.PROSODY); + if (prosody != null) { + ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); + assert settings != null; + int rate = settings.rate(); + int numberOfPauses = settings.numberOfPauses(); + if (numberOfPauses <= 50) + minBI = 5; + else if (numberOfPauses <= 75) + minBI = 4; + else if (numberOfPauses > 150) + minBI = 1; + else if (numberOfPauses > 125) + minBI = 2; + // Rate can only shift the number of pauses by one breakindex + if (rate < 90 && minBI > 1) + minBI--; + if (minBI == 1) { + // Remember that the current prosody element wants bi 1 boundaries: + if (bi1prosodyElements == null) + bi1prosodyElements = new ArrayList(); + bi1prosodyElements.add(prosody); + } + } + // This boundary's bi: + int bi = 3; + try { + bi = Integer.parseInt(boundary.getAttribute("breakindex")); + } catch (NumberFormatException e) { + logger.info( + "Unexpected breakindex value `" + boundary.getAttribute("breakindex") + "', assuming " + bi); + } + if (bi < minBI) { + if (!boundary.hasAttribute("duration")) + boundary.getParentNode().removeChild(boundary); + else + boundary.removeAttribute("bi"); // but keep duration + } + } + // Do we need to add any boundaries? 
+ if (bi1prosodyElements != null) { + Iterator elIt = bi1prosodyElements.iterator(); + while (elIt.hasNext()) { + Element prosody = (Element) elIt.next(); + NodeIterator nodeIt = + ((DocumentTraversal) doc).createNodeIterator( + prosody, + NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(new String[] { MaryXML.TOKEN, MaryXML.BOUNDARY }), + false); + Element el = null; + Element prevEl = null; + while ((el = (Element) nodeIt.nextNode()) != null) { + if (el.getTagName().equals(MaryXML.TOKEN) && prevEl != null && prevEl.getTagName().equals(MaryXML.TOKEN)) { + // Need to insert a boundary before el: + Element newBoundary = + MaryXML.createElement(doc, MaryXML.BOUNDARY); + newBoundary.setAttribute("breakindex", "1"); + el.getParentNode().insertBefore(newBoundary, el); + } + prevEl = el; + } + } + } + } + + private void processSentence(Element sentence) { + NodeList tokens = sentence.getElementsByTagName(MaryXML.TOKEN); + if (tokens.getLength() < 1) { + return; // no tokens -- what can we do? + } + + // Create the substructure of each token + for (int i = 0; i < tokens.getLength(); i++) { + Element token = (Element) tokens.item(i); + createSubStructure(token); + } + + // apply Klatt rules to each segment + NodeList segments = sentence.getElementsByTagName(MaryXML.PHONE); + for (int i = 0; i < segments.getLength(); i++) { + Element segment = (Element) segments.item(i); + int factor = 100; + int klatt0 = klattRule0(segment); + int klatt2 = klattRule2(segment); + int klatt2a = klattRule2a(segment); + int klatt3 = klattRule3(segment); + int klatt4 = klattRule4(segment); + int klatt5 = klattRule5(segment); + int klatt6 = klattRule6(segment); + int klatt7 = klattRule7(segment); + int klatt8 = klattRule8(segment); + int klatt10 = klattRule10(segment); + int accentProminence = accentProminenceRule(segment); + factor = (factor * klatt0) / 100; + factor = (factor * klatt2) / 100; + factor = (factor * klatt2a) / 100; + factor = (factor * klatt3) / 100; + factor = (factor * klatt4) / 
100; + factor = (factor * klatt5) / 100; + factor = (factor * klatt6) / 100; + factor = (factor * klatt7) / 100; + factor = (factor * klatt8) / 100; + factor = (factor * klatt10) / 100; + factor = (factor * accentProminence) / 100; + + // and determine the actual length: + int inhDuration = getInhDuration(segment); + int minDuration = getMinDuration(segment); + int normalDuration = minDuration + ((inhDuration - minDuration) * factor) / 100; + + // Tempo operates on the entire duration, not just on + // the stretchable part: + int tempo = tempoRule(segment); + int duration = (normalDuration * tempo) / 100; + + segment.setAttribute("d", String.valueOf(duration)); + logger.debug( + segment.getAttribute("p") + + " " + + duration + + "ms (tempoFactor " + + tempo + + "%, normal " + + normalDuration + + ", min " + + minDuration + + ", inh " + + inhDuration + + ") " + + factor + + "% (" + + klatt0 + + "*" + + klatt2 + + "*" + + klatt2a + + "*" + + klatt3 + + "*" + + klatt4 + + "*" + + klatt5 + + "*" + + klatt6 + + "*" + + klatt7 + + "*" + + klatt8 + + "*" + + klatt10 + + ")"); + } + + // apply Klatt rule 1 to boundaries: + NodeList boundaries = sentence.getElementsByTagName(MaryXML.BOUNDARY); + for (int i = 0; i < boundaries.getLength(); i++) { + Element boundary = (Element) boundaries.item(i); + if (!boundary.hasAttribute("duration")) { + int duration = klattRule1(boundary); + boundary.setAttribute("duration", String.valueOf(duration)); + } + } + + NodeList phrases = sentence.getElementsByTagName(MaryXML.PHRASE); + for (int i = 0; i < phrases.getLength(); i++) { + Element phrase = (Element) phrases.item(i); + // Now save the accumulated duration in every segment + // of all segments and boundaries in the phrase. 
+ calculateAccumulatedDurations(phrase); + } + } + + private void createSubStructure(Element token) { + Document document = token.getOwnerDocument(); + Element prosody = (Element) MaryDomUtils.getAncestor(token, MaryXML.PROSODY); + String vq = null; // voice quality + if (prosody != null) { + ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); + int volume = settings.volume(); + if (volume >= 60) { + vq = "loud"; + } else if (volume <= 40) { + vq = "soft"; + } else { + vq = null; + } + } + + // Create syllables within this token only if it does not have any yet: + NodeList syls = token.getElementsByTagName(MaryXML.SYLLABLE); + if (syls.getLength() > 0) { + for (int i=0; i= 2, + * if it is part of an accented word, gets additional lengthening. + * @return A percentage value as a factor for duration + * (100 corresponds to no change). + */ + private int klattRule2a(Element segment) { + Element syllable = getSyllable(segment); + Element token = getToken(syllable); + if (isLastBeforeBoundary(syllable, 2) && hasAccent(token)) { + if (isInNucleus(segment)) { + return getPropertyAsInteger("rule2a.nucleus"); + } else if (isInCoda(segment) && isNasal(segment)) { + return getPropertyAsInteger("rule2a.coda"); + } + } + // default: Rule not applicable + return 100; + } + + /** + * Klatt Rule 3: Non-phrase-final shortening. + * @return A percentage value as a factor for duration + * (100 corresponds to no change). + */ + private int klattRule3(Element segment) { + Element syllable = getSyllable(segment); + if (!isMajIPFinal(syllable)) { + if (isInNucleus(segment)) { + return getPropertyAsInteger("rule3.nucleus"); + } + } else if (isInCoda(segment) && (isLiquid(segment) || isNasal(segment))) { + return getPropertyAsInteger("rule3.coda"); + } + // default: Rule not applicable + return 100; + } + + /** + * Klatt Rule 4: Non-word-final shortening. + * @return A percentage value as a factor for duration + * (100 corresponds to no change). 
+ */ + private int klattRule4(Element segment) { + Element syllable = getSyllable(segment); + if (!isWordFinal(syllable)) { + if (isInNucleus(segment)) { + return getPropertyAsInteger("rule4.nucleus"); + } + } + // default: Rule not applicable + return 100; + } + + /** + * Klatt Rule 5: Polysyllabic shortening. + * @return A percentage value as a factor for duration + * (100 corresponds to no change). + */ + private int klattRule5(Element segment) { + Element token = getToken(segment); + if (isPolysyllabic(token)) { + if (isInNucleus(segment)) { + return getPropertyAsInteger("rule5.nucleus"); + } + } + // default: Rule not applicable + return 100; + } + + /** + * Klatt Rule 6: Non-initial consonant shortening. + * @return A percentage value as a factor for duration + * (100 corresponds to no change). + */ + private int klattRule6(Element segment) { + Element syllable = getSyllable(segment); + if (isInOnset(segment) && !isWordInitial(syllable)) { + return getPropertyAsInteger("rule6.onset"); + } else if (isInCoda(segment)) { + return getPropertyAsInteger("rule6.coda"); + } + // default: Rule not applicable + return 100; + } + + /** + * Klatt Rule 7: Unstressed shortening + * @return A percentage value as a factor for duration + * (100 corresponds to no change). + */ + private int klattRule7(Element segment) { + // The stress reduction formulated by Klatt as part of rule 7 + // is relocated to getStress(syllable). + // The min. duration reduction is relocated to getMinDuration(segment). 
+ + Element token = getToken(segment); + Element syllable = getSyllable(segment); + int stress = getStress(syllable); + + if (stress == 2 || stress == 0) { + if (isInOnset(segment)) { + if (isLiquid(segment) || isGlide(segment)) { + return (getPropertyAsInteger("rule7.onset.liquids")); + } else { + return (getPropertyAsInteger("rule7.others")); + } + } else if (isInNucleus(segment)) { + if (isWordMedial(syllable)) { + return (getPropertyAsInteger("rule7.nucleus.medial")); + } else { + return (getPropertyAsInteger("rule7.nucleus.others")); + } + } else { // segment is in coda + return (getPropertyAsInteger("rule7.others")); + } + } + // default: Rule not applicable + return 100; + } + + /** + * Klatt Rule 8: Lengthening for emphasis + * @return A percentage value as a factor for duration + * (100 corresponds to no change). + */ + private int klattRule8(Element segment) { + Element syllable = getSyllable(segment); + if (hasAccent(syllable)) { + if (isInNucleus(segment)) { + return getPropertyAsInteger("rule8.accent"); + } + } + // default: Rule not applicable + return 100; + } + + // Klatt Rule 9 (postvocalic context of vowels) + // is not needed for German. + + /** + * Klatt Rule 10: Shortening in consonant clusters + * @return A percentage value as a factor for duration + * (100 corresponds to no change). 
+ */ + private int klattRule10(Element segment) { + boolean hasPrecedingConsonant = false; + boolean hasFollowingConsonant = false; + if (isConsonant(segment)) { + Element preceding = getPreviousSegment(segment); + if (preceding != null && isConsonant(preceding)) { + hasPrecedingConsonant = true; + } + Element following = getNextSegment(segment); + if (following != null && isConsonant(following)) { + hasFollowingConsonant = true; + } + if (hasPrecedingConsonant && hasFollowingConsonant) { + return getPropertyAsInteger("rule10.surrounded"); + } else if (hasPrecedingConsonant) { + return getPropertyAsInteger("rule10.preceded"); + } else if (hasFollowingConsonant) { + return getPropertyAsInteger("rule10.followed"); + } + } + // default: Rule not applicable + return 100; + } + + // Klatt Rule 11 (lengthening due to plosive aspiration) + // is not needed for German. + + /** + * Klatt Rule 1: Pause duration. The pause duration depends on the break + * index, on the speech rate, and on the "pause-duration" attribute. This + * rule assumes that every boundary it gets as input is to be realised, + * i.e. not-to-be-realised boundaries are already deleted at this stage. + * @return A pause duration, in milliseconds. + */ + private int klattRule1(Element boundary) { + int breakindex = getBreakindex(boundary); + if (breakindex >= 1 && breakindex <= 6) { + int durationMeasure = 100; + Element prosody = (Element) MaryDomUtils.getAncestor(boundary, MaryXML.PROSODY); + if (prosody != null) { + ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); + assert settings != null; + // Calculate duration measure as a sum of rate and pauseDur. + int deltaRate = settings.rate() - 100; + int deltaPauseDur = settings.pauseDuration() - 100; + durationMeasure = 100 - deltaRate + deltaPauseDur; + } + // Now factor is a measure of how long the pauses are to be: + // 100 medium, 120 long, 140 very long + // 80 short, 60 very short + // Intermediate values are interpolated. 
+ if (durationMeasure == 100) { // probably the most common + return getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".medium"); + } else { + // We could treat 120, 140, 80, and 60 as special cases, + // but they are probably so rare that it doesn't harm + // getting them with the interpolation code below. + int longer; + int shorter; + int dist; + // dist is distance from shorter; our duration value is + // shorter + dist/20 * (longer - shorter) + if (durationMeasure > 100) { + if (durationMeasure > 120) { + // 120 < durationMeasure -- need 120 (long) and 140 (verylong) + longer = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".verylong"); + shorter = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".long"); + dist = durationMeasure - 120; + } else { + // 100 < durationMeasure <= 120 -- need 100 (medium) and 120 + // (long) + longer = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".long"); + shorter = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".medium"); + dist = durationMeasure - 100; + } + } else { + if (durationMeasure < 80) { + // durationMeasure < 80 -- need 80 (short) and 60 (veryshort) + longer = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".short"); + shorter = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".veryshort"); + dist = durationMeasure - 60; + } else { + // 80 <= durationMeasure < 100 -- need 80 (short) and 100 (medium) + longer = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".medium"); + shorter = getPropertyAsInteger("rule1.bi" + String.valueOf(breakindex) + ".short"); + dist = durationMeasure - 80; + } + } + int result = shorter + (dist * (longer - shorter)) / 20; + if (result < 10) + result = 10; + return result; + } + } + // Not a valid break index: + return 0; + } + + /** + * Tempo rule: Take into account the prosody settings + * for modifying the segment durations, realising speech tempo. 
+ */ + private int tempoRule(Element segment) { + Element prosody = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PROSODY); + if (prosody != null) { + ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); + assert settings != null; + int rate = settings.rate(); + // Duration is the inverse of rate: + int durFactor = 10000 / rate; + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + if (ph != null) { + if (ph.isVowel()) + durFactor = (durFactor * settings.vowelDuration()) / 100; + else if (ph.isPlosive()) + durFactor = (durFactor * settings.plosiveDuration()) / 100; + else if (ph.isFricative()) + durFactor = (durFactor * settings.fricativeDuration()) / 100; + else if (ph.isNasal()) + durFactor = (durFactor * settings.nasalDuration()) / 100; + else if (ph.isLiquid()) + durFactor = (durFactor * settings.liquidDuration()) / 100; + else if (ph.isGlide()) + durFactor = (durFactor * settings.glideDuration()) / 100; + } + return durFactor; + } + // default: Rule not applicable + return 100; + } + + /** + * Accent prominence rule: The "accent-prominence" attribute influences + * nucleus duration for accented syllables (in addition to Klatt rule 8), + * and affects voice quality for accented syllables. In + * addition, but not here, the "accent-prominence" attribute causes a + * topline/baseline overshoot / undershoot. + * @return A percentage value as a factor for duration + * (100 corresponds to no change). 
+ * @see #calculateTargetFrequency() + */ + private int accentProminenceRule(Element segment) { + // In addition to Klatt rule 8, take into account the + // "accent-prominence" attribute: + int returnValue = 100; // default value + Element syllable = getSyllable(segment); + if (hasAccent(syllable)) { + Element prosody = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PROSODY); + if (prosody != null) { + ProsodicSettings settings = (ProsodicSettings) prosodyMap.get(prosody); + if (settings != null) { + int accentProminence = settings.accentProminence(); + if (accentProminence != 100) { + if (isInNucleus(segment)) { + returnValue = accentProminence; + } + // And affect voice quality: + String vq = segment.getAttribute("vq"); + if (accentProminence >= 150) { + if (vq.equals("soft") || vq.equals("modal") || vq.equals("")) + vq = "loud"; + } else if (accentProminence >= 125) { + if (vq.equals("soft")) { + vq = "modal"; + } else if (vq.equals("modal") || vq.equals("")) { + vq = "loud"; + } + } + if (!vq.equals(segment.getAttribute("vq"))) { + segment.setAttribute("vq", vq); + } + } + } + } + } + return returnValue; + + } + + /** + * For each segment in the given phrase, calculate the accumulated duration + * since the beginning of the phrase, including this segment's duration, + * and save it in the segment's end attribute. (This value is + * then comparable to the end feature in FreeTTS, but we use + * milliseconds, they use seconds.) 
+ */ + private void calculateAccumulatedDurations(Element phrase) { + TreeWalker tw = + ((DocumentTraversal) phrase.getOwnerDocument()).createTreeWalker( + phrase, + NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(new String[] { MaryXML.PHONE, MaryXML.BOUNDARY }), + false); + int totalDuration = 0; + Element element; + while ((element = (Element) tw.nextNode()) != null) { + if (element.getTagName().equals(MaryXML.PHONE)) { + // A segment + int d = 0; + try { + d = Integer.parseInt(element.getAttribute("d")); + } catch (NumberFormatException e) { + logger.warn("Unexpected duration value `" + element.getAttribute("d") + "'"); + } + totalDuration += d; + element.setAttribute("end", String.valueOf(totalDuration)); + } else { + // A boundary + int d = 0; + try { + d = Integer.parseInt(element.getAttribute("duration")); + } catch (NumberFormatException e) { + logger.warn("Unexpected duration value `" + element.getAttribute("duration") + "'"); + } + totalDuration += d; + } + } + } + + + ////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////// + ////////////////////////////// Helpers /////////////////////////////// + ////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////// + + private int getPropertyAsInteger(String prop) { + int value = 100; + try { + value = Integer.parseInt(klattRuleParams.getProperty(prop)); + } catch (NumberFormatException e) { + logger.warn("Cannot read property " + prop + " in klattrule parameter file. 
Using default."); + } + return value; + } + + private Element getToken(Element segmentOrSyllable) { + return (Element) MaryDomUtils.getAncestor(segmentOrSyllable, MaryXML.TOKEN); + } + + private Element getSyllable(Element segment) { + return (Element) MaryDomUtils.getAncestor(segment, MaryXML.SYLLABLE); + } + + private int getStress(Element syllable) { + // Klatt's usage of 1ary and 2ary stress (Klatt, 1979): + // primary lexical stress is reserved for vowels in open-class content + // words, only one 1ary stress per word; + // 2ary lexical stress is used in some content words, in compounds, + // in the strongest syllable of polysyllabic function words, and for + // pronouns (excluding personal pronouns). + // Approximately adapt our input to Klatt's input: + // * accented prosodic words (have a tobi accent) can stay as they are + // * for each unaccented prosodic word (no tobi accent) + // - if it is monosyllabic and not a pronoun, remove any stress sign + // - if it is polysyllabic, remove 2ary stress, + // and reduce 1ary to 2ary. + + int stress = 0; + + if (syllable.hasAttribute("stress")) { + String helper = syllable.getAttribute("stress"); + if (helper.equals("1")) + stress = 1; + else if (helper.equals("2")) + stress = 2; + } + + if (stress != 0) { + // it is worth thinking about stress reduction + Element token = getToken(syllable); + // stress reduction: + if (!hasAccent(token)) { + // unaccented word + if (isPolysyllabic(token)) { + // polysyllabic: + // reduce 1ary to 2ary, 2ary to no stress: + if (stress == 1) + stress = 2; + else if (stress == 2) + stress = 0; + } else { + // monosyllabic: + if (!isPronoun(token)) { + // not a pronoun + // remove any stress: + stress = 0; + } + } + } + } + + return stress; + } + + /** + * Find the segment preceding this segment within the same + * phrase. + * @return that segment, or null if there is no such segment. 
+ */ + private static Element getPreviousSegment(Element segment) { + Element phrase = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PHRASE); + return MaryDomUtils.getPreviousOfItsKindIn(segment, phrase); + } + + /** + * Find the segment following this segment within the same + * phrase. + * @return that segment, or null if there is no such segment. + */ + private static Element getNextSegment(Element segment) { + Element phrase = (Element) MaryDomUtils.getAncestor(segment, MaryXML.PHRASE); + return MaryDomUtils.getNextOfItsKindIn(segment, phrase); + } + + /** + * Find the syllable preceding this syllable within the same + * phrase. + * @return that syllable, or null if there is no such + * syllable. + */ + private static Element getPreviousSyllable(Element syllable) { + Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); + return MaryDomUtils.getPreviousOfItsKindIn(syllable, phrase); + } + + /** + * Find the syllable following this syllable within the same + * phrase. + * @return that syllable, or null if there is no such + * syllable. + */ + private static Element getNextSyllable(Element syllable) { + if (syllable == null) + return null; + Element phrase = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.PHRASE); + return MaryDomUtils.getNextOfItsKindIn(syllable, phrase); + } + + private int getMinDuration(Element segment) { + int minDuration = klattDurationParams.getMinDuration(segment.getAttribute("p")); + + // additional reduction for unstressed segments: + // (this comes from klatt's original rule no. 
7) + if (getStress(getSyllable(segment)) == 0) { + // For unstressed segments, + // increase stretchability by reducing minimum duration: + return (minDuration * getPropertyAsInteger("rule7.mindur")) / 100; + } else { // default + return minDuration; + } + } + + private int getInhDuration(Element segment) { + return klattDurationParams.getInhDuration(segment.getAttribute("p")); + } + + private boolean isPronoun(Element token) { + String pos = token.getAttribute("pos"); + return pos.equals("PDS") + || pos.equals("PDAT") + || pos.equals("PIS") + || pos.equals("PIAT") + || pos.equals("PIDAT") + || pos.equals("PPER") + || pos.equals("PPOSS") + || pos.equals("PPOSAT") + || pos.equals("PRELS") + || pos.equals("PRELAT") + || pos.equals("PRF") + || pos.equals("PWS") + || pos.equals("PWAT") + || pos.equals("PWAV"); + } + + private boolean isPolysyllabic(Element token) { + return token.getElementsByTagName(MaryXML.SYLLABLE).getLength() > 1; + } + + private boolean hasAccent(Element token) { + String accent = token.getAttribute("accent"); + return !accent.equals(""); + } + + /** + * Search for boundary and syllable elements following the given syllable. + * If the next matching element found is a boundary with breakindex + * minBreakindex or larger, return true; otherwise, + * return false. + * If there is no next node, return true. + */ + private boolean isLastBeforeBoundary(Element syllable, int minBreakindex) { + Document doc = syllable.getOwnerDocument(); + Element sentence = (Element) MaryDomUtils.getAncestor(syllable, MaryXML.SENTENCE); + TreeWalker tw = + ((DocumentTraversal) doc).createTreeWalker( + sentence, + NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(new String[] { MaryXML.SYLLABLE, MaryXML.BOUNDARY }), + false); + tw.setCurrentNode(syllable); + Element next = (Element) tw.nextNode(); + if (next == null) { + // no matching node after syllable -- + // we must be in a final position. 
+ return true; + } + if (next.getNodeName().equals(MaryXML.BOUNDARY)) { + if (getBreakindex(next) >= minBreakindex) + return true; + } + // This syllable is either followed by another syllable or + // by a boundary with breakindex < minBreakindex + return false; + } + + private boolean isMajIPFinal(Element syllable) { + // If this syllable is followed by a boundary with breakindex + // 4 or above, return true. + return isLastBeforeBoundary(syllable, 4); + } + + private boolean isMinipFinal(Element syllable) { + // If this syllable is followed by a boundary with breakindex + // 3 or above, return true. + return isLastBeforeBoundary(syllable, 3); + } + + private boolean isWordFinal(Element syllable) { + Element e = syllable; + while (e != null) { + e = MaryDomUtils.getNextSiblingElement(e); + if (e != null && e.getNodeName().equals(MaryXML.SYLLABLE)) + return false; + } + return true; + } + + private boolean isWordMedial(Element syllable) { + return !(isWordFinal(syllable) || isWordInitial(syllable)); + + } + + private boolean isWordInitial(Element syllable) { + Element e = syllable; + while (e != null) { + e = MaryDomUtils.getPreviousSiblingElement(e); + if (e != null && e.getNodeName().equals(MaryXML.SYLLABLE)) + return false; + } + return true; + } + + private boolean isInOnset(Element segment) { + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + assert ph != null; + if (ph.isSyllabic()) { + return false; + } + // OK, segment is not syllabic. 
See if it is followed by a syllabic + // segment: + for (Element e = MaryDomUtils.getNextSiblingElement(segment); + e != null; + e = MaryDomUtils.getNextSiblingElement(e)) { + ph = allophoneSet.getAllophone(e.getAttribute("p")); + assert ph != null; + if (ph.isSyllabic()) { + return true; + } + } + return false; + } + + private boolean isInNucleus(Element segment) { + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + assert ph != null; + return ph.isSyllabic(); + } + + private boolean isInCoda(Element segment) { + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + assert ph != null; + if (ph.isSyllabic()) { + return false; + } + // OK, segment is not syllabic. See if it is preceded by a syllabic + // segment: + for (Element e = MaryDomUtils.getPreviousSiblingElement(segment); + e != null; + e = MaryDomUtils.getPreviousSiblingElement(e)) { + ph = allophoneSet.getAllophone(e.getAttribute("p")); + assert ph != null; + if (ph.isSyllabic()) { + return true; + } + } + return false; + } + + private boolean isConsonant(Element segment) { + return !isVowel(segment); + } + + private boolean isVowel(Element segment) { + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + assert ph != null; + return ph.isVowel(); + } + + private boolean isLiquid(Element segment) { + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + assert ph != null; + return ph.isLiquid(); + } + + private boolean isGlide(Element segment) { + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + assert ph != null; + return ph.isGlide(); + } + + private boolean isNasal(Element segment) { + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + assert ph != null; + return ph.isNasal(); + } + + private boolean isFricative(Element segment) { + Allophone ph = allophoneSet.getAllophone(segment.getAttribute("p")); + assert ph != null; + return ph.isFricative(); + } + + private int getBreakindex(Element 
boundary) { + int breakindex = 0; + try { + breakindex = Integer.parseInt(boundary.getAttribute("breakindex")); + } catch (NumberFormatException e) { + logger.warn("Unexpected breakindex value `" + boundary.getAttribute("breakindex") + "'"); + } + return breakindex; + } + + /** + * Tell whether the string contains a positive or negative percentage + * delta, i.e., a percentage number with an obligatory + or - sign. + */ + private boolean isPercentageDelta(String string) { + String s = string.trim(); + if (s.length() < 3) + return false; + return s.substring(s.length() - 1).equals("%") && isNumberDelta(s.substring(0, s.length() - 1)); + } + + /** + * For a string containing a percentage delta as judged by + * isPercentageDelta(), return the numerical value, rounded to + * an integer. + * @return the numeric part of the percentage, rounded to an integer, or 0 + * if the string is not a valid percentage delta. + */ + private int getPercentageDelta(String string) { + String s = string.trim(); + if (!isPercentageDelta(s)) + return 0; + return getNumberDelta(s.substring(0, s.length() - 1)); + } + + /** + * Tell whether the string contains a positive or negative semitones delta, + * i.e., a semitones number with an obligatory + or - sign, such as + * "+3.2st" or "-13.2st". + */ + private boolean isSemitonesDelta(String string) { + String s = string.trim(); + if (s.length() < 4) + return false; + return s.substring(s.length() - 2).equals("st") && isNumberDelta(s.substring(0, s.length() - 2)); + } + + /** + * For a string containing a semitones delta as judged by + * isSemitonesDelta(), return the numerical value, as a + * double. + * @return the numeric part of the semitones delta, or 0 + * if the string is not a valid semitones delta. 
+ */ + private double getSemitonesDelta(String string) { + String s = string.trim(); + if (!isSemitonesDelta(s)) + return 0; + String num = s.substring(0, s.length() - 2); + double value = 0; + try { + value = Double.parseDouble(num); + } catch (NumberFormatException e) { + logger.warn("Unexpected number value `" + num + "'"); + } + return value; + } + + /** + * Tell whether the string contains a positive or negative number + * delta, i.e., a number with an obligatory + or - sign. + */ + private boolean isNumberDelta(String string) { + String s = string.trim(); + if (s.length() < 2) + return false; + return (s.charAt(0) == '+' || s.charAt(0) == '-') && isUnsignedNumber(s.substring(1)); + } + + /** + * For a string containing a number delta as judged by + * isNumberDelta(), return the numerical value, rounded to + * an integer. + * @return the numeric value, rounded to an integer, or 0 + * if the string is not a valid number delta. + */ + private int getNumberDelta(String string) { + String s = string.trim(); + if (!isNumberDelta(s)) + return 0; + double value = 0; + try { + value = Double.parseDouble(s); + } catch (NumberFormatException e) { + logger.warn("Unexpected number value `" + s + "'"); + } + return (int) Math.round(value); + + } + + /** + * Tell whether the string contains an unsigned semitones expression, such + * as "12st" or "5.4st". + */ + private boolean isUnsignedSemitones(String string) { + String s = string.trim(); + if (s.length() < 3) + return false; + return s.substring(s.length() - 2).equals("st") && isUnsignedNumber(s.substring(0, s.length() - 2)); + } + + /** + * For a string containing an unsigned semitones expression as judged by + * isUnsignedSemitones(), return the numerical value as a + * double. + * @return the numeric part of the semitones expression, or 0 if the string + * is not a valid unsigned semitones expression. 
+ */ + private double getUnsignedSemitones(String string) { + String s = string.trim(); + if (!isUnsignedSemitones(s)) + return 0; + String num = s.substring(0, s.length() - 2); + double value = 0; + try { + value = Double.parseDouble(num); + } catch (NumberFormatException e) { + logger.warn("Unexpected number value `" + num + "'"); + } + return value; + } + + /** + * Tell whether the string contains an unsigned number. + */ + private boolean isUnsignedNumber(String string) { + String s = string.trim(); + if (s.length() < 1) + return false; + if (s.charAt(0) != '+' && s.charAt(0) != '-') { + double value = 0; + try { + value = Double.parseDouble(s); + } catch (NumberFormatException e) { + return false; + } + return true; + } + return false; + } + + /** + * For a string containing an unsigned number as judged by + * isUnsignedNumber(), return the numerical value, rounded to + * an integer. + * @return the numeric value, rounded to an integer, or 0 + * if the string is not a valid unsigned number. + */ + private int getUnsignedNumber(String string) { + String s = string.trim(); + if (!isUnsignedNumber(s)) + return 0; + double value = 0; + try { + value = Double.parseDouble(s); + } catch (NumberFormatException e) { + logger.warn("Unexpected number value `" + s + "'"); + } + return (int) Math.round(value); + } + + /** + * Tell whether the string contains a number. + */ + private boolean isNumber(String string) { + String s = string.trim(); + if (s.length() < 1) + return false; + double value = 0; + try { + value = Double.parseDouble(s); + } catch (NumberFormatException e) { + return false; + } + return true; + } + + /** + * For a string containing a number as judged by + * isNumber(), return the numerical value, rounded to + * an integer. + * @return the numeric value, rounded to an integer, or 0 + * if the string is not a valid number. 
+ */ + private int getNumber(String string) { + String s = string.trim(); + if (!isNumber(s)) + return 0; + double value = 0; + try { + value = Double.parseDouble(s); + } catch (NumberFormatException e) { + logger.warn("Unexpected number value `" + s + "'"); + } + return (int) Math.round(value); + } + + /** + * For a given token, find the stressed syllable. If no syllable has + * primary stress, return the first syllable with secondary stress. If none + * has secondary stress, return the first syllable in the token. If there + * is no syllable in the token or the element given in the argument is not + * a token element, return null. + */ + private Element getStressedSyllable(Element token) { + if (token == null || !token.getTagName().equals(MaryXML.TOKEN)) + return null; + Element syl = MaryDomUtils.getFirstElementByTagName(token, MaryXML.SYLLABLE); + while (syl != null && !syl.getAttribute("stress").equals("1")) { + syl = MaryDomUtils.getNextSiblingElementByTagName(syl, MaryXML.SYLLABLE); + } + if (syl != null) { + return syl; + } + // If we get here, there is no stressed syllable. As a fallback, use + // the first syllable with secondary stress, or the first syllable if + // none has secondary stress. + Element first = MaryDomUtils.getFirstElementByTagName(token, MaryXML.SYLLABLE); + Element secondary = first; + while (secondary != null && !secondary.getAttribute("stress").equals("2")) { + secondary = MaryDomUtils.getNextSiblingElementByTagName(secondary, MaryXML.SYLLABLE); + } + if (secondary != null) { + return secondary; + } + return first; + } + + /** + * For a syllable, return the first child segment which is a nucleus + * segment. Return null if there is no such segment. 
+ */ + private Element getNucleus(Element syllable) { + if (syllable == null || !syllable.getTagName().equals(MaryXML.SYLLABLE)) + return null; + Element seg = MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE); + while (seg != null && !isInNucleus(seg)) { + seg = MaryDomUtils.getNextSiblingElementByTagName(seg, MaryXML.PHONE); + } + return seg; + } + + ////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////// + /////////////////////////// Helper Classes /////////////////////////// + ////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////// + + static class ProsodicSettings { + // Relative settings: 100 = 100% = no change + int rate; + int accentProminence; + int accentSlope; + int numberOfPauses; + int pauseDuration; + int vowelDuration; + int plosiveDuration; + int fricativeDuration; + int nasalDuration; + int liquidDuration; + int glideDuration; + int volume; + + ProsodicSettings() { + this.rate = 100; + this.accentProminence = 100; + this.accentSlope = 100; + this.numberOfPauses = 100; + this.pauseDuration = 100; + this.vowelDuration = 100; + this.plosiveDuration = 100; + this.fricativeDuration = 100; + this.nasalDuration = 100; + this.liquidDuration = 100; + this.glideDuration = 100; + this.volume = 50; + } + + ProsodicSettings( + int rate, + int accentProminence, + int accentSlope, + int numberOfPauses, + int pauseDuration, + int vowelDuration, + int plosiveDuration, + int fricativeDuration, + int nasalDuration, + int liquidDuration, + int glideDuration, + int volume) { + this.rate = rate; + this.accentProminence = accentProminence; + this.accentSlope = accentSlope; + this.numberOfPauses = numberOfPauses; + this.pauseDuration = pauseDuration; + this.vowelDuration = vowelDuration; + this.plosiveDuration = plosiveDuration; + this.fricativeDuration = fricativeDuration; + 
this.nasalDuration = nasalDuration; + this.liquidDuration = liquidDuration; + this.glideDuration = glideDuration; + this.volume = volume; + } + + int rate() { + return rate; + } + int accentProminence() { + return accentProminence; + } + int accentSlope() { + return accentSlope; + } + int numberOfPauses() { + return numberOfPauses; + } + int pauseDuration() { + return pauseDuration; + } + int vowelDuration() { + return vowelDuration; + } + int plosiveDuration() { + return plosiveDuration; + } + int fricativeDuration() { + return fricativeDuration; + } + int nasalDuration() { + return nasalDuration; + } + int liquidDuration() { + return liquidDuration; + } + int glideDuration() { + return glideDuration; + } + int volume() { + return volume; + } + + void setRate(int value) { + rate = value; + } + void setAccentProminence(int value) { + accentProminence = value; + } + void setAccentSlope(int value) { + accentSlope = value; + } + void setNumberOfPauses(int value) { + numberOfPauses = value; + } + void setPauseDuration(int value) { + pauseDuration = value; + } + void setVowelDuration(int value) { + vowelDuration = value; + } + void setPlosiveDuration(int value) { + plosiveDuration = value; + } + void setFricativeDuration(int value) { + fricativeDuration = value; + } + void setNasalDuration(int value) { + nasalDuration = value; + } + void setLiquidDuration(int value) { + liquidDuration = value; + } + void setGlideDuration(int value) { + glideDuration = value; + } + void setVolume(int value) { + volume = value; + } + + } +} + diff --git a/marytts-jungle/src/main/java/marytts/language/tib/Prosody.java b/marytts-jungle/src/main/java/marytts/language/tib/Prosody.java index 5d452b66..08af2137 100644 --- a/marytts-jungle/src/main/java/marytts/language/tib/Prosody.java +++ b/marytts-jungle/src/main/java/marytts/language/tib/Prosody.java @@ -1,135 +1,135 @@ -/** - * Copyright 2000-2006 DFKI GmbH. - * All Rights Reserved. Use is subject to license terms. 
- * - * This file is part of MARY TTS. - * - * MARY TTS is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, version 3 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see . - * - */ -package marytts.language.tib; - -import java.util.Locale; - -import marytts.language.tib.datatypes.TibetanDataTypes; -import marytts.modules.ProsodyGeneric; -import marytts.util.dom.DomUtils; -import marytts.util.dom.MaryDomUtils; -import marytts.util.dom.NameNodeFilter; - -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; -import org.w3c.dom.traversal.DocumentTraversal; -import org.w3c.dom.traversal.NodeFilter; -import org.w3c.dom.traversal.TreeWalker; - - -public class Prosody extends ProsodyGeneric -{ - public Prosody() - { - super(TibetanDataTypes.TONES_TIB, - TibetanDataTypes.PHRASES_TIB, - new Locale("tib"), - "tibetan.prosody.tobipredparams", null, null, "tibetan.prosody.paragraphdeclination"); - } - - /** - * Currently, no accents assigned for Tibetan. - * @param token a token element - * @param accent the accent string to assign. - */ - protected void setAccent(Element token, String accent) - { - } - - /** Checks if a boundary is to be inserted after the current token - * Override default implementation in order not to assign boundary - * tones, but only break indices (no tones in Tibetan yet). 
- * @param token (current token) - * @param tokens (list of tokens in sentence) - * @param position (position in token list) - * @param sentenceType (declarative, exclamative or interrogative) - * @param specialPositionType (endofvorfeld if sentence has vorfeld and the next token is a finite verb or end of paragraph) - * @param invalidXML (true if xml structure allows boundary insertion) - * @param firstTokenInPhrase (begin of intonation phrase) - * @return firstTokenInPhrase (if a boundary was inserted, firstTokenInPhrase gets null) - */ - - protected Element getBoundary(Element token, NodeList tokens, int position, - String sentenceType, String specialPositionType, - boolean invalidXML, Element firstTokenInPhrase) - { - - String tokenText = MaryDomUtils.tokenText(token); // text of current token - - Element ruleList = null; - // only the "boundaries" rules are relevant - ruleList = (Element) tobiPredMap.get("boundaries"); - // search for concrete rules (search for tag "rule") - TreeWalker tw = ((DocumentTraversal) ruleList.getOwnerDocument()) - .createTreeWalker(ruleList, NodeFilter.SHOW_ELEMENT, - new NameNodeFilter(new String[] { "rule" }), false); - - boolean rule_fired = false; - Element rule = null; - - // search for appropriate rules; the top rule has highest prority - // if a rule fires (that is: all the conditions are fulfilled), the boundary is inserted and the loop stops - while (!rule_fired && (rule = (Element) tw.nextNode()) != null) { - // rule = the whole rule - // currentRulePart = part of the rule (condition or action) - Element currentRulePart = DomUtils.getFirstChildElement(rule); - - while (!rule_fired && currentRulePart != null) { - boolean conditionSatisfied = false; - - // if rule part with tag "action": boundary insertion - if (currentRulePart.getTagName().equals("action")) { - int bi = Integer.parseInt(currentRulePart - .getAttribute("bi")); - if (bi == 0) { - // no boundary insertion - } else if ((bi >= 4) && (bi <= 6)) { - if 
(!invalidXML) { - insertMajorBoundary(tokens, position, - firstTokenInPhrase, null, bi); - firstTokenInPhrase = null; - } else - insertBoundary(token, null, bi); - } - - else - insertBoundary(token, null, bi); - rule_fired = true; - break; - } - - // check if the condition is satisfied - conditionSatisfied = checkRulePart(currentRulePart, token, - tokens, position, sentenceType, specialPositionType, - tokenText); - if (!conditionSatisfied) - break; // condition violated, try next rule - - // the previous conditions are satisfied --> check the next rule part - currentRulePart = DomUtils - .getNextSiblingElement(currentRulePart); - }//while loop that checks the rule parts - } // while loop that checks the whole rule - return firstTokenInPhrase; - } - -} - - +/** + * Copyright 2000-2006 DFKI GmbH. + * All Rights Reserved. Use is subject to license terms. + * + * This file is part of MARY TTS. + * + * MARY TTS is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . 
+ * + */ +package marytts.language.tib; + +import java.util.Locale; + +import marytts.language.tib.datatypes.TibetanDataTypes; +import marytts.modules.ProsodyGeneric; +import marytts.util.dom.DomUtils; +import marytts.util.dom.MaryDomUtils; +import marytts.util.dom.NameNodeFilter; + +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import org.w3c.dom.traversal.DocumentTraversal; +import org.w3c.dom.traversal.NodeFilter; +import org.w3c.dom.traversal.TreeWalker; + + +public class Prosody extends ProsodyGeneric +{ + public Prosody() + { + super(TibetanDataTypes.TONES_TIB, + TibetanDataTypes.PHRASES_TIB, + new Locale("tib"), + "tibetan.prosody.tobipredparams", null, null, "tibetan.prosody.paragraphdeclination"); + } + + /** + * Currently, no accents assigned for Tibetan. + * @param token a token element + * @param accent the accent string to assign. + */ + protected void setAccent(Element token, String accent) + { + } + + /** Checks if a boundary is to be inserted after the current token + * Override default implementation in order not to assign boundary + * tones, but only break indices (no tones in Tibetan yet). 
+ * @param token (current token) + * @param tokens (list of tokens in sentence) + * @param position (position in token list) + * @param sentenceType (declarative, exclamative or interrogative) + * @param specialPositionType (endofvorfeld if sentence has vorfeld and the next token is a finite verb or end of paragraph) + * @param invalidXML (true if xml structure allows boundary insertion) + * @param firstTokenInPhrase (begin of intonation phrase) + * @return firstTokenInPhrase (if a boundary was inserted, firstTokenInPhrase gets null) + */ + + protected Element getBoundary(Element token, NodeList tokens, int position, + String sentenceType, String specialPositionType, + boolean invalidXML, Element firstTokenInPhrase) + { + + String tokenText = MaryDomUtils.tokenText(token); // text of current token + + Element ruleList = null; + // only the "boundaries" rules are relevant + ruleList = (Element) tobiPredMap.get("boundaries"); + // search for concrete rules (search for tag "rule") + TreeWalker tw = ((DocumentTraversal) ruleList.getOwnerDocument()) + .createTreeWalker(ruleList, NodeFilter.SHOW_ELEMENT, + new NameNodeFilter(new String[] { "rule" }), false); + + boolean rule_fired = false; + Element rule = null; + + // search for appropriate rules; the top rule has highest prority + // if a rule fires (that is: all the conditions are fulfilled), the boundary is inserted and the loop stops + while (!rule_fired && (rule = (Element) tw.nextNode()) != null) { + // rule = the whole rule + // currentRulePart = part of the rule (condition or action) + Element currentRulePart = DomUtils.getFirstChildElement(rule); + + while (!rule_fired && currentRulePart != null) { + boolean conditionSatisfied = false; + + // if rule part with tag "action": boundary insertion + if (currentRulePart.getTagName().equals("action")) { + int bi = Integer.parseInt(currentRulePart + .getAttribute("bi")); + if (bi == 0) { + // no boundary insertion + } else if ((bi >= 4) && (bi <= 6)) { + if 
(!invalidXML) { + insertMajorBoundary(tokens, position, + firstTokenInPhrase, null, bi); + firstTokenInPhrase = null; + } else + insertBoundary(token, null, bi); + } + + else + insertBoundary(token, null, bi); + rule_fired = true; + break; + } + + // check if the condition is satisfied + conditionSatisfied = checkRulePart(currentRulePart, token, + tokens, position, sentenceType, specialPositionType, + tokenText); + if (!conditionSatisfied) + break; // condition violated, try next rule + + // the previous conditions are satisfied --> check the next rule part + currentRulePart = DomUtils + .getNextSiblingElement(currentRulePart); + }//while loop that checks the rule parts + } // while loop that checks the whole rule + return firstTokenInPhrase; + } + +} + + diff --git a/marytts-jungle/src/main/java/marytts/signalproc/demo/ChangeMyVoiceUI.java b/marytts-jungle/src/main/java/marytts/signalproc/demo/ChangeMyVoiceUI.java index 9715fd7e..ea2324ed 100644 --- a/marytts-jungle/src/main/java/marytts/signalproc/demo/ChangeMyVoiceUI.java +++ b/marytts-jungle/src/main/java/marytts/signalproc/demo/ChangeMyVoiceUI.java @@ -1,6 +1,6 @@ -/** - * Copyright 2007 DFKI GmbH. - * All Rights Reserved. Use is subject to license terms. +/** + * Copyright 2007 DFKI GmbH. + * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * @@ -17,1216 +17,1216 @@ * along with this program. If not, see . 
* */ -package marytts.signalproc.demo; - -import java.awt.Dimension; -import java.awt.Point; -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.Vector; - -import javax.sound.sampled.AudioFileFormat; -import javax.sound.sampled.AudioFormat; -import javax.sound.sampled.AudioInputStream; -import javax.sound.sampled.AudioSystem; -import javax.sound.sampled.Clip; -import javax.sound.sampled.DataLine; -import javax.sound.sampled.Line; -import javax.sound.sampled.LineEvent; -import javax.sound.sampled.LineListener; -import javax.sound.sampled.LineUnavailableException; -import javax.sound.sampled.Mixer; -import javax.sound.sampled.SourceDataLine; -import javax.sound.sampled.TargetDataLine; -import javax.sound.sampled.UnsupportedAudioFileException; -import javax.swing.JFileChooser; -import javax.swing.filechooser.FileFilter; - -import marytts.signalproc.filter.BandPassFilter; -import marytts.signalproc.filter.LowPassFilter; -import marytts.signalproc.process.AudioMixer; -import marytts.signalproc.process.Chorus; -import marytts.signalproc.process.InlineDataProcessor; -import marytts.signalproc.process.LPCWhisperiser; -import marytts.signalproc.process.Robotiser; -import marytts.signalproc.process.VocalTractScalingProcessor; -import marytts.signalproc.process.VocalTractScalingSimpleProcessor; -import marytts.signalproc.process.VoiceModificationParameters; -import marytts.util.data.audio.AudioRecorder.BufferingRecorder; -import marytts.util.io.SimpleFileFilter; -import marytts.util.math.MathUtils; -import marytts.util.signal.SignalProcUtils; - - -/** - * - * @author oytun.turk - * - * A demonstration GUI for real-time voice modification using various audio effects, - * signal processing, and modification - * - */ - -public class ChangeMyVoiceUI extends javax.swing.JFrame { - File outputFile; - private double amount; - 
private int targetIndex; - private int inputIndex; - private int recordIndex; - private boolean bStarted; - private boolean bRecording; - private boolean bPlaying; - OnlineAudioEffects online; - TargetDataLine microphone; - SourceDataLine loudspeakers; - AudioInputStream inputStream; - InputStream resStream; - BufferingRecorder recorder; - Clip m_clip; - InputStream playFile; - InputStream mixFile; - - private Vector listItems; //Just the names we see on the list - private File lastDirectory; - private InputStream inputFile; - private String[] inputFileNameList; //Actual full paths to files - private Vector builtInFileNameList; - private String classPath; //Class run-time path - private String strRecordPath; - - VoiceModificationParameters modParams; - String [] targetNames = { "Robot", - "Whisper", - "Dwarf1", - "Dwarf2", - "Ogre1", - "Ogre2", - "Giant1", - "Giant2", - //"Echo", - "Stadium", - "Telephone", - //"Old Radio", - "Jet Pilot", - //"Helicopter Pilot", - "Bird", - "Cat", - "Dog", - "Horse", - //"Jungle", - "Monster1", - "Monster2", - "Ghost", - //"Alien", - "Flute", - "Violin", - //"Earthquake", - //"Fire", - //"Ocean", - //"Thunder", - //"Waterfall" - }; - -private String[] mixFiles = {"helicopter_mix.wav", - "jungle_mix.wav", - "monster1_mix.wav", - "alien_mix.wav" - }; - -private String[] lpCrossSynthFiles = {"bird.wav", - "cat.wav", - "dog.wav", - "horse.wav", - "ghost.wav", - "monster2.wav", - "flute.wav", - "violin.wav", - "earthquake.wav", - "fire.wav", - "ocean.wav", - "thunder.wav", - "waterfall.wav" - }; - - /** Creates new form ChangeMyVoiceUI */ - public ChangeMyVoiceUI() { - playFile = null; - mixFile = null; - recorder = null; - outputFile = null; - microphone = null; - loudspeakers = null; - inputStream = null; - targetIndex = -1; - inputIndex = -1; - inputFile = null; - resStream = null; - bRecording = false; - bPlaying = false; - lastDirectory = null; - inputFileNameList = null; - listItems = new Vector(); - recordIndex = 0; - - classPath 
= new File(".").getAbsolutePath(); - - listItems.addElement("Streaming Audio"); - - builtInFileNameList = new Vector(); - listItems.addElement("Unit selection TTS male (wohin-bits3.wav)"); - builtInFileNameList.add("wohin-bits3.wav"); - - listItems.addElement("Unit selection TTS male (ausprobieren-bits3.wav)"); - builtInFileNameList.add("ausprobieren-bits3.wav"); - - listItems.addElement("HMM-based TTS male (ausprobieren-hmm3.wav)"); - builtInFileNameList.add("ausprobieren-hmm3.wav"); - - listItems.addElement("HMM-based TTS male (wohin-hmm3.wav)"); - builtInFileNameList.add("wohin-hmm3.wav"); - - listItems.addElement("Unit selection TTS female (gewinnen-bits4.wav)"); - builtInFileNameList.add("gewinnen-bits4.wav"); - - listItems.addElement("Unit selection TTS female (so-nicht-bits4.wav)"); - builtInFileNameList.add("so-nicht-bits4.wav"); - - listItems.addElement("HMM-based TTS female (gewinnen-hmm4.wav)"); - builtInFileNameList.add("gewinnen-hmm4.wav"); - - listItems.addElement("HMM-based TTS female (so-nicht-hmm4.wav)"); - builtInFileNameList.add("so-nicht-hmm4.wav"); - - listItems.addElement("Limited domain TTS neutral (herta-neutral.wav)"); - builtInFileNameList.add("herta-neutral.wav"); - - listItems.addElement("Limited domain TTS excited (herta-excited.wav)"); - builtInFileNameList.add("herta-excited.wav"); - - initComponents(); - modParams = new VoiceModificationParameters(); - } - - /** This method is called from within the constructor to - * initialize the form. - * WARNING: Do NOT modify this code. The content of this method is - * always regenerated by the Form Editor. 
- */ - // //GEN-BEGIN:initComponents - private void initComponents() { - java.awt.GridBagConstraints gridBagConstraints; - - jComboBoxTargetVoice = new javax.swing.JComboBox(); - jButtonExit = new javax.swing.JButton(); - jLabelTargetVoice = new javax.swing.JLabel(); - jButtonAdd = new javax.swing.JButton(); - jButtonStart = new javax.swing.JButton(); - jButtonDel = new javax.swing.JButton(); - jButtonPlay = new javax.swing.JButton(); - jLabelLow = new javax.swing.JLabel(); - jScrollList = new javax.swing.JScrollPane(); - jListInput = new javax.swing.JList(); - jLabelChangeAmount = new javax.swing.JLabel(); - jLabelHigh = new javax.swing.JLabel(); - jSliderChangeAmount = new javax.swing.JSlider(); - jLabelInput = new javax.swing.JLabel(); - jButtonRec = new javax.swing.JButton(); - jLabelMedium = new javax.swing.JLabel(); - jLabel1 = new javax.swing.JLabel(); - jLabel2 = new javax.swing.JLabel(); - - getContentPane().setLayout(new java.awt.GridBagLayout()); - - setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE); - setTitle("Change My Voice"); - setResizable(false); - addMouseListener(new java.awt.event.MouseAdapter() { - public void mouseClicked(java.awt.event.MouseEvent evt) { - formMouseClicked(evt); - } - }); - addWindowListener(new java.awt.event.WindowAdapter() { - public void windowOpened(java.awt.event.WindowEvent evt) { - formWindowOpened(evt); - } - }); - - jComboBoxTargetVoice.setMaximumRowCount(20); - jComboBoxTargetVoice.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - jComboBoxTargetVoiceActionPerformed(evt); - } - }); - - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 2; - gridBagConstraints.gridy = 0; - gridBagConstraints.gridwidth = 3; - gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL; - gridBagConstraints.insets = new java.awt.Insets(0, 0, 20, 10); - getContentPane().add(jComboBoxTargetVoice, 
gridBagConstraints); - - jButtonExit.setText("Exit"); - jButtonExit.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - jButtonExitActionPerformed(evt); - } - }); - - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 2; - gridBagConstraints.gridy = 8; - gridBagConstraints.gridwidth = 2; - getContentPane().add(jButtonExit, gridBagConstraints); - - jLabelTargetVoice.setText("Target Voice"); - jLabelTargetVoice.setName(""); - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 0; - gridBagConstraints.gridwidth = 2; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0); - getContentPane().add(jLabelTargetVoice, gridBagConstraints); - - jButtonAdd.setText("Add"); - jButtonAdd.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - jButtonAddActionPerformed(evt); - } - }); - - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 1; - gridBagConstraints.gridy = 5; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHEAST; - getContentPane().add(jButtonAdd, gridBagConstraints); - - jButtonStart.setText("Start"); - jButtonStart.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - jButtonStartActionPerformed(evt); - } - }); - - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 2; - gridBagConstraints.gridy = 6; - gridBagConstraints.gridwidth = 2; - gridBagConstraints.gridheight = 2; - gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH; - gridBagConstraints.ipady = 10; - gridBagConstraints.insets = new java.awt.Insets(20, 0, 20, 0); - getContentPane().add(jButtonStart, gridBagConstraints); - - 
jButtonDel.setText("Del"); - jButtonDel.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - jButtonDelActionPerformed(evt); - } - }); - - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 4; - gridBagConstraints.gridy = 5; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - getContentPane().add(jButtonDel, gridBagConstraints); - - jButtonPlay.setText("Play"); - jButtonPlay.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - jButtonPlayActionPerformed(evt); - } - }); - - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 3; - gridBagConstraints.gridy = 5; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTH; - getContentPane().add(jButtonPlay, gridBagConstraints); - - jLabelLow.setText("Low"); - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 2; - gridBagConstraints.gridy = 2; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - getContentPane().add(jLabelLow, gridBagConstraints); - - jListInput.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION); - jListInput.setPreferredSize(new java.awt.Dimension(0, 100)); - jListInput.addListSelectionListener(new javax.swing.event.ListSelectionListener() { - public void valueChanged(javax.swing.event.ListSelectionEvent evt) { - jListInputValueChanged(evt); - } - }); - jListInput.addMouseListener(new java.awt.event.MouseAdapter() { - public void mouseClicked(java.awt.event.MouseEvent evt) { - jListInputMouseClicked(evt); - } - }); - - jScrollList.setViewportView(jListInput); - - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 4; - gridBagConstraints.gridwidth = 6; - gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH; - gridBagConstraints.ipady = 200; - 
gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 10); - getContentPane().add(jScrollList, gridBagConstraints); - - jLabelChangeAmount.setText("Change Amount"); - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 1; - gridBagConstraints.gridwidth = 2; - gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST; - gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0); - getContentPane().add(jLabelChangeAmount, gridBagConstraints); - - jLabelHigh.setText("High"); - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 4; - gridBagConstraints.gridy = 2; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHEAST; - gridBagConstraints.insets = new java.awt.Insets(0, 0, 0, 10); - getContentPane().add(jLabelHigh, gridBagConstraints); - - jSliderChangeAmount.setMajorTickSpacing(50); - jSliderChangeAmount.setMinorTickSpacing(5); - jSliderChangeAmount.setPaintTicks(true); - jSliderChangeAmount.addChangeListener(new javax.swing.event.ChangeListener() { - public void stateChanged(javax.swing.event.ChangeEvent evt) { - jSliderChangeAmountStateChanged(evt); - } - }); - - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 2; - gridBagConstraints.gridy = 1; - gridBagConstraints.gridwidth = 3; - gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL; - gridBagConstraints.ipadx = 154; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new java.awt.Insets(0, 0, 0, 10); - getContentPane().add(jSliderChangeAmount, gridBagConstraints); - - jLabelInput.setText("Input"); - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 3; - gridBagConstraints.gridwidth = 2; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new java.awt.Insets(20, 10, 0, 0); - 
getContentPane().add(jLabelInput, gridBagConstraints); - - jButtonRec.setText("Rec"); - jButtonRec.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - jButtonRecActionPerformed(evt); - } - }); - - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 2; - gridBagConstraints.gridy = 5; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTH; - getContentPane().add(jButtonRec, gridBagConstraints); - - jLabelMedium.setText("Medium"); - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 3; - gridBagConstraints.gridy = 2; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - getContentPane().add(jLabelMedium, gridBagConstraints); - - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 5; - gridBagConstraints.ipadx = 30; - getContentPane().add(jLabel1, gridBagConstraints); - - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 5; - gridBagConstraints.gridy = 5; - gridBagConstraints.ipadx = 30; - getContentPane().add(jLabel2, gridBagConstraints); - - java.awt.Dimension screenSize = java.awt.Toolkit.getDefaultToolkit().getScreenSize(); - setBounds((screenSize.width-382)/2, (screenSize.height-560)/2, 382, 560); - }// //GEN-END:initComponents - - private void jListInputMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jListInputMouseClicked - int numClicks = evt.getClickCount(); - if (numClicks==2) - { - getInputIndex(); - - if (inputIndex>0) - { - if (!bPlaying && !bRecording) - jButtonPlay.doClick(); - } - else if (inputIndex==0 && !bStarted) - jButtonStart.doClick(); - } - }//GEN-LAST:event_jListInputMouseClicked - - private Clip playClip = null; - private void jButtonPlayActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonPlayActionPerformed - if (!bRecording) - { - if (!bPlaying) - 
{ - bPlaying = true; - try - { - if (inputIndex <= 0) - playFile = null; - else if (inputIndex>builtInFileNameList.size()) - { - try - { - playFile = new BufferedInputStream(new FileInputStream((String)listItems.get(inputIndex))); - } catch (FileNotFoundException fnf) - { - fnf.printStackTrace(); - } - } else - { - playFile = ChangeMyVoiceUI.class.getResourceAsStream("wav/samples"+((String) builtInFileNameList.get(inputIndex-1))); - } - AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(playFile); - AudioFormat format = audioInputStream.getFormat(); - DataLine.Info lineInfo = new DataLine.Info(Clip.class, format); - playClip = (Clip) AudioSystem.getLine(lineInfo); - playClip.addLineListener(new LineListener() { - public void update(LineEvent le) { - if (le.getType().equals(LineEvent.Type.STOP)) { - bPlaying = false; - playClip.close(); - playClip = null; - try { - playFile.close(); - } catch (IOException e) { - e.printStackTrace(); - } - playFile = null; - updateGUIPlaying(); - } - } - }); - playClip.open(audioInputStream); - playClip.start(); - } catch (Exception e) { - e.printStackTrace(); - } - } else { - bPlaying = false; - if (playClip != null) { - playClip.stop(); - playClip = null; - } - if (playFile != null) { - try { - playFile.close(); - } catch (IOException e) { - e.printStackTrace(); - } - playFile = null; - } - } - updateGUIPlaying(); - } - }//GEN-LAST:event_jButtonPlayActionPerformed - - private void updateGUIPlaying() - { - if (bPlaying) { - jButtonPlay.setText("Stop"); - } else { - jButtonPlay.setText("Play"); - } - jButtonRec.setEnabled(!bPlaying); - jButtonAdd.setEnabled(!bPlaying); - jButtonDel.setEnabled(!bPlaying); - jListInput.setEnabled(!bPlaying); - jButtonStart.setEnabled(!bPlaying); - - } - - - private void jButtonDelActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonDelActionPerformed - if (inputIndex>=builtInFileNameList.size()+1) - { - listItems.remove(inputIndex); - inputIndex--; - 
UpdateInputList(); - } - }//GEN-LAST:event_jButtonDelActionPerformed - - private void jListInputValueChanged(javax.swing.event.ListSelectionEvent evt) {//GEN-FIRST:event_jListInputValueChanged - - getInputIndex(); - - if (inputIndex==0) - jButtonPlay.setEnabled(false); - else - jButtonPlay.setEnabled(true); - - if (inputIndex0.001) //If currently processing and changed modification amount - { - jButtonStart.doClick(); //Stop - jButtonStart.doClick(); //and restart to adapt to new target voice - } - }//GEN-LAST:event_jSliderChangeAmountStateChanged - - private void jButtonExitActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonExitActionPerformed - System.exit(0); - }//GEN-LAST:event_jButtonExitActionPerformed - - private void jComboBoxTargetVoiceActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jComboBoxTargetVoiceActionPerformed - int prevTargetIndex = targetIndex; - - getTargetIndex(); - - if (bStarted && prevTargetIndex != targetIndex) //If currently processing and changed target voice type - { - jButtonStart.doClick(); //Stop - jButtonStart.doClick(); //and restart to adapt to new target voice - } - }//GEN-LAST:event_jComboBoxTargetVoiceActionPerformed - - private void formMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_formMouseClicked - - }//GEN-LAST:event_formMouseClicked - - public void getTargetIndex() - { - targetIndex = jComboBoxTargetVoice.getSelectedIndex(); - if (targetNames[targetIndex]=="Telephone") - modParams.fs = 8000; - else - modParams.fs = 16000; - - boolean bChangeEnabled = true; - if (targetNames[targetIndex]=="Jet Pilot" || - targetNames[targetIndex]=="Old Radio" || - targetNames[targetIndex]=="Telephone") - { - bChangeEnabled = false; - } - - jLabelChangeAmount.setEnabled(bChangeEnabled); - jLabelLow.setEnabled(bChangeEnabled); - jLabelMedium.setEnabled(bChangeEnabled); - jLabelHigh.setEnabled(bChangeEnabled); - jSliderChangeAmount.setEnabled(bChangeEnabled); - - if 
(targetNames[targetIndex]=="Robot") - jLabelChangeAmount.setText("Pitch"); - else - jLabelChangeAmount.setText("Change Amount"); - } - - private void jButtonStartActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonStartActionPerformed - if (!bStarted) - { - bStarted = true; - updateGUIStart(); - getParameters(); - changeVoice(); - } else { - bStarted = false; - updateGUIStart(); - online.requestStop(); - - //Close the source and the target datalines to be able to use them repeatedly - if (microphone!=null) - { - microphone.close(); - microphone = null; - } - - if (loudspeakers != null) - { - loudspeakers.close(); - loudspeakers = null; - } - - if (inputStream != null) - { - try { - inputStream.close(); - } catch (IOException e) { - e.printStackTrace(); - } - inputStream = null; - } - - if (inputFile != null) { - try { - inputFile.close(); - } catch (IOException e) { - e.printStackTrace(); - } - inputFile = null; - - } - // - - jButtonStart.setText("Start"); - jButtonRec.setEnabled(true); - jButtonPlay.setEnabled(true); - jButtonAdd.setEnabled(true); - if (inputIndex>builtInFileNameList.size()) - jButtonDel.setEnabled(true); - jListInput.setEnabled(true); - - } - }//GEN-LAST:event_jButtonStartActionPerformed - - private void updateGUIStart() - { - if (bStarted) - { - jButtonStart.setText("Stop"); - } else { - jButtonStart.setText("Start"); - } - jButtonRec.setEnabled(!bStarted); - jButtonPlay.setEnabled(!bStarted); - jButtonAdd.setEnabled(!bStarted); - jButtonDel.setEnabled(!bStarted && inputIndex>builtInFileNameList.size()); - jListInput.setEnabled(!bStarted); - } - - /* This function gets the modification parameters from the GUI - * and fills in the modParams object - */ - private void getParameters() { - getInputIndex(); - getTargetIndex(); - getAmount(); - } - - /*This function opens source and target datalines and starts real-time voice modification - * using the parameters in the modParams object - */ - private void changeVoice() { - int 
channels = 1; - - AudioFormat audioFormat = null; - - if (inputIndex == 0) //Online processing using microphone - { - audioFormat = new AudioFormat( - AudioFormat.Encoding.PCM_SIGNED, modParams.fs, 16, channels, 2*channels, modParams.fs, - false); - - if (microphone != null) - microphone.close(); - - microphone = getMicrophone(audioFormat); - - if (microphone != null) - { - audioFormat = microphone.getFormat(); - modParams.fs = (int)audioFormat.getSampleRate(); - } - } - else //Online processing using pre-recorded wav file - { - if (inputIndex>0) - { - if (inputIndex>builtInFileNameList.size()) - { - String inputFileNameFull = (String)listItems.get(inputIndex); - try { - inputFile = new BufferedInputStream(new FileInputStream(inputFileNameFull)); - } catch (FileNotFoundException fnf) { - fnf.printStackTrace(); - } - } - else - inputFile = ChangeMyVoiceUI.class.getResourceAsStream("wav/samples/"+((String)builtInFileNameList.get(inputIndex-1))); - } - else - inputFile = null; - - if (inputFile != null) - { - try { - inputStream = AudioSystem.getAudioInputStream(inputFile); - } catch (UnsupportedAudioFileException e) { - e.printStackTrace(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - if (inputStream != null) - { - audioFormat = inputStream.getFormat(); - modParams.fs = (int)audioFormat.getSampleRate(); - } - } - - if (loudspeakers != null) - loudspeakers.close(); - - try { - DataLine.Info info = new DataLine.Info(SourceDataLine.class, - audioFormat); - loudspeakers = (SourceDataLine) AudioSystem.getLine(info); - loudspeakers.open(audioFormat); - System.out.println("Loudspeaker format: " + loudspeakers.getFormat()); - } catch (LineUnavailableException e) { - e.printStackTrace(); - } - - // Choose an audio effect - InlineDataProcessor effect = null; - int bufferSize = SignalProcUtils.getDFTSize(modParams.fs); - - if (targetNames[targetIndex]=="Robot") - { - double targetHz = 200+(amount-0.5)*200; - bufferSize = 
(int) (modParams.fs / targetHz * 4 /*-fold overlap in ola*/ ); - - effect = new Robotiser.PhaseRemover(MathUtils.closestPowerOfTwoAbove(bufferSize), 1.0); - } - else if (targetNames[targetIndex]=="Whisper") - { - effect = new LPCWhisperiser(SignalProcUtils.getLPOrder(modParams.fs), 0.4+0.6*amount); - } - else if (targetNames[targetIndex]=="Dwarf1") //Using freq. domain LP spectrum modification - { - double [] vscales = {1.3+0.5*amount}; - int p = SignalProcUtils.getLPOrder(modParams.fs); - if (bufferSize<1024) - bufferSize=1024; - effect = new VocalTractScalingProcessor(p, modParams.fs, bufferSize, vscales); - } - else if (targetNames[targetIndex]=="Dwarf2") //Using freq. domain DFT magnitude spectrum modification - { - double [] vscales = {1.3+0.5*amount}; - effect = new VocalTractScalingSimpleProcessor(bufferSize, vscales); - } - else if (targetNames[targetIndex]=="Ogre1") //Using freq. domain LP spectrum modification - { - double [] vscales = {0.90-0.1*amount}; - int p = SignalProcUtils.getLPOrder(modParams.fs); - if (bufferSize<1024) - bufferSize=1024; - effect = new VocalTractScalingProcessor(p, modParams.fs, bufferSize, vscales); - } - else if (targetNames[targetIndex]=="Ogre2") //Using freq. domain DFT magnitude spectrum modification - { - double [] vscales = {0.90-0.1*amount}; - effect = new VocalTractScalingSimpleProcessor(bufferSize, vscales); - } - else if (targetNames[targetIndex]=="Giant1") //Using freq. domain LP spectrum modification - { - double [] vscales = {0.75-0.1*amount}; - int p = SignalProcUtils.getLPOrder(modParams.fs); - if (bufferSize<1024) - bufferSize=1024; - effect = new VocalTractScalingProcessor(p, modParams.fs, bufferSize, vscales); - } - else if (targetNames[targetIndex]=="Giant2") //Using freq. 
domain DFT magnitude spectrum modification - { - double [] vscales = {0.75-0.1*amount}; - effect = new VocalTractScalingSimpleProcessor(bufferSize, vscales); - } - else if (targetNames[targetIndex]=="Echo") - { - int [] delaysInMiliseconds = {100+(int)(20*amount), 200+(int)(50*amount), 300+(int)(100*amount)}; - double [] amps = {0.8, -0.7, 0.9}; - - int maxDelayInMiliseconds = MathUtils.getMax(delaysInMiliseconds); - int maxDelayInSamples = (int)(maxDelayInMiliseconds/1000.0*modParams.fs); - - if (bufferSize= 0; i--) { - if (formats[i].getChannels() == 1 - && formats[i].getFrameSize() == 2) { - lineFormat = formats[i]; - break; - } - } - System.err.println("Using instead: "+lineFormat); - } - if (lineFormat == null) { - throw new LineUnavailableException("Cannot get any mono line with 16 bit"); - } - line.open(lineFormat, 4096); - - } catch (LineUnavailableException e) { - e.printStackTrace(); - System.exit(1); - } - return line; - } - - public InlineDataProcessor getLPCrossSynthEffect(int lpCrossSynthFileInd, int bufferSize) - { - InlineDataProcessor effect = null; - - effect = new LPCCrossSynthesisOnline(SignalProcUtils.getLPOrder(modParams.fs), bufferSize, "wav/lp_cross_synth/"+lpCrossSynthFiles[lpCrossSynthFileInd], modParams.fs); - - return effect; - } - - /** - * @param args the command line arguments - */ - public static void main(String args[]) { - java.awt.EventQueue.invokeLater(new Runnable() { - public void run() { - new ChangeMyVoiceUI().setVisible(true); - } - }); - } - - // Variables declaration - do not modify//GEN-BEGIN:variables - private javax.swing.JButton jButtonAdd; - private javax.swing.JButton jButtonDel; - private javax.swing.JButton jButtonExit; - private javax.swing.JButton jButtonPlay; - private javax.swing.JButton jButtonRec; - private javax.swing.JButton jButtonStart; - private javax.swing.JComboBox jComboBoxTargetVoice; - private javax.swing.JLabel jLabel1; - private javax.swing.JLabel jLabel2; - private javax.swing.JLabel 
jLabelChangeAmount; - private javax.swing.JLabel jLabelHigh; - private javax.swing.JLabel jLabelInput; - private javax.swing.JLabel jLabelLow; - private javax.swing.JLabel jLabelMedium; - private javax.swing.JLabel jLabelTargetVoice; - private javax.swing.JList jListInput; - private javax.swing.JScrollPane jScrollList; - private javax.swing.JSlider jSliderChangeAmount; - // End of variables declaration//GEN-END:variables -} +package marytts.signalproc.demo; + +import java.awt.Dimension; +import java.awt.Point; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.Vector; + +import javax.sound.sampled.AudioFileFormat; +import javax.sound.sampled.AudioFormat; +import javax.sound.sampled.AudioInputStream; +import javax.sound.sampled.AudioSystem; +import javax.sound.sampled.Clip; +import javax.sound.sampled.DataLine; +import javax.sound.sampled.Line; +import javax.sound.sampled.LineEvent; +import javax.sound.sampled.LineListener; +import javax.sound.sampled.LineUnavailableException; +import javax.sound.sampled.Mixer; +import javax.sound.sampled.SourceDataLine; +import javax.sound.sampled.TargetDataLine; +import javax.sound.sampled.UnsupportedAudioFileException; +import javax.swing.JFileChooser; +import javax.swing.filechooser.FileFilter; + +import marytts.signalproc.filter.BandPassFilter; +import marytts.signalproc.filter.LowPassFilter; +import marytts.signalproc.process.AudioMixer; +import marytts.signalproc.process.Chorus; +import marytts.signalproc.process.InlineDataProcessor; +import marytts.signalproc.process.LPCWhisperiser; +import marytts.signalproc.process.Robotiser; +import marytts.signalproc.process.VocalTractScalingProcessor; +import marytts.signalproc.process.VocalTractScalingSimpleProcessor; +import marytts.signalproc.process.VoiceModificationParameters; +import 
marytts.util.data.audio.AudioRecorder.BufferingRecorder; +import marytts.util.io.SimpleFileFilter; +import marytts.util.math.MathUtils; +import marytts.util.signal.SignalProcUtils; + + +/** + * + * @author oytun.turk + * + * A demonstration GUI for real-time voice modification using various audio effects, + * signal processing, and modification + * + */ + +public class ChangeMyVoiceUI extends javax.swing.JFrame { + File outputFile; + private double amount; + private int targetIndex; + private int inputIndex; + private int recordIndex; + private boolean bStarted; + private boolean bRecording; + private boolean bPlaying; + OnlineAudioEffects online; + TargetDataLine microphone; + SourceDataLine loudspeakers; + AudioInputStream inputStream; + InputStream resStream; + BufferingRecorder recorder; + Clip m_clip; + InputStream playFile; + InputStream mixFile; + + private Vector listItems; //Just the names we see on the list + private File lastDirectory; + private InputStream inputFile; + private String[] inputFileNameList; //Actual full paths to files + private Vector builtInFileNameList; + private String classPath; //Class run-time path + private String strRecordPath; + + VoiceModificationParameters modParams; + String [] targetNames = { "Robot", + "Whisper", + "Dwarf1", + "Dwarf2", + "Ogre1", + "Ogre2", + "Giant1", + "Giant2", + //"Echo", + "Stadium", + "Telephone", + //"Old Radio", + "Jet Pilot", + //"Helicopter Pilot", + "Bird", + "Cat", + "Dog", + "Horse", + //"Jungle", + "Monster1", + "Monster2", + "Ghost", + //"Alien", + "Flute", + "Violin", + //"Earthquake", + //"Fire", + //"Ocean", + //"Thunder", + //"Waterfall" + }; + +private String[] mixFiles = {"helicopter_mix.wav", + "jungle_mix.wav", + "monster1_mix.wav", + "alien_mix.wav" + }; + +private String[] lpCrossSynthFiles = {"bird.wav", + "cat.wav", + "dog.wav", + "horse.wav", + "ghost.wav", + "monster2.wav", + "flute.wav", + "violin.wav", + "earthquake.wav", + "fire.wav", + "ocean.wav", + "thunder.wav", + 
"waterfall.wav" + }; + + /** Creates new form ChangeMyVoiceUI */ + public ChangeMyVoiceUI() { + playFile = null; + mixFile = null; + recorder = null; + outputFile = null; + microphone = null; + loudspeakers = null; + inputStream = null; + targetIndex = -1; + inputIndex = -1; + inputFile = null; + resStream = null; + bRecording = false; + bPlaying = false; + lastDirectory = null; + inputFileNameList = null; + listItems = new Vector(); + recordIndex = 0; + + classPath = new File(".").getAbsolutePath(); + + listItems.addElement("Streaming Audio"); + + builtInFileNameList = new Vector(); + listItems.addElement("Unit selection TTS male (wohin-bits3.wav)"); + builtInFileNameList.add("wohin-bits3.wav"); + + listItems.addElement("Unit selection TTS male (ausprobieren-bits3.wav)"); + builtInFileNameList.add("ausprobieren-bits3.wav"); + + listItems.addElement("HMM-based TTS male (ausprobieren-hmm3.wav)"); + builtInFileNameList.add("ausprobieren-hmm3.wav"); + + listItems.addElement("HMM-based TTS male (wohin-hmm3.wav)"); + builtInFileNameList.add("wohin-hmm3.wav"); + + listItems.addElement("Unit selection TTS female (gewinnen-bits4.wav)"); + builtInFileNameList.add("gewinnen-bits4.wav"); + + listItems.addElement("Unit selection TTS female (so-nicht-bits4.wav)"); + builtInFileNameList.add("so-nicht-bits4.wav"); + + listItems.addElement("HMM-based TTS female (gewinnen-hmm4.wav)"); + builtInFileNameList.add("gewinnen-hmm4.wav"); + + listItems.addElement("HMM-based TTS female (so-nicht-hmm4.wav)"); + builtInFileNameList.add("so-nicht-hmm4.wav"); + + listItems.addElement("Limited domain TTS neutral (herta-neutral.wav)"); + builtInFileNameList.add("herta-neutral.wav"); + + listItems.addElement("Limited domain TTS excited (herta-excited.wav)"); + builtInFileNameList.add("herta-excited.wav"); + + initComponents(); + modParams = new VoiceModificationParameters(); + } + + /** This method is called from within the constructor to + * initialize the form. 
+ * WARNING: Do NOT modify this code. The content of this method is + * always regenerated by the Form Editor. + */ + // //GEN-BEGIN:initComponents + private void initComponents() { + java.awt.GridBagConstraints gridBagConstraints; + + jComboBoxTargetVoice = new javax.swing.JComboBox(); + jButtonExit = new javax.swing.JButton(); + jLabelTargetVoice = new javax.swing.JLabel(); + jButtonAdd = new javax.swing.JButton(); + jButtonStart = new javax.swing.JButton(); + jButtonDel = new javax.swing.JButton(); + jButtonPlay = new javax.swing.JButton(); + jLabelLow = new javax.swing.JLabel(); + jScrollList = new javax.swing.JScrollPane(); + jListInput = new javax.swing.JList(); + jLabelChangeAmount = new javax.swing.JLabel(); + jLabelHigh = new javax.swing.JLabel(); + jSliderChangeAmount = new javax.swing.JSlider(); + jLabelInput = new javax.swing.JLabel(); + jButtonRec = new javax.swing.JButton(); + jLabelMedium = new javax.swing.JLabel(); + jLabel1 = new javax.swing.JLabel(); + jLabel2 = new javax.swing.JLabel(); + + getContentPane().setLayout(new java.awt.GridBagLayout()); + + setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE); + setTitle("Change My Voice"); + setResizable(false); + addMouseListener(new java.awt.event.MouseAdapter() { + public void mouseClicked(java.awt.event.MouseEvent evt) { + formMouseClicked(evt); + } + }); + addWindowListener(new java.awt.event.WindowAdapter() { + public void windowOpened(java.awt.event.WindowEvent evt) { + formWindowOpened(evt); + } + }); + + jComboBoxTargetVoice.setMaximumRowCount(20); + jComboBoxTargetVoice.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + jComboBoxTargetVoiceActionPerformed(evt); + } + }); + + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 2; + gridBagConstraints.gridy = 0; + gridBagConstraints.gridwidth = 3; + gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL; + 
gridBagConstraints.insets = new java.awt.Insets(0, 0, 20, 10); + getContentPane().add(jComboBoxTargetVoice, gridBagConstraints); + + jButtonExit.setText("Exit"); + jButtonExit.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + jButtonExitActionPerformed(evt); + } + }); + + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 2; + gridBagConstraints.gridy = 8; + gridBagConstraints.gridwidth = 2; + getContentPane().add(jButtonExit, gridBagConstraints); + + jLabelTargetVoice.setText("Target Voice"); + jLabelTargetVoice.setName(""); + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 0; + gridBagConstraints.gridwidth = 2; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0); + getContentPane().add(jLabelTargetVoice, gridBagConstraints); + + jButtonAdd.setText("Add"); + jButtonAdd.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + jButtonAddActionPerformed(evt); + } + }); + + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 1; + gridBagConstraints.gridy = 5; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHEAST; + getContentPane().add(jButtonAdd, gridBagConstraints); + + jButtonStart.setText("Start"); + jButtonStart.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + jButtonStartActionPerformed(evt); + } + }); + + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 2; + gridBagConstraints.gridy = 6; + gridBagConstraints.gridwidth = 2; + gridBagConstraints.gridheight = 2; + gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH; + gridBagConstraints.ipady = 10; + gridBagConstraints.insets = new 
java.awt.Insets(20, 0, 20, 0); + getContentPane().add(jButtonStart, gridBagConstraints); + + jButtonDel.setText("Del"); + jButtonDel.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + jButtonDelActionPerformed(evt); + } + }); + + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 4; + gridBagConstraints.gridy = 5; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + getContentPane().add(jButtonDel, gridBagConstraints); + + jButtonPlay.setText("Play"); + jButtonPlay.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + jButtonPlayActionPerformed(evt); + } + }); + + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 3; + gridBagConstraints.gridy = 5; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTH; + getContentPane().add(jButtonPlay, gridBagConstraints); + + jLabelLow.setText("Low"); + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 2; + gridBagConstraints.gridy = 2; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + getContentPane().add(jLabelLow, gridBagConstraints); + + jListInput.setSelectionMode(javax.swing.ListSelectionModel.SINGLE_SELECTION); + jListInput.setPreferredSize(new java.awt.Dimension(0, 100)); + jListInput.addListSelectionListener(new javax.swing.event.ListSelectionListener() { + public void valueChanged(javax.swing.event.ListSelectionEvent evt) { + jListInputValueChanged(evt); + } + }); + jListInput.addMouseListener(new java.awt.event.MouseAdapter() { + public void mouseClicked(java.awt.event.MouseEvent evt) { + jListInputMouseClicked(evt); + } + }); + + jScrollList.setViewportView(jListInput); + + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 4; + gridBagConstraints.gridwidth = 6; + 
gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH; + gridBagConstraints.ipady = 200; + gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 10); + getContentPane().add(jScrollList, gridBagConstraints); + + jLabelChangeAmount.setText("Change Amount"); + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 1; + gridBagConstraints.gridwidth = 2; + gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST; + gridBagConstraints.insets = new java.awt.Insets(0, 10, 0, 0); + getContentPane().add(jLabelChangeAmount, gridBagConstraints); + + jLabelHigh.setText("High"); + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 4; + gridBagConstraints.gridy = 2; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHEAST; + gridBagConstraints.insets = new java.awt.Insets(0, 0, 0, 10); + getContentPane().add(jLabelHigh, gridBagConstraints); + + jSliderChangeAmount.setMajorTickSpacing(50); + jSliderChangeAmount.setMinorTickSpacing(5); + jSliderChangeAmount.setPaintTicks(true); + jSliderChangeAmount.addChangeListener(new javax.swing.event.ChangeListener() { + public void stateChanged(javax.swing.event.ChangeEvent evt) { + jSliderChangeAmountStateChanged(evt); + } + }); + + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 2; + gridBagConstraints.gridy = 1; + gridBagConstraints.gridwidth = 3; + gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL; + gridBagConstraints.ipadx = 154; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(0, 0, 0, 10); + getContentPane().add(jSliderChangeAmount, gridBagConstraints); + + jLabelInput.setText("Input"); + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 3; + gridBagConstraints.gridwidth = 2; + gridBagConstraints.anchor = 
java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(20, 10, 0, 0); + getContentPane().add(jLabelInput, gridBagConstraints); + + jButtonRec.setText("Rec"); + jButtonRec.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + jButtonRecActionPerformed(evt); + } + }); + + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 2; + gridBagConstraints.gridy = 5; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTH; + getContentPane().add(jButtonRec, gridBagConstraints); + + jLabelMedium.setText("Medium"); + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 3; + gridBagConstraints.gridy = 2; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + getContentPane().add(jLabelMedium, gridBagConstraints); + + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 5; + gridBagConstraints.ipadx = 30; + getContentPane().add(jLabel1, gridBagConstraints); + + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 5; + gridBagConstraints.gridy = 5; + gridBagConstraints.ipadx = 30; + getContentPane().add(jLabel2, gridBagConstraints); + + java.awt.Dimension screenSize = java.awt.Toolkit.getDefaultToolkit().getScreenSize(); + setBounds((screenSize.width-382)/2, (screenSize.height-560)/2, 382, 560); + }// //GEN-END:initComponents + + private void jListInputMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jListInputMouseClicked + int numClicks = evt.getClickCount(); + if (numClicks==2) + { + getInputIndex(); + + if (inputIndex>0) + { + if (!bPlaying && !bRecording) + jButtonPlay.doClick(); + } + else if (inputIndex==0 && !bStarted) + jButtonStart.doClick(); + } + }//GEN-LAST:event_jListInputMouseClicked + + private Clip playClip = null; + private void 
jButtonPlayActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonPlayActionPerformed + if (!bRecording) + { + if (!bPlaying) + { + bPlaying = true; + try + { + if (inputIndex <= 0) + playFile = null; + else if (inputIndex>builtInFileNameList.size()) + { + try + { + playFile = new BufferedInputStream(new FileInputStream((String)listItems.get(inputIndex))); + } catch (FileNotFoundException fnf) + { + fnf.printStackTrace(); + } + } else + { + playFile = ChangeMyVoiceUI.class.getResourceAsStream("wav/samples"+((String) builtInFileNameList.get(inputIndex-1))); + } + AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(playFile); + AudioFormat format = audioInputStream.getFormat(); + DataLine.Info lineInfo = new DataLine.Info(Clip.class, format); + playClip = (Clip) AudioSystem.getLine(lineInfo); + playClip.addLineListener(new LineListener() { + public void update(LineEvent le) { + if (le.getType().equals(LineEvent.Type.STOP)) { + bPlaying = false; + playClip.close(); + playClip = null; + try { + playFile.close(); + } catch (IOException e) { + e.printStackTrace(); + } + playFile = null; + updateGUIPlaying(); + } + } + }); + playClip.open(audioInputStream); + playClip.start(); + } catch (Exception e) { + e.printStackTrace(); + } + } else { + bPlaying = false; + if (playClip != null) { + playClip.stop(); + playClip = null; + } + if (playFile != null) { + try { + playFile.close(); + } catch (IOException e) { + e.printStackTrace(); + } + playFile = null; + } + } + updateGUIPlaying(); + } + }//GEN-LAST:event_jButtonPlayActionPerformed + + private void updateGUIPlaying() + { + if (bPlaying) { + jButtonPlay.setText("Stop"); + } else { + jButtonPlay.setText("Play"); + } + jButtonRec.setEnabled(!bPlaying); + jButtonAdd.setEnabled(!bPlaying); + jButtonDel.setEnabled(!bPlaying); + jListInput.setEnabled(!bPlaying); + jButtonStart.setEnabled(!bPlaying); + + } + + + private void jButtonDelActionPerformed(java.awt.event.ActionEvent evt) 
{//GEN-FIRST:event_jButtonDelActionPerformed + if (inputIndex>=builtInFileNameList.size()+1) + { + listItems.remove(inputIndex); + inputIndex--; + UpdateInputList(); + } + }//GEN-LAST:event_jButtonDelActionPerformed + + private void jListInputValueChanged(javax.swing.event.ListSelectionEvent evt) {//GEN-FIRST:event_jListInputValueChanged + + getInputIndex(); + + if (inputIndex==0) + jButtonPlay.setEnabled(false); + else + jButtonPlay.setEnabled(true); + + if (inputIndex0.001) //If currently processing and changed modification amount + { + jButtonStart.doClick(); //Stop + jButtonStart.doClick(); //and restart to adapt to new target voice + } + }//GEN-LAST:event_jSliderChangeAmountStateChanged + + private void jButtonExitActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonExitActionPerformed + System.exit(0); + }//GEN-LAST:event_jButtonExitActionPerformed + + private void jComboBoxTargetVoiceActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jComboBoxTargetVoiceActionPerformed + int prevTargetIndex = targetIndex; + + getTargetIndex(); + + if (bStarted && prevTargetIndex != targetIndex) //If currently processing and changed target voice type + { + jButtonStart.doClick(); //Stop + jButtonStart.doClick(); //and restart to adapt to new target voice + } + }//GEN-LAST:event_jComboBoxTargetVoiceActionPerformed + + private void formMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_formMouseClicked + + }//GEN-LAST:event_formMouseClicked + + public void getTargetIndex() + { + targetIndex = jComboBoxTargetVoice.getSelectedIndex(); + if (targetNames[targetIndex]=="Telephone") + modParams.fs = 8000; + else + modParams.fs = 16000; + + boolean bChangeEnabled = true; + if (targetNames[targetIndex]=="Jet Pilot" || + targetNames[targetIndex]=="Old Radio" || + targetNames[targetIndex]=="Telephone") + { + bChangeEnabled = false; + } + + jLabelChangeAmount.setEnabled(bChangeEnabled); + jLabelLow.setEnabled(bChangeEnabled); + 
jLabelMedium.setEnabled(bChangeEnabled); + jLabelHigh.setEnabled(bChangeEnabled); + jSliderChangeAmount.setEnabled(bChangeEnabled); + + if (targetNames[targetIndex]=="Robot") + jLabelChangeAmount.setText("Pitch"); + else + jLabelChangeAmount.setText("Change Amount"); + } + + private void jButtonStartActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonStartActionPerformed + if (!bStarted) + { + bStarted = true; + updateGUIStart(); + getParameters(); + changeVoice(); + } else { + bStarted = false; + updateGUIStart(); + online.requestStop(); + + //Close the source and the target datalines to be able to use them repeatedly + if (microphone!=null) + { + microphone.close(); + microphone = null; + } + + if (loudspeakers != null) + { + loudspeakers.close(); + loudspeakers = null; + } + + if (inputStream != null) + { + try { + inputStream.close(); + } catch (IOException e) { + e.printStackTrace(); + } + inputStream = null; + } + + if (inputFile != null) { + try { + inputFile.close(); + } catch (IOException e) { + e.printStackTrace(); + } + inputFile = null; + + } + // + + jButtonStart.setText("Start"); + jButtonRec.setEnabled(true); + jButtonPlay.setEnabled(true); + jButtonAdd.setEnabled(true); + if (inputIndex>builtInFileNameList.size()) + jButtonDel.setEnabled(true); + jListInput.setEnabled(true); + + } + }//GEN-LAST:event_jButtonStartActionPerformed + + private void updateGUIStart() + { + if (bStarted) + { + jButtonStart.setText("Stop"); + } else { + jButtonStart.setText("Start"); + } + jButtonRec.setEnabled(!bStarted); + jButtonPlay.setEnabled(!bStarted); + jButtonAdd.setEnabled(!bStarted); + jButtonDel.setEnabled(!bStarted && inputIndex>builtInFileNameList.size()); + jListInput.setEnabled(!bStarted); + } + + /* This function gets the modification parameters from the GUI + * and fills in the modParams object + */ + private void getParameters() { + getInputIndex(); + getTargetIndex(); + getAmount(); + } + + /*This function opens source and target 
datalines and starts real-time voice modification + * using the parameters in the modParams object + */ + private void changeVoice() { + int channels = 1; + + AudioFormat audioFormat = null; + + if (inputIndex == 0) //Online processing using microphone + { + audioFormat = new AudioFormat( + AudioFormat.Encoding.PCM_SIGNED, modParams.fs, 16, channels, 2*channels, modParams.fs, + false); + + if (microphone != null) + microphone.close(); + + microphone = getMicrophone(audioFormat); + + if (microphone != null) + { + audioFormat = microphone.getFormat(); + modParams.fs = (int)audioFormat.getSampleRate(); + } + } + else //Online processing using pre-recorded wav file + { + if (inputIndex>0) + { + if (inputIndex>builtInFileNameList.size()) + { + String inputFileNameFull = (String)listItems.get(inputIndex); + try { + inputFile = new BufferedInputStream(new FileInputStream(inputFileNameFull)); + } catch (FileNotFoundException fnf) { + fnf.printStackTrace(); + } + } + else + inputFile = ChangeMyVoiceUI.class.getResourceAsStream("wav/samples/"+((String)builtInFileNameList.get(inputIndex-1))); + } + else + inputFile = null; + + if (inputFile != null) + { + try { + inputStream = AudioSystem.getAudioInputStream(inputFile); + } catch (UnsupportedAudioFileException e) { + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + if (inputStream != null) + { + audioFormat = inputStream.getFormat(); + modParams.fs = (int)audioFormat.getSampleRate(); + } + } + + if (loudspeakers != null) + loudspeakers.close(); + + try { + DataLine.Info info = new DataLine.Info(SourceDataLine.class, + audioFormat); + loudspeakers = (SourceDataLine) AudioSystem.getLine(info); + loudspeakers.open(audioFormat); + System.out.println("Loudspeaker format: " + loudspeakers.getFormat()); + } catch (LineUnavailableException e) { + e.printStackTrace(); + } + + // Choose an audio effect + InlineDataProcessor effect = null; + int bufferSize = 
SignalProcUtils.getDFTSize(modParams.fs); + + if (targetNames[targetIndex]=="Robot") + { + double targetHz = 200+(amount-0.5)*200; + bufferSize = (int) (modParams.fs / targetHz * 4 /*-fold overlap in ola*/ ); + + effect = new Robotiser.PhaseRemover(MathUtils.closestPowerOfTwoAbove(bufferSize), 1.0); + } + else if (targetNames[targetIndex]=="Whisper") + { + effect = new LPCWhisperiser(SignalProcUtils.getLPOrder(modParams.fs), 0.4+0.6*amount); + } + else if (targetNames[targetIndex]=="Dwarf1") //Using freq. domain LP spectrum modification + { + double [] vscales = {1.3+0.5*amount}; + int p = SignalProcUtils.getLPOrder(modParams.fs); + if (bufferSize<1024) + bufferSize=1024; + effect = new VocalTractScalingProcessor(p, modParams.fs, bufferSize, vscales); + } + else if (targetNames[targetIndex]=="Dwarf2") //Using freq. domain DFT magnitude spectrum modification + { + double [] vscales = {1.3+0.5*amount}; + effect = new VocalTractScalingSimpleProcessor(bufferSize, vscales); + } + else if (targetNames[targetIndex]=="Ogre1") //Using freq. domain LP spectrum modification + { + double [] vscales = {0.90-0.1*amount}; + int p = SignalProcUtils.getLPOrder(modParams.fs); + if (bufferSize<1024) + bufferSize=1024; + effect = new VocalTractScalingProcessor(p, modParams.fs, bufferSize, vscales); + } + else if (targetNames[targetIndex]=="Ogre2") //Using freq. domain DFT magnitude spectrum modification + { + double [] vscales = {0.90-0.1*amount}; + effect = new VocalTractScalingSimpleProcessor(bufferSize, vscales); + } + else if (targetNames[targetIndex]=="Giant1") //Using freq. domain LP spectrum modification + { + double [] vscales = {0.75-0.1*amount}; + int p = SignalProcUtils.getLPOrder(modParams.fs); + if (bufferSize<1024) + bufferSize=1024; + effect = new VocalTractScalingProcessor(p, modParams.fs, bufferSize, vscales); + } + else if (targetNames[targetIndex]=="Giant2") //Using freq. 
domain DFT magnitude spectrum modification + { + double [] vscales = {0.75-0.1*amount}; + effect = new VocalTractScalingSimpleProcessor(bufferSize, vscales); + } + else if (targetNames[targetIndex]=="Echo") + { + int [] delaysInMiliseconds = {100+(int)(20*amount), 200+(int)(50*amount), 300+(int)(100*amount)}; + double [] amps = {0.8, -0.7, 0.9}; + + int maxDelayInMiliseconds = MathUtils.getMax(delaysInMiliseconds); + int maxDelayInSamples = (int)(maxDelayInMiliseconds/1000.0*modParams.fs); + + if (bufferSize= 0; i--) { + if (formats[i].getChannels() == 1 + && formats[i].getFrameSize() == 2) { + lineFormat = formats[i]; + break; + } + } + System.err.println("Using instead: "+lineFormat); + } + if (lineFormat == null) { + throw new LineUnavailableException("Cannot get any mono line with 16 bit"); + } + line.open(lineFormat, 4096); + + } catch (LineUnavailableException e) { + e.printStackTrace(); + System.exit(1); + } + return line; + } + + public InlineDataProcessor getLPCrossSynthEffect(int lpCrossSynthFileInd, int bufferSize) + { + InlineDataProcessor effect = null; + + effect = new LPCCrossSynthesisOnline(SignalProcUtils.getLPOrder(modParams.fs), bufferSize, "wav/lp_cross_synth/"+lpCrossSynthFiles[lpCrossSynthFileInd], modParams.fs); + + return effect; + } + + /** + * @param args the command line arguments + */ + public static void main(String args[]) { + java.awt.EventQueue.invokeLater(new Runnable() { + public void run() { + new ChangeMyVoiceUI().setVisible(true); + } + }); + } + + // Variables declaration - do not modify//GEN-BEGIN:variables + private javax.swing.JButton jButtonAdd; + private javax.swing.JButton jButtonDel; + private javax.swing.JButton jButtonExit; + private javax.swing.JButton jButtonPlay; + private javax.swing.JButton jButtonRec; + private javax.swing.JButton jButtonStart; + private javax.swing.JComboBox jComboBoxTargetVoice; + private javax.swing.JLabel jLabel1; + private javax.swing.JLabel jLabel2; + private javax.swing.JLabel 
jLabelChangeAmount; + private javax.swing.JLabel jLabelHigh; + private javax.swing.JLabel jLabelInput; + private javax.swing.JLabel jLabelLow; + private javax.swing.JLabel jLabelMedium; + private javax.swing.JLabel jLabelTargetVoice; + private javax.swing.JList jListInput; + private javax.swing.JScrollPane jScrollList; + private javax.swing.JSlider jSliderChangeAmount; + // End of variables declaration//GEN-END:variables +} diff --git a/marytts-jungle/src/main/java/marytts/signalproc/demo/LPCCrossSynthesisOnline.java b/marytts-jungle/src/main/java/marytts/signalproc/demo/LPCCrossSynthesisOnline.java index b4ab57d1..3b3f0a67 100644 --- a/marytts-jungle/src/main/java/marytts/signalproc/demo/LPCCrossSynthesisOnline.java +++ b/marytts-jungle/src/main/java/marytts/signalproc/demo/LPCCrossSynthesisOnline.java @@ -17,8 +17,8 @@ * along with this program. If not, see . * */ -package marytts.signalproc.demo; - +package marytts.signalproc.demo; + import java.io.IOException; import java.io.InputStream; import java.util.Arrays; @@ -36,81 +36,81 @@ import marytts.util.data.BufferedDoubleDataSource; import marytts.util.data.DoubleDataSource; import marytts.util.data.SequenceDoubleDataSource; import marytts.util.data.audio.AudioDoubleDataSource; - - -public class LPCCrossSynthesisOnline extends LPCAnalysisResynthesis { - protected int frameLength; - protected AudioInputStream residualStream; - protected DoubleDataSource newResidual; - protected DoubleDataSource padding1; - protected DoubleDataSource paddedExcitation; - protected FrameProvider newResidualAudioFrames; - protected int samplingRate; - protected InputStream resStream; - protected String resFile; - - public LPCCrossSynthesisOnline(int p, int frmLen, String inResFile, int fs) - { - super(p); - - this.resFile = inResFile; - this.frameLength = frmLen; - this.samplingRate = fs; - - this.resStream = null; - this.residualStream = null; - this.newResidual = null; - this.padding1 = null; - this.paddedExcitation = null; - 
this.newResidualAudioFrames = null; - } - - /** - * Replace residual with new residual from audio signal, - * adapting the gain in order to maintain overall volume. - */ - protected void processLPC(LpCoeffs coeffs, double[] residual) - { - if (newResidualAudioFrames==null || !newResidualAudioFrames.hasMoreData()) - { - resStream = ChangeMyVoiceUI.class.getResourceAsStream(resFile); - - try { - residualStream = AudioSystem.getAudioInputStream(resStream); - } catch (UnsupportedAudioFileException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - newResidual = new AudioDoubleDataSource(residualStream); - padding1 = new BufferedDoubleDataSource(new double[3*frameLength/4]); - paddedExcitation = new SequenceDoubleDataSource(new DoubleDataSource[]{padding1, newResidual}); - newResidualAudioFrames = new FrameProvider(paddedExcitation, Window.get(Window.HANNING, frameLength, 0.5), frameLength, frameLength/4, samplingRate, false); - } - - double gain = coeffs.getGain(); - double[] frame = newResidualAudioFrames.getNextFrame(); - - assert frame.length == residual.length; - - int excP = 3; - LpCoeffs newCoeffs = LpcAnalyser.calcLPC(frame, excP); - double newResidualGain = newCoeffs.getGain(); - //double[] newResidual = ArrayUtils.subarray(new FIRFilter(oneMinusA).apply(frame),0,frame.length); - //System.arraycopy(newResidual, 0, residual, 0, residual.length); - double gainFactor = gain/newResidualGain; - Arrays.fill(residual, 0); - for (int n=0; n