Sphinx4 OutOfMemoryError с пользовательской конфигурацией

Я хочу создать программу, которая распознает речь из файла .wav. Я попробовал код ниже, но он выдает исключение

Исключение в потоке «основной» java.lang.OutOfMemoryError: превышен лимит накладных расходов GC

Даже если мой "eclipse.ini" имеет это свойство:

--launcher.XXMaxPermSize 2048M 
--launcher.XXMaxPermSize 2048m 
-Xms2048m 
-Xmx2048m

Как я могу исправить это исключение?

Java-код:

import java.net.MalformedURLException;
import java.net.URL;

import edu.cmu.sphinx.frontend.util.AudioFileDataSource;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.ConfigurationManager;

public class TestRecognizer {

    public static void main(String[] args) {
        ConfigurationManager cm;

        if (args.length > 0) {
            cm = new ConfigurationManager(args[0]);
        } else {
            cm = new ConfigurationManager("english_use_LexTreeLinguist.xml");
        }

        URL audioURL = null;
        try {
            audioURL = new URL("file:./10001-90210-01803.wav");
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
        if(audioURL == null)
            throw new IllegalArgumentException("Given audio file doesn't exist.");

        // allocate the recognizer
        System.out.println("Loading recognizer");
        Recognizer recognizer = (Recognizer) cm.lookup("recognizer");
        recognizer.allocate();
        System.out.println("Loading audio");
        AudioFileDataSource dataSource = (AudioFileDataSource) cm.lookup("audioFileDataSource");
        dataSource.setAudioFile(audioURL, null);

        // loop the recognition until the programm exits.
        Result result;
        System.out.println("recognizing");`enter code here`
        while ((result = recognizer.recognize())!= null) {
            String resultText = result.getBestResultNoFiller();
            System.out.println(resultText);
        }
    }

}

Конфигурация XML-файла:

<config>
    <!-- ******************************************************** -->
    <!-- frequently tuned properties                              -->
    <!-- ******************************************************** -->

    <property name="absoluteBeamWidth"           value="-1"/>
    <property name="relativeBeamWidth"           value="1E-80"/>
    <property name="wordInsertionProbability"    value=".1"/>
    <property name="languageWeight"              value="8"/>
    <property name="silenceInsertionProbability" value="1"/>
    <property name="fillerInsertionProbability" value="1E-10"/>
    <property name="logLevel"                    value="WARNING"/>
    <property name="recognizer" value="recognizer"/>
    <property name="linguist"   value="lexTreeLinguist"/>
    <property name="frontend"   value="mfcFrontEnd"/>

    <!-- ******************************************************** -->
    <!-- The Recognizer configuration               -->
    <!-- ******************************************************** -->

    <component name="recognizer" 
               type="edu.cmu.sphinx.recognizer.Recognizer">
        <property name="decoder" value="decoder"/>
        <propertylist name="monitors">
        </propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- The Decoder configuration                              -->
    <!-- ******************************************************** -->

    <component name="decoder" type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="searchManager"/>
    </component>

    <component name="searchManager" 
        type="edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager">
        <property name="logMath" value="logMath"/>
        <property name="linguist" value="${linguist}"/>
        <property name="pruner" value="trivialPruner"/>
        <property name="scorer" value="threadedScorer"/>
        <property name="activeListFactory" value="activeList"/>
    </component>

    <component name="activeList" 
             type="edu.cmu.sphinx.decoder.search.SortingActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <component name="trivialPruner" 
                type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

    <component name="threadedScorer" 
                type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="${frontend}"/>
        <property name="isCpuRelative" value="true"/>
        <property name="numThreads" value="0"/>
        <property name="minScoreablesPerThread" value="10"/>
        <property name="scoreablesKeepFeature" value="true"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The linguist  configuration                              -->
    <!-- ******************************************************** -->

    <component name="lexTreeLinguist" 
                type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
        <property name="logMath" value="logMath"/>
        <property name="acousticModel" value="wsj"/>
        <property name="languageModel" value="trigramModel"/>
        <property name="dictionary" value="englishDict"/>
        <property name="addFillerWords" value="false"/>
        <property name="fillerInsertionProbability" value="${fillerInsertionProbability}"/>
        <property name="generateUnitStates" value="false"/>
        <property name="wantUnigramSmear" value="true"/>
        <property name="unigramSmearWeight" value="1"/>
        <property name="wordInsertionProbability" 
                value="${wordInsertionProbability}"/>
        <property name="silenceInsertionProbability" 
                value="${silenceInsertionProbability}"/>
        <property name="languageWeight" value="${languageWeight}"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Language Model configuration                         -->
    <!-- ******************************************************** -->
    <component name="trigramModel" 
            type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
        <property name="unigramWeight" value=".5"/>
        <property name="maxDepth" value="3"/>
        <property name="logMath" value="logMath"/>
        <property name="dictionary" value="englishDict"/>
        <property name="location"
            value="resource:/edu/cmu/sphinx/models/language/en-us.lm.dmp"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Dictionary configuration                            -->
    <!-- ******************************************************** -->
    <component name="englishDict" 
        type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
        <property name="dictionaryPath"
                  value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz/dict/cmudict.0.6d"/>
        <property name="fillerPath" 
              value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz/noisedict"/>
        <property name="addSilEndingPronunciation" value="false"/>
        <property name="wordReplacement" value="&lt;sil&gt;"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The acoustic model configuration                         -->
    <!-- ******************************************************** -->

    <component name="wsj"
               type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
        <property name="loader" value="wsjLoader"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <component name="wsjLoader" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
        <property name="logMath" value="logMath"/>
        <property name="unitManager" value="unitManager"/>
        <property name="location" value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The unit manager configuration                           -->
    <!-- ******************************************************** -->

    <component name="unitManager" 
        type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>

    <!-- ******************************************************** -->
    <!-- The frontend configuration                               -->
    <!-- ******************************************************** -->

    <component name="mfcFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <!--item>streamDataSource </item-->
            <item>audioFileDataSource </item>
            <item>preemphasizer </item>
            <item>windower </item>
            <item>fft </item>
            <item>melFilterBank </item>
            <item>dct </item>
            <item>batchCMN </item>
            <item>featureExtraction </item>
        </propertylist>
    </component>

    <component name="streamDataSource" 
                type="edu.cmu.sphinx.frontend.util.StreamDataSource">
        <property name="sampleRate" value="16000"/>
    <property name="bitsPerSample" value="16"/>
    <property name="bigEndianData" value="false"/>
    <property name="signedData" value="true"/>
    </component>

    <component name="audioFileDataSource" type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>

    <component name="preemphasizer"
        type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

    <component name="windower" 
               type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>

    <component name="fft" 
            type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>

    <component name="melFilterBank" 
          type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>

    <component name="dct" 
            type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

    <component name="batchCMN" 
               type="edu.cmu.sphinx.frontend.feature.BatchCMN"/>

    <component name="featureExtraction"
        type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>

    <!-- ******************************************************* -->
    <!--  Miscellaneous components                               -->
    <!-- ******************************************************* -->

    <component name="logMath" type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase" value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>

</config>

person Dorin    schedule 03.10.2014    source источник


Ответы (1)


Ваша конфигурация совершенно неверна, лучи слишком широкие, интерфейс настроен неправильно.

Вам нужно использовать конфигурацию по умолчанию default.config.xml, если вы хотите внести изменения или просто использовать высокоуровневый API без файлов XML. Для наилучшей точности декодирования вам необходимо использовать акустическую модель en-us-8khz, доступную в загрузках.

Если вы хотите транскрибировать звук 8 кГц, вам также необходимо вызвать распознаватель.setSampleRate(8000);

person Nikolay Shmyrev    schedule 03.10.2014