package de.tudarmstadt.ukp.dkpro.core.tokit;

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.SegmenterBase;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import java.text.BreakIterator;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.jcas.JCas;

/**
 * Segmenter built on {@link java.text.BreakIterator}.
 *
 * <p>The text handed to {@link #process(JCas, String, int)} is first cut at the boundaries
 * reported by a sentence-level {@code BreakIterator}; each resulting sentence text is then cut at
 * the boundaries reported by a word-level {@code BreakIterator}. The actual annotations are
 * produced by the {@code createSentence}/{@code createToken} helpers inherited from
 * {@link SegmenterBase}.
 */
@TypeCapability(outputs = {"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter", description = "BreakIterator segmenter.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
/* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/tokit/BreakIteratorSegmenter.class */
public class BreakIteratorSegmenter extends SegmenterBase {
    /** Name of the configuration parameter that switches on the extra split at apostrophes. */
    public static final String PARAM_SPLIT_AT_APOSTROPHE = "splitAtApostrophe";

    /** When {@code true}, a token containing an apostrophe after its first character is split there. */
    @ConfigurationParameter(name = PARAM_SPLIT_AT_APOSTROPHE, mandatory = true, defaultValue = {"false"}, description = "Per default the Java BreakIterator does not split off contractions like\nJohn's into two tokens. When this parameter is enabled, a non-default token split is\ngenerated when an apostrophe (') is encountered.")
    private boolean splitAtApostrophe;

    /**
     * Splits {@code str} into sentences and tokenizes every sentence.
     *
     * @param jCas the CAS that receives the annotations and supplies the locale and document text
     * @param str the text to segment
     * @param i offset added to every boundary found in {@code str} to obtain document offsets
     * @throws AnalysisEngineProcessException declared by the overridden method; nothing in this
     *         body throws it directly
     */
    @Override
    protected void process(JCas jCas, String str, int i) throws AnalysisEngineProcessException {
        BreakIterator sentences = BreakIterator.getSentenceInstance(getLocale(jCas));
        sentences.setText(str);

        int sentenceBegin = sentences.first() + i;
        for (int boundary = sentences.next(); boundary != BreakIterator.DONE; boundary = sentences.next()) {
            int sentenceEnd = boundary + i;

            if (!isWriteSentence()) {
                // No sentence annotation requested: still tokenize the span, using the
                // offsets left in the array by trim().
                // NOTE(review): trim() presumably narrows the span in place to drop
                // surrounding whitespace - confirm against SegmenterBase.
                int[] span = {sentenceBegin, sentenceEnd};
                trim(jCas.getDocumentText(), span);
                String sentenceText = jCas.getDocumentText().substring(span[0], span[1]);
                processSentence(jCas, sentenceText, span[0]);
            } else {
                Sentence sentence = createSentence(jCas, sentenceBegin, sentenceEnd);
                // A null result means no sentence was created; such a span is not tokenized.
                if (sentence != null) {
                    processSentence(jCas, sentence.getCoveredText(), sentence.getBegin());
                }
            }

            sentenceBegin = sentenceEnd;
        }
    }

    /**
     * Tokenizes one sentence using a word-level {@link BreakIterator}.
     *
     * @param jCas the CAS that receives the tokens and supplies the locale
     * @param str the sentence text
     * @param i offset added to every boundary found in {@code str} to obtain document offsets
     */
    private void processSentence(JCas jCas, String str, int i) {
        BreakIterator words = BreakIterator.getWordInstance(getLocale(jCas));
        words.setText(str);

        int tokenBegin = words.first() + i;
        for (int boundary = words.next(); boundary != BreakIterator.DONE; boundary = words.next()) {
            int tokenEnd = boundary + i;

            Token token = createToken(jCas, tokenBegin, tokenEnd);
            if (token != null && this.splitAtApostrophe) {
                splitAtFirstApostrophe(jCas, token);
            }

            tokenBegin = tokenEnd;
        }
    }

    /**
     * Splits {@code token} at its first apostrophe, provided the apostrophe is not the token's
     * first character: a new token is created from the apostrophe to the old end, and the given
     * token is shortened to end where the new one begins.
     */
    private void splitAtFirstApostrophe(JCas jCas, Token token) {
        int relativeIndex = token.getCoveredText().indexOf("'");
        if (relativeIndex <= 0) {
            // Either no apostrophe at all (-1) or a leading one (0): leave the token untouched.
            return;
        }
        int splitOffset = relativeIndex + token.getBegin();
        // Create the trailing part first, while the original end offset is still available.
        createToken(jCas, splitOffset, token.getEnd());
        token.setEnd(splitOffset);
    }
}
