package de.tudarmstadt.ukp.dkpro.core.tokit;

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.jcas.JCas;

/**
 * Annotator that adds {@link Paragraph} annotations to a CAS by splitting the document text at
 * every match of a configurable regular expression.
 *
 * <p>The text between two consecutive matches (and between the document boundaries and the
 * nearest match) becomes one paragraph. The matched separator itself is never part of a
 * paragraph, and zero-length paragraphs are not created.
 */
@TypeCapability(outputs = {"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.tokit.ParagraphSplitter", description = "This class creates paragraph annotations for the given input document. It searches for the\noccurrence of two or more line-breaks (Unix and Windows) and regards this as the boundary between\nparagraphs.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
public class ParagraphSplitter extends JCasAnnotator_ImplBase {
    /**
     * Split pattern meant for treating a single line break as a paragraph boundary.
     *
     * <p>NOTE(review): the first alternative requires the sequence {@code \n\r\n}, so a lone
     * Windows line break ({@code \r\n}) is only matched at its {@code \n} by the second
     * alternative, which leaves the {@code \r} at the end of the preceding paragraph. This looks
     * like a typo for {@code \r\n}; the value is left untouched here because it is a public
     * compile-time constant - confirm before changing.
     */
    public static final String SINGLE_LINE_BREAKS_PATTERN = "((\n\r\n)+(\r\n)*)|((\n)+(\n)*)";

    /**
     * Split pattern that treats two or more consecutive line breaks (either all {@code \r\n} or
     * all {@code \n}) as a paragraph boundary. This is the default split pattern.
     */
    public static final String DOUBLE_LINE_BREAKS_PATTERN = "((\r\n\r\n)+(\r\n)*)|((\n\n)+(\n)*)";

    /** Name of the configuration parameter holding the paragraph split pattern. */
    public static final String PARAM_SPLIT_PATTERN = "splitPattern";

    /** Regular expression whose matches separate paragraphs; set via {@link #PARAM_SPLIT_PATTERN}. */
    @ConfigurationParameter(name = PARAM_SPLIT_PATTERN, defaultValue = {DOUBLE_LINE_BREAKS_PATTERN}, description = "A regular expression used to detect paragraph splits.\n\nDefault: #DOUBLE_LINE_BREAKS_PATTERN (split on two consecutive line breaks)")
    private Pattern splitPattern;

    /**
     * Splits the document text of the given CAS into paragraphs and adds one {@link Paragraph}
     * annotation per non-empty span to the CAS indexes.
     *
     * @param jCas the CAS whose document text is split and which receives the annotations
     * @throws AnalysisEngineProcessException if the document text is missing ({@code null}) or
     *         empty
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        String documentText = jCas.getDocumentText();
        // A CAS without document text is reported the same way as an empty document instead of
        // failing with a NullPointerException further down.
        if (documentText == null || documentText.isEmpty()) {
            throw new AnalysisEngineProcessException(
                    new IllegalStateException("Document text is empty."));
        }

        Matcher matcher = this.splitPattern.matcher(documentText);
        int paragraphBegin = 0;
        // Plain find() continues after the previous match and steps over zero-length matches.
        // Restarting with find(previousEnd) would loop forever if the configured pattern can
        // match the empty string.
        while (matcher.find()) {
            addParagraph(jCas, paragraphBegin, matcher.start());
            paragraphBegin = matcher.end();
        }
        // Text after the last separator (or the whole document if nothing matched).
        addParagraph(jCas, paragraphBegin, documentText.length());
    }

    /**
     * Adds a paragraph covering {@code [begin, end)} to the CAS indexes, skipping empty spans
     * such as the one that precedes a separator located at the very start of the document.
     */
    private static void addParagraph(JCas jCas, int begin, int end) {
        if (begin < end) {
            new Paragraph(jCas, begin, end).addToIndexes();
        }
    }
}
