package de.tudarmstadt.ukp.dkpro.core.tokit;

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

@TypeCapability(inputs = {"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token"}, outputs = {"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.tokit.PatternBasedTokenSegmenter", description = "Split up existing tokens again at particular split-chars.\nThe prefix states whether the split chars should be added as separate Token Tokens.\nIf the #INCLUDE_PREFIX precedes the split pattern, the pattern is included.\nConsequently, patterns following the #EXCLUDE_PREFIX, will not be added as a Token.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
/* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/tokit/PatternBasedTokenSegmenter.class */
// Re-segments existing Token annotations: each token's covered text is scanned
// character by character and cut wherever the matching split pattern changes.
// Segments matched by an "include" pattern (and segments matched by no pattern)
// become new Tokens; segments matched by an "exclude" pattern are dropped.
public class PatternBasedTokenSegmenter extends JCasAnnotator_ImplBase {
    /** Prefix marking a pattern whose matched text is itself emitted as a token. */
    public static final String INCLUDE_PREFIX = "+|";
    /** Prefix marking a pattern whose matched text is not emitted as a token. */
    public static final String EXCLUDE_PREFIX = "-|";
    public static final String PARAM_DELETE_COVER = "deleteCover";

    @ConfigurationParameter(name = "deleteCover", mandatory = true, defaultValue = {"true"}, description = "Wether to remove the original token.\n\nDefault: true")
    private boolean deleteCover;
    public static final String PARAM_PATTERNS = "patterns";

    @ConfigurationParameter(name = "patterns", mandatory = true, description = "A list of regular expressions, prefixed with #INCLUDE_PREFIX or\n#EXCLUDE_PREFIX. If neither of the prefixes is used, #EXCLUDE_PREFIX is\nassumed.")
    private String[] rawPatterns;

    /**
     * Characters accumulated by {@link #getPattern(char, SplitPattern)} while the same
     * pattern keeps matching. Initialised eagerly so that getPattern never sees null.
     */
    private final StringBuilder buf = new StringBuilder();

    /** Compiled form of {@link #rawPatterns}, built in {@link #initialize(UimaContext)}. */
    private SplitPattern[] patterns;

    /**
     * A compiled split pattern together with the flag telling whether the text it
     * matches is emitted as a token.
     */
    public static class SplitPattern {
        final boolean includeInOutput;
        final Matcher matchter;

        /**
         * @param str regular expression (without include/exclude prefix)
         * @param z   {@code true} if text matched by this pattern is emitted as a token
         */
        public SplitPattern(String str, boolean z) {
            this.includeInOutput = z;
            // The matcher is created once and re-targeted with reset(...) for every check.
            this.matchter = Pattern.compile(str).matcher("");
        }
    }

    /**
     * Compiles the configured raw patterns. A leading {@link #INCLUDE_PREFIX} or
     * {@link #EXCLUDE_PREFIX} is stripped and decides the include flag; patterns
     * without a prefix are treated as excluded.
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.patterns = new SplitPattern[this.rawPatterns.length];
        for (int i = 0; i < this.rawPatterns.length; i++) {
            String raw = this.rawPatterns[i];
            if (raw.startsWith(INCLUDE_PREFIX)) {
                this.patterns[i] = new SplitPattern(raw.substring(INCLUDE_PREFIX.length()), true);
            } else if (raw.startsWith(EXCLUDE_PREFIX)) {
                this.patterns[i] = new SplitPattern(raw.substring(EXCLUDE_PREFIX.length()), false);
            } else {
                this.patterns[i] = new SplitPattern(raw, false);
            }
        }
    }

    /**
     * Splits every {@link Token} in the CAS at the points where the matching split
     * pattern changes from one character to the next.
     *
     * <p>New tokens and tokens to delete are collected first and applied after the
     * iteration, so the index is not changed while it is being iterated.
     *
     * @param jCas the CAS whose tokens are re-segmented
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        List<Token> toAdd = new ArrayList<Token>();
        List<Token> toRemove = new ArrayList<Token>();

        for (Token token : JCasUtil.select(jCas, Token.class)) {
            String text = token.getCoveredText();
            if (text == null || text.isEmpty()) {
                // Nothing to split; also avoids charAt(0) failing on a zero-length token.
                continue;
            }

            // Start each token with a clean buffer so leftovers from the previous token
            // cannot influence the match of this token's first character.
            this.buf.setLength(0);

            int offset = token.getBegin();
            int segmentStart = 0;
            SplitPattern lastPattern = getPattern(text.charAt(0), null);

            for (int i = 1; i < text.length(); i++) {
                SplitPattern current = getPattern(text.charAt(i), lastPattern);
                if (current != lastPattern) {
                    // The segment [segmentStart, i) ended. Emit it unless it was matched
                    // by an excluded pattern. Every such segment is emitted, not only
                    // the first one.
                    if (lastPattern == null || lastPattern.includeInOutput) {
                        addToken(jCas, offset + segmentStart, offset + i, toAdd);
                    }
                    segmentStart = i;
                }
                lastPattern = current;
            }

            if (segmentStart == 0) {
                // No split happened: re-creating the same token would be pointless. The
                // token is only removed if it consists entirely of excluded text.
                if (lastPattern != null && !lastPattern.includeInOutput) {
                    toRemove.add(token);
                }
                continue;
            }

            if (this.deleteCover) {
                toRemove.add(token);
            }
            // The remainder after the last split point forms the final segment.
            if (lastPattern == null || lastPattern.includeInOutput) {
                addToken(jCas, offset + segmentStart, offset + text.length(), toAdd);
            }
        }

        for (Token added : toAdd) {
            added.addToIndexes();
        }
        for (Token removed : toRemove) {
            removed.removeFromIndexes();
        }
    }

    /**
     * Creates a token for the given document offsets and queues it in {@code sink}.
     * Empty spans are ignored.
     *
     * @param jCas  the CAS the token is created in
     * @param begin begin offset (inclusive) in the document
     * @param end   end offset (exclusive) in the document
     * @param sink  list collecting tokens to be indexed later
     */
    private void addToken(JCas jCas, int begin, int end, List<Token> sink) {
        if (begin == end) {
            return;
        }
        sink.add(new Token(jCas, begin, end));
    }

    /**
     * Appends {@code c} to the internal buffer and returns the first configured pattern
     * that matches the whole buffer.
     *
     * <p>If the matching pattern differs from {@code splitPattern}, or if no pattern
     * matches, the buffer is cleared; it keeps growing only while the same pattern
     * matches on consecutive calls.
     *
     * @param c            the next character of the token text
     * @param splitPattern the pattern returned for the previous character, or
     *                     {@code null} if there was none
     * @return the matching pattern, or {@code null} if no pattern matches
     */
    SplitPattern getPattern(char c, SplitPattern splitPattern) {
        this.buf.append(c);
        for (SplitPattern candidate : this.patterns) {
            candidate.matchter.reset(this.buf);
            if (candidate.matchter.matches()) {
                if (candidate != splitPattern) {
                    this.buf.setLength(0);
                }
                return candidate;
            }
        }
        this.buf.setLength(0);
        return null;
    }
}
