package de.tudarmstadt.ukp.dkpro.core.tokit;

import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceMetadata;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceObjectProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.jxpath.JXPathContext;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;

/**
 * Merges all {@link Token}s covered by an annotation of a configurable type into a single token.
 *
 * <p>For every annotation of type {@link #PARAM_ANNOTATION_TYPE} that covers at least two tokens
 * (and, if configured, matches the JXPath {@link #PARAM_CONSTRAINT}), the first covered token is
 * extended to the end of the last covered token. The remaining tokens, together with their lemma
 * and POS annotations, are removed from the indexes. What happens to the lemma of the merged token
 * is controlled by {@link #PARAM_LEMMA_MODE}; its POS can be replaced via
 * {@link #PARAM_POS_VALUE} / {@link #PARAM_POS_TYPE}.
 */
@TypeCapability(inputs = {"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma"}, outputs = {"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.tokit.TokenMerger", description = "Merges any Tokens that are covered by a given annotation type. E.g. this component can be used\nto create a single tokens from all tokens that constitute a multi-token named entity.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
/* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/tokit/TokenMerger.class */
public class TokenMerger extends JCasAnnotator_ImplBase {
    /** Name of the parameter holding the annotation type whose covered tokens are merged. */
    public static final String PARAM_ANNOTATION_TYPE = "annotationType";

    @ConfigurationParameter(name = PARAM_ANNOTATION_TYPE, mandatory = true, description = "Annotation type for which tokens should be merged.")
    private String annotationType;

    /** Name of the parameter holding an optional JXPath filter on the covering annotations. */
    public static final String PARAM_CONSTRAINT = "constraint";

    @ConfigurationParameter(name = PARAM_CONSTRAINT, mandatory = false, description = "A constraint on the annotations that should be considered in form of a JXPath statement.\nExample: set #PARAM_ANNOTATION_TYPE to a NamedEntity type and set the\n#PARAM_CONSTRAINT to \".[value = 'LOCATION']\" to merge only tokens that are\npart of a location named entity.")
    private String constraint;

    /** Name of the parameter selecting the {@link LemmaMode}. */
    public static final String PARAM_LEMMA_MODE = "lemmaMode";

    @ConfigurationParameter(name = PARAM_LEMMA_MODE, mandatory = true, defaultValue = {"JOIN"}, description = "Configure what should happen to the lemma of the merged tokens. It is possible to JOIN\nthe lemmata to a single lemma (space separated), to REMOVE the lemma or LEAVE the lemma\nof the first token as-is.")
    private LemmaMode lemmaMode;

    /** Name of the parameter holding the POS tag value to set on the merged token. */
    public static final String PARAM_POS_VALUE = "posValue";

    @ConfigurationParameter(name = PARAM_POS_VALUE, mandatory = false, description = "Set a new POS value for the new merged token. This is the actual tag set value and is subject\nto tagset mapping. For example when merging tokens for named entities, the new POS value\nmay be set to \"NNP\" (English/Penn Treebank Tagset).")
    private String posValue;

    /** Name of the parameter holding the POS type name to use instead of tag set mapping. */
    public static final String PARAM_POS_TYPE = "posType";

    @ConfigurationParameter(name = PARAM_POS_TYPE, mandatory = false, description = "Set a new POS tag for the new merged token. This is the mapped type. If this is specified,\ntag set mapping will not be performed. This parameter has no effect unless PARAM_POS_VALUE\nis also set.")
    private String posType;

    /** Name of the parameter overriding the language passed to the mapping provider. */
    public static final String PARAM_LANGUAGE = "language";

    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false, description = "Use this language instead of the document language to resolve the model and tag set mapping.")
    protected String language;

    /** Name of the parameter overriding the location of the POS tag set mapping. */
    public static final String PARAM_POS_MAPPING_LOCATION = "POSMappingLocation";

    @ConfigurationParameter(name = PARAM_POS_MAPPING_LOCATION, mandatory = false, description = "Override the tagset mapping.")
    protected String posMappingLocation;

    /** Resolves {@link #posValue} to a POS type; only consulted when {@link #posType} is unset. */
    private MappingProvider mappingProvider;

    /* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/tokit/TokenMerger$LemmaMode.class */
    /** Strategy applied to the lemma of a merged token. */
    public enum LemmaMode {
        /** Join the lemma values of all merged tokens into one lemma, separated by spaces. */
        JOIN,
        /** Detach the lemma from the merged token and remove it from the indexes. */
        REMOVE,
        /** Keep the lemma of the first token; only its offsets are adjusted to the merged token. */
        LEAVE
    }

    /**
     * Initializes the component and sets up the POS mapping provider with its defaults and the
     * overrides taken from {@link #PARAM_POS_MAPPING_LOCATION} and {@link #PARAM_LANGUAGE}.
     *
     * @param uimaContext the UIMA context passed on to the superclass
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.mappingProvider = new MappingProvider();
        this.mappingProvider.setDefault(ResourceObjectProviderBase.LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/api/lexmorph/tagset/${language}-${pos.tagset}-pos.map");
        this.mappingProvider.setDefault(MappingProvider.BASE_TYPE, POS.class.getName());
        this.mappingProvider.setDefault(ResourceMetadata.META_POS_TAGSET, "default");
        this.mappingProvider.setOverride(ResourceObjectProviderBase.LOCATION, this.posMappingLocation);
        this.mappingProvider.setOverride("language", this.language);
    }

    /**
     * Merges the tokens below every matching covering annotation and afterwards removes all
     * annotations that became obsolete (absorbed tokens, their lemmata and POS tags, replaced
     * POS tags and dropped lemmata) from the indexes.
     *
     * @param jCas the document to process
     * @throws AnalysisEngineProcessException declared by the overridden method
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        CAS cas = jCas.getCas();
        // The mapping provider is only needed when a new POS value has to be assigned.
        if (this.posValue != null) {
            this.mappingProvider.configure(cas);
        }

        // Work on a snapshot of the covering annotations: merging removes annotations from and
        // adds annotations to the indexes while we iterate.
        List<AnnotationFS> covers = new ArrayList<AnnotationFS>(
                CasUtil.select(cas, CasUtil.getAnnotationType(cas, this.annotationType)));

        // Removal is deferred until all covers have been processed.
        Collection<Annotation> toRemove = new ArrayList<>();
        for (AnnotationFS cover : covers) {
            List<Token> covered = JCasUtil.selectCovered(Token.class, cover);
            if (covered.size() < 2 || !matchesConstraint(cover)) {
                continue;
            }
            mergeTokens(jCas, covered, toRemove);
        }

        for (Annotation obsolete : toRemove) {
            obsolete.removeFromIndexes();
        }
    }

    /** Returns true if no constraint is configured or the JXPath constraint selects something on the cover. */
    private boolean matchesConstraint(AnnotationFS cover) {
        return this.constraint == null
                || JXPathContext.newContext(cover).iterate(this.constraint).hasNext();
    }

    /** Extends the first covered token over all covered tokens and schedules the rest for removal. */
    private void mergeTokens(JCas jCas, List<Token> covered, Collection<Annotation> toRemove) {
        Token merged = covered.get(0);

        // Offsets must not change while the annotation is indexed, hence remove / re-add.
        merged.removeFromIndexes();
        merged.setEnd(covered.get(covered.size() - 1).getEnd());
        merged.addToIndexes();

        if (this.posValue != null) {
            updatePos(merged, toRemove);
        }

        // Collect the lemma values in token order; they are needed if the lemmata get joined.
        List<String> lemmata = new ArrayList<String>();
        if (merged.getLemma() != null) {
            lemmata.add(merged.getLemma().getValue());
        }

        for (Token absorbed : covered.subList(1, covered.size())) {
            Lemma absorbedLemma = absorbed.getLemma();
            if (absorbedLemma != null) {
                lemmata.add(absorbedLemma.getValue());
                toRemove.add(absorbedLemma);
            }
            POS absorbedPos = absorbed.getPos();
            if (absorbedPos != null) {
                toRemove.add(absorbedPos);
            }
            toRemove.add(absorbed);
        }

        updateLemma(jCas, merged, lemmata, toRemove);
    }

    /** Applies the configured {@link LemmaMode} to the merged token and aligns the lemma offsets with it. */
    private void updateLemma(JCas jCas, Token merged, List<String> lemmata,
            Collection<Annotation> toRemove) {
        Lemma lemma = merged.getLemma();
        if (this.lemmaMode == LemmaMode.JOIN) {
            if (!lemmata.isEmpty()) {
                if (lemma == null) {
                    lemma = new Lemma(jCas);
                    // Attach the new lemma to the token. Without this the joined lemma was
                    // neither reachable from the token nor given offsets or added to the indexes.
                    merged.setLemma(lemma);
                }
                lemma.setValue(StringUtils.join(lemmata, " "));
            } else if (lemma != null) {
                // Nothing to join: drop the lemma.
                merged.setLemma(null);
                toRemove.add(lemma);
            }
        } else if (this.lemmaMode == LemmaMode.REMOVE && lemma != null) {
            merged.setLemma(null);
            toRemove.add(lemma);
        }

        // Whatever lemma is left on the token has to span the merged token.
        Lemma remaining = merged.getLemma();
        if (remaining != null) {
            remaining.removeFromIndexes();
            remaining.setBegin(merged.getBegin());
            remaining.setEnd(merged.getEnd());
            remaining.addToIndexes();
        }
    }

    /**
     * Sets the configured POS value on the given (already extended) token.
     *
     * <p>The POS type is {@link #posType} if set, otherwise it is looked up from the mapping
     * provider using {@link #posValue}. An existing POS annotation of exactly that type is reused
     * and moved to the token's offsets; an existing POS of a different type is scheduled for
     * removal and a new one is created.
     *
     * @param token the merged token
     * @param toRemove collector for annotations to be removed from the indexes later
     */
    private void updatePos(Token token, Collection<Annotation> toRemove) {
        CAS cas = token.getCAS();
        Type type = this.posType != null
                ? CasUtil.getType(cas, this.posType)
                : this.mappingProvider.getTagType(this.posValue);

        POS pos = token.getPos();
        if (pos != null && !pos.getType().equals(type)) {
            toRemove.add(pos);
            pos = null;
        }

        if (pos == null) {
            pos = (POS) cas.createAnnotation(type, token.getBegin(), token.getEnd());
        } else {
            // Same remove / modify / re-add cycle as used for the token and the lemma, so the
            // offsets are not changed while the annotation sits in the indexes.
            pos.removeFromIndexes();
            pos.setBegin(token.getBegin());
            pos.setEnd(token.getEnd());
        }
        pos.addToIndexes();
        pos.setPosValue(this.posValue);
        token.setPos(pos);
    }
}
