package de.tudarmstadt.ukp.dkpro.core.stanfordnlp;

import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import de.tudarmstadt.ukp.dkpro.core.api.resources.CasConfigurableProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceObjectProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.CoreNlpUtils;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.process.PTBEscapingProcessor;
import edu.stanford.nlp.util.CoreMap;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;

/**
 * UIMA annotator that runs a Stanford CRF sequence classifier over the tokens of each
 * {@link Sentence} and adds a {@link NamedEntity} annotation for every maximal run of consecutive
 * tokens that received the same non-{@code "O"} label.
 *
 * <p>The classifier model and the tag-to-type mapping are resolved per CAS through a model
 * provider and a {@link MappingProvider}; both honour the {@code language}, {@code modelVariant}
 * and location overrides configured on this component.</p>
 */
@TypeCapability(inputs = {"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence"}, outputs = {"de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordNamedEntityRecognizer", description = "Stanford Named Entity Recognizer component.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
public class StanfordNamedEntityRecognizer extends JCasAnnotator_ImplBase {
    /** Label the loop in {@link #process(JCas)} treats as "token is not part of any entity". */
    private static final String OUTSIDE_LABEL = "O";

    /** Name of the parameter controlling whether the model's tag set is logged on load. */
    public static final String PARAM_PRINT_TAGSET = "printTagSet";

    /** When {@code true}, the labels known to a model are logged at INFO level after loading. */
    @ConfigurationParameter(name = PARAM_PRINT_TAGSET, mandatory = true, defaultValue = {"false"}, description = "Log the tag set(s) when a model is loaded.")
    protected boolean printTagSet;

    /** Name of the parameter overriding the language used to resolve the model. */
    public static final String PARAM_LANGUAGE = "language";

    /** Optional language override; passed to both providers as the {@code "language"} override. */
    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false, description = "Use this language instead of the document language to resolve the model.")
    protected String language;

    /** Name of the parameter selecting a specific model variant. */
    public static final String PARAM_VARIANT = "modelVariant";

    /** Optional model variant; passed to both providers as the {@code "variant"} override. */
    @ConfigurationParameter(name = PARAM_VARIANT, mandatory = false, description = "Variant of a model the model. Used to address a specific model if here are multiple models\nfor one language.")
    protected String variant;

    /** Name of the parameter overriding the location the model is read from. */
    public static final String PARAM_MODEL_LOCATION = "modelLocation";

    /** Optional explicit model location; overrides the default location of the model provider. */
    @ConfigurationParameter(name = PARAM_MODEL_LOCATION, mandatory = false, description = "Location from which the model is read.")
    protected String modelLocation;

    /** Name of the parameter overriding the location of the tag mapping file. */
    public static final String PARAM_NAMED_ENTITY_MAPPING_LOCATION = "NamedEntityMappingLocation";

    /** Optional explicit mapping location; overrides the default location of the mapping provider. */
    @ConfigurationParameter(name = PARAM_NAMED_ENTITY_MAPPING_LOCATION, mandatory = false, description = "Location of the mapping file for named entity tags to UIMA types.")
    protected String mappingLocation;

    /** Name of the parameter enabling PTB3 escaping of token texts. */
    public static final String PARAM_PTB3_ESCAPING = "ptb3Escaping";

    /** When {@code true}, the words of a sentence are PTB-escaped before classification. */
    @ConfigurationParameter(name = PARAM_PTB3_ESCAPING, mandatory = true, defaultValue = {"true"}, description = "Enable all traditional PTB3 token transforms (like -LRB-, -RRB-).")
    private boolean ptb3Escaping;

    /** Name of the parameter listing extra opening-quote token texts. */
    public static final String PARAM_QUOTE_BEGIN = "quoteBegin";

    /** Extra opening-quote token texts handed to the PTB escaping helper; may be {@code null}. */
    @ConfigurationParameter(name = PARAM_QUOTE_BEGIN, mandatory = false, description = "List of extra token texts (usually single character strings) that should be treated like\nopening quotes and escaped accordingly before being sent to the parser.")
    private List<String> quoteBegin;

    /** Name of the parameter listing extra closing-quote token texts. */
    public static final String PARAM_QUOTE_END = "quoteEnd";

    /** Extra closing-quote token texts handed to the PTB escaping helper; may be {@code null}. */
    @ConfigurationParameter(name = PARAM_QUOTE_END, mandatory = false, description = "List of extra token texts (usually single character strings) that should be treated like\nclosing quotes and escaped accordingly before being sent to the parser.")
    private List<String> quoteEnd;

    /** Resolves and loads the sequence classifier for the CAS being processed. */
    private CasConfigurableProviderBase<AbstractSequenceClassifier<CoreMap>> modelProvider;

    /** Resolves the UIMA type to instantiate for a label produced by the classifier. */
    private MappingProvider mappingProvider;

    // NOTE(review): not referenced anywhere in this class; escaping is delegated to
    // CoreNlpUtils.applyPtbEscaping in process(). Kept to leave the class layout untouched.
    private final PTBEscapingProcessor<HasWord, String, Word> escaper = new PTBEscapingProcessor<>();

    /**
     * Sets up the model provider and the tag mapping provider.
     *
     * <p>No model is loaded here; both providers are only configured with their defaults and with
     * the overrides taken from this component's parameters.</p>
     *
     * @param uimaContext the UIMA context passed on to the superclass
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        this.modelProvider = new CasConfigurableProviderBase<AbstractSequenceClassifier<CoreMap>>() {
            {
                setContextObject(StanfordNamedEntityRecognizer.this);
                setDefault(ResourceObjectProviderBase.GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
                setDefault(ResourceObjectProviderBase.ARTIFACT_ID, "de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-ner-${language}-${variant}");
                setDefaultVariantsLocation("de/tudarmstadt/ukp/dkpro/core/stanfordnlp/lib/ner-default-variants.map");
                setDefault(ResourceObjectProviderBase.LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/stanfordnlp/lib/ner-${language}-${variant}.properties");
                setOverride(ResourceObjectProviderBase.LOCATION, StanfordNamedEntityRecognizer.this.modelLocation);
                setOverride("language", StanfordNamedEntityRecognizer.this.language);
                setOverride("variant", StanfordNamedEntityRecognizer.this.variant);
            }

            /**
             * Loads a CRF classifier from the given URL, transparently un-gzipping it when the
             * URL ends in {@code .gz}, and optionally logs the sorted list of its labels.
             *
             * @param url location of the serialized classifier
             * @return the loaded classifier
             * @throws IOException if the stream cannot be read, or wrapping the
             *         {@link ClassNotFoundException} raised while loading the classifier
             */
            @Override
            public AbstractSequenceClassifier<CoreMap> produceResource(URL url) throws IOException {
                InputStream inputStream = null;
                try {
                    inputStream = url.openStream();
                    if (url.toString().endsWith(".gz")) {
                        inputStream = new GZIPInputStream(inputStream);
                    }

                    CRFClassifier<CoreMap> classifier = CRFClassifier.getClassifier(inputStream);

                    if (StanfordNamedEntityRecognizer.this.printTagSet) {
                        List<String> tags = new ArrayList<String>();
                        for (String tag : classifier.classIndex) {
                            tags.add(tag);
                        }
                        Collections.sort(tags);

                        StringBuilder message = new StringBuilder();
                        message.append("Model contains [").append(classifier.classIndex.size()).append("] tags: ");
                        message.append(StringUtils.join(tags, " "));
                        StanfordNamedEntityRecognizer.this.getContext().getLogger().log(Level.INFO, message.toString());
                    }
                    return classifier;
                } catch (ClassNotFoundException e) {
                    throw new IOException(e);
                } finally {
                    // Best effort: a failure while closing must not mask the outcome of loading.
                    IOUtils.closeQuietly(inputStream);
                }
            }
        };

        this.mappingProvider = new MappingProvider();
        this.mappingProvider.setDefaultVariantsLocation("de/tudarmstadt/ukp/dkpro/core/stanfordnlp/lib/ner-default-variants.map");
        this.mappingProvider.setDefault(ResourceObjectProviderBase.LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/stanfordnlp/lib/ner-${language}-${variant}.map");
        this.mappingProvider.setDefault(MappingProvider.BASE_TYPE, NamedEntity.class.getName());
        this.mappingProvider.setOverride(ResourceObjectProviderBase.LOCATION, this.mappingLocation);
        this.mappingProvider.setOverride("language", this.language);
        this.mappingProvider.setOverride("variant", this.variant);
    }

    /**
     * Classifies the tokens of every sentence in the CAS and adds one {@link NamedEntity} per run
     * of consecutive tokens sharing the same label other than {@code "O"}.
     *
     * <p>An entity spans from the begin offset of the first token of the run to the end offset of
     * its last token. A run that reaches the end of the sentence is closed after the last token,
     * so entities in sentence-final position are annotated as well.</p>
     *
     * @param jCas the document to annotate
     * @throws AnalysisEngineProcessException declared by the overridden method
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        CAS cas = jCas.getCas();
        this.modelProvider.configure(cas);
        this.mappingProvider.configure(cas);

        for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
            List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);

            List<HasWord> words = new ArrayList<HasWord>(tokens.size());
            for (Token token : tokens) {
                words.add(CoreNlpUtils.tokenToWord(token));
            }
            if (this.ptb3Escaping) {
                words = CoreNlpUtils.applyPtbEscaping(words, this.quoteBegin, this.quoteEnd);
            }

            List<CoreMap> taggedWords = this.modelProvider.getResource().classifySentence(words);

            int entityBegin = -1;
            int entityEnd = -1;
            String entityType = null; // label of the entity currently being built, or null
            for (CoreMap taggedWord : taggedWords) {
                String tokenType = taggedWord.get(CoreAnnotations.AnswerAnnotation.class);
                // A token without a label is handled like an "O" token instead of failing.
                boolean outside = tokenType == null || OUTSIDE_LABEL.equals(tokenType);

                // Close the open entity when the label changes or the token is outside.
                if (entityType != null && (outside || !entityType.equals(tokenType))) {
                    addNamedEntity(cas, entityType, entityBegin, entityEnd);
                    entityType = null;
                }

                if (!outside) {
                    if (entityType == null) {
                        // First token of a new entity.
                        entityType = tokenType;
                        entityBegin = taggedWord.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
                    }
                    // Every token of the run pushes the end offset forward.
                    entityEnd = taggedWord.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
                }
            }

            // The sentence may end while an entity is still open; close it here.
            if (entityType != null) {
                addNamedEntity(cas, entityType, entityBegin, entityEnd);
            }
        }
    }

    /**
     * Creates a named entity annotation of the type mapped to {@code tag}, stores the tag as its
     * value and adds it to the CAS indexes.
     */
    private void addNamedEntity(CAS cas, String tag, int begin, int end) {
        NamedEntity namedEntity = (NamedEntity) cas.createAnnotation(this.mappingProvider.getTagType(tag), begin, end);
        namedEntity.setValue(tag);
        namedEntity.addToIndexes();
    }
}
