package de.tudarmstadt.ukp.dkpro.core.treetagger;

import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.SingletonTagset;
import de.tudarmstadt.ukp.dkpro.core.api.resources.CasConfigurableProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProviderFactory;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ModelProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceMetadata;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceObjectProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.treetagger.internal.DKProExecutableResolver;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
import org.annolab.tt4j.TokenAdapter;
import org.annolab.tt4j.TokenHandler;
import org.annolab.tt4j.TreeTaggerException;
import org.annolab.tt4j.TreeTaggerModelUtil;
import org.annolab.tt4j.TreeTaggerWrapper;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;

/**
 * UIMA annotator that sends the {@link Token} annotations of a CAS through TreeTagger (via the
 * TT4J {@link TreeTaggerWrapper}) and attaches the returned part-of-speech tags and lemmas to the
 * tokens as {@link POS} and {@link Lemma} annotations.
 *
 * <p>The TreeTagger model and the tag-to-type mapping are resolved per CAS by the providers created
 * in {@link #initialize(UimaContext)}; the configuration parameters below can override what the
 * providers would otherwise pick.
 */
@TypeCapability(inputs = {"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token"}, outputs = {"de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.treetagger.TreeTaggerPosTagger", description = "Part-of-Speech and lemmatizer annotator using TreeTagger.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
public class TreeTaggerPosTagger extends JCasAnnotator_ImplBase {
    /** Name of the parameter that overrides the language used to resolve the model. */
    public static final String PARAM_LANGUAGE = "language";

    @ConfigurationParameter(name = "language", mandatory = false, description = "Use this language instead of the document language to resolve the model.")
    protected String language;

    /** Name of the parameter that overrides the model variant. */
    public static final String PARAM_VARIANT = "modelVariant";

    @ConfigurationParameter(name = "modelVariant", mandatory = false, description = "Override the default variant used to locate the model.")
    protected String variant;

    /** Name of the parameter that points to an explicit TreeTagger executable. */
    public static final String PARAM_EXECUTABLE_PATH = "executablePath";

    @ConfigurationParameter(name = "executablePath", mandatory = false, description = "Use this TreeTagger executable instead of trying to locate the executable automatically.")
    private File executablePath;

    /** Name of the parameter that points to an explicit model location. */
    public static final String PARAM_MODEL_LOCATION = "modelLocation";

    @ConfigurationParameter(name = "modelLocation", mandatory = false, description = "Load the model from this location instead of locating the model automatically.")
    protected String modelLocation;

    /** Name of the parameter that overrides the model's character encoding. */
    public static final String PARAM_MODEL_ENCODING = "modelEncoding";

    @ConfigurationParameter(name = "modelEncoding", mandatory = false, description = "The character encoding used by the model.")
    protected String modelEncoding;

    /** Name of the parameter that points to an explicit POS tag mapping. */
    public static final String PARAM_POS_MAPPING_LOCATION = "POSMappingLocation";

    @ConfigurationParameter(name = "POSMappingLocation", mandatory = false, description = "Load the part-of-speech tag to UIMA type mapping from this location instead of locating\nthe mapping automatically.")
    protected String posMappingLocation;

    /** Name of the parameter that toggles {@link String#intern()} on written values. */
    public static final String PARAM_INTERN_TAGS = "internTags";

    @ConfigurationParameter(name = "internTags", mandatory = false, defaultValue = {"true"}, description = "Use the String#intern() method on tags. This is usually a good idea to avoid\nspaming the heap with thousands of strings representing only a few different tags.\n\nDefault: true")
    private boolean internTags;

    /** Name of the parameter that toggles logging of the tag set on model load. */
    public static final String PARAM_PRINT_TAGSET = "printTagSet";

    @ConfigurationParameter(name = "printTagSet", mandatory = true, defaultValue = {"false"}, description = "Log the tag set(s) when a model is loaded.\n\nDefault: false")
    protected boolean printTagSet;

    /** Name of the parameter that toggles the TT4J performance mode. */
    public static final String PARAM_PERFORMANCE_MODE = "performanceMode";

    @ConfigurationParameter(name = "performanceMode", mandatory = true, defaultValue = {"false"}, description = "TT4J setting: Disable some sanity checks, e.g. whether tokens contain line breaks (which is\nnot allowed). Turning this on will increase your performance, but the wrapper may throw\nexceptions if illegal data is provided.")
    private boolean performanceMode;

    /** Name of the parameter that toggles writing of {@link POS} annotations. */
    public static final String PARAM_WRITE_POS = "writePOS";

    @ConfigurationParameter(name = "writePOS", mandatory = true, defaultValue = {"true"}, description = "Write part-of-speech information.\n\nDefault: true")
    private boolean writePos;

    /** Name of the parameter that toggles writing of {@link Lemma} annotations. */
    public static final String PARAM_WRITE_LEMMA = "writeLemma";

    @ConfigurationParameter(name = "writeLemma", mandatory = true, defaultValue = {"true"}, description = "Write lemma information.\n\nDefault: true")
    private boolean writeLemma;

    /** Resolves the model for the current CAS and hands out the configured TreeTagger wrapper. */
    private CasConfigurableProviderBase<TreeTaggerWrapper<Token>> modelProvider;

    /** Maps tags produced by the model to UIMA types. */
    private MappingProvider posMappingProvider;

    /**
     * Creates the model provider (which owns a single {@link TreeTaggerWrapper} instance) and the
     * part-of-speech mapping provider.
     *
     * @param uimaContext the context handed to the superclass initialisation
     * @throws ResourceInitializationException if the superclass initialisation fails
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.modelProvider = new ModelProviderBase<TreeTaggerWrapper<Token>>() {
            /** The one wrapper instance this provider configures and returns. */
            private TreeTaggerWrapper<Token> treetagger;

            {
                setContextObject(TreeTaggerPosTagger.this);

                // Defaults used to locate the model, followed by the user-supplied overrides.
                setDefault(ResourceObjectProviderBase.ARTIFACT_ID, "${groupId}.treetagger-model-tagger-${language}-${variant}");
                setDefault(ResourceObjectProviderBase.LOCATION, "classpath:/${package}/lib/tagger-${language}-${variant}.properties");
                setDefault("variant", "le");
                setOverride(ResourceObjectProviderBase.LOCATION, TreeTaggerPosTagger.this.modelLocation);
                setOverride("language", TreeTaggerPosTagger.this.language);
                setOverride("variant", TreeTaggerPosTagger.this.variant);

                this.treetagger = new TreeTaggerWrapper<>();
                this.treetagger.setPerformanceMode(TreeTaggerPosTagger.this.performanceMode);

                DKProExecutableResolver executableResolver = new DKProExecutableResolver(this.treetagger);
                executableResolver.setExecutablePath(TreeTaggerPosTagger.this.executablePath);
                this.treetagger.setExecutableProvider(executableResolver);

                this.treetagger.setAdapter(new TokenAdapter<Token>() {
                    /**
                     * Returns the text covered by the token, read while holding the monitor of the
                     * token's CAS.
                     */
                    @Override
                    public String getText(Token token) {
                        // NOTE(review): presumably the same CAS object the token handler in
                        // process() locks on, so reads and writes do not interleave - confirm.
                        synchronized (token.getCAS()) {
                            return token.getCoveredText();
                        }
                    }
                });
            }

            /**
             * Points the wrapper at the model found at {@code url}, registers the model's tag set
             * with this provider and optionally logs it.
             *
             * @param url location of the resolved model
             * @return the wrapper instance owned by this provider
             * @throws IOException if the model cannot be materialised as a file or read
             */
            @Override
            public TreeTaggerWrapper<Token> produceResource(URL url) throws IOException {
                Properties metadata = getResourceMetaData();

                // An explicitly configured encoding wins over the one recorded with the model.
                // NOTE(review): if neither is set the model string below ends in ":null" - confirm
                // that the model metadata always carries an "encoding" entry.
                String encoding = TreeTaggerPosTagger.this.modelEncoding != null
                        ? TreeTaggerPosTagger.this.modelEncoding
                        : metadata.getProperty("encoding");
                String tagsetName = metadata.getProperty(ResourceMetadata.META_POS_TAGSET);

                File modelFile = ResourceUtils.getUrlAsFile(url, true);
                this.treetagger.setModel(modelFile.getPath() + ":" + encoding);

                // Publish the tags contained in the model under the tag set name from the metadata.
                List<String> tags = TreeTaggerModelUtil.getTagset(modelFile, encoding);
                SingletonTagset posTagset = new SingletonTagset(POS.class, tagsetName);
                posTagset.addAll(tags);
                addTagset(posTagset);

                if (TreeTaggerPosTagger.this.printTagSet) {
                    TreeTaggerPosTagger.this.getContext().getLogger().log(Level.INFO, getTagset().toString());
                }
                return this.treetagger;
            }
        };
        this.posMappingProvider = MappingProviderFactory.createPosMappingProvider(this.posMappingLocation, this.language, this.modelProvider);
    }

    /**
     * Tags all {@link Token} annotations of the given CAS. For every token reported back by
     * TreeTagger a {@link POS} and/or {@link Lemma} annotation is created and linked to the token
     * (subject to {@code writePOS} / {@code writeLemma}); the new annotations are added to the
     * indexes only after TreeTagger has finished.
     *
     * @param jCas the CAS whose tokens are tagged
     * @throws AnalysisEngineProcessException wrapping any {@link IOException} or
     *         {@link TreeTaggerException} raised while running TreeTagger
     */
    @Override
    public void process(final JCas jCas) throws AnalysisEngineProcessException {
        final CAS cas = jCas.getCas();
        this.modelProvider.configure(cas);
        this.posMappingProvider.configure(cas);
        final TreeTaggerWrapper<Token> tagger = this.modelProvider.getResource();
        try {
            final List<Token> tokens = new ArrayList<>(JCasUtil.select(jCas, Token.class));

            // Annotations are collected here first and indexed once tagging is complete.
            final POS[] posAnnotations = new POS[tokens.size()];
            final Lemma[] lemmaAnnotations = new Lemma[tokens.size()];
            final AtomicInteger handled = new AtomicInteger(0);

            tagger.setHandler(new TokenHandler<Token>() {
                /**
                 * Creates the annotations for one tagged token and stores them at the slot of the
                 * current token count.
                 */
                @Override
                public void token(Token token, String posTag, String lemmaText) {
                    synchronized (cas) {
                        final int slot = handled.get();

                        if (TreeTaggerPosTagger.this.writePos && posTag != null) {
                            POS pos = (POS) cas.createAnnotation(TreeTaggerPosTagger.this.posMappingProvider.getTagType(posTag), token.getBegin(), token.getEnd());
                            pos.setPosValue(TreeTaggerPosTagger.this.internTags ? posTag.intern() : posTag);
                            token.setPos(pos);
                            posAnnotations[slot] = pos;
                        }

                        if (TreeTaggerPosTagger.this.writeLemma && lemmaText != null) {
                            Lemma lemma = new Lemma(jCas, token.getBegin(), token.getEnd());
                            lemma.setValue(TreeTaggerPosTagger.this.internTags ? lemmaText.intern() : lemmaText);
                            token.setLemma(lemma);
                            lemmaAnnotations[slot] = lemma;
                        }

                        // Advance only after both annotations of this token have been stored.
                        handled.getAndIncrement();
                    }
                }
            });

            tagger.process(tokens);

            // Index everything that was created; slots stay null when writing was disabled or
            // TreeTagger returned no value for that token.
            for (int i = 0; i < handled.get(); i++) {
                if (posAnnotations[i] != null) {
                    posAnnotations[i].addToIndexes();
                }
                if (lemmaAnnotations[i] != null) {
                    lemmaAnnotations[i].addToIndexes();
                }
            }
        } catch (IOException | TreeTaggerException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}
