package de.tudarmstadt.ukp.dkpro.core.treetagger;

import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.SingletonTagset;
import de.tudarmstadt.ukp.dkpro.core.api.resources.CasConfigurableProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProviderFactory;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ModelProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceMetadata;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceObjectProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk;
import de.tudarmstadt.ukp.dkpro.core.treetagger.internal.DKProExecutableResolver;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.annolab.tt4j.DefaultModel;
import org.annolab.tt4j.TokenAdapter;
import org.annolab.tt4j.TokenHandler;
import org.annolab.tt4j.TreeTaggerException;
import org.annolab.tt4j.TreeTaggerModelUtil;
import org.annolab.tt4j.TreeTaggerWrapper;
import org.apache.ivy.ant.IvyCleanCache;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;

/**
 * UIMA annotator that feeds the {@link POS} annotations of every {@link Sentence} to TreeTagger
 * and turns the tags it reports back into {@link Chunk} annotations.
 *
 * <p>The TreeTagger model is resolved through a model provider configured in
 * {@link #initialize(UimaContext)}; the chunk tag to UIMA type mapping is resolved through a
 * mapping provider created from {@link #PARAM_CHUNK_MAPPING_LOCATION}.</p>
 */
@TypeCapability(inputs = {"de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS"}, outputs = {"de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.treetagger.TreeTaggerChunker", description = "Chunk annotator using TreeTagger.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
public class TreeTaggerChunker extends JCasAnnotator_ImplBase {
    /** Name of the parameter overriding the document language used to resolve the model. */
    public static final String PARAM_LANGUAGE = "language";

    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false, description = "Use this language instead of the document language to resolve the model.")
    protected String language;

    /** Name of the parameter overriding the model variant. */
    public static final String PARAM_VARIANT = "modelVariant";

    @ConfigurationParameter(name = PARAM_VARIANT, mandatory = false, description = "Override the default variant used to locate the model.")
    protected String variant;

    /** Name of the parameter pointing to an explicit TreeTagger executable. */
    public static final String PARAM_EXECUTABLE_PATH = "executablePath";

    @ConfigurationParameter(name = PARAM_EXECUTABLE_PATH, mandatory = false, description = "Use this TreeTagger executable instead of trying to locate the executable automatically.")
    private File executablePath;

    /** Name of the parameter pointing to an explicit model location. */
    public static final String PARAM_MODEL_LOCATION = "modelLocation";

    @ConfigurationParameter(name = PARAM_MODEL_LOCATION, mandatory = false, description = "Load the model from this location instead of locating the model automatically.")
    protected String modelLocation;

    /** Name of the parameter pointing to the chunk tag mapping file. */
    public static final String PARAM_CHUNK_MAPPING_LOCATION = "ChunkMappingLocation";

    @ConfigurationParameter(name = PARAM_CHUNK_MAPPING_LOCATION, mandatory = false, description = "Location of the mapping file for chunk tags to UIMA types.")
    protected String chunkMappingLocation;

    /** Name of the parameter controlling whether chunk tags are interned. */
    public static final String PARAM_INTERN_TAGS = "internTags";

    @ConfigurationParameter(name = PARAM_INTERN_TAGS, mandatory = false, defaultValue = {"true"}, description = "Use the String#intern() method on tags. This is usually a good idea to avoid\nspaming the heap with thousands of strings representing only a few different tags.\n\nDefault: true")
    private boolean internTags;

    /** Name of the parameter controlling whether the tag set is logged on model load. */
    public static final String PARAM_PRINT_TAGSET = "printTagSet";

    @ConfigurationParameter(name = PARAM_PRINT_TAGSET, mandatory = true, defaultValue = {"false"}, description = "Log the tag set(s) when a model is loaded.\n\nDefault: false")
    protected boolean printTagSet;

    /** Name of the parameter toggling the TT4J performance mode. */
    public static final String PARAM_PERFORMANCE_MODE = "performanceMode";

    @ConfigurationParameter(name = PARAM_PERFORMANCE_MODE, mandatory = true, defaultValue = {"false"}, description = "TT4J setting: Disable some sanity checks, e.g. whether tokens contain line breaks (which is\nnot allowed). Turning this on will increase your performance, but the wrapper may throw\nexceptions if illegal data is provided.")
    private boolean performanceMode;

    /** Name of the parameter overriding the flush sequence sent to TreeTagger. */
    public static final String PARAM_FLUSH_SEQUENCE = "flushSequence";

    @ConfigurationParameter(name = PARAM_FLUSH_SEQUENCE, mandatory = false, description = "A sequence to flush the internal TreeTagger buffer and to force it to output the rest of the\ncompleted analysis. This is typically just a sequence of like 5-10 full stops (\".\") separated\nby new line characters. However, some models may require a different flush sequence, e.g. a\nshort sentence in the respective language. For chunker models, mind that the sentence must\nalso be POS tagged, e.g. Nous-PRO:PER\\n....")
    private String flushSequence;

    private CasConfigurableProviderBase<TreeTaggerWrapper<POS>> modelProvider;
    private MappingProvider mappingProvider;

    /**
     * Token adapter that renders a {@link POS} annotation as {@code <covered text>-<tag>}, where
     * the tag is optionally rewritten through a mapping read from the model metadata.
     */
    public static class MappingTokenAdapter implements TokenAdapter<POS> {
        private final Map<String, String> mapping = new HashMap<>();

        /**
         * Collects every property whose key starts with
         * {@link ResourceMetadata#META_POS_TAG_MAP_PREFIX}; the remainder of the key is the source
         * tag and the property value is the tag to send instead.
         *
         * @param properties model metadata to read the tag mapping from
         */
        public MappingTokenAdapter(Properties properties) {
            final String prefix = ResourceMetadata.META_POS_TAG_MAP_PREFIX;
            for (Map.Entry<Object, Object> entry : properties.entrySet()) {
                String key = String.valueOf(entry.getKey());
                if (key.startsWith(prefix)) {
                    this.mapping.put(key.substring(prefix.length()), String.valueOf(entry.getValue()));
                }
            }
        }

        /**
         * Builds the text handed over for a single POS annotation.
         *
         * @param pos the annotation to render
         * @return the covered text, a hyphen, and the mapped tag (or the original tag when no
         *         mapping entry exists)
         */
        @Override
        public String getText(POS pos) {
            // The annotation is read while holding the monitor of its CAS.
            synchronized (pos.getCAS()) {
                String tag = this.mapping.get(pos.getPosValue());
                if (tag == null) {
                    tag = pos.getPosValue();
                }
                return pos.getCoveredText() + "-" + tag;
            }
        }
    }

    /**
     * Splits the chunk part of a tag reported by TreeTagger into its hyphen-separated fields.
     * The tag is expected to contain a {@code /}; only the segment directly after the first
     * {@code /} is considered.
     *
     * @param tag the raw tag, must not be {@code null}
     * @return the fields of the chunk part, split on {@code -}
     * @throws IllegalArgumentException if the tag contains no chunk part after a {@code /}
     */
    private static String[] splitChunkTag(String tag) {
        String[] parts = tag.split("/");
        if (parts.length < 2) {
            throw new IllegalArgumentException("Unexpected TreeTagger chunk tag [" + tag + "]: expected <pos>/<chunk>");
        }
        return parts[1].split("-");
    }

    /**
     * Picks the chunk name out of the fields produced by {@link #splitChunkTag(String)}: the
     * second field when there are exactly two fields, the first field otherwise.
     */
    private static String chunkNameOf(String[] fields) {
        return fields.length == 2 ? fields[1] : fields[0];
    }

    /**
     * Sets up the model provider (which owns the TreeTagger wrapper) and the chunk mapping
     * provider.
     *
     * @param uimaContext the UIMA context passed on to the superclass
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.modelProvider = new ModelProviderBase<TreeTaggerWrapper<POS>>() {
            private TreeTaggerWrapper<POS> treetagger;

            {
                setContextObject(TreeTaggerChunker.this);
                setDefault(ResourceObjectProviderBase.ARTIFACT_ID, "${groupId}.treetagger-model-chunker-${language}-${variant}");
                setDefault(ResourceObjectProviderBase.LOCATION, "classpath:/${package}/lib/chunker-${language}-${variant}.properties");
                setDefault("variant", "le");
                setOverride(ResourceObjectProviderBase.LOCATION, TreeTaggerChunker.this.modelLocation);
                setOverride("language", TreeTaggerChunker.this.language);
                setOverride("variant", TreeTaggerChunker.this.variant);

                // A single wrapper instance is created here and re-configured on every model load.
                this.treetagger = new TreeTaggerWrapper<>();
                this.treetagger.setPerformanceMode(TreeTaggerChunker.this.performanceMode);
                this.treetagger.setEpsilon(1.0E-8);
                this.treetagger.setHyphenHeuristics(true);
                DKProExecutableResolver executableResolver = new DKProExecutableResolver(this.treetagger);
                executableResolver.setExecutablePath(TreeTaggerChunker.this.executablePath);
                this.treetagger.setExecutableProvider(executableResolver);
            }

            /**
             * Points the wrapper at the model behind {@code url}, installs the token adapter and
             * registers the chunk tag set read from the model.
             */
            @Override
            public TreeTaggerWrapper<POS> produceResource(URL url) throws IOException {
                Properties metadata = getResourceMetaData();
                String encoding = metadata.getProperty("encoding");
                String tagsetName = metadata.getProperty(ResourceMetadata.META_CHUNK_TAGSET);

                // An explicitly configured flush sequence wins over the model metadata, which in
                // turn wins over the TT4J default.
                String flush = metadata.getProperty(TreeTaggerChunker.PARAM_FLUSH_SEQUENCE, DefaultModel.DEFAULT_FLUSH_SEQUENCE);
                if (TreeTaggerChunker.this.flushSequence != null) {
                    flush = TreeTaggerChunker.this.flushSequence;
                }

                File modelFile = ResourceUtils.getUrlAsFile(url, true);
                this.treetagger.setModel(new DefaultModel(modelFile.getPath() + ":" + encoding, modelFile, encoding, flush));
                this.treetagger.setAdapter(new MappingTokenAdapter(metadata));

                List<String> modelTags = TreeTaggerModelUtil.getTagset(modelFile, encoding);
                SingletonTagset chunkTags = new SingletonTagset(Chunk.class, tagsetName);
                for (String modelTag : modelTags) {
                    chunkTags.add(TreeTaggerChunker.chunkNameOf(TreeTaggerChunker.splitChunkTag(modelTag)));
                }
                addTagset(chunkTags);

                if (TreeTaggerChunker.this.printTagSet) {
                    TreeTaggerChunker.this.getContext().getLogger().log(Level.INFO, getTagset().toString());
                }
                return this.treetagger;
            }
        };
        this.mappingProvider = MappingProviderFactory.createChunkMappingProvider(this.chunkMappingLocation, this.language, this.modelProvider);
    }

    /**
     * Sends the POS annotations of each sentence to TreeTagger and adds a {@link Chunk}
     * annotation to the CAS for every run of tokens that belongs to the same chunk.
     *
     * @param jCas the document to annotate
     * @throws AnalysisEngineProcessException if TreeTagger fails with an {@link IOException} or
     *         a {@link TreeTaggerException}
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        final CAS cas = jCas.getCas();
        this.modelProvider.configure(cas);
        this.mappingProvider.configure(cas);

        TokenHandler<POS> tokenHandler = new TokenHandler<POS>() {
            // Name and span of the chunk currently being assembled; openChunk is null when idle.
            private String openChunk;
            private int start;
            private int end;

            /**
             * Receives one tagged token. A {@code null} tag is used as an explicit signal to
             * close whatever chunk is still open.
             */
            @Override
            public void token(POS pos, String tag, String lemma) {
                synchronized (cas) {
                    if (tag == null) {
                        chunkComplete();
                        return;
                    }
                    String[] fields = TreeTaggerChunker.splitChunkTag(tag);
                    // Placeholder for tags without a flag; only ever compared against "B" below.
                    String flag = fields.length == 2 ? fields[0] : "NONE";
                    String chunkName = TreeTaggerChunker.chunkNameOf(fields);

                    // A new chunk starts when the chunk name changes or the tag carries a "B" flag.
                    boolean startsNewChunk = "B".equals(flag) || !chunkName.equals(this.openChunk);
                    if (startsNewChunk) {
                        chunkComplete();
                        this.openChunk = chunkName;
                        this.start = pos.getBegin();
                    }
                    this.end = pos.getEnd();
                }
            }

            /** Creates and indexes the annotation for the open chunk, if any, and resets it. */
            private void chunkComplete() {
                if (this.openChunk == null) {
                    return;
                }
                Chunk chunk = (Chunk) cas.createAnnotation(TreeTaggerChunker.this.mappingProvider.getTagType(this.openChunk), this.start, this.end);
                chunk.setChunkValue(TreeTaggerChunker.this.internTags ? this.openChunk.intern() : this.openChunk);
                cas.addFsToIndexes(chunk);
                this.openChunk = null;
            }
        };

        try {
            TreeTaggerWrapper<POS> treetagger = this.modelProvider.getResource();
            treetagger.setHandler(tokenHandler);
            for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
                List<POS> posTags = new ArrayList<POS>(JCasUtil.selectCovered(POS.class, sentence));
                treetagger.process(posTags);
                // Flush the chunk that is still open at the end of the sentence.
                tokenHandler.token(null, null, null);
            }
        } catch (IOException | TreeTaggerException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}
