package de.tudarmstadt.ukp.dkpro.core.berkeleyparser;

import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.SingletonTagset;
import de.tudarmstadt.ukp.dkpro.core.api.resources.CasConfigurableProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProviderFactory;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ModelProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceMetadata;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceObjectProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.PennTree;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent;
import edu.berkeley.nlp.PCFGLA.CoarseToFineMaxRuleParser;
import edu.berkeley.nlp.PCFGLA.Grammar;
import edu.berkeley.nlp.PCFGLA.Lexicon;
import edu.berkeley.nlp.PCFGLA.ParserData;
import edu.berkeley.nlp.PCFGLA.TreeAnnotations;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.util.Numberer;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.mutable.MutableInt;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.OperationalProperties;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.fit.util.FSCollectionFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.maltparser.core.syntaxgraph.RootLabels;

@OperationalProperties(multipleDeploymentAllowed = false)
@TypeCapability(inputs = {"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence"}, outputs = {"de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent", "de.tudarmstadt.ukp.dkpro.core.api.syntax.type.PennTree"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.berkeleyparser.BerkeleyParser", description = "Berkeley Parser annotator . Requires Sentences to be annotated before.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
/* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/berkeleyparser/BerkeleyParser.class */
public class BerkeleyParser extends JCasAnnotator_ImplBase {
    /** Name of the parameter that overrides the CAS language when locating the model. */
    public static final String PARAM_LANGUAGE = "language";

    /** Language override handed to the model provider and the mapping providers; may be unset. */
    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false, description = "Use this language instead of the language set in the CAS to locate the model.")
    protected String language;

    /** Name of the parameter that overrides the default model variant. */
    public static final String PARAM_VARIANT = "modelVariant";

    /** Variant override handed to the model provider; may be unset. */
    @ConfigurationParameter(name = PARAM_VARIANT, mandatory = false, description = "Override the default variant used to locate the model.")
    protected String variant;

    /** Name of the parameter that sets an explicit model location. */
    public static final String PARAM_MODEL_LOCATION = "modelLocation";

    /** Explicit model location handed to the model provider as location override; may be unset. */
    @ConfigurationParameter(name = PARAM_MODEL_LOCATION, mandatory = false, description = "Load the model from this location instead of locating the model automatically.")
    protected String modelLocation;

    /** Name of the parameter that sets the part-of-speech mapping file location. */
    public static final String PARAM_POS_MAPPING_LOCATION = "POSMappingLocation";

    /** Location of the part-of-speech tag mapping, passed to the POS mapping provider. */
    @ConfigurationParameter(name = PARAM_POS_MAPPING_LOCATION, mandatory = false, description = "Location of the mapping file for part-of-speech tags to UIMA types.")
    protected String posMappingLocation;

    /** Name of the parameter that sets the constituent mapping file location. */
    public static final String PARAM_CONSTITUENT_MAPPING_LOCATION = "ConstituentMappingLocation";

    /** Location of the constituent tag mapping, passed to the constituent mapping provider. */
    @ConfigurationParameter(name = PARAM_CONSTITUENT_MAPPING_LOCATION, mandatory = false, description = "Location of the mapping file for constituent tags to UIMA types.")
    protected String constituentMappingLocation;

    /** Name of the parameter that toggles interning of tag strings. */
    public static final String PARAM_INTERN_TAGS = "internTags";

    /** When set, POS tag values are interned before being stored on the POS annotation. */
    @ConfigurationParameter(name = PARAM_INTERN_TAGS, mandatory = false, defaultValue = {"true"}, description = "Use the String#intern() method on tags. This is usually a good idea to avoid spaming\nthe heap with thousands of strings representing only a few different tags.\n\nDefault: true")
    private boolean internTags;

    /** Name of the parameter that toggles logging of the model tag sets. */
    public static final String PARAM_PRINT_TAGSET = "printTagSet";

    /** When set, the tag set is logged at INFO level after a model has been loaded. */
    @ConfigurationParameter(name = PARAM_PRINT_TAGSET, mandatory = true, defaultValue = {"false"}, description = "Log the tag set(s) when a model is loaded.\n\nDefault: false")
    protected boolean printTagSet;

    /** Name of the parameter that toggles the use of existing POS tags. */
    public static final String PARAM_READ_POS = "readPOS";

    // The description previously claimed "Default: false" although the effective default is "true".
    /** When set, the POS values already present on the tokens are passed to the parser. */
    @ConfigurationParameter(name = PARAM_READ_POS, mandatory = true, defaultValue = {"true"}, description = "Sets whether to use or not to use already existing POS tags from another annotator for the\nparsing process.\n<p>\nDefault: true")
    private boolean readPos;

    /** Name of the parameter that toggles the creation of POS annotations. */
    public static final String PARAM_WRITE_POS = "writePOS";

    // The description previously claimed "Default: true" although the effective default is "false".
    /** When set, a POS annotation is created for every pre-terminal node of the parse tree. */
    @ConfigurationParameter(name = PARAM_WRITE_POS, mandatory = true, defaultValue = {"false"}, description = "Sets whether to create or not to create POS tags. The creation of constituent tags must be\nturned on for this to work.\n<p>\nDefault: false")
    private boolean writePos;

    /** Name of the parameter that toggles the creation of PennTree annotations. */
    public static final String PARAM_WRITE_PENN_TREE = "writePennTree";

    /** When set, a PennTree annotation holding the parse tree string is added per sentence. */
    @ConfigurationParameter(name = PARAM_WRITE_PENN_TREE, mandatory = true, defaultValue = {"false"}, description = "If this parameter is set to true, each sentence is annotated with a PennTree-Annotation,\ncontaining the whole parse tree in Penn Treebank style format.\n<p>\nDefault: false")
    private boolean writePennTree;

    /** Name of the parameter that selects the Viterbi derivation. */
    public static final String PARAM_VITERBI = "viterbi";

    /** Forwarded to the parser constructor when the model is loaded. */
    @ConfigurationParameter(name = PARAM_VITERBI, mandatory = true, defaultValue = {"false"}, description = "Compute Viterbi derivation instead of max-rule tree.\n<p>\nDefault: false (max-rule)")
    private boolean viterbi;

    /** Name of the parameter that toggles sub-category output. */
    public static final String PARAM_SUBSTATES = "substates";

    /** Forwarded to the parser constructor when the model is loaded. */
    @ConfigurationParameter(name = PARAM_SUBSTATES, mandatory = true, defaultValue = {"false"}, description = "Output sub-categories (only for binarized Viterbi trees).\n<p>\nDefault: false")
    private boolean substates;

    /** Name of the parameter that toggles inside score output. */
    public static final String PARAM_SCORES = "scores";

    /** Forwarded to the parser constructor when the model is loaded. */
    @ConfigurationParameter(name = PARAM_SCORES, mandatory = true, defaultValue = {"false"}, description = "Output inside scores (only for binarized viterbi trees).\n<p>\nDefault: false")
    private boolean scores;

    /** Name of the parameter that selects accuracy-oriented thresholds. */
    public static final String PARAM_ACCURATE = "accurate";

    /** Forwarded to the parser constructor when the model is loaded. */
    @ConfigurationParameter(name = PARAM_ACCURATE, mandatory = true, defaultValue = {"false"}, description = "Set thresholds for accuracy.\n<p>\nDefault: false (set thresholds for efficiency)")
    private boolean accurate;

    /** Name of the parameter that selects the variational rule score approximation. */
    public static final String PARAM_VARIATIONAL = "variational";

    /** Forwarded to the parser constructor when the model is loaded. */
    @ConfigurationParameter(name = PARAM_VARIATIONAL, mandatory = true, defaultValue = {"false"}, description = "Use variational rule score approximation instead of max-rule\n<p>\nDefault: false")
    private boolean variational;

    /** Name of the parameter that toggles retention of function labels. */
    public static final String PARAM_KEEP_FUNCTION_LABELS = "keepFunctionLabels";

    /** Passed to the tree un-annotation step that runs when trees are not kept binarized. */
    @ConfigurationParameter(name = PARAM_KEEP_FUNCTION_LABELS, mandatory = true, defaultValue = {"false"}, description = "Retain predicted function labels. Model must have been trained with function labels.\n<p>\nDefault: false")
    private boolean keepFunctionLabels;

    /** Name of the parameter that toggles binarized tree output. */
    public static final String PARAM_BINARIZE = "binarize";

    /**
     * When set, parse trees are used as returned by the parser (no un-annotation) and tags
     * starting with "@" are included in the recorded tag sets.
     */
    @ConfigurationParameter(name = PARAM_BINARIZE, mandatory = true, defaultValue = {"false"}, description = "Output binarized trees.\n<p>\nDefault: false")
    private boolean binarize;

    /** Provides the parser instance for the CAS currently being processed. */
    private CasConfigurableProviderBase<CoarseToFineMaxRuleParser> modelProvider;

    /** Resolves part-of-speech tags to UIMA annotation types. */
    private MappingProvider posMappingProvider;

    /** Resolves constituent tags to UIMA annotation types. */
    private MappingProvider constituentMappingProvider;

    /* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/berkeleyparser/BerkeleyParser$BerkeleyParserModelProvider.class */
    /**
     * Model provider that reads a gzip-compressed, Java-serialized {@link ParserData} object and
     * turns it into a ready-to-use {@link CoarseToFineMaxRuleParser}. While loading, the tags
     * found in the model are recorded as POS and constituent tag sets.
     */
    private class BerkeleyParserModelProvider extends ModelProviderBase<CoarseToFineMaxRuleParser> {
        /**
         * Registers the default artifact/location patterns and applies the overrides configured on
         * the enclosing annotator (model location, language, variant).
         */
        private BerkeleyParserModelProvider() {
            setContextObject(BerkeleyParser.this);
            setDefault(ResourceObjectProviderBase.ARTIFACT_ID, "${groupId}.berkeleyparser-model-parser-${language}-${variant}");
            setDefault(ResourceObjectProviderBase.LOCATION, "classpath:/${package}/lib/parser-${language}-${variant}.bin");
            setDefaultVariantsLocation("${package}/lib/parser-default-variants.map");
            setOverride(ResourceObjectProviderBase.LOCATION, BerkeleyParser.this.modelLocation);
            setOverride("language", BerkeleyParser.this.language);
            setOverride("variant", BerkeleyParser.this.variant);
        }

        /**
         * Loads the parser model from the given URL.
         *
         * @param url location of the gzip-compressed serialized model
         * @return a parser configured with the options of the enclosing annotator
         * @throws IOException if the model cannot be read, or (wrapping the
         *         {@link ClassNotFoundException}) if a serialized class cannot be resolved
         */
        @Override
        public CoarseToFineMaxRuleParser produceResource(URL url) throws IOException {
            // The raw stream is tracked separately so that it is still closed when wrapping it in
            // the GZIP/object streams fails (e.g. the resource is not a gzip file).
            InputStream rawStream = null;
            ObjectInputStream modelStream = null;
            try {
                rawStream = url.openStream();
                modelStream = new ObjectInputStream(new GZIPInputStream(rawStream));

                // SECURITY NOTE: this is Java native deserialization; models must only be loaded
                // from trusted locations.
                ParserData parserData = (ParserData) modelStream.readObject();
                Grammar grammar = parserData.getGrammar();
                Lexicon lexicon = parserData.getLexicon();
                Numberer.setNumberers(parserData.getNumbs());

                Properties metadata = getResourceMetaData();
                SingletonTagset posTags = new SingletonTagset(POS.class, metadata.getProperty(ResourceMetadata.META_POS_TAGSET));
                SingletonTagset constituentTags = new SingletonTagset(Constituent.class, metadata.getProperty(ResourceMetadata.META_CONSTITUENT_TAGSET));

                Numberer tagNumberer = (Numberer) parserData.getNumbs().get("tags");
                for (int i = 0; i < tagNumberer.size(); i++) {
                    String tag = (String) tagNumberer.object(i);
                    // Tags starting with "@" are only recorded when binarized output is requested.
                    if (!BerkeleyParser.this.binarize && tag.startsWith("@")) {
                        continue;
                    }
                    if (tag.endsWith("^g")) {
                        // Tags carrying the "^g" suffix are recorded as constituents, without the suffix.
                        constituentTags.add(tag.substring(0, tag.length() - 2));
                    } else if (RootLabels.DEFAULT_ROOTSYMBOL.equals(tag)) {
                        // NOTE(review): this constant comes from MaltParser; presumably the
                        // decompiler substituted it for a plain root-label literal - confirm
                        // before relying on or removing that dependency.
                        constituentTags.add(tag);
                    } else {
                        posTags.add(tag);
                    }
                }
                addTagset(posTags, BerkeleyParser.this.writePos);
                addTagset(constituentTags);

                if (BerkeleyParser.this.printTagSet) {
                    BerkeleyParser.this.getContext().getLogger().log(Level.INFO, getTagset().toString());
                }

                return new CoarseToFineMaxRuleParser(grammar, lexicon, 1.0d, -1, BerkeleyParser.this.viterbi, BerkeleyParser.this.substates, BerkeleyParser.this.scores, BerkeleyParser.this.accurate, BerkeleyParser.this.variational, true, true);
            } catch (ClassNotFoundException e) {
                throw new IOException(e);
            } finally {
                // Closing stays best-effort: failures while closing are deliberately ignored.
                IOUtils.closeQuietly(modelStream);
                IOUtils.closeQuietly(rawStream);
            }
        }
    }

    /**
     * Initializes the annotator: after the superclass initialization, creates the parser model
     * provider and derives the POS and constituent mapping providers from it.
     *
     * @param uimaContext the UIMA context handed to the superclass initialization
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        // Both mapping providers are created from the same model provider instance.
        CasConfigurableProviderBase<CoarseToFineMaxRuleParser> parserProvider = new BerkeleyParserModelProvider();
        modelProvider = parserProvider;
        posMappingProvider = MappingProviderFactory.createPosMappingProvider(posMappingLocation, language, parserProvider);
        constituentMappingProvider = MappingProviderFactory.createConstituentMappingProvider(constituentMappingLocation, language, parserProvider);
    }

    /**
     * Parses every sentence of the CAS and writes the resulting constituent structure (and,
     * depending on the configuration, POS and PennTree annotations) back to the CAS.
     * <p>
     * If existing POS tags are to be read but a token of a sentence carries no POS annotation,
     * that sentence is parsed without POS constraints instead of failing.
     *
     * @param jCas the CAS to process
     * @throws AnalysisEngineProcessException declared by the overridden method
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        CAS cas = jCas.getCas();
        this.modelProvider.configure(cas);
        this.posMappingProvider.configure(cas);
        this.constituentMappingProvider.configure(cas);

        for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
            List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
            List<String> words = JCasUtil.toText(tokens);

            // A null tag list makes the parser run unconstrained (same as readPOS = false).
            List<String> posTags = null;
            if (this.readPos) {
                posTags = collectPosTags(tokens);
                if (posTags == null) {
                    getLogger().warn("Missing POS annotation on a token, parsing without POS constraints: [" + sentence.getCoveredText() + "]");
                }
            }

            Tree<String> parseTree = this.modelProvider.getResource().getBestConstrainedParse(words, posTags, false);
            if (parseTree.getChildren().isEmpty()) {
                getLogger().warn("Unable to parse sentence: [" + sentence.getCoveredText() + "]");
                continue;
            }

            if (!this.binarize) {
                parseTree = TreeAnnotations.unAnnotateTree(parseTree, this.keepFunctionLabels);
            }
            createConstituentAnnotationFromTree(jCas, parseTree, null, tokens, new MutableInt(0));

            if (this.writePennTree) {
                PennTree pennTree = new PennTree(jCas, sentence.getBegin(), sentence.getEnd());
                pennTree.setPennTree(parseTree.toString());
                pennTree.addToIndexes();
            }
        }
    }

    /**
     * Collects the POS values of the given tokens, in token order.
     *
     * @param tokens the tokens of one sentence
     * @return the POS values, or {@code null} if at least one token has no POS annotation
     */
    private static List<String> collectPosTags(List<Token> tokens) {
        List<String> posTags = new ArrayList<String>(tokens.size());
        for (Token token : tokens) {
            POS pos = token.getPos();
            if (pos == null) {
                // Previously this situation caused a NullPointerException.
                return null;
            }
            posTags.add(pos.getPosValue());
        }
        return posTags;
    }

    /**
     * Recursively converts a parse (sub-)tree into annotations.
     * <p>
     * A pre-terminal node is mapped onto the next unconsumed token (optionally creating a POS
     * annotation for it); every other node becomes a {@link Constituent} spanning from the begin
     * of its first child annotation to the end of its last one.
     *
     * @param jCas the CAS receiving the annotations
     * @param tree the (sub-)tree to convert
     * @param annotation the parent annotation, or {@code null} for the root
     * @param list the tokens of the sentence, in order
     * @param mutableInt index of the next token to consume; advanced for every pre-terminal
     * @return the token or constituent created for the node, or {@code null} if the node yields
     *         no child annotations
     */
    private Annotation createConstituentAnnotationFromTree(JCas jCas, Tree<String> tree, Annotation annotation, List<Token> list, MutableInt mutableInt) {
        String label = tree.getLabel();

        if (tree.isPreTerminal()) {
            Token token = list.get(mutableInt.intValue());
            if (annotation != null) {
                token.setParent(annotation);
            }
            if (this.writePos) {
                POS pos = (POS) jCas.getCas().createAnnotation(this.posMappingProvider.getTagType(label), token.getBegin(), token.getEnd());
                pos.setPosValue(this.internTags ? label.intern() : label);
                pos.addToIndexes();
                token.setPos(pos);
            }
            mutableInt.add(1);
            return token;
        }

        // The offsets are provisional; the real span is only known once the children exist.
        Constituent constituent = (Constituent) jCas.getCas().createAnnotation(this.constituentMappingProvider.getTagType(label), 0, 0);
        constituent.setConstituentType(label);
        if (annotation != null) {
            constituent.setParent(annotation);
        }

        List<Annotation> children = new ArrayList<Annotation>();
        for (Tree<String> childTree : tree.getChildren()) {
            Annotation child = createConstituentAnnotationFromTree(jCas, childTree, constituent, list, mutableInt);
            if (child != null) {
                children.add(child);
            }
        }

        if (children.isEmpty()) {
            // A node without any child annotation has no span; skip it instead of failing with
            // an IndexOutOfBoundsException. The constituent has not been added to the indexes.
            return null;
        }

        constituent.setBegin(children.get(0).getBegin());
        constituent.setEnd(children.get(children.size() - 1).getEnd());
        constituent.setChildren(FSCollectionFactory.createFSArray(jCas, children));
        jCas.addFsToIndexes(constituent);
        return constituent;
    }
}
