package de.tudarmstadt.ukp.dkpro.core.io.penntree;

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.io.ResourceCollectionReaderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProviderFactory;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

/**
 * Collection reader for files in the Penn Treebank combined (bracketed) format.
 *
 * <p>Each input file is split into individual bracketed trees. A tree ends at a blank line, at a
 * following line that starts with {@code '('} in the first column, or at the end of the file.
 * Every tree is handed to a {@link PennTreeToJCasConverter} and covered by one {@link Sentence}
 * annotation; the document text is assembled from what the converter appends, with a newline
 * after each tree.</p>
 */
@TypeCapability(outputs = {"de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS", "de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.io.penntree.PennTreebankCombinedReader", description = "Penn Treebank combined format reader.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
public class PennTreebankCombinedReader extends JCasResourceCollectionReader_ImplBase {
    /** Name of the configuration parameter holding the character encoding of the input files. */
    public static final String PARAM_ENCODING = "sourceEncoding";

    @ConfigurationParameter(name = PARAM_ENCODING, mandatory = true, defaultValue = {"UTF-8"}, description = "Name of configuration parameter that contains the character encoding used by the input files.")
    private String encoding;

    /** Name of the configuration parameter selecting the part-of-speech tag set. */
    public static final String PARAM_POS_TAG_SET = "POSTagSet";

    @ConfigurationParameter(name = PARAM_POS_TAG_SET, mandatory = false, description = "Use this part-of-speech tag set to use to resolve the tag set mapping instead of using the\ntag set defined as part of the model meta data. This can be useful if a custom model is\nspecified which does not have such meta data, or it can be used in readers.")
    protected String posTagset;

    /** Name of the configuration parameter overriding the part-of-speech mapping location. */
    public static final String PARAM_POS_MAPPING_LOCATION = "POSMappingLocation";

    @ConfigurationParameter(name = PARAM_POS_MAPPING_LOCATION, mandatory = false, description = "Load the part-of-speech tag to UIMA type mapping from this location instead of locating\nthe mapping automatically.")
    protected String posMappingLocation;

    /** Name of the configuration parameter that switches POS tag creation on or off. */
    public static final String PARAM_READ_POS = "readPOS";

    @ConfigurationParameter(name = PARAM_READ_POS, mandatory = true, defaultValue = {"true"}, description = "Sets whether to create or not to create POS tags. The creation of\nconstituent tags must be turned on for this to work.\n\n<p>Default: true</p>")
    private boolean createPosTags;

    /** Name of the configuration parameter selecting the constituent tag set. */
    public static final String PARAM_CONSTITUENT_TAG_SET = "ConstituentTagSet";

    @ConfigurationParameter(name = PARAM_CONSTITUENT_TAG_SET, mandatory = false, description = "Use this constituent tag set to use to resolve the tag set mapping instead of using the\ntag set defined as part of the model meta data. This can be useful if a custom model is\nspecified which does not have such meta data, or it can be used in readers.")
    protected String constituentTagset;

    /** Name of the configuration parameter overriding the constituent mapping location. */
    public static final String PARAM_CONSTITUENT_MAPPING_LOCATION = "ConstituentMappingLocation";

    @ConfigurationParameter(name = PARAM_CONSTITUENT_MAPPING_LOCATION, mandatory = false, description = "Load the constituent tag to UIMA type mapping from this location instead of locating\nthe mapping automatically.")
    protected String constituentMappingLocation;

    /** Name of the configuration parameter that switches interning of tag strings on or off. */
    public static final String PARAM_INTERN_TAGS = "internTags";

    @ConfigurationParameter(name = PARAM_INTERN_TAGS, mandatory = false, defaultValue = {"true"}, description = "Use the String#intern() method on tags. This is usually a good idea to avoid\nspaming the heap with thousands of strings representing only a few different tags.\n\n<p>Default: true</p>")
    private boolean internTags;

    /**
     * Name of the configuration parameter that controls whether nodes labelled {@code -NONE-}
     * are pruned from each tree before conversion. Default: true.
     */
    public static final String PARAM_REMOVE_TRACES = "removeTraces";

    @ConfigurationParameter(name = PARAM_REMOVE_TRACES, mandatory = false, defaultValue = {"true"})
    private boolean removeTraces;

    /**
     * Name of the configuration parameter whose value is forwarded to
     * {@code PennTreeToJCasConverter#setWriteTracesToText}. Default: false.
     * NOTE(review): presumably decides whether trace tokens end up in the document text -
     * confirm against the converter.
     */
    public static final String PARAM_WRITE_TRACES_TO_TEXT = "writeTracesToText";

    @ConfigurationParameter(name = PARAM_WRITE_TRACES_TO_TEXT, mandatory = false, defaultValue = {"false"})
    private boolean writeTracesToText;

    /** Label that marks a trace node in the tree. */
    private static final String NONE = "-NONE-";

    private MappingProvider posMappingProvider;
    private MappingProvider constituentMappingProvider;
    private PennTreeToJCasConverter converter;

    /** Number of lines pulled from the current file so far; only used in error messages. */
    private int lineNumber = 0;

    /** One line of look-ahead: the first line of the next tree, read while finishing the previous one. */
    private String lineBuffer = null;

    /**
     * Creates the mapping providers and the tree converter from the configured parameters.
     *
     * @param uimaContext the UIMA context passed on to the superclass.
     * @throws ResourceInitializationException if initialization fails.
     */
    @Override // de.tudarmstadt.ukp.dkpro.core.api.io.ResourceCollectionReaderBase, org.apache.uima.fit.component.CasCollectionReader_ImplBase
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.posMappingProvider = MappingProviderFactory.createPosMappingProvider(this.posMappingLocation, this.posTagset, getLanguage());
        this.constituentMappingProvider = MappingProviderFactory.createConstituentMappingProvider(this.constituentMappingLocation, this.constituentTagset, getLanguage());
        this.converter = new PennTreeToJCasConverter(this.posMappingProvider, this.constituentMappingProvider);
        this.converter.setInternTags(this.internTags);
        this.converter.setWriteTracesToText(this.writeTracesToText);
        this.converter.setCreatePosTags(this.createPosTags);
    }

    /**
     * Reads the next file into the given JCas: every tree in the file is converted and wrapped
     * in a {@link Sentence}, then the collected text is set as the document text.
     *
     * @param jCas the JCas to fill.
     * @throws IOException if the file cannot be read, or if configuring the mapping providers
     *         (or any other step inside the guarded region) raises an
     *         {@link AnalysisEngineProcessException}, which is wrapped.
     * @throws CollectionException declared by the reader contract.
     */
    @Override // de.tudarmstadt.ukp.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase
    public void getNext(JCas jCas) throws IOException, CollectionException {
        ResourceCollectionReaderBase.Resource nextFile = nextFile();
        initCas(jCas.getCas(), nextFile);

        // Parser state is per file; a look-ahead line left over from an earlier file (for
        // example after a failed read) must never leak into this one.
        this.lineNumber = 0;
        this.lineBuffer = null;

        try {
            this.posMappingProvider.configure(jCas.getCas());
            this.constituentMappingProvider.configure(jCas.getCas());

            StringBuilder documentText = new StringBuilder();
            try (InputStream inputStream = nextFile.getInputStream()) {
                LineIterator lineIterator = IOUtils.lineIterator(inputStream, this.encoding);
                // The look-ahead buffer may still hold the start of the last tree after the
                // iterator itself is exhausted, so both have to be checked.
                while (lineIterator.hasNext() || this.lineBuffer != null) {
                    String treeText = readTreeText(lineIterator);
                    if (treeText.isEmpty()) {
                        // Only blank lines were left in the file; there is no further tree.
                        break;
                    }
                    PennTreeNode tree = parseTree(treeText);
                    if (this.removeTraces) {
                        doRemoveTraces(tree);
                    }
                    Constituent root = this.converter.convertPennTree(jCas, documentText, tree);
                    new Sentence(jCas, root.getBegin(), root.getEnd()).addToIndexes();
                    documentText.append('\n');
                }
            }
            jCas.setDocumentText(documentText.toString());
        } catch (AnalysisEngineProcessException e) {
            throw new IOException(e);
        }
    }

    /**
     * Prunes trace nodes from the subtree rooted at the given node.
     *
     * <p>A node counts as a trace if its label is {@code -NONE-}, or if it has exactly one child
     * and that child counts as a trace. For nodes with any other number of children, every child
     * that counts as a trace is removed from the child list.</p>
     *
     * @param pennTreeNode root of the subtree to clean up.
     * @return {@code true} if the given node itself counts as a trace and should be removed by
     *         its parent; {@code false} otherwise.
     */
    private boolean doRemoveTraces(PennTreeNode pennTreeNode) {
        if (NONE.equals(pennTreeNode.getLabel())) {
            return true;
        }
        if (pennTreeNode.getChildren().size() == 1) {
            return doRemoveTraces(pennTreeNode.getChildren().get(0));
        }
        // Iterate over a snapshot because children are removed from the live list on the way.
        PennTreeNode[] snapshot = pennTreeNode.getChildren().toArray(new PennTreeNode[pennTreeNode.getChildren().size()]);
        for (PennTreeNode child : snapshot) {
            if (doRemoveTraces(child)) {
                pennTreeNode.getChildren().remove(child);
            }
        }
        return false;
    }

    /**
     * Collects the text of the next tree from the input.
     *
     * <p>Leading blank lines are skipped. The tree ends at the first blank line after its start,
     * at a line whose first character is {@code '('} (that line is kept in {@link #lineBuffer}
     * for the following call), or when the input is exhausted.</p>
     *
     * @param lineIterator source of the lines of the current file.
     * @return the lines of the tree, each terminated by a newline; empty if no non-blank line
     *         was left.
     */
    private String readTreeText(LineIterator lineIterator) {
        StringBuilder treeText = new StringBuilder();
        while (this.lineBuffer != null || lineIterator.hasNext()) {
            String line;
            if (this.lineBuffer != null) {
                // Already counted when it was first read.
                line = this.lineBuffer;
                this.lineBuffer = null;
            } else {
                line = lineIterator.nextLine();
                this.lineNumber++;
            }

            if (StringUtils.isBlank(line)) {
                if (treeText.length() > 0) {
                    // A blank line after tree content terminates the tree.
                    break;
                }
                // Blank line before any content: keep looking for the start of a tree.
                continue;
            }

            // An opening bracket in the first column starts a new tree.
            if (treeText.length() > 0 && line.charAt(0) == '(') {
                this.lineBuffer = line;
                break;
            }

            treeText.append(line);
            treeText.append('\n');
        }
        return treeText.toString();
    }

    /**
     * Parses the text of a single tree, logging the offending text if parsing fails.
     *
     * @param treeText the bracketed tree as collected by {@link #readTreeText(LineIterator)}.
     * @return the result of {@code PennTreeUtils.parsePennTree}.
     * @throws RuntimeException rethrown unchanged from the parser after logging.
     */
    private PennTreeNode parseTree(String treeText) {
        try {
            return PennTreeUtils.parsePennTree(treeText);
        } catch (RuntimeException e) {
            getLogger().error("Unable to parse tree before line [" + this.lineNumber + "]:\n" + treeText);
            throw e;
        }
    }
}
