package de.tudarmstadt.ukp.dkpro.core.io.conll;

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.io.ResourceCollectionReaderBase;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProviderFactory;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticArgument;
import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticPredicate;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.ROOT;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.factory.JCasBuilder;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.fit.util.FSCollectionFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

@TypeCapability(outputs = {"de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures", "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma", "de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency", "de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticPredicate", "de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticArgument"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.io.conll.Conll2009Reader", description = "<p>Reads a file in the CoNLL-2009 format.</p>\n\n<ol>\n<li>ID - <b>(ignored)</b> Token counter, starting at 1 for each new sentence.</li>\n<li>FORM - <b>(Token)</b> Word form or punctuation symbol.</li>\n<li>LEMMA - <b>(Lemma)</b> Fine-grained part-of-speech tag, where the tagset depends on the\nlanguage, or identical to the coarse-grained part-of-speech tag if not available.</li>\n<li>PLEMMA - <b>(ignored)</b> Automatically predicted lemma of FORM</li>\n<li>POS - <b>(POS)</b> Fine-grained part-of-speech tag, where the tagset depends on the language,\nor identical to the coarse-grained part-of-speech tag if not available.</li>\n<li>PPOS - <b>(ignored)</b> Automatically predicted major POS by a language-specific tagger</li>\n<li>FEAT - <b>(MorphologicalFeatures)</b> Unordered set of syntactic and/or morphological features (depending\non the particular language), separated by a vertical bar (|), or an underscore if not available.</li>\n<li>PFEAT - <b>(ignored)</b> Automatically predicted morphological features (if applicable)</li>\n<li>HEAD - <b>(Dependency)</b> Head of the current token, which is either a value of ID or zero\n('0'). Note that depending on the original treebank annotation, there may be multiple tokens with\nan ID of zero.</li>\n<li>PHEAD - <b>(ignored)</b> Automatically predicted syntactic head</li>\n<li>DEPREL - <b>(Dependency)</b> Dependency relation to the HEAD. The set of dependency relations\ndepends on the particular language. 
Note that depending on the original treebank annotation, the\ndependency relation may be meaningfull or simply 'ROOT'.</li>\n<li>PDEPREL - <b>(ignored)</b> Automatically predicted dependency relation to PHEAD</li>\n<li>FILLPRED - <b>(ignored)</b> Contains 'Y' for argument-bearing tokens</li>\n<li>PRED - <b>(SemanticPredicate)</b> (sense) identifier of a semantic 'predicate' coming from a\ncurrent token</li>\n<li>APREDs - <b>(SemanticArgument)</b> Columns with argument labels for each semantic predicate\n(in the ID order)</li>\n</ol>\n\n<p>Sentences are separated by a blank new line.</p>", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
/* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/io/conll/Conll2009Reader.class */
public class Conll2009Reader extends JCasResourceCollectionReader_ImplBase {
    /** Name of the parameter selecting the character encoding used to decode the input. */
    public static final String PARAM_ENCODING = "sourceEncoding";

    @ConfigurationParameter(name = PARAM_ENCODING, mandatory = true, defaultValue = {"UTF-8"})
    private String encoding;

    /** Name of the parameter controlling whether POS annotations are created from the POS column. */
    public static final String PARAM_READ_POS = "readPOS";

    @ConfigurationParameter(name = PARAM_READ_POS, mandatory = true, defaultValue = {"true"})
    private boolean readPos;

    /** Name of the parameter overriding the tag set passed to the POS mapping provider. */
    public static final String PARAM_POS_TAG_SET = "POSTagSet";

    @ConfigurationParameter(name = PARAM_POS_TAG_SET, mandatory = false, description = "Use this part-of-speech tag set to use to resolve the tag set mapping instead of using the\ntag set defined as part of the model meta data. This can be useful if a custom model is\nspecified which does not have such meta data, or it can be used in readers.")
    protected String posTagset;

    /** Name of the parameter overriding the location of the POS mapping. */
    public static final String PARAM_POS_MAPPING_LOCATION = "POSMappingLocation";

    @ConfigurationParameter(name = PARAM_POS_MAPPING_LOCATION, mandatory = false, description = "Load the part-of-speech tag to UIMA type mapping from this location instead of locating\nthe mapping automatically.")
    protected String posMappingLocation;

    /** Name of the parameter controlling whether MorphologicalFeatures are created from the FEAT column. */
    public static final String PARAM_READ_MORPH = "readMorph";

    @ConfigurationParameter(name = PARAM_READ_MORPH, mandatory = true, defaultValue = {"true"})
    private boolean readMorph;

    /** Name of the parameter controlling whether Lemma annotations are created from the LEMMA column. */
    public static final String PARAM_READ_LEMMA = "readLemma";

    @ConfigurationParameter(name = PARAM_READ_LEMMA, mandatory = true, defaultValue = {"true"})
    private boolean readLemma;

    /** Name of the parameter controlling whether dependencies are created from the HEAD/DEPREL columns. */
    public static final String PARAM_READ_DEPENDENCY = "readDependency";

    @ConfigurationParameter(name = PARAM_READ_DEPENDENCY, mandatory = true, defaultValue = {"true"})
    private boolean readDependency;

    /** Name of the parameter controlling whether predicates/arguments are created from the PRED/APRED columns. */
    public static final String PARAM_READ_SEMANTIC_PREDICATE = "readSemanticPredicate";

    @ConfigurationParameter(name = PARAM_READ_SEMANTIC_PREDICATE, mandatory = true, defaultValue = {"true"})
    private boolean readSemanticPredicate;

    /** Column value marking "no value available". */
    private static final String UNUSED = "_";

    // Zero-based indexes of the tab-separated columns consumed by this reader.
    private static final int ID = 0;
    private static final int FORM = 1;
    private static final int LEMMA = 2;
    private static final int POS = 4;
    private static final int FEAT = 6;
    private static final int HEAD = 8;
    private static final int DEPREL = 10;
    private static final int PRED = 13;
    /** Index of the first argument column; the n-th predicate of a sentence uses column APRED + n. */
    private static final int APRED = 14;

    private MappingProvider posMappingProvider;

    /**
     * Initializes the base reader and creates the POS mapping provider from the configured
     * mapping location, tag set and language.
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        posMappingProvider = MappingProviderFactory.createPosMappingProvider(posMappingLocation,
                posTagset, getLanguage());
    }

    /**
     * Reads the next resource into the given CAS.
     *
     * @param jCas the CAS to fill.
     * @throws IOException if the resource cannot be read or contains malformed data.
     * @throws CollectionException declared by the overridden method.
     */
    @Override
    public void getNext(JCas jCas) throws IOException, CollectionException {
        ResourceCollectionReaderBase.Resource resource = nextFile();
        initCas(jCas, resource);

        InputStream stream = null;
        BufferedReader reader = null;
        try {
            stream = resource.getInputStream();
            reader = new BufferedReader(new InputStreamReader(stream, encoding));
            convert(jCas, reader);
        } finally {
            IOUtils.closeQuietly(reader);
            // The raw stream is closed separately so that it is not leaked when creating the
            // decoding reader fails (e.g. unsupported encoding name).
            IOUtils.closeQuietly(stream);
        }
    }

    /**
     * Converts CoNLL-2009 data from the given reader into annotations in the given CAS.
     *
     * <p>Each token's FORM is appended to the document text followed by a single space, and each
     * sentence is followed by a line break. Optional columns missing from a row are treated like
     * the unused marker {@code _}.</p>
     *
     * @param jCas the CAS receiving text and annotations.
     * @param bufferedReader the source of the tab-separated data.
     * @throws IOException if reading fails, the POS mapping cannot be configured, a row lacks the
     *         ID/FORM columns, or an ID/HEAD value is not an integer.
     */
    public void convert(JCas jCas, BufferedReader bufferedReader) throws IOException {
        if (readPos) {
            try {
                posMappingProvider.configure(jCas.getCas());
            } catch (AnalysisEngineProcessException e) {
                throw new IOException(e);
            }
        }

        JCasBuilder doc = new JCasBuilder(jCas);
        List<String[]> rows;
        while ((rows = readSentence(bufferedReader)) != null) {
            if (rows.isEmpty()) {
                // Consecutive separators or markup lines produce empty sentences; skip them.
                continue;
            }

            int sentenceBegin = doc.getPosition();
            int sentenceEnd = sentenceBegin;
            Map<Integer, Token> tokensById = new HashMap<>();
            List<SemanticPredicate> predicates = new ArrayList<>();

            for (String[] row : rows) {
                Token token = addToken(jCas, doc, row, tokensById, predicates);
                sentenceEnd = token.getEnd();
            }

            if (readDependency) {
                addDependencies(jCas, rows, tokensById);
            }
            if (readSemanticPredicate) {
                addSemanticArguments(jCas, rows, tokensById, predicates);
            }

            new Sentence(jCas, sentenceBegin, sentenceEnd).addToIndexes();
            doc.add("\n");
        }
        doc.close();
    }

    /** Adds one token plus its enabled lemma, POS, morphology and predicate annotations. */
    private Token addToken(JCas jCas, JCasBuilder doc, String[] row,
            Map<Integer, Token> tokensById, List<SemanticPredicate> predicates)
        throws IOException {
        if (row.length <= FORM) {
            throw new IOException("Malformed CoNLL-2009 line: expected at least the ID and FORM "
                    + "columns but found " + row.length + " column(s)");
        }
        int id = parseIndex(row[ID], "ID");

        Token token = doc.add(row[FORM], Token.class);
        tokensById.put(id, token);
        doc.add(" ");

        String lemmaValue = column(row, LEMMA);
        if (readLemma && !UNUSED.equals(lemmaValue)) {
            Lemma lemma = new Lemma(jCas, token.getBegin(), token.getEnd());
            lemma.setValue(lemmaValue);
            lemma.addToIndexes();
            token.setLemma(lemma);
        }

        String posValue = column(row, POS);
        if (readPos && !UNUSED.equals(posValue)) {
            POS pos = (POS) jCas.getCas().createAnnotation(
                    posMappingProvider.getTagType(posValue), token.getBegin(), token.getEnd());
            pos.setPosValue(posValue);
            pos.addToIndexes();
            token.setPos(pos);
        }

        String featValue = column(row, FEAT);
        if (readMorph && !UNUSED.equals(featValue)) {
            MorphologicalFeatures morph = new MorphologicalFeatures(jCas, token.getBegin(),
                    token.getEnd());
            morph.setValue(featValue);
            morph.addToIndexes();
        }

        String predValue = column(row, PRED);
        if (readSemanticPredicate && !UNUSED.equals(predValue)) {
            SemanticPredicate predicate = new SemanticPredicate(jCas, token.getBegin(),
                    token.getEnd());
            predicate.setCategory(predValue);
            predicate.addToIndexes();
            // Order matters: the n-th predicate collected here owns argument column APRED + n.
            predicates.add(predicate);
        }

        return token;
    }

    /** Creates a dependency (or ROOT when HEAD is 0) for every row with a DEPREL value. */
    private void addDependencies(JCas jCas, List<String[]> rows, Map<Integer, Token> tokensById)
        throws IOException {
        for (String[] row : rows) {
            String relationType = column(row, DEPREL);
            if (UNUSED.equals(relationType)) {
                continue;
            }

            Token dependent = tokensById.get(parseIndex(row[ID], "ID"));
            int headId = parseIndex(column(row, HEAD), "HEAD");

            Dependency relation;
            if (headId == 0) {
                // A root relation uses the token itself as governor.
                relation = new ROOT(jCas);
                relation.setGovernor(dependent);
            } else {
                relation = new Dependency(jCas);
                relation.setGovernor(tokensById.get(headId));
            }
            relation.setDependent(dependent);
            relation.setDependencyType(relationType);
            relation.setBegin(dependent.getBegin());
            relation.setEnd(dependent.getEnd());
            relation.addToIndexes();
        }
    }

    /** Collects the arguments of each predicate from its APRED column and attaches them. */
    private void addSemanticArguments(JCas jCas, List<String[]> rows,
            Map<Integer, Token> tokensById, List<SemanticPredicate> predicates)
        throws IOException {
        for (int p = 0; p < predicates.size(); p++) {
            int argumentColumn = APRED + p;
            List<SemanticArgument> arguments = new ArrayList<>();
            for (String[] row : rows) {
                String role = column(row, argumentColumn);
                if (UNUSED.equals(role)) {
                    continue;
                }
                Token token = tokensById.get(parseIndex(row[ID], "ID"));
                SemanticArgument argument = new SemanticArgument(jCas, token.getBegin(),
                        token.getEnd());
                argument.setRole(role);
                argument.addToIndexes();
                arguments.add(argument);
            }
            predicates.get(p).setArguments(FSCollectionFactory.createFSArray(jCas, arguments));
        }
    }

    /** Returns the value of a column, or the unused marker when the row is too short. */
    private static String column(String[] row, int index) {
        return index < row.length ? row[index] : UNUSED;
    }

    /** Parses an integer column value, reporting malformed input as an IOException. */
    private static int parseIndex(String value, String columnName) throws IOException {
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            throw new IOException("Invalid " + columnName + " value [" + value
                    + "] in CoNLL-2009 input", e);
        }
    }

    /**
     * Reads rows up to the next sentence boundary: a blank line, a line starting with
     * {@code <}, or the end of the input.
     *
     * @return the tab-split rows of the sentence (possibly empty), or {@code null} when the end
     *         of the input is reached without reading any row.
     */
    private static List<String[]> readSentence(BufferedReader bufferedReader) throws IOException {
        List<String[]> rows = new ArrayList<>();
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            if (StringUtils.isBlank(line) || line.startsWith("<")) {
                return rows;
            }
            rows.add(line.split("\t"));
        }
        // End of input: signal exhaustion only if no pending rows remain to be returned.
        return rows.isEmpty() ? null : rows;
    }
}
