package de.tudarmstadt.ukp.dkpro.core.io.conll;

import de.tudarmstadt.ukp.dkpro.core.api.io.IobDecoder;
import de.tudarmstadt.ukp.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.io.ResourceCollectionReaderBase;
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceObjectProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.factory.JCasBuilder;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

/**
 * Collection reader for the CoNLL 2002 named entity format.
 *
 * <p>Every non-blank input line must consist of exactly two fields separated by a single space:
 * the token text (column {@link #FORM}) and its IOB-encoded named entity tag (column
 * {@link #IOB}). A blank line ends a sentence. For every sentence the reader creates one
 * {@link Token} per line, one {@link Sentence} spanning the tokens and, when
 * {@link #PARAM_READ_NAMED_ENTITY} is enabled, hands the tags to an {@link IobDecoder}.</p>
 *
 * <p>The document text is rebuilt from the tokens: every token is followed by a single space and
 * every sentence by a line break.</p>
 */
@TypeCapability(outputs = {"de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", "de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.io.conll.Conll2002Reader", description = "<p>Reads the CoNLL 2002 named entity format. The columns are separated by a single space, like\nillustrated below.</p>\n\n<pre><code>\nWolff      B-PER\n,          O\ncurrently  O\na          O\njournalist O\nin         O\nArgentina  B-LOC\n,          O\nplayed     O\nwith       O\nDel        B-PER\nBosque     I-PER\nin         O\nthe        O\nfinal      O\nyears      O\nof         O\nthe        O\nseventies  O\nin         O\nReal       B-ORG\nMadrid     I-ORG\n.          O\n</code></pre>\n\n<ol>\n<li>FORM - token</li>\n<li>NER - named entity (BIO encoded)</li>\n</ol>\n\n<p>Sentences are separated by a blank new line.</p>", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
/* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/io/conll/Conll2002Reader.class */
public class Conll2002Reader extends JCasResourceCollectionReader_ImplBase {
    /** Index of the column holding the token text. */
    private static final int FORM = 0;

    /** Index of the column holding the IOB-encoded named entity tag. */
    private static final int IOB = 1;

    /** Name of the configuration parameter selecting the character encoding of the input. */
    public static final String PARAM_ENCODING = "sourceEncoding";

    /** Name of the configuration parameter carrying the language. */
    public static final String PARAM_LANGUAGE = "language";

    /** Name of the configuration parameter controlling whether tags are interned. */
    public static final String PARAM_INTERN_TAGS = "internTags";

    /** Name of the configuration parameter controlling whether named entities are decoded. */
    public static final String PARAM_READ_NAMED_ENTITY = "readNamedEntity";

    @ConfigurationParameter(name = PARAM_ENCODING, mandatory = true, defaultValue = {"UTF-8"}, description = "Character encoding of the input data.")
    private String encoding;

    // Not read anywhere in this class; kept so the parameter remains declared on the component.
    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false, description = "The language.")
    private String language;

    @ConfigurationParameter(name = PARAM_INTERN_TAGS, mandatory = false, defaultValue = {"true"}, description = "Use the String#intern() method on tags. This is usually a good idea to avoid\nspamming the heap with thousands of strings representing only a few different tags.\n\nDefault: true")
    private boolean internTags;

    @ConfigurationParameter(name = PARAM_READ_NAMED_ENTITY, mandatory = true, defaultValue = {"true"}, description = "Write named entity information.\n\nDefault: true")
    private boolean namedEntityEnabled;

    /** Mapping provider handed to the {@link IobDecoder}; created in {@link #initialize}. */
    private MappingProvider namedEntityMappingProvider;

    /**
     * Initializes the base reader and creates the named entity mapping provider with its default
     * location and base type.
     *
     * @param uimaContext the UIMA context passed on to the super class.
     * @throws ResourceInitializationException if the super class fails to initialize.
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        MappingProvider provider = new MappingProvider();
        provider.setDefault(ResourceObjectProviderBase.LOCATION, "classpath:/there/is/no/mapping/yet");
        provider.setDefault(MappingProvider.BASE_TYPE, NamedEntity.class.getName());
        this.namedEntityMappingProvider = provider;
    }

    /**
     * Reads the next resource into the given CAS.
     *
     * @param jCas the CAS to fill.
     * @throws IOException if the resource cannot be read, is malformed, or if configuring the
     *         named entity mapping fails (the original exception is attached as the cause).
     * @throws CollectionException declared by the overridden method.
     */
    @Override
    public void getNext(JCas jCas) throws IOException, CollectionException {
        try {
            if (this.namedEntityEnabled) {
                this.namedEntityMappingProvider.configure(jCas.getCas());
            }

            ResourceCollectionReaderBase.Resource resource = nextFile();
            initCas(jCas, resource);

            BufferedReader reader = null;
            try {
                reader = new BufferedReader(
                        new InputStreamReader(resource.getInputStream(), this.encoding));
                convert(jCas, reader);
            } finally {
                // Closing is best-effort on purpose: a failing close() must not mask a read error
                // and must not fail an otherwise successful read. closeQuietly accepts null.
                IOUtils.closeQuietly(reader);
            }
        } catch (AnalysisEngineProcessException e) {
            throw new IOException(e);
        }
    }

    /**
     * Converts the complete content of the reader into document text, tokens, sentences and
     * (optionally) named entities.
     *
     * @param aJCas the CAS receiving text and annotations.
     * @param aReader the source of the CoNLL 2002 data; not closed by this method.
     * @throws IOException if reading fails or a line is malformed.
     */
    private void convert(JCas aJCas, BufferedReader aReader) throws IOException {
        JCasBuilder doc = new JCasBuilder(aJCas);
        IobDecoder decoder = new IobDecoder(aJCas.getCas(),
                JCasUtil.getType(aJCas, NamedEntity.class).getFeatureByBaseName("value"),
                this.namedEntityMappingProvider);
        decoder.setInternTags(this.internTags);

        List<String[]> words;
        while ((words = readSentence(aReader)) != null) {
            if (words.isEmpty()) {
                // Consecutive blank lines yield empty sentences; skip them.
                continue;
            }

            int sentenceBegin = doc.getPosition();
            int sentenceEnd = sentenceBegin;

            List<Token> tokens = new ArrayList<Token>(words.size());
            String[] namedEntityTags = new String[words.size()];

            int index = 0;
            for (String[] word : words) {
                Token token = doc.add(word[FORM], Token.class);
                // The sentence ends at the last token, i.e. before the separator added below.
                sentenceEnd = token.getEnd();
                doc.add(" ");

                tokens.add(token);
                namedEntityTags[index] = word[IOB];
                index++;
            }

            if (this.namedEntityEnabled) {
                decoder.decode(tokens, namedEntityTags);
            }

            new Sentence(aJCas, sentenceBegin, sentenceEnd).addToIndexes();
            doc.add("\n");
        }

        doc.close();
    }

    /**
     * Reads lines up to the next blank line or the end of the input.
     *
     * @param aReader the reader to consume lines from.
     * @return the fields of each line read, one two-element array per line; an empty list if a
     *         blank line was hit immediately; {@code null} if the end of the input was reached
     *         without reading any line.
     * @throws IOException if reading fails or a non-blank line does not split into exactly two
     *         fields on a single space.
     */
    private static List<String[]> readSentence(BufferedReader aReader) throws IOException {
        List<String[]> words = new ArrayList<String[]>();

        String line;
        while ((line = aReader.readLine()) != null) {
            if (StringUtils.isBlank(line)) {
                // A blank line terminates the sentence, even if nothing was collected yet.
                return words;
            }

            String[] fields = line.split(" ");
            if (fields.length != 2) {
                throw new IOException(
                        "Invalid file format. Line needs to have 2 space-separated fields, but found "
                                + fields.length + ": [" + line + "]");
            }
            words.add(fields);
        }

        // End of input: signal exhaustion only when there is no pending sentence to return.
        return words.isEmpty() ? null : words;
    }
}
