package com.clearnlp.nlp.train;

import com.clearnlp.classification.feature.JointFtrXml;
import com.clearnlp.classification.model.StringModel;
import com.clearnlp.classification.train.StringTrainSpace;
import com.clearnlp.component.AbstractStatisticalComponent;
import com.clearnlp.component.pos.AbstractPOSTagger;
import com.clearnlp.component.pos.DefaultPOSTagger;
import com.clearnlp.component.pos.EnglishPOSTagger;
import com.clearnlp.dependency.DEPTree;
import com.clearnlp.morphology.Embedding;
import com.clearnlp.nlp.NLPProcess;
import com.clearnlp.reader.AbstractReader;
import com.clearnlp.reader.JointReader;
import com.clearnlp.util.UTInput;
import com.clearnlp.util.UTXml;
import com.clearnlp.util.map.Prob1DMap;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.HashSet;
import java.util.Set;
import org.w3c.dom.Element;

/* loaded from: input_file:com/clearnlp/nlp/train/POSTrainer.class */
/**
 * Trainer for the part-of-speech tagging mode ({@code "pos"}).
 *
 * <p>Creates {@link EnglishPOSTagger} instances when the language string equals
 * {@link AbstractReader#LANG_EN} and {@link DefaultPOSTagger} instances otherwise.
 */
public class POSTrainer extends AbstractNLPTrainer {
    /**
     * Builds a trained component: a collector tagger is created from the training files and then
     * passed to {@code getTrainedComponentBoot}.
     *
     * @param element        configuration element; also used to look up the language
     * @param jointReader    reader used to iterate over the training files
     * @param jointFtrXmlArr feature templates
     * @param strArr         paths of the training files
     * @param i              index of the file in {@code strArr} that is skipped while collecting
     *                       word-forms (presumably the development fold -- confirm against callers)
     * @return the component returned by {@code getTrainedComponentBoot}
     */
    @Override // com.clearnlp.nlp.train.AbstractNLPTrainer
    protected AbstractStatisticalComponent<?> getComponent(Element element, JointReader jointReader, JointFtrXml[] jointFtrXmlArr, String[] strArr, int i) {
        AbstractPOSTagger collector = getCollector(element, jointReader, getLanguage(element), jointFtrXmlArr, strArr, i);
        return getTrainedComponentBoot(element, jointReader, collector, jointFtrXmlArr, strArr, i);
    }

    /**
     * Creates a tagger from feature templates, models and lexica.
     *
     * @param element        configuration element (not used here)
     * @param str            language identifier; must not be {@code null}
     * @param jointFtrXmlArr feature templates
     * @param stringModelArr statistical models
     * @param objArr         lexica passed through to the tagger constructor
     * @return an English tagger for {@link AbstractReader#LANG_EN}, a default tagger otherwise
     */
    @Override // com.clearnlp.nlp.train.AbstractNLPTrainer
    protected AbstractStatisticalComponent<?> getComponent(Element element, String str, JointFtrXml[] jointFtrXmlArr, StringModel[] stringModelArr, Object[] objArr) {
        if (isEnglish(str)) {
            return new EnglishPOSTagger(jointFtrXmlArr, stringModelArr, objArr);
        }
        return new DefaultPOSTagger(jointFtrXmlArr, stringModelArr, objArr);
    }

    /**
     * Creates a tagger from feature templates, train spaces and lexica, additionally passing the
     * models to the constructor when {@code stringModelArr} is not {@code null}.
     *
     * @param element             configuration element (not used here)
     * @param str                 language identifier; must not be {@code null}
     * @param jointFtrXmlArr      feature templates
     * @param stringTrainSpaceArr train spaces
     * @param stringModelArr      models, or {@code null} to use the constructor without models
     * @param objArr              lexica passed through to the tagger constructor
     * @return an English tagger for {@link AbstractReader#LANG_EN}, a default tagger otherwise
     */
    @Override // com.clearnlp.nlp.train.AbstractNLPTrainer
    protected AbstractStatisticalComponent<?> getComponent(Element element, String str, JointFtrXml[] jointFtrXmlArr, StringTrainSpace[] stringTrainSpaceArr, StringModel[] stringModelArr, Object[] objArr) {
        boolean withoutModels = stringModelArr == null;
        if (isEnglish(str)) {
            return withoutModels
                    ? new EnglishPOSTagger(jointFtrXmlArr, stringTrainSpaceArr, objArr)
                    : new EnglishPOSTagger(jointFtrXmlArr, stringTrainSpaceArr, stringModelArr, objArr);
        }
        return withoutModels
                ? new DefaultPOSTagger(jointFtrXmlArr, stringTrainSpaceArr, objArr)
                : new DefaultPOSTagger(jointFtrXmlArr, stringTrainSpaceArr, stringModelArr, objArr);
    }

    /**
     * Delegates to the single-argument {@code getStringTrainSpaces}; the lexica and the index are
     * ignored for this mode.
     */
    @Override // com.clearnlp.nlp.train.AbstractNLPTrainer
    protected StringTrainSpace[] getStringTrainSpaces(JointFtrXml[] jointFtrXmlArr, Object[] objArr, int i) {
        return getStringTrainSpaces(jointFtrXmlArr);
    }

    /** @return the mode name, {@code "pos"} */
    @Override // com.clearnlp.nlp.train.AbstractNLPTrainer
    public String getMode() {
        return "pos";
    }

    /**
     * Creates a tagger initialised with the lower-cased simplified word-forms gathered by
     * {@link #getLowerSimplifiedForms}, using the first feature template for the cutoff.
     *
     * @param element        configuration element (not used here)
     * @param jointReader    reader used to iterate over the training files
     * @param str            language identifier; must not be {@code null}
     * @param jointFtrXmlArr feature templates; element 0 supplies the document-frequency cutoff
     * @param strArr         paths of the training files
     * @param i              index of the file in {@code strArr} to skip
     * @return an English tagger for {@link AbstractReader#LANG_EN}, a default tagger otherwise
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public AbstractPOSTagger getCollector(Element element, JointReader jointReader, String str, JointFtrXml[] jointFtrXmlArr, String[] strArr, int i) {
        Set<String> lowerSimplifiedForms = getLowerSimplifiedForms(jointReader, jointFtrXmlArr[0], strArr, i);
        if (isEnglish(str)) {
            return new EnglishPOSTagger(jointFtrXmlArr, lowerSimplifiedForms);
        }
        return new DefaultPOSTagger(jointFtrXmlArr, lowerSimplifiedForms);
    }

    /**
     * Collects lower-cased simplified word-forms from every training file except the one at
     * index {@code i}.
     *
     * <p>For each file the distinct forms are gathered into a set and that set is added to a
     * {@link Prob1DMap} once, so each form is counted at most once per file. The result is
     * {@code Prob1DMap.toSet} applied with the template's document-frequency cutoff.
     *
     * @param jointReader reader that is opened and closed once per file
     * @param jointFtrXml feature template supplying the document-frequency cutoff
     * @param strArr      paths of the training files
     * @param i           index of the file to skip
     * @return the set produced by {@code Prob1DMap.toSet} for the configured cutoff
     */
    protected Set<String> getLowerSimplifiedForms(JointReader jointReader, JointFtrXml jointFtrXml, String[] strArr, int i) {
        Set<String> formsInFile = new HashSet<String>();
        Prob1DMap documentFrequencies = new Prob1DMap();
        LOG.info("Collecting word-forms:\n");
        for (int fileIndex = 0; fileIndex < strArr.length; fileIndex++) {
            if (fileIndex == i) {
                continue;
            }
            jointReader.open(UTInput.createBufferedFileReader(strArr[fileIndex]));
            // Start from an empty set so that counts are per file, not cumulative.
            formsInFile.clear();
            try {
                DEPTree tree;
                while ((tree = jointReader.next()) != null) {
                    NLPProcess.simplifyForms(tree);
                    // Index 0 is skipped, as in the original loop (presumably the root node).
                    for (int nodeIndex = 1, size = tree.size(); nodeIndex < size; nodeIndex++) {
                        formsInFile.add(tree.get(nodeIndex).lowerSimplifiedForm);
                    }
                }
            } finally {
                // Release the reader even when reading or simplifying a tree fails.
                jointReader.close();
            }
            documentFrequencies.addAll(formsInFile);
            LOG.debug(".");
        }
        LOG.debug("\n");
        return documentFrequencies.toSet(jointFtrXml.getDocumentFrequencyCutoff());
    }

    /**
     * Loads a serialized {@link Embedding} from the file named by the first {@code embedding}
     * child element of {@code element}.
     *
     * <p>NOTE(review): this uses native Java deserialization; the configured path must point to
     * a trusted file.
     *
     * @param element configuration element to search
     * @return the deserialized embedding, or {@code null} when no {@code embedding} element
     *         exists or loading fails (the failure is logged)
     */
    protected Embedding getEmbedding(Element element) {
        Element embeddingElement = UTXml.getFirstElementByTagName(element, "embedding");
        if (embeddingElement == null) {
            return null;
        }
        String path = null;
        FileInputStream fileStream = null;
        try {
            path = UTXml.getTrimmedTextContent(embeddingElement);
            fileStream = new FileInputStream(path);
            ObjectInputStream objectStream = new ObjectInputStream(new BufferedInputStream(fileStream));
            return (Embedding) objectStream.readObject();
        } catch (Exception e) {
            // Best-effort load: keep returning null on failure, but report through the logger.
            LOG.error("Failed to load embedding from: " + path, e);
            return null;
        } finally {
            // Closing the underlying file stream releases the handle even if a wrapper
            // constructor or readObject() threw.
            if (fileStream != null) {
                try {
                    fileStream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if close fails after reading.
                }
            }
        }
    }

    /** Returns whether {@code language} equals {@link AbstractReader#LANG_EN}. */
    private static boolean isEnglish(String language) {
        return language.equals(AbstractReader.LANG_EN);
    }
}
