package com.clearnlp.component.pos;

import com.clearnlp.classification.feature.FtrToken;
import com.clearnlp.classification.feature.JointFtrXml;
import com.clearnlp.classification.instance.StringInstance;
import com.clearnlp.classification.model.StringModel;
import com.clearnlp.classification.prediction.StringPrediction;
import com.clearnlp.classification.train.StringTrainSpace;
import com.clearnlp.classification.vector.StringFeatureVector;
import com.clearnlp.component.AbstractStatisticalComponent;
import com.clearnlp.component.evaluation.POSEval;
import com.clearnlp.component.morph.AbstractMPAnalyzer;
import com.clearnlp.component.state.POSState;
import com.clearnlp.dependency.DEPNode;
import com.clearnlp.dependency.DEPTree;
import com.clearnlp.nlp.NLPProcess;
import com.clearnlp.util.UTArray;
import com.clearnlp.util.UTString;
import com.clearnlp.util.map.Prob2DMap;
import com.clearnlp.util.pair.Pair;
import com.clearnlp.util.pair.StringDoublePair;
import com.google.common.collect.Lists;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;

/**
 * Base class for part-of-speech taggers.
 *
 * <p>The component walks the nodes of a {@link DEPTree} through a {@link POSState} and, depending
 * on the mode flags exposed by the superclass ({@code isLexica()}, {@code isTrain()},
 * {@code isBootstrap()}, {@code isDevelop()}, {@code isDecode()}), either collects lexica,
 * gathers training instances, or assigns {@code pos} labels to the nodes.
 *
 * <p>Subclasses supply the morphological analyzer ({@link #initMorphologicalAnalyzer()}) and any
 * rule-based shortcuts ({@link #applyRules(POSState)}).
 */
public abstract class AbstractPOSTagger extends AbstractStatisticalComponent<POSState> {
    /** Index of the lower-simplified-form set within the lexica array. */
    protected final int LEXICA_LOWER_SIMPLIFIED_FORMS = 0;
    /** Index of the ambiguity-class map within the lexica array. */
    protected final int LEXICA_AMBIGUITY_CLASSES = 1;

    /** Lower simplified forms used to gate the form/lemma features and lexica collection. */
    protected Set<String> s_lsfs;
    /** Counts of (simplified form, gold label) pairs; only created by the lexica constructor. */
    protected Prob2DMap p_ambi;
    /** Maps a simplified form to its ambiguity class string (labels joined by {@code "_"}). */
    protected Map<String, String> m_ambi;
    /** Morphological analyzer applied to every node after it has been labelled. */
    protected AbstractMPAnalyzer mp_analyzer;

    /**
     * Creates a tagger that starts from a given set of lower simplified forms and an empty
     * ambiguity-class counter. The morphological analyzer is not initialized by this constructor.
     *
     * @param jointFtrXmlArr feature templates, passed to the superclass
     * @param set            the lower simplified forms to use
     */
    public AbstractPOSTagger(JointFtrXml[] jointFtrXmlArr, Set<String> set) {
        super(jointFtrXmlArr);
        // NOTE: the LEXICA_* constants are final fields with initializers and must not be
        // re-assigned here; doing so is rejected by the compiler.
        this.s_lsfs = set;
        this.p_ambi = new Prob2DMap();
    }

    /**
     * Creates a tagger from feature templates, train spaces and lexica.
     *
     * @param jointFtrXmlArr      feature templates, passed to the superclass
     * @param stringTrainSpaceArr train spaces, passed to the superclass
     * @param objArr              lexica, passed to the superclass (presumably routed to
     *                            {@link #initLexia(Object[])} - confirm in the superclass)
     */
    public AbstractPOSTagger(JointFtrXml[] jointFtrXmlArr, StringTrainSpace[] stringTrainSpaceArr, Object[] objArr) {
        super(jointFtrXmlArr, stringTrainSpaceArr, objArr);
        initMorphologicalAnalyzer();
    }

    /**
     * Creates a tagger from feature templates, models and lexica, evaluated with a {@link POSEval}.
     *
     * @param jointFtrXmlArr feature templates, passed to the superclass
     * @param stringModelArr models, passed to the superclass
     * @param objArr         lexica, passed to the superclass
     */
    public AbstractPOSTagger(JointFtrXml[] jointFtrXmlArr, StringModel[] stringModelArr, Object[] objArr) {
        super(jointFtrXmlArr, stringModelArr, objArr, new POSEval());
        initMorphologicalAnalyzer();
    }

    /**
     * Creates a tagger from feature templates, train spaces, models and lexica.
     *
     * @param jointFtrXmlArr      feature templates, passed to the superclass
     * @param stringTrainSpaceArr train spaces, passed to the superclass
     * @param stringModelArr      models, passed to the superclass
     * @param objArr              lexica, passed to the superclass
     */
    public AbstractPOSTagger(JointFtrXml[] jointFtrXmlArr, StringTrainSpace[] stringTrainSpaceArr, StringModel[] stringModelArr, Object[] objArr) {
        super(jointFtrXmlArr, stringTrainSpaceArr, stringModelArr, objArr);
        initMorphologicalAnalyzer();
    }

    /**
     * Creates a tagger from a serialized stream.
     *
     * @param objectInputStream the stream handed to the superclass constructor
     */
    public AbstractPOSTagger(ObjectInputStream objectInputStream) {
        super(objectInputStream);
        initMorphologicalAnalyzer();
    }

    /** Initializes {@link #mp_analyzer}; invoked from the constructors that tag nodes. */
    protected abstract void initMorphologicalAnalyzer();

    /**
     * Gives subclasses a chance to handle the current node without the statistical model.
     *
     * @param pOSState the current tagging state
     * @return {@code true} if the node was handled, in which case no label is predicted for it
     */
    protected abstract boolean applyRules(POSState pOSState);

    /**
     * Loads the default component data followed by the lexica, then closes the stream.
     *
     * @param objectInputStream the stream to read from; closed before returning normally
     * @throws Exception if reading fails
     */
    @Override
    public void load(ObjectInputStream objectInputStream) throws Exception {
        loadDefault(objectInputStream);
        loadLexica(objectInputStream);
        objectInputStream.close();
    }

    /**
     * Saves the default component data followed by the lexica, then closes the stream.
     *
     * @param objectOutputStream the stream to write to; closed before returning normally
     * @throws Exception if writing fails
     */
    @Override
    public void save(ObjectOutputStream objectOutputStream) throws Exception {
        saveDefault(objectOutputStream);
        saveLexica(objectOutputStream);
        objectOutputStream.close();
    }

    /**
     * Installs the lexica from an array laid out as
     * {@code [lower simplified forms, ambiguity classes]}.
     *
     * @param objArr the lexica array; element 0 must be a {@code Set<String>} and element 1 a
     *               {@code Map<String, String>}
     */
    @Override
    @SuppressWarnings("unchecked") // element types are fixed by saveLexica()/getLexica()
    protected void initLexia(Object[] objArr) {
        // The constants are compile-time constants, so these are the literal indices 0 and 1.
        this.s_lsfs = (Set<String>) objArr[LEXICA_LOWER_SIMPLIFIED_FORMS];
        this.m_ambi = (Map<String, String>) objArr[LEXICA_AMBIGUITY_CLASSES];
    }

    /**
     * Reads two objects from the stream (forms, then ambiguity classes) and installs them.
     *
     * @param objectInputStream the stream to read from
     * @throws Exception if reading fails
     */
    protected void loadLexica(ObjectInputStream objectInputStream) throws Exception {
        this.LOG.info("Loading lexica.\n");
        // Array-initializer elements are evaluated left to right, matching the write order.
        initLexia(new Object[]{objectInputStream.readObject(), objectInputStream.readObject()});
    }

    /**
     * Writes the lower simplified forms followed by the ambiguity classes.
     *
     * @param objectOutputStream the stream to write to
     * @throws Exception if writing fails
     */
    protected void saveLexica(ObjectOutputStream objectOutputStream) throws Exception {
        this.LOG.info("Saving lexica.\n");
        objectOutputStream.writeObject(this.s_lsfs);
        objectOutputStream.writeObject(this.m_ambi);
    }

    /**
     * Returns the lexica as {@code [lower simplified forms, ambiguity classes]}.
     *
     * <p>When {@code isLexica()} is true the ambiguity classes are computed from the collected
     * counts; otherwise the currently installed map is returned.
     *
     * @return a new two-element array
     */
    @Override
    public Object[] getLexica() {
        Object[] lexica = new Object[2];
        lexica[LEXICA_LOWER_SIMPLIFIED_FORMS] = this.s_lsfs;
        lexica[LEXICA_AMBIGUITY_CLASSES] = isLexica() ? getAmbiguityClasses() : this.m_ambi;
        return lexica;
    }

    /** @return the set of lower simplified forms (the internal set, not a copy) */
    public Set<String> getLowerSimplifiedForms() {
        return this.s_lsfs;
    }

    /** Removes every entry from the set of lower simplified forms. */
    public void clearLowerSimplifiedForms() {
        this.s_lsfs.clear();
    }

    /**
     * Builds the ambiguity-class map from the collected counts.
     *
     * <p>For each key of {@link #p_ambi}, the labels whose value exceeds the threshold taken
     * from the first feature template are joined with {@code "_"} in descending order. Keys
     * with no label above the threshold are omitted.
     *
     * @return a new map from form to ambiguity class
     */
    private Map<String, String> getAmbiguityClasses() {
        double threshold = this.f_xmls[0].getAmbiguityClassThreshold();
        Map<String, String> classes = new HashMap<String, String>();
        for (String form : this.p_ambi.keySet()) {
            StringDoublePair[] probs = this.p_ambi.getProb1D(form);
            UTArray.sortReverseOrder(probs);
            StringBuilder sb = new StringBuilder();
            for (StringDoublePair prob : probs) {
                // Sorted in reverse order, so nothing after this entry can pass the threshold.
                if (prob.d <= threshold) {
                    break;
                }
                sb.append("_");
                sb.append(prob.s);
            }
            if (sb.length() > 0) {
                // Drop the leading separator.
                classes.put(form, sb.substring(1));
            }
        }
        return classes;
    }

    /** @return the default label set provided by the superclass */
    @Override
    public Set<String> getLabels() {
        return getDefaultLabels();
    }

    /**
     * Processes one tree and, when {@code isDevelop()} is true, updates the evaluator with the
     * tree and its gold labels.
     *
     * @param dEPTree the tree to process
     */
    @Override
    public void process(DEPTree dEPTree) {
        POSState state = init(dEPTree);
        processAux(state);
        if (isDevelop()) {
            this.e_eval.countAccuracy(state.getTree(), state.getGoldLabels());
        }
    }

    /**
     * Creates the state for a tree and simplifies its word forms. Unless {@code isDecode()} is
     * true, the tree's POS tags are stored as gold labels and then cleared from the tree.
     *
     * @param dEPTree the tree to process
     * @return the freshly created state
     */
    protected POSState init(DEPTree dEPTree) {
        POSState state = new POSState(dEPTree);
        NLPProcess.simplifyForms(dEPTree);
        if (!isDecode()) {
            state.setGoldLabels(dEPTree.getPOSTags());
            dEPTree.clearPOSTags();
        }
        return state;
    }

    /**
     * Collects lexica when {@code isLexica()} is true; otherwise tags the tree and, when
     * {@code isTrainOrBootstrap()} is true, adds the produced instances to the first train space.
     *
     * @param pOSState the state to process
     */
    protected void processAux(POSState pOSState) {
        if (isLexica()) {
            addLexica(pOSState);
            return;
        }
        List<StringInstance> instances = tag(pOSState);
        if (isTrainOrBootstrap()) {
            this.s_spaces[0].addInstances(instances);
        }
    }

    /**
     * Shifts through the state until {@code shift()} returns {@code null} and, for each node
     * whose lower simplified form is in {@link #s_lsfs}, records its simplified form together
     * with the state's gold label.
     *
     * @param pOSState the state to walk
     */
    protected void addLexica(POSState pOSState) {
        DEPNode node;
        while ((node = pOSState.shift()) != null) {
            if (this.s_lsfs.contains(node.lowerSimplifiedForm)) {
                this.p_ambi.add(node.simplifiedForm, pOSState.getGoldLabel());
            }
        }
    }

    /**
     * Shifts through the state until {@code shift()} returns {@code null}. Each node that
     * {@link #applyRules(POSState)} does not handle gets its {@code pos} set from
     * {@link #getLabel(List, POSState)}; every node is then passed to the morphological analyzer.
     *
     * @param pOSState the state to walk
     * @return the instances collected while labelling (may be empty)
     */
    protected List<StringInstance> tag(POSState pOSState) {
        List<StringInstance> instances = Lists.newArrayList();
        DEPNode node;
        while ((node = pOSState.shift()) != null) {
            if (!applyRules(pOSState)) {
                node.pos = getLabel(instances, pOSState);
            }
            this.mp_analyzer.analyze(node);
        }
        return instances;
    }

    /**
     * Produces the label for the current node of the state.
     *
     * <ul>
     *   <li>{@code isTrain()}: returns the gold label and records it as an instance.</li>
     *   <li>{@code isDevelopOrDecode()}: returns the predicted label.</li>
     *   <li>{@code isBootstrap()}: returns the predicted label and records the gold label as an
     *       instance.</li>
     * </ul>
     * Instances are only recorded when the feature vector is non-empty.
     *
     * @param list     the list that receives new instances
     * @param pOSState the current state
     * @return the label, or {@code null} if none of the modes above applies
     */
    private String getLabel(List<StringInstance> list, POSState pOSState) {
        StringFeatureVector vector = getFeatureVector(this.f_xmls[0], pOSState);
        String label = null;
        if (isTrain()) {
            label = pOSState.getGoldLabel();
            if (vector.size() > 0) {
                list.add(new StringInstance(label, vector));
            }
        } else if (isDevelopOrDecode()) {
            label = getAutoLabel(vector, pOSState);
        } else if (isBootstrap()) {
            label = getAutoLabel(vector, pOSState);
            if (vector.size() > 0) {
                list.add(new StringInstance(pOSState.getGoldLabel(), vector));
            }
        }
        return label;
    }

    /**
     * Predicts the two best labels with the first model and returns the best one. If the score
     * gap between them is below 1.0, the runner-up is stored in the state as a second label.
     *
     * @param stringFeatureVector the features of the current node
     * @param pOSState            the current state
     * @return the top-scoring label
     */
    private String getAutoLabel(StringFeatureVector stringFeatureVector, POSState pOSState) {
        Pair<StringPrediction, StringPrediction> top2 = this.s_models[0].predictTwo(stringFeatureVector);
        StringPrediction best = top2.o1;
        StringPrediction second = top2.o2;
        if (best.score - second.score < 1.0d) {
            pOSState.add2ndLabel(second.label);
        }
        return best.label;
    }

    /**
     * Extracts a single feature value for the node addressed by a feature token.
     *
     * <p>Handled fields, in order: simplified form, lower simplified form and lemma (each only
     * when the node's lower simplified form is in {@link #s_lsfs}), POS tag, ambiguity class,
     * boolean features, named {@code feat} values, and fixed-length prefixes/suffixes of the
     * lower simplified form.
     *
     * @param ftrToken the feature token describing which node and which field to read
     * @param pOSState the current state
     * @return the feature value, or {@code null} if the node is absent or the feature does not
     *         apply
     * @throws IllegalArgumentException if a boolean feature index outside 0-9 is requested
     */
    @Override
    public String getField(FtrToken ftrToken, POSState pOSState) {
        DEPNode node = pOSState.getNode(ftrToken);
        if (node == null) {
            return null;
        }
        if (ftrToken.isField("sf")) {
            return containsLowerSimplifiedForm(node) ? node.simplifiedForm : null;
        }
        if (ftrToken.isField(JointFtrXml.F_LOWER_SIMPLIFIED_FORM)) {
            return containsLowerSimplifiedForm(node) ? node.lowerSimplifiedForm : null;
        }
        if (ftrToken.isField(JointFtrXml.F_LEMMA)) {
            return containsLowerSimplifiedForm(node) ? node.lemma : null;
        }
        if (ftrToken.isField("p")) {
            return node.pos;
        }
        if (ftrToken.isField(JointFtrXml.F_AMBIGUITY_CLASS)) {
            return this.m_ambi.get(node.simplifiedForm);
        }

        Matcher booleanMatcher = JointFtrXml.P_BOOLEAN.matcher(ftrToken.field);
        if (booleanMatcher.find()) {
            return getBooleanFeatureField(Integer.parseInt(booleanMatcher.group(1)), ftrToken, node, pOSState);
        }

        Matcher featMatcher = JointFtrXml.P_FEAT.matcher(ftrToken.field);
        if (featMatcher.find()) {
            return node.getFeat(featMatcher.group(1));
        }

        Matcher prefixMatcher = JointFtrXml.P_PREFIX.matcher(ftrToken.field);
        if (prefixMatcher.find()) {
            int prefixLength = Integer.parseInt(prefixMatcher.group(1));
            if (prefixLength <= node.lowerSimplifiedForm.length()) {
                return node.lowerSimplifiedForm.substring(0, prefixLength);
            }
            return null;
        }

        Matcher suffixMatcher = JointFtrXml.P_SUFFIX.matcher(ftrToken.field);
        if (suffixMatcher.find()) {
            int suffixLength = Integer.parseInt(suffixMatcher.group(1));
            int formLength = node.lowerSimplifiedForm.length();
            if (suffixLength <= formLength) {
                return node.lowerSimplifiedForm.substring(formLength - suffixLength, formLength);
            }
        }
        return null;
    }

    /**
     * Evaluates one of the numbered boolean features on a node.
     *
     * @param index    the feature number parsed from the field (0-9)
     * @param ftrToken the feature token; its {@code field} is the value returned when the
     *                 feature fires
     * @param node     the node to inspect
     * @param pOSState the current state, consulted for the first/last-node features
     * @return {@code ftrToken.field} if the feature fires, otherwise {@code null}
     * @throws IllegalArgumentException if {@code index} is not in 0-9
     */
    private String getBooleanFeatureField(int index, FtrToken ftrToken, DEPNode node, POSState pOSState) {
        boolean fires;
        switch (index) {
            case 0:
                fires = UTString.isAllUpperCase(node.simplifiedForm);
                break;
            case 1:
                fires = UTString.isAllLowerCase(node.simplifiedForm);
                break;
            case 2:
                // Capitalization is only counted when the node is not the first input node.
                fires = UTString.beginsWithUpperCase(node.simplifiedForm) && !pOSState.isInputFirstNode();
                break;
            case 3:
                fires = UTString.getNumOfCapitalsNotAtBeginning(node.simplifiedForm) == 1;
                break;
            case 4:
                fires = UTString.getNumOfCapitalsNotAtBeginning(node.simplifiedForm) > 1;
                break;
            case 5:
                fires = node.simplifiedForm.contains(".");
                break;
            case 6:
                fires = UTString.containsDigit(node.simplifiedForm);
                break;
            case 7:
                fires = node.simplifiedForm.contains("-");
                break;
            case 8:
                fires = pOSState.isInputLastNode();
                break;
            case 9:
                fires = pOSState.isInputFirstNode();
                break;
            default:
                throw new IllegalArgumentException("Unsupported feature: " + index);
        }
        return fires ? ftrToken.field : null;
    }

    /**
     * Extracts a multi-valued feature for the node addressed by a feature token: the prefixes
     * or suffixes of the node's lower simplified form, as produced by
     * {@code UTString.getPrefixes} / {@code UTString.getSuffixes} with the number parsed from
     * the field.
     *
     * @param ftrToken the feature token describing which node and which field to read
     * @param pOSState the current state
     * @return the feature values, or {@code null} if the node is absent, the field is neither a
     *         prefix nor a suffix pattern, or no values were produced
     */
    @Override
    public String[] getFields(FtrToken ftrToken, POSState pOSState) {
        DEPNode node = pOSState.getNode(ftrToken);
        if (node == null) {
            return null;
        }
        String[] values = null;
        Matcher prefixMatcher = JointFtrXml.P_PREFIX.matcher(ftrToken.field);
        if (prefixMatcher.find()) {
            values = UTString.getPrefixes(node.lowerSimplifiedForm, Integer.parseInt(prefixMatcher.group(1)));
        } else {
            Matcher suffixMatcher = JointFtrXml.P_SUFFIX.matcher(ftrToken.field);
            if (suffixMatcher.find()) {
                values = UTString.getSuffixes(node.lowerSimplifiedForm, Integer.parseInt(suffixMatcher.group(1)));
            }
        }
        // Callers receive null rather than an empty array.
        if (values == null || values.length == 0) {
            return null;
        }
        return values;
    }

    /**
     * @param dEPNode the node to test
     * @return {@code true} if the node's lower simplified form is in {@link #s_lsfs}
     */
    public boolean containsLowerSimplifiedForm(DEPNode dEPNode) {
        return this.s_lsfs.contains(dEPNode.lowerSimplifiedForm);
    }
}
