package edu.berkeley.nlp.PCFGLA;

import edu.berkeley.nlp.PCFGLA.SimpleLexicon;
import edu.berkeley.nlp.PCFGLA.smoothing.Smoother;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.syntax.StateSetWithFeatures;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.util.Counter;
import edu.berkeley.nlp.util.Indexer;
import edu.berkeley.nlp.util.Numberer;
import edu.berkeley.nlp.util.Pair;
import edu.berkeley.nlp.util.PriorityQueue;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:edu/berkeley/nlp/PCFGLA/HierarchicalFullyConnectedAdaptiveLexiconWithFeatures.class */
/**
 * Lexicon whose score for a (word, tag) pair is the product of the scores of
 * the lexical rules attached to the word's features.
 *
 * <p>Features are produced by {@link #tallyFeatures} (word signature,
 * {@code UNK}, frequent suffixes, capitalisation / digit / dash / plural-s
 * indicators) and by {@link #labelTrees}, which adds the surface word itself
 * as a feature.
 */
public class HierarchicalFullyConnectedAdaptiveLexiconWithFeatures extends HierarchicalFullyConnectedAdaptiveLexicon {
    private static final long serialVersionUID = 1;

    /** Maps feature strings (suffixes, signatures, indicator names, words) to feature ids; rebuilt by {@link #init}. */
    Indexer<String> featureIndexer;

    /** Helper lexicon used for {@code getNewSignature}; shares this lexicon's word counts after {@link #init}. */
    SimpleLexicon simpleLex;

    /** Minimum number of occurrences a suffix feature needs in the training trees to be kept. */
    private final int minFeatureCount = 50;

    /**
     * Creates the lexicon and initialises it from the training trees.
     *
     * <p>{@code i}, {@code dArr} and {@code smoother} are accepted but not used
     * by this constructor.
     *
     * @param sArr             forwarded to the superclass and to the helper {@link SimpleLexicon}
     * @param i                unused
     * @param dArr             unused
     * @param smoother         unused
     * @param stateSetTreeList training trees handed to {@link #init}
     * @param i2               forwarded to the superclass constructor
     */
    public HierarchicalFullyConnectedAdaptiveLexiconWithFeatures(short[] sArr, int i, double[] dArr, Smoother smoother, StateSetTreeList stateSetTreeList, int i2) {
        super(sArr, i2);
        // minFeatureCount is initialised at its declaration; re-assigning a
        // final field here (as the decompiler emitted) does not compile.
        this.simpleLex = new SimpleLexicon(sArr, -1.0d);
        init(stateSetTreeList);
    }

    /**
     * Builds all indexers, counts and (empty) lexical rules from the training trees.
     *
     * <p>Steps: index every word; count word occurrences, index word signatures
     * and tally suffix counts; keep suffixes seen at least
     * {@code minFeatureCount} times; label the tree leaves with features; record
     * which features occur under which tag; allocate one rule per (tag, feature).
     * Progress about kept / rejected suffixes is printed to {@code System.out}.
     *
     * @param stateSetTreeList the training trees; their terminal labels are replaced
     *                         by {@link StateSetWithFeatures} instances
     */
    @Override // edu.berkeley.nlp.PCFGLA.HierarchicalFullyConnectedLexicon, edu.berkeley.nlp.PCFGLA.SimpleLexicon
    public void init(StateSetTreeList stateSetTreeList) {
        // Pass 1: give every surface word an index.
        Iterator<Tree<StateSet>> indexPass = stateSetTreeList.iterator();
        while (indexPass.hasNext()) {
            for (StateSet leaf : indexPass.next().getYield()) {
                this.wordIndexer.add(leaf.getWord());
            }
        }

        // Pass 2: count words, index their signatures, tally suffix frequencies.
        // wordCounter is sized before signatures are added, so wordIndexer may
        // end up larger than wordCounter.
        this.wordCounter = new int[this.wordIndexer.size()];
        Counter<String> suffixCounts = new Counter<>();
        Iterator<Tree<StateSet>> countPass = stateSetTreeList.iterator();
        while (countPass.hasNext()) {
            int position = 0;
            for (StateSet leaf : countPass.next().getYield()) {
                String word = leaf.getWord();
                this.wordCounter[this.wordIndexer.indexOf(word)]++;
                this.wordIndexer.add(getSignature(word, position));
                position++;
                tallyWordFeatures(word, suffixCounts);
            }
        }

        // Only sufficiently frequent suffixes become features.
        this.featureIndexer = new Indexer<>();
        for (String feature : suffixCounts.keySet()) {
            if (suffixCounts.getCount(feature) >= this.minFeatureCount) {
                System.out.println("keeping: \t" + feature);
                this.featureIndexer.add(feature);
            } else {
                System.out.println("too rare:\t" + feature);
            }
        }

        this.simpleLex.wordCounter = this.wordCounter;
        // labelTrees adds every word to featureIndexer, so it must run before
        // the per-tag indexers are sized below.
        labelTrees(stateSetTreeList);

        this.tagWordIndexer = new SimpleLexicon.IntegerIndexer[this.numStates];
        for (int tag = 0; tag < this.numStates; tag++) {
            this.tagWordIndexer[tag] = new SimpleLexicon.IntegerIndexer(this.featureIndexer.size());
        }

        // Pass 3: record which features were observed under which preterminal tag.
        boolean[] tagSeen = new boolean[this.numStates];
        Iterator<Tree<StateSet>> tagPass = stateSetTreeList.iterator();
        while (tagPass.hasNext()) {
            Tree<StateSet> tree = tagPass.next();
            List<StateSet> leaves = tree.getYield();
            List<StateSet> preTerminals = tree.getPreTerminalYield();
            int position = 0;
            for (StateSet leaf : leaves) {
                short tag = preTerminals.get(position).getState();
                Iterator<Integer> featureIds = ((StateSetWithFeatures) leaf).features.iterator();
                while (featureIds.hasNext()) {
                    this.tagWordIndexer[tag].add(featureIds.next().intValue());
                }
                tagSeen[tag] = true;
                position++;
            }
        }

        this.expectedCounts = new double[this.numStates][][];
        // Tags never seen above a word get no feature indexer (and no rules).
        for (int tag = 0; tag < this.numStates; tag++) {
            if (!tagSeen[tag]) {
                this.tagWordIndexer[tag] = null;
            }
        }
        this.nWords = this.wordIndexer.size();
        // This lexicon stores its parameters in `rules`; the inherited score
        // tables are cleared (the original allocated `scores` only to discard it).
        this.scores = null;
        this.hierarchicalScores = null;
        this.finalLevels = null;

        this.rules = new HierarchicalAdaptiveLexicalRule[this.numStates][];
        for (int tag = 0; tag < this.numStates; tag++) {
            int ruleCount = this.tagWordIndexer[tag] == null ? 0 : this.tagWordIndexer[tag].size();
            this.rules[tag] = new HierarchicalAdaptiveLexicalRule[ruleCount];
            for (int r = 0; r < ruleCount; r++) {
                this.rules[tag][r] = new HierarchicalAdaptiveLexicalRule();
            }
        }
    }

    /**
     * Increments the count of the 1-, 2- and 3-character suffix features
     * ({@code "SUFF-" + suffix}) of a word longer than four characters.
     *
     * <p>NOTE(review): suffixes are counted here from the word as written, while
     * {@link #tallyFeatures} looks them up from the lower-cased word, so
     * suffixes containing upper-case letters are counted but never matched —
     * confirm whether that is intended.
     *
     * @param str     the surface word
     * @param counter receives one increment per suffix feature
     */
    private void tallyWordFeatures(String str, Counter<String> counter) {
        int length = str.length();
        if (length <= 4) {
            return;
        }
        for (int n = 1; n < 4; n++) {
            counter.incrementCount("SUFF-" + str.substring(length - n), 1.0d);
        }
    }

    /**
     * Wraps a state set in a {@link StateSetWithFeatures} and attaches the ids
     * of its signature, {@code UNK}, suffix and word-shape features.
     *
     * <p>When {@code z} is false the feature indexer is only queried, so a
     * feature unknown to it is recorded with whatever {@code indexOf} returns
     * for a missing entry (suffix features are skipped instead when that id is
     * negative).
     *
     * @param stateSet the state set of a word; its {@code from} field is used as
     *                 the word's sentence position
     * @param z        if true, unseen non-suffix features are first added to the
     *                 feature indexer
     * @return a new {@link StateSetWithFeatures} carrying the feature ids
     */
    public StateSet tallyFeatures(StateSet stateSet, boolean z) {
        String word = stateSet.getWord();
        String lowerCase = word.toLowerCase();
        short position = stateSet.from;
        String signature = this.simpleLex.getNewSignature(word, position);
        StateSetWithFeatures result = new StateSetWithFeatures(stateSet);

        addFeature(result, signature, z);
        addFeature(result, "UNK", z);

        // Suffix features are never registered here: only suffixes that init
        // kept as frequent enough are attached.
        int length = word.length();
        if (length > 4) {
            for (int n = 1; n < 4; n++) {
                int suffixId = this.featureIndexer.indexOf("SUFF-" + lowerCase.substring(length - n));
                if (suffixId >= 0) {
                    result.features.add(Integer.valueOf(suffixId));
                }
            }
        }

        // Scan the word shape.
        int numCaps = 0;
        boolean hasDigit = false;
        boolean hasDash = false;
        boolean hasLower = false;
        for (int c = 0; c < length; c++) {
            char ch = word.charAt(c);
            if (Character.isDigit(ch)) {
                hasDigit = true;
            } else if (ch == '-') {
                hasDash = true;
            } else if (Character.isLetter(ch)) {
                if (Character.isLowerCase(ch)) {
                    hasLower = true;
                } else {
                    // Title-case letters count both as lower-case evidence and as capitals.
                    if (Character.isTitleCase(ch)) {
                        hasLower = true;
                    }
                    numCaps++;
                }
            }
        }

        // Capitalisation feature; an empty word gets none instead of failing on charAt(0).
        if (length > 0) {
            char first = word.charAt(0);
            if (Character.isUpperCase(first) || Character.isTitleCase(first)) {
                boolean sentenceInitialSingleCap = position == 0 && numCaps == 1;
                addFeature(result, sentenceInitialSingleCap ? "INITC" : "CAPS", z);
            } else if (!Character.isLetter(first) && numCaps > 0) {
                addFeature(result, "CAPS", z);
            } else if (hasLower) {
                addFeature(result, "LC", z);
            }
        }
        if (hasDigit) {
            addFeature(result, "NUM", z);
        }
        if (hasDash) {
            addFeature(result, "DASH", z);
        }
        // Final "s" not preceded by s, i or u.
        if (lowerCase.endsWith("s") && length >= 3) {
            char beforeLast = lowerCase.charAt(length - 2);
            if (beforeLast != 's' && beforeLast != 'i' && beforeLast != 'u') {
                addFeature(result, "s", z);
            }
        }
        return result;
    }

    /** Optionally registers {@code feature} in the feature indexer, then appends its id to {@code target}. */
    private void addFeature(StateSetWithFeatures target, String feature, boolean register) {
        if (register) {
            this.featureIndexer.add(feature);
        }
        target.features.add(Integer.valueOf(this.featureIndexer.indexOf(feature)));
    }

    /**
     * Replaces the label of every terminal with a {@link StateSetWithFeatures}.
     *
     * <p>Words whose training count is at most {@code knownWordCount} get the
     * features from {@link #tallyFeatures}; more frequent words get
     * {@code sigIndex = -1}. A word index outside the word counter is reported on
     * {@code System.out}. In every case the word itself is registered as a
     * feature and attached.
     *
     * @param stateSetTreeList trees whose terminal labels are rewritten in place
     */
    @Override // edu.berkeley.nlp.PCFGLA.HierarchicalFullyConnectedLexicon, edu.berkeley.nlp.PCFGLA.SimpleLexicon
    public void labelTrees(StateSetTreeList stateSetTreeList) {
        Iterator<Tree<StateSet>> trees = stateSetTreeList.iterator();
        while (trees.hasNext()) {
            Tree<StateSet> tree = trees.next();
            for (Tree<StateSet> terminal : tree.getTerminals()) {
                StateSetWithFeatures labelled = new StateSetWithFeatures(terminal.getLabel());
                if (labelled.wordIndex < 0 || labelled.wordIndex >= this.wordCounter.length) {
                    System.out.println("Have never seen this word before: " + labelled.getWord() + " " + labelled.wordIndex);
                    System.out.println(tree);
                } else if (this.wordCounter[labelled.wordIndex] <= this.knownWordCount) {
                    labelled = (StateSetWithFeatures) tallyFeatures(labelled, false);
                } else {
                    labelled.sigIndex = -1;
                }
                String word = labelled.getWord();
                this.featureIndexer.add(word);
                labelled.features.add(Integer.valueOf(this.featureIndexer.indexOf(word)));
                terminal.setLabel(labelled);
            }
        }
    }

    /**
     * Scores a word under a tag: for each substate, the product of the scores
     * of all rules whose feature is attached to the word and known for the tag
     * (1.0 when no feature matches).
     *
     * <p>A {@code wordIndex} of {@code -2} makes this method compute the
     * features itself (presumably it marks a state set that did not go through
     * {@link #labelTrees} — confirm); otherwise {@code stateSet} must already be
     * a {@link StateSetWithFeatures}.
     *
     * <p>NOTE(review): {@code tagWordIndexer[s]} is null for tags {@link #init}
     * never saw above a word; callers presumably only pass lexical tags — confirm.
     *
     * @param stateSet the word's state set
     * @param s        the tag
     * @param z        unused
     * @param z2       unused
     * @return one score per substate of {@code s}
     */
    @Override // edu.berkeley.nlp.PCFGLA.HierarchicalFullyConnectedLexicon, edu.berkeley.nlp.PCFGLA.SimpleLexicon, edu.berkeley.nlp.PCFGLA.Lexicon
    public double[] score(StateSet stateSet, short s, boolean z, boolean z2) {
        double[] result = new double[this.numSubStates[s]];
        Arrays.fill(result, 1.0d);

        StateSetWithFeatures featured;
        if (stateSet.wordIndex == -2) {
            featured = new StateSetWithFeatures(stateSet);
            String word = stateSet.getWord();
            int wordId = this.wordIndexer.indexOf(word);
            // wordIndexer also holds signatures added after wordCounter was
            // sized, so an id beyond wordCounter is treated like an unseen word
            // (same bounds rule as labelTrees) instead of overrunning the array.
            boolean unseenOrRare = wordId < 0
                    || wordId >= this.wordCounter.length
                    || this.wordCounter[wordId] <= this.knownWordCount;
            if (unseenOrRare) {
                featured = (StateSetWithFeatures) tallyFeatures(stateSet, false);
            }
            int wordFeatureId = this.featureIndexer.indexOf(word);
            if (wordFeatureId >= 0) {
                featured.features.add(Integer.valueOf(wordFeatureId));
            }
        } else {
            featured = (StateSetWithFeatures) stateSet;
        }

        Iterator<Integer> featureIds = featured.features.iterator();
        while (featureIds.hasNext()) {
            int featureId = featureIds.next().intValue();
            if (featureId < 0) {
                continue; // feature unknown to the indexer
            }
            int ruleIndex = this.tagWordIndexer[s].indexOf(featureId);
            if (ruleIndex < 0) {
                continue; // feature never observed with this tag
            }
            double[] ruleScores = this.rules[s][ruleIndex].scores;
            for (int sub = 0; sub < result.length; sub++) {
                result[sub] *= ruleScores[sub];
            }
        }
        return result;
    }

    /**
     * Renders the lexical rules: first the suffix-feature rules in the order a
     * priority queue keyed on each rule's first score yields them, then, after
     * a separator line, every rule grouped by tag.
     *
     * <p>Side effect: for each tag with rules, a "lexical rules per level"
     * summary (levels 1 to 5) is printed to {@code System.out}.
     *
     * @return the textual dump of all rules
     */
    @Override // edu.berkeley.nlp.PCFGLA.HierarchicalFullyConnectedAdaptiveLexicon, edu.berkeley.nlp.PCFGLA.HierarchicalLexicon, edu.berkeley.nlp.PCFGLA.SimpleLexicon
    @SuppressWarnings({"rawtypes", "unchecked"}) // PriorityQueue and Pair are used raw, as in the original block
    public String toString() {
        StringBuilder out = new StringBuilder();
        Numberer tagNumberer = Numberer.getGlobalNumberer("tags");

        // Collect (tag, feature) pairs of suffix rules, prioritised by first score.
        PriorityQueue suffixRules = new PriorityQueue();
        for (int tag = 0; tag < this.rules.length; tag++) {
            if (this.rules[tag].length == 0) {
                continue;
            }
            for (int featureId = 0; featureId < this.featureIndexer.size(); featureId++) {
                int ruleIndex = this.tagWordIndexer[tag].indexOf(featureId);
                if (ruleIndex < 0) {
                    continue;
                }
                String feature = this.featureIndexer.get(featureId);
                if (feature.length() > 4 && feature.startsWith("SUFF")) {
                    suffixRules.add(new Pair(Integer.valueOf(tag), Integer.valueOf(featureId)), this.rules[tag][ruleIndex].scores[0]);
                }
            }
        }
        while (suffixRules.hasNext()) {
            Pair entry = (Pair) suffixRules.next();
            int featureId = ((Integer) entry.getSecond()).intValue();
            int tag = ((Integer) entry.getFirst()).intValue();
            String tagName = (String) tagNumberer.object(tag);
            int ruleIndex = this.tagWordIndexer[tag].indexOf(featureId);
            out.append(tagName).append(" ").append(this.featureIndexer.get(featureId)).append("\n");
            out.append(this.rules[tag][ruleIndex].toString());
            out.append("\n\n");
        }

        out.append("-----------Start unsorted----------\n");
        final int levelSlots = 6;
        for (int tag = 0; tag < this.rules.length; tag++) {
            String tagName = (String) tagNumberer.object(tag);
            if (this.rules[tag].length == 0) {
                continue;
            }
            int[] rulesPerLevel = new int[levelSlots];
            for (int featureId = 0; featureId < this.featureIndexer.size(); featureId++) {
                int ruleIndex = this.tagWordIndexer[tag].indexOf(featureId);
                if (ruleIndex < 0) {
                    continue;
                }
                out.append(tagName).append(" ").append(this.featureIndexer.get(featureId)).append("\n");
                out.append(this.rules[tag][ruleIndex].toString());
                out.append("\n\n");
                int depth = this.rules[tag][ruleIndex].hierarchy.getDepth();
                // Depths outside the summary table are left out of the
                // diagnostic rather than aborting toString().
                if (depth >= 0 && depth < levelSlots) {
                    rulesPerLevel[depth]++;
                }
            }
            System.out.print(tagNumberer.object(tag) + ", lexical rules per level: ");
            for (int level = 1; level < levelSlots; level++) {
                System.out.print(rulesPerLevel[level] + " ");
            }
            System.out.print("\n");
        }
        return out.toString();
    }
}
