package edu.stanford.nlp.classify;

import edu.stanford.nlp.classify.LogPrior;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.BasicDatum;
import edu.stanford.nlp.ling.Datum;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.optimization.DiffFunction;
import edu.stanford.nlp.optimization.Minimizer;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.process.WordShapeClassifier;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.Distribution;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.util.ErasureUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.ReflectionLoading;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.StringReader;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.xalan.templates.Constants;

/* loaded from: input_file:edu/stanford/nlp/classify/ColumnDataClassifier.class */
public class ColumnDataClassifier {
    // NOTE(review): not referenced by name in the visible code (the decompiler inlines
    // constants); presumably the default weight of a binary feature - confirm.
    private static final double DEFAULT_VALUE = 1.0d;
    /** Whitespace regex: default for Flags.splitWordsIgnorePattern and the SVMLight token separator. */
    private static final String DEFAULT_IGNORE_REGEXP = "\\s+";
    /** Per-column options, indexed by column number; entries may be null. */
    private final Flags[] flags;
    /** File-wide options (gold answer column, output options, ranking options, ...). */
    private final Flags globalFlags;
    private Classifier<String, String> classifier;
    private TokenizerFactory<Word> ptbFactory;
    /** Pattern used to split a non-CSV input line into fields; initialised outside this view. */
    private static final Pattern tab;
    // Ranking-evaluation state. It is static, so it is shared by every instance in the JVM;
    // it is updated by updatePerformanceStatistics and consumed by finishRanking.
    private static int numGroups;
    private static String lastGroup;
    private static int numInGroup;
    private static double bestProb;
    private static double bestSim;
    private static boolean currentHighestProbCorrect;
    private static boolean foundAnswerInGroup;
    /** Tab-separated header line; testExamples splits it to print the CSV header row. */
    private static String storedHeader;
    /** Formatter for the probabilities and scores written to the console; initialised outside this view. */
    private static final NumberFormat nf;
    private static final Map<String, Collection<String>> wordToSubstrings;
    private static PrintWriter cliqueWriter;
    /** Compiler-generated switch for {@code assert}; surfaced here by the decompiler. */
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:edu/stanford/nlp/classify/ColumnDataClassifier$Flags.class */
    /**
     * Option holder for {@link ColumnDataClassifier}.
     *
     * <p>The enclosing class keeps one instance per input column (its {@code flags} array)
     * plus one instance for file-wide settings ({@code globalFlags}). Fields declared
     * {@code static} are shared by every instance and, being static, are not part of the
     * serialized form.
     *
     * <p>Only the fields whose use is visible in the enclosing class are commented below;
     * the remaining names are left undocumented rather than guessed at.
     */
    public static class Flags implements Serializable {
        private static final long serialVersionUID = -7076671761070232566L;
        /** Lower-case the column text (Locale.ENGLISH) before features are extracted. */
        boolean lowercase;
        /** Feature name used for an untransformed real-valued column. */
        public static final String realValuedFeaturePrefix = "Value";
        String useClassifierFactory;
        String classifierFactoryArgs;
        boolean biased;
        String wordNGramBoundaryRegexp;
        Pattern wordNGramBoundaryPattern;
        static String printSVMLightFormatTo;
        /** Selects the real-valued (RVFDatum) code paths instead of the binary-feature ones. */
        boolean usesRealValues;
        /** Treat the column value as a file name and replace it with that file's contents. */
        boolean filename;
        boolean useAllSplitWordPairs;
        boolean useAllSplitWordTriples;
        boolean splitWordCount;
        boolean logSplitWordCount;
        int[] binnedSplitWordCounts;
        Map<String, float[]> wordVectors;
        static String trainFile = null;
        static String serializeTo = null;
        static String printTo = null;
        static boolean trainFromSVMLight = false;
        static boolean testFromSVMLight = false;
        /** Passed to the feature printer when printFeatures is set. */
        static String encoding = null;
        /** Print every class with its probability instead of only the chosen and gold classes. */
        static boolean displayAllAnswers = false;
        /** Split input lines as quoted CSV instead of on the tab pattern. */
        static boolean csvInput = false;
        static InputFormat inputFormat = InputFormat.PLAIN;
        /** When non-null, the format string used for CSV-style output (see formatCsv). */
        static String csvOutput = null;
        boolean useNGrams = false;
        boolean usePrefixSuffixNGrams = false;
        boolean lowercaseNGrams = false;
        boolean useSplitNGrams = false;
        boolean useSplitPrefixSuffixNGrams = false;
        boolean cacheNGrams = false;
        int maxNGramLeng = -1;
        int minNGramLeng = 2;
        String partialNGramRegexp = null;
        Pattern partialNGramPattern = null;
        boolean useSum = false;
        double tolerance = 1.0E-4d;
        /** When non-null, features are printed while datums are built. */
        String printFeatures = null;
        String printClassifier = null;
        int printClassifierParam = 100;
        boolean exitAfterTrainingFeaturization = false;
        /** Intern every feature string before it is stored. */
        boolean intern = false;
        Pattern splitWordsPattern = null;
        Pattern splitWordsTokenizerPattern = null;
        Pattern splitWordsIgnorePattern = Pattern.compile(ColumnDataClassifier.DEFAULT_IGNORE_REGEXP);
        boolean useSplitWords = false;
        boolean useSplitWordPairs = false;
        boolean useLowercaseSplitWordPairs = false;
        boolean useSplitFirstLastWords = false;
        boolean useLowercaseSplitWords = false;
        boolean useLowercaseSplitFirstLastWords = false;
        int wordShape = -1;
        int splitWordShape = -1;
        /** Add the whole column string as a single feature. */
        boolean useString = false;
        /** Add a constant class feature to every datum. */
        boolean useClassFeature = false;
        /** Upper bounds of the string-length bins; null disables length binning. */
        int[] binnedLengths = null;
        /** Optional tally of (length bin, gold answer) pairs. */
        TwoDimensionalCounter<String, String> binnedLengthsCounter = null;
        /** Upper bounds of the numeric-value bins; null disables value binning. */
        double[] binnedValues = null;
        /** Optional tally of (value bin, gold answer) pairs. */
        TwoDimensionalCounter<String, String> binnedValuesCounter = null;
        /** Value binned in place of a column that does not parse as a double. */
        double binnedValuesNaN = -1.0d;
        // The next four flags mark a column as numeric in feature-format input.
        // When several are set, log takes precedence over logit, and logit over sqrt.
        boolean isRealValued = false;
        boolean logitTransform = false;
        boolean logTransform = false;
        boolean sqrtTransform = false;
        /** Characters whose occurrences are counted in the column text; null disables this. */
        char[] countChars = null;
        /** Upper bounds of the bins applied to each character count. */
        int[] countCharsBins = {0, 1};
        /** Per-class probability thresholds used at test time; null disables biased decisions. */
        ClassicCounter<String> biasedHyperplane = null;
        /** Print each test item (and a linear classifier's justification) to stderr. */
        boolean justify = false;
        /** Treat every non-gold column as a ready-made feature. */
        boolean featureFormat = false;
        /** In feature format, prefix each feature with its column index. */
        boolean significantColumnId = false;
        boolean useNB = false;
        boolean useQN = true;
        int QNsize = 15;
        int prior = LogPrior.LogPriorType.QUADRATIC.ordinal();
        double sigma = 1.0d;
        double epsilon = 0.01d;
        /** Features are pruned from the training set by count when this is greater than 1. */
        int featureMinimumSupport = 0;
        /** Column echoed in front of each printed answer; negative disables it. */
        int displayedColumn = 1;
        /** Column holding the ranking group id; negative disables ranking evaluation. */
        int groupingColumn = -1;
        /** Column holding a numeric ranking score; negative disables it. */
        int rankingScoreColumn = -1;
        /** Class whose probability ranks the items of a group; null disables ranking evaluation. */
        String rankingAccuracyClass = null;
        /** Column holding the gold class label. */
        int goldAnswerColumn = 0;
        boolean useSplitWordNGrams = false;
        int maxWordNGramLeng = -1;
        int minWordNGramLeng = 1;
        boolean useBinary = false;
        double l1reg = 0.0d;
        boolean useAdaptL1 = false;
        int limitFeatures = 0;
        String limitFeaturesLabels = null;
        double l1regmin = 0.0d;
        double l1regmax = 500.0d;
        double featureWeightThreshold = 0.0d;
        String testFile = null;
        String loadClassifier = null;
        boolean showTokenization = false;
        /** When positive, per-item output is suppressed unless printCrossValidationDecisions is set. */
        int crossValidationFolds = -1;
        boolean shuffleTrainingData = false;
        long shuffleSeed = 0;
        boolean splitWordsWithPTBTokenizer = false;
        boolean printCrossValidationDecisions = false;

        Flags() {
        }

        /** Returns a short summary showing only four of the fields. */
        public String toString() {
            return "Flags[goldAnswerColumn = " + this.goldAnswerColumn + ", useString = " + this.useString + ", useNGrams = " + this.useNGrams + ", usePrefixSuffixNGrams = " + this.usePrefixSuffixNGrams + ']';
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:edu/stanford/nlp/classify/ColumnDataClassifier$InputFormat.class */
    /**
     * Layout of an input data file. {@code Flags.inputFormat} defaults to {@link #PLAIN}.
     *
     * NOTE(review): the reader that interprets these values is not visible here; the
     * names suggest "no extras", "file may contain comment lines" and "first line is a
     * header" - confirm against readDataset.
     */
    public enum InputFormat {
        PLAIN,
        COMMENTS,
        HEADER
    }

    /**
     * Builds a datum from one raw input line.
     *
     * @param str the line; it is split into columns by {@code splitLineToFields}
     * @return the datum produced by {@link #makeDatumFromStrings(String[])} for those columns
     */
    public Datum<String, String> makeDatumFromLine(String str) {
        final String[] columns = splitLineToFields(str);
        return makeDatumFromStrings(columns);
    }

    /**
     * Builds a datum from an already-split input row.
     *
     * <p>Dispatch order: real-valued mode delegates to {@code makeRVFDatumFromStrings};
     * otherwise, unless feature format is enabled, the column-wise feature extractor
     * {@code makeDatum} is used. In feature format every column except the gold answer
     * column becomes one feature, prefixed with {@code "<columnIndex>:"} when
     * {@code significantColumnId} is set.
     *
     * <p>If the row has no gold answer column (it is too short), the label is the empty
     * string, matching what {@code makeDatum} does for such rows.
     *
     * @param strArr the columns of one data item
     * @return the datum for that item
     */
    public Datum<String, String> makeDatumFromStrings(String[] strArr) {
        if (this.globalFlags.usesRealValues) {
            return makeRVFDatumFromStrings(strArr);
        }
        if (!this.globalFlags.featureFormat) {
            return makeDatum(strArr);
        }
        final int goldColumn = this.globalFlags.goldAnswerColumn;
        final List<String> features = new ArrayList<>(strArr.length);
        for (int column = 0; column < strArr.length; column++) {
            if (column == goldColumn) {
                continue;
            }
            if (this.globalFlags.significantColumnId) {
                features.add(String.format("%d:%s", Integer.valueOf(column), strArr[column]));
            } else {
                features.add(strArr[column]);
            }
        }
        // A row without the gold column (e.g. unlabeled test data) gets an empty label
        // instead of an ArrayIndexOutOfBoundsException.
        final String label = goldColumn < strArr.length ? strArr[goldColumn] : "";
        return new BasicDatum<>(features, label);
    }

    /**
     * Builds a real-valued datum from an already-split input row.
     *
     * <p>Outside feature format this delegates to {@code makeRVFDatum}. In feature format
     * every column except the gold answer column contributes one feature: a column whose
     * flags mark it as numeric (real-valued, log, logit or sqrt) is parsed and added via
     * {@code addFeatureValue}; any other column is used verbatim as a feature name with
     * value 1.0.
     *
     * <p>Columns beyond the end of the per-column flags array are treated like columns
     * with no flags, and a row without the gold answer column gets the empty label
     * (matching {@code makeRVFDatum}).
     *
     * @param strArr the columns of one data item
     * @return the real-valued datum for that item
     */
    private RVFDatum<String, String> makeRVFDatumFromStrings(String[] strArr) {
        if (!this.globalFlags.featureFormat) {
            return makeRVFDatum(strArr);
        }
        final int goldColumn = this.globalFlags.goldAnswerColumn;
        final ClassicCounter<String> features = new ClassicCounter<>();
        for (int column = 0; column < strArr.length; column++) {
            if (column == goldColumn) {
                continue;
            }
            // The flags array is sized by configuration, not by the data, so a row may be wider.
            final Flags columnFlags = column < this.flags.length ? this.flags[column] : null;
            final boolean numeric = columnFlags != null
                    && (columnFlags.isRealValued || columnFlags.logTransform || columnFlags.logitTransform || columnFlags.sqrtTransform);
            if (numeric) {
                addFeatureValue(strArr[column], columnFlags, features);
            } else {
                features.setCount(strArr[column], 1.0d);
            }
        }
        final String label = goldColumn < strArr.length ? strArr[goldColumn] : "";
        return new RVFDatum<>(features, label);
    }

    /**
     * Reads a training file and returns only the dataset.
     *
     * @param str the file to read, passed on to {@link #readAndReturnTrainingExamples(String)}
     * @return the first element of the pair produced by that method
     */
    public GeneralDataset<String, String> readTrainingExamples(String str) {
        final Pair<GeneralDataset<String, String>, List<String[]>> loaded = readAndReturnTrainingExamples(str);
        return loaded.first();
    }

    /**
     * Reads a training file, prunes rare features and prints summary statistics.
     *
     * <p>If feature printing is configured, a feature printer tagged {@code "train"} is
     * opened first. After loading, features are thresholded by count when
     * {@code featureMinimumSupport} is greater than 1.
     *
     * @param str the file to read
     * @return the pair returned by {@code readDataset}; its first element is the dataset
     *         (pruned in place when thresholding applies)
     */
    public Pair<GeneralDataset<String, String>, List<String[]>> readAndReturnTrainingExamples(String str) {
        final String featureOutput = this.globalFlags.printFeatures;
        if (featureOutput != null) {
            newFeaturePrinter(featureOutput, "train", Flags.encoding);
        }
        // NOTE(review): readTestExamples passes the same boolean (true) to readDataset;
        // its meaning is not visible here - confirm that is intended for training data.
        final Pair<GeneralDataset<String, String>, List<String[]>> loaded = readDataset(str, true);
        final GeneralDataset<String, String> trainingSet = loaded.first();
        final int minimumSupport = this.globalFlags.featureMinimumSupport;
        if (minimumSupport > 1) {
            System.err.println("Removing Features with counts < " + minimumSupport);
            trainingSet.applyFeatureCountThreshold(minimumSupport);
        }
        trainingSet.summaryStatistics();
        return loaded;
    }

    /**
     * Reads a test file.
     *
     * @param str the file to read
     * @return the pair returned by {@code readDataset(str, true)}; elsewhere in this class
     *         its first element is used as the dataset and its second as the per-item columns
     */
    public Pair<GeneralDataset<String, String>, List<String[]>> readTestExamples(String str) {
        final Pair<GeneralDataset<String, String>, List<String[]>> testData = readDataset(str, true);
        return testData;
    }

    /**
     * Turns raw SVMLight lines into token arrays.
     *
     * <p>For each line, everything from the first {@code '#'} onwards is removed and the
     * remainder is split on runs of whitespace.
     *
     * @param list the raw lines
     * @return one token array per input line, in the same order
     */
    private static List<String[]> makeSVMLightLineInfos(List<String> list) {
        final List<String[]> lineInfos = new ArrayList<>(list.size());
        for (String line : list) {
            final String withoutComment = line.replaceFirst("#.*$", "");
            lineInfos.add(withoutComment.split(DEFAULT_IGNORE_REGEXP));
        }
        return lineInfos;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Removed duplicated region for block: B:44:0x0143 A[Catch: Exception -> 0x01f9, TryCatch #0 {Exception -> 0x01f9, blocks: (B:84:0x007d, B:24:0x0086, B:26:0x0090, B:27:0x00a5, B:28:0x00c2, B:30:0x00cc, B:69:0x00ea, B:72:0x00f0, B:42:0x010e, B:61:0x011d, B:62:0x0142, B:44:0x0143, B:46:0x014b, B:47:0x0150, B:49:0x0158, B:52:0x0161, B:53:0x016b, B:58:0x0176, B:59:0x01a8, B:55:0x01a9, B:33:0x00f8, B:35:0x0101, B:80:0x01c3, B:82:0x009c), top: B:83:0x007d }] */
    /* JADX WARN: Removed duplicated region for block: B:60:0x011d A[SYNTHETIC] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder left by the decompiler: the real body could not be reconstructed, so
     * this method unconditionally throws {@link UnsupportedOperationException}.
     *
     * <p>Callers in this class use the first element of the returned pair as the dataset
     * and the second as the per-item column arrays. The meaning of the boolean parameter
     * cannot be determined from this stub; both visible callers pass {@code true}.
     *
     * WARNING: every method that reads a dataset through this stub fails at runtime
     * until the original implementation is restored.
     */
    private edu.stanford.nlp.util.Pair<edu.stanford.nlp.classify.GeneralDataset<java.lang.String, java.lang.String>, java.util.List<java.lang.String[]>> readDataset(java.lang.String r6, boolean r7) {
        /*
            Method dump skipped, instructions count: 577
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: edu.stanford.nlp.classify.ColumnDataClassifier.readDataset(java.lang.String, boolean):edu.stanford.nlp.util.Pair");
    }

    /**
     * Splits one input line into columns.
     *
     * <p>Without CSV input the line is split with the {@code tab} pattern. With CSV input
     * it is split on commas with double-quote quoting, and a field that is still wrapped
     * in a pair of double quotes afterwards has that outer pair removed.
     *
     * @param str the raw line
     * @return the columns of the line
     */
    private String[] splitLineToFields(String str) {
        if (!Flags.csvInput) {
            return tab.split(str);
        }
        final String[] fields = StringUtils.splitOnCharWithQuoting(str, ',', '\"', '\"');
        for (int i = 0; i < fields.length; i++) {
            final String field = fields[i];
            // Require two characters: a field that is a single '"' both starts and ends
            // with a quote, and substring(1, 0) on it would throw.
            if (field.length() >= 2 && field.startsWith("\"") && field.endsWith("\"")) {
                fields[i] = field.substring(1, field.length() - 1);
            }
        }
        return fields;
    }

    /**
     * Prints per-class and overall evaluation figures to stderr.
     *
     * <p>For each class the counts stored under {@code "<class>|TP"}, {@code "|FN"},
     * {@code "|FP"} and {@code "|TN"} are read and accuracy, precision, recall and F1 are
     * printed. Precision and recall are defined as 1.0 when their denominator is zero,
     * and F1 as 0.0 when both are zero. When ranking evaluation is active (grouping
     * column set and a ranking class configured) the ranking accuracy, coverage and,
     * optionally, average ranking score are printed as well.
     *
     * <p>All ratios are computed in floating point; the counts are ints, so dividing them
     * directly would truncate every ratio to 0 or 1.
     *
     * @param i          number of test examples (denominator for the accuracies)
     * @param counter    contingency and ranking counts collected during testing
     * @param collection the class labels to report on
     * @return a pair of (accuracy, which equals micro-averaged F1; macro-averaged F1)
     */
    private Pair<Double, Double> writeResultsSummary(int i, Counter<String> counter, Collection<String> collection) {
        final boolean rankingActive = this.globalFlags.groupingColumn >= 0 && this.globalFlags.rankingAccuracyClass != null;

        System.err.println();
        System.err.print(i + " examples");
        if (rankingActive) {
            System.err.print(" and " + numGroups + " ranking groups");
        }
        System.err.println(" in test set");

        int numClasses = 0;
        double totalTruePositives = 0.0d;
        double f1Sum = 0.0d;
        for (String label : collection) {
            numClasses++;
            final int tp = (int) counter.getCount(label + "|TP");
            final int fn = (int) counter.getCount(label + "|FN");
            final int fp = (int) counter.getCount(label + "|FP");
            final int tn = (int) counter.getCount(label + "|TN");
            // Cast before dividing: int / int would discard the fractional part.
            final double precision = tp + fp == 0 ? 1.0d : (double) tp / (tp + fp);
            final double recall = tp + fn == 0 ? 1.0d : (double) tp / (tp + fn);
            final double f1 = (precision == 0.0d && recall == 0.0d) ? 0.0d : ((2.0d * precision) * recall) / (precision + recall);
            final double accuracy = ((double) tp + tn) / i;
            f1Sum += f1;
            totalTruePositives += tp;
            System.err.println("Cls " + label + ": TP=" + tp + " FN=" + fn + " FP=" + fp + " TN=" + tn + "; Acc " + nf.format(accuracy) + " P " + nf.format(precision) + " R " + nf.format(recall) + " F1 " + nf.format(f1));
        }

        if (rankingActive) {
            final double correct = (int) counter.getCount("Ranking|Correct");
            final double error = (int) counter.getCount("Ranking|Error");
            final double rankingAccuracy = correct + error == 0.0d ? 0.0d : correct / (correct + error);
            System.err.print("Ranking accuracy: " + nf.format(rankingAccuracy));

            final double covered = (int) counter.getCount("Ranking|Covered");
            final double uncovered = (int) counter.getCount("Ranking|Uncovered");
            final double coverage = covered + uncovered == 0.0d ? 0.0d : covered / (covered + uncovered);
            if (uncovered > 0.5d) {
                // Errors on groups that did contain a correct answer.
                final double coveredErrors = (int) (counter.getCount("Ranking|Error") - counter.getCount("Ranking|Uncovered"));
                final double coveredAccuracy = correct + coveredErrors == 0.0d ? 0.0d : correct / (correct + coveredErrors);
                System.err.println(" (on " + nf.format(coverage) + " of groups with correct answer: " + nf.format(coveredAccuracy) + ')');
            } else {
                System.err.println();
            }
            if (this.globalFlags.rankingScoreColumn >= 0) {
                final double averageScore = correct + error == 0.0d ? 0.0d : counter.getCount("Ranking|Score") / (correct + error);
                System.err.println("Ranking average score: " + nf.format(averageScore));
            }
        }

        final double microAccuracy = totalTruePositives / i;
        final double macroF1 = f1Sum / numClasses;
        final DecimalFormat formatter = new DecimalFormat("0.00000");
        System.err.println("Accuracy/micro-averaged F1: " + formatter.format(microAccuracy));
        System.err.println("Macro-averaged F1: " + formatter.format(macroF1));
        return new Pair<>(Double.valueOf(microAccuracy), Double.valueOf(macroF1));
    }

    /**
     * Prints the classification of one test item to stdout as a tab-separated line.
     *
     * <p>The line is {@code [displayedColumn TAB] goldAnswer TAB results}. With
     * {@code displayAllAnswers} the results are every class with its probability, highest
     * probability first; otherwise they are the chosen class, its probability and the
     * probability of the gold class.
     *
     * <p>A row that is too short to contain the displayed column is printed without it
     * (the same way the gold column falls back to the empty string) instead of failing
     * with an {@link ArrayIndexOutOfBoundsException}.
     *
     * @param strArr       the columns of the test item
     * @param str          the class chosen by the classifier
     * @param distribution class probabilities for the item
     * @param counter      unused here
     * @param classifier   unused here
     * @param d            unused here
     */
    private void writeAnswer(String[] strArr, String str, Distribution<String> distribution, Counter<String> counter, Classifier<String, String> classifier, double d) {
        final int goldColumn = this.globalFlags.goldAnswerColumn;
        final String goldAnswer = goldColumn < strArr.length ? strArr[goldColumn] : "";
        final int shownColumn = this.globalFlags.displayedColumn;
        final String printedText = (shownColumn >= 0 && shownColumn < strArr.length) ? strArr[shownColumn] : "";

        final String results;
        if (Flags.displayAllAnswers) {
            // Pairs order by probability first, then by label.
            final TreeSet<Pair<Double, String>> ranked = new TreeSet<>();
            for (String label : distribution.keySet()) {
                ranked.add(new Pair<>(Double.valueOf(distribution.probabilityOf(label)), label));
            }
            final StringBuilder line = new StringBuilder();
            for (Pair<Double, String> entry : ranked.descendingSet()) {
                if (line.length() > 0) {
                    line.append('\t');
                }
                line.append(entry.first().toString()).append('\t').append(entry.second());
            }
            results = line.toString();
        } else {
            results = str + '\t' + nf.format(distribution.probabilityOf(str)) + '\t' + nf.format(distribution.probabilityOf(goldAnswer));
        }

        if (printedText.isEmpty()) {
            System.out.println(goldAnswer + '\t' + results);
        } else {
            System.out.println(printedText + '\t' + goldAnswer + '\t' + results);
        }
    }

    /**
     * Records the outcome of one test item.
     *
     * <p>For every class known to the classifier exactly one of the counts
     * {@code "<class>|TP"}, {@code "|FN"}, {@code "|FP"} or {@code "|TN"} is incremented.
     * When ranking evaluation is active (grouping column set and a ranking class
     * configured) the static per-group state is updated too: a change of group id first
     * closes the previous group via {@code finishRanking} and then starts a new one;
     * within a group the item with the highest probability for the ranking class is
     * tracked.
     *
     * @param strArr       the columns of the test item
     * @param str          the class chosen by the classifier
     * @param distribution class probabilities for the item
     * @param counter      receives the contingency and ranking counts
     * @param classifier   supplies the set of class labels
     * @param d            ranking score of the item
     */
    private void updatePerformanceStatistics(String[] strArr, String str, Distribution<String> distribution, Counter<String> counter, Classifier<String, String> classifier, double d) {
        final String goldAnswer = this.globalFlags.goldAnswerColumn < strArr.length ? strArr[this.globalFlags.goldAnswerColumn] : "";

        for (String label : classifier.labels()) {
            final boolean isGold = label.equals(goldAnswer);
            final boolean isGuess = label.equals(str);
            final String cell;
            if (isGold) {
                cell = isGuess ? "|TP" : "|FN";
            } else {
                cell = isGuess ? "|FP" : "|TN";
            }
            counter.incrementCount(label + cell);
        }

        final String rankingClass = this.globalFlags.rankingAccuracyClass;
        if (this.globalFlags.groupingColumn >= 0 && rankingClass != null) {
            final String group = strArr[this.globalFlags.groupingColumn];
            if (group.equals(lastGroup)) {
                // Same group: keep the best-scoring item seen so far.
                numInGroup++;
                final double probability = distribution.probabilityOf(rankingClass);
                if (probability > bestProb) {
                    bestProb = probability;
                    bestSim = d;
                    currentHighestProbCorrect = goldAnswer.equals(rankingClass);
                }
                if (rankingClass.equals(goldAnswer)) {
                    foundAnswerInGroup = true;
                }
            } else {
                // New group: close the previous one before resetting the state.
                finishRanking(counter, bestSim);
                numGroups++;
                lastGroup = group;
                bestProb = distribution.probabilityOf(rankingClass);
                bestSim = d;
                numInGroup = 1;
                currentHighestProbCorrect = goldAnswer.equals(rankingClass);
                foundAnswerInGroup = rankingClass.equals(goldAnswer);
            }
        }
    }

    /**
     * Closes the current ranking group and folds its outcome into the counts.
     *
     * <p>Does nothing when the group is empty. Otherwise increments
     * {@code "Ranking|Correct"} or {@code "Ranking|Error"}, then
     * {@code "Ranking|Covered"} or {@code "Ranking|Uncovered"}, and adds {@code d} to
     * {@code "Ranking|Score"}. With {@code justify} set, a one-line description of the
     * group is written to stderr first.
     *
     * @param counter receives the ranking counts
     * @param d       score of the group's highest-ranked item
     */
    private void finishRanking(Counter<String> counter, double d) {
        if (numInGroup <= 0) {
            return;
        }
        if (this.globalFlags.justify) {
            System.err.print("Previous group of " + numInGroup + ": ");
            if (!foundAnswerInGroup) {
                System.err.print("no correct answer; ");
            }
            final String verdict = currentHighestProbCorrect ? "correct" : "incorrect";
            System.err.print("highest ranked guess was: " + verdict);
            System.err.println(" (sim. = " + nf.format(d) + ')');
        }
        counter.incrementCount(currentHighestProbCorrect ? "Ranking|Correct" : "Ranking|Error");
        counter.incrementCount(foundAnswerInGroup ? "Ranking|Covered" : "Ranking|Uncovered");
        counter.incrementCount("Ranking|Score", d);
    }

    /**
     * Classifies every item of a test set and prints a results summary.
     *
     * <p>Unless cross-validation is running without per-decision output, a header is
     * emitted first: with CSV output the stored header line is formatted to stdout,
     * otherwise a description of the output columns goes to stderr. Each item is then
     * handled by {@code testExample}; the last ranking group (if ranking evaluation is
     * active) is closed, and {@code writeResultsSummary} prints the totals.
     *
     * @param classifier     the classifier to evaluate
     * @param generalDataset the test items
     * @param list           the original columns of each test item, by item index
     * @return the pair returned by {@code writeResultsSummary}
     */
    private Pair<Double, Double> testExamples(Classifier<String, String> classifier, GeneralDataset<String, String> generalDataset, List<String[]> list) {
        final boolean printDecisions = this.globalFlags.crossValidationFolds <= 0 || this.globalFlags.printCrossValidationDecisions;
        if (printDecisions) {
            if (Flags.csvOutput != null) {
                final String[] headerColumns = storedHeader.split("\t");
                System.out.print(formatCsv(Flags.csvOutput, headerColumns, null));
            } else {
                System.err.print("Output format: ");
                if (this.globalFlags.displayedColumn >= 0) {
                    System.err.printf("dataColumn%d\t", Integer.valueOf(this.globalFlags.displayedColumn));
                }
                System.err.print("goldAnswer\t");
                final String answerColumns = Flags.displayAllAnswers
                        ? "[P(class) class]+ {sorted by probability}"
                        : "classifierAnswer\tP(clAnswer)\tP(goldAnswer)";
                System.err.println(answerColumns);
            }
        }

        final Counter<String> contingency = new ClassicCounter<>();
        final int numExamples = generalDataset.size();
        for (int index = 0; index < numExamples; index++) {
            testExample(classifier, generalDataset, list, contingency, index);
        }

        final boolean rankingActive = this.globalFlags.groupingColumn >= 0 && this.globalFlags.rankingAccuracyClass != null;
        if (rankingActive) {
            finishRanking(contingency, bestSim);
        }
        return writeResultsSummary(generalDataset.size(), contingency, classifier.labels());
    }

    /**
     * Classifies one test item, prints the decision and updates the statistics.
     *
     * <p>Steps: fetch the datum (real-valued or not); with {@code justify} set, echo the
     * item's columns and a linear classifier's justification to stderr; score the datum
     * and turn the scores into a probability distribution; choose the answer; read the
     * optional ranking score; print the decision (CSV or tab-separated) unless
     * cross-validation output is suppressed; finally update the performance counts.
     *
     * <p>Answer choice: with a biased hyperplane configured, the classes are visited in
     * descending score order and the first whose probability exceeds its threshold wins;
     * if none does, or no hyperplane is configured, the classifier's own choice is used.
     *
     * @param classifier     the classifier to apply
     * @param generalDataset the test items
     * @param list           the original columns of each test item, by item index
     * @param counter        receives the performance counts
     * @param i              index of the item to test
     */
    private void testExample(Classifier<String, String> classifier, GeneralDataset<String, String> generalDataset, List<String[]> list, Counter<String> counter, int i) {
        final String[] columns = list.get(i);
        final boolean realValued = this.globalFlags.usesRealValues;
        final Datum<String, String> datum = realValued ? generalDataset.getRVFDatum(i) : generalDataset.getDatum(i);

        if (this.globalFlags.justify) {
            System.err.println("### Test item " + i);
            for (String column : columns) {
                System.err.print(column);
                System.err.print('\t');
            }
            System.err.println();
            if (classifier instanceof LinearClassifier) {
                ((LinearClassifier) classifier).justificationOf(datum);
            }
            System.err.println();
        }

        final Counter<String> scores;
        if (realValued) {
            scores = ((RVFClassifier) ErasureUtils.uncheckedCast(classifier)).scoresOf((RVFDatum) datum);
        } else {
            scores = classifier.scoresOf(datum);
        }
        final Distribution<String> probabilities = Distribution.distributionFromLogisticCounter(scores);

        String answer = null;
        if (this.globalFlags.biasedHyperplane != null) {
            final List<String> byScore = new ArrayList<>(scores.keySet());
            Collections.sort(byScore, Counters.toComparatorDescending(scores));
            for (String candidate : byScore) {
                if (probabilities.probabilityOf(candidate) > this.globalFlags.biasedHyperplane.getCount(candidate)) {
                    answer = candidate;
                    break;
                }
            }
        }
        if (answer == null) {
            if (realValued) {
                answer = (String) ((RVFClassifier) ErasureUtils.uncheckedCast(classifier)).classOf((RVFDatum) datum);
            } else {
                answer = classifier.classOf(datum);
            }
        }

        double rankingScore = 0.0d;
        if (this.globalFlags.rankingScoreColumn >= 0) {
            try {
                rankingScore = Double.parseDouble(columns[this.globalFlags.rankingScoreColumn]);
            } catch (NumberFormatException ignored) {
                // A non-numeric score column leaves the score at 0.0.
            }
        }

        final boolean printDecision = this.globalFlags.crossValidationFolds <= 0 || this.globalFlags.printCrossValidationDecisions;
        if (printDecision) {
            if (Flags.csvOutput != null) {
                System.out.print(formatCsv(Flags.csvOutput, columns, answer));
            } else {
                writeAnswer(columns, answer, probabilities, counter, classifier, rankingScore);
            }
        }
        updatePerformanceStatistics(columns, answer, probabilities, counter, classifier, rankingScore);
    }

    /**
     * Expands a CSV output format string for one row.
     *
     * <p>Directives: {@code %0}-{@code %9} insert that column; {@code %c} inserts
     * {@code str2} when it is non-null, otherwise the gold answer column if the row has
     * one, otherwise the literal {@code Class}; {@code %n} inserts a newline. Every other
     * character, including a {@code '%'} that is the last character, is copied unchanged.
     *
     * @param str    the format string
     * @param strArr the columns of the row
     * @param str2   the value for {@code %c}, or null
     * @return the expanded text
     * @throws IllegalArgumentException if a column directive refers to a column the row
     *         does not have, or the character after {@code '%'} is not a known directive
     */
    private String formatCsv(String str, String[] strArr, String str2) {
        final StringBuilder out = new StringBuilder();
        final int end = str.length();
        for (int pos = 0; pos < end; pos++) {
            final char current = str.charAt(pos);
            final boolean startsDirective = current == '%' && pos + 1 < end;
            if (!startsDirective) {
                out.append(current);
                continue;
            }
            // Consume the directive character as well.
            pos++;
            final char directive = str.charAt(pos);
            if (directive >= '0' && directive <= '9') {
                final int column = directive - '0';
                if (column >= strArr.length) {
                    throw new IllegalArgumentException("Not enough columns for format " + str);
                }
                out.append(strArr[column]);
            } else if (directive == 'c') {
                if (str2 != null) {
                    out.append(str2);
                } else if (this.globalFlags.goldAnswerColumn < strArr.length) {
                    out.append(strArr[this.globalFlags.goldAnswerColumn]);
                } else {
                    out.append("Class");
                }
            } else if (directive == 'n') {
                out.append('\n');
            } else {
                throw new IllegalArgumentException("Unrecognized format specification in " + str);
            }
        }
        return out.toString();
    }

    /**
     * Builds a binary-feature datum from the columns of one item.
     *
     * <p>The label is the gold answer column, or the empty string if the row is too
     * short to have one. Features are the optional class feature (unprefixed) followed
     * by the features extracted from each configured column, each prefixed with
     * {@code "<columnIndex>-"}. Per-column features are gathered in a set, so a column
     * contributes each distinct feature once. Features are printed when feature printing
     * is configured.
     *
     * @param strArr the columns of the item
     * @return the datum
     */
    private Datum<String, String> makeDatum(String[] strArr) {
        final String goldAnswer = this.globalFlags.goldAnswerColumn < strArr.length ? strArr[this.globalFlags.goldAnswerColumn] : "";
        final ArrayList<String> features = new ArrayList<>();

        final Collection<String> globalFeatures = Generics.newHashSet();
        if (this.globalFlags.useClassFeature) {
            globalFeatures.add(Expressions.TYPE_CLASS);
        }
        addAllInterningAndPrefixing(features, globalFeatures, "");

        int column = 0;
        for (Flags columnFlags : this.flags) {
            final Collection<String> columnFeatures = Generics.newHashSet();
            makeDatum(strArr[column], columnFlags, columnFeatures, goldAnswer);
            addAllInterningAndPrefixing(features, columnFeatures, column + "-");
            column++;
        }

        if (this.globalFlags.printFeatures != null) {
            printFeatures(strArr, features);
        }
        return new BasicDatum<>(features, goldAnswer);
    }

    /**
     * Builds a real-valued datum from the columns of one item.
     *
     * <p>The label is the gold answer column, or the empty string if the row is too
     * short to have one. Feature values are accumulated from the optional class feature
     * (unprefixed, value 1.0) and from the features extracted from each configured
     * column, each prefixed with {@code "<columnIndex>-"}. Features are printed when
     * feature printing is configured.
     *
     * @param strArr the columns of the item
     * @return the real-valued datum
     */
    private RVFDatum<String, String> makeRVFDatum(String[] strArr) {
        final String goldAnswer = this.globalFlags.goldAnswerColumn < strArr.length ? strArr[this.globalFlags.goldAnswerColumn] : "";
        final ClassicCounter<String> features = new ClassicCounter<>();

        final ClassicCounter<String> globalFeatures = new ClassicCounter<>();
        if (this.globalFlags.useClassFeature) {
            globalFeatures.setCount(Expressions.TYPE_CLASS, 1.0d);
        }
        addAllInterningAndPrefixingRVF(features, globalFeatures, "");

        int column = 0;
        for (Flags columnFlags : this.flags) {
            final ClassicCounter<String> columnFeatures = new ClassicCounter<>();
            makeDatum(strArr[column], columnFlags, columnFeatures, goldAnswer);
            addAllInterningAndPrefixingRVF(features, columnFeatures, column + "-");
            column++;
        }

        if (this.globalFlags.printFeatures != null) {
            printFeatures(strArr, features);
        }
        return new RVFDatum<>(features, goldAnswer);
    }

    /**
     * Adds every entry of one counter to another, renaming the keys on the way.
     *
     * <p>Each key gets {@code str} prepended when {@code str} is non-empty and is
     * interned when the {@code intern} option is set; its count is then added to the
     * target with {@code incrementCount}.
     *
     * @param classicCounter  the target counter
     * @param classicCounter2 the counter whose entries are copied
     * @param str             prefix for the copied keys; must not be null
     */
    private void addAllInterningAndPrefixingRVF(ClassicCounter<String> classicCounter, ClassicCounter<String> classicCounter2, String str) {
        if (!$assertionsDisabled && str == null) {
            throw new AssertionError();
        }
        for (String key : classicCounter2.keySet()) {
            final double value = classicCounter2.getCount(key);
            String feature = str.isEmpty() ? key : str + key;
            if (this.globalFlags.intern) {
                feature = feature.intern();
            }
            classicCounter.incrementCount(feature, value);
        }
    }

    /**
     * Appends every feature of one collection to another, renaming it on the way.
     *
     * <p>Each feature gets {@code str} prepended when {@code str} is non-empty and is
     * interned when the {@code intern} option is set.
     *
     * @param collection  the target collection
     * @param collection2 the features to copy
     * @param str         prefix for the copied features; must not be null
     */
    private void addAllInterningAndPrefixing(Collection<String> collection, Collection<String> collection2, String str) {
        if (!$assertionsDisabled && str == null) {
            throw new AssertionError();
        }
        for (String original : collection2) {
            String feature = str.isEmpty() ? original : str + original;
            if (this.globalFlags.intern) {
                feature = feature.intern();
            }
            collection.add(feature);
        }
    }

    /**
     * Parses a numeric column and adds it as a single real-valued feature.
     *
     * <p>Exactly one transform is applied, chosen in this order of precedence: log
     * (feature {@code "Log"}), logit (feature {@code "Logit"}), square root (feature
     * {@code "Sqrt"}), otherwise the raw value (feature {@code "Value"}). A log or logit
     * result that is infinite or NaN is reported on stderr and no feature is added.
     *
     * @param str   the text to parse as a double
     * @param flags the column's flags, which select the transform
     * @param obj   the feature container, passed on to {@code addFeature}
     * @throws NumberFormatException if {@code str} is not a parsable double
     */
    private static void addFeatureValue(String str, Flags flags, Object obj) {
        final double value = Double.parseDouble(str);
        if (flags.logTransform) {
            final double logValue = Math.log(value);
            if (Double.isInfinite(logValue) || Double.isNaN(logValue)) {
                System.err.println("WARNING: Log transform attempted on out of range value; feature ignored");
            } else {
                addFeature(obj, "Log", logValue);
            }
        } else if (flags.logitTransform) {
            final double logitValue = Math.log(value / (1.0d - value));
            if (Double.isInfinite(logitValue) || Double.isNaN(logitValue)) {
                System.err.println("WARNING: Logit transform attempted on out of range value; feature ignored");
            } else {
                addFeature(obj, "Logit", logitValue);
            }
        } else if (flags.sqrtTransform) {
            addFeature(obj, "Sqrt", Math.sqrt(value));
        } else {
            addFeature(obj, Flags.realValuedFeaturePrefix, value);
        }
    }

    /**
     * Adds one feature to a feature container of either supported kind.
     *
     * <p>A {@code Counter} gets the feature's count set to {@code d}; a
     * {@code Collection} simply gets the feature added and {@code d} is ignored.
     *
     * @param obj the feature container
     * @param f   the feature
     * @param d   the feature value (used for counters only)
     * @throws RuntimeException if {@code obj} is neither a counter nor a collection
     */
    private static <F> void addFeature(Object obj, F f, double d) {
        if (obj instanceof Counter) {
            final Counter<F> featureCounts = ErasureUtils.uncheckedCast(obj);
            featureCounts.setCount(f, d);
            return;
        }
        if (obj instanceof Collection) {
            final Collection<F> featureList = ErasureUtils.uncheckedCast(obj);
            featureList.add(f);
            return;
        }
        throw new RuntimeException("addFeature was called with a features object that is neither a counter nor a collection!");
    }

    /**
     * Extracts every feature enabled in {@code flags} from one column value and records it
     * in {@code obj} via {@link #addFeature}.
     *
     * <p>Feature groups are processed in a fixed order: whole-string feature, binned length,
     * binned numeric value, per-character count bins, split-word features (counts, words,
     * pairs, triples, word n-grams, first/last word, character n-grams, word shapes, word
     * vectors), whole-string word shape, whole-string character n-grams and finally the
     * real-valued transforms.
     *
     * @param str the column text (or a file name whose contents are read when
     *            {@code flags.filename} is set)
     * @param flags the options for this column; when {@code null} nothing is done
     * @param obj the feature container, a Counter or a Collection (see {@code addFeature})
     * @param str2 the value passed as second key to the binned length/value statistics
     *             counters (presumably the gold label -- confirm against callers)
     */
    private void makeDatum(String str, Flags flags, Object obj, String str2) {
        if (flags == null) {
            return;
        }
        if (flags.filename) {
            str = IOUtils.slurpFileNoExceptions(str);
        }
        if (flags.lowercase) {
            str = str.toLowerCase(Locale.ENGLISH);
        }
        if (flags.useString) {
            addFeature(obj, AbstractBottomUpParser.START + str, 1.0d);
        }
        if (flags.binnedLengths != null) {
            String lengthBin = intBinFeatureName("Len-", str.length(), flags.binnedLengths);
            if (flags.binnedLengthsCounter != null) {
                flags.binnedLengthsCounter.incrementCount(lengthBin, str2);
            }
            addFeature(obj, lengthBin, 1.0d);
        }
        if (flags.binnedValues != null) {
            double value = flags.binnedValuesNaN;
            try {
                value = Double.parseDouble(str);
            } catch (NumberFormatException ignored) {
                // Non-numeric text keeps the configured binnedValuesNaN stand-in.
            }
            // First bin whose upper bound is >= value; the index past the end is the open "Inf" bin.
            int bin = 0;
            while (bin < flags.binnedValues.length && !(value <= flags.binnedValues[bin])) {
                bin++;
            }
            String valueBin = "Val-(" + (bin == 0 ? "-Inf" : Double.toString(flags.binnedValues[bin - 1])) + ',' + (bin == flags.binnedValues.length ? "Inf" : Double.toString(flags.binnedValues[bin])) + ']';
            if (flags.binnedValuesCounter != null) {
                flags.binnedValuesCounter.incrementCount(valueBin, str2);
            }
            addFeature(obj, valueBin, 1.0d);
        }
        if (flags.countChars != null) {
            // Java zero-initialises the array, so no explicit clearing is needed.
            int[] charCounts = new int[flags.countChars.length];
            int textLength = str.length();
            for (int pos = 0; pos < textLength; pos++) {
                char c = str.charAt(pos);
                for (int k = 0; k < charCounts.length; k++) {
                    if (c == flags.countChars[k]) {
                        charCounts[k]++;
                    }
                }
            }
            for (int k = 0; k < charCounts.length; k++) {
                addFeature(obj, intBinFeatureName("Char-" + flags.countChars[k] + '-', charCounts[k], flags.countCharsBins), 1.0d);
            }
        }
        if (flags.splitWordsPattern != null || flags.splitWordsTokenizerPattern != null || flags.splitWordsWithPTBTokenizer) {
            String[] tokens;
            if (flags.splitWordsTokenizerPattern != null) {
                tokens = regexpTokenize(flags.splitWordsTokenizerPattern, flags.splitWordsIgnorePattern, str);
            } else if (flags.splitWordsPattern != null) {
                tokens = splitTokenize(flags.splitWordsPattern, flags.splitWordsIgnorePattern, str);
            } else {
                tokens = ptbTokenize(str);
            }
            if (flags.showTokenization) {
                System.err.print("Tokenization: ");
                System.err.println(Arrays.toString(tokens));
            }
            if (flags.splitWordCount) {
                addFeature(obj, "SWNUM", tokens.length);
            }
            if (flags.logSplitWordCount) {
                addFeature(obj, "LSWNUM", Math.log(tokens.length));
            }
            if (flags.binnedSplitWordCounts != null) {
                addFeature(obj, intBinFeatureName("SWNUMBIN-", tokens.length, flags.binnedSplitWordCounts), 1.0d);
            }
            for (int i = 0; i < tokens.length; i++) {
                if (flags.useSplitWords) {
                    addFeature(obj, "SW-" + tokens[i], 1.0d);
                }
                if (flags.useLowercaseSplitWords) {
                    addFeature(obj, "LSW-" + tokens[i].toLowerCase(), 1.0d);
                }
                if (flags.useSplitWordPairs && i + 1 < tokens.length) {
                    addFeature(obj, "SWP-" + tokens[i] + '-' + tokens[i + 1], 1.0d);
                }
                if (flags.useLowercaseSplitWordPairs && i + 1 < tokens.length) {
                    addFeature(obj, "LSWP-" + tokens[i].toLowerCase() + '-' + tokens[i + 1].toLowerCase(), 1.0d);
                }
                if (flags.useAllSplitWordPairs) {
                    // Unordered pairs: the lexicographically smaller word always goes first.
                    for (int j = i + 1; j < tokens.length; j++) {
                        if (tokens[i].compareTo(tokens[j]) < 0) {
                            addFeature(obj, "ASWP-" + tokens[i] + '-' + tokens[j], 1.0d);
                        } else {
                            addFeature(obj, "ASWP-" + tokens[j] + '-' + tokens[i], 1.0d);
                        }
                    }
                }
                if (flags.useAllSplitWordTriples) {
                    for (int j = i + 1; j < tokens.length; j++) {
                        for (int k = j + 1; k < tokens.length; k++) {
                            String[] triple = {tokens[i], tokens[j], tokens[k]};
                            Arrays.sort(triple);
                            addFeature(obj, "ASWT-" + triple[0] + '-' + triple[1] + '-' + triple[2], 1.0d);
                        }
                    }
                }
                if (flags.useSplitWordNGrams) {
                    StringBuilder ngram = new StringBuilder("SW#");
                    // Seed with the first (minWordNGramLeng - 1) words without emitting a feature.
                    for (int j = i; j < (i + flags.minWordNGramLeng) - 1 && j < tokens.length; j++) {
                        ngram.append('-');
                        ngram.append(tokens[j]);
                    }
                    int limit = flags.maxWordNGramLeng > 0 ? Math.min(tokens.length, i + flags.maxWordNGramLeng) : tokens.length;
                    // Extend one word at a time, emitting each n-gram, until a boundary word is hit.
                    for (int j = (i + flags.minWordNGramLeng) - 1; j < limit && (flags.wordNGramBoundaryRegexp == null || !flags.wordNGramBoundaryPattern.matcher(tokens[j]).matches()); j++) {
                        ngram.append('-');
                        ngram.append(tokens[j]);
                        addFeature(obj, ngram.toString(), 1.0d);
                    }
                }
                if (flags.useSplitFirstLastWords) {
                    if (i == 0) {
                        addFeature(obj, "SFW-" + tokens[i], 1.0d);
                    } else if (i == tokens.length - 1) {
                        addFeature(obj, "SLW-" + tokens[i], 1.0d);
                    }
                }
                if (flags.useLowercaseSplitFirstLastWords) {
                    if (i == 0) {
                        addFeature(obj, "LSFW-" + tokens[i].toLowerCase(), 1.0d);
                    } else if (i == tokens.length - 1) {
                        // NOTE(review): "SLW-" (not "LSLW-") is kept byte-for-byte so feature names
                        // stay compatible with existing models; confirm whether this is intended.
                        addFeature(obj, "SLW-" + tokens[i].toLowerCase(), 1.0d);
                    }
                }
                if (flags.useSplitNGrams || flags.useSplitPrefixSuffixNGrams) {
                    for (String feature : makeNGramFeatures(tokens[i], flags, true, "S#")) {
                        addFeature(obj, feature, 1.0d);
                    }
                }
                if (flags.splitWordShape > -1) {
                    addFeature(obj, "SSHAPE-" + WordShapeClassifier.wordShape(tokens[i], flags.splitWordShape), 1.0d);
                }
            }
            if (flags.wordVectors != null) {
                // Sum the vectors of all tokens that have one. The previous code only added the
                // first vector found, although the result is divided by the token count below.
                double[] sums = null;
                for (String token : tokens) {
                    float[] vector = flags.wordVectors.get(token);
                    if (vector == null) {
                        continue;
                    }
                    if (sums == null) {
                        sums = new double[vector.length];
                    }
                    // Vectors of inconsistent size are tolerated by the loader; stay in bounds.
                    int dims = Math.min(sums.length, vector.length);
                    for (int j = 0; j < dims; j++) {
                        sums[j] += vector[j];
                    }
                }
                if (sums != null) {
                    for (int j = 0; j < sums.length; j++) {
                        sums[j] /= tokens.length;
                        addFeature(obj, "SWV-" + j, sums[j]);
                    }
                }
            }
        }
        if (flags.wordShape > -1) {
            addFeature(obj, "SHAPE-" + WordShapeClassifier.wordShape(str, flags.wordShape), 1.0d);
        }
        if (flags.useNGrams || flags.usePrefixSuffixNGrams) {
            for (String feature : makeNGramFeatures(str, flags, false, "#")) {
                addFeature(obj, feature, 1.0d);
            }
        }
        if (flags.isRealValued || flags.logTransform || flags.logitTransform || flags.sqrtTransform) {
            addFeatureValue(str, flags, obj);
        }
    }

    /**
     * Builds the name of the integer bin that {@code value} falls into.
     *
     * <p>{@code bins} holds inclusive upper bounds; the first bound that is {@code >= value}
     * selects the bin, and values above every bound fall into the open-ended "Inf" bin.
     * The name is {@code prefix + low + '-' + high}, where {@code low} is 0 for the first bin
     * and the previous bound plus one otherwise.
     */
    private static String intBinFeatureName(String prefix, int value, int[] bins) {
        int bin = 0;
        while (bin < bins.length && value > bins[bin]) {
            bin++;
        }
        String low = bin == 0 ? "0" : Integer.toString(bins[bin - 1] + 1);
        String high = bin == bins.length ? "Inf" : Integer.toString(bins[bin]);
        return prefix + low + '-' + high;
    }

    /**
     * Tokenizes {@code str} with a PTB tokenizer and returns the word strings.
     * The tokenizer factory is created on first use and kept in {@code ptbFactory}.
     *
     * @param str the text to tokenize
     * @return the word form of every token, in order
     */
    private String[] ptbTokenize(String str) {
        if (this.ptbFactory == null) {
            this.ptbFactory = PTBTokenizer.factory();
        }
        List<Word> words = this.ptbFactory.getTokenizer(new StringReader(str)).tokenize();
        final int count = words.size();
        String[] tokens = new String[count];
        for (int idx = 0; idx < count; idx++) {
            Word word = words.get(idx);
            tokens[idx] = word.word();
        }
        return tokens;
    }

    /**
     * Returns the interned form of {@code str} when the global {@code intern} flag is set,
     * otherwise returns {@code str} unchanged.
     */
    private String intern(String str) {
        if (this.globalFlags.intern) {
            return str.intern();
        }
        return str;
    }

    /**
     * Produces the character n-gram features of {@code str}.
     *
     * <p>The input is optionally lowercased ({@code flags.lowercaseNGrams}) and optionally
     * reduced to the part matched by {@code flags.partialNGramPattern} (group 1 if the pattern
     * has groups, else the whole match). For every substring whose length lies between
     * {@code flags.minNGramLeng} and {@code flags.maxNGramLeng} it emits, depending on the
     * selected flags, a plain n-gram ({@code str2 + "-"}), and prefix ({@code str2 + "B-"}) /
     * suffix ({@code str2 + "E-"}) n-grams for substrings touching the start / end.
     *
     * <p>When {@code flags.cacheNGrams} is set, results are stored in and served from
     * {@code wordToSubstrings}, keyed by the processed string only.
     *
     * @param str the text to take n-grams of
     * @param flags the column options
     * @param z {@code true} to use the split-word n-gram flags, {@code false} for the
     *          whole-string n-gram flags
     * @param str2 prefix for the generated feature names
     * @return the n-gram feature names (possibly a cached, shared collection)
     */
    private Collection<String> makeNGramFeatures(String str, Flags flags, boolean z, String str2) {
        final boolean emitPlain = z ? flags.useSplitNGrams : flags.useNGrams;
        final boolean emitEdges = z ? flags.useSplitPrefixSuffixNGrams : flags.usePrefixSuffixNGrams;

        String text = flags.lowercaseNGrams ? str.toLowerCase(Locale.ENGLISH) : str;
        if (flags.partialNGramRegexp != null) {
            Matcher partial = flags.partialNGramPattern.matcher(text);
            if (partial.find()) {
                if (partial.groupCount() > 0) {
                    text = partial.group(1);
                } else {
                    text = partial.group();
                }
            }
        }

        if (flags.cacheNGrams) {
            Collection<String> cached = wordToSubstrings.get(text);
            if (cached != null) {
                return cached;
            }
        }

        Collection<String> ngrams = new ArrayList<>();
        final String plainPrefix = str2 + '-';
        final String beginPrefix = str2 + "B-";
        final String endPrefix = str2 + "E-";
        final int textLength = text.length();
        for (int start = 0; start < textLength; start++) {
            int lastEnd = Math.min(textLength, start + flags.maxNGramLeng);
            for (int end = start + flags.minNGramLeng; end <= lastEnd; end++) {
                if (emitEdges && start == 0) {
                    ngrams.add(intern(beginPrefix + text.substring(start, end)));
                }
                if (emitEdges && end == textLength) {
                    ngrams.add(intern(endPrefix + text.substring(start, end)));
                }
                if (emitPlain) {
                    ngrams.add(intern(plainPrefix + text.substring(start, end)));
                }
            }
        }
        if (flags.cacheNGrams) {
            wordToSubstrings.put(text, ngrams);
        }
        return ngrams;
    }

    /**
     * Opens the feature dump file {@code str + '.' + str2} with encoding {@code str3} and
     * makes it the current {@code cliqueWriter}, closing any writer that is already open.
     *
     * <p>Opening is best-effort: on failure {@code cliqueWriter} is left {@code null} (so the
     * feature printing methods become no-ops) and a warning is written to standard error
     * instead of the failure being dropped silently.
     *
     * @param str the file name prefix
     * @param str2 the file name suffix
     * @param str3 the character encoding of the file
     */
    private static void newFeaturePrinter(String str, String str2, String str3) {
        if (cliqueWriter != null) {
            closeFeaturePrinter();
        }
        String path = str + '.' + str2;
        try {
            cliqueWriter = IOUtils.getPrintWriter(path, str3);
        } catch (IOException e) {
            System.err.println("WARNING: couldn't open feature file " + path + " for writing; features will not be printed: " + e);
            cliqueWriter = null;
        }
    }

    /**
     * Closes the current feature dump writer, if any, and clears {@code cliqueWriter}.
     *
     * <p>Safe to call when no writer is open (for example after the file failed to open),
     * which previously caused a {@code NullPointerException}.
     */
    private static void closeFeaturePrinter() {
        if (cliqueWriter != null) {
            cliqueWriter.close();
            cliqueWriter = null;
        }
    }

    /**
     * Writes one tab-separated line to the feature dump: the input columns followed by
     * a feature/count pair for every key of {@code classicCounter}.
     * Does nothing when no feature dump writer is open.
     *
     * @param strArr the columns of the input line
     * @param classicCounter the real-valued features of the datum
     */
    private static void printFeatures(String[] strArr, ClassicCounter<String> classicCounter) {
        if (cliqueWriter == null) {
            return;
        }
        cliqueWriter.print(String.join("\t", strArr));
        for (String feature : classicCounter.keySet()) {
            cliqueWriter.print("\t");
            cliqueWriter.print(feature);
            cliqueWriter.print("\t");
            cliqueWriter.print(classicCounter.getCount(feature));
        }
        cliqueWriter.println();
    }

    /**
     * Writes one tab-separated line to the feature dump: the input columns followed by
     * every feature in {@code list}. Does nothing when no feature dump writer is open.
     *
     * @param strArr the columns of the input line
     * @param list the features of the datum
     */
    private static void printFeatures(String[] strArr, List<String> list) {
        if (cliqueWriter == null) {
            return;
        }
        cliqueWriter.print(String.join("\t", strArr));
        for (String feature : list) {
            cliqueWriter.print("\t");
            cliqueWriter.print(feature);
        }
        cliqueWriter.println();
    }

    /**
     * Trains a linear classifier with the OWLQN minimizer, repeatedly adjusting the L1
     * regularization strength by bisection until the number of features reported by
     * {@code getFeatureCount} is within 5 of {@code globalFlags.limitFeatures}, or until the
     * L1 value moves by less than 0.05 between iterations.
     *
     * <p>The final L1 value is written back to {@code globalFlags.l1reg}.
     *
     * @param generalDataset the training data
     * @return the classifier from the last successful training run; {@code null} if no run
     *         succeeded before the search stopped
     */
    private Classifier<String, String> makeClassifierAdaptL1(GeneralDataset<String, String> generalDataset) {
        // Decompiled form of: assert useAdaptL1 && limitFeatures > 0
        if (!$assertionsDisabled && (!this.globalFlags.useAdaptL1 || this.globalFlags.limitFeatures <= 0)) {
            throw new AssertionError();
        }
        LinearClassifier linearClassifier = null;
        // d = current l1reg, d2 = configured maximum, d3 = configured minimum
        double d = this.globalFlags.l1reg;
        double d2 = this.globalFlags.l1regmax;
        double d3 = this.globalFlags.l1regmin;
        if (this.globalFlags.l1reg <= 0.0d) {
            System.err.println("WARNING: useAdaptL1 set and limitFeatures to " + this.globalFlags.limitFeatures + ", but invalid value of l1reg=" + this.globalFlags.l1reg + ", defaulting to " + this.globalFlags.l1regmax);
            d = d2;
        } else {
            System.err.println("TRAIN: useAdaptL1 set and limitFeatures to " + this.globalFlags.limitFeatures + ", l1reg=" + this.globalFlags.l1reg + ", l1regmax=" + this.globalFlags.l1regmax + ", l1regmin=" + this.globalFlags.l1regmin);
        }
        // Optional comma-separated list of labels passed to the feature count / top feature calls.
        Set set = null;
        if (this.globalFlags.limitFeaturesLabels != null) {
            String[] split = this.globalFlags.limitFeaturesLabels.split(",");
            set = Generics.newHashSet();
            for (String str : split) {
                set.add(str.trim());
            }
        }
        // d4 / d5 are the current upper / lower ends of the bisection interval for l1reg.
        double d4 = d2;
        double d5 = d3;
        while (true) {
            System.err.println("Training: l1reg=" + d + ", threshold=" + this.globalFlags.featureWeightThreshold + ", target=" + this.globalFlags.limitFeatures);
            // Stays -1 when training fails with the tolerated optimizer error below.
            int i = -1;
            try {
                LinearClassifier trainClassifier = new LinearClassifierFactory((Minimizer<DiffFunction>) ReflectionLoading.loadByReflection("edu.stanford.nlp.optimization.OWLQNMinimizer", Double.valueOf(d)), this.globalFlags.tolerance, this.globalFlags.useSum, this.globalFlags.prior, this.globalFlags.sigma, this.globalFlags.epsilon).trainClassifier((GeneralDataset) generalDataset);
                linearClassifier = trainClassifier;
                i = trainClassifier.getFeatureCount(set, this.globalFlags.featureWeightThreshold, false);
                System.err.println("Training Done: l1reg=" + d + ", threshold=" + this.globalFlags.featureWeightThreshold + ", features=" + i + ", target=" + this.globalFlags.limitFeatures);
                String str2 = trainClassifier.topFeaturesToString(trainClassifier.getTopFeatures(set, this.globalFlags.featureWeightThreshold, false, this.globalFlags.limitFeatures, true));
                System.err.println("Printing top " + this.globalFlags.limitFeatures + " features with weights above " + this.globalFlags.featureWeightThreshold);
                if (this.globalFlags.limitFeaturesLabels != null) {
                    System.err.println("  Limited to labels: " + this.globalFlags.limitFeaturesLabels);
                }
                System.err.println(str2);
            } catch (RuntimeException e) {
                // Only the "non-descent direction" optimizer failure is retried; anything else propagates.
                if (e.getMessage() == null || !e.getMessage().startsWith("L-BFGS chose a non-descent direction")) {
                    throw e;
                }
                System.err.println("Error in optimization, will try again with different l1reg");
                e.printStackTrace(System.err);
            }
            if (i < 0 || i < this.globalFlags.limitFeatures - 5) {
                // Training failed or too few features: lower l1reg towards the bottom of the interval.
                d4 = d;
                d = 0.5d * (d + d5);
                if (d4 - d < 0.05d) {
                    System.err.println("Stopping: old l1reg  " + d4 + "- new l1reg " + d + ", difference less than 0.05");
                    break;
                }
            } else {
                if (i <= this.globalFlags.limitFeatures + 5) {
                    System.err.println("Stopping: # of features within 5 of target");
                    break;
                }
                // Too many features: raise l1reg towards the top of the interval.
                d5 = d;
                d = 0.5d * (d + d4);
                if (d - d5 < 0.05d) {
                    System.err.println("Stopping: new l1reg  " + d + "- old l1reg " + d5 + ", difference less than 0.05");
                    break;
                }
            }
        }
        // Remember the l1reg value the search ended on.
        this.globalFlags.l1reg = d;
        return linearClassifier;
    }

    /**
     * Trains a classifier on {@code generalDataset}, choosing the learner from the global flags.
     *
     * <p>The first matching option wins, in this order: a reflectively loaded classifier
     * factory ({@code useClassifierFactory}), naive Bayes ({@code useNB}), binary logistic
     * regression ({@code useBinary}), biased logistic regression ({@code biased}), the
     * adaptive-L1 search (when {@code useAdaptL1} is set and {@code limitFeatures > 0}), and
     * otherwise a linear classifier trained with OWLQN (if {@code l1reg > 0}) or the default
     * minimizer.
     *
     * @param generalDataset the training data
     * @return the trained classifier
     */
    public Classifier<String, String> makeClassifier(GeneralDataset<String, String> generalDataset) {
        final Flags opts = this.globalFlags;

        if (opts.useClassifierFactory != null) {
            ClassifierFactory factory;
            if (opts.classifierFactoryArgs != null) {
                factory = (ClassifierFactory) ReflectionLoading.loadByReflection(opts.useClassifierFactory, opts.classifierFactoryArgs);
            } else {
                factory = (ClassifierFactory) ReflectionLoading.loadByReflection(opts.useClassifierFactory, new Object[0]);
            }
            return factory.trainClassifier(generalDataset);
        }

        if (opts.useNB) {
            return new NBLinearClassifierFactory(opts.prior == 0 ? 0.0d : opts.sigma, opts.useClassFeature).trainClassifier((GeneralDataset) generalDataset);
        }

        if (opts.useBinary) {
            return new LogisticClassifierFactory().trainClassifier(generalDataset, opts.l1reg, opts.tolerance, new LogPrior(opts.prior, opts.sigma, opts.epsilon), opts.biased);
        }

        if (opts.biased) {
            return new LogisticClassifierFactory().trainClassifier((GeneralDataset) generalDataset, new LogPrior(opts.prior, opts.sigma, opts.epsilon), true);
        }

        if (opts.useAdaptL1 && opts.limitFeatures > 0) {
            return makeClassifierAdaptL1(generalDataset);
        }

        LinearClassifierFactory linearFactory;
        if (opts.l1reg > 0.0d) {
            linearFactory = new LinearClassifierFactory((Minimizer<DiffFunction>) ReflectionLoading.loadByReflection("edu.stanford.nlp.optimization.OWLQNMinimizer", Double.valueOf(opts.l1reg)), opts.tolerance, opts.useSum, opts.prior, opts.sigma, opts.epsilon);
        } else {
            linearFactory = new LinearClassifierFactory(opts.tolerance, opts.useSum, opts.prior, opts.sigma, opts.epsilon, opts.QNsize);
        }
        if (!opts.useQN) {
            linearFactory.useConjugateGradientAscent();
        }
        return linearFactory.trainClassifier((GeneralDataset) generalDataset);
    }

    /**
     * Tokenizes {@code str} by repeatedly matching patterns at the start of the remaining text.
     *
     * <p>At each position the ignore pattern ({@code pattern2}, may be {@code null}) is tried
     * first; text it matches is dropped. Otherwise the text matched by the token pattern
     * ({@code pattern}) becomes a token. If the token pattern does not match, a warning is
     * printed to standard error and a single character becomes a token.
     *
     * <p>Zero-length matches consume no input and would previously loop forever, so a
     * zero-length ignore match is treated as "nothing to ignore" and a zero-length token
     * match is treated as a failed match.
     *
     * @param pattern the token pattern
     * @param pattern2 the pattern of text to skip, or {@code null}
     * @param str the text to tokenize
     * @return the tokens in order of appearance; empty for an empty input
     */
    private static String[] regexpTokenize(Pattern pattern, Pattern pattern2, String str) {
        List<String> tokens = new ArrayList<>();
        String rest = str;
        while (!rest.isEmpty()) {
            if (pattern2 != null) {
                Matcher skip = pattern2.matcher(rest);
                // Only a non-empty ignore match makes progress; otherwise fall through to the token pattern.
                if (skip.lookingAt() && skip.end() > 0) {
                    rest = rest.substring(skip.end());
                    continue;
                }
            }
            Matcher token = pattern.matcher(rest);
            int consumed;
            if (token.lookingAt() && token.end() > 0) {
                consumed = token.end();
            } else {
                System.err.println("Warning: regexpTokenize pattern " + pattern + " didn't match on |" + rest.substring(0, 1) + "| of |" + rest + '|');
                // Always advance by one character so the loop is guaranteed to terminate.
                consumed = 1;
            }
            tokens.add(rest.substring(0, consumed));
            rest = rest.substring(consumed);
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Splits {@code str} around matches of {@code pattern} and drops every piece that is
     * matched in full by {@code pattern2}.
     *
     * @param pattern the separator pattern
     * @param pattern2 pattern of pieces to discard, or {@code null} to keep everything
     * @param str the text to split
     * @return the remaining pieces; the array from {@code Pattern.split} itself when nothing
     *         was discarded
     */
    private static String[] splitTokenize(Pattern pattern, Pattern pattern2, String str) {
        final String[] pieces = pattern.split(str);
        if (pattern2 == null) {
            return pieces;
        }
        List<String> kept = new ArrayList<>(pieces.length);
        for (int idx = 0; idx < pieces.length; idx++) {
            String piece = pieces[idx];
            boolean ignorable = pattern2.matcher(piece).matches();
            if (!ignorable) {
                kept.add(piece);
            }
        }
        if (kept.size() == pieces.length) {
            return pieces;
        }
        return kept.toArray(new String[kept.size()]);
    }

    /**
     * Reads word vectors from {@code str}, one word per line.
     *
     * <p>Each line is split with {@code DEFAULT_IGNORE_REGEXP}; the first field is the word
     * and the remaining fields are parsed as floats. If a line's vector size differs from
     * that of the first line, a single warning is printed and loading continues.
     *
     * @param str the location handed to {@code IOUtils.readerFromString}
     * @return a map from word to its vector
     * @throws RuntimeIOException if reading fails with an {@code IOException}
     */
    static Map<String, float[]> loadWordVectors(String str) {
        Timing timing = new Timing();
        System.err.print("Loading word vectors from " + str + " ... ");
        Map<String, float[]> vectors = new HashMap<>(10000);
        BufferedReader reader = null;
        try {
            reader = IOUtils.readerFromString(str);
            int expectedSize = -1;
            boolean warned = false;
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                String[] fields = line.split(DEFAULT_IGNORE_REGEXP);
                if (expectedSize < 0) {
                    expectedSize = fields.length - 1;
                } else if (expectedSize != fields.length - 1 && !warned) {
                    System.err.println("loadWordVectors: Inconsistent vector size: " + expectedSize + " vs. " + (fields.length - 1));
                    warned = true;
                }
                float[] vector = new float[fields.length - 1];
                for (int col = 1; col < fields.length; col++) {
                    vector[col - 1] = Float.parseFloat(fields[col]);
                }
                vectors.put(fields[0], vector);
            }
        } catch (IOException e) {
            throw new RuntimeIOException("Couldn't load word vectors", e);
        } finally {
            // Runs on success and on every failure path, exactly once.
            IOUtils.closeIgnoringExceptions(reader);
        }
        timing.done();
        return vectors;
    }

    /*
     * NOTE: this is a decompiler placeholder, not the real implementation. JADX could not
     * reconstruct the method body (see the error below), so the stub throws
     * UnsupportedOperationException on every call. Both constructors of this class call it,
     * which means instances cannot be created from this source as it stands.
     */
    /*  JADX ERROR: JadxRuntimeException in pass: BlockProcessor
        jadx.core.utils.exceptions.JadxRuntimeException: CFG modification limit reached, blocks count: 662
        	at jadx.core.dex.visitors.blocks.BlockProcessor.processBlocksTree(BlockProcessor.java:64)
        	at jadx.core.dex.visitors.blocks.BlockProcessor.visit(BlockProcessor.java:44)
        */
    private edu.stanford.nlp.classify.ColumnDataClassifier.Flags[] setProperties(java.util.Properties r7) {
        /*
            Method dump skipped, instructions count: 3828
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: edu.stanford.nlp.classify.ColumnDataClassifier.setProperties(java.util.Properties):edu.stanford.nlp.classify.ColumnDataClassifier$Flags[]");
    }

    /**
     * Creates a classifier configured from a properties file.
     *
     * <p>The file is converted with {@code StringUtils.propFileToProperties} and the result
     * is handed to the {@link #ColumnDataClassifier(Properties)} constructor.
     *
     * @param str the name of the properties file
     */
    public ColumnDataClassifier(String str) {
        this(StringUtils.propFileToProperties(str));
    }

    /**
     * Creates a classifier configured from {@code properties}.
     *
     * <p>The properties are parsed by {@code setProperties} into the flags array; its first
     * element is kept separately as the global flags.
     *
     * @param properties the configuration
     */
    public ColumnDataClassifier(Properties properties) {
        Flags[] parsedFlags = setProperties(properties);
        this.flags = parsedFlags;
        this.globalFlags = parsedFlags[0];
    }

    /**
     * Command-line entry point.
     *
     * <p>Builds a classifier from the command-line properties. Prints a usage message when
     * there is nothing to do (no test file, no serialization target and fewer than two
     * cross-validation folds) or when there is neither a training file nor a classifier to
     * load. Otherwise trains (unless a classifier was loaded) and, if a test file is given
     * and a classifier is available, tests on it.
     *
     * @param strArr the command-line arguments
     * @throws IOException if training fails with an I/O error
     */
    public static void main(String[] strArr) throws IOException {
        System.err.println(StringUtils.toInvocationString("ColumnDataClassifier", strArr));
        ColumnDataClassifier cdc = new ColumnDataClassifier(StringUtils.argsToProperties(strArr));
        String testFile = cdc.globalFlags.testFile;

        boolean nothingToProduce = testFile == null && Flags.serializeTo == null && cdc.globalFlags.crossValidationFolds < 2;
        boolean noModelSource = Flags.trainFile == null && cdc.globalFlags.loadClassifier == null;
        if (nothingToProduce || noModelSource) {
            System.err.println("usage: java edu.stanford.nlp.classify.ColumnDataClassifier -prop propFile");
            System.err.println("  and/or: -trainFile trainFile -testFile testFile|-serializeTo modelFile [-useNGrams|-sigma sigma|...]");
            return;
        }

        // Training only happens when no classifier was loaded; it runs even without a test file.
        boolean classifierReady = cdc.globalFlags.loadClassifier != null || cdc.trainClassifier();
        if (classifierReady && testFile != null) {
            cdc.testClassifier(testFile);
        }
    }

    /**
     * Reads the training file, optionally shuffles, reports binning statistics, optionally
     * dumps the data in SVMLight format and cross-validates, then trains the classifier and
     * optionally serializes it together with the flags.
     *
     * <p>Resources are released on every path: the SVMLight writer and the serialization
     * stream are closed even if writing fails, and {@code globalFlags.testFile} (blanked while
     * the flags are serialized so it is not stored in the model file) is always restored.
     *
     * @return {@code false} if {@code exitAfterTrainingFeaturization} is set (no classifier is
     *         trained), {@code true} once a classifier has been trained
     * @throws IOException if writing the SVMLight dump or the serialized classifier fails
     */
    private boolean trainClassifier() throws IOException {
        Pair<GeneralDataset<String, String>, List<String[]>> examples = readAndReturnTrainingExamples(Flags.trainFile);
        GeneralDataset<String, String> train = examples.first();
        List<String[]> lineInfos = examples.second();
        if (this.globalFlags.shuffleTrainingData) {
            // A seed of 0 means "not set": fall back to a time-based seed.
            train.shuffleWithSideInformation(this.globalFlags.shuffleSeed != 0 ? this.globalFlags.shuffleSeed : System.nanoTime(), lineInfos);
        }
        for (int i = 0; i < this.flags.length; i++) {
            if (this.flags[i] != null && this.flags[i].binnedValuesCounter != null) {
                System.err.println("BinnedValuesStatistics for column " + i);
                System.err.println(this.flags[i].binnedValuesCounter.toString());
            }
        }
        for (int i = 0; i < this.flags.length; i++) {
            if (this.flags[i] != null && this.flags[i].binnedLengthsCounter != null) {
                System.err.println("BinnedLengthsStatistics for column " + i);
                System.err.println(this.flags[i].binnedLengthsCounter.toString());
            }
        }
        if (Flags.printSVMLightFormatTo != null) {
            PrintWriter svmWriter = IOUtils.getPrintWriter(Flags.printSVMLightFormatTo, Flags.encoding);
            try {
                train.printSVMLightFormat(svmWriter);
            } finally {
                IOUtils.closeIgnoringExceptions(svmWriter);
            }
            train.featureIndex().saveToFilename(Flags.printSVMLightFormatTo + ".featureIndex");
            train.labelIndex().saveToFilename(Flags.printSVMLightFormatTo + ".labelIndex");
        }
        if (this.globalFlags.crossValidationFolds > 1) {
            crossValidate(train, lineInfos);
        }
        if (this.globalFlags.exitAfterTrainingFeaturization) {
            return false;
        }
        this.classifier = makeClassifier(train);
        printClassifier(this.classifier);
        String serializeTo = Flags.serializeTo;
        if (serializeTo == null) {
            return true;
        }
        System.err.println("Serializing classifier to " + serializeTo + "...");
        String savedTestFile = this.globalFlags.testFile;
        try (ObjectOutputStream out = IOUtils.writeStreamFromString(serializeTo)) {
            out.writeObject(this.classifier);
            // The test file name is not part of the model: hide it while the flags are written.
            this.globalFlags.testFile = null;
            out.writeObject(this.flags);
        } finally {
            this.globalFlags.testFile = savedTestFile;
        }
        System.err.println("Done.");
        return true;
    }

    /**
     * Prints a description of {@code classifier}.
     *
     * <p>A {@code LinearClassifier} is rendered with the {@code printClassifier} style and
     * parameter from the global flags; any other classifier with {@code toString()}. The text
     * goes to standard error, or to the file {@code Flags.printTo} when that is set. A failure
     * to write the file is reported with a stack trace and otherwise ignored.
     *
     * @param classifier the classifier to describe
     */
    private void printClassifier(Classifier classifier) {
        String description;
        if (classifier instanceof LinearClassifier) {
            description = ((LinearClassifier) classifier).toString(this.globalFlags.printClassifier, this.globalFlags.printClassifierParam);
        } else {
            description = classifier.toString();
        }

        if (Flags.printTo == null) {
            System.err.print("Built this classifier: ");
            System.err.println(description);
            return;
        }

        PrintWriter out = null;
        try {
            out = IOUtils.getPrintWriter(Flags.printTo, Flags.encoding);
            out.write(description);
            out.println();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeIgnoringExceptions(out);
        }
        System.err.println("Built classifier described in file " + Flags.printTo);
    }

    /**
     * Reads the test examples from {@code str} and evaluates the current classifier on them.
     *
     * <p>When {@code globalFlags.printFeatures} is set, a feature dump file is opened for the
     * duration of the test. It is closed in a {@code finally} block so it is not leaked if
     * reading or testing throws, and closing is skipped when the file could not be opened
     * (in which case {@code cliqueWriter} is {@code null}).
     *
     * @param str the test file
     */
    private void testClassifier(String str) {
        if (this.globalFlags.printFeatures != null) {
            newFeaturePrinter(this.globalFlags.printFeatures, Constants.ATTRNAME_TEST, Flags.encoding);
        }
        try {
            Pair<GeneralDataset<String, String>, List<String[]>> testData = readTestExamples(str);
            testExamples(this.classifier, testData.first(), testData.second());
        } finally {
            if (this.globalFlags.printFeatures != null && cliqueWriter != null) {
                closeFeaturePrinter();
            }
        }
    }

    /**
     * Runs k-fold cross-validation with k = {@code globalFlags.crossValidationFolds}.
     *
     * <p>For each fold a classifier is trained on the first part returned by
     * {@code splitOutFold} and tested on the second part, together with the matching slice
     * of {@code list} (equal-sized slices; the last fold takes the remainder). The two scores
     * returned by {@code testExamples} are averaged over the folds and printed.
     *
     * @param generalDataset the full training data
     * @param list per-example side information, in the same order as the dataset
     * @return the averaged pair of scores (reported as accuracy/micro-averaged F1 and
     *         macro-averaged F1)
     */
    public Pair<Double, Double> crossValidate(GeneralDataset<String, String> generalDataset, List<String[]> list) {
        final int numFolds = this.globalFlags.crossValidationFolds;
        double accuracyTotal = 0.0d;
        double macroF1Total = 0.0d;
        for (int fold = 0; fold < numFolds; fold++) {
            System.err.println();
            System.err.println("### Fold " + fold);
            Pair<GeneralDataset<String, String>, GeneralDataset<String, String>> split = generalDataset.splitOutFold(fold, numFolds);
            GeneralDataset<String, String> trainPart = split.first();
            GeneralDataset<String, String> testPart = split.second();
            Classifier<String, String> foldClassifier = makeClassifier(trainPart);
            printClassifier(foldClassifier);

            int foldSize = list.size() / numFolds;
            int from = foldSize * fold;
            // The last fold absorbs whatever is left over from the integer division.
            int to = (fold == numFolds - 1) ? list.size() : from + foldSize;

            Pair<Double, Double> scores = testExamples(foldClassifier, testPart, list.subList(from, to));
            accuracyTotal += scores.first().doubleValue();
            macroF1Total += scores.second().doubleValue();
        }
        double meanAccuracy = accuracyTotal / numFolds;
        double meanMacroF1 = macroF1Total / numFolds;
        DecimalFormat fiveDecimals = new DecimalFormat("0.00000");
        System.err.println("Average accuracy/micro-averaged F1: " + fiveDecimals.format(meanAccuracy));
        System.err.println("Average macro-averaged F1: " + fiveDecimals.format(meanMacroF1));
        System.err.println();
        return new Pair<>(Double.valueOf(meanAccuracy), Double.valueOf(meanMacroF1));
    }

    /**
     * Classifies {@code datum} with the current classifier.
     *
     * @param datum the datum to classify
     * @return the label chosen by the classifier
     * @throws RuntimeException if no classifier has been trained or loaded yet
     */
    public String classOf(Datum<String, String> datum) {
        if (this.classifier != null) {
            return this.classifier.classOf(datum);
        }
        throw new RuntimeException("Classifier is not initialized");
    }

    // Static initializer as emitted by the decompiler: gives every static field of the
    // class its starting value.
    static {
        // Synthetic flag generated by the compiler for `assert` statements.
        $assertionsDisabled = !ColumnDataClassifier.class.desiredAssertionStatus();
        // Pattern matching a single tab character.
        tab = Pattern.compile("\\t");
        numGroups = 0;
        lastGroup = "";
        numInGroup = 0;
        bestProb = 0.0d;
        bestSim = 0.0d;
        currentHighestProbCorrect = false;
        foundAnswerInGroup = false;
        // Three-decimal number format.
        nf = new DecimalFormat("0.000");
        // Cache used by makeNGramFeatures when flags.cacheNGrams is set.
        wordToSubstrings = new ConcurrentHashMap();
    }
}
