package edu.stanford.nlp.wordseg;

import com.clearnlp.component.label.IDEPLabel;
import com.clearnlp.constituent.CTLibEn;
import com.ibm.icu.text.DateFormat;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.sequences.Clique;
import edu.stanford.nlp.sequences.FeatureFactory;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.trees.international.pennchinese.RadicalMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PaddedList;
import is2.data.PipeGen;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/wordseg/ChineseSegmenterFeatureFactory.class */
/**
 * Feature factory that turns the characters around a position of a
 * {@link PaddedList} of {@link CoreLabel}s into string features.
 *
 * <p>Two cliques are served by {@link #getCliqueFeatures}: {@code cliqueC}
 * (features from {@link #featuresC}) and {@code cliqueCpC} (features from
 * {@link #featuresCpC} at the position and {@link #featuresCnC} at the
 * previous position). Which feature groups are emitted is controlled by the
 * boolean switches on the inherited {@code flags} object.
 *
 * <p>NOTE(review): {@code taDetector} and {@code outDict} are static, lazily
 * created on first use and not synchronized; concurrent first use from several
 * threads may build them more than once.
 *
 * @param <IN> the token type; must carry {@code CharAnnotation} and
 *             {@code OriginalCharAnnotation}
 */
public class ChineseSegmenterFeatureFactory<IN extends CoreLabel> extends FeatureFactory<IN> implements Serializable {
    private static final long serialVersionUID = 3387166382968763350L;

    /** Matches a string consisting of exactly one lower-case ASCII letter. */
    private static final Pattern patE = Pattern.compile("[a-z]");
    /** Matches a string consisting of exactly one upper-case ASCII letter. */
    private static final Pattern patEC = Pattern.compile("[A-Z]");
    /** Matches a string consisting of exactly one of: middle dot, hyphen, full stop. */
    private static final Pattern patP = Pattern.compile("[·\\-\\.]");
    /** Matches a string consisting of exactly one ASCII letter of either case. */
    private static final Pattern patLetter = Pattern.compile("[a-zA-Z]");
    /** Matches a string consisting of exactly one ASCII digit. */
    private static final Pattern patDigit = Pattern.compile("[0-9]");

    /**
     * Tag prefixes used for the affix-dictionary features when both
     * {@code useChPos} and {@code useCTBChar2} are set. Written as plain
     * literals so the list does not depend on same-valued constants from
     * unrelated libraries.
     */
    private static final String[] CTB_TAGS = {
        "AD", "AS", "BA", "CC", "CD", "CS", "DE", "DT", "ETC", "IJ", "JJ", "LB", "LC", "M",
        "NN", "NR", "NT", "OD", "P", "PN", "PU", "SB", "SP", "VA", "VC", "VE", "VV"
    };

    /** Tag prefixes used for the affix-dictionary features in every other supported setting. */
    private static final String[] DEFAULT_TAGS = {"2", "3", "4"};

    /** Lazily created by {@link #featuresCpC} the first time a {@code *Char2} flag is active. */
    private static TagAffixDetector taDetector = null;
    /** Lazily created by {@link #featuresCpC} the first time {@code useOutDict2} is active. */
    private static CorpusDictionary outDict = null;

    /**
     * Stores the flags by delegating to the superclass.
     *
     * @param seqClassifierFlags the flags that switch feature groups on and off
     */
    @Override // edu.stanford.nlp.sequences.FeatureFactory
    public void init(SeqClassifierFlags seqClassifierFlags) {
        super.init(seqClassifierFlags);
    }

    /**
     * Collects the features for one clique at one position.
     *
     * @param paddedList the token sequence
     * @param i          the position within {@code paddedList}
     * @param clique     the clique to extract features for
     * @return the suffixed features for {@code cliqueC} or {@code cliqueCpC};
     *         an empty set for any other clique
     */
    @Override // edu.stanford.nlp.sequences.FeatureFactory
    public Collection<String> getCliqueFeatures(PaddedList<IN> paddedList, int i, Clique clique) {
        Set<String> features = Generics.newHashSet();
        if (clique == cliqueC) {
            addAllInterningAndSuffixing(features, featuresC(paddedList, i), "C");
        } else if (clique == cliqueCpC) {
            addAllInterningAndSuffixing(features, featuresCpC(paddedList, i), "CpC");
            // The CnC features are taken one position to the left.
            addAllInterningAndSuffixing(features, featuresCnC(paddedList, i - 1), "CnC");
        }
        return features;
    }

    /**
     * Classifies a (previous, current) pair of strings by ASCII letter case.
     * Each argument only counts as a letter when it is exactly one character long.
     *
     * @param str  the previous character
     * @param str2 the current character
     * @return {@code "BND"} for lower+upper, {@code "ENG"} for lower+lower,
     *         {@code "BCC"} for upper+upper, {@code "e1"}..{@code "e4"} when
     *         exactly one side is a letter (lower-prev, lower-cur, upper-prev,
     *         upper-cur respectively), otherwise the empty string
     */
    private static String isEnglish(String str, String str2) {
        boolean prevLower = patE.matcher(str).matches();
        boolean curLower = patE.matcher(str2).matches();
        boolean prevUpper = patEC.matcher(str).matches();
        boolean curUpper = patEC.matcher(str2).matches();

        if (prevLower && curUpper) {
            return "BND";
        }
        if (prevLower && curLower) {
            return "ENG";
        }
        if (prevUpper && curUpper) {
            return "BCC";
        }
        if (prevLower && !curLower && !curUpper) {
            return "e1";
        }
        if (curLower && !prevLower && !prevUpper) {
            return "e2";
        }
        if (prevUpper && !curLower && !curUpper) {
            return "e3";
        }
        if (curUpper && !prevLower && !prevUpper) {
            return "e4";
        }
        // Note: upper followed by lower falls through to here as well.
        return "";
    }

    /**
     * @param str the string to test
     * @return {@code "1:EngPU"} when {@code str} is exactly one of middle dot,
     *         hyphen or full stop; otherwise the empty string
     */
    private static String isEngPU(String str) {
        return patP.matcher(str).matches() ? "1:EngPU" : "";
    }

    /**
     * Emits the word-context features for the single-position clique.
     * Missing character annotations are not replaced here, so they appear as
     * the text {@code "null"} inside the feature strings.
     *
     * @param paddedList the token sequence
     * @param i          the position within {@code paddedList}
     * @return the features, empty unless {@code flags.useWord1} is set
     */
    public Collection<String> featuresC(PaddedList<IN> paddedList, int i) {
        Collection<String> features = new ArrayList<String>();
        String charc = charAt(paddedList, i);
        String charc1 = charAt(paddedList, i + 1);
        String charc2 = charAt(paddedList, i + 2);
        String charp = charAt(paddedList, i - 1);
        String charp2 = charAt(paddedList, i - 2);
        if (this.flags.useWord1) {
            addWordContextFeatures(features, charc, charc1, charc2, charp, charp2, "|word1");
        }
        return features;
    }

    /**
     * Emits the features for the (previous, current) clique: word context,
     * radicals, dictionary lookups, tag-affix lookups, hand-written rules and
     * the Unicode category of the original current character. Missing
     * character annotations are treated as empty strings.
     *
     * @param paddedList the token sequence
     * @param i          the position within {@code paddedList}
     * @return the features; always contains one {@code CHARTYPE-*} entry
     * @throws RuntimeException if a {@code *Char2} flag and {@code useChPos}
     *                          are set but neither {@code useCTBChar2} nor
     *                          {@code usePKChar2} is
     */
    public Collection<String> featuresCpC(PaddedList<IN> paddedList, int i) {
        Collection<String> features = new ArrayList<String>();
        IN current = paddedList.get(i);
        String charc = nullToEmpty(current.get(CoreAnnotations.CharAnnotation.class));
        String charc1 = nullToEmpty(charAt(paddedList, i + 1));
        String charc2 = nullToEmpty(charAt(paddedList, i + 2));
        String charp = nullToEmpty(charAt(paddedList, i - 1));
        String charp2 = nullToEmpty(charAt(paddedList, i - 2));
        String charp3 = nullToEmpty(charAt(paddedList, i - 3));

        if (this.flags.useWord2) {
            addWordContextFeatures(features, charc, charc1, charc2, charp, charp2, "|word2");
        }

        char rcharc = radicalOf(charc);
        char rcharc1 = radicalOf(charc1);
        char rcharp = radicalOf(charp);
        if (this.flags.useRad2) {
            features.add(rcharc + "rc");
            features.add(rcharc1 + "rc1");
            features.add(rcharp + "rp");
            // NOTE(review): the operands below are chars, so '+' is integer
            // addition and the feature text is the decimal sum of the code
            // units, not the concatenated radicals. Kept unchanged because
            // altering emitted feature strings would presumably break
            // already-trained models - confirm before changing.
            int sumPrevCur = rcharp + rcharc;
            int sumCurNext = rcharc + rcharc1;
            int sumAll = rcharp + rcharc + rcharc1;
            features.add(sumPrevCur + "rpc");
            features.add(sumCurNext + "rcc1");
            features.add(sumAll + "rpcc1");
            features.add("|rad2");
        }

        if (this.flags.useDict2) {
            features.add(new NonDict2(this.flags).checkDic(charp + charc, this.flags) + "nondict");
            features.add("|useDict2");
        }

        if (this.flags.useOutDict2) {
            addOutDictFeatures(features, charc, charc1, charc2, charp, charp2, charp3);
        }

        if (this.flags.useCTBChar2 || this.flags.useASBCChar2 || this.flags.useHKChar2
                || this.flags.usePKChar2 || this.flags.useMSRChar2) {
            addTagAffixFeatures(features, charp, charc);
        }

        if (this.flags.useRule2) {
            addRuleFeatures(features, charc, charc1, charc2, charp, charp2, rcharp);
        }

        // A missing original-character annotation is treated like an empty one
        // (consistent with the CharAnnotation handling above) instead of
        // failing with a NullPointerException.
        String origChar = nullToEmpty(current.get(CoreAnnotations.OriginalCharAnnotation.class));
        features.add(charTypeFeature(origChar));
        return features;
    }

    /**
     * Emits the word-context features used together with the
     * (previous, current) clique. Missing character annotations are not
     * replaced here, so they appear as the text {@code "null"}.
     *
     * @param paddedList the token sequence
     * @param i          the position within {@code paddedList}
     * @return the features, empty unless {@code flags.useWordn} is set
     */
    public Collection<String> featuresCnC(PaddedList<IN> paddedList, int i) {
        Collection<String> features = new ArrayList<String>();
        String charc = charAt(paddedList, i);
        String charc1 = charAt(paddedList, i + 1);
        String charp = charAt(paddedList, i - 1);
        if (!this.flags.useWordn) {
            return features;
        }
        features.add(charc + "c");
        features.add(charc1 + "c1");
        features.add(charp + "p");
        features.add(charp + charc + "pc");
        if (this.flags.useAs || this.flags.useMsr || this.flags.usePk || this.flags.useHk) {
            features.add(charc + charc1 + "cc1");
            features.add(charp + charc1 + "pc1");
        }
        features.add("|wordn");
        return features;
    }

    /** Returns the {@code CharAnnotation} of the element at {@code pos}; may be {@code null}. */
    private String charAt(PaddedList<IN> paddedList, int pos) {
        return paddedList.get(pos).get(CoreAnnotations.CharAnnotation.class);
    }

    /** Maps {@code null} to the empty string and returns anything else unchanged. */
    private static String nullToEmpty(String s) {
        return s == null ? "" : s;
    }

    /** Returns the first character of {@code s}, or a space when {@code s} is empty. */
    private static char firstCharOrSpace(String s) {
        return s.length() > 0 ? s.charAt(0) : ' ';
    }

    /** Returns whether {@code ch} is one of the ASCII digits '0'..'9'. */
    private static boolean isAsciiDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }

    /** Returns the radical of the first character of {@code s}, or {@code 'n'} when {@code s} is empty. */
    private static char radicalOf(String s) {
        return s.length() == 0 ? 'n' : RadicalMap.getRadical(s.charAt(0));
    }

    /** Adds the ten unigram/bigram context features followed by the group marker. */
    private static void addWordContextFeatures(Collection<String> features, String charc, String charc1,
            String charc2, String charp, String charp2, String marker) {
        features.add(charc + "::c");
        features.add(charc1 + "::c1");
        features.add(charp + "::p");
        features.add(charp2 + "::p2");
        features.add(charc + charc1 + "::cn");
        features.add(charp + charc + "::pc");
        features.add(charp + charc1 + "::pn");
        features.add(charp2 + charp + "::p2p");
        features.add(charp2 + charc + "::p2c");
        features.add(charc2 + charc + "::n2c");
        features.add(marker);
    }

    /** Adds the external-dictionary lookups for the 2-, 3- and 4-character windows, loading the dictionary on first use. */
    private void addOutDictFeatures(Collection<String> features, String charc, String charc1, String charc2,
            String charp, String charp2, String charp3) {
        if (outDict == null) {
            System.err.println("reading " + this.flags.outDict2 + " as a seen lexicon");
            outDict = new CorpusDictionary(this.flags.outDict2, true);
        }
        features.add(outDict.getW(charp + charc) + "outdict");
        features.add(outDict.getW(charc + charc1) + "outdict");
        features.add(outDict.getW(charp2 + charp) + "outdict");
        features.add(outDict.getW(charp2 + charp + charc) + "outdict");
        features.add(outDict.getW(charp3 + charp2 + charp) + "outdict");
        features.add(outDict.getW(charp + charc + charc1) + "outdict");
        features.add(outDict.getW(charc + charc1 + charc2) + "outdict");
        features.add(outDict.getW(charp + charc + charc1 + charc2) + "outdict");
    }

    /** Adds one affix-dictionary feature per tag prefix, creating the detector on first use. */
    private void addTagAffixFeatures(Collection<String> features, String charp, String charc) {
        String[] tagsets;
        if (!this.flags.useChPos) {
            tagsets = DEFAULT_TAGS;
        } else if (this.flags.useCTBChar2) {
            tagsets = CTB_TAGS;
        } else if (this.flags.usePKChar2) {
            tagsets = DEFAULT_TAGS;
        } else {
            throw new RuntimeException("only support settings for CTB and PK now.");
        }
        if (taDetector == null) {
            taDetector = new TagAffixDetector(this.flags);
        }
        for (String tag : tagsets) {
            features.add(taDetector.checkDic(tag + "p", charp) + taDetector.checkDic(tag + "i", charp)
                    + taDetector.checkDic(tag + "s", charc) + taDetector.checkInDic(charp)
                    + taDetector.checkInDic(charc) + tag + "prep-sufc");
        }
    }

    /** Adds the repetition, digit/letter/punctuation and English-boundary rule features. */
    private void addRuleFeatures(Collection<String> features, String charc, String charc1, String charc2,
            String charp, String charp2, char rcharp) {
        if (charp.equals(charc)) {
            features.add("11");
        }
        if (charp.equals(charc1)) {
            features.add("22");
        }
        if (!this.flags.usePk && !this.flags.useHk && charc.equals(charc2)) {
            features.add("33");
        }

        char next1 = firstCharOrSpace(charc1);
        char next2 = firstCharOrSpace(charc2);
        char cur = firstCharOrSpace(charc);
        char prev = firstCharOrSpace(charp);

        if (isAsciiDigit(cur) && isAsciiDigit(prev)) {
            // "19" followed by '9' and another digit is tagged as a year.
            if (cur == '9' && prev == '1' && next1 == '9' && isAsciiDigit(next2)) {
                features.add("YR");
            } else {
                features.add("2N");
            }
        } else if (isAsciiDigit(prev)) {
            features.add("1N");
        } else if (patLetter.matcher(charp).matches()) {
            features.add("E");
        } else if (rcharp == '.' && charp.length() == 1) {
            if (!this.flags.useHk && !this.flags.usePk) {
                if (patLetter.matcher(charc).matches()) {
                    features.add("PU+E");
                }
                if (patLetter.matcher(charp2).matches()) {
                    features.add("E+PU");
                }
                if (patDigit.matcher(charc).matches()) {
                    features.add("PU+N");
                }
                if (patDigit.matcher(charp2).matches()) {
                    features.add("N+PU");
                }
            }
            features.add("PU");
        }

        String english = isEnglish(charp, charc);
        String engPU = isEngPU(charp);
        if (!english.equals("")) {
            features.add(english);
        }
        if (!engPU.equals("") && !english.equals("")) {
            features.add(engPU + english);
        }
    }

    /**
     * Maps the Unicode general category of the first character of
     * {@code origChar} (a space when empty) to a {@code CHARTYPE-*} feature.
     */
    private static String charTypeFeature(String origChar) {
        switch (Character.getType(firstCharOrSpace(origChar))) {
            case Character.UPPERCASE_LETTER:
            case Character.LOWERCASE_LETTER:
                return "CHARTYPE-LETTER";
            case Character.OTHER_LETTER:
                return "CHARTYPE-OTHER_LETTER";
            case Character.DECIMAL_DIGIT_NUMBER:
                return "CHARTYPE-DECIMAL_DIGIT_NUMBER";
            default:
                return "CHARTYPE-MISC";
        }
    }
}
