package com.clearnlp.constituent;

import com.clearnlp.morphology.MPLibEn;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

/* loaded from: input_file:com/clearnlp/constituent/CTLibEn.class */
public class CTLibEn extends CTLib {
    // ---- Constituent tags (PTAG_*) ------------------------------------------------------
    // Tag strings compared against CTNode.pTag via isPTag/isPTagAny/matchesPTag in this class.
    // The first five are the clause tags tested by isClause(); the WH* tags at the end are the
    // ones isRelPhrase()/getComplementizer() detect with pTag.startsWith("WH").
    // NOTE(review): presumably the Penn Treebank-style phrase inventory — confirm against the corpus.
    public static final String PTAG_S = "S";
    public static final String PTAG_SBAR = "SBAR";
    public static final String PTAG_SBARQ = "SBARQ";
    public static final String PTAG_SINV = "SINV";
    public static final String PTAG_SQ = "SQ";
    public static final String PTAG_ADJP = "ADJP";
    public static final String PTAG_ADVP = "ADVP";
    public static final String PTAG_CAPTION = "CAPTION";
    public static final String PTAG_CIT = "CIT";
    public static final String PTAG_CONJP = "CONJP";
    public static final String PTAG_EDITED = "EDITED";
    public static final String PTAG_EMBED = "EMBED";
    public static final String PTAG_FRAG = "FRAG";
    public static final String PTAG_HEADING = "HEADING";
    public static final String PTAG_INTJ = "INTJ";
    public static final String PTAG_LST = "LST";
    public static final String PTAG_META = "META";
    public static final String PTAG_NAC = "NAC";
    public static final String PTAG_NML = "NML";
    public static final String PTAG_NP = "NP";
    public static final String PTAG_NX = "NX";
    public static final String PTAG_PP = "PP";
    public static final String PTAG_PRN = "PRN";
    public static final String PTAG_PRT = "PRT";
    public static final String PTAG_QP = "QP";
    public static final String PTAG_RRC = "RRC";
    public static final String PTAG_TITLE = "TITLE";
    public static final String PTAG_TYPO = "TYPO";
    public static final String PTAG_UCP = "UCP";
    public static final String PTAG_VP = "VP";
    public static final String PTAG_WHADJP = "WHADJP";
    public static final String PTAG_WHADVP = "WHADVP";
    public static final String PTAG_WHNP = "WHNP";
    public static final String PTAG_WHPP = "WHPP";
    // ---- Word-level tags (POS_*) --------------------------------------------------------
    // Like the PTAG_* values, these are compared against CTNode.pTag (e.g. isPassiveVerb tests
    // POS_VBN). The symbol/punctuation values from POS_COLON through POS_PUNC are exactly the
    // set isPunctuation() accepts (POS_DOLLAR is not part of that set).
    // NOTE(review): presumably Penn Treebank-style part-of-speech tags — confirm.
    public static final String POS_ADD = "ADD";
    public static final String POS_AFX = "AFX";
    public static final String POS_CC = "CC";
    public static final String POS_CD = "CD";
    public static final String POS_CODE = "CODE";
    public static final String POS_DT = "DT";
    public static final String POS_EX = "EX";
    public static final String POS_FW = "FW";
    public static final String POS_IN = "IN";
    public static final String POS_JJ = "JJ";
    public static final String POS_JJR = "JJR";
    public static final String POS_JJS = "JJS";
    public static final String POS_LS = "LS";
    public static final String POS_MD = "MD";
    public static final String POS_NN = "NN";
    public static final String POS_NNS = "NNS";
    public static final String POS_NNP = "NNP";
    public static final String POS_NNPS = "NNPS";
    public static final String POS_PDT = "PDT";
    public static final String POS_POS = "POS";
    public static final String POS_PRP = "PRP";
    public static final String POS_PRPS = "PRP$";
    public static final String POS_RB = "RB";
    public static final String POS_RBR = "RBR";
    public static final String POS_RBS = "RBS";
    public static final String POS_RP = "RP";
    public static final String POS_TO = "TO";
    public static final String POS_UH = "UH";
    public static final String POS_VB = "VB";
    public static final String POS_VBD = "VBD";
    public static final String POS_VBG = "VBG";
    public static final String POS_VBN = "VBN";
    public static final String POS_VBP = "VBP";
    public static final String POS_VBZ = "VBZ";
    public static final String POS_WDT = "WDT";
    public static final String POS_WP = "WP";
    public static final String POS_WPS = "WP$";
    public static final String POS_WRB = "WRB";
    public static final String POS_DOLLAR = "$";
    public static final String POS_COLON = ":";
    public static final String POS_COMMA = ",";
    public static final String POS_PERIOD = ".";
    public static final String POS_LQ = "``";
    public static final String POS_RQ = "''";
    public static final String POS_LRB = "-LRB-";
    public static final String POS_RRB = "-RRB-";
    public static final String POS_HYPH = "HYPH";
    public static final String POS_NFP = "NFP";
    public static final String POS_SYM = "SYM";
    public static final String POS_PUNC = "PUNC";
    // ---- Function tags (FTAG_*) ---------------------------------------------------------
    // Values passed to CTNode.hasFTag/addFTag/removeFTag in this class (e.g. FTAG_SBJ, FTAG_LGS,
    // FTAG_CLF, FTAG_NOM, FTAG_ETC). Note that FTAG_PRP has the same string value as POS_PRP
    // ("PRP"); the two are distinguished only by which node attribute they are compared with.
    public static final String FTAG_ADV = "ADV";
    public static final String FTAG_BNF = "BNF";
    public static final String FTAG_CLF = "CLF";
    public static final String FTAG_CLR = "CLR";
    public static final String FTAG_DIR = "DIR";
    public static final String FTAG_DTV = "DTV";
    public static final String FTAG_ETC = "ETC";
    public static final String FTAG_EXT = "EXT";
    public static final String FTAG_HLN = "HLN";
    public static final String FTAG_IMP = "IMP";
    public static final String FTAG_INT = "INT";
    public static final String FTAG_LGS = "LGS";
    public static final String FTAG_LOC = "LOC";
    public static final String FTAG_MNR = "MNR";
    public static final String FTAG_NOM = "NOM";
    public static final String FTAG_PRD = "PRD";
    public static final String FTAG_PRP = "PRP";
    public static final String FTAG_PUT = "PUT";
    public static final String FTAG_SBJ = "SBJ";
    public static final String FTAG_SEZ = "SEZ";
    public static final String FTAG_TMP = "TMP";
    public static final String FTAG_TPC = "TPC";
    public static final String FTAG_TTL = "TTL";
    public static final String FTAG_UNF = "UNF";
    public static final String FTAG_VOC = "VOC";
    // ---- Empty-category forms (EC_*) ----------------------------------------------------
    // Word forms carried by empty-category terminals. In this class they are compared against
    // CTNode.form: EC_NULL ("*") in linkReducedPassiveNullsAux, EC_ZERO ("0") in isComplementizer,
    // and EC_ICH as a prefix (form.startsWith) in the complementizer-linking pass.
    public static final String EC_EXP = "*EXP*";
    public static final String EC_ESM = "*?*";
    public static final String EC_ICH = "*ICH*";
    public static final String EC_NOT = "*NOT*";
    public static final String EC_ZERO = "0";
    public static final String EC_PPA = "*PPA*";
    public static final String EC_PRO = "*PRO*";
    public static final String EC_RNR = "*RNR*";
    public static final String EC_NULL = "*";
    public static final String EC_TRACE = "*T*";
    public static final String EC_UNIT = "*U*";
    // ---- Precompiled patterns -----------------------------------------------------------
    // Tags of complementizer words (WDT, anything starting with WP, WRB); applied to CTNode.pTag.
    public static final Pattern RE_COMP_POS = Pattern.compile("^(WDT|WP.*|WRB)$");
    // Lower-case word forms accepted as complementizers; callers lower-case the form before matching.
    public static final Pattern RE_COMP_FORM = Pattern.compile("^(how|however|that|what|whatever|whatsoever|when|whenever|where|whereby|wherein|whereupon|wherever|which|whichever|whither|who|whoever|whom|whose|why)$");
    // Forms "*" or "*-<something>"; used by isPassiveNull on an empty category's form.
    public static final Pattern RE_NULL = Pattern.compile("^(\\*|\\*-.+)$");
    // Constituent tags whose complementizer the linking pass tries to give an antecedent.
    public static final Pattern RE_COMP_LINK = Pattern.compile("^(WHNP|WHPP|WHADVP)$");
    // Lower-case complementizer forms (plus the zero form "0") eligible for antecedent linking.
    public static final Pattern RE_COMP_LINK_FORM = Pattern.compile("^(0|that|when|where|whereby|wherein|whereupon|which|who|whom|whose)$");
    // Forms starting with *ICH*, *PPA* or *RNR*. Unanchored at the start, so the outcome depends on
    // whether callers use find() or matches(); it is not referenced elsewhere in this class.
    public static final Pattern RE_ICH_PPA_RNR = Pattern.compile("\\*(ICH|PPA|RNR)\\*.*");

    /**
     * Applies this class's three preprocessing passes to a tree, always in the same order:
     * function-tag fixing, antecedent linking for reduced-passive nulls, and antecedent
     * linking for complementizers.
     *
     * @param cTTree the tree to process; it is modified in place
     */
    public static void preprocessTree(CTTree cTTree) {
        CTLibEn.fixFunctionTags(cTTree);
        CTLibEn.linkReducedPassiveNulls(cTTree);
        CTLibEn.linkComplementizers(cTTree);
    }

    /**
     * Tests whether a node is a null element in the object position of a past-tense or
     * participial verb.
     *
     * <p>All of the following must hold: the node is an empty category whose form matches
     * {@link #RE_NULL}; its parent is an {@code NP} without function tags; that {@code NP} has
     * a {@code VP} parent and is not its first child; and the sibling immediately to the left
     * of the {@code NP} is tagged {@code VBN} or {@code VBD}.
     *
     * @param cTNode the node to test
     * @return {@code true} if every condition above is satisfied
     */
    public static boolean isPassiveNull(CTNode cTNode) {
        if (!cTNode.isEmptyCategory()) {
            return false;
        }
        if (!RE_NULL.matcher(cTNode.form).find()) {
            return false;
        }

        final CTNode np = cTNode.parent;
        if (np == null) {
            return false;
        }
        // Only a bare NP (no function tags) qualifies.
        if (!np.isPTag(PTAG_NP) || !np.s_fTags.isEmpty()) {
            return false;
        }

        final CTNode vp = np.parent;
        if (vp == null || !vp.isPTag(PTAG_VP)) {
            return false;
        }
        // There must be a left neighbour to inspect.
        if (!(np.i_siblingId > 0)) {
            return false;
        }
        return vp.getChild(np.i_siblingId - 1).isPTagAny(POS_VBN, POS_VBD);
    }

    /**
     * Tests whether a node is a past participle used passively.
     *
     * <p>The node must be tagged {@code VBN} and sit directly under a {@code VP}. It is then
     * rejected only if that {@code VP} is itself inside another {@code VP} whose first child
     * matching {@code "+VB.*"} is a form of "have" according to {@link MPLibEn#isHave}.
     *
     * @param cTNode the node to test
     * @return {@code true} if the node is a {@code VBN} under a {@code VP} and is not governed
     *         by a "have" auxiliary as described above
     */
    public static boolean isPassiveVerb(CTNode cTNode) {
        if (!cTNode.isPTag(POS_VBN)) {
            return false;
        }
        final CTNode vp = cTNode.getParent();
        if (vp == null || !vp.isPTag(PTAG_VP)) {
            return false;
        }

        final CTNode outer = vp.getParent();
        if (outer == null || !outer.isPTag(PTAG_VP)) {
            return true;
        }
        final CTNode auxiliary = outer.getFirstChild("+VB.*");
        if (auxiliary == null) {
            return true;
        }
        return !MPLibEn.isHave(auxiliary.form);
    }

    /**
     * Tests whether a non-phrase node is a complementizer: either its tag matches
     * {@link #RE_COMP_POS}, or it is tagged {@code -NONE-} with the zero form {@code "0"}.
     *
     * @param cTNode the node to test
     * @return {@code false} for phrase nodes; otherwise whether one of the two conditions holds
     */
    public static boolean isComplementizer(CTNode cTNode) {
        if (cTNode.isPhrase()) {
            return false;
        }
        if (RE_COMP_POS.matcher(cTNode.pTag).find()) {
            return true;
        }
        return cTNode.isPTag("-NONE-") && cTNode.isForm(EC_ZERO);
    }

    /**
     * Finds the complementizer terminal inside a WH-phrase.
     *
     * <p>Lookup order:
     * <ol>
     *   <li>if the phrase consists only of empty categories, its first terminal;</li>
     *   <li>otherwise the first terminal whose tag matches {@link #RE_COMP_POS};</li>
     *   <li>otherwise the first terminal whose lower-cased form matches {@link #RE_COMP_FORM}.</li>
     * </ol>
     * Forms are lower-cased with {@link Locale#ROOT} so the result does not depend on the JVM's
     * default locale (under a Turkish locale "WHICH" would otherwise lower-case to "whıch").
     *
     * @param cTNode the candidate WH-phrase
     * @return the complementizer terminal, or {@code null} if the node's tag does not start
     *         with {@code "WH"} or no terminal qualifies
     */
    public static CTNode getComplementizer(CTNode cTNode) {
        if (!cTNode.pTag.startsWith("WH")) {
            return null;
        }

        List<CTNode> terminals = cTNode.getSubTerminals();
        if (cTNode.isEmptyCategoryRec()) {
            return terminals.get(0);
        }

        // A tag match anywhere in the phrase takes priority over any form match.
        for (CTNode terminal : terminals) {
            if (RE_COMP_POS.matcher(terminal.pTag).find()) {
                return terminal;
            }
        }
        for (CTNode terminal : terminals) {
            String form = terminal.form.toLowerCase(Locale.ROOT);
            if (RE_COMP_FORM.matcher(form).find()) {
                return terminal;
            }
        }
        return null;
    }

    /**
     * Walks the whole tree from its root and assigns antecedents to reduced-passive null
     * elements (see {@code linkReducedPassiveNullsAux}).
     *
     * @param cTTree the tree to process; nodes are modified in place
     */
    public static void linkReducedPassiveNulls(CTTree cTTree) {
        final CTNode root = cTTree.getRoot();
        linkReducedPassiveNullsAux(cTTree, root);
    }

    /**
     * Recursive worker for {@link #linkReducedPassiveNulls(CTTree)}: handles {@code node} if it
     * is a passive null with the bare form {@code "*"}, then descends into every child.
     *
     * @param tree the tree that owns {@code node}; used to resolve co-indexed empty categories
     * @param node the node currently being visited
     */
    private static void linkReducedPassiveNullsAux(CTTree tree, CTNode node) {
        if (isPassiveNull(node) && node.form.equals(EC_NULL)) {
            setReducedPassiveAntecedent(tree, node);
        }
        for (CTNode child : node.ls_children) {
            linkReducedPassiveNullsAux(tree, child);
        }
    }

    /**
     * Chooses and stores the antecedent of one reduced-passive null.
     *
     * <p>The search starts from the null's parent NP, or from the first empty category
     * co-indexed with the enclosing VP when that VP has a co-index. From there it climbs the
     * chain of {@code VP|RRC|UCP} ancestors and looks at the siblings of the topmost one.
     * Nothing is assigned when no such chain, or no parent above it, exists.
     */
    private static void setReducedPassiveAntecedent(CTTree tree, CTNode node) {
        CTNode start = node.parent;
        // isPassiveNull() has already established that the NP has a (VP) parent.
        final CTNode vp = start.parent;
        if (vp.coIndex != -1) {
            List<CTNode> coIndexed = tree.getCoIndexedEmptyCategories(vp.coIndex);
            if (coIndexed != null && !coIndexed.isEmpty()) {
                start = coIndexed.get(0);
            }
        }

        final CTNode top = start.getHighestChainedAncestor("+VP|RRC|UCP");
        // The complementizer pass treats this lookup as nullable; do the same here rather than
        // aborting the whole traversal with a NullPointerException.
        if (top == null || top.parent == null) {
            return;
        }

        final CTNode container = top.parent;
        if (container.matchesPTag("NP|NML") || container.hasFTag(FTAG_NOM)) {
            // Nominal context: try progressively weaker candidates to the left.
            node.antecedent = top.getPrevSibling("+NP|NML");
            if (node.antecedent == null) {
                node.antecedent = top.getPrevSibling("+NN.*");
            }
            if (node.antecedent == null) {
                node.antecedent = top.getPrevSibling(PTAG_QP);
            }
            if (node.antecedent == null) {
                node.antecedent = top.getPrevSibling("-NOM");
            }
        } else if (container.matchesPTag("S.*")) {
            // Clausal context: the subject NP, searched to the left first, then to the right.
            node.antecedent = top.getPrevSibling("NP", "-SBJ");
            if (node.antecedent == null) {
                node.antecedent = top.getNextSibling("NP", "-SBJ");
            }
        }
    }

    /**
     * Walks the whole tree from its root and assigns antecedents to the complementizers of
     * {@code WHNP}, {@code WHPP} and {@code WHADVP} phrases.
     *
     * @param cTTree the tree to process; nodes are modified in place
     */
    public static void linkComplementizers(CTTree cTTree) {
        linkComplementizersAux(cTTree, cTTree.getRoot());
    }

    /**
     * Recursive worker for {@link #linkComplementizers(CTTree)}.
     *
     * <p>Nodes whose tag does not match {@link #RE_COMP_LINK} are only descended into. A
     * matching node is processed and its children are not visited. Processing stops early when
     * the phrase has no complementizer, has no chained {@code SBAR} ancestor, that ancestor
     * carries the {@code NOM} function tag, or the complementizer's form is not in
     * {@link #RE_COMP_LINK_FORM}. The form is lower-cased with {@link Locale#ROOT} so matching
     * does not depend on the JVM's default locale.
     *
     * @param tree the tree that owns {@code node}; used to resolve co-indexed empty categories
     * @param node the node currently being visited
     */
    private static void linkComplementizersAux(CTTree tree, CTNode node) {
        if (!RE_COMP_LINK.matcher(node.pTag).find()) {
            for (CTNode child : node.ls_children) {
                linkComplementizersAux(tree, child);
            }
            return;
        }

        final CTNode complementizer = getComplementizer(node);
        CTNode sbar = node.getHighestChainedAncestor(PTAG_SBAR);
        if (complementizer == null || sbar == null || sbar.hasFTag(FTAG_NOM)) {
            return;
        }
        if (!RE_COMP_LINK_FORM.matcher(complementizer.form.toLowerCase(Locale.ROOT)).find()) {
            return;
        }

        if (sbar.coIndex != -1) {
            // A co-indexed SBAR: continue from the SBAR holding the first co-indexed *ICH* element.
            List<CTNode> coIndexed = tree.getCoIndexedEmptyCategories(sbar.coIndex);
            if (coIndexed != null) {
                for (CTNode emptyCategory : coIndexed) {
                    if (emptyCategory.form.startsWith(EC_ICH) && emptyCategory.parent.isPTag(PTAG_SBAR)) {
                        sbar = emptyCategory.parent;
                        break;
                    }
                }
            }
        } else if (sbar.parent != null && sbar.parent.isPTag(PTAG_UCP)) {
            // An SBAR inside a UCP: look at the siblings of the UCP instead.
            sbar = sbar.getParent();
        }

        final CTNode container = sbar.parent;
        if (container == null) {
            return;
        }

        if (container.isPTag(PTAG_NP)) {
            CTNode candidate = sbar.getPrevSibling("NP");
            if (candidate != null) {
                complementizer.antecedent = candidate;
            }
        } else if (container.isPTag(PTAG_ADVP)) {
            CTNode candidate = sbar.getPrevSibling("ADVP");
            if (candidate != null) {
                complementizer.antecedent = candidate;
            }
        } else if (container.isPTag(PTAG_VP)) {
            // Under a VP only a preceding predicate (-PRD) qualifies, and unless the SBAR is
            // tagged CLF its category must correspond to the kind of WH-phrase.
            CTNode predicate = sbar.getPrevSibling("-PRD");
            if (predicate != null) {
                boolean accepted = sbar.hasFTag(FTAG_CLF)
                        || (node.isPTag(PTAG_WHNP) && predicate.isPTag(PTAG_NP))
                        || (node.isPTag(PTAG_WHPP) && predicate.isPTag(PTAG_PP))
                        || (node.isPTag(PTAG_WHADVP) && predicate.isPTag(PTAG_ADVP));
                if (accepted) {
                    complementizer.antecedent = predicate;
                }
            }
        }

        // If the antecedent consists only of empty categories, follow the antecedent of its
        // first terminal until an overt node, or nothing, is reached.
        CTNode antecedent = complementizer.antecedent;
        while (antecedent != null && antecedent.isEmptyCategoryRec()) {
            antecedent = antecedent.getSubTerminals().get(0).getAntecedent();
        }
        complementizer.antecedent = antecedent;
    }

    /**
     * Tests whether a node contains coordination, judged on all of its own children.
     *
     * @param cTNode the node to test
     * @return the result of {@link #containsCoordination(CTNode, List)} applied to the node
     *         and its children
     */
    public static boolean containsCoordination(CTNode cTNode) {
        final List<CTNode> children = cTNode.getChildren();
        return containsCoordination(cTNode, children);
    }

    /**
     * Tests whether a node contains coordination, judged on the supplied child list.
     *
     * <p>The answer is {@code true} when the node is a {@code UCP}, when it is an {@code NML}
     * or {@code NP} whose last non-punctuation child is an "etc." element, or when any listed
     * child is a conjunction ({@link #isConjunction}).
     *
     * @param cTNode the node whose tag is inspected
     * @param list the children to inspect
     * @return {@code true} if any of the conditions above holds
     */
    public static boolean containsCoordination(CTNode cTNode, List<CTNode> list) {
        if (cTNode.isPTag(PTAG_UCP) || (cTNode.isPTagAny(PTAG_NML, PTAG_NP) && containsEtc(list))) {
            return true;
        }
        for (CTNode child : list) {
            if (isConjunction(child)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Scans the list from its end, skipping punctuation, and reports whether the first
     * non-punctuation node found is an "etc." element.
     *
     * <p>The element at index 0 is never examined.
     * NOTE(review): presumably because an "etc." with nothing before it cannot close a
     * coordination — confirm.
     *
     * @param children the sibling nodes to scan
     * @return {@link #isEtc} of the last non-punctuation node at index 1 or above, or
     *         {@code false} if there is none
     */
    private static boolean containsEtc(List<CTNode> children) {
        int index = children.size() - 1;
        while (index > 0) {
            final CTNode child = children.get(index);
            if (!isPunctuation(child)) {
                return isEtc(child);
            }
            index--;
        }
        return false;
    }

    /**
     * Tests whether a node is an "etc." element: it either carries the {@code ETC} function
     * tag, or its first terminal has the form {@code "etc."}, compared case-insensitively.
     *
     * @param cTNode the node to test
     * @return {@code true} if either condition holds
     */
    public static boolean isEtc(CTNode cTNode) {
        // The terminal lookup only happens when the function tag is absent.
        return cTNode.hasFTag(FTAG_ETC)
                || cTNode.getSubTerminals().get(0).form.equalsIgnoreCase("etc.");
    }

    /**
     * Tests whether a node is a conjunction ({@link #isConjunction}) or a separator
     * ({@link #isSeparator}).
     *
     * @param cTNode the node to test
     * @return {@code true} if the node is a conjunction or a separator
     */
    public static boolean isCoordinator(CTNode cTNode) {
        if (isConjunction(cTNode)) {
            return true;
        }
        return isSeparator(cTNode);
    }

    /**
     * Tests whether a node is tagged {@code CC} or {@code CONJP}.
     *
     * @param cTNode the node to test
     * @return {@code true} if the node's tag is one of the two
     */
    public static boolean isConjunction(CTNode cTNode) {
        if (cTNode.isPTag(POS_CC)) {
            return true;
        }
        return cTNode.isPTag(PTAG_CONJP);
    }

    /**
     * Tests whether a node is tagged as a comma ({@code ","}) or a colon ({@code ":"}).
     *
     * @param cTNode the node to test
     * @return {@code true} if the node's tag is one of the two
     */
    public static boolean isSeparator(CTNode cTNode) {
        if (cTNode.isPTag(POS_COMMA)) {
            return true;
        }
        return cTNode.isPTag(POS_COLON);
    }

    /**
     * Tests whether a node is the first half of a correlative conjunction.
     *
     * <p>A {@code CC} node qualifies when its form is "either", "neither", "whether" or "both".
     * A {@code CONJP} node qualifies when its forms, joined by single spaces, read "not only".
     * Comparison ignores case; lower-casing uses {@link Locale#ROOT} so that upper-case input
     * such as "EITHER" is still recognised when the JVM's default locale is Turkish or Azeri
     * (where "I" lower-cases to a dotless "ı").
     *
     * @param cTNode the node to test
     * @return {@code true} if the node matches one of the cases above
     */
    public static boolean isCorrelativeConjunction(CTNode cTNode) {
        if (cTNode.isPTag(POS_CC)) {
            final String form = cTNode.form.toLowerCase(Locale.ROOT);
            return form.equals("either") || form.equals("neither") || form.equals("whether") || form.equals("both");
        }
        if (cTNode.isPTag(PTAG_CONJP)) {
            final String forms = cTNode.toForms(false, " ").toLowerCase(Locale.ROOT);
            return forms.equals("not only");
        }
        return false;
    }

    /**
     * Tests whether a node carries one of the punctuation or symbol tags:
     * {@code :}, {@code ,}, {@code .}, {@code ``}, {@code ''}, {@code -LRB-}, {@code -RRB-},
     * {@code HYPH}, {@code NFP}, {@code SYM} or {@code PUNC}.
     *
     * @param cTNode the node to test
     * @return {@code true} if the node's tag is in that set
     */
    public static boolean isPunctuation(CTNode cTNode) {
        return cTNode.isPTagAny(
                POS_COLON, POS_COMMA, POS_PERIOD,
                POS_LQ, POS_RQ,
                POS_LRB, POS_RRB,
                POS_HYPH, POS_NFP, POS_SYM, POS_PUNC);
    }

    /**
     * Tests whether a node carries a clause tag: {@code S}, {@code SQ}, {@code SINV},
     * {@code SBAR} or {@code SBARQ}.
     *
     * @param cTNode the node to test
     * @return {@code true} if the node's tag is one of the five
     */
    public static boolean isClause(CTNode cTNode) {
        final boolean clause = cTNode.isPTagAny(PTAG_S, PTAG_SQ, PTAG_SINV, PTAG_SBAR, PTAG_SBARQ);
        return clause;
    }

    /**
     * Tests whether a node's tag is an adjective tag, as decided by {@link MPLibEn#isAdjective}.
     *
     * @param cTNode the node to test
     * @return the verdict of {@code MPLibEn.isAdjective} on the node's tag
     */
    public static boolean isAdjective(CTNode cTNode) {
        final String tag = cTNode.pTag;
        return MPLibEn.isAdjective(tag);
    }

    /**
     * Tests whether a node's tag is an adverb tag, as decided by {@link MPLibEn#isAdverb}.
     *
     * @param cTNode the node to test
     * @return the verdict of {@code MPLibEn.isAdverb} on the node's tag
     */
    public static boolean isAdverb(CTNode cTNode) {
        final String tag = cTNode.pTag;
        return MPLibEn.isAdverb(tag);
    }

    /**
     * Tests whether a node's tag is a noun tag, as decided by {@link MPLibEn#isNoun}.
     *
     * @param cTNode the node to test
     * @return the verdict of {@code MPLibEn.isNoun} on the node's tag
     */
    public static boolean isNoun(CTNode cTNode) {
        final String tag = cTNode.pTag;
        return MPLibEn.isNoun(tag);
    }

    /**
     * Tests whether a node's tag is a verb tag, as decided by {@link MPLibEn#isVerb}.
     *
     * @param cTNode the node to test
     * @return the verdict of {@code MPLibEn.isVerb} on the node's tag
     */
    public static boolean isVerb(CTNode cTNode) {
        final String tag = cTNode.pTag;
        return MPLibEn.isVerb(tag);
    }

    /**
     * Tests whether a node carries one of the nominal phrase tags {@code NP}, {@code NML},
     * {@code NX} or {@code NAC}.
     *
     * @param cTNode the node to test
     * @return {@code true} if the node's tag is one of the four
     */
    public static boolean isNounPhrase(CTNode cTNode) {
        final boolean nominal = cTNode.isPTagAny(PTAG_NP, PTAG_NML, PTAG_NX, PTAG_NAC);
        return nominal;
    }

    /** Left-bracket forms: {@code -LRB-}, {@code -LSB-} and {@code -LCB-}. Compiled once. */
    private static final Pattern RE_LEFT_BRACKET = Pattern.compile("^-L(R|S|C)B-$");

    /**
     * Tests whether a non-phrase node's form is one of the left-bracket forms
     * {@code -LRB-}, {@code -LSB-} or {@code -LCB-}.
     *
     * <p>Uses a precompiled pattern instead of {@code String.matches}, which would recompile
     * the expression on every call.
     *
     * @param cTNode the node to test
     * @return {@code true} if the node is not a phrase and its whole form is a left bracket
     */
    public static boolean isLeftBracket(CTNode cTNode) {
        return !cTNode.isPhrase() && RE_LEFT_BRACKET.matcher(cTNode.form).matches();
    }

    /** Right-bracket forms: {@code -RRB-}, {@code -RSB-} and {@code -RCB-}. Compiled once. */
    private static final Pattern RE_RIGHT_BRACKET = Pattern.compile("^-R(R|S|C)B-$");

    /**
     * Tests whether a non-phrase node's form is one of the right-bracket forms
     * {@code -RRB-}, {@code -RSB-} or {@code -RCB-}.
     *
     * <p>Uses a precompiled pattern instead of {@code String.matches}, which would recompile
     * the expression on every call.
     *
     * @param cTNode the node to test
     * @return {@code true} if the node is not a phrase and its whole form is a right bracket
     */
    public static boolean isRightBracket(CTNode cTNode) {
        return !cTNode.isPhrase() && RE_RIGHT_BRACKET.matcher(cTNode.form).matches();
    }

    /**
     * Walks the whole tree from its root and applies the {@code SBJ}, {@code LGS} and
     * {@code CLF} function-tag fixes to every node.
     *
     * @param cTTree the tree to process; nodes are modified in place
     */
    public static void fixFunctionTags(CTTree cTTree) {
        final CTNode root = cTTree.getRoot();
        fixFunctionTagsAux(root);
    }

    /**
     * Recursive worker for {@link #fixFunctionTags(CTTree)}: applies the three fixes to
     * {@code node} (SBJ, then LGS, then CLF) before visiting its children.
     *
     * @param node the node currently being visited
     */
    private static void fixFunctionTagsAux(CTNode node) {
        fixSBJ(node);
        fixLGS(node);
        fixCLF(node);
        for (CTNode child : node.getChildren()) {
            fixFunctionTagsAux(child);
        }
    }

    /**
     * Moves an {@code SBJ} function tag from an only child up to its parent.
     *
     * <p>The move happens when the node carries {@code SBJ}, is the sole child of its parent,
     * and the parent is neither {@code EDITED} nor {@code EMBED} and has no function tags of
     * its own. The parent then also takes over the node's tag. A node without a parent is left
     * untouched. The traversal calls this on the root as well, so the parent may be
     * {@code null}.
     *
     * @param node the node to inspect
     * @return {@code true} if the tag was moved
     */
    private static boolean fixSBJ(CTNode node) {
        if (!node.hasFTag(FTAG_SBJ)) {
            return false;
        }
        final CTNode parent = node.getParent();
        if (parent == null) {
            return false;
        }
        if (parent.getChildrenSize() != 1 || parent.isPTagAny(PTAG_EDITED, PTAG_EMBED) || !parent.getFTags().isEmpty()) {
            return false;
        }
        node.removeFTag(FTAG_SBJ);
        parent.addFTag(FTAG_SBJ);
        parent.pTag = node.pTag;
        return true;
    }

    /**
     * Moves an {@code LGS} function tag from a non-{@code PP} node up to its parent when that
     * parent is a {@code PP} or an {@code SBAR}.
     *
     * <p>A node without a parent is left untouched. The traversal calls this on the root as
     * well, so the parent may be {@code null}.
     *
     * @param node the node to inspect
     * @return {@code true} if the tag was moved
     */
    private static boolean fixLGS(CTNode node) {
        if (!node.hasFTag(FTAG_LGS) || node.isPTag(PTAG_PP)) {
            return false;
        }
        final CTNode parent = node.getParent();
        if (parent == null || !parent.isPTagAny(PTAG_PP, PTAG_SBAR)) {
            return false;
        }
        node.removeFTag(FTAG_LGS);
        parent.addFTag(FTAG_LGS);
        return true;
    }

    /**
     * Moves a {@code CLF} function tag from an {@code S}, {@code SQ} or {@code SINV} node down
     * to its first descendant matching {@code "+SBAR.*"}.
     *
     * <p>The tag is removed from the node even when no such descendant exists; in that case
     * it is simply dropped.
     *
     * @param node the node to inspect
     * @return {@code true} if the tag was re-attached to a descendant
     */
    private static boolean fixCLF(CTNode node) {
        final boolean applicable = node.hasFTag(FTAG_CLF) && node.matchesPTag("S|SQ|SINV");
        if (!applicable) {
            return false;
        }
        // Look up the target before touching the node's own tags, as the original does.
        final CTNode sbar = node.getFirstDescendant("+SBAR.*");
        node.removeFTag(FTAG_CLF);
        if (sbar != null) {
            sbar.addFTag(FTAG_CLF);
            return true;
        }
        return false;
    }

    /**
     * Tests whether a node's tag starts with {@code "WH"}.
     *
     * @param cTNode the node to test
     * @return {@code true} if the tag has the {@code WH} prefix
     */
    public static boolean isRelPhrase(CTNode cTNode) {
        final String tag = cTNode.pTag;
        return tag.startsWith("WH");
    }
}
