package com.clearnlp.pattern;

import com.clearnlp.classification.feature.JointFtrXml;
import com.clearnlp.component.label.IDEPLabel;
import com.clearnlp.dependency.srl.SRLLib;
import com.clearnlp.headrule.HeadRule;
import com.clearnlp.util.pair.Pair;
import com.google.common.collect.Lists;
import com.ibm.icu.text.DateFormat;
import de.tudarmstadt.ukp.dkpro.core.io.tei.internal.TeiConstants;
import is2.data.PipeGen;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.parser.Parse;
import opennlp.tools.tokenize.TokenizerME;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.xalan.xsltc.compiler.Constants;
import org.apache.xml.serializer.SerializerConstants;
import org.codehaus.plexus.util.LineOrientedInterpolatingReader;
import org.springframework.aop.framework.autoproxy.target.QuickTargetSourceCreator;
import org.springframework.beans.factory.BeanFactory;

/* loaded from: input_file:com/clearnlp/pattern/PTHtml.class */
/**
 * Replaces a fixed set of HTML character entities in a string with the characters they denote.
 *
 * <p>A new instance knows the named entities {@code &quot;}, {@code &amp;}, {@code &lt;},
 * {@code &gt;} and {@code &nbsp;}, the decimal references {@code &#32;} through {@code &#126;},
 * and the zero-padded apostrophe {@code &#039;}. Further rules can be appended with
 * {@link #addPattern(String, String)}.
 *
 * <p>Rules are applied one after another, in the order they were added, each over the whole
 * output of the previous rule. Text produced by an earlier rule can therefore be matched by a
 * later one: {@code "&amp;lt;"} first becomes {@code "&lt;"} and then {@code "<"}.
 */
public class PTHtml {
    /** Replacement rules in the order {@link #toText(String)} applies them. */
    private final List<Replacement> l_replaces = new ArrayList<>();

    /** Creates a converter preloaded with the named entities followed by the numeric ones. */
    public PTHtml() {
        addSymbols();
        addASCII();
    }

    /** Registers the named entities. */
    private void addSymbols() {
        addLiteral("&quot;", "\"");
        addLiteral("&amp;", "&");
        addLiteral("&lt;", "<");
        addLiteral("&gt;", ">");
        addLiteral("&nbsp;", " ");
    }

    /** Registers the decimal character references for printable ASCII. */
    private void addASCII() {
        // Printable ASCII runs from space (32) through tilde (126).
        for (int code = 32; code <= 126; code++) {
            addLiteral("&#" + code + ";", String.valueOf((char) code));
        }
        // Zero-padded spelling of the apostrophe.
        addLiteral("&#039;", "'");
    }

    /**
     * Registers a rule whose replacement is inserted verbatim.
     *
     * <p>{@code Matcher.replaceAll} gives {@code $} and {@code \} special meaning in the
     * replacement string; without quoting, the rules for {@code &#36;} and {@code &#92;} would
     * throw {@link IllegalArgumentException} as soon as they matched.
     *
     * @param entity regular expression matching the entity text
     * @param text   literal text to substitute for each match
     */
    private void addLiteral(String entity, String text) {
        addPattern(entity, Matcher.quoteReplacement(text));
    }

    /**
     * Appends a replacement rule; it is applied after all rules added before it.
     *
     * @param str  regular expression to search for, compiled with {@link Pattern#compile(String)}
     * @param str2 replacement in {@link Matcher#replaceAll(String)} syntax, i.e. {@code $n}
     *             refers to a capture group and {@code \} escapes the following character; use
     *             {@link Matcher#quoteReplacement(String)} to insert such characters literally
     */
    public void addPattern(String str, String str2) {
        this.l_replaces.add(new Replacement(Pattern.compile(str), str2));
    }

    /**
     * Applies every registered rule to the given text, in registration order.
     *
     * @param str text that may contain HTML entities
     * @return the text after all replacements have been made
     */
    public String toText(String str) {
        String text = str;
        for (Replacement rule : this.l_replaces) {
            text = rule.pattern.matcher(text).replaceAll(rule.replacement);
        }
        return text;
    }

    /** Immutable pairing of a compiled pattern with its replacement string. */
    private static final class Replacement {
        private final Pattern pattern;
        private final String replacement;

        Replacement(Pattern pattern, String replacement) {
            this.pattern = pattern;
            this.replacement = replacement;
        }
    }
}
