package se.lth.cs.srl.preprocessor;

import is2.data.SentenceData09;
import is2.lemmatizer.Lemmatizer;
import is2.parser.Parser;
import is2.tag.Tagger;
import is2.tools.Tool;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import se.lth.cs.srl.preprocessor.tokenization.StanfordChineseSegmenterWrapper;
import se.lth.cs.srl.preprocessor.tokenization.Tokenizer;
import se.lth.cs.srl.util.BohnetHelper;
import se.lth.cs.srl.util.Util;

/* loaded from: input_file:se/lth/cs/srl/preprocessor/Preprocessor.class */
/**
 * Runs a sentence through an optional chain of preprocessing tools:
 * tokenizer, lemmatizer, POS tagger, morphological tagger and dependency parser.
 *
 * <p>Every stage except tokenization is skipped when its tool is {@code null}.
 * The wall-clock time spent in each stage is accumulated, in milliseconds, in
 * the public {@code *Time} counters and can be rendered with {@link #getStatus()}.
 *
 * <p>Calls to the tokenizer and to the parser are serialized by synchronizing on
 * the respective tool instance. The lemmatizer and taggers are invoked without
 * locking, and the timing counters are updated without synchronization.
 */
public class Preprocessor {
    protected final Tokenizer tokenizer;
    protected final Tool lemmatizer;
    protected final Tagger tagger;
    protected final is2.mtag.Tagger mtagger;
    protected final Parser parser;

    /** Accumulated milliseconds spent in {@link #tokenize(String)}. */
    public long tokenizeTime = 0;
    /** Accumulated milliseconds spent applying the lemmatizer. */
    public long lemmatizeTime = 0;
    /** Accumulated milliseconds spent applying the POS tagger. */
    public long tagTime = 0;
    /** Accumulated milliseconds spent applying the morphological tagger (including feature splitting). */
    public long mtagTime = 0;
    /** Accumulated milliseconds spent applying the dependency parser. */
    public long dpTime = 0;

    /**
     * Creates a preprocessor from the given tools.
     *
     * @param tokenizer  tokenizer used by {@link #tokenize(String)}; {@link #getStatus()} tolerates {@code null}
     * @param lemmatizer lemmatizer, or {@code null} to skip lemmatization
     * @param tagger     POS tagger, or {@code null} to skip tagging
     * @param tagger2    morphological tagger, or {@code null} to skip morphological tagging
     * @param parser     dependency parser, or {@code null} to skip parsing
     */
    public Preprocessor(Tokenizer tokenizer, Lemmatizer lemmatizer, Tagger tagger, is2.mtag.Tagger tagger2, Parser parser) {
        this.tokenizer = tokenizer;
        this.lemmatizer = lemmatizer;
        this.tagger = tagger;
        this.mtagger = tagger2;
        this.parser = parser;
    }

    /**
     * Preprocesses an already tokenized sentence.
     *
     * @param strArr the word forms, passed unchanged to {@code SentenceData09.init}
     * @return the sentence after all configured stages have been applied
     */
    public SentenceData09 preprocess(String[] strArr) {
        SentenceData09 sentence = new SentenceData09();
        sentence.init(strArr);
        return preprocess(sentence);
    }

    /**
     * Applies lemmatizer, tagger, morphological tagger and parser (in that order)
     * to the sentence, skipping any stage whose tool is {@code null}.
     *
     * @param sentenceData09 the sentence to annotate; modified in place by the non-parser stages
     * @return the parser's result, or a new {@code SentenceData09} constructed
     *         from the annotated input when no parser is configured
     */
    private SentenceData09 preprocess(SentenceData09 sentenceData09) {
        if (this.lemmatizer != null) {
            long start = System.currentTimeMillis();
            this.lemmatizer.apply(sentenceData09);
            this.lemmatizeTime += System.currentTimeMillis() - start;
        }

        if (this.tagger != null) {
            long start = System.currentTimeMillis();
            this.tagger.apply(sentenceData09);
            this.tagTime += System.currentTimeMillis() - start;
        }

        if (this.mtagger != null) {
            long start = System.currentTimeMillis();
            this.mtagger.apply(sentenceData09);
            // Split the predicted feature strings on '|' into per-token arrays.
            // Index 0 is skipped (presumably the artificial root token - TODO confirm).
            for (int i = 1; i < sentenceData09.pfeats.length; i++) {
                String predicted = sentenceData09.pfeats[i];
                if (predicted != null && !predicted.equals("_")) {
                    sentenceData09.feats[i] = predicted.split("\\|");
                }
            }
            this.mtagTime += System.currentTimeMillis() - start;
        } else {
            // No morphological tagger: mark every token's predicted features as "_".
            sentenceData09.pfeats = new String[sentenceData09.forms.length];
            Arrays.fill(sentenceData09.pfeats, "_");
        }

        if (this.parser == null) {
            return new SentenceData09(sentenceData09);
        }

        // Parser invocations are serialized on the parser instance.
        synchronized (this.parser) {
            long start = System.currentTimeMillis();
            SentenceData09 parsed = this.parser.apply(sentenceData09);
            this.dpTime += System.currentTimeMillis() - start;
            return parsed;
        }
    }

    /**
     * Tokenizes the given text with the configured tokenizer. Calls are
     * serialized on the tokenizer instance.
     *
     * @param str the raw text to tokenize
     * @return the tokens produced by the tokenizer
     * @throws NullPointerException if no tokenizer was configured
     */
    public String[] tokenize(String str) {
        synchronized (this.tokenizer) {
            long start = System.currentTimeMillis();
            String[] tokens = this.tokenizer.tokenize(str);
            this.tokenizeTime += System.currentTimeMillis() - start;
            return tokens;
        }
    }

    /**
     * Builds a human-readable summary of the tokenizer in use (if any) and the
     * accumulated time per stage, one entry per line.
     *
     * @return a new {@code StringBuilder} holding the summary
     */
    public StringBuilder getStatus() {
        StringBuilder sb = new StringBuilder();
        if (this.tokenizer != null) {
            sb.append("Tokenizer: ").append(this.tokenizer.getClass().getSimpleName()).append('\n');
        }
        sb.append("Tokenizer time:  ").append(Util.insertCommas(this.tokenizeTime)).append('\n');
        sb.append("Lemmatizer time: ").append(Util.insertCommas(this.lemmatizeTime)).append('\n');
        sb.append("Tagger time:     ").append(Util.insertCommas(this.tagTime)).append('\n');
        sb.append("MTagger time:    ").append(Util.insertCommas(this.mtagTime)).append('\n');
        sb.append("Parser time:     ").append(Util.insertCommas(this.dpTime)).append('\n');
        return sb;
    }

    /**
     * Ad-hoc driver: reads {@code chi-desegmented.out} line by line as UTF-8,
     * tokenizes and preprocesses each line with a Chinese segmenter, lemmatizer
     * and tagger (no morphological tagger, no parser), and prints each result
     * to standard output. Model and data paths are hard-coded.
     *
     * @param strArr command-line arguments (unused)
     * @throws Exception if a model cannot be loaded or the input cannot be read
     */
    public static void main(String[] strArr) throws Exception {
        File file = new File("chi-desegmented.out");
        Preprocessor preprocessor = new Preprocessor(new StanfordChineseSegmenterWrapper(new File("/home/anders/Download/stanford-chinese-segmenter-2008-05-21/data")), new SimpleChineseLemmatizer(), BohnetHelper.getTagger(new File("models/chi/tag-chn.model")), null, null);
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        try {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                System.out.println(preprocessor.preprocess(preprocessor.tokenize(line)));
            }
        } finally {
            // Close the reader even when reading or preprocessing fails.
            bufferedReader.close();
        }
    }

    /**
     * @return {@code true} if a dependency parser was configured
     */
    public boolean hasParser() {
        return this.parser != null;
    }
}
