package info.textgrid.lab.lemmatizer.serviceclient;

import java.io.IOException;
import java.net.URL;
import java.util.Map;
import java.util.regex.Pattern;
import opennlp.tools.lang.german.SentenceDetector;
import opennlp.tools.lang.german.Tokenizer;
import org.eclipse.core.runtime.FileLocator;
import org.eclipse.core.runtime.Path;

/* loaded from: input_file:info/textgrid/lab/lemmatizer/serviceclient/GermanTokenizer.class */
/**
 * Splits text into sentences and tokens with the OpenNLP German models and renders the
 * result either as a small TEI-style XML document or as a plain list of word forms.
 *
 * <p>The models are loaded once in the constructor. If loading fails, the failure is printed
 * and the instance is left without models; {@link #tokenize(String)} and
 * {@link #getSentences(String)} then fail with a {@link NullPointerException}.
 */
public class GermanTokenizer {
    /** Location of the tokenizer model, relative to the bundle root or the working directory. */
    private static final String TOKEN_MODEL = "resources/tokenModel.bin.gz";

    /** Location of the sentence model, relative to the bundle root or the working directory. */
    private static final String SENTENCE_MODEL = "resources/sentenceModel.bin.gz";

    /** Matches a token that consists of exactly one ASCII punctuation character. */
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{Punct}");

    private Tokenizer tObj;
    private SentenceDetector sObj;

    /**
     * Loads the tokenizer and sentence-detector models.
     *
     * <p>When the plug-in activator, its bundle and both model resources are available, the
     * models are loaded from the bundle. Otherwise the relative paths
     * {@code resources/tokenModel.bin.gz} and {@code resources/sentenceModel.bin.gz} are used
     * as they are. An {@link IOException} while loading is printed and not rethrown.
     */
    public GermanTokenizer() {
        String tokenModelPath = TOKEN_MODEL;
        String sentenceModelPath = SENTENCE_MODEL;
        try {
            // Explicit checks instead of catching NullPointerException: outside of a running
            // bundle there is no activator (or no resource), so the relative paths are kept.
            if (Activator.getDefault() != null && Activator.getDefault().getBundle() != null) {
                URL tokenModelUrl = FileLocator.find(
                        Activator.getDefault().getBundle(), new Path(TOKEN_MODEL), (Map<String, String>) null);
                URL sentenceModelUrl = FileLocator.find(
                        Activator.getDefault().getBundle(), new Path(SENTENCE_MODEL), (Map<String, String>) null);
                if (tokenModelUrl != null && sentenceModelUrl != null) {
                    tokenModelPath = FileLocator.toFileURL(tokenModelUrl).getPath();
                    sentenceModelPath = FileLocator.toFileURL(sentenceModelUrl).getPath();
                }
            }
            this.tObj = new Tokenizer(tokenModelPath);
            this.sObj = new SentenceDetector(sentenceModelPath);
        } catch (IOException e) {
            // Best effort, as before: report the problem and leave the models unset.
            e.printStackTrace();
        }
    }

    /**
     * Renders sentences as an XML document: an XML declaration, a {@code <TEI>} root, one
     * {@code <s>} element per non-blank sentence, and one {@code <w>} or {@code <c>} element
     * per token inside it.
     *
     * @param strArr the sentences; blank entries are skipped
     * @return the complete XML document as a string
     */
    public String createTEIOutput(String[] strArr) {
        StringBuilder xml = new StringBuilder("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<TEI>\n");
        for (String sentence : strArr) {
            String trimmed = sentence.trim();
            if (trimmed.isEmpty()) {
                continue;
            }
            xml.append("<s>");
            appendTokenElements(tokenize(trimmed), xml);
            xml.append("</s>\n");
        }
        return xml.append("</TEI>").toString();
    }

    /**
     * Renders already tokenized text as a sequence of {@code <w>} and {@code <c>} elements,
     * one per line, without any enclosing element.
     *
     * @param strArr the tokens; blank entries are skipped
     * @return the XML fragment as a string
     */
    public String createTEIOutputWords(String[] strArr) {
        return appendTokenElements(strArr, new StringBuilder()).toString();
    }

    /**
     * Tokenizes every non-blank sentence and lists the resulting tokens, one per line.
     *
     * @param strArr the sentences; blank entries are skipped
     * @return the trimmed, non-empty tokens, each followed by a newline
     */
    public String createWordformList(String[] strArr) {
        StringBuilder list = new StringBuilder();
        for (String sentence : strArr) {
            String trimmedSentence = sentence.trim();
            if (trimmedSentence.isEmpty()) {
                continue;
            }
            for (String token : tokenize(trimmedSentence)) {
                String trimmedToken = token.trim();
                if (!trimmedToken.isEmpty()) {
                    list.append(trimmedToken).append('\n');
                }
            }
        }
        return list.toString();
    }

    /**
     * Prints each sentence to standard output as {@code Satz <index>: <sentence>}, with the
     * index starting at 0.
     *
     * @param strArr the sentences to print
     */
    public void printSentences(String[] strArr) {
        for (int index = 0; index < strArr.length; index++) {
            System.out.printf("Satz %d: %s\n", index, strArr[index]);
        }
    }

    /**
     * Splits text into sentences with the loaded sentence model.
     *
     * @param str the text to split
     * @return the detected sentences
     * @throws NullPointerException if the sentence model could not be loaded
     */
    public String[] getSentences(String str) {
        return this.sObj.sentDetect(str);
    }

    /**
     * Splits text into tokens with the loaded tokenizer model.
     *
     * @param str the text to tokenize
     * @return the tokens
     * @throws NullPointerException if the tokenizer model could not be loaded
     */
    public String[] tokenize(String str) {
        return this.tObj.tokenize(str);
    }

    /**
     * Appends one element per non-blank token: {@code <c>} for a single punctuation
     * character, {@code <w>} for everything else. Token text is XML-escaped.
     *
     * @param tokens the tokens to render
     * @param target the builder to append to
     * @return {@code target}, to allow chaining
     */
    private StringBuilder appendTokenElements(String[] tokens, StringBuilder target) {
        for (String token : tokens) {
            String trimmed = token.trim();
            if (trimmed.isEmpty()) {
                continue;
            }
            String element = PUNCTUATION.matcher(trimmed).matches() ? "c" : "w";
            target.append('<').append(element).append('>')
                    .append(escapeXml(trimmed))
                    .append("</").append(element).append(">\n");
        }
        return target;
    }

    /**
     * Escapes the characters that must not appear literally in XML character data.
     * The ampersand is replaced first so that the entities produced for the angle
     * brackets are not escaped a second time.
     */
    private static String escapeXml(String text) {
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
    }
}
