package de.tudarmstadt.ukp.dkpro.core.sfst;

import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.morph.MorphologicalFeaturesParser;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.SingletonTagset;
import de.tudarmstadt.ukp.dkpro.core.api.resources.LittleEndianDataInputStream;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ModelProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceMetadata;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceObjectProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils;
import de.tudarmstadt.ukp.dkpro.core.api.resources.RuntimeProvider;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.lang.ProcessBuilder;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

/**
 * Morphological analyzer that delegates to the external SFST {@code fst-infl2} executable.
 *
 * <p>For every {@link Sentence} in the CAS the covered {@link Token}s are written to the standard
 * input of an {@code fst-infl2} process, one token per line, each followed by a flush marker line.
 * The lines the process prints in response are turned into {@link MorphologicalFeatures}
 * annotations by a {@link MorphologicalFeaturesParser}; the first annotation created for a token
 * is also linked to the token via {@code setMorph}.</p>
 *
 * <p>A new external process is started for every call to {@link #process(JCas)} and destroyed
 * before the call returns.</p>
 */
@TypeCapability(inputs = {"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token", "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence"}, outputs = {"de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS", "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures"})
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.sfst.SfstAnnotator", description = "Sfst morphological analyzer.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
public class SfstAnnotator extends JCasAnnotator_ImplBase {
    /**
     * Marker line sent after every token. The analyzer's response to it delimits the analyses
     * belonging to one token.
     */
    private static final String FLUSH_TOKEN = "-= FLUSH =-";

    /** Name of the configuration parameter bound to {@link #writePos}. */
    public static final String PARAM_WRITE_POS = "writePOS";

    /** Injected configuration value; not read anywhere in this class. */
    @ConfigurationParameter(name = PARAM_WRITE_POS, mandatory = true, defaultValue = {"true"}, description = "Write part-of-speech information.\n\nDefault: true")
    private boolean writePos;

    /** Name of the configuration parameter bound to {@link #writeLemma}. */
    public static final String PARAM_WRITE_LEMMA = "writeLemma";

    /** Injected configuration value; not read anywhere in this class. */
    @ConfigurationParameter(name = PARAM_WRITE_LEMMA, mandatory = true, defaultValue = {"true"}, description = "Write lemma information.\n\nDefault: true")
    private boolean writeLemma;

    /** Name of the configuration parameter bound to {@link #language}. */
    public static final String PARAM_LANGUAGE = "language";

    /** Optional language override passed to the model provider and the features parser. */
    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false, description = "Use this language instead of the document language to resolve the model.")
    private String language;

    /** Name of the configuration parameter bound to {@link #variant}. */
    public static final String PARAM_VARIANT = "modelVariant";

    /** Optional model variant override passed to the model provider. */
    @ConfigurationParameter(name = PARAM_VARIANT, mandatory = false, description = "Override the default variant used to locate the model.")
    private String variant;

    /** Name of the configuration parameter bound to {@link #modelLocation}. */
    public static final String PARAM_MODEL_LOCATION = "modelLocation";

    /** Optional explicit model location passed to the model provider. */
    @ConfigurationParameter(name = PARAM_MODEL_LOCATION, mandatory = false, description = "Load the model from this location instead of locating the model automatically.")
    private String modelLocation;

    /** Name of the configuration parameter bound to {@link #printTagSet}. */
    public static final String PARAM_PRINT_TAGSET = "printTagSet";

    /** When set, the tag set collected from the model is logged after the model was read. */
    @ConfigurationParameter(name = PARAM_PRINT_TAGSET, mandatory = true, defaultValue = {"false"}, description = "Write the tag set(s) to the log when a model is loaded.")
    protected boolean printTagSet;

    /** Name of the configuration parameter bound to {@link #modelEncoding}. */
    public static final String PARAM_MODEL_ENCODING = "modelEncoding";

    /**
     * Injected configuration value; not read anywhere in this class. {@link #process(JCas)} takes
     * the encoding from the model's resource metadata instead.
     */
    @ConfigurationParameter(name = PARAM_MODEL_ENCODING, mandatory = true, defaultValue = {"UTF-8"}, description = "Specifies the model encoding.")
    private String modelEncoding;

    /** Name of the configuration parameter bound to {@link #mode}. */
    public static final String PARAM_MODE = "mode";

    /** Selects how many of the analyses returned for a token are parsed. */
    @ConfigurationParameter(name = PARAM_MODE, mandatory = true, defaultValue = {"FIRST"})
    private Mode mode;

    /** Name of the configuration parameter bound to {@link #morphMappingLocation}. */
    public static final String PARAM_MORPH_MAPPING_LOCATION = "MorphMappingLocation";

    /** Optional explicit location of the morphological feature mapping. */
    @ConfigurationParameter(name = PARAM_MORPH_MAPPING_LOCATION, mandatory = false)
    private String morphMappingLocation;

    private ModelProviderBase<File> modelProvider;
    private MorphologicalFeaturesParser featuresParser;
    private RuntimeProvider runtimeProvider;

    /** Strategy for handling multiple analyses returned for a single token. */
    public enum Mode {
        /** Parse only the first analysis line of a token; further analysis lines are skipped. */
        FIRST,
        /** Parse every analysis line of a token. */
        ALL
    }

    /**
     * Sets up the model provider, the morphological features parser and the runtime provider
     * that supplies the analyzer executable.
     *
     * @param uimaContext the UIMA context handed on to the superclass
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        this.modelProvider = new ModelProviderBase<File>() {
            {
                setContextObject(SfstAnnotator.this);
                setDefault(ResourceObjectProviderBase.ARTIFACT_ID, "${groupId}.sfst-model-morph-${language}-${variant}");
                setDefault(ResourceObjectProviderBase.LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/sfst/lib/morph-${language}-${variant}.ca");
                setDefaultVariantsLocation("de/tudarmstadt/ukp/dkpro/core/sfst/lib/sfst-default-variants.map");
                setOverride(ResourceObjectProviderBase.LOCATION, SfstAnnotator.this.modelLocation);
                setOverride("language", SfstAnnotator.this.language);
                setOverride("variant", SfstAnnotator.this.variant);
            }

            /**
             * Reads the symbol table at the start of the model to collect the tag set and then
             * returns the model as a local file.
             *
             * @param url location of the model
             * @return the model as a file
             * @throws IOException if the model cannot be read or is not a compact model
             */
            @Override
            public File produceResource(URL url) throws IOException {
                SingletonTagset tagset = new SingletonTagset(MorphologicalFeatures.class, getResourceMetaData().getProperty("morph.tagset"));

                // try-with-resources keeps the primary exception and records a failing close()
                // as suppressed instead of letting it replace the primary exception.
                try (LittleEndianDataInputStream modelStream = new LittleEndianDataInputStream(url.openStream())) {
                    // The first byte must be 'c', otherwise the model is rejected.
                    if (modelStream.readByte() != 'c') {
                        throw new IOException("Incompatible model. Must be a compact model.");
                    }

                    // The second byte is an encoding flag; it is only reported, not acted upon.
                    String declaredEncoding = modelStream.readByte() == 0 ? "unknown" : "UTF-8";
                    SfstAnnotator.this.getLogger().info("Model encoding: " + declaredEncoding);

                    // Masked to an unsigned 16-bit value: a count cannot be negative, and a signed
                    // read would silently skip the whole table for counts above 32767.
                    // NOTE(review): assumes the on-disk count is unsigned - confirm against the
                    // SFST compact transducer format.
                    int symbolCount = modelStream.readShort() & 0xFFFF;
                    for (int i = 0; i < symbolCount; i++) {
                        // Each entry is a 16-bit value (not needed here) followed by a
                        // zero-terminated string.
                        modelStream.readShort();
                        String symbol = readZeroTerminatedString(modelStream, "UTF-8");
                        // Only symbols of the form <...> with non-empty content count as tags.
                        boolean isTag = symbol.length() > 2 && symbol.startsWith("<") && symbol.endsWith(">");
                        if (isTag) {
                            tagset.add(symbol);
                        }
                    }

                    addTagset(tagset);
                    if (SfstAnnotator.this.printTagSet) {
                        SfstAnnotator.this.getLogger().info(getTagset().toString());
                    }
                    return ResourceUtils.getUrlAsFile(url, true);
                }
            }

            /**
             * Reads bytes up to (and consuming) the next zero byte and decodes them.
             *
             * @param dataInput source of the bytes
             * @param str name of the charset used to decode the bytes
             * @return the decoded string without the terminating zero byte
             * @throws IOException if reading fails or the charset name is not supported
             */
            private String readZeroTerminatedString(DataInput dataInput, String str) throws IOException {
                ByteArrayOutputStream collected = new ByteArrayOutputStream();
                for (byte b = dataInput.readByte(); b != 0; b = dataInput.readByte()) {
                    collected.write(b);
                }
                return new String(collected.toByteArray(), str);
            }
        };

        this.featuresParser = new MorphologicalFeaturesParser();
        this.featuresParser.setDefault(ResourceObjectProviderBase.LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/api/lexmorph/tagset/${language}-${morph.tagset}-morph.map");
        this.featuresParser.setOverride(ResourceObjectProviderBase.LOCATION, this.morphMappingLocation);
        this.featuresParser.setOverride("language", this.language);
        this.featuresParser.addImport("morph.tagset", this.modelProvider);

        this.runtimeProvider = new RuntimeProvider("classpath:/de/tudarmstadt/ukp/dkpro/core/sfst/bin/");
    }

    /**
     * Runs all tokens of all sentences through an {@code fst-infl2} process and stores the
     * returned analyses as {@link MorphologicalFeatures} annotations.
     *
     * <p>Sentences without tokens are skipped. If anything goes wrong after the process was
     * configured, the text sent for the current sentence and the last line received are logged
     * before the exception propagates.</p>
     *
     * @param jCas the CAS to analyze
     * @throws AnalysisEngineProcessException if the model carries no encoding metadata, if the
     *         executable or model cannot be obtained, or if communicating with the external
     *         process fails (including the process ending its output prematurely)
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        CAS cas = jCas.getCas();
        this.modelProvider.configure(cas);
        this.featuresParser.configure(cas);

        String encoding = (String) this.modelProvider.getResourceMetaData().get(ResourceMetadata.META_MODEL_ENCODING);
        if (encoding == null) {
            throw new AnalysisEngineProcessException(new IllegalStateException("Model should contain encoding metadata"));
        }

        ProcessBuilder processBuilder;
        try {
            processBuilder = new ProcessBuilder(
                    this.runtimeProvider.getFile("fst-infl2").getAbsolutePath(),
                    "-s",
                    "-q",
                    this.modelProvider.getResource().getAbsolutePath());
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
        processBuilder.redirectError(ProcessBuilder.Redirect.INHERIT);

        // Diagnostics that are only logged when processing does not complete.
        StringBuilder lastSent = new StringBuilder();
        String lastResponse = null;
        boolean success = false;
        Process process = null;
        try {
            process = processBuilder.start();
            PrintWriter toAnalyzer = new PrintWriter(new OutputStreamWriter(process.getOutputStream(), encoding));
            BufferedReader fromAnalyzer = new BufferedReader(new InputStreamReader(process.getInputStream(), encoding));

            for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
                List<Token> tokens = JCasUtil.selectCovered(Token.class, sentence);
                if (tokens.isEmpty()) {
                    continue;
                }

                // Send the whole sentence: every token is followed by the flush marker.
                for (Token token : tokens) {
                    String text = token.getCoveredText();
                    lastSent.append(text).append(' ');
                    toAnalyzer.printf("%s%n", text);
                    toAnalyzer.printf("%s%n", FLUSH_TOKEN);
                }
                toAnalyzer.flush();
                // PrintWriter never throws on write failures, so ask it explicitly.
                if (toAnalyzer.checkError()) {
                    throw new IOException("Failed to send tokens to fst-infl2");
                }

                // Read the responses token by token.
                for (Token token : tokens) {
                    boolean skipFurtherAnalyses = false;
                    boolean flushSeen = false;
                    while ((lastResponse = fromAnalyzer.readLine()) != null) {
                        if (lastResponse.startsWith(">")) {
                            // Lines starting with '>' carry no analysis.
                            continue;
                        }
                        if (lastResponse.contains(FLUSH_TOKEN)) {
                            // The response to the flush marker ends this token's analyses.
                            flushSeen = true;
                            break;
                        }
                        if (lastResponse.startsWith("no result for")) {
                            // No analysis available: record an empty feature annotation.
                            MorphologicalFeatures empty = new MorphologicalFeatures(jCas, token.getBegin(), token.getEnd());
                            empty.setValue("");
                            empty.addToIndexes();
                            if (token.getMorph() == null) {
                                token.setMorph(empty);
                            }
                            continue;
                        }
                        if (!skipFurtherAnalyses) {
                            MorphologicalFeatures parsed = this.featuresParser.parse(jCas, token, lastResponse);
                            // Only the first analysis gets linked to the token.
                            if (token.getMorph() == null) {
                                token.setMorph(parsed);
                            }
                        }
                        if (this.mode == Mode.FIRST) {
                            // Keep consuming lines up to the flush marker, but parse no more.
                            skipFurtherAnalyses = true;
                        }
                    }
                    if (!flushSeen) {
                        // The stream ended before the marker: the analyzer is gone, so fail
                        // instead of silently leaving the remaining tokens without analysis.
                        throw new IOException("Unexpected end of output from fst-infl2 while reading the analysis of ["
                                + token.getCoveredText() + "]");
                    }
                }

                lastSent.setLength(0);
            }
            success = true;
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        } finally {
            if (!success) {
                getLogger().error("Sent before error: [" + lastSent + "]");
                getLogger().error("Last response before error: [" + lastResponse + "]");
            }
            if (process != null) {
                process.destroy();
            }
        }
    }

    /**
     * Uninstalls the files provided by the runtime provider and then destroys the superclass.
     */
    @Override
    public void destroy() {
        // initialize() may have failed before the runtime provider was created.
        if (this.runtimeProvider != null) {
            this.runtimeProvider.uninstall();
        }
        super.destroy();
    }
}
