package de.tudarmstadt.ukp.dkpro.core.api.io;

import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.tools.ant.DirectoryScanner;
import org.apache.tools.ant.types.resources.FileResource;
import org.apache.tools.ant.types.resources.FileResourceIterator;
import org.apache.uima.UimaContext;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.CasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

/**
 * Base class for collection readers that take their input from the file system.
 *
 * <p>During {@link #initialize(UimaContext)} the configured include/exclude patterns are handed
 * to an Ant {@link DirectoryScanner}; the files it selects are then exposed one by one through
 * {@link #nextFile()}. Subclasses typically call {@link #nextFile()} to obtain the next file and
 * {@link #initCas(CAS, FileResource, String)} to fill in the document metadata.
 *
 * @deprecated This class is annotated {@code @Deprecated}; this file does not name a
 *             replacement.
 */
@Deprecated
@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.api.io.FileSetCollectionReaderBase", description = "Base class for file system collection readers. Uses an Ant FileSet to conveniently walk the\nfile system.\n<p>\nExample of a hypothetic <code>FooReader</code> that should read only files ending in\n<code>.foo</code> from in the directory <code>foodata</code> or any subdirectory thereof:\n<pre>\nCollectionReader reader = createReader(FooReader.class,\n    FileSetCollectionReaderBase.PARAM_LANGUAGE, \"en\",\n    FileSetCollectionReaderBase.PARAM_SOURCE_LOCATION, \"some/path\",\n    FileSetCollectionReaderBase.PARAM_PATTERNS, \"[+]foodata&#47;**&#47;*.foo\" );\n</pre>", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
public abstract class FileSetCollectionReaderBase extends CasCollectionReader_ImplBase {
    /** Prefix that marks a pattern as an include pattern. */
    public static final String INCLUDE_PREFIX = "[+]";

    /** Prefix that marks a pattern as an exclude pattern. */
    public static final String EXCLUDE_PREFIX = "[-]";

    /**
     * Older name of {@link #PARAM_SOURCE_LOCATION}; both constants have the same value.
     *
     * @deprecated Use {@link #PARAM_SOURCE_LOCATION}.
     */
    @Deprecated
    public static final String PARAM_PATH = "sourceLocation";

    /** Name of the parameter holding the location from which the input is read. */
    public static final String PARAM_SOURCE_LOCATION = "sourceLocation";

    /** Name of the parameter holding the include/exclude patterns. */
    public static final String PARAM_PATTERNS = "patterns";

    /** Name of the parameter that switches the built-in default excludes on or off. */
    public static final String PARAM_USE_DEFAULT_EXCLUDES = "useDefaultExcludes";

    /** Name of the parameter holding the document language. */
    public static final String PARAM_LANGUAGE = "language";

    /** Name of the parameter controlling case-sensitive pattern matching. */
    public static final String PARAM_CASE_SENSITIVE = "caseSensitive";

    /**
     * Matches a pattern that starts with a bracketed single character, i.e. something that looks
     * like a prefix. Compiled once instead of on every loop iteration.
     */
    private static final Pattern BRACKETED_PREFIX = Pattern.compile("^\\[.\\].*");

    /**
     * Exclude patterns added when {@link #PARAM_USE_DEFAULT_EXCLUDES} is enabled: editor
     * backup/temporary files and version-control metadata (CVS, SCCS, .svn, .git, .hg, .bzr
     * and related files).
     */
    private static final String[] DEFAULT_EXCLUDES = {
        "**/*~",
        "**/#*#",
        "**/.#*",
        "**/%*%",
        "**/._*",
        "**/CVS",
        "**/CVS/**",
        "**/.cvsignore",
        "**/SCCS",
        "**/SCCS/**",
        "**/vssver.scc",
        "**/.svn",
        "**/.svn/**",
        "**/.DS_Store",
        "**/.git",
        "**/.git/**",
        "**/.gitattributes",
        "**/.gitignore",
        "**/.gitmodules",
        "**/.hg",
        "**/.hg/**",
        "**/.hgignore",
        "**/.hgsub",
        "**/.hgsubstate",
        "**/.hgtags",
        "**/.bzr",
        "**/.bzr/**",
        "**/.bzrignore",
    };

    @ConfigurationParameter(name = PARAM_SOURCE_LOCATION, mandatory = false, description = "Location from which the input is read.")
    private File sourceLocation;

    @ConfigurationParameter(name = PARAM_PATTERNS, mandatory = true, description = "A set of Ant-like include/exclude patterns. A pattern starts with #INCLUDE_PREFIX [+]\nif it is an include pattern and with #EXCLUDE_PREFIX [-] if it is an exclude pattern.\nThe wildcard <code>&#47;**&#47;</code> can be used to address any number of sub-directories.\nThe wildcard * can be used to a address a part of a name.")
    private String[] patterns;

    @ConfigurationParameter(name = PARAM_USE_DEFAULT_EXCLUDES, mandatory = true, defaultValue = {"true"}, description = "Use the default excludes.")
    private boolean useDefaultExcludes;

    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false, description = "The language.")
    private String language;

    @ConfigurationParameter(name = PARAM_CASE_SENSITIVE, mandatory = false, defaultValue = {"true"}, description = "States whether the matching is done case sensitive. (default: true)")
    private boolean caseSensitive;

    /** Scanner that resolves the patterns to files; created in {@link #initialize(UimaContext)}. */
    private DirectoryScanner directoryScanner;

    /** Number of files handed out through {@link #nextFile()} so far. */
    private int completed;

    /** Iterator over the files selected by the scanner. */
    private Iterator<FileResource> fileSetIterator;

    /**
     * Configures the directory scanner from the reader parameters, scans the file system and
     * prepares the iterator over the selected files.
     *
     * <p>Patterns starting with {@link #INCLUDE_PREFIX} become includes, patterns starting with
     * {@link #EXCLUDE_PREFIX} become excludes, and patterns without any bracketed prefix are
     * treated as includes.
     *
     * @param uimaContext the UIMA context passed on to the superclass
     * @throws ResourceInitializationException if the superclass initialization fails or a
     *         pattern starts with a bracketed single character other than {@code +} or {@code -}
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        this.directoryScanner = new DirectoryScanner();
        // The source location is optional; without it the scanner is left without a base dir.
        if (this.sourceLocation != null) {
            this.directoryScanner.setBasedir(this.sourceLocation);
        }
        this.directoryScanner.setCaseSensitive(this.caseSensitive);

        List<String> includes = new ArrayList<String>();
        List<String> excludes = new ArrayList<String>();
        for (String pattern : this.patterns) {
            if (pattern.startsWith(INCLUDE_PREFIX)) {
                includes.add(pattern.substring(INCLUDE_PREFIX.length()));
            } else if (pattern.startsWith(EXCLUDE_PREFIX)) {
                excludes.add(pattern.substring(EXCLUDE_PREFIX.length()));
            } else if (BRACKETED_PREFIX.matcher(pattern).matches()) {
                // Looks like a prefix but is neither [+] nor [-]: reject instead of guessing.
                throw new ResourceInitializationException(new IllegalArgumentException("Patterns have to start with [+] or [-]."));
            } else {
                // No prefix at all: treat as an include pattern.
                includes.add(pattern);
            }
        }

        if (this.useDefaultExcludes) {
            Collections.addAll(excludes, DEFAULT_EXCLUDES);
        }

        this.directoryScanner.setIncludes(includes.toArray(new String[includes.size()]));
        this.directoryScanner.setExcludes(excludes.toArray(new String[excludes.size()]));
        this.directoryScanner.scan();

        this.fileSetIterator = new FileResourceIterator(null, this.sourceLocation, this.directoryScanner.getIncludedFiles());
        getLogger().info("Found [" + getIncludedFilesCount() + "] files to be read");
    }

    /**
     * Returns the number of files the directory scanner selected.
     *
     * @return the included-files count reported by the scanner
     */
    protected int getIncludedFilesCount() {
        return this.directoryScanner.getIncludedFilesCount();
    }

    /**
     * Returns the iterator over the selected files.
     *
     * @return the iterator created in {@link #initialize(UimaContext)}
     */
    protected Iterator<FileResource> getFileSetIterator() {
        return this.fileSetIterator;
    }

    /**
     * Returns the next selected file and advances the progress counter.
     *
     * <p>The counter is only advanced when a file was actually obtained; if the underlying
     * iterator throws, the exception propagates and the counter is left untouched.
     *
     * @return the next file resource
     */
    protected FileResource nextFile() {
        FileResource next = this.fileSetIterator.next();
        this.completed++;
        return next;
    }

    /**
     * Reports progress as the number of files handed out versus the number of files selected.
     *
     * @return a single-element array with the file-based progress
     */
    @Override
    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this.completed, getIncludedFilesCount(), "file")};
    }

    /**
     * Tells whether there are more files to read.
     *
     * @return {@code true} if the file iterator has another element
     * @throws IOException declared by the overridden method
     * @throws CollectionException declared by the overridden method
     */
    @Override
    public boolean hasNext() throws IOException, CollectionException {
        return this.fileSetIterator.hasNext();
    }

    /**
     * Creates the document metadata for the given file in the given CAS and sets the document
     * language to the configured language.
     *
     * <p>Title, URI and id are derived from the file; the qualifier, if any, is appended to URI
     * and id after a {@code #}. Base URI and collection id are only set when the resource has a
     * base directory.
     *
     * @param cas the CAS to initialize
     * @param fileResource the file the CAS content comes from
     * @param str optional qualifier appended to document URI and id; may be {@code null}
     * @throws RuntimeException wrapping a {@link CASException} raised while setting the metadata
     */
    public void initCas(CAS cas, FileResource fileResource, String str) {
        String qualifierSuffix = str != null ? "#" + str : "";
        try {
            DocumentMetaData metaData = DocumentMetaData.create(cas);
            File file = fileResource.getFile();
            metaData.setDocumentTitle(file.getName());
            metaData.setDocumentUri(file.toURI().toString() + qualifierSuffix);
            metaData.setDocumentId(fileResource.getName() + qualifierSuffix);

            File baseDir = fileResource.getBaseDir();
            if (baseDir != null) {
                // The source location is an optional parameter; fall back to the resource's own
                // base directory instead of failing with a NullPointerException when it is unset.
                File uriBase = this.sourceLocation != null ? this.sourceLocation : baseDir;
                metaData.setDocumentBaseUri(uriBase.toURI().toString());
                metaData.setCollectionId(baseDir.getPath());
            }

            cas.setDocumentLanguage(this.language);
        } catch (CASException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the configured document language.
     *
     * @return the value of the language parameter, or {@code null} if it was not set
     */
    public String getLanguage() {
        return this.language;
    }
}
