package de.tudarmstadt.ukp.dkpro.core.api.io;

import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.CasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.springframework.aop.framework.autoproxy.target.QuickTargetSourceCreator;
import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
import org.springframework.core.io.support.ResourcePatternResolver;
import org.springframework.util.AntPathMatcher;

@ResourceMetaData(name = "de.tudarmstadt.ukp.dkpro.core.api.io.ResourceCollectionReaderBase", description = "Base class for collection readers that plan to access resources on the file system or in the\nclasspath or basically anywhere where Spring can resolve them. ANT-style patterns are supported\nto include or exclude particular resources.\n<p>\nExample of a hypothetic <code>FooReader</code> that should read only files ending in\n<code>.foo</code> from in the directory <code>foodata</code> or any subdirectory thereof:\n\n<pre>\nCollectionReader reader = createReader(FooReader.class,\n        FooReader.PARAM_LANGUAGE, &quot;en&quot;,\n        FooReader.PARAM_SOURCE_LOCATION, &quot;some/path&quot;,\n        FooReader.PARAM_PATTERNS, &quot;[+]foodata/*&#42;/*.foo&quot;);\n</pre>\n<p>\nThe list of resources returned is sorted, so for the same set of resources, they are always\nreturned in the same order.", version = "1.8.0", vendor = "DKPro Core Project", copyright = "Copyright 2010-2015\n                            Ubiquitous Knowledge Processing (UKP) Lab\n                            Technische Universität Darmstadt")
@EnhancedClassFile
/* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/api/io/ResourceCollectionReaderBase.class */
public abstract class ResourceCollectionReaderBase extends CasCollectionReader_ImplBase {
    /** Prefix that {@code locationToUrl} checks for to recognise a JAR-file location. */
    protected static final String JAR_PREFIX = "jar:file:";
    /** Marker at the start of a pattern that declares it an include pattern. */
    public static final String INCLUDE_PREFIX = "[+]";
    /** Marker at the start of a pattern that declares it an exclude pattern. */
    public static final String EXCLUDE_PREFIX = "[-]";

    /** Deprecated alias; carries the same value as {@link #PARAM_SOURCE_LOCATION}. */
    @Deprecated
    public static final String PARAM_PATH = "sourceLocation";
    /** Name of the configuration parameter holding the source location. */
    public static final String PARAM_SOURCE_LOCATION = "sourceLocation";

    // Optional on its own, but initialize() rejects a configuration in which both this
    // value is blank and no patterns are given. initialize() may rewrite this field (it
    // splits off a wildcard part and converts the remainder to a URL string).
    @ConfigurationParameter(name = "sourceLocation", mandatory = false, description = "Location from which the input is read.")
    private String sourceLocation;
    /** Name of the configuration parameter holding the include/exclude patterns. */
    public static final String PARAM_PATTERNS = "patterns";

    // May be null; a null value is what isSingleLocation() reports as "single location".
    // initialize() fills it in when the source location itself contains a '*' wildcard.
    @ConfigurationParameter(name = "patterns", mandatory = false, description = "A set of Ant-like include/exclude patterns. A pattern starts with #INCLUDE_PREFIX [+]\nif it is an include pattern and with #EXCLUDE_PREFIX [-] if it is an exclude pattern.\nThe wildcard <code>&#47;**&#47;</code> can be used to address any number of sub-directories.\nThe wildcard * can be used to a address a part of a name.")
    private String[] patterns;
    /** Name of the configuration parameter that switches the default excludes on or off. */
    public static final String PARAM_USE_DEFAULT_EXCLUDES = "useDefaultExcludes";

    // When true, getDefaultExcludes() returns the built-in list of exclude patterns;
    // when false it returns an empty list.
    @ConfigurationParameter(name = "useDefaultExcludes", mandatory = true, defaultValue = {"true"}, description = "Use the default excludes.")
    private boolean useDefaultExcludes;
    /** Name of the configuration parameter that controls whether hidden files are read. */
    public static final String PARAM_INCLUDE_HIDDEN = "includeHidden";

    // Consulted by getUri(): hidden files yield no URI unless this flag is set.
    @ConfigurationParameter(name = PARAM_INCLUDE_HIDDEN, mandatory = true, defaultValue = {"false"}, description = "Include hidden files and directories.")
    private boolean includeHidden;
    /** Name of the configuration parameter holding the document language. */
    public static final String PARAM_LANGUAGE = "language";

    // Passed to CAS.setDocumentLanguage() by initCas(); may be null when not configured.
    @ConfigurationParameter(name = "language", mandatory = false, description = "Name of optional configuration parameter that contains the language of the documents in the\ninput directory. If specified, this information will be added to the CAS.")
    private String language;
    /** Key of the optional external resource that supplies a resource resolver. */
    public static final String KEY_RESOURCE_RESOLVER = "resolver";

    // Resolver used by scan() to expand locations into Spring resources.
    // NOTE(review): the field is final and eagerly initialised, yet also annotated as an
    // external resource - confirm whether the framework can actually replace this value.
    @ExternalResource(key = KEY_RESOURCE_RESOLVER, mandatory = false, description = "Name of optional external (UIMA) resource that contains the Locator for a (Spring)\nResourcePatternResolver implementation for locating (spring) resources.")
    private final ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
    // Number of nextFile() calls so far; reported as the "completed" figure by getProgress().
    private int completed;
    // Result of scan(), assigned in initialize(); null until then.
    private Collection<Resource> resources;
    // Iterator over 'resources', assigned in initialize(); drives hasNext() and nextFile().
    private Iterator<Resource> resourceIterator;

    /* loaded from: input_file:de/tudarmstadt/ukp/dkpro/core/api/io/ResourceCollectionReaderBase$Resource.class */
    /**
     * Immutable description of one resource found by the reader: the location and base as
     * requested, the resolved URI and resolved base, the path below the base, and the underlying
     * Spring resource handle.
     * <p>
     * Two instances are equal exactly when their resolved URIs are equal (or both are
     * {@code null}); no other field takes part in {@link #equals(Object)} or {@link #hashCode()}.
     */
    public static class Resource {
        private final String location;
        private final String base;
        private final URI resolvedUri;
        private final String resolvedBase;
        private final String path;
        private final org.springframework.core.io.Resource resource;

        /**
         * Creates a resource descriptor. No argument is validated or copied.
         *
         * @param str the location, returned by {@link #getLocation()} and {@link #toString()}
         * @param str2 the base, returned by {@link #getBase()}
         * @param uri the resolved URI, returned by {@link #getResolvedUri()}
         * @param str3 the resolved base, returned by {@link #getResolvedBase()}
         * @param str4 the path, returned by {@link #getPath()}
         * @param resource the underlying Spring resource used to open the content
         */
        public Resource(String str, String str2, URI uri, String str3, String str4, org.springframework.core.io.Resource resource) {
            this.location = str;
            this.base = str2;
            this.resolvedUri = uri;
            this.resolvedBase = str3;
            this.path = str4;
            this.resource = resource;
        }

        /** @return the location given at construction time */
        public String getLocation() {
            return location;
        }

        /** @return the base given at construction time */
        public String getBase() {
            return base;
        }

        /** @return the resolved URI given at construction time, possibly {@code null} */
        public URI getResolvedUri() {
            return resolvedUri;
        }

        /** @return the resolved base given at construction time */
        public String getResolvedBase() {
            return resolvedBase;
        }

        /** @return the path given at construction time */
        public String getPath() {
            return path;
        }

        /** @return the underlying Spring resource */
        public org.springframework.core.io.Resource getResource() {
            return resource;
        }

        /**
         * Opens the content of the underlying Spring resource.
         *
         * @return a stream obtained from the underlying resource
         * @throws IOException if the underlying resource cannot be opened
         */
        public InputStream getInputStream() throws IOException {
            return resource.getInputStream();
        }

        /** Hash code derived from the resolved URI only. */
        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + (resolvedUri != null ? resolvedUri.hashCode() : 0);
            return result;
        }

        /** Compares by exact class and resolved URI only. */
        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (obj.getClass() != getClass()) {
                return false;
            }
            Resource other = (Resource) obj;
            if (resolvedUri == null) {
                return other.resolvedUri == null;
            }
            return resolvedUri.equals(other.resolvedUri);
        }

        /** @return the location string */
        @Override
        public String toString() {
            return location;
        }
    }

    /**
     * Validates the configuration, splits the configured patterns into includes and excludes,
     * converts locations to URL strings, and scans for the resources to be read.
     * <p>
     * When no patterns are configured and the source location contains a {@code *} after its
     * first {@code :}, the location is split at the last path separator or colon preceding the
     * wildcard: the leading part stays the source location and the trailing part becomes a single
     * include pattern.
     *
     * @param uimaContext the context handed to the superclass initialisation
     * @throws IllegalArgumentException if neither a source location nor any pattern is configured
     * @throws ResourceInitializationException if a pattern starts with a bracketed marker other
     *         than the include/exclude prefixes, or if scanning fails with an I/O error
     */
    @Override // org.apache.uima.fit.component.CasCollectionReader_ImplBase
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        boolean noPatterns = patterns == null || patterns.length == 0;
        if (noPatterns && StringUtils.isBlank(sourceLocation)) {
            throw new IllegalArgumentException("Either a source location, pattern, or both must be specified.");
        }

        if (patterns == null) {
            int wildcardPos = sourceLocation.indexOf('*');
            int firstColonPos = sourceLocation.indexOf(':');
            if (wildcardPos != -1 && wildcardPos > firstColonPos) {
                // Split at whichever of platform separator, '/' or ':' occurs last before the wildcard.
                int lastSeparator = sourceLocation.lastIndexOf(File.separatorChar, wildcardPos);
                int lastSlash = sourceLocation.lastIndexOf('/', wildcardPos);
                int lastColon = sourceLocation.lastIndexOf(':', wildcardPos);
                int splitPos = Math.max(Math.max(lastSeparator, lastSlash), lastColon);
                if (splitPos == -1) {
                    // Nothing to split on: the whole location is the pattern.
                    patterns = new String[] {INCLUDE_PREFIX + sourceLocation};
                    sourceLocation = "";
                } else {
                    patterns = new String[] {INCLUDE_PREFIX + sourceLocation.substring(splitPos + 1)};
                    sourceLocation = sourceLocation.substring(0, splitPos + 1);
                }
            }
        }

        List<String> includes = new ArrayList<String>();
        List<String> excludes = getDefaultExcludes();
        if (patterns != null) {
            for (String pattern : patterns) {
                if (pattern.startsWith(INCLUDE_PREFIX)) {
                    includes.add(pattern.substring(INCLUDE_PREFIX.length()));
                    continue;
                }
                if (pattern.startsWith(EXCLUDE_PREFIX)) {
                    excludes.add(pattern.substring(EXCLUDE_PREFIX.length()));
                    continue;
                }
                // Any other single-character bracket marker is rejected; unmarked patterns are includes.
                if (pattern.matches("^\\[.\\].*")) {
                    throw new ResourceInitializationException(new IllegalArgumentException("Patterns have to start with [+] or [-]."));
                }
                includes.add(pattern);
            }
        }

        try {
            if (sourceLocation != null) {
                sourceLocation = locationToUrl(sourceLocation);
            } else {
                // Without a source location each pattern is itself a location and is converted in place.
                for (ListIterator<String> it = includes.listIterator(); it.hasNext();) {
                    it.set(locationToUrl(it.next()));
                }
                for (ListIterator<String> it = excludes.listIterator(); it.hasNext();) {
                    it.set(locationToUrl(it.next()));
                }
            }
            resources = scan(getSourceLocation(), includes, excludes);
            resourceIterator = resources.iterator();
            getLogger().info("Found [" + resources.size() + "] resources to be read");
        } catch (IOException e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Builds the list of exclude patterns applied by default.
     * <p>
     * The returned list is a fresh, modifiable list; {@code initialize} appends the configured
     * exclude patterns to it.
     *
     * @return the built-in exclude patterns when default excludes are enabled, otherwise an
     *         empty list
     */
    protected List<String> getDefaultExcludes() {
        List<String> excludes = new ArrayList<String>();
        if (!useDefaultExcludes) {
            return excludes;
        }
        Collections.addAll(excludes,
                "**/*~",
                "**/#*#",
                "**/.#*",
                "**/%*%",
                "**/._*",
                "**/CVS",
                "**/CVS/**",
                "**/.cvsignore",
                "**/SCCS",
                "**/SCCS/**",
                "**/vssver.scc",
                "**/.svn",
                "**/.svn/**",
                "**/.DS_Store",
                "**/.git",
                "**/.git/**",
                "**/.gitattributes",
                "**/.gitignore",
                "**/.gitmodules",
                "**/.hg",
                "**/.hg/**",
                "**/.hgignore",
                "**/.hgsub",
                "**/.hgsubstate",
                "**/.hgtags",
                "**/.bzr",
                "**/.bzr/**",
                "**/.bzrignore");
        return excludes;
    }

    /**
     * Normalises a location string into the URL form used for scanning.
     * <ul>
     * <li>A location that {@code isUnmarkedFileLocation} classifies as a plain file path is
     * converted to a {@code file:} URL string.</li>
     * <li>A location starting with {@link #JAR_PREFIX} that contains no {@code !} gets a
     * {@code !} appended.</li>
     * <li>Any other location is returned unchanged.</li>
     * </ul>
     *
     * @param str the location to convert; must not be {@code null}
     * @return the converted location
     * @throws MalformedURLException if the file path cannot be turned into a URL
     */
    protected String locationToUrl(String str) throws MalformedURLException {
        if (isUnmarkedFileLocation(str)) {
            return new File(str).toURI().toURL().toString();
        }
        // "!" is the separator between archive and entry in jar: URLs. It is written as a
        // literal here instead of borrowing an unrelated Spring AOP constant of equal value.
        if (str.startsWith(JAR_PREFIX) && !str.contains("!")) {
            return str + "!";
        }
        return str;
    }

    /**
     * Tells whether a location carries no URL scheme marker.
     * <p>
     * This is the case when the location contains no {@code :} at all, or when its first
     * {@code :} sits at index 0 or 1.
     * NOTE(review): the index-1 case presumably covers Windows drive letters such as
     * {@code C:} - confirm.
     *
     * @param str the location to inspect; must not be {@code null}
     * @return {@code true} if the first colon is absent or within the first two characters
     */
    private boolean isUnmarkedFileLocation(String str) {
        int firstColon = str.indexOf(':');
        return firstColon < 2;
    }

    /**
     * Gives access to the resources collected during {@code initialize}.
     *
     * @return the scanned resources, or {@code null} if {@code initialize} has not assigned them
     */
    protected Collection<Resource> getResources() {
        return resources;
    }

    /**
     * Gives access to the iterator that {@code hasNext} and {@code nextFile} consume.
     *
     * @return the shared resource iterator, or {@code null} if {@code initialize} has not
     *         assigned it
     */
    protected Iterator<Resource> getResourceIterator() {
        return resourceIterator;
    }

    /**
     * Returns the next resource and counts it as completed.
     * <p>
     * The completed counter is incremented even when the iterator throws, because the increment
     * happens in a {@code finally} clause.
     * (Decompiler note: the access modifier was changed from protected.)
     *
     * @return the next resource from the resource iterator
     */
    public Resource nextFile() {
        Resource next;
        try {
            next = resourceIterator.next();
        } finally {
            completed++;
        }
        return next;
    }

    /**
     * Returns the current source location.
     * <p>
     * After {@code initialize} this is the processed value (wildcard part removed, converted to
     * a URL string), not necessarily the value that was configured.
     *
     * @return the source location, possibly {@code null}
     */
    protected String getSourceLocation() {
        return sourceLocation;
    }

    /**
     * Tells whether the reader addresses exactly one location, i.e. no patterns are set.
     *
     * @return {@code true} if the patterns array is {@code null}
     */
    protected boolean isSingleLocation() {
        return patterns == null;
    }

    /**
     * Computes the base for the current source location.
     *
     * @return the result of {@link #getBase(String)} applied to {@link #getSourceLocation()}
     */
    protected String getBase() {
        String location = getSourceLocation();
        return getBase(location);
    }

    /**
     * Derives the base against which patterns are resolved from a location.
     * <p>
     * A {@code null} location yields the empty string. When patterns are set and the location
     * ends with neither {@code /} nor {@code :}, a {@code /} is appended; otherwise the location
     * is returned as is.
     *
     * @param str the location, may be {@code null}
     * @return the base, never {@code null}
     */
    protected String getBase(String str) {
        if (str == null) {
            return "";
        }
        boolean hasPatterns = patterns != null;
        boolean terminated = str.endsWith("/") || str.endsWith(":");
        return (hasPatterns && !terminated) ? str + "/" : str;
    }

    /**
     * Reports reading progress as a single entry.
     *
     * @return a one-element array holding the number of completed {@code nextFile} calls, the
     *         total number of scanned resources and the unit name {@code "file"}
     */
    @Override // org.apache.uima.collection.base_cpm.BaseCollectionReader
    public Progress[] getProgress() {
        Progress progress = new ProgressImpl(completed, resources.size(), "file");
        return new Progress[] {progress};
    }

    /**
     * Gives access to the resolver used for locating resources.
     *
     * @return the resource pattern resolver held by this reader
     */
    protected ResourcePatternResolver getResolver() {
        return resolver;
    }

    /**
     * Tells whether another resource is available.
     *
     * @return {@code true} if the resource iterator has more elements
     * @throws IOException declared by the overridden method; not thrown by this implementation
     * @throws CollectionException declared by the overridden method; not thrown by this
     *         implementation
     */
    @Override // org.apache.uima.collection.base_cpm.BaseCollectionReader
    public boolean hasNext() throws IOException, CollectionException {
        boolean more = resourceIterator.hasNext();
        return more;
    }

    /**
     * Resolves all resources below a base location that match the include patterns and match
     * none of the exclude patterns.
     * <p>
     * When no include pattern is given, a single-location reader uses the base itself, while a
     * pattern-based reader includes everything below the base. Exclude patterns are matched
     * against the resource URI relative to its resolved base. The result is sorted by location
     * so that the same set of resources is always returned in the same order.
     * <p>
     * A resource matching an exclude pattern is now actually skipped; previously the match was
     * only logged and the resource was returned anyway.
     *
     * @param str the base location; passed through {@link #getBase(String)}
     * @param collection the include patterns; {@code null} or empty selects the default include
     * @param collection2 the exclude patterns; {@code null} or empty excludes nothing
     * @return the matching resources, sorted by location
     * @throws FileNotFoundException if this is a single-location reader and nothing was found
     * @throws IOException if the resolver fails
     * @throws IllegalStateException if a resolved resource lies under none of the resolved bases
     */
    protected Collection<Resource> scan(String str, Collection<String> collection, Collection<String> collection2) throws IOException {
        final boolean singleLocation = isSingleLocation();
        final String base = getBase(str);
        getLogger().info("Scanning [" + base + "]");

        Collection<String> includes;
        if (collection != null && !collection.isEmpty()) {
            includes = collection;
        } else if (singleLocation) {
            includes = Collections.singleton("");
        } else {
            includes = Collections.singleton("**/*");
        }
        Collection<String> excludes = (collection2 == null) ? Collections.<String>emptySet() : collection2;

        // A base may resolve to several concrete locations (the resolver returns an array), so
        // all directory URIs it resolves to are collected as candidate bases.
        final boolean resolveBase = base.length() > 0 && !singleLocation;
        Collection<String> candidateBases = new HashSet<String>();
        if (resolveBase) {
            for (org.springframework.core.io.Resource baseResource : this.resolver.getResources(base)) {
                URI baseUri = getUri(baseResource, false);
                if (baseUri != null) {
                    candidateBases.add(baseUri.toString());
                }
            }
        }

        AntPathMatcher matcher = new AntPathMatcher();
        List<Resource> found = new ArrayList<Resource>();
        for (String include : includes) {
            for (org.springframework.core.io.Resource candidate : this.resolver.getResources(base + include)) {
                URI uri = getUri(candidate, true);
                if (uri == null) {
                    continue;
                }
                String resolvedLocation = uri.toString();
                String resolvedBase = resolveBase ? findResolvedBase(resolvedLocation, candidateBases) : base;
                String relativePath = resolvedLocation.substring(resolvedBase.length());

                if (matchesAny(matcher, excludes, relativePath)) {
                    if (getLogger().isDebugEnabled()) {
                        getLogger().debug("Excluded: " + resolvedLocation);
                    }
                    continue;
                }

                String location = base + relativePath;
                if (singleLocation) {
                    // For a single file the base is the file itself: split it into parent and name.
                    relativePath = StringUtils.substringAfterLast(resolvedBase, "/");
                    resolvedBase = StringUtils.substringBeforeLast(resolvedBase, "/") + '/';
                }
                found.add(new Resource(location, base, candidate.getURI(), resolvedBase, relativePath, candidate));
            }
        }

        Collections.sort(found, new Comparator<Resource>() {
            @Override // java.util.Comparator
            public int compare(Resource left, Resource right) {
                return left.location.compareTo(right.location);
            }
        });

        if (singleLocation && found.isEmpty()) {
            throw new FileNotFoundException("Resource not found or not a file: [" + str + "]. Please specify a file or use a pattern. Directories without patterns are not valid.");
        }
        return found;
    }

    /** Returns the first candidate base that is a prefix of the location; fails if there is none. */
    private static String findResolvedBase(String resolvedLocation, Collection<String> candidateBases) {
        for (String candidate : candidateBases) {
            if (resolvedLocation.startsWith(candidate)) {
                return candidate;
            }
        }
        throw new IllegalStateException("No base found for location [" + resolvedLocation + "]");
    }

    /** Tells whether the path matches at least one of the given Ant-style patterns. */
    private static boolean matchesAny(AntPathMatcher matcher, Collection<String> antPatterns, String path) {
        for (String pattern : antPatterns) {
            if (matcher.match(pattern, path)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Determines the URI under which a resolved resource is processed, or {@code null} if the
     * resource is to be skipped.
     * <p>
     * If the resource cannot be represented as a {@link File}, its own URI is returned without
     * further checks. Otherwise hidden files are skipped unless hidden files are included, and
     * the file must be of the requested kind (or have an empty path) to be accepted.
     *
     * @param resource the resolved Spring resource
     * @param z {@code true} to accept regular files only, {@code false} to accept directories
     *        only
     * @return the URI of the accepted resource, or {@code null} if it is skipped
     * @throws IOException if the fallback URI cannot be obtained from the resource
     */
    private URI getUri(org.springframework.core.io.Resource resource, boolean z) throws IOException {
        final File file;
        try {
            file = resource.getFile();
        } catch (IOException notOnFileSystem) {
            // Not backed by a file on the file system: use the resource's own URI.
            return resource.getURI();
        } catch (UnsupportedOperationException notOnFileSystem) {
            return resource.getURI();
        }

        if (file.isHidden() && !this.includeHidden) {
            return null;
        }
        if (file.getPath().length() == 0 || (z ? file.isFile() : file.isDirectory())) {
            // Reuse the File resolved above rather than resolving the resource a second time.
            return file.toURI();
        }
        return null;
    }

    /**
     * Initialises the document metadata of a CAS from a resource, without a qualifier.
     * Equivalent to calling the three-argument variant with a {@code null} qualifier.
     * (Decompiler note: the access modifier was changed from protected.)
     *
     * @param cas the CAS to initialise
     * @param resource the resource the CAS content originates from
     */
    public void initCas(CAS cas, Resource resource) {
        final String noQualifier = null;
        initCas(cas, resource, noQualifier);
    }

    /**
     * Initialises the document metadata of a CAS from a resource.
     * <p>
     * Sets the document title (file name of the resource path), the document URI (resolved URI)
     * and the document id (resource path). When the resource has a base, the resolved base is
     * stored as both document base URI and collection id. Finally the configured language is set
     * on the CAS. A non-null qualifier is appended to the URI and the id as {@code #qualifier}.
     * (Decompiler note: the access modifier was changed from protected.)
     *
     * @param cas the CAS to initialise
     * @param resource the resource the CAS content originates from
     * @param str optional qualifier, may be {@code null}
     * @throws RuntimeException wrapping a {@link CASException} raised while creating the
     *         metadata
     */
    public void initCas(CAS cas, Resource resource, String str) {
        String suffix;
        if (str == null) {
            suffix = "";
        } else {
            suffix = "#" + str;
        }

        try {
            DocumentMetaData metaData = DocumentMetaData.create(cas);

            File pathAsFile = new File(resource.getPath());
            metaData.setDocumentTitle(pathAsFile.getName());
            metaData.setDocumentUri(resource.getResolvedUri().toString() + suffix);
            metaData.setDocumentId(resource.getPath() + suffix);

            boolean hasBase = resource.getBase() != null;
            if (hasBase) {
                metaData.setDocumentBaseUri(resource.getResolvedBase());
                metaData.setCollectionId(resource.getResolvedBase());
            }

            cas.setDocumentLanguage(language);
        } catch (CASException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the configured document language.
     *
     * @return the language value, or {@code null} if none is configured
     */
    public String getLanguage() {
        return language;
    }
}
