package org.gbif.dwca.io;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.io.IOCase;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.HiddenFileFilter;
import org.apache.commons.io.filefilter.IOFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.commons.io.input.BOMInputStream;
import org.gbif.dwc.terms.DcTerm;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.Term;
import org.gbif.dwc.terms.TermFactory;
import org.gbif.dwca.io.ArchiveField;
import org.gbif.ipt.config.DataDir;
import org.gbif.ipt.task.GenerateDwca;
import org.gbif.util.DownloadUtil;
import org.gbif.utils.file.CompressionUtil;
import org.gbif.utils.file.FileUtils;
import org.gbif.utils.file.csv.CSVReader;
import org.gbif.utils.file.csv.CSVReaderFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/dwca-io-1.31.jar:org/gbif/dwca/io/ArchiveFactory.class */
public class ArchiveFactory {
    /** Term factory used to turn header column names into {@link Term} instances. */
    private static final TermFactory TERM_FACTORY = TermFactory.instance();
    private static final Logger LOG = LoggerFactory.getLogger((Class<?>) ArchiveFactory.class);
    /**
     * File name suffixes that mark a file as a candidate data file when an archive folder has no
     * meta descriptor; they are matched case-insensitively.
     * NOTE(review): GenerateDwca.TEXT_FILE_EXTENSION is presumably ".txt" - confirm against its declaration.
     */
    private static final List<String> DATA_FILE_SUFFICES = ImmutableList.of(".csv", GenerateDwca.TEXT_FILE_EXTENSION, ".tsv", ".tab", ".text", ".data", ".dwca");
    /** Maps an identifier term to the row type it implies; populated in the static initializer. */
    private static final Map<Term, Term> TERM_TO_ROW_TYPE;
    /** Candidate record identifier terms in order of preference; populated in the static initializer. */
    private static final List<Term> ID_TERMS;
    /** Shared SAX parser factory for reading meta descriptors; configured in the static initializer. */
    private static final SAXParserFactory SAX_FACTORY;

    /**
     * Downloads an archive from a URL and opens it.
     *
     * <p>The download is stored as {@code dwca-download} inside the given directory and is then
     * opened via {@link #openArchive(File, File)} using the sibling {@code dwca} folder as the
     * target directory.
     *
     * @param url  location the archive is downloaded from
     * @param file directory that receives both the download and the {@code dwca} folder
     * @return the opened archive
     * @throws IOException if downloading or reading fails
     * @throws UnsupportedArchiveException if the content is not a supported archive
     */
    public static Archive openArchive(URL url, File file) throws IOException, UnsupportedArchiveException {
        final File downloadTarget = new File(file, "dwca-download");
        DownloadUtil.download(url, downloadTarget);
        return openArchive(downloadTarget, new File(file, "dwca"));
    }

    /**
     * Opens an archive from a (possibly compressed) file, decompressing it into a target folder.
     *
     * <p>An existing target folder is deleted and recreated first. If the decompressed content
     * consists of a single visible directory, the files found in that directory are moved up into
     * the target folder. If the file uses no supported compression, it is read as a single data
     * file and a copy of it is placed in the target folder.
     *
     * @param file  the archive file, or a plain data file
     * @param file2 the folder to decompress into; any existing content is removed
     * @return the opened archive
     * @throws IOException if the target folder cannot be prepared, listed or read
     * @throws UnsupportedArchiveException if the content is not a supported archive
     */
    public static Archive openArchive(File file, File file2) throws IOException, UnsupportedArchiveException {
        if (file2.exists()) {
            LOG.debug("Deleting existing archive folder [{}]", file2.getAbsolutePath());
            FileUtils.deleteDirectoryRecursively(file2);
        }
        org.apache.commons.io.FileUtils.forceMkdir(file2);
        try {
            CompressionUtil.decompressFile(file2, file, true);
            File[] rootFiles = file2.listFiles((FileFilter) HiddenFileFilter.VISIBLE);
            if (rootFiles == null) {
                // File.listFiles returns null on an I/O error; fail with a meaningful exception instead of an NPE.
                throw new IOException("Cannot list content of decompressed archive folder " + file2.getAbsolutePath());
            }
            if (rootFiles.length == 1 && rootFiles[0].isDirectory()) {
                File root = rootFiles[0];
                LOG.debug("Removing single root folder {} found in decompressed archive", root.getAbsoluteFile());
                for (File child : org.apache.commons.io.FileUtils.listFiles(root, TrueFileFilter.TRUE, (IOFileFilter) null)) {
                    File target = new File(file2, child.getName());
                    // Flattening stays best effort, but a failed move is no longer silent.
                    if (!child.renameTo(target)) {
                        LOG.warn("Failed to move {} to {}", child.getAbsolutePath(), target.getAbsolutePath());
                    }
                }
            }
            return openArchive(file2);
        } catch (CompressionUtil.UnsupportedCompressionType e) {
            LOG.debug("Could not uncompress archive [{}], try to read as single text file", file, e);
            Archive openArchiveDataFile = openArchiveDataFile(file);
            Files.copy(file, new File(file2, file.getName()));
            return openArchiveDataFile;
        }
    }

    /**
     * Opens a single data file as an archive whose core is that file.
     *
     * <p>The core description is derived from the file's header row, and the file's parent folder
     * is searched for a metadata document.
     *
     * @param file the data file to open
     * @return the validated archive
     * @throws IOException if the file cannot be read
     * @throws UnsupportedArchiveException if the resulting archive fails validation
     */
    public static Archive openArchiveDataFile(File file) throws IOException, UnsupportedArchiveException {
        final Archive singleFileArchive = new Archive();
        singleFileArchive.setLocation(file);
        final ArchiveFile core = readFileHeaders(file);
        singleFileArchive.setCore(core);
        final File folder = file.getParentFile();
        discoverMetadataFile(singleFileArchive, folder);
        return validateArchive(singleFileArchive);
    }

    /**
     * Opens an archive from an already decompressed folder, or from a single data file.
     *
     * <p>For a folder, {@code xml} and {@code txt} files whose name starts with a backslash are
     * renamed to drop that character. If the folder holds a meta descriptor
     * ({@link Archive#META_FN}) it is parsed; otherwise the folder must contain exactly one visible
     * file with one of the known data file suffixes, which becomes the core.
     *
     * @param file the archive folder or a single data file
     * @return the validated archive
     * @throws FileNotFoundException if {@code file} does not exist
     * @throws IOException if reading fails
     * @throws UnsupportedArchiveException if the content is not a supported archive
     */
    public static Archive openArchive(File file) throws IOException, UnsupportedArchiveException {
        if (!file.exists()) {
            throw new FileNotFoundException("Archive folder not existing: " + file.getAbsolutePath());
        }
        if (file.isFile()) {
            return openArchiveDataFile(file);
        }
        Archive archive = new Archive();
        archive.setLocation(file);
        Iterator<File> candidates = org.apache.commons.io.FileUtils.iterateFiles(file, new String[]{"xml", "txt"}, false);
        while (candidates.hasNext()) {
            File candidate = candidates.next();
            String name = candidate.getName();
            if (name.startsWith("\\")) {
                String stripped = name.replaceFirst("\\\\", "");
                LOG.info("Renaming file from {} to {}", name, stripped);
                // Renaming stays best effort, but a failure is no longer silent.
                if (!candidate.renameTo(new File(file, stripped))) {
                    LOG.warn("Could not rename file {} to {}", name, stripped);
                }
            }
        }
        File metaFile = new File(file, Archive.META_FN);
        if (metaFile.exists()) {
            // This method opens the stream, so it also guarantees it is closed; a second close is harmless.
            try (InputStream metaStream = new FileInputStream(metaFile)) {
                readMetaDescriptor(archive, metaStream);
            }
        } else {
            List<File> dataFiles = new ArrayList<>();
            for (String suffix : DATA_FILE_SUFFICES) {
                File[] matches = file.listFiles((FileFilter) FileFilterUtils.and(FileFilterUtils.suffixFileFilter(suffix, IOCase.INSENSITIVE), HiddenFileFilter.VISIBLE));
                // File.listFiles returns null on an I/O error; treat that as "no match" rather than failing with an NPE.
                if (matches != null) {
                    dataFiles.addAll(Arrays.asList(matches));
                }
            }
            if (dataFiles.size() != 1) {
                throw new UnsupportedArchiveException("The archive given is a folder with more or less than 1 data files having a csv, txt or tab suffix");
            }
            File dataFile = new File(file, dataFiles.get(0).getName());
            ArchiveFile core = readFileHeaders(dataFile);
            // Replace the placeholder location set by readFileHeaders with the actual file name.
            core.getLocations().clear();
            core.addLocation(dataFile.getName());
            archive.setCore(core);
        }
        discoverMetadataFile(archive, metaFile.getParentFile());
        return validateArchive(archive);
    }

    /**
     * Sets the archive's metadata location to the first well-known metadata file name that exists
     * in the given folder. Does nothing when the archive already has a metadata location.
     *
     * @param archive the archive to update
     * @param file    the folder that is checked for the candidate file names
     */
    private static void discoverMetadataFile(Archive archive, File file) {
        if (archive.getMetadataLocation() != null) {
            return;
        }
        // Candidates are tried in this order; the first existing one wins.
        final String[] candidateNames = {DataDir.EML_XML_FILENAME, "metadata.xml"};
        for (String candidateName : candidateNames) {
            final File candidate = new File(file, candidateName);
            if (candidate.exists()) {
                archive.setMetadataLocation(candidateName);
                return;
            }
        }
    }

    /**
     * Builds an {@link ArchiveFile} description from the header row of a delimited data file.
     *
     * <p>Encoding, field delimiter and quote character are taken from the CSV reader built for the
     * file. Each header column is turned into a string-typed field at its column index. Columns
     * whose header is {@code null} or at most one character long are skipped, as are columns for
     * which building the field throws {@link IllegalArgumentException}. The id field and the row
     * type are then derived from the recognised terms, if possible.
     *
     * @param file the data file whose header is read
     * @return the archive file description, with one header line marked as ignored
     * @throws UnsupportedArchiveException declared for callers; not thrown directly by this method
     * @throws IOException if the file cannot be read
     */
    private static ArchiveFile readFileHeaders(File file) throws UnsupportedArchiveException, IOException {
        ArchiveFile archiveFile = new ArchiveFile();
        // Placeholder location; callers that know the final file name replace it.
        archiveFile.addLocation(null);
        archiveFile.setIgnoreHeaderLines(1);

        final String[] header;
        try (CSVReader reader = CSVReaderFactory.build(file)) {
            archiveFile.setEncoding(reader.encoding);
            archiveFile.setFieldsTerminatedBy(reader.delimiter);
            archiveFile.setFieldsEnclosedBy(reader.quoteChar);
            header = reader.getHeader();
        }

        // The index advances for every column, including skipped ones, so it stays the real column position.
        int index = 0;
        for (String columnName : header) {
            if (columnName != null && columnName.length() > 1) {
                try {
                    archiveFile.addField(new ArchiveField(index, TERM_FACTORY.findTerm(columnName), null, ArchiveField.DataType.string));
                } catch (IllegalArgumentException e) {
                    LOG.warn("Illegal term name >>{}<< found in header, ignore column {}", columnName, index);
                }
            }
            index++;
        }

        List<Term> terms = archiveFile.getFields().keySet().stream().collect(Collectors.toList());
        determineRecordIdentifier(terms).ifPresent(term -> archiveFile.setId(archiveFile.getField(term)));
        determineRowType(terms).ifPresent(archiveFile::setRowType);
        return archiveFile;
    }

    /**
     * Parses a meta descriptor from the given stream and populates the archive from it.
     *
     * <p>The stream is wrapped in a {@link BOMInputStream} and is closed when this method returns,
     * whether parsing succeeded or failed.
     *
     * @param archive     the archive handed to the SAX handler that processes the descriptor
     * @param inputStream the stream to read the descriptor from
     * @throws UnsupportedArchiveException if reading or parsing fails for any reason; the
     *         original exception is kept as the cause
     */
    @VisibleForTesting
    protected static void readMetaDescriptor(Archive archive, InputStream inputStream) throws UnsupportedArchiveException {
        try (BOMInputStream bomStream = new BOMInputStream(inputStream)) {
            SAXParser parser = SAX_FACTORY.newSAXParser();
            MetaXMLSaxHandler handler = new MetaXMLSaxHandler(archive);
            LOG.debug("Reading archive metadata file");
            parser.parse(bomStream, handler);
        } catch (Exception e) {
            LOG.warn("Exception caught", e);
            throw new UnsupportedArchiveException(e);
        }
    }

    /**
     * Validates the core file and every extension file of an archive.
     *
     * @param archive the archive to validate
     * @return the same archive instance, for call chaining
     * @throws UnsupportedArchiveException if the core or any extension fails validation
     */
    private static Archive validateArchive(Archive archive) throws UnsupportedArchiveException {
        validateCoreFile(archive.getCore(), !archive.getExtensions().isEmpty());
        for (ArchiveFile extension : archive.getExtensions()) {
            validateExtensionFile(extension);
        }
        // Parameterized logging avoids building the message when debug output is disabled.
        LOG.debug("Archive contains {} described extension files", archive.getExtensions().size());
        LOG.debug("Archive contains {} core properties", archive.getCore().getFields().size());
        return archive;
    }

    /**
     * Validates the core data file of an archive.
     *
     * <p>A core without an id column only triggers a warning when the archive has extensions.
     *
     * @param archiveFile the core file; may be {@code null}, which is reported as an
     *                    {@link UnsupportedArchiveException}
     * @param z           whether the archive has at least one extension
     * @throws UnsupportedArchiveException if the file is missing or incompletely described
     */
    private static void validateCoreFile(ArchiveFile archiveFile, boolean z) throws UnsupportedArchiveException {
        // The null guard lets validateFile report a missing core instead of failing here with an NPE.
        if (z && archiveFile != null && archiveFile.getId() == null) {
            LOG.warn("DwC-A core data file {} is lacking an id column. No extensions allowed in this case", archiveFile.getTitle());
        }
        validateFile(archiveFile);
    }

    /**
     * Validates an extension data file, which must declare an id column.
     *
     * @param archiveFile the extension file; may be {@code null}, which is reported as an
     *                    {@link UnsupportedArchiveException}
     * @throws UnsupportedArchiveException if the file is missing, has no id, or is incompletely
     *         described
     */
    private static void validateExtensionFile(ArchiveFile archiveFile) throws UnsupportedArchiveException {
        // The null guard lets validateFile report a missing file instead of failing here with an NPE.
        if (archiveFile != null && archiveFile.getId() == null) {
            throw new UnsupportedArchiveException("DwC-A data file " + archiveFile.getTitle() + " requires an id or foreign key to the core id");
        }
        validateFile(archiveFile);
    }

    /**
     * Checks the properties every data file description needs: it must exist, have a location
     * file and have a character encoding.
     *
     * @param archiveFile the data file description to check
     * @throws UnsupportedArchiveException naming the first requirement that is not met
     */
    private static void validateFile(ArchiveFile archiveFile) throws UnsupportedArchiveException {
        if (archiveFile == null) {
            throw new UnsupportedArchiveException("DwC-A data file is NULL");
        }
        // Requirements are checked in order; the first missing one decides the message.
        final String unmetRequirement;
        if (archiveFile.getLocationFile() == null) {
            unmetRequirement = " requires a location";
        } else if (archiveFile.getEncoding() == null) {
            unmetRequirement = " requires a character encoding";
        } else {
            return;
        }
        throw new UnsupportedArchiveException("DwC-A data file " + archiveFile.getTitle() + unmetRequirement);
    }

    /**
     * Derives a row type from the given terms.
     *
     * <p>The entries of {@code TERM_TO_ROW_TYPE} are checked in iteration order; the value of the
     * first entry whose key is contained in the list is returned.
     *
     * @param list the terms to inspect
     * @return the matching row type, or an empty optional if no key is contained in the list
     */
    static Optional<Term> determineRowType(List<Term> list) {
        for (Map.Entry<Term, Term> candidate : TERM_TO_ROW_TYPE.entrySet()) {
            if (list.contains(candidate.getKey())) {
                return Optional.of(candidate.getValue());
            }
        }
        return Optional.empty();
    }

    /**
     * Picks the record identifier term from the given terms.
     *
     * <p>The entries of {@code ID_TERMS} are checked in list order; the first one contained in the
     * given list is returned.
     *
     * @param list the terms to inspect; must not be {@code null}
     * @return the first matching identifier term, or an empty optional if none is contained
     */
    static Optional<Term> determineRecordIdentifier(List<Term> list) {
        // The method reference binds to the list directly (the original lambda referenced an undefined variable).
        return ID_TERMS.stream().filter(list::contains).findFirst();
    }

    static {
        LinkedHashMap linkedHashMap = new LinkedHashMap();
        linkedHashMap.put(DwcTerm.occurrenceID, DwcTerm.Occurrence);
        linkedHashMap.put(DwcTerm.taxonID, DwcTerm.Taxon);
        linkedHashMap.put(DwcTerm.eventID, DwcTerm.Event);
        TERM_TO_ROW_TYPE = Collections.unmodifiableMap(linkedHashMap);
        ID_TERMS = Collections.unmodifiableList(Arrays.asList(DwcTerm.occurrenceID, DwcTerm.taxonID, DwcTerm.eventID, DcTerm.identifier));
        SAX_FACTORY = SAXParserFactory.newInstance();
        SAX_FACTORY.setNamespaceAware(true);
        SAX_FACTORY.setValidating(false);
    }
}
