package org.gbif.ipt.task;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.commons.io.IOCase;
import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.math3.geometry.VectorFormat;
import org.apache.log4j.Level;
import org.gbif.api.model.common.DOI;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.Term;
import org.gbif.dwc.terms.TermFactory;
import org.gbif.dwca.io.Archive;
import org.gbif.dwca.io.ArchiveFactory;
import org.gbif.dwca.io.ArchiveField;
import org.gbif.dwca.io.ArchiveFile;
import org.gbif.dwca.io.MetaDescriptorWriter;
import org.gbif.ipt.config.AppConfig;
import org.gbif.ipt.config.Constants;
import org.gbif.ipt.config.DataDir;
import org.gbif.ipt.model.Extension;
import org.gbif.ipt.model.ExtensionMapping;
import org.gbif.ipt.model.ExtensionProperty;
import org.gbif.ipt.model.PropertyMapping;
import org.gbif.ipt.model.RecordFilter;
import org.gbif.ipt.model.Resource;
import org.gbif.ipt.service.admin.VocabulariesManager;
import org.gbif.ipt.service.manage.SourceManager;
import org.gbif.ipt.utils.MapUtils;
import org.gbif.utils.file.ClosableReportingIterator;
import org.gbif.utils.file.CompressionUtil;
import org.gbif.utils.file.FileSplitter;
import org.gbif.utils.file.FileUtils;
import org.gbif.utils.file.csv.CSVReader;
import org.gbif.utils.file.csv.CSVReaderFactory;
import org.gbif.utils.text.LineComparator;
import org.gbif.ws.paths.OccurrencePaths;

/* loaded from: input_file:WEB-INF/classes/org/gbif/ipt/task/GenerateDwca.class */
public class GenerateDwca extends ReportingTask implements Callable<Map<String, Integer>> {
    /** Resource whose Darwin Core Archive is generated by this task. */
    private final Resource resource;
    /** Record count per extension row type; filled by addDataFile and returned by call(). */
    private Map<String, Integer> recordsByExtension;
    /** In-memory archive descriptor; created in call() and populated with core/extension files. */
    private Archive archive;
    /** Temporary working folder for the archive contents; created in call() and deleted when call() ends. */
    private File dwcaFolder;
    /** Number of records written for the data file currently being generated (reset by addDataFile). */
    private int currRecords;
    /** Number of records skipped for the data file currently being generated (reset by addDataFile). */
    private int currRecordsSkipped;
    /** Title of the extension whose data file is currently being written. */
    private String currExtension;
    /** Current state of the task. */
    private STATE state;
    // NOTE(review): injected in the constructor; not referenced in the visible part of this file.
    private final SourceManager sourceManager;
    /** Source of the basisOfRecord vocabulary used during validation. */
    private final VocabulariesManager vocabManager;
    /** basisOfRecord vocabulary with lower-cased keys; loaded lazily by loadBasisOfRecordMapFromVocabulary(). */
    private Map<String, String> basisOfRecords;
    // NOTE(review): not referenced in the visible part of this file; presumably the last failure - confirm.
    private Exception exception;
    /** Application configuration; call() consults cfg.debug() when reporting a GeneratorException. */
    private AppConfig cfg;
    private static final int ID_COLUMN_INDEX = 0;
    public static final String CHARACTER_ENCODING = "UTF-8";
    /** File name prefix of the temporary sorted copy created by sortCoreDataFile. */
    private static final String SORTED_FILE_PREFIX = "sorted_";
    public static final String CANCELLED_STATE_MSG = "Archive generation cancelled";
    public static final String ID_COLUMN_NAME = "id";
    public static final String TEXT_FILE_EXTENSION = ".txt";
    public static final String WILDCARD_CHARACTER = "*";
    /** Matches a single tab, line feed or carriage return character. */
    private static final Pattern escapeChars = Pattern.compile("[\t\n\r]");
    private static final TermFactory TERM_FACTORY = TermFactory.instance();
    private static final FileUtils GBIF_FILE_UTILS = new FileUtils();
    /** Darwin Core terms for which buildField sets a multi-value delimiter on the archive field. */
    public static final Set<DwcTerm> DWC_MULTI_VALUE_TERMS = ImmutableSet.of(DwcTerm.recordedBy, DwcTerm.preparations, DwcTerm.associatedMedia, DwcTerm.associatedReferences, DwcTerm.associatedSequences, DwcTerm.associatedTaxa, DwcTerm.otherCatalogNumbers, DwcTerm.associatedOccurrences, DwcTerm.associatedOrganisms, DwcTerm.previousIdentifications, DwcTerm.higherGeography, DwcTerm.georeferencedBy, DwcTerm.georeferenceSources, DwcTerm.typeStatus, DwcTerm.identifiedBy, DwcTerm.identificationReferences, DwcTerm.higherClassification, DwcTerm.measurementDeterminedBy);
    /** Case-insensitive string ordering that sorts null values first; passed to the LineComparator in sortCoreDataFile. */
    private static final Comparator<String> IGNORE_CASE_COMPARATOR = Ordering.from(new Comparator<String>() { // from class: org.gbif.ipt.task.GenerateDwca.1
        @Override // java.util.Comparator
        public int compare(String str, String str2) {
            return str.compareToIgnoreCase(str2);
        }
    }).nullsFirst();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/classes/org/gbif/ipt/task/GenerateDwca$STATE.class */
    /**
     * States of an archive generation task.
     *
     * <p>A new task starts in {@code WAITING}. While running, call() and the steps it invokes
     * move the task through {@code STARTED}, {@code METADATA}, {@code VALIDATING},
     * {@code BUNDLING} and finally {@code COMPLETED}. The remaining constants are set by
     * code outside the visible part of this file.
     */
    public enum STATE {
        WAITING,
        STARTED,
        DATAFILES,
        METADATA,
        BUNDLING,
        COMPLETED,
        ARCHIVING,
        VALIDATING,
        CANCELLED,
        FAILED
    }

    /**
     * Creates a generation task for the given resource. The task starts in the
     * {@code WAITING} state with empty record counters.
     *
     * @param resource            resource to generate the archive for; its short name is passed to the superclass
     * @param reportHandler       handler passed through to the superclass
     * @param dataDir             data directory passed through to the superclass
     * @param sourceManager       source manager kept for later use
     * @param appConfig           application configuration
     * @param vocabulariesManager vocabulary lookup used during validation
     */
    @Inject
    public GenerateDwca(@Assisted Resource resource, @Assisted ReportHandler reportHandler, DataDir dataDir, SourceManager sourceManager, AppConfig appConfig, VocabulariesManager vocabulariesManager) throws IOException {
        super(1000, resource.getShortname(), reportHandler, dataDir);
        // collaborators
        this.resource = resource;
        this.sourceManager = sourceManager;
        this.vocabManager = vocabulariesManager;
        this.cfg = appConfig;
        // initial task state
        this.state = STATE.WAITING;
        this.recordsByExtension = new HashMap<String, Integer>();
        this.currRecordsSkipped = 0;
        this.currRecords = 0;
    }

    /**
     * Writes one tab-delimited data file containing the records of all given mappings and
     * registers it with the archive, as core when the extension row type equals the resource's
     * core row type (case-insensitive) and as extension otherwise.
     *
     * <p>Does nothing when {@code list} is null or empty.
     *
     * @param list mappings to dump; all must map to the same extension
     * @param num  optional value handed through to dumpData unchanged
     * @throws IllegalArgumentException if the mappings do not all share the same extension
     * @throws GeneratorException       if an I/O error occurs while writing the file content
     * @throws InterruptedException     if the thread was interrupted
     * @throws IOException              if the data file cannot be created or closed
     */
    public void addDataFile(List<ExtensionMapping> list, @Nullable Integer num) throws IOException, IllegalArgumentException, InterruptedException, GeneratorException {
        checkForInterruption();
        if (list == null || list.isEmpty()) {
            return;
        }
        this.currRecords = 0;
        this.currRecordsSkipped = 0;
        Extension extension = list.get(0).getExtension();
        this.currExtension = extension.getTitle();
        for (ExtensionMapping mapping : list) {
            if (!extension.equals(mapping.getExtension())) {
                throw new IllegalArgumentException("All mappings for a single data file need to be mapped to the same extension: " + extension.getRowType());
            }
        }
        ArchiveFile dataFile = ArchiveFile.buildTabFile();
        dataFile.setRowType(TERM_FACTORY.findTerm(extension.getRowType()));
        dataFile.setEncoding("UTF-8");
        dataFile.setDateFormat("YYYY-MM-DD");
        // the record id always occupies the first column
        ArchiveField idField = new ArchiveField();
        idField.setIndex(0);
        dataFile.setId(idField);
        List<ExtensionProperty> orderedProperties = getOrderedMappedExtensionProperties(extension, addFieldsToArchive(list, dataFile));
        assignIndexesOrderedByExtension(orderedProperties, dataFile);
        int totalColumns = 1 + orderedProperties.size();
        // lower-case with a fixed locale so the generated file name does not depend on the JVM default locale
        String baseName = extension.getName() == null ? "f" : extension.getName().toLowerCase(Locale.ENGLISH).replaceAll("\\s", FileSplitter.SEPARATOR);
        File file = new File(this.dwcaFolder, createFileName(this.dwcaFolder, baseName));
        Writer writer = FileUtils.startNewUtf8File(file);
        dataFile.addLocation(file.getName());
        addMessage(Level.INFO, "Start writing data file for " + this.currExtension);
        try {
            boolean headerWritten = false;
            for (ExtensionMapping mapping : list) {
                // column index -> property mapping for this particular source mapping
                PropertyMapping[] columns = new PropertyMapping[totalColumns];
                for (ArchiveField field : dataFile.getFields().values()) {
                    if (field.getIndex() != null && field.getIndex().intValue() > 0) {
                        columns[field.getIndex().intValue()] = mapping.getField(field.getTerm().qualifiedName());
                    }
                }
                if (!headerWritten) {
                    writeHeaderLine(orderedProperties, totalColumns, dataFile, writer);
                    headerWritten = true;
                }
                dumpData(writer, columns, mapping, totalColumns, num, this.resource.getDoi());
                this.recordsByExtension.put(extension.getRowType(), Integer.valueOf(this.currRecords));
            }
            if (this.resource.getCoreRowType() == null || !this.resource.getCoreRowType().equalsIgnoreCase(extension.getRowType())) {
                this.archive.addExtension(dataFile);
            } else {
                this.archive.setCore(dataFile);
            }
            addMessage(Level.INFO, "Data file written for " + this.currExtension + " with " + this.currRecords + " records and " + totalColumns + " columns");
            if (this.currRecordsSkipped > 0) {
                addMessage(Level.WARN, "!!! " + this.currRecordsSkipped + " records were skipped for " + this.currExtension + " due to errors interpreting line, or because the line was empty");
            }
        } catch (IOException e) {
            this.log.error("Fatal DwC-A Generator Error encountered while writing header line to data file", e);
            setState(e);
            throw new GeneratorException("Error writing header line to data file", e);
        } finally {
            writer.close();
        }
    }

    /**
     * Writes the header row ("id" followed by the simple name of every property, in list order)
     * and marks the archive file as having one header line to ignore.
     *
     * @param list        properties in column order
     * @param i           total number of columns, including the id column
     * @param archiveFile archive file descriptor to update
     * @param writer      writer of the data file
     * @throws IOException if the row cannot be written
     */
    private void writeHeaderLine(List<ExtensionProperty> list, int i, ArchiveFile archiveFile, Writer writer) throws IOException {
        String[] header = new String[i];
        header[0] = "id";
        int column = 0;
        for (ExtensionProperty property : list) {
            column++;
            header[column] = property.simpleName();
        }
        String headerRow = tabRow(header);
        archiveFile.setIgnoreHeaderLines(1);
        writer.write(headerRow);
    }

    /**
     * Copies the resource's EML file into the archive folder and records it as the
     * archive's metadata location.
     *
     * @throws GeneratorException   if the EML file cannot be copied
     * @throws InterruptedException if the thread was interrupted
     */
    private void addEmlFile() throws GeneratorException, InterruptedException {
        checkForInterruption();
        setState(STATE.METADATA);
        try {
            File emlSource = this.dataDir.resourceEmlFile(this.resource.getShortname());
            File emlTarget = new File(this.dwcaFolder, DataDir.EML_XML_FILENAME);
            org.apache.commons.io.FileUtils.copyFile(emlSource, emlTarget);
            this.archive.setMetadataLocation(DataDir.EML_XML_FILENAME);
            addMessage(Level.INFO, "EML file added");
        } catch (IOException ioe) {
            throw new GeneratorException("Problem occurred while adding EML file to DwC-A folder", ioe);
        }
    }

    /**
     * Creates an archive field for the given term with no default value. A multi-value
     * delimiter is set only when one is supplied and the term is one of
     * {@code DWC_MULTI_VALUE_TERMS}.
     *
     * @param term term the field represents
     * @param str  multi-value delimiter, may be null
     * @return the new archive field
     */
    private ArchiveField buildField(Term term, @Nullable String str) {
        ArchiveField field = new ArchiveField();
        field.setTerm(term);
        field.setDefaultValue(null);
        if (str == null || !(term instanceof DwcTerm)) {
            return field;
        }
        if (DWC_MULTI_VALUE_TERMS.contains(term)) {
            field.setDelimitedBy(str);
        }
        return field;
    }

    /**
     * Zips the archive folder into a temporary file and moves it to the versioned archive
     * location of the resource, replacing any file already there.
     *
     * <p>The temporary zip is always removed if it still exists when this method exits,
     * including when bundling fails part-way.
     *
     * @throws GeneratorException   if the zip could not be created or moved into place
     * @throws InterruptedException if the thread was interrupted
     */
    private void bundleArchive() throws GeneratorException, InterruptedException {
        checkForInterruption();
        setState(STATE.BUNDLING);
        File tmpZip = null;
        BigDecimal emlVersion = this.resource.getEmlVersion();
        try {
            tmpZip = this.dataDir.tmpFile("dwca", ".zip");
            CompressionUtil.zipDir(this.dwcaFolder, tmpZip);
            if (!tmpZip.exists()) {
                throw new GeneratorException("Archive bundling failed: temp archive not created: " + tmpZip.getAbsolutePath());
            }
            File versionedArchive = this.dataDir.resourceDwcaFile(this.resource.getShortname(), emlVersion);
            if (versionedArchive.exists()) {
                org.apache.commons.io.FileUtils.forceDelete(versionedArchive);
            }
            org.apache.commons.io.FileUtils.moveFile(tmpZip, versionedArchive);
        } catch (IOException e) {
            throw new GeneratorException("Problem occurred while bundling DwC-A", e);
        } finally {
            // after a successful move the temp file no longer exists; this only fires on failure
            if (tmpZip != null && tmpZip.exists()) {
                org.apache.commons.io.FileUtils.deleteQuietly(tmpZip);
            }
        }
        addMessage(Level.INFO, "Archive has been compressed");
    }

    /**
     * Re-opens the generated archive folder and validates its core file, the event core
     * (when the core row type is Event) and every extension file.
     *
     * @throws GeneratorException   if validation fails or the archive cannot be read
     * @throws InterruptedException if the thread was interrupted
     */
    private void validate() throws GeneratorException, InterruptedException {
        checkForInterruption();
        setState(STATE.VALIDATING);
        try {
            Archive generated = ArchiveFactory.openArchive(this.dwcaFolder);
            loadBasisOfRecordMapFromVocabulary();
            boolean hasExtensions = !generated.getExtensions().isEmpty();
            validateCoreDataFile(generated.getCore(), hasExtensions);
            if (isEventCore(generated)) {
                validateEventCore(generated);
            }
            if (hasExtensions) {
                validateExtensionDataFiles(generated.getExtensions());
            }
            addMessage(Level.INFO, "Archive validated");
        } catch (IOException ioe) {
            throw new GeneratorException("Problem occurred while validating DwC-A", ioe);
        }
    }

    /**
     * Sorts the given data file on one column, case-insensitively, into a sibling file
     * whose name carries the {@code SORTED_FILE_PREFIX}.
     *
     * @param archiveFile data file to sort
     * @param i           index of the column to sort on
     * @return the sorted file, located next to the original
     * @throws IOException if sorting fails
     */
    private File sortCoreDataFile(ArchiveFile archiveFile, int i) throws IOException {
        File unsorted = archiveFile.getLocationFile();
        File sorted = new File(unsorted.getParentFile(), SORTED_FILE_PREFIX + unsorted.getName());
        int headerLines = archiveFile.getIgnoreHeaderLines().intValue();
        String columnDelimiter = archiveFile.getFieldsTerminatedBy();
        Character enclosedBy = archiveFile.getFieldsEnclosedBy();
        String newline = archiveFile.getLinesTerminatedBy();
        long startedAt = System.currentTimeMillis();
        LineComparator lineComparator = new LineComparator(i, columnDelimiter, enclosedBy, IGNORE_CASE_COMPARATOR);
        GBIF_FILE_UTILS.sort(unsorted, sorted, "UTF-8", i, columnDelimiter, enclosedBy, newline, headerLines, lineComparator, true);
        long elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000;
        this.log.debug("Finished sorting file " + unsorted.getAbsolutePath() + " in " + elapsedSeconds + " secs, check: " + sorted.getAbsoluteFile().toString());
        return sorted;
    }

    /**
     * Validates every extension data file in the given set, one after another.
     *
     * @param set extension files of the archive
     */
    private void validateExtensionDataFiles(Set<ArchiveFile> set) throws InterruptedException, GeneratorException, IOException {
        for (ArchiveFile extensionFile : set) {
            validateExtensionDataFile(extensionFile);
        }
    }

    /**
     * Lazily loads the English basisOfRecord vocabulary into {@code basisOfRecords},
     * keyed by lower-cased term. Does nothing when the map is already loaded.
     *
     * <p>The field is assigned only after the vocabulary was read successfully, so a failed
     * lookup does not leave an empty map behind that would block later retries.
     */
    private void loadBasisOfRecordMapFromVocabulary() {
        if (this.basisOfRecords != null) {
            return;
        }
        Map<String, String> vocabulary = new HashMap<String, String>();
        vocabulary.putAll(this.vocabManager.getI18nVocab(Constants.VOCAB_URI_BASIS_OF_RECORDS, Locale.ENGLISH.getLanguage(), false));
        this.basisOfRecords = MapUtils.getMapWithLowercaseKeys(vocabulary);
    }

    /**
     * Validates one extension data file.
     *
     * <p>Every line must have a value in the first (ID) column. For occurrence extensions,
     * basisOfRecord must be mapped, present on every line and match the vocabulary; when
     * occurrenceID is mapped it must also be present and unique. The file is sorted into a
     * temporary copy first (on the occurrenceID column when it is mapped with an index,
     * otherwise on the first column) so duplicates end up on adjacent lines.
     *
     * <p>The reader is closed and the sorted copy deleted once the whole file has been read
     * or reading has failed.
     *
     * @param archiveFile extension file to validate
     * @throws GeneratorException   if a required term is unmapped, a line cannot be read, or validation fails
     * @throws InterruptedException if the thread was interrupted
     * @throws IOException          if the file cannot be sorted or opened
     */
    private void validateExtensionDataFile(ArchiveFile archiveFile) throws GeneratorException, InterruptedException, IOException {
        Preconditions.checkNotNull(this.resource.getCoreRowType());
        addMessage(Level.INFO, "Validating the extension file: " + archiveFile.getTitle() + ". Depending on the number of records, this can take a while.");
        Term coreIdTerm = TERM_FACTORY.findTerm(AppConfig.coreIdTerm(this.resource.getCoreRowType()));
        Term occurrenceIdTerm = TERM_FACTORY.findTerm(Constants.DWC_OCCURRENCE_ID);
        Term basisOfRecordTerm = TERM_FACTORY.findTerm(Constants.DWC_BASIS_OF_RECORD);
        boolean occurrenceFile = isOccurrenceFile(archiveFile);
        boolean hasOccurrenceId = archiveFile.hasTerm(occurrenceIdTerm);
        int basisOfRecordIndex = -1;
        if (occurrenceFile) {
            if (!archiveFile.hasTerm(basisOfRecordTerm)) {
                addMessage(Level.ERROR, "Archive validation failed, because required term basisOfRecord was not mapped in the occurrence extension data file: " + archiveFile.getTitle());
                throw new GeneratorException("Can't validate DwC-A for resource " + this.resource.getShortname() + ". Required term basisOfRecord was not mapped in the occurrence extension data file: " + archiveFile.getTitle());
            }
            addMessage(Level.INFO, "? Validating the basisOfRecord in the occurrence extension data file is always present and its value matches the Darwin Core Type Vocabulary.");
            if (hasOccurrenceId) {
                addMessage(Level.INFO, "? Validating the occurrenceId in occurrence extension data file is always present and unique. ");
            } else {
                addMessage(Level.WARN, "No occurrenceId found in occurrence extension. To be indexed by GBIF, each occurrence record within a resource must have a unique record level identifier.");
            }
            basisOfRecordIndex = archiveFile.getField(basisOfRecordTerm).getIndex().intValue();
        }
        if (archiveFile.getId() == null) {
            addMessage(Level.ERROR, "Archive validation failed, because the ID field " + coreIdTerm.simpleName() + " was not mapped in the extension data file: " + archiveFile.getTitle());
            throw new GeneratorException("Can't validate DwC-A for resource " + this.resource.getShortname() + ". The ID field was not mapped in the extension data file: " + archiveFile.getTitle());
        }
        addMessage(Level.INFO, "? Validating the ID field " + coreIdTerm.simpleName() + " is always present in extension data file. ");
        // sort on occurrenceID when it is mapped with an index, otherwise on the first column
        int sortColumn = (!hasOccurrenceId || archiveFile.getField(occurrenceIdTerm).getIndex() == null) ? 0 : archiveFile.getField(occurrenceIdTerm).getIndex().intValue();
        File sortedFile = sortCoreDataFile(archiveFile, sortColumn);
        int linesMissingId = 0;
        AtomicInteger missingOccurrenceIds = new AtomicInteger(0);
        AtomicInteger duplicateOccurrenceIds = new AtomicInteger(0);
        AtomicInteger missingBasisOfRecord = new AtomicInteger(0);
        AtomicInteger nonMatchingBasisOfRecord = new AtomicInteger(0);
        AtomicInteger ambiguousBasisOfRecord = new AtomicInteger(0);
        CSVReader reader = CSVReaderFactory.build(sortedFile, "UTF-8", archiveFile.getFieldsTerminatedBy(), archiveFile.getFieldsEnclosedBy(), archiveFile.getIgnoreHeaderLines());
        int line = 0;
        String previousId = null;
        try {
            while (reader.hasNext()) {
                line++;
                if (line % 1000 == 0) {
                    checkForInterruption(line);
                    reportIfNeeded();
                }
                String[] row = reader.next();
                if (row == null || row.length == 0) {
                    continue;
                }
                if (reader.hasRowError() && reader.getException() != null) {
                    throw new GeneratorException("A fatal error was encountered while trying to validate sorted extension data file: " + reader.getErrorMessage(), reader.getException());
                }
                if (Strings.isNullOrEmpty(row[0])) {
                    linesMissingId++;
                }
                if (occurrenceFile) {
                    if (hasOccurrenceId) {
                        previousId = validateIdentifier(row[sortColumn], previousId, missingOccurrenceIds, duplicateOccurrenceIds);
                    }
                    validateBasisOfRecord(row[basisOfRecordIndex], line, missingBasisOfRecord, nonMatchingBasisOfRecord, ambiguousBasisOfRecord);
                }
            }
        } catch (InterruptedException e) {
            setState(e);
            throw e;
        } catch (Exception e) {
            this.log.error("Exception caught while validating extension file", e);
            setState(e);
            throw new GeneratorException("Error while validating extension file occurred on line " + line, e);
        } finally {
            // runs once, after the whole file was read or reading failed
            if (!reader.hasRowError() && reader.getErrorMessage() != null) {
                writePublicationLogMessage("Error reading data: " + reader.getErrorMessage());
            }
            reader.close();
            // the sorted copy lives in the archive folder, so remove it once validation is done
            org.apache.commons.io.FileUtils.deleteQuietly(sortedFile);
        }
        if (linesMissingId > 0) {
            addMessage(Level.ERROR, String.valueOf(linesMissingId) + " line(s) in extension missing an ID " + coreIdTerm.simpleName() + ", which is required when linking the extension record and core record together");
            throw new GeneratorException("Can't validate DwC-A for resource " + this.resource.getShortname() + ". Each line in extension must have an ID " + coreIdTerm.simpleName() + ", which is required in order to link the extension to the core ");
        }
        addMessage(Level.INFO, "✓ Validated each line in extension has an ID " + coreIdTerm.simpleName());
        writePublicationLogMessage("No lines in extension are missing an ID " + coreIdTerm.simpleName());
        if (occurrenceFile) {
            if (hasOccurrenceId) {
                summarizeIdentifierValidation(missingOccurrenceIds, duplicateOccurrenceIds, occurrenceIdTerm.simpleName());
            }
            summarizeBasisOfRecordValidation(missingBasisOfRecord, nonMatchingBasisOfRecord, ambiguousBasisOfRecord);
        }
    }

    /**
     * Validates the core data file.
     *
     * <p>When the core ID term is mapped, or the archive has extensions, the first column
     * must be present and unique on every line. For an occurrence core, basisOfRecord must
     * be mapped, present on every line and match the vocabulary. The file is sorted on its
     * first column into a temporary copy so duplicate IDs end up on adjacent lines.
     *
     * <p>The reader is closed and the sorted copy deleted once the whole file has been read
     * or reading has failed.
     *
     * @param archiveFile core file to validate
     * @param z           true when the archive has at least one extension
     * @throws GeneratorException   if a required term is unmapped, a line cannot be read, or validation fails
     * @throws InterruptedException if the thread was interrupted
     * @throws IOException          if the file cannot be sorted or opened
     */
    private void validateCoreDataFile(ArchiveFile archiveFile, boolean z) throws GeneratorException, InterruptedException, IOException {
        Preconditions.checkNotNull(this.resource.getCoreRowType());
        addMessage(Level.INFO, "Validating the core file: " + archiveFile.getTitle() + ". Depending on the number of records, this can take a while.");
        Term coreIdTerm = TERM_FACTORY.findTerm(AppConfig.coreIdTerm(this.resource.getCoreRowType()));
        Term basisOfRecordTerm = TERM_FACTORY.findTerm(Constants.DWC_BASIS_OF_RECORD);
        boolean occurrenceFile = isOccurrenceFile(archiveFile);
        int basisOfRecordIndex = -1;
        if (occurrenceFile) {
            if (!archiveFile.hasTerm(basisOfRecordTerm)) {
                addMessage(Level.ERROR, "Archive validation failed, because required term basisOfRecord was not mapped in the occurrence core");
                throw new GeneratorException("Can't validate DwC-A for resource " + this.resource.getShortname() + ". Required term basisOfRecord was not mapped in the occurrence core");
            }
            addMessage(Level.INFO, "? Validating the core basisOfRecord is always present and its value matches the Darwin Core Type Vocabulary.");
            basisOfRecordIndex = archiveFile.getField(basisOfRecordTerm).getIndex().intValue();
        }
        boolean validateIds = archiveFile.hasTerm(coreIdTerm) || z;
        if (validateIds) {
            String message = "? Validating the core ID field " + coreIdTerm.simpleName() + " is always present and unique.";
            if (z) {
                message = message + " Note: the core ID field is required to link core records and extension records together. ";
            }
            addMessage(Level.INFO, message);
        }
        File sortedFile = sortCoreDataFile(archiveFile, 0);
        CSVReader reader = CSVReaderFactory.build(sortedFile, "UTF-8", archiveFile.getFieldsTerminatedBy(), archiveFile.getFieldsEnclosedBy(), archiveFile.getIgnoreHeaderLines());
        AtomicInteger missingIds = new AtomicInteger(0);
        AtomicInteger duplicateIds = new AtomicInteger(0);
        AtomicInteger missingBasisOfRecord = new AtomicInteger(0);
        AtomicInteger nonMatchingBasisOfRecord = new AtomicInteger(0);
        AtomicInteger ambiguousBasisOfRecord = new AtomicInteger(0);
        int line = 0;
        String previousId = null;
        try {
            while (reader.hasNext()) {
                line++;
                if (line % 1000 == 0) {
                    checkForInterruption(line);
                    reportIfNeeded();
                }
                String[] row = reader.next();
                if (row == null || row.length == 0) {
                    continue;
                }
                if (reader.hasRowError() && reader.getException() != null) {
                    throw new GeneratorException("A fatal error was encountered while trying to validate sorted core data file: " + reader.getErrorMessage(), reader.getException());
                }
                if (validateIds) {
                    previousId = validateIdentifier(row[0], previousId, missingIds, duplicateIds);
                }
                if (occurrenceFile) {
                    validateBasisOfRecord(row[basisOfRecordIndex], line, missingBasisOfRecord, nonMatchingBasisOfRecord, ambiguousBasisOfRecord);
                }
            }
        } catch (InterruptedException e) {
            setState(e);
            throw e;
        } catch (Exception e) {
            this.log.error("Exception caught while validating archive", e);
            setState(e);
            throw new GeneratorException("Error while validating archive occurred on line " + line, e);
        } finally {
            // runs once, after the whole file was read or reading failed
            if (!reader.hasRowError() && reader.getErrorMessage() != null) {
                writePublicationLogMessage("Error reading data: " + reader.getErrorMessage());
            }
            reader.close();
            // the sorted copy lives in the archive folder, so remove it once validation is done
            org.apache.commons.io.FileUtils.deleteQuietly(sortedFile);
        }
        if (validateIds) {
            summarizeIdentifierValidation(missingIds, duplicateIds, coreIdTerm.simpleName());
        }
        if (occurrenceFile) {
            summarizeBasisOfRecordValidation(missingBasisOfRecord, nonMatchingBasisOfRecord, ambiguousBasisOfRecord);
        }
    }

    /**
     * Checks one identifier against the identifier of the previous line. An empty identifier
     * bumps the "missing" counter; a non-empty one equal (ignoring case) to a non-empty
     * previous identifier is logged and bumps the "duplicate" counter.
     *
     * @param str            identifier of the current line
     * @param str2           identifier of the previous line, may be null
     * @param atomicInteger  counter of lines missing an identifier
     * @param atomicInteger2 counter of lines with a duplicate identifier
     * @return the current identifier, to be passed as the previous one for the next line
     */
    private String validateIdentifier(String str, String str2, AtomicInteger atomicInteger, AtomicInteger atomicInteger2) {
        if (Strings.isNullOrEmpty(str)) {
            atomicInteger.incrementAndGet();
            return str;
        }
        boolean hasPrevious = !Strings.isNullOrEmpty(str2);
        if (hasPrevious && str.equalsIgnoreCase(str2)) {
            writePublicationLogMessage("Duplicate id found: " + str);
            atomicInteger2.incrementAndGet();
        }
        return str;
    }

    /**
     * Checks the basisOfRecord value of one line and updates the matching counter: missing
     * when empty, non-matching when its lower-cased form is not a key of the loaded
     * vocabulary, ambiguous when it matches the vocabulary and equals
     * {@code OccurrencePaths.OCCURRENCE_PATH} ignoring case.
     *
     * @param str            basisOfRecord value of the line
     * @param i              line number, used in the log message
     * @param atomicInteger  counter of lines missing a basisOfRecord
     * @param atomicInteger2 counter of lines whose basisOfRecord is not in the vocabulary
     * @param atomicInteger3 counter of lines using the ambiguous value
     */
    private void validateBasisOfRecord(String str, int i, AtomicInteger atomicInteger, AtomicInteger atomicInteger2, AtomicInteger atomicInteger3) {
        if (Strings.isNullOrEmpty(str)) {
            atomicInteger.incrementAndGet();
            return;
        }
        boolean inVocabulary = this.basisOfRecords.containsKey(str.toLowerCase());
        if (inVocabulary) {
            if (str.equalsIgnoreCase(OccurrencePaths.OCCURRENCE_PATH)) {
                atomicInteger3.incrementAndGet();
            }
            return;
        }
        writePublicationLogMessage("Line #" + i + " has basisOfRecord [" + str + "] that does not match the Darwin Core Type Vocabulary");
        atomicInteger2.incrementAndGet();
    }

    /**
     * Warns when an archive has no occurrence extension, or the extension has no records.
     *
     * @param archive archive to inspect
     */
    private void validateEventCore(Archive archive) throws GeneratorException {
        ArchiveFile occurrences = archive.getExtension(DwcTerm.Occurrence);
        // NOTE(review): the iterator obtained here is never closed - confirm whether it holds a file handle.
        boolean hasOccurrences = occurrences != null && occurrences.iterator().hasNext();
        if (!hasOccurrences) {
            addMessage(Level.WARN, "The sampling event resource has no associated occurrences.");
        }
    }

    /**
     * Reports the outcome of the basisOfRecord checks and fails validation when any line
     * is missing a basisOfRecord or uses one outside the vocabulary. Ambiguous values only
     * produce a warning.
     *
     * @param atomicInteger  number of lines missing a basisOfRecord
     * @param atomicInteger2 number of lines whose basisOfRecord is not in the vocabulary
     * @param atomicInteger3 number of lines using the ambiguous value 'occurrence'
     * @throws GeneratorException if at least one line is missing or has a non-matching basisOfRecord
     */
    private void summarizeBasisOfRecordValidation(AtomicInteger atomicInteger, AtomicInteger atomicInteger2, AtomicInteger atomicInteger3) throws GeneratorException {
        int missing = atomicInteger.get();
        int nonMatching = atomicInteger2.get();
        int ambiguous = atomicInteger3.get();

        if (missing <= 0) {
            writePublicationLogMessage("No lines are missing a basisOfRecord");
        } else {
            addMessage(Level.ERROR, missing + " line(s) are missing a basisOfRecord");
        }

        if (nonMatching <= 0) {
            writePublicationLogMessage("All lines have basisOfRecord that matches the Darwin Core Type Vocabulary");
        } else {
            addMessage(Level.ERROR, nonMatching + " line(s) have basisOfRecord that does not match the Darwin Core Type Vocabulary (please note comparisons are case insensitive)");
        }

        if (ambiguous <= 0) {
            writePublicationLogMessage("No lines have ambiguous basisOfRecord 'occurrence'.");
        } else {
            addMessage(Level.WARN, ambiguous + " line(s) use ambiguous basisOfRecord 'occurrence'. It is advised that occurrence be reserved for cases when the basisOfRecord is unknown. Otherwise, a more specific basisOfRecord should be chosen.");
        }

        boolean valid = missing == 0 && nonMatching == 0;
        if (!valid) {
            addMessage(Level.ERROR, "Archive validation failed, because not every row in the occurrence file(s) has a valid basisOfRecord (please note all basisOfRecord must match Darwin Core Type Vocabulary, and comparisons are case insensitive)");
            throw new GeneratorException("Can't validate DwC-A for resource " + this.resource.getShortname() + ". Each row in the occurrence file(s) must have a basisOfRecord, and each basisOfRecord must match the Darwin Core Type Vocabulary (please note comparisons are case insensitive)");
        }
        addMessage(Level.INFO, "✓ Validated each line has a basisOfRecord, and each basisOfRecord matches the Darwin Core Type Vocabulary");
    }

    /**
     * Reports the outcome of the identifier checks and fails validation when any line is
     * missing the identifier or repeats one.
     *
     * @param atomicInteger  number of lines missing the identifier
     * @param atomicInteger2 number of lines with a duplicate identifier
     * @param str            name of the identifier term, used in messages
     * @throws GeneratorException if at least one identifier is missing or duplicated
     */
    private void summarizeIdentifierValidation(AtomicInteger atomicInteger, AtomicInteger atomicInteger2, String str) throws GeneratorException {
        int missing = atomicInteger.get();
        int duplicates = atomicInteger2.get();

        if (missing <= 0) {
            writePublicationLogMessage("No lines are missing " + str);
        } else {
            addMessage(Level.ERROR, missing + " line(s) missing " + str);
        }

        if (duplicates <= 0) {
            writePublicationLogMessage("No lines have duplicate " + str);
        } else {
            addMessage(Level.ERROR, duplicates + " line(s) having a duplicate " + str + " (please note comparisons are case insensitive)");
        }

        boolean valid = missing == 0 && duplicates == 0;
        if (!valid) {
            addMessage(Level.ERROR, "Archive validation failed, because not every line has a unique " + str + " (please note comparisons are case insensitive)");
            throw new GeneratorException("Can't validate DwC-A for resource " + this.resource.getShortname() + ". Each line must have a " + str + ", and each " + str + " must be unique (please note comparisons are case insensitive)");
        }
        addMessage(Level.INFO, "✓ Validated each line has a " + str + ", and each " + str + " is unique");
    }

    /**
     * Tells whether the row type of the given file is the Darwin Core Occurrence class.
     *
     * @param archiveFile file to check
     * @return true if the file's row type equals {@code DwcTerm.Occurrence}
     */
    private boolean isOccurrenceFile(ArchiveFile archiveFile) {
        boolean occurrenceRowType = archiveFile.getRowType().equals(DwcTerm.Occurrence);
        return occurrenceRowType;
    }

    /**
     * Tells whether the row type of the archive's core file is the Darwin Core Event class.
     *
     * @param archive archive to check
     * @return true if the core row type equals {@code DwcTerm.Event}
     */
    private boolean isEventCore(Archive archive) {
        boolean eventRowType = archive.getCore().getRowType().equals(DwcTerm.Event);
        return eventRowType;
    }

    /**
     * Generates the archive: creates a temporary working folder, writes the data files, adds
     * the EML file and meta descriptor, validates the result and bundles it into a zip.
     *
     * <p>Each failure is recorded and reported exactly once. An {@link InterruptedException}
     * and a {@code GeneratorException} propagate unchanged; any other exception is wrapped in
     * a {@code GeneratorException}. The working folder is removed and the publication log
     * writer closed on every exit path.
     *
     * @return number of records written, keyed by extension row type
     * @throws InterruptedException if generation was interrupted
     * @throws Exception            a {@code GeneratorException} for every other failure
     */
    @Override // java.util.concurrent.Callable
    public Map<String, Integer> call() throws Exception {
        try {
            checkForInterruption();
            setState(STATE.STARTED);
            addMessage(Level.INFO, "Archive generation started for version #" + String.valueOf(this.resource.getEmlVersion()));
            this.dwcaFolder = this.dataDir.tmpDir();
            this.archive = new Archive();
            createDataFiles();
            addEmlFile();
            createMetaFile();
            validate();
            bundleArchive();
            addMessage(Level.INFO, "Archive version #" + String.valueOf(this.resource.getEmlVersion()) + " generated successfully!");
            setState(STATE.COMPLETED);
            return this.recordsByExtension;
        } catch (InterruptedException e) {
            setState(e);
            writeFailureToPublicationLog(e);
            throw e;
        } catch (GeneratorException e) {
            setState(e);
            // in debug mode the failure goes to the publication log, otherwise only to the application log
            if (this.cfg.debug()) {
                writeFailureToPublicationLog(e);
            } else {
                this.log.error("Exception occurred trying to generate Darwin Core Archive for resource " + this.resource.getTitleAndShortname() + ": " + e.getMessage(), e);
            }
            throw e;
        } catch (Exception e) {
            setState(e);
            writeFailureToPublicationLog(e);
            throw new GeneratorException(e);
        } finally {
            if (this.dwcaFolder != null && this.dwcaFolder.exists()) {
                org.apache.commons.io.FileUtils.deleteQuietly(this.dwcaFolder);
            }
            closePublicationLogWriter();
        }
    }

    /**
     * Aborts the task if the current thread has been interrupted.
     *
     * <p>Note that {@link Thread#interrupted()} clears the thread's interrupt flag as a side effect.
     *
     * @throws InterruptedException if the interrupt flag was set; the message contains the last
     *         reported state
     */
    private void checkForInterruption() throws InterruptedException {
        if (!Thread.interrupted()) {
            return;
        }
        final String notice = "Interrupting dwca generator. Last status: " + report().getState();
        this.log.info(notice);
        throw new InterruptedException(notice);
    }

    /**
     * Aborts the task if the current thread has been interrupted, mentioning the line being processed.
     *
     * <p>Note that {@link Thread#interrupted()} clears the thread's interrupt flag as a side effect.
     *
     * @param i line number included in the log and exception message
     * @throws InterruptedException if the interrupt flag was set
     */
    private void checkForInterruption(int i) throws InterruptedException {
        if (!Thread.interrupted()) {
            return;
        }
        final String notice = "Interrupting dwca generator at line " + i + ". Last status: " + report().getState();
        this.log.info(notice);
        throw new InterruptedException(notice);
    }

    /**
     * @return true once the task state is {@code STATE.COMPLETED}
     */
    @Override // org.gbif.ipt.task.ReportingTask
    protected boolean completed() {
        return this.state == STATE.COMPLETED;
    }

    /**
     * Writes the data files by calling {@code addDataFile} with the mappings of every mapped extension.
     *
     * @throws GeneratorException if the core is not mapped (no core, no core row type, or the first
     *         core mapping has no source) or if writing a data file fails
     * @throws InterruptedException if the thread was interrupted
     */
    private void createDataFiles() throws GeneratorException, InterruptedException {
        checkForInterruption();
        setState(STATE.DATAFILES);

        final boolean coreMapped = this.resource.hasCore()
                && this.resource.getCoreRowType() != null
                && this.resource.getCoreMappings().get(0).getSource() != null;
        if (!coreMapped) {
            throw new GeneratorException("Core is not mapped");
        }

        final String writeFailure = "Problem occurred while writing data file";
        for (Extension mappedExtension : this.resource.getMappedExtensions()) {
            report();
            try {
                addDataFile(this.resource.getMappings(mappedExtension.getRowType()), null);
            } catch (IOException ioProblem) {
                throw new GeneratorException(writeFailure, ioProblem);
            } catch (IllegalArgumentException argumentProblem) {
                throw new GeneratorException(writeFailure, argumentProblem);
            }
        }

        addMessage(Level.INFO, "All data files completed");
        report();
    }

    /**
     * Writes the archive descriptor ({@code Archive.META_FN}) into the working folder.
     *
     * @throws GeneratorException if the descriptor could not be written
     * @throws InterruptedException if the thread was interrupted
     */
    private void createMetaFile() throws GeneratorException, InterruptedException {
        checkForInterruption();
        setState(STATE.METADATA);
        final File descriptor = new File(this.dwcaFolder, Archive.META_FN);
        try {
            MetaDescriptorWriter.writeMetaFile(descriptor, this.archive);
            addMessage(Level.INFO, "meta.xml archive descriptor written");
        } catch (IOException ioProblem) {
            throw new GeneratorException("Meta.xml file could not be written", ioProblem);
        }
    }

    /**
     * @return the exception last recorded by this task, as stored in the {@code exception} field
     */
    @Override // org.gbif.ipt.task.ReportingTask
    protected Exception currentException() {
        return this.exception;
    }

    /**
     * Translates the current task state into a human readable status message.
     *
     * @return status text for the current state; for {@code DATAFILES} it includes the current record
     *         count and extension name wrapped in {@code <em>} markup
     */
    @Override // org.gbif.ipt.task.ReportingTask
    protected String currentState() {
        final String description;
        switch (this.state) {
            case WAITING:
                description = "Not started yet";
                break;
            case STARTED:
                description = "Starting archive generation";
                break;
            case DATAFILES:
                description = "Processing record " + this.currRecords + " for data file <em>" + this.currExtension + "</em>";
                break;
            case METADATA:
                description = "Creating metadata files";
                break;
            case BUNDLING:
                description = "Compressing archive";
                break;
            case COMPLETED:
                description = "Archive generated!";
                break;
            case VALIDATING:
                description = "Validating archive";
                break;
            case ARCHIVING:
                description = "Archiving version of archive";
                break;
            case CANCELLED:
                description = CANCELLED_STATE_MSG;
                break;
            case FAILED:
                description = "Failed. Fatal error!";
                break;
            default:
                description = "You should never see this";
                break;
        }
        return description;
    }

    /**
     * Reads every row of the mapping's source and writes the mapped values to {@code writer} as
     * tab-separated lines.
     *
     * <p>Rows are handled as follows:
     * <ul>
     *   <li>null or zero-length rows are ignored silently;</li>
     *   <li>rows flagged with a read error, and rows that are blank, are logged and skipped;</li>
     *   <li>rows shorter than the highest mapped column index are padded with nulls;</li>
     *   <li>rows rejected by the mapping's record filter are logged and skipped;</li>
     *   <li>all other rows are translated, given an id in column 0 and written.</li>
     * </ul>
     * The source iterator is always closed, and a summary of skipped/padded/filtered lines is
     * reported once the source has been read successfully.
     *
     * @param writer destination of the tab-separated rows
     * @param propertyMappingArr property mappings passed on to {@code applyTranslations}
     * @param extensionMapping mapping providing the source, record filter, id column and id suffix
     * @param i number of columns of each output record
     * @param num optional limit; reading stops once {@code currRecords} reaches it
     * @param doi optional DOI passed on to {@code applyTranslations}
     * @throws GeneratorException if reading or writing fails for any reason other than interruption
     * @throws InterruptedException if the thread was interrupted while dumping rows
     */
    private void dumpData(Writer writer, PropertyMapping[] propertyMappingArr, ExtensionMapping extensionMapping, int i, @Nullable Integer num, @Nullable DOI doi) throws GeneratorException, InterruptedException {
        final String idSuffix = StringUtils.trimToEmpty(extensionMapping.getIdSuffix());
        final RecordFilter filter = extensionMapping.getFilter();

        // highest source column index referenced by the id column or any mapped field
        int maxColumnIndex = extensionMapping.getIdColumn() == null ? -1 : extensionMapping.getIdColumn().intValue();
        for (PropertyMapping field : extensionMapping.getFields()) {
            if (field.getIndex() != null && maxColumnIndex < field.getIndex().intValue()) {
                maxColumnIndex = field.getIndex().intValue();
            }
        }

        int recordsWithError = 0;
        int linesWithFewerColumns = 0;
        int recordsFiltered = 0;
        int emptyLines = 0;
        int line = 0;
        ClosableReportingIterator<String[]> rows = null;
        try {
            rows = this.sourceManager.rowIterator(extensionMapping.getSource());
            while (rows.hasNext()) {
                line++;
                if (line % 1000 == 0) {
                    checkForInterruption(line);
                    reportIfNeeded();
                }
                String[] in = rows.next();
                if (in == null || in.length == 0) {
                    continue;
                }
                if (rows.hasRowError()) {
                    writePublicationLogMessage("Error reading line #" + line + "\n" + rows.getErrorMessage());
                    recordsWithError++;
                    this.currRecordsSkipped++;
                    continue;
                }
                if (isEmptyLine(in)) {
                    writePublicationLogMessage("Empty line was skipped. SourceBase:" + extensionMapping.getSource().getName() + " Line #" + line + ": " + printLine(in));
                    emptyLines++;
                    this.currRecordsSkipped++;
                    continue;
                }

                if (in.length <= maxColumnIndex) {
                    writePublicationLogMessage("Line with fewer columns than mapped. SourceBase:" + extensionMapping.getSource().getName() + " Line #" + line + " has " + in.length + " Columns: " + printLine(in));
                    // pad with nulls so every mapped index can be read safely
                    String[] padded = new String[maxColumnIndex + 1];
                    System.arraycopy(in, 0, padded, 0, in.length);
                    in = padded;
                    linesWithFewerColumns++;
                }

                String[] record = new String[i];
                boolean alreadyTranslated = false;
                if (filter != null && filter.getColumn() != null && filter.getComparator() != null && filter.getParam() != null) {
                    if (filter.getFilterTime() == RecordFilter.FilterTime.AfterTranslation) {
                        // the filter has to see translated values, so translate before matching
                        applyTranslations(propertyMappingArr, in, record, extensionMapping.isDoiUsedForDatasetId(), doi);
                        alreadyTranslated = true;
                    }
                    if (!filter.matches(in)) {
                        writePublicationLogMessage("Line did not match the filter criteria and was skipped. SourceBase:" + extensionMapping.getSource().getName() + " Line #" + line + ": " + printLine(in));
                        recordsFiltered++;
                        // actually skip the row, as the log message states
                        continue;
                    }
                }

                // column 0 carries the record id
                final Integer idColumn = extensionMapping.getIdColumn();
                if (idColumn == null) {
                    record[0] = null;
                } else if (idColumn.equals(ExtensionMapping.IDGEN_LINE_NUMBER)) {
                    record[0] = line + idSuffix;
                } else if (idColumn.equals(ExtensionMapping.IDGEN_UUID)) {
                    record[0] = UUID.randomUUID().toString();
                } else if (idColumn.intValue() >= 0) {
                    String sourceId = in[idColumn.intValue()];
                    record[0] = Strings.isNullOrEmpty(sourceId) ? idSuffix : sourceId + idSuffix;
                }

                if (!alreadyTranslated) {
                    applyTranslations(propertyMappingArr, in, record, extensionMapping.isDoiUsedForDatasetId(), doi);
                }

                String row = tabRow(record);
                if (row != null) {
                    writer.write(row);
                    this.currRecords++;
                    if (num != null && this.currRecords >= num.intValue()) {
                        break;
                    }
                }
            }
        } catch (InterruptedException e) {
            // must be handled before the generic handler so a cancellation is not reported as a failure
            setState(e);
            throw e;
        } catch (Exception e) {
            this.log.error("Fatal DwC-A Generator Error encountered", e);
            setState(e);
            throw new GeneratorException("Error writing data file for mapping " + extensionMapping.getExtension().getTitle() + " in source " + extensionMapping.getSource().getName() + ", line " + line, e);
        } finally {
            if (rows != null) {
                // an error message without a row error is reported as a general read error
                if (!rows.hasRowError() && rows.getErrorMessage() != null) {
                    writePublicationLogMessage("Error reading data: " + rows.getErrorMessage());
                }
                try {
                    rows.close();
                } catch (Exception e) {
                    this.log.error("Error while closing iterator", e);
                }
            }
        }

        // common suffix of all summary messages below
        final String mappingInfo = " for mapping " + extensionMapping.getExtension().getTitle() + " in source " + extensionMapping.getSource().getName();
        if (recordsWithError > 0) {
            addMessage(Level.WARN, recordsWithError + " record(s) skipped due to errors" + mappingInfo);
        } else {
            writePublicationLogMessage("No lines were skipped due to errors" + mappingInfo);
        }
        if (emptyLines > 0) {
            addMessage(Level.WARN, emptyLines + " empty line(s) skipped" + mappingInfo);
        } else {
            writePublicationLogMessage("No empty lines were skipped" + mappingInfo);
        }
        if (linesWithFewerColumns > 0) {
            addMessage(Level.WARN, linesWithFewerColumns + " line(s) with fewer columns than mapped" + mappingInfo);
        } else {
            writePublicationLogMessage("No lines with fewer columns than mapped" + mappingInfo);
        }
        if (recordsFiltered > 0) {
            addMessage(Level.INFO, recordsFiltered + " line(s) did not match the filter criteria and got skipped " + mappingInfo);
        } else {
            writePublicationLogMessage("All lines match the filter criteria" + mappingInfo);
        }
    }

    /**
     * Records the given exception and derives the task state from it: {@code CANCELLED} for an
     * {@link InterruptedException}, {@code FAILED} for anything else. A report is triggered afterwards.
     *
     * @param exc exception that ended the task
     */
    private void setState(Exception exc) {
        this.exception = exc;
        if (this.exception instanceof InterruptedException) {
            this.state = STATE.CANCELLED;
        } else {
            this.state = STATE.FAILED;
        }
        report();
    }

    /**
     * Switches the task to the given state and triggers a report.
     *
     * @param state new task state
     */
    private void setState(STATE state) {
        this.state = state;
        report();
    }

    /**
     * Turns an array of column values into one tab-separated line terminated by a newline.
     *
     * <p>Every non-null value has the characters matched by {@code escapeChars} replaced with a
     * space and is then trimmed to null. The cleaned values are written back into the given array.
     *
     * @param strArr column values, must not be null; modified in place
     * @return the tab-separated line, or null if every value in the array was null on entry
     */
    @VisibleForTesting
    protected String tabRow(String[] strArr) {
        Preconditions.checkNotNull(strArr);
        boolean sawValue = false;
        for (int col = 0; col < strArr.length; col++) {
            final String raw = strArr[col];
            if (raw == null) {
                continue;
            }
            sawValue = true;
            strArr[col] = StringUtils.trimToNull(escapeChars.matcher(raw).replaceAll(" "));
        }
        return sawValue ? StringUtils.join((Object[]) strArr, '\t') + "\n" : null;
    }

    /**
     * Fills the output record (from position 1 onwards) with the mapped, translated source values.
     *
     * <p>For each mapping the source value is read by index and, if the mapping's translation
     * contains it, replaced by the translated value, which is also written back into the source
     * row. A null value falls back to the mapping's default value. For the datasetID term the DOI
     * string is used instead when {@code z} is set and a DOI is given.
     *
     * @param propertyMappingArr mappings by output position; entries may be null
     * @param strArr source row; translated values are written back into it
     * @param strArr2 output record receiving the resulting values
     * @param z whether the DOI should be used as datasetID
     * @param doi DOI supplying the datasetID value, may be null
     */
    private void applyTranslations(PropertyMapping[] propertyMappingArr, String[] strArr, String[] strArr2, boolean z, DOI doi) {
        for (int pos = 1; pos < propertyMappingArr.length; pos++) {
            final PropertyMapping mapping = propertyMappingArr[pos];
            if (mapping == null) {
                strArr2[pos] = null;
                continue;
            }

            String value = null;
            if (mapping.getIndex() != null) {
                final int sourceIndex = mapping.getIndex().intValue();
                value = strArr[sourceIndex];
                if (mapping.getTranslation() != null && mapping.getTranslation().containsKey(value)) {
                    value = mapping.getTranslation().get(value);
                    strArr[sourceIndex] = value;
                }
            }
            if (value == null) {
                value = mapping.getDefaultValue();
            }

            final boolean datasetIdTerm = mapping.getTerm().qualifiedName().equalsIgnoreCase(Constants.DWC_DATASET_ID);
            if (datasetIdTerm && z && doi != null) {
                value = doi.getDoiString();
            }
            strArr2[pos] = value;
        }
    }

    /**
     * Renders a row for log output as {@code [v1; v2; ...]}.
     *
     * <p>The separator is spelled out as a literal instead of borrowing an unrelated constant from a
     * maths library. Null elements are rendered as the text {@code null}.
     *
     * @param strArr row values to print
     * @return the bracketed values joined by "; "
     */
    private String printLine(String[] strArr) {
        StringBuilder sb = new StringBuilder("[");
        for (int i = 0; i < strArr.length; i++) {
            if (i > 0) {
                sb.append("; ");
            }
            sb.append(strArr[i]);
        }
        return sb.append("]").toString();
    }

    /**
     * Writes a failure notice followed by the full stack trace of the given throwable to the
     * publication log.
     *
     * @param th cause of the failure
     */
    private void writeFailureToPublicationLog(Throwable th) {
        final StringWriter trace = new StringWriter();
        th.printStackTrace(new PrintWriter(trace));
        writePublicationLogMessage("Archive generation failed!\n" + trace.toString());
    }

    /**
     * Adds an archive field to {@code archiveFile} for every mapped term the extension knows, and
     * collects the mapped terms.
     *
     * <p>A term already present in the archive file is not added again, but its multi-value
     * delimiter must agree with the delimiter of the current mapping's source. When a mapping uses
     * the resource DOI as datasetID and the extension has a datasetID property, a datasetID field is
     * added to the archive file and a matching property mapping is appended to the mapping's fields.
     *
     * @param list mappings to the same extension
     * @param archiveFile archive file receiving the fields
     * @return the set of terms that are mapped
     * @throws GeneratorException if an existing field's multi-value delimiter differs from the
     *         delimiter of a later mapping's source
     */
    private Set<Term> addFieldsToArchive(List<ExtensionMapping> list, ArchiveFile archiveFile) throws GeneratorException {
        Set<Term> mappedTerms = new HashSet<Term>();
        for (ExtensionMapping extensionMapping : list) {
            // multi-value delimiter configured on the source of this mapping
            String delimitedBy = StringUtils.trimToNull(extensionMapping.getSource().getMultiValueFieldsDelimitedBy());
            for (PropertyMapping propertyMapping : extensionMapping.getFields()) {
                Term term = TERM_FACTORY.findTerm(propertyMapping.getTerm().qualifiedName());
                // only terms that the extension actually defines are considered
                if (term == null || extensionMapping.getExtension().getProperty(term) == null) {
                    continue;
                }
                if (archiveFile.hasTerm(term)) {
                    ArchiveField field = archiveFile.getField(term);
                    mappedTerms.add(term);
                    if (field.getDelimitedBy() != null && !field.getDelimitedBy().equals(delimitedBy)) {
                        throw new GeneratorException("More than one type of multi-value field delimiter is being used in the source files mapped to the " + extensionMapping.getExtension().getName() + " extension. Please either ensure all source files mapped to this extension use the same delimiter, otherwise just leave the delimiter blank.");
                    }
                } else if (propertyMapping.getIndex() == null || propertyMapping.getIndex().intValue() >= 0) {
                    // negative indexes are excluded; a missing index is accepted
                    this.log.debug("Handling property mapping for term: " + term.qualifiedName() + " (index " + propertyMapping.getIndex() + ")");
                    archiveFile.addField(buildField(term, delimitedBy));
                    mappedTerms.add(term);
                }
            }
            ExtensionProperty datasetIdProperty = extensionMapping.getExtension().getProperty(DwcTerm.datasetID.qualifiedName());
            if (datasetIdProperty != null && extensionMapping.isDoiUsedForDatasetId()) {
                this.log.debug("Detected that resource DOI to be used as value for datasetID mapping..");
                ArchiveField datasetIdField = buildField(DwcTerm.datasetID, null);
                archiveFile.addField(datasetIdField);
                PropertyMapping datasetIdMapping = new PropertyMapping(datasetIdField);
                datasetIdMapping.setTerm(datasetIdProperty);
                extensionMapping.getFields().add(datasetIdMapping);
                mappedTerms.add(DwcTerm.datasetID);
            }
        }
        return mappedTerms;
    }

    /**
     * Gives each archive field a column index following the order of the given extension properties,
     * starting at 1.
     *
     * <p>A property without a matching archive field, or whose field already has an index, is
     * skipped with a warning; its position is still consumed.
     *
     * @param list extension properties in the desired column order
     * @param archiveFile archive file whose fields receive the indexes
     */
    private void assignIndexesOrderedByExtension(List<ExtensionProperty> list, ArchiveFile archiveFile) {
        int position = 0;
        for (ExtensionProperty property : list) {
            position++;
            final ArchiveField field = archiveFile.getField(TERM_FACTORY.findTerm(property.getQualname()));
            if (field != null && field.getIndex() == null) {
                field.setIndex(Integer.valueOf(position));
            } else {
                this.log.warn("Skipping ExtensionProperty: " + property.getQualname());
            }
        }
    }

    /**
     * Returns the extension's properties, in the extension's own order, reduced to those whose
     * qualified name belongs to one of the given mapped terms.
     *
     * <p>A property whose qualified name is null is kept. The extension's own property list is not
     * modified; a copy is filtered.
     *
     * @param extension extension supplying the ordered properties
     * @param set terms that have been mapped
     * @return a new, typed list of the retained properties
     */
    private List<ExtensionProperty> getOrderedMappedExtensionProperties(Extension extension, Set<Term> set) {
        Set<String> mappedNames = new HashSet<String>();
        for (Term term : set) {
            mappedNames.add(term.qualifiedName());
        }
        List<ExtensionProperty> ordered = new ArrayList<ExtensionProperty>(extension.getProperties());
        for (Iterator<ExtensionProperty> it = ordered.iterator(); it.hasNext();) {
            ExtensionProperty property = it.next();
            if (property.qualifiedName() != null && !mappedNames.contains(property.qualifiedName())) {
                it.remove();
            }
        }
        return ordered;
    }

    /**
     * Chooses a data file name for the given base name that does not clash with files already
     * present in the folder.
     *
     * <p>If no file matching {@code <name>*<extension>} (case-insensitively) exists, the plain
     * {@code <name><extension>} is returned. Otherwise the highest numeric suffix found among the
     * matching files (at least 1) is incremented and appended to the name.
     *
     * <p>A folder that cannot be listed ({@link File#listFiles(FileFilter)} returns null) is treated
     * like a folder without matching files. File names whose extension only matches
     * case-insensitively, or whose suffix cannot be located, no longer cause an exception.
     *
     * @param file folder to look for existing data files in
     * @param str base name of the data file, without extension
     * @return the file name to use, including the extension
     */
    protected String createFileName(File file, String str) {
        FileFilter sameNameFilter = new WildcardFileFilter(str + "*" + TEXT_FILE_EXTENSION, IOCase.INSENSITIVE);
        File[] existing = file.listFiles(sameNameFilter);
        if (existing == null || existing.length == 0) {
            return str + TEXT_FILE_EXTENSION;
        }
        int max = 1;
        for (File candidate : existing) {
            String candidateName = candidate.getName();
            int suffixEnd = candidateName.indexOf(TEXT_FILE_EXTENSION);
            if (suffixEnd < 0) {
                // the filter matched the extension in a different case, e.g. ".TXT"
                suffixEnd = candidateName.length() - TEXT_FILE_EXTENSION.length();
            }
            if (suffixEnd < str.length()) {
                // no room for a suffix between base name and extension
                this.log.debug("No numerical suffix could be parsed from file name: " + Strings.nullToEmpty(candidateName));
                continue;
            }
            try {
                int suffix = Integer.parseInt(candidateName.substring(str.length(), suffixEnd));
                if (suffix >= max) {
                    max = suffix;
                }
            } catch (NumberFormatException e) {
                this.log.debug("No numerical suffix could be parsed from file name: " + Strings.nullToEmpty(candidateName));
            }
        }
        return str + (max + 1) + TEXT_FILE_EXTENSION;
    }

    /**
     * Sets the working folder in which the archive files are assembled.
     *
     * @param file folder to use
     */
    public void setDwcaFolder(File file) {
        this.dwcaFolder = file;
    }

    /**
     * Sets the archive this task works on.
     *
     * @param archive archive to use
     */
    public void setArchive(Archive archive) {
        this.archive = archive;
    }

    /**
     * Tells whether a row carries no content at all.
     *
     * @param strArr row values
     * @return true if every value is null, empty or whitespace only (also for a row without values)
     */
    private boolean isEmptyLine(String[] strArr) {
        for (String value : strArr) {
            if (!StringUtils.isBlank(value)) {
                return false;
            }
        }
        return true;
    }
}
