package org.gbif.utils.file.tabular;

import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.gbif.utils.file.CharsetDetection;
import org.gbif.utils.file.UnkownCharsetException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/gbif-common-0.36.jar:org/gbif/utils/file/tabular/TabularFileMetadataExtractor.class */
public class TabularFileMetadataExtractor {
    /** Maximum number of lines sampled from the beginning of a file. */
    private static final int MAX_SAMPLE_SIZE = 15;

    /** Buffer length handed to the charset detection. */
    private static final int CHARSET_DETECTION_BUFFER_LENGTH = 16384;

    private static final Logger LOG = LoggerFactory.getLogger(TabularFileMetadataExtractor.class);

    /** Characters evaluated as possible column delimiters. */
    private static final Character[] POTENTIAL_DELIMITER_CHAR = {',', '\t', ';', '|'};

    /** Characters evaluated as possible quote characters. */
    private static final Character[] POTENTIAL_QUOTES_CHAR = {'\"', '\''};

    /** Accepts only stats whose delimiter occurred at least once. */
    private static final Predicate<LineDelimiterStats> CONTAINS_FREQUENCY = stats -> stats.getFrequency() > 0;

    /** Orders map entries by their {@code Long} value, highest value first. */
    private static final Comparator<Map.Entry<Character, Long>> BY_VALUE_LONG_DESC =
            Map.Entry.comparingByValue(Collections.reverseOrder());

    /**
     * Given (delimiter, quote), compiles a pattern matching: the delimiter, optional spaces, the quote,
     * optional spaces, then one character that is not the delimiter.
     */
    private static final BiFunction<Character, Character, Pattern> COMPILE_QUOTE_PATTERN_FCT =
            (delimiter, quote) -> Pattern.compile("[" + delimiter + "][ ]*[" + quote + "][ ]*[^" + delimiter + "]");

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:WEB-INF/lib/gbif-common-0.36.jar:org/gbif/utils/file/tabular/TabularFileMetadataExtractor$LineDelimiterStats.class */
    /**
     * Immutable pairing of a candidate delimiter with the number of times it occurs in a single line.
     */
    public static class LineDelimiterStats {
        // Both values are assigned once in the constructor and never modified afterwards.
        private final Character delimiter;
        private final int frequency;

        /**
         * @param delimiter the delimiter character these stats refer to
         * @param frequency the number of occurrences of {@code delimiter}
         */
        LineDelimiterStats(Character delimiter, int frequency) {
            this.delimiter = delimiter;
            this.frequency = frequency;
        }

        /** @return the delimiter character these stats refer to */
        Character getDelimiter() {
            return this.delimiter;
        }

        /** @return the number of occurrences recorded for the delimiter */
        int getFrequency() {
            return this.frequency;
        }
    }

    /** Not meant to be instantiated: the class only offers static members. */
    private TabularFileMetadataExtractor() {
    }

    /**
     * Detects the character encoding of a file, reads up to {@link #MAX_SAMPLE_SIZE} lines from its start
     * and derives the tabular metadata (delimiter, quote character) from that sample.
     *
     * @param path file to analyse; must be non-null and must not point to a directory
     * @return the metadata extracted from the sample, with the detected encoding set on it
     * @throws NullPointerException     if {@code path} is null
     * @throws IllegalArgumentException if {@code path} points to a directory
     * @throws UnkownCharsetException   if no encoding could be detected; every {@link IOException} raised
     *                                  while detecting the encoding or reading the sample is also rethrown
     *                                  wrapped in this type
     * @throws IOException              declared in the signature; I/O failures raised inside this method
     *                                  are wrapped as described above
     */
    public static TabularFileMetadata extractTabularFileMetadata(Path path) throws IOException, UnkownCharsetException {
        Objects.requireNonNull(path, "filePath shall be provided");
        Preconditions.checkArgument(!Files.isDirectory(path), "filePath should point to a file, not a directory");
        try {
            Charset encoding = CharsetDetection.detectEncoding(path.toFile(), CHARSET_DETECTION_BUFFER_LENGTH);
            if (encoding == null) {
                throw new UnkownCharsetException("Unable to detect the files character encoding");
            }

            List<String> sample = new ArrayList<>(MAX_SAMPLE_SIZE);
            // try-with-resources closes the reader on every path and keeps the primary exception:
            // a failure in close() is recorded as suppressed instead of replacing it.
            try (BufferedReader reader = Files.newBufferedReader(path, encoding)) {
                String line;
                // Stop at end of file or once the sample is full, whichever comes first.
                while (sample.size() < MAX_SAMPLE_SIZE && (line = reader.readLine()) != null) {
                    sample.add(line);
                }
            }

            TabularFileMetadata metadata = extractTabularMetadata(sample);
            metadata.setEncoding(encoding);
            return metadata;
        } catch (IOException e) {
            throw new UnkownCharsetException(e);
        }
    }

    /**
     * Derives the tabular metadata from a sample of lines.
     *
     * <p>When no delimiter can be determined the returned metadata is left untouched; otherwise the
     * delimiter and the most frequently detected quote character (possibly {@code null}) are set.
     *
     * @param list sample lines; must not be null
     * @return a new metadata object, never null
     */
    static TabularFileMetadata extractTabularMetadata(List<String> list) {
        Objects.requireNonNull(list, "sample shall be provided");
        TabularFileMetadata metadata = new TabularFileMetadata();

        Optional<Character> detectedDelimiter = getDelimiterChar(list);
        if (detectedDelimiter.isPresent()) {
            Character delimiter = detectedDelimiter.get();
            // The quote detection is evaluated per line, relative to the chosen delimiter.
            Optional<Character> detectedQuote =
                    getHighestCountOf(list, line -> getQuoteCharWithHighestCount(line, delimiter));
            metadata.setDelimiter(delimiter);
            metadata.setQuotedBy(detectedQuote.orElse(null));
        }
        return metadata;
    }

    /**
     * Applies {@code function} to every line, counts how often each character is returned and yields
     * the character returned most often.
     *
     * @param list     lines to evaluate
     * @param function per-line evaluation; empty results are ignored
     * @return the character with the highest count, or empty if the function never returned a character
     */
    private static Optional<Character> getHighestCountOf(List<String> list, Function<String, Optional<Character>> function) {
        Map<Character, Long> countPerCharacter = list.stream()
                .map(function)
                .filter(Optional::isPresent)
                .map(Optional::get)
                .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));

        return countPerCharacter.entrySet().stream()
                .sorted(BY_VALUE_LONG_DESC)
                .findFirst()
                .map(Map.Entry::getKey);
    }

    /**
     * Tries to determine the delimiter used in a sample of lines.
     *
     * <p>Three sets of candidates are computed:
     * <ol>
     *   <li>most stable: delimiters with the fewest distinct per-line frequencies (delimiters whose
     *       only observed frequency is 0 are ignored);</li>
     *   <li>most frequent: delimiters with the highest total number of occurrences;</li>
     *   <li>most frequent per line: delimiters that were the top delimiter on the most lines.</li>
     * </ol>
     * The sets are intersected pairwise in the order (1,2), (1,3), (2,3); the first intersection that
     * contains exactly one character is the answer.
     *
     * @param list sample lines
     * @return the detected delimiter, or empty if no pair of criteria agrees on a single character
     */
    public static Optional<Character> getDelimiterChar(List<String> list) {
        List<LineDelimiterStats> linesStats = computeLineDelimiterStats(list);

        // Criterion 1: distinct frequencies per delimiter, fewest distinct values first.
        Map<Character, Set<Integer>> delimiterDistinctFrequency = computeDelimiterDistinctFrequency(linesStats)
                .entrySet().stream()
                .filter(e -> e.getValue().size() > 1 || !e.getValue().contains(0))
                .sorted(Comparator.comparingInt(e -> e.getValue().size()))
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue,
                        (first, second) -> second, LinkedHashMap::new));
        Set<Character> mostStableDelimiter = getAllEqualsToFirst(delimiterDistinctFrequency,
                (left, right) -> left.size() == right.size());

        // Criterion 2: total occurrences per delimiter, highest sum first.
        Map<Character, Integer> delimiterFrequencySums = computeDelimiterFrequencySums(linesStats)
                .entrySet().stream()
                .sorted(Map.Entry.<Character, Integer>comparingByValue().reversed())
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue,
                        (first, second) -> second, LinkedHashMap::new));
        Set<Character> mostFrequentDelimiter = getAllEqualsToFirst(delimiterFrequencySums, Integer::equals);

        // Criterion 3: number of lines on which a delimiter was the top one, highest count first.
        Map<Character, Long> delimiterHighestFrequencyPerLine = computeDelimiterHighestFrequencyPerLine(list)
                .entrySet().stream()
                .sorted(Map.Entry.<Character, Long>comparingByValue().reversed())
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue,
                        (first, second) -> second, LinkedHashMap::new));
        Set<Character> mostFrequentDelimiterPerLine = getAllEqualsToFirst(delimiterHighestFrequencyPerLine, Long::equals);

        if (LOG.isDebugEnabled()) {
            LOG.debug("delimiterDistinctFrequency -> " + delimiterDistinctFrequency);
            LOG.debug("mostStableDelimiter -> " + mostStableDelimiter);
            LOG.debug("delimiterFrequencySums -> " + delimiterFrequencySums);
            LOG.debug("mostFrequentDelimiter -> " + mostFrequentDelimiter);
            LOG.debug("delimiterHighestFrequencyPerLine->" + delimiterHighestFrequencyPerLine);
            LOG.debug("mostFrequentDelimiterPerLine ->" + mostFrequentDelimiterPerLine);
        }

        // Pairwise agreement, in priority order; each later pair is only evaluated when needed.
        Optional<Character> agreed = intersectSingle(mostStableDelimiter, mostFrequentDelimiter);
        if (!agreed.isPresent()) {
            agreed = intersectSingle(mostStableDelimiter, mostFrequentDelimiterPerLine);
        }
        if (!agreed.isPresent()) {
            agreed = intersectSingle(mostFrequentDelimiter, mostFrequentDelimiterPerLine);
        }
        return agreed;
    }

    /**
     * Intersects two sets and returns the common element only when there is exactly one.
     *
     * @return the single shared character, or empty when the sets share zero or several characters
     */
    private static Optional<Character> intersectSingle(Set<Character> set, Set<Character> set2) {
        Sets.SetView<Character> shared = Sets.intersection(set, set2);
        if (shared.size() != 1) {
            return Optional.empty();
        }
        return Optional.of(shared.iterator().next());
    }

    /**
     * Collects the keys of all entries whose value is considered equal to the value of the first entry
     * (in the map's iteration order), according to {@code biFunction}.
     *
     * @param map        entries to inspect; iteration order decides which entry is "first"
     * @param biFunction equality test, called as {@code (firstValue, candidateValue)}
     * @param <T>        type of the map values
     * @return the matching keys (always including the first key when the function is reflexive), or an
     *         empty set for an empty map
     */
    private static <T> Set<Character> getAllEqualsToFirst(Map<Character, T> map, BiFunction<T, T, Boolean> biFunction) {
        if (map.isEmpty()) {
            return Collections.emptySet();
        }
        T firstValue = map.entrySet().iterator().next().getValue();
        return map.entrySet().stream()
                .filter(candidate -> biFunction.apply(firstValue, candidate.getValue()))
                .map(Map.Entry::getKey)
                .collect(Collectors.toSet());
    }

    /**
     * Computes, for every line and every potential delimiter, how often the delimiter occurs.
     *
     * @param list lines to analyse
     * @return one flat list holding the stats of all lines, in line order
     */
    static List<LineDelimiterStats> computeLineDelimiterStats(List<String> list) {
        List<LineDelimiterStats> allStats = new ArrayList<>();
        for (String line : list) {
            allStats.addAll(lineToLineDelimiterStats(line));
        }
        return allStats;
    }

    /**
     * Counts the occurrences of each potential delimiter in one line.
     *
     * @param str the line to analyse
     * @return one stats object per potential delimiter, in the order of {@code POTENTIAL_DELIMITER_CHAR}
     */
    private static List<LineDelimiterStats> lineToLineDelimiterStats(String str) {
        List<LineDelimiterStats> statsForLine = new ArrayList<>();
        for (Character candidate : POTENTIAL_DELIMITER_CHAR) {
            int occurrences = StringUtils.countMatches(str, candidate.charValue());
            statsForLine.add(new LineDelimiterStats(candidate, occurrences));
        }
        return statsForLine;
    }

    /**
     * Groups the stats by delimiter and keeps, for each delimiter, the set of distinct frequencies seen.
     *
     * @param list per-line delimiter stats
     * @return delimiter mapped to its distinct frequencies
     */
    static Map<Character, Set<Integer>> computeDelimiterDistinctFrequency(List<LineDelimiterStats> list) {
        return list.stream().collect(
                Collectors.groupingBy(
                        LineDelimiterStats::getDelimiter,
                        Collectors.mapping(LineDelimiterStats::getFrequency, Collectors.toSet())));
    }

    /**
     * Determines the top delimiter of every line and counts on how many lines each delimiter came first.
     * Lines without any potential delimiter do not contribute.
     *
     * @param list lines to analyse
     * @return delimiter mapped to the number of lines on which it had the highest count
     */
    static Map<Character, Long> computeDelimiterHighestFrequencyPerLine(List<String> list) {
        return list.stream()
                .map(TabularFileMetadataExtractor::getDelimiterWithHighestCount2)
                .filter(Optional::isPresent)
                .map(Optional::get)
                .collect(Collectors.groupingBy(LineDelimiterStats::getDelimiter, Collectors.counting()));
    }

    /**
     * Sums the frequencies per delimiter, ignoring stats with a frequency of zero (so a delimiter that
     * never occurs is absent from the result).
     *
     * @param list per-line delimiter stats
     * @return delimiter mapped to its total number of occurrences
     */
    static Map<Character, Integer> computeDelimiterFrequencySums(List<LineDelimiterStats> list) {
        Stream<LineDelimiterStats> observedDelimiters = list.stream().filter(CONTAINS_FREQUENCY);
        return observedDelimiters.collect(
                Collectors.groupingBy(
                        LineDelimiterStats::getDelimiter,
                        Collectors.summingInt(LineDelimiterStats::getFrequency)));
    }

    /**
     * Returns the potential delimiter that occurs most often in the line. On a tie the candidate
     * listed first in {@code POTENTIAL_DELIMITER_CHAR} wins.
     *
     * @param str the line to analyse
     * @return the most frequent delimiter, or empty if none of the candidates occurs
     */
    static Optional<Character> getDelimiterWithHighestCount(String str) {
        // Same selection as getDelimiterWithHighestCount2; only the delimiter itself is exposed here.
        return getDelimiterWithHighestCount2(str).map(LineDelimiterStats::getDelimiter);
    }

    /**
     * Returns the potential delimiter that occurs most often in the line together with its count.
     * On a tie the candidate listed first in {@code POTENTIAL_DELIMITER_CHAR} wins.
     *
     * @param str the line to analyse
     * @return stats of the most frequent delimiter, or empty if none of the candidates occurs
     */
    static Optional<LineDelimiterStats> getDelimiterWithHighestCount2(String str) {
        Character topDelimiter = null;
        int topCount = 0;
        for (Character candidate : POTENTIAL_DELIMITER_CHAR) {
            int occurrences = StringUtils.countMatches(str, candidate.charValue());
            // Strictly greater: an equal count never replaces an earlier candidate.
            if (occurrences > topCount) {
                topCount = occurrences;
                topDelimiter = candidate;
            }
        }
        if (topDelimiter == null) {
            return Optional.empty();
        }
        return Optional.of(new LineDelimiterStats(topDelimiter, topCount));
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Determines which potential quote character is used most often in a line, given its delimiter.
     *
     * <p>For each candidate in {@code POTENTIAL_QUOTES_CHAR} the number of matches of the pattern built
     * by {@code COMPILE_QUOTE_PATTERN_FCT} (delimiter followed by the quote) is counted. The candidate
     * with the strictly highest, non-zero count is returned; on a tie the candidate listed first wins.
     *
     * @param str the line to analyse
     * @param ch  the delimiter used in the line
     * @return the most used quote character, or empty if no candidate matches
     */
    public static Optional<Character> getQuoteCharWithHighestCount(String str, Character ch) {
        int highestCount = 0;
        Character highestCountQuoteChar = null;
        for (Character quoteChar : POTENTIAL_QUOTES_CHAR) {
            // A single matcher per candidate: find() only advances through the input when it is
            // called repeatedly on the same instance. Creating a new matcher per iteration would
            // find the first match over and over and never terminate.
            Matcher matcher = COMPILE_QUOTE_PATTERN_FCT.apply(ch, quoteChar).matcher(str);
            int currentCount = 0;
            while (matcher.find()) {
                currentCount++;
            }
            if (currentCount > highestCount) {
                highestCount = currentCount;
                highestCountQuoteChar = quoteChar;
            }
        }
        return Optional.ofNullable(highestCountQuoteChar);
    }
}
