package org.gbif.utils.file;

import com.lowagie.text.pdf.BaseFont;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/gbif-common-0.59.jar:org/gbif/utils/file/CharsetDetection.class */
/**
 * Heuristic detection of the character encoding of a block of bytes.
 *
 * <p>Detection order, as implemented by {@link #detectEncoding()}:
 * <ol>
 *   <li>byte order marks (UTF-8, UTF-16LE, UTF-16BE),</li>
 *   <li>a UTF-16 guess based on where zero bytes occur,</li>
 *   <li>a structural UTF-8 validation,</li>
 *   <li>a scoring contest between ISO-8859-1, Windows-1252 and MacRoman.</li>
 * </ol>
 */
public class CharsetDetection {
    private static final Logger LOG = LoggerFactory.getLogger(CharsetDetection.class);

    /** Score penalty added for every byte that has no assigned character in the candidate 8-bit charset. */
    private static final int UNDEFINED_PENALTY = 100;

    /** Non-ASCII characters whose presence in decoded text lowers (improves) a candidate charset's score. */
    private static final char[] COMMON_NON_ASCII_CHARS = buildCommonChars("äåáàæœčéèêëïñøöüßšž°±");

    private static final Charset LATIN1 = StandardCharsets.ISO_8859_1;

    /** Windows-1252, or {@code null} when the running JVM does not provide it. */
    private static final Charset WINDOWS1252 =
        optionalCharset("Cp1252", "Windows 1252 encoding not supported on this Virtual Machine");

    /** MacRoman, or {@code null} when the running JVM does not provide it. */
    private static final Charset MACROMAN =
        optionalCharset(BaseFont.MACROMAN, "MacRoman encoding not supported on this Virtual Machine");

    /** The bytes being analysed. */
    private final byte[] buffer;

    private CharsetDetection(byte[] bArr) {
        this.buffer = bArr;
    }

    /**
     * Detects the encoding of a file from the bytes returned by {@code FileUtils.readByteBuffer(file)}.
     *
     * @param file the file to inspect
     * @return the detected charset
     * @throws IOException if reading the file fails
     */
    public static Charset detectEncoding(File file) throws IOException {
        Charset detected = new CharsetDetection(FileUtils.readByteBuffer(file).array()).detectEncoding();
        LOG.debug("Detected character encoding {}", detected.displayName());
        return detected;
    }

    /**
     * Detects the encoding of a file from the bytes returned by {@code FileUtils.readByteBuffer(file, i)}.
     *
     * @param file the file to inspect
     * @param i    passed through to {@code FileUtils.readByteBuffer}; presumably the number of bytes to
     *             read — TODO confirm against FileUtils
     * @return the detected charset
     * @throws IOException if reading the file fails
     */
    public static Charset detectEncoding(File file, int i) throws IOException {
        Charset detected = new CharsetDetection(FileUtils.readByteBuffer(file, i).array()).detectEncoding();
        LOG.debug("Detected character encoding {}", detected.displayName());
        return detected;
    }

    /**
     * Returns the charset named by the {@code file.encoding} system property.
     *
     * @return the charset resolved via {@link Charset#forName(String)}
     */
    public static Charset getDefaultSystemCharset() {
        return Charset.forName(System.getProperty("file.encoding"));
    }

    /**
     * Tells whether the array starts with the UTF-16 big-endian byte order mark {@code FE FF}.
     *
     * @param bArr bytes to inspect; {@code null} or fewer than two bytes yields {@code false}
     * @return {@code true} if the first two bytes are {@code FE FF}
     */
    public static boolean hasUTF16BEBom(byte[] bArr) {
        return bArr != null && bArr.length >= 2
            && bArr[0] == (byte) 0xFE && bArr[1] == (byte) 0xFF;
    }

    /**
     * Tells whether the array starts with the UTF-16 little-endian byte order mark {@code FF FE}.
     *
     * @param bArr bytes to inspect; {@code null} or fewer than two bytes yields {@code false}
     * @return {@code true} if the first two bytes are {@code FF FE}
     */
    public static boolean hasUTF16LEBom(byte[] bArr) {
        return bArr != null && bArr.length >= 2
            && bArr[0] == (byte) 0xFF && bArr[1] == (byte) 0xFE;
    }

    /**
     * Tells whether the array starts with the UTF-8 byte order mark {@code EF BB BF}.
     *
     * @param bArr bytes to inspect; {@code null} or fewer than three bytes yields {@code false}
     * @return {@code true} if the first three bytes are {@code EF BB BF}
     */
    public static boolean hasUTF8Bom(byte[] bArr) {
        return bArr != null && bArr.length >= 3
            && bArr[0] == (byte) 0xEF && bArr[1] == (byte) 0xBB && bArr[2] == (byte) 0xBF;
    }

    /** Builds the lookup array: every character of {@code lowerCase} followed by its upper-case form. */
    private static char[] buildCommonChars(String lowerCase) {
        char[] chars = new char[lowerCase.length() * 2];
        int pos = 0;
        for (char c : lowerCase.toCharArray()) {
            chars[pos++] = c;
            chars[pos++] = Character.toUpperCase(c);
        }
        return chars;
    }

    /** Looks up a charset by name; logs {@code warning} and returns {@code null} if the lookup fails. */
    private static Charset optionalCharset(String name, String warning) {
        try {
            return Charset.forName(name);
        } catch (IllegalArgumentException e) {
            // Covers IllegalCharsetNameException and UnsupportedCharsetException, both subclasses.
            LOG.warn(warning);
            return null;
        }
    }

    private static boolean isCommonChar(char c) {
        for (char common : COMMON_NON_ASCII_CHARS) {
            if (c == common) {
                return true;
            }
        }
        return false;
    }

    /** Byte in 0x80..0xBF. */
    private static boolean isContinuationChar(byte b) {
        return b <= -65;
    }

    /** Lead byte in 0xC0..0xDF. */
    private static boolean isTwoBytesSequence(byte b) {
        return -64 <= b && b <= -33;
    }

    /** Lead byte in 0xE0..0xEF. */
    private static boolean isThreeBytesSequence(byte b) {
        return -32 <= b && b <= -17;
    }

    /** Lead byte in 0xF0..0xF7. */
    private static boolean isFourBytesSequence(byte b) {
        return -16 <= b && b <= -9;
    }

    /** Lead byte in 0xF8..0xFB. */
    private static boolean isFiveBytesSequence(byte b) {
        return -8 <= b && b <= -5;
    }

    /** Lead byte in 0xFC..0xFD. */
    private static boolean isSixBytesSequence(byte b) {
        return -4 <= b && b <= -3;
    }

    /**
     * Returns how many continuation bytes must follow the given lead byte, or {@code -1} if the byte
     * cannot start a sequence (a stray continuation byte, 0xFE or 0xFF).
     */
    private static int expectedContinuationBytes(byte lead) {
        if (lead >= 0) {
            return 0;
        }
        if (isTwoBytesSequence(lead)) {
            return 1;
        }
        if (isThreeBytesSequence(lead)) {
            return 2;
        }
        if (isFourBytesSequence(lead)) {
            return 3;
        }
        if (isFiveBytesSequence(lead)) {
            return 4;
        }
        if (isSixBytesSequence(lead)) {
            return 5;
        }
        return -1;
    }

    /**
     * Picks the 8-bit charset with the lowest score among ISO-8859-1, Windows-1252 and MacRoman.
     * ISO-8859-1 wins ties because a later candidate must score strictly lower to replace it.
     */
    private Charset detectCharacterEncoding8bit() {
        long bestScore = testLatin1();
        Charset best = LATIN1;
        if (WINDOWS1252 != null) {
            long score = testWindows1252();
            if (score < bestScore) {
                bestScore = score;
                best = WINDOWS1252;
            }
        }
        if (MACROMAN != null) {
            long score = testMacRoman();
            if (score < bestScore) {
                bestScore = score;
                best = MACROMAN;
            }
        }
        LOG.debug("8bit Encoding guessed: {} with {} rare characters", best, bestScore);
        return best;
    }

    /**
     * Detects the encoding of the wrapped bytes.
     *
     * @return UTF-8, UTF-16LE or UTF-16BE when a matching byte order mark is present; otherwise the
     *         result of the UTF-16 heuristic if it is conclusive; otherwise UTF-8 if the bytes are
     *         structurally valid UTF-8 (this includes empty and pure ASCII input); otherwise the best
     *         scoring 8-bit charset
     */
    public Charset detectEncoding() {
        if (hasUTF8Bom(this.buffer)) {
            return StandardCharsets.UTF_8;
        }
        if (hasUTF16LEBom(this.buffer)) {
            return StandardCharsets.UTF_16LE;
        }
        if (hasUTF16BEBom(this.buffer)) {
            return StandardCharsets.UTF_16BE;
        }
        Charset utf16 = detectUtf16();
        if (utf16 != null) {
            return utf16;
        }
        return isStructurallyValidUtf8() ? StandardCharsets.UTF_8 : detectCharacterEncoding8bit();
    }

    /**
     * Checks that every byte with the high bit set is part of a lead byte plus the right number of
     * continuation bytes. Only the byte structure is checked; code point ranges are not.
     *
     * <p>The whole buffer is checked. A multi-byte sequence cut off by the end of the buffer is
     * tolerated, provided the continuation bytes that are present are well formed, so that a buffer
     * holding only the beginning of a file is not rejected for ending in the middle of a character.
     */
    private boolean isStructurallyValidUtf8() {
        final int length = this.buffer.length;
        int pos = 0;
        while (pos < length) {
            int expected = expectedContinuationBytes(this.buffer[pos]);
            if (expected < 0) {
                return false;
            }
            // Number of continuation bytes actually available before the buffer ends.
            int available = Math.min(expected, length - pos - 1);
            for (int k = 1; k <= available; k++) {
                if (!isContinuationChar(this.buffer[pos + k])) {
                    return false;
                }
            }
            pos += expected + 1;
        }
        return true;
    }

    /**
     * Guesses UTF-16 from the distribution of zero bytes over even and odd offsets.
     *
     * @return UTF-16BE when zero bytes dominate the even offsets, UTF-16LE when they dominate the odd
     *         offsets, plain UTF-16 if the first guess does not decode but UTF-16 does, or
     *         {@code null} when the distribution is inconclusive or nothing decodes
     */
    private Charset detectUtf16() {
        int zerosAtEven = 0;
        int zerosAtOdd = 0;
        for (int idx = 0; idx < this.buffer.length; idx++) {
            if (this.buffer[idx] == 0) {
                if ((idx & 1) == 0) {
                    zerosAtEven++;
                } else {
                    zerosAtOdd++;
                }
            }
        }
        // Require zero bytes on one side to exceed 10% of the buffer and to outnumber the other side by more than 10%.
        int threshold = this.buffer.length / 10;
        boolean tooFewZeros = zerosAtEven <= threshold && zerosAtOdd <= threshold;
        if (tooFewZeros || Math.abs(zerosAtEven - zerosAtOdd) <= threshold) {
            return null;
        }
        Charset guess = zerosAtEven > zerosAtOdd ? StandardCharsets.UTF_16BE : StandardCharsets.UTF_16LE;
        if (decodes(guess)) {
            return guess;
        }
        return decodes(StandardCharsets.UTF_16) ? StandardCharsets.UTF_16 : null;
    }

    /** Tells whether the whole buffer decodes with the given charset without a coding error. */
    private boolean decodes(Charset charset) {
        try {
            charset.newDecoder().decode(ByteBuffer.wrap(this.buffer));
            return true;
        } catch (CharacterCodingException e) {
            return false;
        }
    }

    /**
     * Decodes the buffer with the given charset and returns minus the number of decoded characters
     * that are listed in {@link #COMMON_NON_ASCII_CHARS}.
     *
     * @throws CharacterCodingException if the buffer cannot be decoded with the charset
     */
    private long commonCharBonus(Charset charset) throws CharacterCodingException {
        CharBuffer decoded = charset.newDecoder().decode(ByteBuffer.wrap(this.buffer));
        long bonus = 0;
        while (decoded.hasRemaining()) {
            if (isCommonChar(decoded.get())) {
                bonus--;
            }
        }
        return bonus;
    }

    /**
     * Scores the buffer as ISO-8859-1; lower is better. Bytes in 0x80..0x9F each add
     * {@link #UNDEFINED_PENALTY}. Returns {@link Long#MAX_VALUE} if decoding fails.
     */
    private long testLatin1() {
        try {
            long score = commonCharBonus(LATIN1);
            for (byte b : this.buffer) {
                if (b <= -97) { // 0x80..0x9F
                    score += UNDEFINED_PENALTY;
                }
            }
            return score;
        } catch (CharacterCodingException e) {
            return Long.MAX_VALUE;
        }
    }

    /**
     * Scores the buffer as MacRoman; lower is better. No byte is penalised.
     * Returns {@link Long#MAX_VALUE} if decoding fails.
     */
    private long testMacRoman() {
        try {
            return commonCharBonus(MACROMAN);
        } catch (CharacterCodingException e) {
            return Long.MAX_VALUE;
        }
    }

    /**
     * Scores the buffer as Windows-1252; lower is better. Bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D each
     * add {@link #UNDEFINED_PENALTY}. Returns {@link Long#MAX_VALUE} if decoding fails.
     */
    private long testWindows1252() {
        try {
            long score = commonCharBonus(WINDOWS1252);
            for (byte b : this.buffer) {
                if (b == -127 || b == -115 || b == -113 || b == -112 || b == -99) {
                    score += UNDEFINED_PENALTY;
                }
            }
            return score;
        } catch (CharacterCodingException e) {
            return Long.MAX_VALUE;
        }
    }
}
