package org.gbif.common.parsers;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.util.ProcessIdUtil;
import org.gbif.api.vocabulary.Language;
import org.gbif.common.parsers.core.EnumParser;
import org.gbif.ipt.utils.FileUtils;
import org.gbif.utils.file.FileSplitter;
import org.gbif.utils.file.csv.CSVReader;
import org.gbif.utils.file.csv.CSVReaderFactory;

/* loaded from: input_file:WEB-INF/lib/gbif-parsers-0.67.jar:org/gbif/common/parsers/LanguageParser.class */
/**
 * Singleton parser that resolves language names, ISO codes and locale-like strings
 * (for example {@code en_US} or {@code pt-BR}) to a {@link Language} enum value.
 *
 * <p>The lookup dictionary is assembled once, in the private constructor, from:
 * <ol>
 *   <li>the bundled {@code /dictionaries/parse/language.tsv} resource (handed to the superclass),</li>
 *   <li>the names, titles and ISO codes of every {@link Language} constant,</li>
 *   <li>the language codes and display names of every JDK {@link Locale},</li>
 *   <li>the bundled {@code ISO-639-2_utf-8.txt} pipe-delimited table,</li>
 *   <li>the bundled {@code iso-639-3-sil.tab} tab-delimited table.</li>
 * </ol>
 *
 * <p>Any failure while reading the two ISO tables aborts construction with an
 * {@link IllegalStateException} that carries the original cause.
 */
public class LanguageParser extends EnumParser<Language> {
    private static LanguageParser singletonObject = null;

    /** Matches a 2-3 letter language code followed by '_' or '-' and at least one more alphanumeric character. */
    private static final Pattern LOCALE = Pattern.compile("^[a-zA-Z]{2,3}[_-][a-zA-Z0-9]");

    /** Fragments stripped from ISO-639-2 language names to produce additional lookup keys. */
    private static final List<Pattern> REMOVE_FROM_NAME_PATTERNS = Arrays.asList(Pattern.compile("\\(.\\)"), Pattern.compile(", ?langues"), Pattern.compile(" languages"));

    private LanguageParser() {
        super(Language.class, false, LanguageParser.class.getResourceAsStream("/dictionaries/parse/language.tsv"));
        addEnumValues();
        addJdkLocales();
        loadIso639Part2();
        loadIso639Part3();
    }

    /** Registers the enum name, both titles and both ISO codes of every {@link Language} constant. */
    private void addEnumValues() {
        for (Language language : Language.values()) {
            add(language.name(), language);
            add(language.getTitleEnglish(), language);
            add(language.getTitleNative(), language);
            add(language.getIso2LetterCode(), language);
            add(language.getIso3LetterCode(), language);
        }
    }

    /** Registers the codes of every JDK locale plus its display name rendered in every other JDK locale. */
    private void addJdkLocales() {
        // Fetch once: getAvailableLocales() was previously re-invoked on every pass of the inner loop.
        Locale[] locales = Locale.getAvailableLocales();
        for (Locale locale : locales) {
            String iso3 = locale.getISO3Language();
            Language language = Language.fromIsoCode(iso3);
            // NOTE(review): unlike the two file loaders below, this check does not exclude
            // Language.UNKNOWN; left unchanged to preserve existing parse results - confirm intent.
            if (language == null) {
                // Log the code that failed to resolve (the lookup result itself is null here).
                this.log.warn("ISO code {} not part of our language enumeration", iso3);
                continue;
            }
            add(iso3, language);
            add(locale.getDisplayLanguage(), language);
            add(locale.getLanguage(), language);
            for (Locale displayLocale : locales) {
                add(locale.getDisplayLanguage(displayLocale), language);
            }
        }
    }

    /**
     * Loads the pipe-delimited {@code ISO-639-2_utf-8.txt} table. Column 2 is used as the ISO code
     * that selects the {@link Language}; columns 0 and 1 are added verbatim and columns 3 and 4
     * are added together with their {@link #mutateLanguageName(String)} variants.
     * Presumably the columns are alpha-3 (B), alpha-3 (T), alpha-2, English name, French name -
     * TODO confirm against the resource file.
     *
     * @throws IllegalStateException if the table cannot be read
     */
    private void loadIso639Part2() {
        try {
            CSVReader reader = CSVReaderFactory.build(LanguageParser.class.getResourceAsStream("/dictionaries/parse/ISO-639-2_utf-8.txt"), FileUtils.UTF8, "|", (Character) null, (Integer) 0);
            try {
                while (reader.hasNext()) {
                    String[] row = reader.next();
                    if (row.length <= 2) {
                        continue;
                    }
                    String isoCode = row[2];
                    if (StringUtils.isBlank(isoCode)) {
                        continue;
                    }
                    Language language = Language.fromIsoCode(isoCode);
                    if (language == null || language == Language.UNKNOWN) {
                        this.log.warn("ISO code {} not part of our language enumeration", isoCode);
                        continue;
                    }
                    add(row[0], language);
                    add(row[1], language);
                    // Name columns are optional: a short row must not abort initialization.
                    if (row.length > 3) {
                        addAll(mutateLanguageName(row[3]), language);
                    }
                    if (row.length > 4) {
                        addAll(mutateLanguageName(row[4]), language);
                    }
                }
            } finally {
                // Release the underlying classpath stream.
                reader.close();
            }
        } catch (Exception e) {
            this.log.error("Cannot initiate language parser: {}", e.getMessage());
            throw new IllegalStateException("Cannot initiate language parser", e);
        }
    }

    /**
     * Loads the tab-delimited {@code iso-639-3-sil.tab} table (one header row skipped by the reader
     * factory argument). Column 3 is used as the ISO code that selects the {@link Language};
     * columns 0, 1, 2 and, when present, 6 are added as lookup keys.
     *
     * @throws IllegalStateException if the table cannot be read
     */
    private void loadIso639Part3() {
        try {
            CSVReader reader = CSVReaderFactory.buildTabReader(LanguageParser.class.getResourceAsStream("/dictionaries/parse/iso-639-3-sil.tab"), FileUtils.UTF8, 1);
            try {
                while (reader.hasNext()) {
                    String[] row = reader.next();
                    // Index 3 is read below, so at least four columns are required.
                    if (row.length <= 3) {
                        continue;
                    }
                    String isoCode = row[3];
                    if (StringUtils.isBlank(isoCode)) {
                        continue;
                    }
                    Language language = Language.fromIsoCode(isoCode);
                    if (language == null || language == Language.UNKNOWN) {
                        this.log.warn("ISO code {} not part of our language enumeration", isoCode);
                        continue;
                    }
                    add(row[0], language);
                    add(row[1], language);
                    add(row[2], language);
                    if (row.length > 6) {
                        add(row[6], language);
                    }
                }
            } finally {
                // Release the underlying classpath stream.
                reader.close();
            }
        } catch (Exception e) {
            this.log.error("Cannot initiate language parser: {}", e.getMessage());
            throw new IllegalStateException("Cannot initiate language parser", e);
        }
    }

    /** Registers every key in {@code keys} for the given language. */
    private void addAll(Set<String> keys, Language language) {
        for (String key : keys) {
            add(key, language);
        }
    }

    /**
     * Expands a semicolon-separated list of language names into a set of lookup keys: each
     * name as given, plus the name with each of {@link #REMOVE_FROM_NAME_PATTERNS} removed.
     *
     * @param str semicolon-separated names; may be {@code null}
     * @return the distinct name variants, empty when {@code str} is {@code null}
     */
    private Set<String> mutateLanguageName(String str) {
        Set<String> variants = new HashSet<>();
        if (str == null) {
            return variants;
        }
        for (String name : str.split(";")) {
            variants.add(name);
            for (Pattern pattern : REMOVE_FROM_NAME_PATTERNS) {
                variants.add(pattern.matcher(name).replaceAll(""));
            }
        }
        return variants;
    }

    /**
     * Normalizes a raw value before dictionary lookup. When the value starts like a locale
     * identifier (2-3 letters, then '_' or '-', then an alphanumeric character) only the leading
     * language code is passed on to the superclass; otherwise the whole value is.
     *
     * @param str raw value; may be {@code null}
     * @return the superclass-normalized value, or {@code null} when {@code str} is {@code null}
     */
    /* Decompiler note: access modifier reported as changed from protected. */
    @Override // org.gbif.common.parsers.core.EnumParser, org.gbif.common.parsers.core.DictionaryBackedParser
    public String normalize(String str) {
        if (str == null) {
            return null;
        }
        if (LOCALE.matcher(str).find()) {
            // The anchored match guarantees at least four characters with the first separator
            // at index 2 or 3, so the language code ends at whichever of those holds it.
            char third = str.charAt(2);
            int languageEnd = (third == '-' || third == '_') ? 2 : 3;
            return super.normalize(str.substring(0, languageEnd));
        }
        return super.normalize(str);
    }

    /**
     * Returns the shared parser instance, creating it on first use while holding the class lock.
     *
     * @return the singleton {@code LanguageParser}
     * @throws IllegalStateException if the dictionaries cannot be loaded on first use
     */
    public static LanguageParser getInstance() {
        synchronized (LanguageParser.class) {
            if (singletonObject == null) {
                singletonObject = new LanguageParser();
            }
            return singletonObject;
        }
    }
}
