package org.gbif.nameparser;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.FutureTask;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
import org.gbif.api.model.checklistbank.ParsedName;
import org.gbif.api.vocabulary.NamePart;
import org.gbif.api.vocabulary.Rank;
import org.gbif.utils.concurrent.NamedThreadFactory;
import org.gbif.utils.file.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/name-parser-2.21.jar:org/gbif/nameparser/NormalisedNameParser.class */
public class NormalisedNameParser {
    /** Maximum time, in milliseconds, that {@code parseNormalisedName} waits for the regex match to finish. */
    private final long timeout;
    /** Upper-case letters of a name, written as the inside of a regex character class (no surrounding brackets). */
    protected static final String NAME_LETTERS = "A-ZÏËÖÜÄÉÈČÁÀÆŒ";
    /** Lower-case letters of a name, written as the inside of a regex character class (no surrounding brackets). */
    protected static final String name_letters = "a-zïëöüäåéèčáàæœ";
    /** {@link #NAME_LETTERS} extended with any Unicode upper-case letter ({@code \p{Lu}}). */
    protected static final String AUTHOR_LETTERS = "A-ZÏËÖÜÄÉÈČÁÀÆŒ\\p{Lu}";
    /** {@link #name_letters} extended with any Unicode lower-case letter ({@code \p{Ll}}) and a trailing hyphen. */
    protected static final String author_letters = "a-zïëöüäåéèčáàæœ\\p{Ll}-";
    /** Non-capturing alternation of surname prefixes such as "van der ", "von ", "de la ", "d'", "Mc" or "St. ". */
    protected static final String AUTHOR_PREFIXES = "(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|de|di|da)[`' _]|(?:Des|De|Di|N)[`' _]?|(?:de )?(?:la|le) |d'|D'|Mac|Mc|Le|St\\.? ?|Ou|O')";
    /**
     * Non-capturing regex for one author: optional initials or a capitalised first name, an optional
     * {@link #AUTHOR_PREFIXES} prefix, the surname (optionally double-barrelled), an optional
     * filius/junior/senior style suffix and an optional ": Pers" / ": Fr" sanctioning marker.
     * This is the fully expanded form of the letter and prefix constants above.
     */
    protected static final String AUTHOR = "(?:(?:(?:[A-ZÏËÖÜÄÉÈČÁÀÆŒ\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-ZÏËÖÜÄÉÈČÁÀÆŒ\\p{Lu}][a-zïëöüäåéèčáàæœ\\p{Ll}-?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|de|di|da)[`' _]|(?:Des|De|Di|N)[`' _]?|(?:de )?(?:la|le) |d'|D'|Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-ZÏËÖÜÄÉÈČÁÀÆŒ\\p{Lu}]+[a-zïëöüäåéèčáàæœ\\p{Ll}-?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-ZÏËÖÜÄÉÈČÁÀÆŒ\\p{Lu}]+[a-zïëöüäåéèčáàæœ\\p{Ll}-?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?(?: *: *(?:Pers|Fr)\\.?)?)";
    /**
     * Regex for an author team: an optional leading {@link #AUTHOR} followed by any number of
     * further authors (or "al." / "al"), each introduced by one of the separators
     * "ex", "&amp;", "et", "in", ",", ";" or ".".
     * Built from {@link #AUTHOR} so the single-author regex is defined in exactly one place;
     * the expression is still a compile-time constant.
     */
    protected static final String AUTHOR_TEAM = AUTHOR + "?(?:(?: ?ex\\.? | & | et | in |, ?|; ?|\\.)(?:" + AUTHOR + "|al\\.?))*";
    /**
     * Regex for a publication year: four characters starting with 1 or 2 whose last digit may be
     * "?", an optional suffix out of a, b, c, d, h or "?", and an optional "/" or "-" followed by
     * one to four digits.
     */
    protected static final String YEAR = "[12][0-9][0-9][0-9?][abcdh?]?(?:[/-][0-9]{1,4})?";
    /** Prefix that {@code setRank} checks for on a rank marker before setting the notho name part. */
    private static final String NOTHO = "notho";
    /** Alternation of words that may precede an epithet and are matched as part of it. */
    protected static final String EPHITHET_PREFIXES = "van|novae";
    /** Alternation of words that {@link #EPHITHET} refuses to end with (negative look-behind). */
    protected static final String GENETIC_EPHITHETS = "bacilliform|coliform|coryneform|cytoform|chemoform|biovar|serovar|genomovar|agamovar|cultivar|genotype|serotype|subtype|ribotype|isolate";
    /**
     * Regex for a lower-case epithet: optional leading digits or an elided prefix (d', o', m', l'),
     * an optional {@link #EPHITHET_PREFIXES} word, then at least two epithet characters ending on a
     * word boundary. Look-behinds reject epithets ending in " d" + letter, the word "ex" and any of
     * the {@link #GENETIC_EPHITHETS}.
     */
    protected static final String EPHITHET = "(?:[0-9]+-?|[doml]')?(?:(?:van|novae) [a-z])?[a-zïëöüäåéèčáàæœ+-]{1,}(?<! d)[a-zïëöüäåéèčáàæœ](?<!(?:\\bex|bacilliform|coliform|coryneform|cytoform|chemoform|biovar|serovar|genomovar|agamovar|cultivar|genotype|serotype|subtype|ribotype|isolate))(?=\\b)";
    /**
     * Regex for a capitalised one-word name: an upper-case letter followed by either a dot
     * (abbreviation) or lower-case letters, with an optional hyphenated second part.
     */
    protected static final String MONOMIAL = "[A-ZÏËÖÜÄÉÈČÁÀÆŒ](?:\\.|[a-zïëöüäåéèčáàæœ]+)(?:-[A-ZÏËÖÜÄÉÈČÁÀÆŒ]?[a-zïëöüäåéèčáàæœ]+)?";
    /** End-anchored alternation of the lines of the classpath resource latin-endings.txt; set in the static initialiser. */
    private static final Pattern LATIN_ENDINGS;
    /** Regex for an infrageneric name, either in round brackets or after an infrageneric rank marker; set in the static initialiser. */
    protected static final String INFRAGENERIC;
    /** Regex for an optional "notho" followed by any key of {@code RankUtils.RANK_MARKER_MAP}; set in the static initialiser. */
    protected static final String RANK_MARKER_ALL;
    /** {@link #RANK_MARKER_ALL} anchored at both ends; set in the static initialiser. */
    private static final Pattern RANK_MARKER_ONLY;
    /** Pattern for the canonical part of a name that does not attempt to match authorship; set in the static initialiser. */
    public static final Pattern CANON_NAME_IGNORE_AUTHORS;
    /** Pattern for a complete normalised name including authorship and years; set in the static initialiser. */
    public static final Pattern NAME_PATTERN;
    /** Class logger; final because it is never reassigned. */
    private static final Logger LOG = LoggerFactory.getLogger(NormalisedNameParser.class);
    /**
     * Shared pool that runs the regex matching so callers can wait with a timeout.
     * No core threads, at most 100 threads, idle threads are released after 10 seconds.
     * The {@link SynchronousQueue} hands each task directly to a thread instead of queueing it,
     * and {@link ThreadPoolExecutor.CallerRunsPolicy} runs a task in the submitting thread when
     * no pool thread can take it.
     * NOTE(review): the NamedThreadFactory arguments (10, true) look like thread priority and
     * daemon flag - confirm against org.gbif.utils.concurrent.NamedThreadFactory.
     */
    private static final ExecutorService EXEC = new ThreadPoolExecutor(0, 100, 10, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(), new NamedThreadFactory("NormalisedNameParser", 10, true), new ThreadPoolExecutor.CallerRunsPolicy());
    /**
     * {@link #AUTHOR_TEAM} anchored at both ends, i.e. it only matches strings that consist of an
     * author team and nothing else. References the constant instead of repeating its expansion.
     */
    protected static final Pattern AUTHOR_TEAM_PATTERN = Pattern.compile("^" + AUTHOR_TEAM + "$");
    /**
     * Regex for an infraspecific rank marker: optional "notho" prefix, one of the keys of
     * {@code RankUtils.RANK_MARKER_MAP_INFRASPECIFIC} and an optional trailing dot.
     */
    protected static final String RANK_MARKER_SPECIES = "(?:notho)?(?:" + StringUtils.join(RankUtils.RANK_MARKER_MAP_INFRASPECIFIC.keySet(), "|") + ")\\.?";

    /**
     * Maps a rank to its marker with every dot backslash-escaped, so the marker can be embedded
     * in a regular expression. Despite the name nothing is removed.
     */
    private static final Function<Rank, String> REMOVE_RANK_MARKER = new Function<Rank, String>() {
        @Override
        public String apply(Rank rank) {
            final String marker = rank.getMarker();
            // literal replacement of "." with "\." - no regex needed for a single character
            return marker.replace(".", "\\.");
        }
    };

    /**
     * Regex alternation of microbial rank markers: the fixed markers "bv.", "ct." and "f. sp."
     * plus the escaped markers of {@code RankUtils.INFRASUBSPECIFIC_MICROBIAL_RANKS}.
     */
    protected static final String RANK_MARKER_MICROBIAL = "(?:bv\\.|ct\\.|f\\. ?sp\\.|" + StringUtils.join(Lists.transform(Lists.newArrayList(RankUtils.INFRASUBSPECIFIC_MICROBIAL_RANKS), REMOVE_RANK_MARKER), "|") + ")";

    /* loaded from: input_file:WEB-INF/lib/name-parser-2.21.jar:org/gbif/nameparser/NormalisedNameParser$MatcherCallable.class */
    private class MatcherCallable implements Callable<Matcher> {
        private final String scientificName;

        MatcherCallable(String str) {
            this.scientificName = str;
        }

        /* JADX WARN: Can't rename method to resolve collision */
        @Override // java.util.concurrent.Callable
        public Matcher call() throws Exception {
            Matcher matcher = NormalisedNameParser.NAME_PATTERN.matcher(this.scientificName);
            matcher.find();
            return matcher;
        }
    }

    /**
     * Creates a parser with the given regex timeout.
     *
     * @param j maximum time in milliseconds that {@code parseNormalisedName} waits for the
     *          name pattern to finish matching before giving up
     */
    public NormalisedNameParser(long j) {
        this.timeout = j;
    }

    /**
     * Parses a normalised name string, including authorship, into the given ParsedName.
     * The regex is evaluated on the shared executor and awaited for at most {@code timeout}
     * milliseconds. Nothing is written to {@code parsedName} unless the pattern matched the
     * entire input.
     *
     * <p>Capture groups of {@link #NAME_PATTERN} as used here: 1 genus or above, 2 bracketed
     * infrageneric, 3/4 infrageneric rank marker and name, 5 specific epithet, 6 extra epithet,
     * 7 infraspecific rank marker, 8 infraspecific epithet, 9/10 microbial rank marker and
     * epithet, 12/13 bracket authorship and year, 14/15 authorship and year.
     *
     * @param parsedName the instance to populate
     * @param str        the normalised name string
     * @param rank       optional rank hint; used to decide whether a bracketed infrageneric is
     *                   really a bracket author, and as fallback rank when none was inferred
     * @return true if the whole string matched and parsedName was populated; false if it did not
     *         match, matched only partially, timed out, was interrupted or matching failed
     */
    public boolean parseNormalisedName(ParsedName parsedName, String str, @Nullable Rank rank) {
        LOG.debug("Parse normed name string: {}", str);
        FutureTask<Matcher> task = new FutureTask<Matcher>(new MatcherCallable(str));
        EXEC.execute(task);
        try {
            Matcher matcher = task.get(this.timeout, TimeUnit.MILLISECONDS);
            // group(0) throws IllegalStateException (handled below) when find() did not match
            if (!matcher.group(0).equals(str)) {
                return false;
            }
            if (LOG.isDebugEnabled()) {
                logMatcher(matcher);
            }
            parsedName.setGenusOrAbove(StringUtils.trimToNull(matcher.group(1)));

            boolean bracketedInfrageneric = false;
            if (matcher.group(2) != null) {
                bracketedInfrageneric = true;
                parsedName.setInfraGeneric(StringUtils.trimToNull(matcher.group(2)));
            } else if (matcher.group(4) != null) {
                setRank(parsedName, matcher.group(3));
                parsedName.setInfraGeneric(StringUtils.trimToNull(matcher.group(4)));
            }

            parsedName.setSpecificEpithet(StringUtils.trimToNull(matcher.group(5)));

            String extraEpithet = matcher.group(6);
            if (extraEpithet != null && extraEpithet.length() > 1 && !extraEpithet.contains("null")) {
                parsedName.setRank(Rank.INFRASUBSPECIFIC_NAME);
            }
            String infraspecificMarker = matcher.group(7);
            if (infraspecificMarker != null && infraspecificMarker.length() > 1) {
                setRank(parsedName, infraspecificMarker);
            }
            parsedName.setInfraSpecificEpithet(StringUtils.trimToNull(matcher.group(8)));

            // a microbial rank marker and epithet override the infraspecific ones
            if (matcher.group(9) != null) {
                setRank(parsedName, matcher.group(9));
                parsedName.setInfraSpecificEpithet(matcher.group(10));
            }

            parsedName.setBracketAuthorship(StringUtils.trimToNull(matcher.group(12)));
            if (bracketedInfrageneric && infragenericIsAuthor(parsedName, rank)) {
                // what looked like "(Subgenus)" is treated as the bracket author instead
                parsedName.setBracketAuthorship(parsedName.getInfraGeneric());
                parsedName.setInfraGeneric(null);
                LOG.debug("swapped subrank with bracket author: {}", parsedName.getBracketAuthorship());
            }
            String bracketYear = matcher.group(13);
            if (bracketYear != null && bracketYear.length() > 2) {
                parsedName.setBracketYear(bracketYear.trim());
            }
            parsedName.setAuthorship(StringUtils.trimToNull(matcher.group(14)));
            String year = matcher.group(15);
            if (year != null && year.length() > 2) {
                parsedName.setYear(year.trim());
            }

            lookForIrregularRankMarker(parsedName);
            checkEpithetVsAuthorPrefx(parsedName);

            // fall back to the caller supplied rank only if nothing was inferred from the name
            if (parsedName.getRank() == null && rank != null) {
                parsedName.setRank(rank);
            }
            return true;
        } catch (IllegalStateException e) {
            // no match at all
            return false;
        } catch (InterruptedException e) {
            // Do NOT shut down EXEC here: it is static and shared, and with CallerRunsPolicy a
            // shut down executor silently discards every later task, so all subsequent parses
            // would just time out. Give up on this name and hand the interrupt to the caller.
            task.cancel(true);
            Thread.currentThread().interrupt();
            LOG.warn("Thread got interrupted while parsing name: {}", str, e);
            return false;
        } catch (ExecutionException e) {
            LOG.warn("ExecutionException for name: {}", str, e);
            return false;
        } catch (TimeoutException e) {
            // best effort: mark the abandoned task as cancelled and interrupt its thread
            task.cancel(true);
            LOG.info("Parsing timeout for name: {}", str);
            return false;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Infers a rank from a rank marker string and stores it on the parsed name, replacing any
     * previous rank (also when no rank could be inferred). If a rank was inferred and the trimmed
     * marker starts with "notho", the notho part is set according to the level of that rank.
     *
     * @param parsedName the name to update
     * @param str        the raw rank marker; it is trimmed before use
     */
    public static void setRank(ParsedName parsedName, String str) {
        final String marker = StringUtils.trimToNull(str);
        final Rank inferred = RankUtils.inferRank(marker);
        parsedName.setRank(inferred);

        final boolean isNotho = inferred != null && marker.startsWith(NOTHO);
        if (!isNotho) {
            return;
        }

        // most specific level first; at most one branch applies
        if (inferred.isInfraspecific()) {
            parsedName.setNotho(NamePart.INFRASPECIFIC);
        } else if (inferred == Rank.SPECIES) {
            parsedName.setNotho(NamePart.SPECIFIC);
        } else if (inferred.isInfrageneric()) {
            parsedName.setNotho(NamePart.INFRAGENERIC);
        } else if (inferred == Rank.GENUS) {
            parsedName.setNotho(NamePart.GENERIC);
        }
    }

    /**
     * Decides whether the value stored as infrageneric name should be treated as an author.
     * That is only considered when the name has neither a bracket authorship nor a specific
     * epithet. With a rank hint, the answer is yes unless the rank is infrageneric and not
     * species-or-below. Without a hint, the answer is yes when the infrageneric does not end in
     * one of the known latin endings.
     *
     * @param parsedName the name whose infrageneric part is examined
     * @param rank       optional rank hint, may be null
     */
    private static boolean infragenericIsAuthor(ParsedName parsedName, Rank rank) {
        if (parsedName.getBracketAuthorship() != null) {
            return false;
        }
        if (parsedName.getSpecificEpithet() != null) {
            return false;
        }
        if (rank != null) {
            return !rank.isInfrageneric() || rank.isSpeciesOrBelow();
        }
        final boolean hasLatinEnding = LATIN_ENDINGS.matcher(parsedName.getInfraGeneric()).find();
        return !hasLatinEnding;
    }

    /**
     * Parses only the canonical part of a name with {@link #CANON_NAME_IGNORE_AUTHORS}; no
     * authorship or year is extracted. Unlike {@code parseNormalisedName}, the match is not
     * required to cover the whole string, and it runs in the calling thread without a timeout.
     *
     * @param parsedName the instance to populate
     * @param str        the normalised name string
     * @param rank       optional rank hint, used to drop a bracketed infrageneric that looks like
     *                   an author
     * @return true if the pattern was found in the string, false otherwise
     */
    public boolean parseNormalisedNameIgnoreAuthors(ParsedName parsedName, String str, @Nullable Rank rank) {
        LOG.debug("Parse normed name string ignoring authors: {}", str);
        final Matcher m = CANON_NAME_IGNORE_AUTHORS.matcher(str);
        if (!m.find()) {
            return false;
        }
        if (LOG.isDebugEnabled()) {
            logMatcher(m);
        }

        parsedName.setGenusOrAbove(StringUtils.trimToNull(m.group(1)));

        final String bracketedInfrageneric = m.group(2);
        final String rankedInfrageneric = m.group(4);
        if (bracketedInfrageneric != null) {
            parsedName.setInfraGeneric(StringUtils.trimToNull(bracketedInfrageneric));
            // authors are ignored here, so a bracket author is dropped rather than stored
            if (infragenericIsAuthor(parsedName, rank)) {
                parsedName.setInfraGeneric(null);
            }
        } else if (rankedInfrageneric != null) {
            setRank(parsedName, m.group(3));
            parsedName.setInfraGeneric(StringUtils.trimToNull(rankedInfrageneric));
        }

        parsedName.setSpecificEpithet(StringUtils.trimToNull(m.group(5)));

        final String extraEpithet = m.group(6);
        if (extraEpithet != null && extraEpithet.length() > 1 && !extraEpithet.contains("null")) {
            parsedName.setRank(Rank.INFRASUBSPECIFIC_NAME);
        }

        final String infraspecificMarker = m.group(7);
        if (infraspecificMarker != null && infraspecificMarker.length() > 1) {
            setRank(parsedName, infraspecificMarker);
        }

        final String infraspecificEpithet = m.group(8);
        if (infraspecificEpithet != null && infraspecificEpithet.length() >= 2) {
            setCanonicalInfraSpecies(parsedName, infraspecificEpithet);
        }

        // a microbial rank marker and epithet override the infraspecific ones
        final String microbialMarker = m.group(9);
        if (microbialMarker != null) {
            setRank(parsedName, microbialMarker);
            parsedName.setInfraSpecificEpithet(m.group(10));
        }

        lookForIrregularRankMarker(parsedName);
        return true;
    }

    /**
     * Stores the trimmed epithet as infraspecific epithet, unless it is null or one of the words
     * "sec" / "sensu" (compared case-insensitively), in which case the name is left untouched.
     *
     * @param parsedName the name to update
     * @param str        the candidate infraspecific epithet
     */
    private static void setCanonicalInfraSpecies(ParsedName parsedName, String str) {
        final boolean ignorable = str == null
                || "sec".equalsIgnoreCase(str)
                || "sensu".equalsIgnoreCase(str);
        if (!ignorable) {
            parsedName.setInfraSpecificEpithet(StringUtils.trimToNull(str));
        }
    }

    /**
     * When the name has no rank yet, checks whether the last epithet is in fact nothing but a
     * rank marker. If so the rank is set from it and the epithet is cleared. The infraspecific
     * epithet is examined if present; only otherwise the specific epithet is examined.
     *
     * @param parsedName the name to inspect and possibly update
     */
    private void lookForIrregularRankMarker(ParsedName parsedName) {
        if (parsedName.getRank() != null) {
            return;
        }

        final String infraspecific = parsedName.getInfraSpecificEpithet();
        if (infraspecific != null) {
            if (RANK_MARKER_ONLY.matcher(infraspecific).find()) {
                setRank(parsedName, infraspecific);
                parsedName.setInfraSpecificEpithet(null);
            }
            // the specific epithet is never examined when an infraspecific one exists
            return;
        }

        final String specific = parsedName.getSpecificEpithet();
        if (specific != null && RANK_MARKER_ONLY.matcher(specific).find()) {
            setRank(parsedName, specific);
            parsedName.setSpecificEpithet(null);
        }
    }

    /**
     * When the name has no rank, checks whether the last epithet is really a lower-case author
     * prefix (e.g. "van") that was mistaken for an epithet. If epithet + " " + authorship matches
     * {@link #AUTHOR_TEAM_PATTERN}, the epithet is cleared and the combined string becomes the
     * authorship. The infraspecific epithet is examined if present; only otherwise the specific
     * epithet is examined.
     *
     * <p>Nothing is done when the authorship or the examined epithet is missing. This avoids
     * building strings such as "van null" from a null value; the author pattern cannot match
     * those, so the outcome is unchanged.
     *
     * @param parsedName the name to inspect and possibly update
     */
    private void checkEpithetVsAuthorPrefx(ParsedName parsedName) {
        if (parsedName.getRank() != null) {
            return;
        }
        final String authorship = parsedName.getAuthorship();
        if (authorship == null) {
            return;
        }

        final String infraspecific = parsedName.getInfraSpecificEpithet();
        if (infraspecific != null) {
            final String extendedAuthor = infraspecific + " " + authorship;
            if (AUTHOR_TEAM_PATTERN.matcher(extendedAuthor).find()) {
                LOG.debug("use infraspecific epithet as author prefix");
                parsedName.setInfraSpecificEpithet(null);
                parsedName.setAuthorship(extendedAuthor);
            }
            // the specific epithet is never examined when an infraspecific one exists
            return;
        }

        final String specific = parsedName.getSpecificEpithet();
        if (specific == null) {
            return;
        }
        final String extendedAuthor = specific + " " + authorship;
        if (AUTHOR_TEAM_PATTERN.matcher(extendedAuthor).find()) {
            LOG.debug("use specific epithet as author prefix");
            parsedName.setSpecificEpithet(null);
            parsedName.setAuthorship(extendedAuthor);
        }
    }

    /**
     * Writes every group of a successful match to the debug log, from group 0 (the whole match)
     * up to and including the last capture group.
     *
     * @param matcher a matcher on which a match has been found
     */
    private void logMatcher(Matcher matcher) {
        for (int group = 0; group <= matcher.groupCount(); group++) {
            LOG.debug("  {}: >{}<", Integer.valueOf(group), matcher.group(group));
        }
    }

    static {
        try {
            LATIN_ENDINGS = Pattern.compile("(" + Joiner.on('|').skipNulls().join(FileUtils.streamToList(FileUtils.classpathStream("latin-endings.txt"))) + ")$");
            INFRAGENERIC = "(?:\\( ?([A-ZÏËÖÜÄÉÈČÁÀÆŒ][a-zïëöüäåéèčáàæœ-]+) ?\\)|(" + StringUtils.join(RankUtils.RANK_MARKER_MAP_INFRAGENERIC.keySet(), "|") + ")\\.? ?([" + NAME_LETTERS + "][" + name_letters + "-]+))";
            RANK_MARKER_ALL = "(notho)? *(" + StringUtils.join(RankUtils.RANK_MARKER_MAP.keySet(), "|") + ")\\.?";
            RANK_MARKER_ONLY = Pattern.compile("^" + RANK_MARKER_ALL + "$");
            CANON_NAME_IGNORE_AUTHORS = Pattern.compile("^(×?(?:\\?|[A-ZÏËÖÜÄÉÈČÁÀÆŒ](?:\\.|[a-zïëöüäåéèčáàæœ]+)(?:-[A-ZÏËÖÜÄÉÈČÁÀÆŒ]?[a-zïëöüäåéèčáàæœ]+)?))(?:(?<!ceae) " + INFRAGENERIC + ")?(?: " + AUTHOR_PREFIXES + ")?(?: (×?" + EPHITHET + "))?(?: " + AUTHOR_PREFIXES + ")?(?:( " + EPHITHET + ")?(?:(?: .+)?( " + RANK_MARKER_SPECIES + "[ .]))? (×?" + EPHITHET + "))?(?: (" + RANK_MARKER_MICROBIAL + ")[ .](\\S+))?");
            NAME_PATTERN = Pattern.compile("^(×?(?:\\?|[A-ZÏËÖÜÄÉÈČÁÀÆŒ](?:\\.|[a-zïëöüäåéèčáàæœ]+)(?:-[A-ZÏËÖÜÄÉÈČÁÀÆŒ]?[a-zïëöüäåéèčáàæœ]+)?))(?:(?<!ceae) " + INFRAGENERIC + ")?(?: (×?" + EPHITHET + "))?(?:( " + EPHITHET + ")??(?:(?: .+)??( " + RANK_MARKER_SPECIES + "))?(?: (×?\"?" + EPHITHET + "\"?)))?(?: (" + RANK_MARKER_MICROBIAL + ")[ .](\\S+))?(,?(?: ?\\((" + AUTHOR_TEAM + ")?,?( ?" + YEAR + ")?\\))?( " + AUTHOR_TEAM + ")?(?: ?\\(?,? ?(" + YEAR + ")\\)?)?)$");
        } catch (IOException e) {
            throw new IllegalStateException("Failed to read latin-endings.txt from classpath resources", e);
        }
    }
}
