package org.gbif.nameparser;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.CharMatcher;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.lowagie.text.rtf.parser.destinations.RtfDestinationMgr;
import com.opensymphony.xwork2.conversion.impl.XWorkConverter;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.gbif.common.shaded.com.fasterxml.jackson.dataformat.csv.CsvSchema;
import org.gbif.nameparser.api.Authorship;
import org.gbif.nameparser.api.NamePart;
import org.gbif.nameparser.api.NameType;
import org.gbif.nameparser.api.NomCode;
import org.gbif.nameparser.api.ParsedName;
import org.gbif.nameparser.api.Rank;
import org.gbif.nameparser.api.UnparsableNameException;
import org.gbif.nameparser.util.RankUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/name-parser-3.1.6.jar:org/gbif/nameparser/ParsingJob.class */
class ParsingJob implements Callable<ParsedName> {
    // Regex character-class fragment: ASCII A-Z plus a fixed set of accented capitals and ligatures.
    static final String NAME_LETTERS = "A-ZÏËÖÜÄÉÈČÁÀÆŒ";
    // Same characters as NAME_LETTERS, extended with any Unicode upper-case letter (\p{Lu}).
    static final String AUTHOR_LETTERS = "A-ZÏËÖÜÄÉÈČÁÀÆŒ\\p{Lu}";
    // Lower-case fragment: ASCII a-z, a fixed set of accented letters, any Unicode lower-case letter (\p{Ll}),
    // followed by "-?" (presumably meant as the literal characters '-' and '?' once embedded in [...] — verify at the use site).
    static final String author_letters = "a-zïëöüäåéèčáàæœ\\p{Ll}-?";
    private static final String AUTHOR_TOKEN = "(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?";
    private static final String AUTHOR = "(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?(?:[ '-]?(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?)*";
    private static final String AUTHOR_TEAM = "(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?(?:[ '-]?(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?)*(?:[&,;]+(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?(?:[ '-]?(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?)*)*";
    static final String AUTHORSHIP = "(?:((?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?(?:[ '-]?(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?)*(?:[&,;]+(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?(?:[ '-]?(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?)*)*) ?\\bex[. ])?((?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?(?:[ '-]?(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?)*(?:[&,;]+(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?(?:[ '-]?(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?)*)*)(?: *: *(Pers\\.?|Fr\\.?))?";
    // Four-character year: first char 1 or 2, two digits, then a digit or '?'.
    private static final String YEAR = "[12][0-9][0-9][0-9?]";
    // YEAR, optionally followed by one of a/b/c/d/h/'?', and optionally by '/', ',' or '-' plus 1 to 4 digits.
    private static final String YEAR_LOOSE = "[12][0-9][0-9][0-9?][abcdh?]?(?:[/,-][0-9]{1,4})?";
    // Literal "notho"; the same literal appears as an optional prefix inside RANK_MARKER.
    private static final String NOTHO = "notho";
    // One capital letter followed by either a dot or one or more lower-case letters,
    // optionally followed by a single hyphenated part (optional capital, then lower-case letters).
    static final String MONOMIAL = "[A-ZÏËÖÜÄÉÈČÁÀÆŒ](?:\\.|[a-zïëöüäåéèčáàæœ]+)(?:-[A-ZÏËÖÜÄÉÈČÁÀÆŒ]?[a-zïëöüäåéèčáàæœ]+)?";
    private static final Pattern LATIN_ENDINGS;
    private static final String INFRAGENERIC;
    static final String RANK_MARKER_ALL;
    private static final Pattern RANK_MARKER_ONLY;
    private static char[] QUOTES;
    public static final String HYBRID_MARKER = "×";
    public static final Pattern HYBRID_FORMULA_PATTERN;
    public static final String EXTINCT_MARKER = "†";
    private static final Pattern EXTINCT_PATTERN;

    @VisibleForTesting
    protected static final Pattern CULTIVAR;
    private static final Pattern CULTIVAR_GROUP;
    private static final Pattern INFRASPEC_UPPER;
    private static final Pattern STRAIN;
    public static final Pattern IS_VIRUS_PATTERN;
    public static final Pattern IS_VIRUS_PATTERN_CASE_SENSITIVE;
    private static final Pattern IS_VIRUS_PATTERN_POSTFAIL;
    public static final Pattern IS_GENE;
    private static final Pattern OTU_PATTERN;
    private static final String CANDIDATUS = "(Candidatus\\s|Ca\\.)";
    private static final Pattern IS_CANDIDATUS_PATTERN;
    private static final Pattern IS_CANDIDATUS_QUOTE_PATTERN;

    @VisibleForTesting
    static final Pattern FAMILY_PREFIX;
    private static final Pattern SUPRA_RANK_PREFIX;
    private static final Pattern RANK_MARKER_AT_END;
    private static final Pattern FILIUS_AT_END;
    static final Pattern EXTRACT_SENSU;
    private static final String NOV_RANKS = "((?:[sS]ub)?(?:[fF]am|[gG]en|[sS]s?p(?:ec)?|[vV]ar|[fF](?:orma?)?))";
    private static final Pattern NOV_RANK_MARKER;
    static final Pattern EXTRACT_NOMSTATUS;
    private static final Pattern EXTRACT_REMARKS;
    private static final Pattern COMMA_AFTER_BASYEAR;
    private static final Pattern NORM_APOSTROPHES;
    private static final Pattern NORM_QUOTES;
    private static final Pattern REPL_GENUS_QUOTE;
    private static final Pattern REPL_ENCLOSING_QUOTE;
    private static final Pattern NORM_UPPERCASE_WORDS;
    private static final Pattern NORM_LOWERCASE_BINOMIAL;
    private static final Pattern NORM_WHITESPACE;
    private static final Pattern REPL_UNDERSCORE;
    private static final Pattern NORM_NO_SQUARE_BRACKETS;
    private static final Pattern NORM_BRACKETS_OPEN;
    private static final Pattern NORM_BRACKETS_CLOSE;
    private static final Pattern NORM_BRACKETS_OPEN_STRONG;
    private static final Pattern NORM_BRACKETS_CLOSE_STRONG;
    private static final Pattern NORM_AND;
    private static final Pattern NORM_SUBGENUS;
    private static final Pattern NO_Q_MARKS;
    private static final Pattern NORM_PUNCTUATIONS;
    private static final Pattern NORM_YEAR;
    private static final Pattern NORM_IMPRINT_YEAR;
    private static final Pattern NORM_HYBRIDS_GENUS;
    private static final Pattern NORM_HYBRIDS_EPITH;
    private static final Pattern NORM_HYBRIDS_FORM;
    private static final Pattern NORM_TF_GENUS;
    private static final Pattern REPL_IN_REF;
    private static final Pattern REPL_RANK_PREFIXES;
    private static final Pattern MANUSCRIPT_NAMES;
    private static final Pattern MANUSCRIPT_SUFFIX;
    private static final Pattern REPL_AFF;
    private static final Pattern NO_LETTERS;
    private static final Pattern REMOVE_PLACEHOLDER_AUTHOR;
    private static final Pattern PLACEHOLDER_GENUS;
    private static final String PLACEHOLDER_NAME = "(?:allocation|awaiting|deleted?|dummy|incertae sedis|mixed|not assigned|not stated|place ?holder|temp|tobedeleted|unaccepted|unallocated|unassigned|uncertain|unclassed|unclassified|uncultured|undescribed|undetermined|unknown|unnamed|unplaced|unspecified)";
    private static final Pattern REMOVE_PLACEHOLDER_INFRAGENERIC;
    private static final Pattern PLACEHOLDER;
    private static final Pattern DOUBTFUL;
    private static final Pattern DOUBTFUL2;
    private static final Pattern XML_ENTITY_STRIP;
    private static final Pattern AMPERSAND_ENTITY;
    private static final Pattern XML_TAGS;
    private static final Pattern STARTING_EPITHET;
    private static final Pattern FORM_SPECIALIS;
    private static final Pattern SENSU_LATU;
    private static final Pattern TYPE_TO_VAR;

    @VisibleForTesting
    static final Pattern POTENTIAL_NAME_PATTERN;
    private static final Pattern REMOVE_INTER_RANKS;
    private static final String SKIP_AUTHORS = "(?:\\b[ \\p{Ll}'(-]{0,3}\\p{Lu}.*?\\b)??";
    public static final Pattern NAME_PATTERN;
    private final Rank rank;
    private final String scientificName;
    private final ParsedName pn = new ParsedName();
    private boolean ignoreAuthorship;
    // SLF4J logger for this class (note: declared static but not final).
    static Logger LOG = LoggerFactory.getLogger((Class<?>) ParsingJob.class);
    // Characters that separate the members of an author team: comma and ampersand.
    private static final CharMatcher AUTHORTEAM_DELIMITER = CharMatcher.anyOf(",&");
    // Splits on AUTHORTEAM_DELIMITER, trimming each piece and dropping empty ones.
    private static final Splitter AUTHORTEAM_SPLITTER = Splitter.on(AUTHORTEAM_DELIMITER).trimResults().omitEmptyStrings();
    // Splits on CsvSchema.DEFAULT_ARRAY_ELEMENT_SEPARATOR, trimming and dropping empty pieces.
    // NOTE(review): the constant looks like a decompiler substitution for a string literal (presumably ";") — confirm.
    private static final Splitter AUTHORTEAM_SEMI_SPLITTER = Splitter.on(CsvSchema.DEFAULT_ARRAY_ELEMENT_SEPARATOR).trimResults().omitEmptyStrings();
    // Matches a whole string made of exactly two comma-free parts separated by one comma (optional spaces around it).
    private static final Pattern AUTHOR_INITIAL_SWAP = Pattern.compile("^([^,]+) *, *([^,]+)$");
    // Matches "hort", "hortus", "hortusa" or "cv" followed by '.' or ' ' and then "ex "; flag 2 is Pattern.CASE_INSENSITIVE.
    private static final Pattern NORM_EX_HORT = Pattern.compile("\\b(?:hort(?:usa?)?|cv)[. ]ex ", 2);
    static final Pattern AUTHOR_TEAM_PATTERN = Pattern.compile("^(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?(?:[ '-]?(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?)*(?:[&,;]+(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?(?:[ '-]?(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?)*)*$");
    // Regex for an infraspecific rank marker: optional "notho" prefix, then either "sp" (not preceded by "f " or "f.")
    // or any key of RankUtils.RANK_MARKER_MAP_INFRASPECIFIC. A "hort" alternative in the middle of the alternation is
    // rewritten so that it does not match when directly followed by ".ex".
    static final String RANK_MARKER = ("(?:notho)?(?:(?<!f[ .])sp|" + StringUtils.join(RankUtils.RANK_MARKER_MAP_INFRASPECIFIC.keySet(), "|") + ")").replace("|hort|", "|hort(?!\\.ex)|");
    // Regex alternation of "bv.", "ct.", "f.sp." and the markers of RankUtils.INFRASUBSPECIFIC_MICROBIAL_RANKS,
    // with every dot in a marker escaped so it matches literally.
    static final String RANK_MARKER_MICROBIAL = "(?:bv\\.|ct\\.|f\\.sp\\.|" + StringUtils.join(Lists.transform(Lists.newArrayList(RankUtils.INFRASUBSPECIFIC_MICROBIAL_RANKS), new Function<Rank, String>() { // from class: org.gbif.nameparser.ParsingJob.1
        @Override // com.google.common.base.Function
        @Nullable
        public String apply(@Nullable Rank rank) {
            // NOTE(review): rank is annotated @Nullable but dereferenced without a null check.
            return rank.getMarker().replaceAll("\\.", "\\\\.");
        }
    }), "|") + ")";
    // Lower-case counterpart of NAME_LETTERS (regex character-class fragment).
    static final String name_letters = "a-zïëöüäåéèčáàæœ";
    // Lower-case author suffix words; used in EPHITHET to reject epithets ending in one of them.
    private static final String AUTHOR_TOKEN_3 = "fil|filius|hort|jun|junior|sen|senior";
    // Word endings that EPHITHET refuses as the end of an epithet.
    private static final String UNALLOWED_EPITHET_ENDING = "bacilliform|coliform|coryneform|cytoform|chemoform|biovar|serovar|genomovar|agamovar|cultivar|genotype|serotype|subtype|ribotype|isolate";
    // Regex for an epithet:
    //  - optional prefix: digits with an optional hyphen, one of d'/o'/m'/l', or "van "/"novae " plus one ASCII lower-case letter;
    //  - must not start with a RANK_MARKER followed by a word boundary;
    //  - one or more name letters, '+' or '-', then a final name letter that is not directly preceded by " d";
    //  - must not end in "ex", "la"/"le", "van"/"von" or an AUTHOR_TOKEN_3 word (each optionally dotted),
    //    nor in one of the UNALLOWED_EPITHET_ENDING words;
    //  - must end at a word boundary.
    static final String EPHITHET = "(?:[0-9]+-?|[doml]'|(?:van|novae) [a-z])?(?!" + RANK_MARKER + "\\b)[" + name_letters + "+-]{1,}(?<! d)[" + name_letters + "](?<!(?:\\b(?:ex|l[ae]|v[ao]n|" + AUTHOR_TOKEN_3 + ")\\.?|" + UNALLOWED_EPITHET_ENDING + "))(?=\\b)";

    /**
     * Builds a matcher for {@code pattern} whose input is {@code str} wrapped in an
     * {@link InterruptibleCharSequence} instead of the plain string.
     *
     * @param pattern the compiled pattern to match with
     * @param str the text to match against
     * @return a new matcher over the wrapped input
     */
    static Matcher interruptableMatcher(Pattern pattern, String str) {
        final CharSequence wrappedInput = new InterruptibleCharSequence(str);
        return pattern.matcher(wrappedInput);
    }

    /**
     * Creates a parsing job for one name string. The given rank is also stored on the
     * {@link ParsedName} being built.
     * <p>Decompiler note: the access modifier was changed from package-private.
     *
     * @param str the scientific name to parse, must not be null
     * @param rank the rank supplied by the caller, must not be null
     * @throws NullPointerException if either argument is null
     */
    public ParsingJob(String str, Rank rank) {
        Preconditions.checkNotNull(str);
        Preconditions.checkNotNull(rank);
        this.scientificName = str;
        this.rank = rank;
        this.pn.setRank(rank);
    }

    /**
     * Aborts parsing: always throws, never returns normally. The declared return type
     * only exists so the call can be used where a {@link ParsedName} is expected.
     *
     * @param nameType the name type to report in the exception
     * @return never returns
     * @throws UnparsableNameException always, carrying {@code nameType} and the original name string
     */
    private ParsedName unparsable(NameType nameType) throws UnparsableNameException {
        final UnparsableNameException failure = new UnparsableNameException(nameType, this.scientificName);
        throw failure;
    }

    /**
     * Parses the scientific name this job was created with.
     * <p>The name is pre-cleaned first. If the cleaned string matches {@code OTU_PATTERN} the
     * upper-cased first capture group becomes the uninomial of an OTU name and the job is
     * complete; otherwise the regular {@link #parse(String)} pipeline runs.
     *
     * @return the {@link ParsedName} instance populated by this job
     * @throws UnparsableNameException if the parsing pipeline rejects the name
     */
    @Override // java.util.concurrent.Callable
    public ParsedName call() throws UnparsableNameException {
        // Evaluate once so the start time and the log statement always agree.
        final boolean debug = LOG.isDebugEnabled();
        final long start = debug ? System.currentTimeMillis() : 0L;

        final String preCleaned = preClean(this.scientificName);
        final Matcher otu = OTU_PATTERN.matcher(preCleaned);
        if (otu.find()) {
            // Locale.ROOT keeps the identifier stable regardless of the JVM default locale
            // (e.g. a Turkish default would turn 'i' into a dotted capital I).
            this.pn.setUninomial(otu.group(1).toUpperCase(Locale.ROOT));
            this.pn.setType(NameType.OTU);
            setRankIfNotContradicting(Rank.SPECIES);
            this.pn.setState(ParsedName.State.COMPLETE);
        } else {
            parse(preCleaned);
        }

        if (debug) {
            LOG.debug("Parsing time: {} for {}", System.currentTimeMillis() - start, this.pn);
        }
        return this.pn;
    }

    /**
     * Main parsing pipeline. Works on the pre-cleaned name string and fills {@code pn}.
     * <p>The method applies a fixed sequence of pattern checks; most of them either strip the matched
     * part from the working string and record it on {@code pn}, or abort via {@link #unparsable(NameType)}
     * (which always throws). The remaining string is then normalised and handed to
     * {@link #parseNormalisedName(String)}; finally name type, doubtful flag, rank and code are derived.
     * The order of the steps matters because each step works on the output of the previous ones.
     *
     * @param str the pre-cleaned name string
     * @throws UnparsableNameException if the name is recognised as a placeholder, virus, hybrid formula,
     *         as containing no name, or cannot be parsed by the name pattern
     */
    private void parse(String str) throws UnparsableNameException {
        // drop the first match of EXTINCT_PATTERN
        String replaceFirst = EXTINCT_PATTERN.matcher(str).replaceFirst("");
        // NOTE: matched against the raw scientificName field, not the pre-cleaned argument; on a match the
        // working string is rebuilt from scientificName, so the pre-cleaning and extinct removal above are discarded.
        Matcher matcher = IS_CANDIDATUS_QUOTE_PATTERN.matcher(this.scientificName);
        if (matcher.find()) {
            this.pn.setCandidatus(true);
            replaceFirst = matcher.replaceFirst(matcher.group(2));
        }
        String replaceAll = TYPE_TO_VAR.matcher(replaceFirst).replaceAll("$1var");
        Matcher matcher2 = INFRASPEC_UPPER.matcher(replaceAll);
        // str2 keeps the original infraspecific text; it is swapped for the dummy "vulgaris" during parsing
        // and written back to pn after parseNormalisedName succeeded (see end of method).
        String str2 = null;
        if (matcher2.find()) {
            replaceAll = matcher2.replaceFirst("vulgaris");
            // replaceFirst leaves the matcher on the first match, so group(1) is still readable here
            str2 = matcher2.group(1);
            this.pn.setType(NameType.INFORMAL);
        }
        // placeholder handling: strip placeholder parts and mark the name type
        Matcher matcher3 = REMOVE_PLACEHOLDER_AUTHOR.matcher(replaceAll);
        if (matcher3.find()) {
            replaceAll = matcher3.replaceFirst(" $1");
            this.pn.setType(NameType.PLACEHOLDER);
        }
        Matcher matcher4 = REMOVE_PLACEHOLDER_INFRAGENERIC.matcher(replaceAll);
        if (matcher4.find()) {
            replaceAll = matcher4.replaceFirst("");
            this.pn.setType(NameType.PLACEHOLDER);
        }
        Matcher matcher5 = PLACEHOLDER_GENUS.matcher(replaceAll);
        if (matcher5.find()) {
            // a placeholder genus is replaced by "?"
            replaceAll = matcher5.replaceFirst("? ");
            this.pn.setType(NameType.PLACEHOLDER);
        }
        // hard stops: unparsable() always throws
        if (PLACEHOLDER.matcher(replaceAll).find()) {
            unparsable(NameType.PLACEHOLDER);
        }
        if (IS_VIRUS_PATTERN.matcher(replaceAll).find() || IS_VIRUS_PATTERN_CASE_SENSITIVE.matcher(replaceAll).find()) {
            unparsable(NameType.VIRUS);
        }
        if (IS_GENE.matcher(replaceAll).find()) {
            this.pn.setType(NameType.INFORMAL);
        }
        // first (light) normalisation pass
        String normalize = normalize(replaceAll);
        if (Strings.isNullOrEmpty(normalize)) {
            unparsable(NameType.NO_NAME);
        }
        Matcher matcher6 = FAMILY_PREFIX.matcher(normalize);
        if (matcher6.find()) {
            normalize = matcher6.replaceFirst("$1");
        }
        Matcher matcher7 = SUPRA_RANK_PREFIX.matcher(normalize);
        if (matcher7.find()) {
            // look up the rank for the matched prefix after removing XWorkConverter.PERIOD from it
            // NOTE(review): the constant looks like a decompiler substitution for a string literal (presumably ".") — confirm.
            this.pn.setRank(RankUtils.RANK_MARKER_MAP.get(matcher7.group(1).replace(XWorkConverter.PERIOD, "")));
            normalize = matcher7.replaceFirst("");
        }
        // cultivar group / grex: group(1) is the epithet, group(2) decides between GREX and CULTIVAR_GROUP
        Matcher matcher8 = CULTIVAR_GROUP.matcher(normalize);
        if (matcher8.find()) {
            this.pn.setCultivarEpithet(matcher8.group(1));
            normalize = matcher8.replaceFirst(" ");
            String group = matcher8.group(2);
            if (group.equalsIgnoreCase("grex") || group.equalsIgnoreCase("gx")) {
                this.pn.setRank(Rank.GREX);
            } else {
                this.pn.setRank(Rank.CULTIVAR_GROUP);
            }
        }
        // plain cultivar: group(2) is the epithet, group(1) is kept in the working string
        Matcher matcher9 = CULTIVAR.matcher(normalize);
        if (matcher9.find()) {
            this.pn.setCultivarEpithet(matcher9.group(2));
            normalize = matcher9.replaceFirst("$1");
            this.pn.setRank(Rank.CULTIVAR);
        }
        if (NO_LETTERS.matcher(normalize).find()) {
            unparsable(NameType.NO_NAME);
        }
        if (HYBRID_FORMULA_PATTERN.matcher(normalize).find()) {
            unparsable(NameType.HYBRID_FORMULA);
        }
        Matcher matcher10 = IS_CANDIDATUS_PATTERN.matcher(normalize);
        if (matcher10.find()) {
            this.pn.setCandidatus(true);
            normalize = matcher10.replaceFirst("");
        }
        // nomenclatural status notes: collect every non-blank group(1) into one space separated string
        // and cut those matches out of the working string
        Matcher matcher11 = EXTRACT_NOMSTATUS.matcher(normalize);
        if (matcher11.find()) {
            StringBuilder sb = new StringBuilder();
            StringBuffer stringBuffer = new StringBuffer();
            do {
                if (sb.length() > 0) {
                    sb.append(" ");
                }
                String trimToNull = StringUtils.trimToNull(matcher11.group(1));
                // matches whose group(1) is blank are not removed; their text is carried over by the
                // next appendReplacement/appendTail
                if (trimToNull != null) {
                    sb.append(trimToNull);
                    matcher11.appendReplacement(stringBuffer, "");
                    // a rank found inside the note is applied via setRank(..., true)
                    Matcher matcher12 = NOV_RANK_MARKER.matcher(trimToNull);
                    if (matcher12.find()) {
                        setRank(matcher12.group(1), true);
                    }
                }
            } while (matcher11.find());
            matcher11.appendTail(stringBuffer);
            normalize = stringBuffer.toString();
            this.pn.setNomenclaturalNotes(sb.toString());
        }
        // manuscript names: the whole match becomes a remark, group(1) (with "indet" mapped to "sp") sets the rank
        Matcher matcher13 = MANUSCRIPT_NAMES.matcher(normalize);
        if (matcher13.find()) {
            this.pn.setType(NameType.INFORMAL);
            this.pn.addRemark(matcher13.group(0));
            setRank(matcher13.group(1).replace("indet", "sp"));
            normalize = matcher13.replaceFirst("");
        }
        Matcher matcher14 = MANUSCRIPT_SUFFIX.matcher(normalize);
        if (matcher14.find()) {
            this.pn.setType(NameType.INFORMAL);
            normalize = matcher14.replaceFirst("");
        }
        // strain: keep group(1) in the working string, store group(2) as the strain
        Matcher matcher15 = STRAIN.matcher(normalize);
        if (matcher15.find()) {
            normalize = matcher15.replaceFirst(matcher15.group(1));
            this.pn.setType(NameType.INFORMAL);
            this.pn.setStrain(matcher15.group(2));
            LOG.debug("Strain: {}", matcher15.group(2));
        }
        // taxonomic note: group(1) is normalised with normNote and removed from the working string
        Matcher matcher16 = EXTRACT_SENSU.matcher(normalize);
        if (matcher16.find()) {
            this.pn.setTaxonomicNote(normNote(matcher16.group(1)));
            normalize = matcher16.replaceFirst("");
        }
        Matcher matcher17 = EXTRACT_REMARKS.matcher(normalize);
        if (matcher17.find()) {
            this.pn.setRemarks(StringUtils.trimToNull(matcher17.group(1)));
            normalize = matcher17.replaceFirst("");
        }
        // a rank marker at the end (unless FILIUS_AT_END also matches) disables authorship parsing;
        // without a cultivar epithet the name additionally becomes informal with the rank from group(2)
        Matcher matcher18 = RANK_MARKER_AT_END.matcher(normalize);
        if (matcher18.find() && !FILIUS_AT_END.matcher(normalize).find()) {
            this.ignoreAuthorship = true;
            if (this.pn.getCultivarEpithet() == null) {
                this.pn.setType(NameType.INFORMAL);
                setRank(matcher18.group(2));
            }
            normalize = matcher18.replaceAll("");
        }
        Matcher matcher19 = REPL_AFF.matcher(normalize);
        if (matcher19.find()) {
            this.pn.setType(NameType.INFORMAL);
            // only the first match is recorded as remark, but all matches are removed
            this.pn.addRemark(matcher19.group(0));
            normalize = matcher19.replaceAll("");
        }
        Matcher matcher20 = REPL_IN_REF.matcher(normalize);
        if (matcher20.find()) {
            this.pn.addRemark(normNote(matcher20.group(0)));
            normalize = matcher20.replaceFirst("");
        }
        Matcher matcher21 = REMOVE_INTER_RANKS.matcher(normalize);
        if (matcher21.find()) {
            this.pn.addWarning("Intermediate classification removed: " + matcher21.group(1));
            normalize = matcher21.replaceFirst("$2");
        }
        // remember the rank determined so far; parseNormalisedName may overwrite pn's rank
        Rank rank = this.pn.getRank();
        // second (strong) normalisation pass
        String normalizeStrong = normalizeStrong(normalize);
        if (Strings.isNullOrEmpty(normalizeStrong)) {
            // nothing left to parse: accept as placeholder if pn already holds a name, otherwise fail
            if (this.pn.hasName()) {
                this.pn.setState(ParsedName.State.COMPLETE);
                this.pn.setType(NameType.PLACEHOLDER);
                return;
            }
            unparsable(NameType.NO_NAME);
        }
        if (!parseNormalisedName(normalizeStrong)) {
            // the name pattern did not match: classify the failure (every branch throws)
            if (IS_VIRUS_PATTERN_POSTFAIL.matcher(normalizeStrong).find()) {
                unparsable(NameType.VIRUS);
            }
            if (POTENTIAL_NAME_PATTERN.matcher(normalize).find()) {
                unparsable(NameType.SCIENTIFIC);
            } else {
                unparsable(NameType.NO_NAME);
            }
        }
        // restore the infraspecific text that was replaced by "vulgaris" above
        if (str2 != null) {
            this.pn.setInfraspecificEpithet(str2);
        }
        // re-apply the rank remembered before parseNormalisedName if it differs from the rank given to the constructor
        if (rank != null && this.rank != rank) {
            this.pn.setRank(rank);
        }
        determineNameType(normalize);
        // the doubtful check runs on the raw input string
        applyDoubtfulFlag(this.scientificName);
        if (this.pn.getRank().otherOrUnranked()) {
            this.pn.setRank(RankUtils.inferRank(this.pn));
        }
        determineCode();
    }

    /**
     * Normalises the spacing of a note string.
     * <ul>
     *   <li>a single pair of enclosing round brackets is removed;</li>
     *   <li>{@code , ; )} get one trailing space unless a space already follows;</li>
     *   <li>{@code (} gets a leading space unless one already precedes it;</li>
     *   <li>a dot directly before a year, or after a lower-case word of two or more letters and not
     *       followed by a space, becomes {@code ". "};</li>
     *   <li>every {@code &} is replaced by {@code " & "}.</li>
     * </ul>
     *
     * @param str the raw note, must not be null
     * @return the normalised, trimmed note, or null if nothing is left
     */
    private static String normNote(String str) {
        if (str.startsWith("(") && str.endsWith(")")) {
            str = str.substring(1, str.length() - 1);
        }
        String note = str
                // Negative lookahead "(?! )": the previous "(?!= )" only rejected a following "= ",
                // so a second space was inserted even when one was already present.
                .replaceAll("([,;)])(?! )", "$1 ")
                .replaceAll("(?<! )([(])", " $1")
                .replaceAll("(?:\\.(?=[12][0-9][0-9][0-9?])|(?<=\\b[a-z]{2,})\\.(?! ))", ". ")
                .replaceAll("&", " & ");
        return StringUtils.trimToNull(note);
    }

    /**
     * First, light normalisation pass over a name string. The steps run in a fixed order, each on
     * the output of the previous one: replace {@code ¡} by {@code i}, rewrite matches of
     * {@code FORM_SPECIALIS} to "fsp" and of {@code SENSU_LATU} to "sl", normalise years, drop
     * imprint years, turn underscores into spaces, normalise punctuation, "and" variants and
     * brackets, normalise hybrid markers to {@code ×}, lower-case the tail of words matched by
     * {@code NORM_UPPERCASE_WORDS}, capitalise a match of {@code NORM_LOWERCASE_BINOMIAL} and
     * finally collapse whitespace.
     *
     * @param str the name string, may be null
     * @return the normalised and trimmed string (possibly empty), or null if {@code str} is null
     */
    @VisibleForTesting
    String normalize(String str) {
        if (str == null) {
            return null;
        }
        String name = StringUtils.replaceChars(str, "¡", "i");

        Matcher m = FORM_SPECIALIS.matcher(name);
        if (m.find()) {
            name = m.replaceAll("fsp");
        }
        m = SENSU_LATU.matcher(name);
        if (m.find()) {
            name = m.replaceAll("sl");
        }

        name = NORM_YEAR.matcher(name).replaceAll("$1");
        m = NORM_IMPRINT_YEAR.matcher(name);
        if (m.find()) {
            LOG.debug("Imprint year {} removed", m.group(2));
            name = m.replaceAll("$1");
        }

        name = REPL_UNDERSCORE.matcher(name).replaceAll(" ");
        m = NORM_PUNCTUATIONS.matcher(name);
        if (m.find()) {
            name = m.replaceAll("$1");
        }
        name = NORM_AND.matcher(name).replaceAll("&");

        m = COMMA_AFTER_BASYEAR.matcher(name);
        if (m.find()) {
            name = m.replaceFirst("$1)");
        }
        m = NORM_BRACKETS_OPEN.matcher(name);
        if (m.find()) {
            name = m.replaceAll("$1");
        }
        m = NORM_BRACKETS_CLOSE.matcher(name);
        if (m.find()) {
            name = m.replaceAll("$1");
        }

        m = NORM_HYBRIDS_GENUS.matcher(name);
        if (m.find()) {
            name = m.replaceFirst("×$1");
        }
        m = NORM_HYBRIDS_EPITH.matcher(name);
        if (m.find()) {
            name = m.replaceFirst("$1 ×$2");
        }
        m = NORM_HYBRIDS_FORM.matcher(name);
        if (m.find()) {
            name = m.replaceAll(" × ");
        }

        m = NORM_UPPERCASE_WORDS.matcher(name);
        if (m.find()) {
            StringBuffer rebuilt = new StringBuffer();
            do {
                // Locale.ROOT: lower-casing must not depend on the JVM default locale
                // (a Turkish default would turn 'I' into a dotless 'ı').
                String word = m.group(1) + m.group(2).toLowerCase(Locale.ROOT);
                // quoteReplacement: the text is literal, '$' and '\' must not be read as group references
                m.appendReplacement(rebuilt, Matcher.quoteReplacement(word));
            } while (m.find());
            m.appendTail(rebuilt);
            name = rebuilt.toString();
        }

        m = NORM_LOWERCASE_BINOMIAL.matcher(name);
        if (m.find()) {
            String binomial = StringUtils.capitalize(m.group(1)) + " " + m.group(2);
            name = m.replaceFirst(Matcher.quoteReplacement(binomial));
        }

        return StringUtils.trimToEmpty(NORM_WHITESPACE.matcher(name).replaceAll(" "));
    }

    /**
     * Second, stronger normalisation pass applied right before the name pattern is matched.
     * Besides rewriting the string it records warnings on {@code pn} and sets the doubtful flag
     * when question marks are removed.
     *
     * @param str the lightly normalised name, may be null
     * @return the normalised and trimmed string (possibly empty), or null if {@code str} is null
     */
    @VisibleForTesting
    String normalizeStrong(String str) {
        if (str == null) {
            return null;
        }

        // "hort ex" variants, quotes and a quoted genus
        String name = NORM_EX_HORT.matcher(str).replaceAll("hort.ex ");
        name = NORM_QUOTES.matcher(name).replaceAll("'");
        name = REPL_GENUS_QUOTE.matcher(name).replaceFirst("$1 ");

        final Matcher enclosingQuotes = REPL_ENCLOSING_QUOTE.matcher(name);
        if (enclosingQuotes.find()) {
            name = enclosingQuotes.replaceAll("");
            this.pn.addWarning(Warnings.REPL_ENCLOSING_QUOTE);
        }

        // question marks make the name doubtful
        final Matcher questionMarks = NO_Q_MARKS.matcher(name);
        if (questionMarks.find()) {
            name = questionMarks.replaceAll("$1");
            this.pn.setDoubtful(true);
            this.pn.addWarning(Warnings.QUESTION_MARKS_REMOVED);
        }

        name = REPL_RANK_PREFIXES.matcher(name).replaceAll("");

        final Matcher tfGenus = NORM_TF_GENUS.matcher(name);
        if (tfGenus.find()) {
            name = tfGenus.replaceAll("$1$2 ");
        }

        // unify all bracket variants to round brackets
        name = NORM_BRACKETS_OPEN_STRONG.matcher(name).replaceAll("(");
        name = NORM_BRACKETS_CLOSE_STRONG.matcher(name).replaceAll(")");

        // a name starting with an epithet gets "?" as genus
        final Matcher leadingEpithet = STARTING_EPITHET.matcher(name);
        if (leadingEpithet.find()) {
            name = leadingEpithet.replaceFirst("? $1");
            this.pn.addWarning(Warnings.MISSING_GENUS);
        }

        // put the subgenus into brackets unless group 3 parses as a rank
        final Matcher subgenus = NORM_SUBGENUS.matcher(name);
        if (subgenus.find() && parseRank(subgenus.group(3)) == null) {
            name = subgenus.replaceAll("$1($2)$3");
        }

        final String punctuated = NORM_PUNCTUATIONS.matcher(name).replaceAll("$1");
        final String singleSpaced = NORM_WHITESPACE.matcher(punctuated).replaceAll(" ");
        return StringUtils.trimToEmpty(singleSpaced);
    }

    /**
     * Cleans the raw input before any parsing: repairs and unescapes HTML entities, removes XML
     * tags, strips enclosing quote characters, collapses whitespace and normalises apostrophes.
     * A warning is added to {@code pn} whenever entities or tags were found.
     *
     * @param str the raw name string, must not be null
     * @return the cleaned, trimmed string (possibly empty)
     */
    @VisibleForTesting
    String preClean(String str) {
        String text = str;

        final Matcher brokenEntities = XML_ENTITY_STRIP.matcher(text);
        if (brokenEntities.find()) {
            text = brokenEntities.replaceAll("&$1;");
        }

        // a shorter string after unescaping means at least one entity was resolved
        final int lengthBeforeUnescape = text.length();
        text = StringEscapeUtils.unescapeHtml4(text);
        if (text.length() < lengthBeforeUnescape) {
            this.pn.addWarning(Warnings.HTML_ENTITIES);
        }

        final Matcher ampersands = AMPERSAND_ENTITY.matcher(text);
        if (ampersands.find()) {
            text = ampersands.replaceAll("&");
            this.pn.addWarning(Warnings.HTML_ENTITIES);
        }

        final Matcher tags = XML_TAGS.matcher(text);
        if (tags.find()) {
            text = tags.replaceAll("");
            this.pn.addWarning(Warnings.XML_TAGS);
        }

        text = text.trim();
        for (char quote : QUOTES) {
            // count leading quote characters, including whitespace mixed in between them
            int leading = 0;
            while (leading < text.length()) {
                final char ch = text.charAt(leading);
                if (ch != quote && !Character.isWhitespace(ch)) {
                    break;
                }
                leading++;
            }
            if (leading == 0) {
                continue;
            }
            // count trailing quote characters without running into the leading part
            int trailing = 0;
            while (quote == text.charAt(text.length() - 1 - trailing) && text.length() - leading - trailing > 0) {
                trailing++;
            }
            text = text.substring(leading, text.length() - trailing);
        }

        final String singleSpaced = NORM_WHITESPACE.matcher(text).replaceAll(" ");
        final String apostrophesNormed = NORM_APOSTROPHES.matcher(singleSpaced).replaceAll("'");
        return StringUtils.trimToEmpty(apostrophesNormed);
    }

    /**
     * Assigns {@code nameType} to {@code parsedName} only when it has no type yet.
     *
     * @param parsedName the name to update
     * @param nameType the type to set if none is present
     */
    private void setTypeIfNull(ParsedName parsedName, NameType nameType) {
        if (parsedName.getType() != null) {
            return;
        }
        parsedName.setType(nameType);
    }

    /**
     * Derives the name type of {@code pn} and flags inconsistencies between rank and name parts.
     * Does nothing when a type is already set and that type is not parsable.
     *
     * @param str the normalised name string; only its first character is inspected
     */
    private void determineNameType(String str) {
        final NameType existing = this.pn.getType();
        if (existing != null && !existing.isParsable()) {
            return;
        }

        final boolean lowerCaseUninomial = this.pn.getUninomial() != null && Character.isLowerCase(str.charAt(0));
        if (lowerCaseUninomial) {
            this.pn.addWarning(Warnings.LC_MONOMIAL);
            this.pn.setDoubtful(true);
            setTypeIfNull(this.pn, NameType.INFORMAL);
        } else if (this.pn.getRank().notOtherOrUnranked()) {
            // a concrete rank is known: check that the parsed parts agree with it
            if (this.pn.isIndetermined()) {
                this.pn.setType(NameType.INFORMAL);
                this.pn.addWarning(Warnings.INDETERMINED);
            } else {
                final boolean hasLowerEpithet = this.pn.getRank().isSupraspecific()
                        && (this.pn.getSpecificEpithet() != null || this.pn.getInfraspecificEpithet() != null);
                if (hasLowerEpithet) {
                    this.pn.addWarning(Warnings.RANK_MISMATCH);
                    this.pn.setDoubtful(true);
                    this.pn.setType(NameType.INFORMAL);
                } else if (!this.pn.getRank().isSpeciesOrBelow() && this.pn.isBinomial()) {
                    this.pn.addWarning(Warnings.HIGHER_RANK_BINOMIAL);
                    this.pn.setDoubtful(true);
                }
            }
        }

        // still no type: fall back to informal, placeholder or scientific
        if (this.pn.getType() != null) {
            return;
        }
        final NameType fallback;
        if (this.pn.isAbbreviated() || this.pn.isIncomplete()) {
            fallback = NameType.INFORMAL;
        } else if ("?".equals(this.pn.getUninomial()) || "?".equals(this.pn.getGenus()) || "?".equals(this.pn.getSpecificEpithet())) {
            fallback = NameType.PLACEHOLDER;
        } else {
            fallback = NameType.SCIENTIFIC;
        }
        this.pn.setType(fallback);
    }

    /**
     * Marks {@code pn} as doubtful when {@code str} does not match {@code DOUBTFUL}, or when it
     * does, the name type is parsable and {@code DOUBTFUL2} matches.
     *
     * @param str the string to check
     */
    private void applyDoubtfulFlag(String str) {
        final boolean matchesDoubtful = DOUBTFUL.matcher(str).find();
        if (matchesDoubtful) {
            if (this.pn.getType().isParsable() && DOUBTFUL2.matcher(str).find()) {
                this.pn.setDoubtful(true);
                this.pn.addWarning(Warnings.NULL_EPITHET);
            }
            return;
        }
        this.pn.setDoubtful(true);
        this.pn.addWarning(Warnings.UNUSUAL_CHARACTERS);
    }

    /**
     * Sets the nomenclatural code of {@code pn} if none is present yet. The first rule that
     * applies wins: a rank restricted to one code, a cultivar epithet, a sanctioning author,
     * the virus name type, and finally candidatus names or names with a strain. If no rule
     * applies the code stays unset.
     */
    private void determineCode() {
        if (this.pn.getCode() != null) {
            return;
        }
        if (this.pn.getRank().isRestrictedToCode() != null) {
            this.pn.setCode(this.pn.getRank().isRestrictedToCode());
        } else if (this.pn.getCultivarEpithet() != null) {
            this.pn.setCode(NomCode.CULTIVARS);
        } else if (this.pn.getSanctioningAuthor() != null) {
            this.pn.setCode(NomCode.BOTANICAL);
        } else if (this.pn.getType() == NameType.VIRUS) {
            this.pn.setCode(NomCode.VIRUS);
        } else if (this.pn.isCandidatus() || this.pn.getStrain() != null) {
            this.pn.setCode(NomCode.BACTERIAL);
        }
    }

    /**
     * Matches the normalised name string against {@code NAME_PATTERN} and copies the
     * captured groups into the parsed name {@code pn}.
     * <p>
     * Capture groups as used below: 1 leading monomial, 2 bracketed infrageneric epithet,
     * 3/4 infrageneric rank marker and epithet, 5 specific epithet, 6 extra epithet of a
     * four-parted name, 7 infraspecific rank marker, 8 infraspecific epithet, 9/10 second
     * rank marker and epithet (NOTE(review): the pattern builds group 9 from
     * {@code RANK_MARKER_MICROBIAL}), 11 trailing rank marker, 12 the whole authorship
     * section, 13/14/16 basionym ex-authors, authors and year, 17/18/20 combination
     * ex-authors, authors and year, 19 sanctioning author, 21 unparsed remainder.
     *
     * @param str the normalised name string
     * @return {@code true} if the pattern matched (completely or partially), {@code false} otherwise
     */
    private boolean parseNormalisedName(String str) {
        LOG.debug("Parse normed name string: {}", str);
        final Matcher m = interruptableMatcher(NAME_PATTERN, str);
        if (!m.find()) {
            return false;
        }

        // Anything captured by the last group could not be interpreted.
        final String remains = m.group(21);
        if (StringUtils.isBlank(remains)) {
            this.pn.setState(ParsedName.State.COMPLETE);
        } else {
            LOG.debug("Partial match with unparsed remains \"{}\" for: {}", remains, str);
            this.pn.setState(ParsedName.State.PARTIAL);
            this.pn.setUnparsed(remains.trim());
        }
        if (LOG.isDebugEnabled()) {
            logMatcher(m);
        }

        setUninomialOrGenus(m, this.pn);

        // Infrageneric epithet: either given in brackets (group 2) or with a rank marker (groups 3 + 4).
        boolean bracketSubrankFound = false;
        final String bracketInfrageneric = m.group(2);
        final String markedInfrageneric = m.group(4);
        if (bracketInfrageneric != null) {
            bracketSubrankFound = true;
            this.pn.setInfragenericEpithet(StringUtils.trimToNull(bracketInfrageneric));
        } else if (markedInfrageneric != null) {
            setRank(m.group(3));
            this.pn.setInfragenericEpithet(StringUtils.trimToNull(markedInfrageneric));
        }

        this.pn.setSpecificEpithet(StringUtils.trimToNull(m.group(5)));

        // A further epithet before the infraspecific one means a four-parted name.
        // The literal "null" is excluded here; such strings are flagged separately as doubtful.
        final String extraEpithet = m.group(6);
        if (extraEpithet != null && extraEpithet.length() > 1 && !extraEpithet.contains("null")) {
            this.pn.setRank(Rank.INFRASUBSPECIFIC_NAME);
        }
        final String infraspecificMarker = m.group(7);
        if (infraspecificMarker != null && !infraspecificMarker.isEmpty()) {
            setRank(infraspecificMarker);
        }
        this.pn.setInfraspecificEpithet(StringUtils.trimToNull(m.group(8)));

        // Second rank marker + epithet pair (groups 9 + 10) overrides the infraspecific epithet.
        final String secondMarker = m.group(9);
        if (secondMarker != null) {
            setRank(secondMarker);
            this.pn.setInfraspecificEpithet(m.group(10));
        }

        // A rank marker at the very end: no authorship is read for such names.
        final String trailingMarker = m.group(11);
        if (trailingMarker != null) {
            setRank(trailingMarker);
            this.ignoreAuthorship = true;
        }

        lookForIrregularRankMarker();
        if (this.pn.isIndetermined()) {
            this.ignoreAuthorship = true;
        }
        if (this.ignoreAuthorship || m.group(12) == null) {
            return true;
        }

        this.pn.setCombinationAuthorship(parseAuthorship(m.group(17), m.group(18), m.group(20)));
        final String sanctioningAuthor = m.group(19);
        if (sanctioningAuthor != null) {
            this.pn.setSanctioningAuthor(sanctioningAuthor);
        }
        this.pn.setBasionymAuthorship(parseAuthorship(m.group(13), m.group(14), m.group(16)));

        if (bracketSubrankFound && infragenericIsAuthor(this.pn)) {
            // The bracketed word is rather a basionym author than an infrageneric epithet: swap it.
            this.pn.setBasionymAuthorship(parseAuthorship(null, this.pn.getInfragenericEpithet(), null));
            this.pn.setInfragenericEpithet(null);
            // Without any lower epithet the genus is really a uninomial.
            if (this.pn.getGenus() != null && this.pn.getSpecificEpithet() == null && this.pn.getInfraspecificEpithet() == null) {
                this.pn.setUninomial(this.pn.getGenus());
                this.pn.setGenus(null);
            }
            LOG.debug("swapped subrank with bracket author: {}", this.pn.getBasionymAuthorship());
        }
        return true;
    }

    /**
     * Cleans a matched year string.
     *
     * @param str the raw matched year, may be {@code null}
     * @return the trimmed year, or {@code null} if the input is {@code null} or
     *         not longer than two characters (length is checked before trimming)
     */
    private static String cleanYear(String str) {
        final boolean usable = str != null && str.length() > 2;
        return usable ? str.trim() : null;
    }

    /**
     * Interprets the given rank marker and applies the resulting rank without forcing it,
     * i.e. by delegating to {@link #setRank(String, boolean)} with {@code false}.
     *
     * @param str the rank marker to interpret
     */
    private void setRank(String str) {
        final boolean force = false;
        setRank(str, force);
    }

    /**
     * Interprets the given rank marker and applies the resulting rank to the parsed name.
     * Nothing happens if the marker cannot be interpreted or yields an "other"/unranked rank.
     * If the marker starts with {@code NOTHO}, the notho part of the name is set according
     * to the level of the rank.
     *
     * @param str the rank marker to interpret
     * @param z   if {@code true} the rank is set unconditionally, otherwise only when
     *            {@link #setRankIfNotContradicting(Rank)} accepts it
     */
    private void setRank(String str, boolean z) {
        final Rank rank = parseRank(str);
        if (rank != null && rank.notOtherOrUnranked()) {
            if (z) {
                this.pn.setRank(rank);
            } else {
                setRankIfNotContradicting(rank);
            }

            if (!str.startsWith(NOTHO)) {
                return;
            }
            // Hybrid marker: record which part of the name it refers to.
            if (rank.isInfraspecific()) {
                this.pn.setNotho(NamePart.INFRASPECIFIC);
            } else if (rank == Rank.SPECIES) {
                this.pn.setNotho(NamePart.SPECIFIC);
            } else if (rank.isInfrageneric()) {
                this.pn.setNotho(NamePart.INFRAGENERIC);
            } else if (rank == Rank.GENUS) {
                this.pn.setNotho(NamePart.GENERIC);
            }
        }
    }

    /**
     * Replaces the current rank of the parsed name with the given one, but only if the
     * current rank is uncomparable and the new rank does not contradict it.
     * <p>
     * For the three generic placeholder ranks the new rank must lie in the matching range:
     * infrageneric for {@code INFRAGENERIC_NAME}, infraspecific for {@code INFRASPECIFIC_NAME}
     * and infrasubspecific for {@code INFRASUBSPECIFIC_NAME}. Any other uncomparable rank is
     * replaced unconditionally.
     * <p>
     * Each case is checked in isolation. Without the {@code break}s the cases fall through
     * into each other, so an infrageneric placeholder would additionally require the new
     * rank to be infraspecific and infrasubspecific, which can never hold.
     *
     * @param rank the newly interpreted rank
     */
    private void setRankIfNotContradicting(Rank rank) {
        final Rank current = this.pn.getRank();
        if (!current.isUncomparable()) {
            return;
        }
        final boolean compatible;
        switch (current) {
            case INFRAGENERIC_NAME:
                compatible = rank.isInfragenericStrictly();
                break;
            case INFRASPECIFIC_NAME:
                compatible = rank.isInfraspecific();
                break;
            case INFRASUBSPECIFIC_NAME:
                compatible = rank.isInfrasubspecific();
                break;
            default:
                compatible = true;
                break;
        }
        if (compatible) {
            this.pn.setRank(rank);
        }
    }

    /**
     * Infers a rank from a rank marker via {@link RankUtils#inferRank}.
     *
     * @param str the raw rank marker; it is trimmed, and blank input is passed on as {@code null}
     * @return whatever {@code RankUtils.inferRank} returns for the trimmed marker
     */
    private static Rank parseRank(String str) {
        final String marker = StringUtils.trimToNull(str);
        return RankUtils.inferRank(marker);
    }

    /**
     * Decides whether the infrageneric epithet of the given name should rather be treated
     * as an author.
     * <p>
     * That is the case only if the name has no basionym authorship, no specific and no
     * infraspecific epithet, its rank is not strictly infrageneric, and the infrageneric
     * epithet does not end in one of the {@code LATIN_ENDINGS}.
     *
     * @param parsedName the name to inspect; its infrageneric epithet must be set
     * @return {@code true} if the infrageneric epithet looks like an author
     */
    private static boolean infragenericIsAuthor(ParsedName parsedName) {
        if (!parsedName.getBasionymAuthorship().isEmpty()) {
            return false;
        }
        if (parsedName.getSpecificEpithet() != null || parsedName.getInfraspecificEpithet() != null) {
            return false;
        }
        if (parsedName.getRank().isInfragenericStrictly()) {
            return false;
        }
        final boolean hasLatinEnding = LATIN_ENDINGS.matcher(parsedName.getInfragenericEpithet()).find();
        return !hasLatinEnding;
    }

    /**
     * Stores the leading monomial (group 1 of the matcher) in the right field of the name.
     * <p>
     * It becomes the genus if any of the groups 2, 4, 5 or 8 matched or the rank is species
     * or below; the infrageneric epithet if the rank is strictly infrageneric; and the
     * uninomial otherwise.
     *
     * @param matcher    a matcher that has already matched the name pattern
     * @param parsedName the name to update
     */
    private void setUninomialOrGenus(Matcher matcher, ParsedName parsedName) {
        final String monomial = StringUtils.trimToNull(matcher.group(1));

        boolean lowerPartMatched = false;
        for (int group : new int[] {2, 4, 5, 8}) {
            if (matcher.group(group) != null) {
                lowerPartMatched = true;
                break;
            }
        }

        if (lowerPartMatched || parsedName.getRank().isSpeciesOrBelow()) {
            parsedName.setGenus(monomial);
            return;
        }
        if (parsedName.getRank().isInfragenericStrictly()) {
            parsedName.setInfragenericEpithet(monomial);
            return;
        }
        parsedName.setUninomial(monomial);
    }

    /**
     * Repairs names whose epithets or rank look irregular.
     * <p>
     * If the rank is "other" or unranked, an infraspecific and/or specific epithet that
     * consists of nothing but a rank marker ({@code RANK_MARKER_ONLY}) is interpreted as
     * a rank and removed from the name. Otherwise, a name ranked as species that still
     * carries an infraspecific epithet is re-ranked as subspecies and a
     * {@link Warnings#SUBSPECIES_ASSIGNED} warning is added.
     */
    private void lookForIrregularRankMarker() {
        if (this.pn.getRank().otherOrUnranked()) {
            final String infraspecific = this.pn.getInfraspecificEpithet();
            if (infraspecific != null && RANK_MARKER_ONLY.matcher(infraspecific).find()) {
                setRank(infraspecific);
                this.pn.setInfraspecificEpithet(null);
            }
            final String specific = this.pn.getSpecificEpithet();
            if (specific != null && RANK_MARKER_ONLY.matcher(specific).find()) {
                setRank(specific);
                this.pn.setSpecificEpithet(null);
            }
        } else if (this.pn.getRank() == Rank.SPECIES && this.pn.getInfraspecificEpithet() != null) {
            this.pn.setRank(Rank.SUBSPECIES);
            this.pn.addWarning(Warnings.SUBSPECIES_ASSIGNED);
        }
    }

    /**
     * Builds an {@link Authorship} from the raw matched strings.
     *
     * @param str  the ex-author team, or {@code null} if there is none
     * @param str2 the author team, or {@code null} if there is none
     * @param str3 the raw year, cleaned via {@code cleanYear}; may be {@code null}
     * @return a new authorship; teams are only set for non-null input
     */
    @VisibleForTesting
    static Authorship parseAuthorship(String str, String str2, String str3) {
        final Authorship result = new Authorship();
        result.setYear(cleanYear(str3));
        if (str2 != null) {
            final List<String> authors = splitTeam(str2);
            result.setAuthors(authors);
        }
        if (str != null) {
            final List<String> exAuthors = splitTeam(str);
            result.setExAuthors(exAuthors);
        }
        return result;
    }

    /** A whole team written as space separated "Surname INITIALS" pairs without any delimiter. */
    private static final Pattern TEAM_OF_SURNAME_INITIALS = Pattern.compile("^(\\p{Lu}\\p{Ll}+ \\p{Lu}+)(?: (\\p{Lu}\\p{Ll}+ \\p{Lu}+))*$");

    /** A single "Surname INITIALS" pair: group 1 is the surname, group 2 the run of initials. */
    private static final Pattern SURNAME_THEN_INITIALS = Pattern.compile("(\\p{Lu}\\p{Ll}+) (\\p{Lu}+)");

    /**
     * Splits an author team string into its individual authors.
     * <p>
     * Three notations are handled, in this order:
     * <ol>
     *   <li>the team contains a semicolon: it is split with {@code AUTHORTEAM_SEMI_SPLITTER};
     *       members matching {@code AUTHOR_INITIAL_SWAP} have their two captured parts
     *       swapped and their punctuation normalised;</li>
     *   <li>the team contains any {@code AUTHORTEAM_DELIMITER} character: it is split with
     *       {@code AUTHORTEAM_SPLITTER};</li>
     *   <li>the team has no delimiter but consists of "Surname INITIALS" pairs: each pair is
     *       rewritten to dotted initials followed by the surname;</li>
     * </ol>
     * otherwise the whole string is returned as a single author.
     *
     * @param str the author team, must not be {@code null}
     * @return the authors of the team in their original order
     */
    private static List<String> splitTeam(String str) {
        if (str.contains(";")) {
            final List<String> authors = new ArrayList<>();
            for (String member : AUTHORTEAM_SEMI_SPLITTER.split(str)) {
                final Matcher swap = AUTHOR_INITIAL_SWAP.matcher(member);
                if (swap.find()) {
                    authors.add(normAuthor(swap.group(2) + " " + swap.group(1), true));
                } else {
                    authors.add(normAuthor(member, false));
                }
            }
            return authors;
        }
        if (AUTHORTEAM_DELIMITER.matchesAnyOf(str)) {
            return AUTHORTEAM_SPLITTER.splitToList(normAuthor(str, false));
        }
        if (!TEAM_OF_SURNAME_INITIALS.matcher(str).find()) {
            return Lists.newArrayList(normAuthor(str, false));
        }
        final Matcher pair = SURNAME_THEN_INITIALS.matcher(str);
        final List<String> authors = new ArrayList<>();
        while (pair.find()) {
            // Turn e.g. "Surname AB" into "A.B.Surname".
            final StringBuilder author = new StringBuilder();
            for (char initial : pair.group(2).toCharArray()) {
                author.append(initial).append('.');
            }
            author.append(pair.group(1));
            authors.add(author.toString());
        }
        return authors;
    }

    /**
     * Normalises a single author string.
     *
     * @param str the raw author
     * @param z   if {@code true}, every {@code NORM_PUNCTUATIONS} match is first replaced
     *            by its first capture group
     * @return the trimmed author, or {@code null} if nothing but whitespace remains
     */
    private static String normAuthor(String str, boolean z) {
        final String author = z ? NORM_PUNCTUATIONS.matcher(str).replaceAll("$1") : str;
        return StringUtils.trimToNull(author);
    }

    /**
     * Logs every group of the given matcher at debug level, from group 0 (the whole match)
     * up to and including {@code matcher.groupCount()}.
     *
     * @param matcher a matcher holding a successful match
     */
    static void logMatcher(Matcher matcher) {
        final int lastGroup = matcher.groupCount();
        for (int group = 0; group <= lastGroup; group++) {
            LOG.debug("  {}: >{}<", Integer.valueOf(group), matcher.group(group));
        }
    }

    /*
     * Initialises the patterns and pattern fragments of this class.
     *
     * LATIN_ENDINGS is built from the lines of the classpath resource
     * /nameparser/latin-endings.txt; every other constant is derived from string literals,
     * constants declared elsewhere in this class and the rank marker maps of RankUtils.
     * The statement order matters: several patterns are built from constants assigned
     * further up (e.g. RANK_MARKER_ALL, INFRAGENERIC).
     */
    static {
        final java.io.InputStream latinEndings = ParsingJob.class.getResourceAsStream("/nameparser/latin-endings.txt");
        if (latinEndings == null) {
            // Fail with a clear message instead of a NullPointerException from the reader.
            throw new IllegalStateException("Missing classpath resource /nameparser/latin-endings.txt");
        }
        // The resource ships with the library, so it is decoded with a fixed charset rather
        // than the platform default. NOTE(review): UTF-8 is assumed, confirm the file encoding.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(latinEndings, java.nio.charset.StandardCharsets.UTF_8))) {
            final Set<String> endings = reader.lines().collect(Collectors.toSet());
            LATIN_ENDINGS = Pattern.compile("(" + Joiner.on('|').skipNulls().join(endings) + ")$");
        } catch (IOException e) {
            throw new IllegalStateException("Failed to read latin-endings.txt from classpath resources", e);
        }

        INFRAGENERIC = "(?:\\(([A-ZÏËÖÜÄÉÈČÁÀÆŒ][a-zïëöüäåéèčáàæœ-]+)\\)| ((?:notho)?(?:" + StringUtils.join(RankUtils.RANK_MARKER_MAP_INFRAGENERIC.keySet(), "|") + "))[. ]([" + NAME_LETTERS + "][" + name_letters + "-]+))";
        RANK_MARKER_ALL = "(notho)? *(" + StringUtils.join(RankUtils.RANK_MARKER_MAP.keySet(), "|") + ")\\.?";
        RANK_MARKER_ONLY = Pattern.compile("^" + RANK_MARKER_ALL + "$");
        QUOTES = new char[4];
        QUOTES[0] = '\"';
        QUOTES[1] = '\'';
        QUOTES[2] = '\"';
        QUOTES[3] = '\'';
        HYBRID_FORMULA_PATTERN = Pattern.compile("[. ]× ");
        EXTINCT_PATTERN = Pattern.compile("†\\s*");
        CULTIVAR = Pattern.compile("(?:([. ])cv[. ])?[\"'] ?((?:[A-ZÏËÖÜÄÉÈČÁÀÆŒ]?[a-zïëöüäåéèčáàæœ]+[- ]?){1,3}) ?[\"']");
        CULTIVAR_GROUP = Pattern.compile("(?<!^)\\b[\"']?((?:[A-ZÏËÖÜÄÉÈČÁÀÆŒ][a-zïëöüäåéèčáàæœ]{2,}[- ]?){1,3})[\"']? (Group|Hybrids|Sort|[Gg]rex|gx)\\b");
        INFRASPEC_UPPER = Pattern.compile("(?<=forma? )([A-Z])\\b");
        STRAIN = Pattern.compile("([a-z]\\.?) +([A-Z]+[ -]?(?![12][0-9][0-9][0-9?])[0-9]+T?)$");
        IS_VIRUS_PATTERN = Pattern.compile("virus(es)?\\b|\\b((bacterio|viro)?phage(in|s)?|particles?|prion|replicon|(alpha|beta|circular) ?satellites|[a-z]+satellite|vector|viroid|ictv$)\\b", Pattern.CASE_INSENSITIVE);
        IS_VIRUS_PATTERN_CASE_SENSITIVE = Pattern.compile("\\b(:?[MS]?NP|G)V\\b");
        IS_VIRUS_PATTERN_POSTFAIL = Pattern.compile("(\\b(vector)\\b)", Pattern.CASE_INSENSITIVE);
        IS_GENE = Pattern.compile("(RNA|DNA)[0-9]*(?:\\b|_)");
        OTU_PATTERN = Pattern.compile("(BOLD:[0-9A-Z]{7}$|SH[0-9]{6}\\.[0-9]{2}FU)", Pattern.CASE_INSENSITIVE);
        IS_CANDIDATUS_PATTERN = Pattern.compile(CANDIDATUS);
        IS_CANDIDATUS_QUOTE_PATTERN = Pattern.compile("\"(Candidatus\\s|Ca\\.)(.+)\"", Pattern.CASE_INSENSITIVE);
        FAMILY_PREFIX = Pattern.compile("^[A-Z][a-z]*(?:aceae|idae) +(" + StringUtils.join(RankUtils.RANK_MARKER_MAP_FAMILY_GROUP.keySet(), "|") + ")\\b");
        SUPRA_RANK_PREFIX = Pattern.compile("^(" + StringUtils.join(ImmutableMap.builder().putAll(RankUtils.RANK_MARKER_MAP_SUPRAGENERIC).putAll(RankUtils.RANK_MARKER_MAP_INFRAGENERIC).build().keySet(), "|") + ")[\\. ] *");
        // RANK_MARKER_ALL is cut before its last closing bracket so further alternatives can be appended.
        RANK_MARKER_AT_END = Pattern.compile("[ .]" + RANK_MARKER_ALL.substring(0, RANK_MARKER_ALL.lastIndexOf(')')) + "|" + RANK_MARKER_MICROBIAL.substring(3) + "[. ]?(?:Ad|Lv)?\\.?$");
        FILIUS_AT_END = Pattern.compile("[ .]f\\.?$");
        EXTRACT_SENSU = Pattern.compile(" ?\\b((?:(?:excl[. ](?:gen|sp|var)|mut.char|p.p)[. ])?\\(?(?:ss?[. ](?:(?:ampl|l|s|str)[. ]|(?:ampl|lat|strict)(?:[uo]|issimo)?)|(?:(?:ss[. ])?auct|emend|fide|non|nec|sec|sensu|according to)[. ].+)\\)?)");
        NOV_RANK_MARKER = Pattern.compile("(((?:[sS]ub)?(?:[fF]am|[gG]en|[sS]s?p(?:ec)?|[vV]ar|[fF](?:orma?)?)))");
        EXTRACT_NOMSTATUS = Pattern.compile("[;, ]?\\(?\\b((?:comb|((?:[sS]ub)?(?:[fF]am|[gG]en|[sS]s?p(?:ec)?|[vV]ar|[fF](?:orma?)?)))[. ]nov\\b[. ]?(?:ined[. ])?|ined[. ]|nom(?:en)?[. ](?:utiq(?:ue)?[. ])?(?:ambig|alter|alt|correct|cons|dubium|dub|herb|illeg|invalid|inval|negatum|neg|novum|nov|nudum|nud|oblitum|obl|praeoccup|prov|prot|transf|superfl|super|rejic|rej)\\b[. ]?(?:prop[. ]|proposed\\b)?)\\)?");
        EXTRACT_REMARKS = Pattern.compile("\\s+(anon\\.?)(\\s.+)?$");
        COMMA_AFTER_BASYEAR = Pattern.compile("([12][0-9][0-9][0-9?])\\s*\\)\\s*,");
        NORM_APOSTROPHES = Pattern.compile("([`´‘’]+)");
        NORM_QUOTES = Pattern.compile("([\"'`´]+)");
        REPL_GENUS_QUOTE = Pattern.compile("^' *([A-ZÏËÖÜÄÉÈČÁÀÆŒ](?:\\.|[a-zïëöüäåéèčáàæœ]+)(?:-[A-ZÏËÖÜÄÉÈČÁÀÆŒ]?[a-zïëöüäåéèčáàæœ]+)?) *'");
        REPL_ENCLOSING_QUOTE = Pattern.compile("^[',\\s]+|[',\\s]+$");
        NORM_UPPERCASE_WORDS = Pattern.compile("\\b(\\p{Lu})(\\p{Lu}{2,})\\b");
        NORM_LOWERCASE_BINOMIAL = Pattern.compile("^(" + EPHITHET + ") (" + EPHITHET + ")");
        NORM_WHITESPACE = Pattern.compile("(?:\\\\[nr]|\\s)+");
        REPL_UNDERSCORE = Pattern.compile("_+");
        NORM_NO_SQUARE_BRACKETS = Pattern.compile("\\[(.*?)\\]");
        NORM_BRACKETS_OPEN = Pattern.compile("\\s*([{(\\[])\\s*,?\\s*");
        NORM_BRACKETS_CLOSE = Pattern.compile("\\s*,?\\s*([})\\]])\\s*");
        NORM_BRACKETS_OPEN_STRONG = Pattern.compile("( ?[{\\[] ?)+");
        NORM_BRACKETS_CLOSE_STRONG = Pattern.compile("( ?[}\\]] ?)+");
        NORM_AND = Pattern.compile("\\b *(and|et|und|\\+|,&) *\\b");
        NORM_SUBGENUS = Pattern.compile("([A-ZÏËÖÜÄÉÈČÁÀÆŒ](?:\\.|[a-zïëöüäåéèčáàæœ]+)(?:-[A-ZÏËÖÜÄÉÈČÁÀÆŒ]?[a-zïëöüäåéèčáàæœ]+)?) ([A-ZÏËÖÜÄÉÈČÁÀÆŒ](?:\\.|[a-zïëöüäåéèčáàæœ]+)(?:-[A-ZÏËÖÜÄÉÈČÁÀÆŒ]?[a-zïëöüäåéèčáàæœ]+)?) (" + EPHITHET + ")");
        NO_Q_MARKS = Pattern.compile("([a-zïëöüäåéèčáàæœ\\p{Ll}-?])\\?+");
        NORM_PUNCTUATIONS = Pattern.compile("\\s*([.,;:&(){}\\[\\]-])\\s*\\1*\\s*");
        NORM_YEAR = Pattern.compile("[\"'\\[]+\\s*([12][0-9][0-9][0-9?][abcdh?]?(?:[/,-][0-9]{1,4})?)\\s*[\"'\\]]+");
        NORM_IMPRINT_YEAR = Pattern.compile("([12][0-9][0-9][0-9?][abcdh?]?(?:[/,-][0-9]{1,4})?)\\s*([(\\[,&]? *(?:not|imprint)? *\"?[12][0-9][0-9][0-9?][abcdh?]?(?:[/,-][0-9]{1,4})?\"?[)\\]]?)");
        NORM_HYBRIDS_GENUS = Pattern.compile("^\\s*(?:[+×xX]|√ó)\\s*([A-ZÏËÖÜÄÉÈČÁÀÆŒ])");
        NORM_HYBRIDS_EPITH = Pattern.compile("^\\s*(×?[A-ZÏËÖÜÄÉÈČÁÀÆŒ](?:\\.|[a-zïëöüäåéèčáàæœ]+)(?:-[A-ZÏËÖÜÄÉÈČÁÀÆŒ]?[a-zïëöüäåéèčáàæœ]+)?)\\s+(?:×|√ó|[xX]\\s)\\s*(" + EPHITHET + ")");
        NORM_HYBRIDS_FORM = Pattern.compile("\\b([×xX]|√ó) ");
        NORM_TF_GENUS = Pattern.compile("^([A-ZÏËÖÜÄÉÈČÁÀÆŒ])\\(([a-zïëöüäåéèčáàæœ-]+)\\)\\.? ");
        REPL_IN_REF = Pattern.compile("[, ]?\\b(?:in|IN|apud) ((?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?(?:[ '-]?(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?)*(?:[&,;]+(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?(?:[ '-]?(?:\\p{Lu}[\\p{Lu}\\p{Ll}'-]*|fil|filius|hort|jun|junior|sen|senior|al|f|j|jr|ms|sr|v|v[ao]n|bis|d[aeiou]?|de[nrmls]?|degli|e|l[ae]s?|s|ter|'?t|y)\\.?)*)*)");
        REPL_RANK_PREFIXES = Pattern.compile("^(sub)?(fossil|" + StringUtils.join(RankUtils.RANK_MARKER_MAP_SUPRAGENERIC.keySet(), "|") + ")\\.?\\s+", Pattern.CASE_INSENSITIVE);
        MANUSCRIPT_NAMES = Pattern.compile("\\b(indet|spp?)[. ](?:nov\\.)?[A-Z0-9][a-zA-Z0-9-]*(?:\\(.+?\\))?");
        MANUSCRIPT_SUFFIX = Pattern.compile("\\bms\\.?$");
        REPL_AFF = Pattern.compile("\\b(undet|indet|aff|cf)[?.]?\\b", Pattern.CASE_INSENSITIVE);
        NO_LETTERS = Pattern.compile("^[^a-zA-Z]+$");
        REMOVE_PLACEHOLDER_AUTHOR = Pattern.compile("\\b(?:unknown|unspecified|uncertain|\\?)[, ] ?([12][0-9][0-9][0-9?][abcdh?]?(?:[/,-][0-9]{1,4})?)$", Pattern.CASE_INSENSITIVE);
        PLACEHOLDER_GENUS = Pattern.compile("^(In|Dummy|Missing|Temp|Unknown|Unplaced|Unspecified) (?=[a-z]+)\\b");
        REMOVE_PLACEHOLDER_INFRAGENERIC = Pattern.compile("\\b\\( ?(?:allocation|awaiting|deleted?|dummy|incertae sedis|mixed|not assigned|not stated|place ?holder|temp|tobedeleted|unaccepted|unallocated|unassigned|uncertain|unclassed|unclassified|uncultured|undescribed|undetermined|unknown|unnamed|unplaced|unspecified) ?\\) ", Pattern.CASE_INSENSITIVE);
        PLACEHOLDER = Pattern.compile("\\b(?:allocation|awaiting|deleted?|dummy|incertae sedis|mixed|not assigned|not stated|place ?holder|temp|tobedeleted|unaccepted|unallocated|unassigned|uncertain|unclassed|unclassified|uncultured|undescribed|undetermined|unknown|unnamed|unplaced|unspecified)\\b", Pattern.CASE_INSENSITIVE);
        DOUBTFUL = Pattern.compile("^[A-ZÏËÖÜÄÉÈČÁÀÆŒ\\p{Lu}a-zïëöüäåéèčáàæœ\\p{Ll}-?×\":;&*+\\s,.()\\[\\]/'`´0-9-†]+$");
        DOUBTFUL2 = Pattern.compile("\\bnull\\b");
        XML_ENTITY_STRIP = Pattern.compile("&\\s*([a-z]+)\\s*;");
        AMPERSAND_ENTITY = Pattern.compile("& *amp +");
        XML_TAGS = Pattern.compile("< */? *[a-zA-Z] *>");
        STARTING_EPITHET = Pattern.compile("^\\s*(" + EPHITHET + ")\\b");
        FORM_SPECIALIS = Pattern.compile("\\bf\\. *sp(?:ec)?\\b");
        SENSU_LATU = Pattern.compile("\\bs\\.l\\.\\b");

        // TYPE_TO_VAR: "\b(<prefix>|<prefix>|...)type\b", with one prefix for every
        // infrasubspecific microbial rank whose name ends in VAR.
        StringBuilder sb = new StringBuilder();
        sb.append("\\b(");
        for (Rank rank : RankUtils.INFRASUBSPECIFIC_MICROBIAL_RANKS) {
            if (rank.name().endsWith("VAR")) {
                if (sb.length() > 4) {
                    sb.append("|");
                }
                sb.append(rank.name().toLowerCase().substring(0, rank.name().length() - 3));
            }
        }
        sb.append(")type\\b");
        TYPE_TO_VAR = Pattern.compile(sb.toString());
        POTENTIAL_NAME_PATTERN = Pattern.compile("^×?[A-ZÏËÖÜÄÉÈČÁÀÆŒ](?:\\.|[a-zïëöüäåéèčáàæœ]+)(?:-[A-ZÏËÖÜÄÉÈČÁÀÆŒ]?[a-zïëöüäåéèčáàæœ]+)?\\b");
        REMOVE_INTER_RANKS = Pattern.compile("\\b((?:subsp|ssp|var)[ .].+)\\b(" + RANK_MARKER + ")\\b");
        NAME_PATTERN = Pattern.compile("^(×?(?:\\?|[A-ZÏËÖÜÄÉÈČÁÀÆŒ](?:\\.|[a-zïëöüäåéèčáàæœ]+)(?:-[A-ZÏËÖÜÄÉÈČÁÀÆŒ]?[a-zïëöüäåéèčáàæœ]+)?))(?:(?<!ceae)" + INFRAGENERIC + ")?(?:(?:\\b| )(×?" + EPHITHET + ")(?:(?:.*?)( ×?" + EPHITHET + ")?[. ]?(" + RANK_MARKER + ")?[. ](×?\"?(?!(?:degli|de)\\b)" + EPHITHET + "\"?))?)?(?: (" + RANK_MARKER_MICROBIAL + ")[ .](\\S+))?([. ]" + RANK_MARKER + ")?([., ]?(?:\\((?:" + AUTHORSHIP + ")?[, ]?(" + YEAR_LOOSE + ")?\\))?(?:" + AUTHORSHIP + ")?(?: ?\\(?,?(" + YEAR_LOOSE + ")\\)?)?)(\\b.*?)??$");
    }
}
