package org.gbif.common.parsers;

import com.lowagie.text.ElementTags;
import java.net.URI;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.openxml4j.opc.ContentTypes;
import org.apache.tika.Tika;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.mime.MimeTypes;
import org.gbif.api.model.common.MediaObject;
import org.gbif.api.vocabulary.MediaType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/gbif-parsers-0.62.jar:org/gbif/common/parsers/MediaParser.class */
/**
 * Derives MIME types for media objects and classifies them as still image,
 * sound or moving image.
 *
 * <p>The MIME type is either taken from the format already present on the
 * {@link MediaObject} or detected from its identifier URI using Tika's
 * name-based detection, with a small table of known URL patterns as fallback
 * for script-style URLs.
 *
 * <p>A shared instance is available through {@link #getInstance()}.
 */
public class MediaParser {
    /** MIME type used for anything that looks like a web page rather than a media file. */
    private static final String HTML_TYPE = "text/html";
    private static final String IMAGE_JPEG = "image/jpeg";
    private static final String IMAGE_PREFIX = "image";
    private static final String AUDIO_PREFIX = "audio";
    private static final String VIDEO_PREFIX = "video";

    private static final Logger LOG = LoggerFactory.getLogger(MediaParser.class);
    private static final Tika TIKA = new Tika();
    private static final MimeTypes MIME_TYPES = MimeTypes.getDefaultMimeTypes();

    /**
     * Detection results that are not trusted as the final answer for a URI:
     * server-side script types, plain HTML and the generic binary type. When
     * detection on the full URI yields one of these, the query string and the
     * known URL patterns are consulted before falling back to {@link #HTML_TYPE}.
     */
    private static final Set<String> HTML_MIME_TYPES = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
        "text/x-coldfusion",
        "text/x-php",
        "text/asp",
        "text/aspdotnet",
        "text/x-cgi",
        "text/x-jsp",
        "text/x-perl",
        HTML_TYPE,
        "application/octet-stream")));

    /** Full-match URL patterns whose MIME type is known even though the URL itself does not reveal it. */
    private static final Map<Pattern, String> KNOWN_URL_PATTERNS = buildKnownUrlPatterns();

    static {
        // Let the registry resolve the common non-standard names for MP3 audio to audio/mpeg.
        MediaTypeRegistry mediaTypeRegistry = MIME_TYPES.getMediaTypeRegistry();
        mediaTypeRegistry.addAlias(org.apache.tika.mime.MediaType.audio("mpeg"), org.apache.tika.mime.MediaType.audio("mp3"));
        mediaTypeRegistry.addAlias(org.apache.tika.mime.MediaType.audio("mpeg"), org.apache.tika.mime.MediaType.audio("mpeg3"));
    }

    /** Shared instance; created once during class initialisation so no locking is needed on access. */
    private static final MediaParser INSTANCE = new MediaParser();

    /**
     * Returns the shared parser instance.
     *
     * @return the singleton {@code MediaParser}, never null
     */
    public static MediaParser getInstance() {
        return INSTANCE;
    }

    /** Builds the read-only table of URL patterns with a known MIME type. */
    private static Map<Pattern, String> buildKnownUrlPatterns() {
        Map<Pattern, String> patterns = new HashMap<>();
        patterns.put(Pattern.compile("http://www\\.unimus\\.no/felles/bilder/web_hent_bilde\\.php\\?id=\\d+&type=jpeg"), IMAGE_JPEG);
        patterns.put(Pattern.compile("http://www\\.jacq\\.org/image\\.php\\?filename=\\d+&method=europeana"), IMAGE_JPEG);
        patterns.put(Pattern.compile("https://images\\.ala\\.org\\.au/image/proxyImageThumbnailLarge\\?imageId=[0-9a-f-]{36}"), IMAGE_JPEG);
        patterns.put(Pattern.compile("http://[a-zA-Z0-9-]+\\.wildlifemonitoring\\.ru/get_photo\\.php\\?id=\\d+"), IMAGE_JPEG);
        patterns.put(Pattern.compile("http://procyon\\.acadiau\\.ca/ecsmith/cgi-bin/image\\.cgi\\?[0-9A-Z]+,jpeg"), IMAGE_JPEG);
        patterns.put(Pattern.compile("http://www\\.biologie\\.uni-ulm\\.de/cgi-bin/perl/sound\\.pl\\?sid=T&objid=\\d+"), "audio/vnd.wave");
        // The dot before "mp3" is escaped so that only a literal ".mp3" suffix matches.
        patterns.put(Pattern.compile("https://dofbasen\\.dk/sound_proxy\\.php\\?referer=gbif&mode=o&snd=[0-9_]+\\.mp3&raw=1"), "audio/mpeg");
        return Collections.unmodifiableMap(patterns);
    }

    /**
     * Fills in the format and media type of the given media object, modifying it in place.
     *
     * <ul>
     *   <li>If the object has no format, one is derived from its identifier URI.</li>
     *   <li>If the format is {@code text/html} and an identifier is present, the identifier
     *       is moved to the references field and both identifier and format are cleared.</li>
     *   <li>If a format remains, the type is set from its top-level MIME type
     *       ({@code image}, {@code audio} or {@code video}, compared case-insensitively);
     *       any other format leaves the type untouched.</li>
     * </ul>
     *
     * @param mediaObject the object to inspect and update; must not be null
     * @return the same {@code mediaObject} instance
     */
    public MediaObject detectType(MediaObject mediaObject) {
        if (StringUtils.isEmpty(mediaObject.getFormat())) {
            mediaObject.setFormat(parseMimeType(mediaObject.getIdentifier()));
        }

        // A web page is a reference to the media, not the media file itself.
        if (HTML_TYPE.equalsIgnoreCase(mediaObject.getFormat()) && mediaObject.getIdentifier() != null) {
            mediaObject.setReferences(mediaObject.getIdentifier());
            mediaObject.setIdentifier(null);
            mediaObject.setFormat(null);
        }

        String format = mediaObject.getFormat();
        if (StringUtils.isNotEmpty(format)) {
            // MIME type names are case-insensitive, so a supplied "Image/JPEG" must classify too.
            if (StringUtils.startsWithIgnoreCase(format, IMAGE_PREFIX)) {
                mediaObject.setType(MediaType.StillImage);
            } else if (StringUtils.startsWithIgnoreCase(format, AUDIO_PREFIX)) {
                mediaObject.setType(MediaType.Sound);
            } else if (StringUtils.startsWithIgnoreCase(format, VIDEO_PREFIX)) {
                mediaObject.setType(MediaType.MovingImage);
            } else {
                LOG.debug("Unsupported media format {}", format);
            }
        }
        return mediaObject;
    }

    /**
     * Normalises a MIME type given as text.
     *
     * <p>The input is trimmed and lower-cased. If the registry knows the type (including
     * registered aliases) the registry's name for it is returned; otherwise the normalised
     * input is returned when it is a syntactically valid MIME type.
     *
     * @param str the raw MIME type, may be null
     * @return the normalised MIME type, or null if the input is null, blank or not a valid MIME type
     */
    public String parseMimeType(@Nullable String str) {
        String trimmed = StringUtils.trimToNull(str);
        if (trimmed == null) {
            return null;
        }
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        String normalized = trimmed.toLowerCase(Locale.ROOT);
        try {
            MimeType registeredMimeType = MIME_TYPES.getRegisteredMimeType(normalized);
            if (registeredMimeType != null) {
                return registeredMimeType.getName();
            }
        } catch (MimeTypeException ignored) {
            // Best effort: the registry rejected the name; the validity check below decides the result.
            LOG.debug("Could not look up MIME type {}", normalized);
        }
        return MimeType.isValid(normalized) ? normalized : null;
    }

    /**
     * Detects the MIME type of the resource a URI points to, based on the URI text only.
     *
     * <p>Detection proceeds in order:
     * <ol>
     *   <li>the full URI string; the result is returned unless it is one of {@link #HTML_MIME_TYPES};</li>
     *   <li>the query string alone, if present, under the same condition;</li>
     *   <li>the first entry in {@link #KNOWN_URL_PATTERNS} that matches the full URI;</li>
     *   <li>otherwise {@code text/html}.</li>
     * </ol>
     *
     * @param uri the URI to inspect, may be null
     * @return the detected MIME type, or null if {@code uri} is null
     */
    public String parseMimeType(@Nullable URI uri) {
        if (uri == null) {
            return null;
        }
        String uriText = uri.toString();
        String fromUri = TIKA.detect(uriText);
        if (fromUri == null || !isHtmlLike(fromUri)) {
            return fromUri;
        }

        // Script URLs often carry the real file name in the query string.
        String query = uri.getQuery();
        if (query != null) {
            String fromQuery = TIKA.detect(query);
            if (fromQuery != null && !isHtmlLike(fromQuery)) {
                return fromQuery;
            }
        }

        for (Map.Entry<Pattern, String> entry : KNOWN_URL_PATTERNS.entrySet()) {
            if (entry.getKey().matcher(uriText).matches()) {
                return entry.getValue();
            }
        }
        return HTML_TYPE;
    }

    /** Tells whether a detected MIME type is one of the untrusted, HTML-like results. */
    private static boolean isHtmlLike(String mimeType) {
        return HTML_MIME_TYPES.contains(mimeType.toLowerCase(Locale.ROOT));
    }
}
