package com.soulgalore.crawler.core.impl;

import com.soulgalore.crawler.core.HTMLPageResponse;
import com.soulgalore.crawler.core.PageURL;
import com.soulgalore.crawler.core.PageURLParser;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/* loaded from: input_file:com/soulgalore/crawler/core/impl/AhrefPageURLParser.class */
/**
 * {@link PageURLParser} that collects the links found in the anchor elements
 * ({@code a[href]}) of a fetched page.
 *
 * <p>Links are only collected from responses whose response code is 200; any
 * other response yields an empty set. Links with an empty value and
 * {@code mailto:} links are skipped.
 */
public class AhrefPageURLParser implements PageURLParser {
    /** Selector matching every anchor element that carries an href attribute. */
    private static final String AHREF = "a[href]";
    /** Attribute key read from each matched element (jsoup's "abs:" prefixed form of href). */
    private static final String ABS_HREF = "abs:href";
    /** Prefix of links that are never collected. */
    private static final String MAIL_TO = "mailto:";

    /**
     * Returns the links found on the given page.
     *
     * @param response the fetched page
     * @return the links of the page, or an empty (mutable) set when the
     *         response code is not 200
     */
    @Override // com.soulgalore.crawler.core.PageURLParser
    public Set<PageURL> get(HTMLPageResponse response) {
        final String url = response.getUrl();
        if (response.getResponseCode() != 200) {
            return new HashSet<PageURL>();
        }
        return fetch(AHREF, ABS_HREF, response.getBody(), url);
    }

    /**
     * Selects elements from the document and turns the requested attribute of
     * each one into a {@link PageURL}.
     *
     * @param selector     the selector handed to {@link Document#select(String)}
     * @param attributeKey the attribute whose value is used as the link
     * @param document     the parsed page body
     * @param pageUrl      the URL of the page being parsed; passed as the second
     *                     argument of every created {@link PageURL}
     * @return the collected links; duplicates collapse according to
     *         {@code PageURL}'s equality
     */
    private Set<PageURL> fetch(String selector, String attributeKey, Document document, String pageUrl) {
        final Set<PageURL> urls = new HashSet<PageURL>();
        for (Element element : document.select(selector)) {
            // Read the attribute once instead of once per check.
            final String link = element.attr(attributeKey);
            if (link.isEmpty() || link.startsWith(MAIL_TO)) {
                continue;
            }
            // The former iframe/non-iframe branches were identical, so a
            // single add covers every element kind.
            urls.add(new PageURL(link, pageUrl));
        }
        return urls;
    }
}
