/*
 * Decompiled with CFR 0.152.
 */
package com.soulgalore.crawler.core.impl;

import com.soulgalore.crawler.core.HTMLPageResponse;
import com.soulgalore.crawler.core.PageURL;
import com.soulgalore.crawler.core.PageURLParser;
import java.util.HashSet;
import java.util.Set;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * {@link PageURLParser} that collects the link targets of the anchor elements
 * ({@code a[href]}) contained in a fetched page.
 */
public class AhrefPageURLParser implements PageURLParser {
    /** CSS query selecting every anchor element that carries an {@code href} attribute. */
    private static final String AHREF = "a[href]";
    /** Attribute key handed to jsoup; the {@code abs:} prefix asks jsoup for the absolute form of the href. */
    private static final String ABS_HREF = "abs:href";
    /** Prefix of links that are skipped because they are mail addresses, not pages. */
    private static final String MAIL_TO = "mailto:";

    /**
     * Extracts the anchor links from the supplied response.
     *
     * @param theResponse the fetched page to inspect
     * @return the links found in the page body when the response code is 200;
     *         otherwise an empty, mutable set (never {@code null})
     */
    @Override
    public Set<PageURL> get(HTMLPageResponse theResponse) {
        String url = theResponse.getUrl();
        // Only responses with status 200 are parsed; anything else yields no links.
        if (theResponse.getResponseCode() != 200) {
            return new HashSet<PageURL>();
        }
        return this.fetch(AHREF, ABS_HREF, theResponse.getBody(), url);
    }

    /**
     * Selects elements from the document and turns one attribute of each into a {@link PageURL}.
     *
     * @param query        the selector passed to {@link Document#select(String)}
     * @param attributeKey the attribute to read from every selected element
     * @param doc          the parsed document to search
     * @param url          the URL of the response the document came from; passed as the
     *                     second constructor argument of every created {@link PageURL}
     * @return a set with one {@link PageURL} per selected element whose attribute value is
     *         non-empty and does not start with {@code mailto:}
     */
    private Set<PageURL> fetch(String query, String attributeKey, Document doc, String url) {
        Set<PageURL> urls = new HashSet<PageURL>();
        Elements elements = doc.select(query);
        for (Element src : elements) {
            // Read the attribute once instead of re-querying the element for every check.
            String link = src.attr(attributeKey);
            if (link.isEmpty() || link.startsWith(MAIL_TO)) {
                continue;
            }
            // The former iframe special case performed exactly this same add, so a
            // single unconditional add keeps the behavior without the duplicate branch.
            urls.add(new PageURL(link, url));
        }
        return urls;
    }
}

