package com.soulgalore.crawler.core.impl;

import com.google.inject.Inject;
import com.soulgalore.crawler.core.Crawler;
import com.soulgalore.crawler.core.CrawlerConfiguration;
import com.soulgalore.crawler.core.CrawlerResult;
import com.soulgalore.crawler.core.HTMLPageResponse;
import com.soulgalore.crawler.core.HTMLPageResponseCallable;
import com.soulgalore.crawler.core.HTMLPageResponseFetcher;
import com.soulgalore.crawler.core.PageURL;
import com.soulgalore.crawler.core.PageURLParser;
import com.soulgalore.crawler.util.StatusCode;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

/* loaded from: input_file:com/soulgalore/crawler/core/impl/DefaultCrawler.class */
/**
 * {@link Crawler} implementation that starts from a single URL and follows links level by level.
 *
 * <p>Pages of one level are submitted to the injected {@link ExecutorService} and the links found
 * in the HTML responses become the candidates for the next level. Only links on the same host as
 * the start page are followed, optionally restricted by an "only on path" and a "not on path"
 * substring filter.
 */
public class DefaultCrawler implements Crawler {
    /** HTTP status code that marks a successful fetch. */
    private static final int HTTP_OK = 200;

    /** Substring looked for in the response type to decide whether a page is HTML. */
    private static final String HTML_TYPE_MARKER = "html";

    private final HTMLPageResponseFetcher responseFetcher;
    private final ExecutorService service;
    private final PageURLParser parser;

    /**
     * Creates a crawler.
     *
     * @param hTMLPageResponseFetcher fetcher used to retrieve pages
     * @param executorService executor on which the page fetches are run
     * @param pageURLParser parser that extracts the links from a fetched page
     */
    @Inject
    public DefaultCrawler(HTMLPageResponseFetcher hTMLPageResponseFetcher, ExecutorService executorService, PageURLParser pageURLParser) {
        this.service = executorService;
        this.responseFetcher = hTMLPageResponseFetcher;
        this.parser = pageURLParser;
    }

    /**
     * Shuts down the executor service and the response fetcher, skipping whichever is {@code null}.
     */
    @Override
    public void shutdown() {
        if (this.service != null) {
            this.service.shutdown();
        }
        if (this.responseFetcher != null) {
            this.responseFetcher.shutdown();
        }
    }

    /**
     * Crawls from the configured start URL and collects the URLs that were found.
     *
     * <p>The start URL is fetched first; the crawl is then run for
     * {@code crawlerConfiguration.getMaxLevels()} levels. When the configuration asks for URL
     * verification, URLs that have not been fetched yet are fetched as well and the result lists
     * the URLs of the verified responses (start URL first); otherwise it lists every URL collected
     * during the crawl.
     *
     * @param crawlerConfiguration start URL, request headers, depth, path filters and verify flag
     * @return the collected URLs together with the verified and the non-working responses
     * @throws IllegalArgumentException if the start URL has wrong syntax or could not be fetched
     */
    @Override
    public CrawlerResult getUrls(CrawlerConfiguration crawlerConfiguration) {
        final Map<String, String> requestHeaders = crawlerConfiguration.getRequestHeadersMap();
        final String startUrl = crawlerConfiguration.getStartUrl();
        final HTMLPageResponse startResponse = verifyInput(startUrl, requestHeaders);

        final Set<PageURL> allUrls = new LinkedHashSet<PageURL>();
        final Set<HTMLPageResponse> verifiedResponses = new LinkedHashSet<HTMLPageResponse>();
        final Set<HTMLPageResponse> nonWorkingResponses = new LinkedHashSet<HTMLPageResponse>();

        // Added first so that the start page stays the first element of the insertion-ordered set.
        verifiedResponses.add(startResponse);
        final String host = startResponse.getPageUrl().getHost();

        if (crawlerConfiguration.getMaxLevels() > 0) {
            Set<PageURL> nextToFetch = new LinkedHashSet<PageURL>();
            nextToFetch.add(startResponse.getPageUrl());
            for (int level = 0; level < crawlerConfiguration.getMaxLevels(); level++) {
                // Remember which URL each future belongs to, so a failed fetch can still be reported.
                Map<Future<HTMLPageResponse>, PageURL> futures =
                        new HashMap<Future<HTMLPageResponse>, PageURL>(nextToFetch.size());
                for (PageURL url : nextToFetch) {
                    futures.put(this.service.submit(new HTMLPageResponseCallable(url, this.responseFetcher, true, requestHeaders, false)), url);
                }
                nextToFetch = fetchNextLevelLinks(futures, allUrls, nonWorkingResponses, verifiedResponses, host,
                        crawlerConfiguration.getOnlyOnPath(), crawlerConfiguration.getNotOnPath());
            }
        } else {
            allUrls.add(startResponse.getPageUrl());
        }

        Set<PageURL> resultUrls = allUrls;
        if (crawlerConfiguration.isVerifyUrls()) {
            verifyUrls(allUrls, verifiedResponses, nonWorkingResponses, requestHeaders);
            resultUrls = workingUrls(startUrl, verifiedResponses);
        }
        return new CrawlerResult(startUrl, resultUrls, verifiedResponses, nonWorkingResponses);
    }

    /**
     * Waits for the fetches of one crawl level and collects the links for the next level.
     *
     * <p>A response with status 200 whose type contains "html" is recorded as verified and its
     * links are parsed; a link is kept when it is on the given host, passes both path filters and
     * has not been seen before. Any other response causes the fetched URL to be removed from the
     * collected URLs, and it is additionally recorded as non-working unless its status is 200.
     * A fetch that fails or is interrupted is recorded as non-working with an "unknown" status.
     *
     * <p>If the calling thread is interrupted while waiting, its interrupt status is restored, so
     * waiting on the remaining unfinished fetches fails fast as well.
     *
     * @param map the running fetches, each mapped to the URL being fetched
     * @param set all URLs collected so far; newly found links are added, failed URLs removed
     * @param set2 receives the responses that are considered non-working
     * @param set3 receives the successfully fetched HTML responses
     * @param str host that a link must have to be followed
     * @param str2 substring that a link's URL must contain; {@code null} or empty means no restriction
     * @param str3 substring that a link's URL must not contain; {@code null} or empty means no restriction
     * @return the newly discovered links, in discovery order
     */
    protected Set<PageURL> fetchNextLevelLinks(Map<Future<HTMLPageResponse>, PageURL> map, Set<PageURL> set, Set<HTMLPageResponse> set2, Set<HTMLPageResponse> set3, String str, String str2, String str3) {
        // The parameter names are kept for signature stability; readable aliases are used below.
        final Set<PageURL> allUrls = set;
        final Set<HTMLPageResponse> nonWorkingResponses = set2;
        final Set<HTMLPageResponse> verifiedResponses = set3;
        final String host = str;
        final String onlyOnPath = str2;
        final String notOnPath = str3;

        final Set<PageURL> nextLevel = new LinkedHashSet<PageURL>();
        for (Map.Entry<Future<HTMLPageResponse>, PageURL> entry : map.entrySet()) {
            try {
                HTMLPageResponse response = entry.getKey().get();
                if (isOkHtml(response)) {
                    verifiedResponses.add(response);
                    for (PageURL link : this.parser.get(response)) {
                        // Set.add reports whether the link is new, so one call both checks and records it.
                        if (isAllowedLink(link, host, onlyOnPath, notOnPath) && allUrls.add(link)) {
                            nextLevel.add(link);
                        }
                    }
                } else {
                    allUrls.remove(entry.getValue());
                    // Condition kept as in the original; a 200 response that is not HTML is dropped silently.
                    if (HTTP_OK != response.getResponseCode()
                            || StatusCode.SC_SERVER_REDIRECT_TO_NEW_DOMAIN.getCode() == response.getResponseCode()) {
                        nonWorkingResponses.add(response);
                    }
                }
            } catch (InterruptedException e) {
                // Restore the flag so callers further up can see that the thread was interrupted.
                Thread.currentThread().interrupt();
                nonWorkingResponses.add(unknownResponse(entry.getValue()));
            } catch (ExecutionException e) {
                nonWorkingResponses.add(unknownResponse(entry.getValue()));
            }
        }
        return nextLevel;
    }

    /**
     * Fetches every collected URL that has no verified response yet and sorts the outcome.
     *
     * <p>A 200 response whose type contains "html" is added to the verified responses, a 200
     * response of another type is ignored, and everything else is added to the non-working
     * responses. A fetch that throws is reported on stderr and skipped without affecting the
     * other fetches.
     *
     * @param allUrls the URLs collected by the crawl
     * @param verifiedResponses responses already verified; new working responses are added
     * @param nonWorkingResponses receives the responses with a status other than 200
     * @param requestHeaders headers passed along with every request
     */
    private void verifyUrls(Set<PageURL> allUrls, Set<HTMLPageResponse> verifiedResponses, Set<HTMLPageResponse> nonWorkingResponses, Map<String, String> requestHeaders) {
        final Set<PageURL> needsVerification = new LinkedHashSet<PageURL>(allUrls);
        // Compare URL against URL: removing the response objects themselves from a set of
        // PageURL would rely on cross-type equality and would not skip the verified pages.
        for (HTMLPageResponse verified : verifiedResponses) {
            needsVerification.remove(verified.getPageUrl());
        }

        final Set<HTMLPageResponseCallable> tasks = new HashSet<HTMLPageResponseCallable>(needsVerification.size());
        for (PageURL url : needsVerification) {
            tasks.add(new HTMLPageResponseCallable(url, this.responseFetcher, true, requestHeaders, false));
        }

        try {
            for (Future<HTMLPageResponse> future : this.service.invokeAll(tasks)) {
                if (future.isCancelled()) {
                    continue;
                }
                try {
                    HTMLPageResponse response = future.get();
                    if (response.getResponseCode() != HTTP_OK) {
                        nonWorkingResponses.add(response);
                    } else if (response.getResponseType().indexOf(HTML_TYPE_MARKER) > 0) {
                        verifiedResponses.add(response);
                    }
                } catch (ExecutionException e) {
                    // One failing fetch must not discard the results of the remaining ones.
                    e.printStackTrace();
                }
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /**
     * Builds the ordered set of URLs of the verified responses, with the first entry replaced by
     * a {@link PageURL} created from the configured start URL.
     *
     * @param startUrl the start URL as configured
     * @param verifiedResponses the verified responses; the caller adds the start page first
     * @return the URLs in iteration order of {@code verifiedResponses}, start URL first
     */
    private static Set<PageURL> workingUrls(String startUrl, Set<HTMLPageResponse> verifiedResponses) {
        final Set<PageURL> working = new LinkedHashSet<PageURL>();
        for (HTMLPageResponse response : verifiedResponses) {
            working.add(response.getPageUrl());
        }
        if (working.isEmpty()) {
            return working;
        }
        final ArrayList<PageURL> ordered = new ArrayList<PageURL>(working);
        ordered.set(0, new PageURL(startUrl));
        // Going back through a set drops a later entry that equals the start URL.
        return new LinkedHashSet<PageURL>(ordered);
    }

    /**
     * Tells whether a response has status 200 and a response type containing "html".
     *
     * <p>The marker must not be at index 0 of the response type, matching the original check.
     */
    private static boolean isOkHtml(HTMLPageResponse response) {
        return HTTP_OK == response.getResponseCode()
                && response.getResponseType().indexOf(HTML_TYPE_MARKER) > 0;
    }

    /**
     * Tells whether a link should be followed: same host, URL contains {@code onlyOnPath} and
     * does not contain {@code notOnPath}. A {@code null} or empty filter imposes no restriction.
     */
    private static boolean isAllowedLink(PageURL link, String host, String onlyOnPath, String notOnPath) {
        if (!host.equals(link.getHost())) {
            return false;
        }
        final String url = link.getUrl();
        if (onlyOnPath != null && !url.contains(onlyOnPath)) {
            return false;
        }
        return notOnPath == null || notOnPath.isEmpty() || !url.contains(notOnPath);
    }

    /** Creates the placeholder response recorded for a URL whose fetch failed or was interrupted. */
    private static HTMLPageResponse unknownResponse(PageURL url) {
        return new HTMLPageResponse(url, StatusCode.SC_SERVER_RESPONSE_UNKNOWN.getCode(), Collections.emptyMap(), "", "", 0L, "", -1L);
    }

    /** Fetches a single page synchronously on the calling thread through the response fetcher. */
    private HTMLPageResponse fetchOnePage(PageURL pageURL, Map<String, String> requestHeaders) {
        return this.responseFetcher.get(pageURL, true, requestHeaders, true);
    }

    /**
     * Validates the start URL and fetches it.
     *
     * @param startUrl the URL to start crawling from
     * @param requestHeaders headers passed along with the request
     * @return the response of the start page
     * @throws IllegalArgumentException if the URL has wrong syntax or the response code is not OK
     */
    private HTMLPageResponse verifyInput(String startUrl, Map<String, String> requestHeaders) {
        final PageURL pageUrl = new PageURL(startUrl);
        if (pageUrl.isWrongSyntax()) {
            throw new IllegalArgumentException("The url " + startUrl + " isn't a valid url ");
        }
        final HTMLPageResponse response = fetchOnePage(pageUrl, requestHeaders);
        if (!StatusCode.isResponseCodeOk(Integer.valueOf(response.getResponseCode()))) {
            throw new IllegalArgumentException("The start url: " + startUrl + " couldn't be fetched, response code " + StatusCode.toFriendlyName(response.getResponseCode()));
        }
        return response;
    }
}
