/*
 * Decompiled with CFR 0.152.
 */
package com.soulgalore.crawler.core.impl;

import com.google.inject.Inject;
import com.soulgalore.crawler.core.Crawler;
import com.soulgalore.crawler.core.CrawlerConfiguration;
import com.soulgalore.crawler.core.CrawlerResult;
import com.soulgalore.crawler.core.HTMLPageResponse;
import com.soulgalore.crawler.core.HTMLPageResponseCallable;
import com.soulgalore.crawler.core.HTMLPageResponseFetcher;
import com.soulgalore.crawler.core.PageURL;
import com.soulgalore.crawler.core.PageURLParser;
import com.soulgalore.crawler.util.StatusCode;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

/**
 * Default {@link Crawler} implementation.
 *
 * <p>Starting from a configured start URL, pages are fetched level by level through the injected
 * {@link ExecutorService}; links are extracted from every successfully fetched HTML page with the
 * injected {@link PageURLParser}. Only links on the same host as the start page (and matching the
 * configured path filters) are followed.
 */
public class DefaultCrawler implements Crawler {

    /** Response code that marks a page as successfully fetched. */
    private static final int HTTP_OK = 200;

    private final HTMLPageResponseFetcher responseFetcher;
    private final ExecutorService service;
    private final PageURLParser parser;

    /**
     * Creates a crawler.
     *
     * @param theResponseFetcher fetches the response for a single URL
     * @param theService executor that runs the fetch tasks
     * @param theParser extracts the links from a fetched page
     */
    @Inject
    public DefaultCrawler(HTMLPageResponseFetcher theResponseFetcher, ExecutorService theService, PageURLParser theParser) {
        this.service = theService;
        this.responseFetcher = theResponseFetcher;
        this.parser = theParser;
    }

    /**
     * Shuts down the executor service and the response fetcher, skipping whichever of them is
     * {@code null}.
     */
    @Override
    public void shutdown() {
        if (this.service != null) {
            this.service.shutdown();
        }
        if (this.responseFetcher != null) {
            this.responseFetcher.shutdown();
        }
    }

    /**
     * Crawls from the configured start URL and collects the URLs that were found.
     *
     * <p>The start URL is fetched first (see {@code verifyInput}). If
     * {@code configuration.getMaxLevels()} is greater than zero, that many levels of links are
     * followed; otherwise only the start page URL is reported. When
     * {@code configuration.isVerifyUrls()} is set, the collected URLs that have not been fetched
     * successfully yet are fetched once to sort them into working and non-working ones.
     *
     * <p>If the calling thread is interrupted while waiting for fetch results, no further levels
     * are crawled and the thread's interrupt flag is left set.
     *
     * @param configuration the crawl settings (start URL, levels, path filters, request headers)
     * @return the crawl result; its URL set holds the verified URLs when verification is enabled
     *     and all collected URLs otherwise
     * @throws IllegalArgumentException if the start URL is malformed or cannot be fetched
     */
    @Override
    public CrawlerResult getUrls(CrawlerConfiguration configuration) {
        Map<String, String> requestHeaders = configuration.getRequestHeadersMap();
        HTMLPageResponse resp = this.verifyInput(configuration.getStartUrl(), configuration.getOnlyOnPath(), requestHeaders);

        Set<PageURL> allUrls = new LinkedHashSet<PageURL>();
        Set<HTMLPageResponse> verifiedUrls = new LinkedHashSet<HTMLPageResponse>();
        Set<HTMLPageResponse> nonWorkingResponses = new LinkedHashSet<HTMLPageResponse>();
        verifiedUrls.add(resp);
        String host = resp.getPageUrl().getHost();

        if (configuration.getMaxLevels() > 0) {
            Set<PageURL> nextToFetch = new LinkedHashSet<PageURL>();
            nextToFetch.add(resp.getPageUrl());
            for (int level = 0; level < configuration.getMaxLevels(); ++level) {
                Map<Future<HTMLPageResponse>, PageURL> futures = new HashMap<Future<HTMLPageResponse>, PageURL>(nextToFetch.size());
                for (PageURL testURL : nextToFetch) {
                    futures.put(this.service.submit(new HTMLPageResponseCallable(testURL, this.responseFetcher, true, requestHeaders, false)), testURL);
                }
                nextToFetch = this.fetchNextLevelLinks(futures, allUrls, nonWorkingResponses, verifiedUrls, host, configuration.getOnlyOnPath(), configuration.getNotOnPath());
                // Nothing left to follow, or the caller asked us to stop: going on would either
                // do no work or mark every remaining URL as failed without really fetching it.
                if (nextToFetch.isEmpty() || Thread.currentThread().isInterrupted()) {
                    break;
                }
            }
        } else {
            allUrls.add(resp.getPageUrl());
        }

        if (configuration.isVerifyUrls()) {
            this.verifyUrls(allUrls, verifiedUrls, nonWorkingResponses, requestHeaders);
        }

        Set<PageURL> workingUrls = new LinkedHashSet<PageURL>();
        for (HTMLPageResponse workingResponse : verifiedUrls) {
            workingUrls.add(workingResponse.getPageUrl());
        }
        if (!workingUrls.isEmpty()) {
            // The first entry is replaced by a PageURL built from the start URL exactly as it
            // was configured, so the result always begins with it.
            List<PageURL> ordered = new ArrayList<PageURL>(workingUrls);
            ordered.set(0, new PageURL(configuration.getStartUrl()));
            workingUrls.clear();
            workingUrls.addAll(ordered);
        }
        return new CrawlerResult(configuration.getStartUrl(), configuration.isVerifyUrls() ? workingUrls : allUrls, verifiedUrls, nonWorkingResponses);
    }

    /**
     * Waits for the fetches of one level and collects the links that form the next level.
     *
     * <p>For every response with code 200 and an HTML response type, the response is added to
     * {@code verifiedUrls} and its links are parsed; links that are on {@code host}, pass the
     * path filters and are not yet in {@code allUrls} are added to {@code allUrls} and to the
     * returned set. URLs whose response is not such an HTML success are removed from
     * {@code allUrls}; if the code is not 200 the response is also added to
     * {@code nonWorkingUrls}. A fetch that fails with an exception, or that is interrupted, is
     * recorded in {@code nonWorkingUrls} with the "unknown response" status code.
     *
     * <p>If the thread is interrupted while waiting, the remaining futures of this level are
     * still awaited and the interrupt flag is restored before returning.
     *
     * @param responses the pending fetches of the current level, mapped to the URL they fetch
     * @param allUrls all URLs collected so far; updated in place
     * @param nonWorkingUrls responses of the URLs that did not work; updated in place
     * @param verifiedUrls responses of the URLs that were fetched successfully; updated in place
     * @param host the host a link must be on to be followed
     * @param onlyOnPath text a link URL must contain to be followed; {@code null} disables the filter
     * @param notOnPath text a link URL must not contain; {@code null} or empty disables the filter
     * @return the newly discovered URLs to fetch for the next level
     */
    protected Set<PageURL> fetchNextLevelLinks(Map<Future<HTMLPageResponse>, PageURL> responses, Set<PageURL> allUrls, Set<HTMLPageResponse> nonWorkingUrls, Set<HTMLPageResponse> verifiedUrls, String host, String onlyOnPath, String notOnPath) {
        Set<PageURL> nextLevel = new LinkedHashSet<PageURL>();
        boolean interrupted = false;
        for (Map.Entry<Future<HTMLPageResponse>, PageURL> entry : responses.entrySet()) {
            PageURL fetchedUrl = entry.getValue();
            try {
                HTMLPageResponse response = entry.getKey().get();
                if (isHtmlSuccess(response)) {
                    verifiedUrls.add(response);
                    for (PageURL link : this.parser.get(response)) {
                        // Set.add reports whether the link is new, so known links are skipped.
                        if (isInScope(link, host, onlyOnPath, notOnPath) && allUrls.add(link)) {
                            nextLevel.add(link);
                        }
                    }
                    continue;
                }
                allUrls.remove(fetchedUrl);
                if (HTTP_OK != response.getResponseCode() || StatusCode.SC_SERVER_REDIRECT_TO_NEW_DOMAIN.getCode() == response.getResponseCode()) {
                    nonWorkingUrls.add(response);
                }
            }
            catch (InterruptedException e) {
                // Remember the interrupt instead of re-raising it right away, so the other
                // futures of this level can still deliver their results.
                interrupted = true;
                nonWorkingUrls.add(unknownResponse(fetchedUrl));
            }
            catch (ExecutionException e) {
                nonWorkingUrls.add(unknownResponse(fetchedUrl));
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
        return nextLevel;
    }

    /**
     * Fetches every collected URL that has no verified response yet and sorts the outcome:
     * HTML successes go to {@code verifiedUrls}, responses with a code other than 200 go to
     * {@code nonWorkingUrls}, and 200 responses that are not HTML are dropped. A fetch task that
     * fails with an exception is recorded in {@code nonWorkingUrls} with the "unknown response"
     * status code. If the thread is interrupted, verification stops and the interrupt flag is
     * restored.
     *
     * @param allUrls all URLs collected by the crawl
     * @param verifiedUrls responses that are already verified; updated in place
     * @param nonWorkingUrls responses of the URLs that did not work; updated in place
     * @param requestHeaders request headers sent with every fetch
     */
    private void verifyUrls(Set<PageURL> allUrls, Set<HTMLPageResponse> verifiedUrls, Set<HTMLPageResponse> nonWorkingUrls, Map<String, String> requestHeaders) {
        Set<PageURL> urlsThatNeedsVerification = new LinkedHashSet<PageURL>(allUrls);
        // verifiedUrls holds responses, not URLs, so the URLs have to be removed one by one;
        // removing the responses themselves would never match anything in a set of PageURL.
        for (HTMLPageResponse verified : verifiedUrls) {
            urlsThatNeedsVerification.remove(verified.getPageUrl());
        }
        if (urlsThatNeedsVerification.isEmpty()) {
            return;
        }

        // Lists keep tasks and URLs index-aligned; invokeAll returns its futures in task order.
        List<PageURL> pendingUrls = new ArrayList<PageURL>(urlsThatNeedsVerification);
        List<HTMLPageResponseCallable> tasks = new ArrayList<HTMLPageResponseCallable>(pendingUrls.size());
        for (PageURL testURL : pendingUrls) {
            tasks.add(new HTMLPageResponseCallable(testURL, this.responseFetcher, true, requestHeaders, false));
        }

        try {
            List<Future<HTMLPageResponse>> responses = this.service.invokeAll(tasks);
            for (int i = 0; i < responses.size(); ++i) {
                Future<HTMLPageResponse> future = responses.get(i);
                if (future.isCancelled()) {
                    continue;
                }
                try {
                    HTMLPageResponse response = future.get();
                    if (isHtmlSuccess(response)) {
                        verifiedUrls.add(response);
                    } else if (response.getResponseCode() != HTTP_OK) {
                        nonWorkingUrls.add(response);
                    }
                }
                catch (ExecutionException e) {
                    // One failing task must not hide the results of the others.
                    nonWorkingUrls.add(unknownResponse(pendingUrls.get(i)));
                }
            }
        }
        catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Fetches a single page synchronously on the calling thread.
     *
     * @param url the page to fetch
     * @param requestHeaders request headers sent with the fetch
     * @return the response delivered by the response fetcher
     */
    private HTMLPageResponse fetchOnePage(PageURL url, Map<String, String> requestHeaders) {
        // Both boolean flags are passed as true here, unlike the crawl tasks which pass
        // (true, false); their meaning is defined by HTMLPageResponseFetcher#get.
        return this.responseFetcher.get(url, true, requestHeaders, true);
    }

    /**
     * Checks that the start URL is well-formed and can be fetched.
     *
     * @param startUrl the URL to start crawling from
     * @param onlyOnPath currently not used by this method
     * @param requestHeaders request headers sent with the fetch
     * @return the response of the start page
     * @throws IllegalArgumentException if the URL has wrong syntax or its response code is not OK
     */
    private HTMLPageResponse verifyInput(String startUrl, String onlyOnPath, Map<String, String> requestHeaders) {
        PageURL pageUrl = new PageURL(startUrl);
        if (pageUrl.isWrongSyntax()) {
            throw new IllegalArgumentException("The url " + startUrl + " isn't a valid url ");
        }
        HTMLPageResponse resp = this.fetchOnePage(pageUrl, requestHeaders);
        if (!StatusCode.isResponseCodeOk(resp.getResponseCode())) {
            throw new IllegalArgumentException("The start url: " + startUrl + " couldn't be fetched, response code " + StatusCode.toFriendlyName(resp.getResponseCode()));
        }
        return resp;
    }

    /**
     * Tells whether a response has code 200 and a response type containing "html" after its
     * first character (a type that starts with "html" does not count).
     */
    private static boolean isHtmlSuccess(HTMLPageResponse response) {
        return HTTP_OK == response.getResponseCode() && response.getResponseType().indexOf("html") > 0;
    }

    /**
     * Tells whether a link is on the given host and passes both path filters; a {@code null}
     * {@code onlyOnPath} and a {@code null} or empty {@code notOnPath} filter nothing out.
     */
    private static boolean isInScope(PageURL link, String host, String onlyOnPath, String notOnPath) {
        if (!host.equals(link.getHost())) {
            return false;
        }
        String url = link.getUrl();
        if (onlyOnPath != null && !url.contains(onlyOnPath)) {
            return false;
        }
        return notOnPath == null || notOnPath.equals("") || !url.contains(notOnPath);
    }

    /** Builds the placeholder response recorded for a URL whose fetch produced no response. */
    private static HTMLPageResponse unknownResponse(PageURL url) {
        return new HTMLPageResponse(url, StatusCode.SC_SERVER_RESPONSE_UNKNOWN.getCode(), Collections.<String, String>emptyMap(), "", "", 0L, "", -1L);
    }
}

