/*
 * Decompiled with CFR 0.152.
 */
package org.opensearch.agent.tools;

import com.google.common.collect.ImmutableMap;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Predicate;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import lombok.Generated;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ClassicHttpRequest;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.opensearch.core.action.ActionListener;
import org.opensearch.ml.common.spi.tools.Tool;
import org.opensearch.ml.common.spi.tools.ToolAnnotation;
import org.opensearch.ml.common.utils.StringUtils;
import org.opensearch.ml.common.utils.ToolUtils;
import org.opensearch.threadpool.ThreadPool;

@ToolAnnotation(value="WebSearchTool")
public class WebSearchTool
implements Tool {
    /** Logger shared by all instances of this tool. */
    @Generated
    private static final Logger log = LogManager.getLogger(WebSearchTool.class);
    /** Tool type identifier; also used as the default tool name. */
    public static final String TYPE = "WebSearchTool";
    /** Description used when no custom description has been set. */
    public static final String DEFAULT_DESCRIPTION = "This tool performs a web search using the specified query or fetches the next page of a previous search. It accepts one mandatory argument: `query`, which is a search term used to initiate a new search, and one optional argument: `next_page`, which is a link to retrieve the next set of search results from a previous response. The tool returns the raw documents retrieved from the search engine, along with a `next_page` field for pagination.";
    /** User agent sent when crawling result pages. */
    private static final String USER_AGENT = "OpenSearchWebCrawler/1.0";
    /** JSON schema describing the {@code query} (required) and {@code next_page} (optional) inputs. */
    public static final String DEFAULT_INPUT_SCHEMA = "{\"type\":\"object\",\"properties\":{\"query\":{\"type\":\"string\",\"description\":\"The search term to query using the configured search engine. This is the primary input used to perform the search.\"},\"next_page\":{\"type\":\"string\",\"description\":\"URL to the next page of search results. If provided, the tool will fetch and return results from this page instead of executing a new search query.\"}},\"required\":[\"query\"]}";
    /** Immutable default attributes: the input schema plus {@code strict=false}. */
    public static final Map<String, Object> DEFAULT_ATTRIBUTES = Map.of("input_schema", DEFAULT_INPUT_SCHEMA, "strict", false);
    // Per-instance state; name and description start from the defaults above.
    private String name = TYPE;
    private String description = DEFAULT_DESCRIPTION;
    private String version;
    // HTTP client used for the google/bing/custom API calls; replaceable through setHttpClient.
    private CloseableHttpClient httpClient = HttpClients.createDefault();
    // Provides the executor on which searches are run.
    private final ThreadPool threadPool;
    private Map<String, Object> attributes;

    /**
     * Creates a tool whose attributes start with the default input schema and {@code strict=false}.
     *
     * @param threadPool thread pool from which the search executor is obtained in {@code run}
     */
    public WebSearchTool(ThreadPool threadPool) {
        Map<String, Object> initialAttributes = new HashMap<>();
        initialAttributes.put("input_schema", DEFAULT_INPUT_SCHEMA);
        initialAttributes.put("strict", false);
        this.attributes = initialAttributes;
        this.threadPool = threadPool;
    }

    /**
     * Runs a web search (or fetches the next page of a previous one) and reports the outcome to
     * {@code listener}.
     *
     * <p>Parameters read after {@code ToolUtils.extractInputParameters}: {@code query} (falling back
     * to {@code question}), {@code engine} (default {@code google}), {@code endpoint}, {@code api_key},
     * {@code next_page}, {@code engine_id}, {@code Authorization}, {@code query_key},
     * {@code offset_key}, {@code limit_key} and {@code custom_res_url_jsonpath}. The actual work is
     * submitted to the {@code websearch-crawler-threadpool} executor.
     *
     * @param originalParameters raw tool parameters
     * @param listener receives the JSON result string, or the failure
     * @param <T> response type expected by the listener
     */
    public <T> void run(Map<String, String> originalParameters, ActionListener<T> listener) {
        Map<String, String> parameters = ToolUtils.extractInputParameters(originalParameters, this.attributes);
        try {
            // Spaces are turned into '+' so the query can be placed directly in the URL.
            String query = parameters.getOrDefault("query", parameters.get("question")).replaceAll(" ", "+");
            String engine = parameters.getOrDefault("engine", "google");
            // getDefaultEndpoint throws for an unknown engine; the outer catch reports it.
            String endpoint = parameters.getOrDefault("endpoint", this.getDefaultEndpoint(engine));
            String apiKey = parameters.get("api_key");
            String nextPage = parameters.get("next_page");
            String engineId = parameters.get("engine_id");
            String authorization = parameters.get("Authorization");
            String queryKey = parameters.getOrDefault("query_key", "q");
            String offsetKey = parameters.getOrDefault("offset_key", "offset");
            String limitKey = parameters.getOrDefault("limit_key", "limit");
            String customResUrlJsonpath = parameters.get("custom_res_url_jsonpath");
            this.threadPool.executor("websearch-crawler-threadpool").submit(() -> {
                try {
                    if ("duckduckgo".equalsIgnoreCase(engine)) {
                        String ddgUrl = nextPage != null
                            ? nextPage
                            : this.buildDDGEndpoint(this.getDefaultEndpoint(engine), query);
                        this.fetchDuckDuckGoResult(ddgUrl, listener);
                        return;
                    }
                    // NOTE(review): next_page is requested as-is, including API key / Authorization
                    // headers; confirm callers only pass URLs previously returned by this tool.
                    HttpGet getRequest;
                    String parsedNextPage;
                    if ("google".equalsIgnoreCase(engine)) {
                        if (nextPage != null) {
                            getRequest = new HttpGet(nextPage);
                            parsedNextPage = this.buildGoogleNextPage(endpoint, engineId, query, apiKey, nextPage);
                        } else {
                            getRequest = new HttpGet(this.buildGoogleUrl(endpoint, engineId, query, apiKey, 0));
                            parsedNextPage = this.buildGoogleUrl(endpoint, engineId, query, apiKey, 10);
                        }
                    } else if ("bing".equalsIgnoreCase(engine)) {
                        if (nextPage != null) {
                            getRequest = new HttpGet(nextPage);
                            parsedNextPage = this.buildBingNextPage(endpoint, query, nextPage);
                        } else {
                            getRequest = new HttpGet(this.buildBingUrl(endpoint, query, 0));
                            parsedNextPage = this.buildBingUrl(endpoint, query, 10);
                        }
                        getRequest.addHeader("Ocp-Apim-Subscription-Key", apiKey);
                    } else if ("custom".equalsIgnoreCase(engine)) {
                        if (nextPage != null) {
                            getRequest = new HttpGet(nextPage);
                            parsedNextPage = this.buildCustomNextPage(endpoint, nextPage, queryKey, query, offsetKey, limitKey);
                        } else {
                            getRequest = new HttpGet(this.buildCustomUrl(endpoint, queryKey, query, offsetKey, 0, limitKey));
                            parsedNextPage = this.buildCustomUrl(endpoint, queryKey, query, offsetKey, 10, limitKey);
                        }
                        // Same guard as crawlPage: only send the header when a value was supplied.
                        if (authorization != null) {
                            getRequest.addHeader("Authorization", authorization);
                        }
                    } else {
                        listener.onFailure(new IllegalArgumentException("Unsupported search engine: %s".formatted(engine)));
                        return;
                    }
                    String responseString;
                    // Close the response as soon as the body is read so the connection is released
                    // before the (potentially slow) page crawling starts.
                    try (CloseableHttpResponse res = this.httpClient.execute(getRequest)) {
                        if (res.getCode() >= 400) {
                            listener.onFailure(new IllegalArgumentException("Web search failed: %d %s".formatted(res.getCode(), res.getReasonPhrase())));
                            return;
                        }
                        responseString = EntityUtils.toString(res.getEntity());
                    }
                    this.parseResponse(responseString, authorization, parsedNextPage, engine, customResUrlJsonpath, listener);
                }
                catch (Exception e) {
                    // Keep the original exception as the cause so the stack trace is not lost.
                    listener.onFailure(new IllegalStateException("Web search failed: %s".formatted(e.getMessage()), e));
                }
            });
        }
        catch (Exception e) {
            listener.onFailure(new IllegalStateException("Web search failed: %s".formatted(e.getMessage()), e));
        }
    }

    /**
     * Builds a DuckDuckGo search URL by appending the query as the {@code q} parameter.
     *
     * @param endpoint base URL without a query string
     * @param query search term, already prepared for use in a URL by the caller
     * @return {@code endpoint?q=query}
     */
    private String buildDDGEndpoint(String endpoint, String query) {
        return endpoint + "?q=" + query;
    }

    /**
     * Builds the Google URL for the page after {@code currentPage}.
     *
     * <p>The current offset is read from the first {@code &start=} parameter of {@code currentPage},
     * up to the next {@code &}. When the parameter is missing or not a number the current offset is
     * treated as 0, so the returned URL starts at 10.
     *
     * @param currentPage URL of the page being fetched now
     * @return URL whose {@code start} is the current offset plus 10
     */
    private String buildGoogleNextPage(String endpoint, String engineId, String query, String apiKey, String currentPage) {
        final String marker = "&start=";
        int currentOffset = 0;
        int markerIndex = currentPage.indexOf(marker);
        if (markerIndex >= 0) {
            String tail = currentPage.substring(markerIndex + marker.length());
            int end = tail.indexOf('&');
            // Ignore any parameters that follow the offset value.
            currentOffset = NumberUtils.toInt(end >= 0 ? tail.substring(0, end) : tail, 0);
        }
        return this.buildGoogleUrl(endpoint, engineId, query, apiKey, currentOffset + 10);
    }

    /**
     * Builds a Google custom-search request URL.
     *
     * <p>Formatting uses {@link Locale#ROOT} so the {@code start} value is always written with ASCII
     * digits, whatever the JVM default locale is.
     *
     * @param endpoint base URL without a query string
     * @param engineId value of the {@code cx} parameter
     * @param query value of the {@code q} parameter
     * @param apiKey value of the {@code key} parameter
     * @param start value of the {@code start} parameter
     * @return the assembled URL
     */
    private String buildGoogleUrl(String endpoint, String engineId, String query, String apiKey, int start) {
        return String.format(Locale.ROOT, "%s?q=%s&cx=%s&key=%s&start=%d", endpoint, query, engineId, apiKey, start);
    }

    /**
     * Builds the Bing URL for the page after {@code currentPage}.
     *
     * <p>The current offset is read from the first {@code &offset=} parameter of {@code currentPage},
     * up to the next {@code &}. When the parameter is missing or not a number the current offset is
     * treated as 0, so the returned URL starts at offset 10.
     *
     * @param currentPage URL of the page being fetched now
     * @return URL whose {@code offset} is the current offset plus 10
     */
    private String buildBingNextPage(String endpoint, String query, String currentPage) {
        final String marker = "&offset=";
        int currentOffset = 0;
        int markerIndex = currentPage.indexOf(marker);
        if (markerIndex >= 0) {
            String tail = currentPage.substring(markerIndex + marker.length());
            int end = tail.indexOf('&');
            // Ignore any parameters that follow the offset value.
            currentOffset = NumberUtils.toInt(end >= 0 ? tail.substring(0, end) : tail, 0);
        }
        return this.buildBingUrl(endpoint, query, currentOffset + 10);
    }

    /**
     * Builds the custom-engine URL for the page after {@code currentPage}.
     *
     * <p>The current offset is read from the first {@code &<offsetKey>=} parameter of
     * {@code currentPage}, up to the next {@code &}. The key is matched literally (not as a regular
     * expression). When the parameter is missing or not a number the current offset is treated as 0.
     *
     * @param currentPage URL of the page being fetched now
     * @param offsetKey name of the offset query parameter
     * @return URL whose offset is the current offset plus 10
     */
    private String buildCustomNextPage(String endpoint, String currentPage, String queryKey, String query, String offsetKey, String limitKey) {
        String marker = "&" + offsetKey + "=";
        int currentOffset = 0;
        int markerIndex = currentPage.indexOf(marker);
        if (markerIndex >= 0) {
            String tail = currentPage.substring(markerIndex + marker.length());
            int end = tail.indexOf('&');
            currentOffset = NumberUtils.toInt(end >= 0 ? tail.substring(0, end) : tail, 0);
        }
        return this.buildCustomUrl(endpoint, queryKey, query, offsetKey, currentOffset + 10, limitKey);
    }

    /**
     * Builds a request URL for a custom search endpoint with a fixed page size of 10.
     *
     * <p>Formatting uses {@link Locale#ROOT} so the offset is always written with ASCII digits,
     * whatever the JVM default locale is.
     *
     * @param endpoint base URL without a query string
     * @param queryKey name of the query parameter
     * @param query value of the query parameter
     * @param offsetKey name of the offset parameter
     * @param offsetValue value of the offset parameter
     * @param limitKey name of the page-size parameter (its value is always 10)
     * @return the assembled URL
     */
    private String buildCustomUrl(String endpoint, String queryKey, String query, String offsetKey, int offsetValue, String limitKey) {
        return String.format(Locale.ROOT, "%s?%s=%s&%s=%d&%s=10", endpoint, queryKey, query, offsetKey, offsetValue, limitKey);
    }

    /**
     * Returns the built-in endpoint for a search engine, matched case-insensitively.
     *
     * @param engine engine name
     * @return the default endpoint URL, or {@code null} for {@code custom}, which has no default
     * @throws IllegalArgumentException if the engine is not one of google, bing, duckduckgo, custom
     */
    private String getDefaultEndpoint(String engine) {
        String normalized = engine.toLowerCase(Locale.ROOT);
        if ("google".equals(normalized)) {
            return "https://customsearch.googleapis.com/customsearch/v1";
        }
        if ("bing".equals(normalized)) {
            return "https://api.bing.microsoft.com/v7.0/search";
        }
        if ("duckduckgo".equals(normalized)) {
            return "https://duckduckgo.com/html";
        }
        if ("custom".equals(normalized)) {
            return null;
        }
        throw new IllegalArgumentException("Unsupported search engine: %s".formatted(engine));
    }

    /**
     * Builds a Bing web-search request URL with HTML text format and a page size of 10.
     *
     * <p>Formatting uses {@link Locale#ROOT} so the offset is always written with ASCII digits.
     *
     * @param endpoint base URL without a query string
     * @param query value of the {@code q} parameter
     * @param offset value of the {@code offset} parameter
     * @return the assembled URL
     */
    private String buildBingUrl(String endpoint, String query, int offset) {
        // The query parameter needs its '=' separator: "?q=<query>", not "?q<query>".
        return String.format(Locale.ROOT, "%s?q=%s&textFormat=HTML&count=10&offset=%d", endpoint, query, offset);
    }

    /**
     * Dispatches a raw search-API response to the parser for the given engine.
     *
     * <p>Google and Bing bodies are parsed as JSON objects. For {@code custom}, result URLs are
     * extracted from the raw body with {@code customResUrlJsonpath}; the body is not required to be
     * a JSON object, and the path may resolve to either a list or a single value. Extracted entries
     * that are not strings are skipped with a warning.
     *
     * @param rawResponse response body returned by the search API
     * @param authorization Authorization header value forwarded when crawling custom results; may be null
     * @param nextPage URL reported to the caller as {@code next_page}
     * @param engine engine name, matched case-insensitively
     * @param customResUrlJsonpath JsonPath expression used only for the custom engine
     * @param listener receives the result or the failure
     */
    private <T> void parseResponse(String rawResponse, String authorization, String nextPage, String engine, String customResUrlJsonpath, ActionListener<T> listener) {
        switch (engine.toLowerCase(Locale.ROOT)) {
            case "google" -> this.parseGoogleResults(JsonParser.parseString(rawResponse).getAsJsonObject(), nextPage, listener);
            case "bing" -> this.parseBingResults(JsonParser.parseString(rawResponse).getAsJsonObject(), nextPage, listener);
            case "custom" -> {
                Object extracted = JsonPath.read(rawResponse, customResUrlJsonpath);
                List<String> urls = new ArrayList<>();
                if (extracted instanceof List<?> entries) {
                    for (Object entry : entries) {
                        if (entry instanceof String url) {
                            urls.add(url);
                        } else {
                            log.warn("Skipping non-string entry returned by custom_res_url_jsonpath");
                        }
                    }
                } else if (extracted instanceof String url) {
                    // A definite path yields a single value rather than a list.
                    urls.add(url);
                }
                this.parseCustomResults(urls, authorization, nextPage, listener);
            }
            default -> listener.onFailure(new RuntimeException("Unsupported search engine: %s".formatted(engine)));
        }
    }

    /**
     * Crawls every link in a Google search response and returns the pages as a JSON string.
     *
     * <p>The result map holds {@code next_page} and {@code items}. A response without an
     * {@code items} array (for example when nothing matched) produces an empty {@code items} list,
     * and entries without a {@code link} are skipped. Pages that could not be crawled appear as
     * {@code null} entries, because that is what {@code crawlPage} returns for them.
     *
     * @param googleResponse parsed response body
     * @param nextPage URL reported as {@code next_page}
     * @param listener receives the serialized result
     */
    @SuppressWarnings("unchecked")
    private <T> void parseGoogleResults(JsonObject googleResponse, String nextPage, ActionListener<T> listener) {
        Map<String, Object> results = new HashMap<>();
        results.put("next_page", nextPage);
        List<Map<String, String>> crawlResults = new ArrayList<>();
        if (googleResponse.has("items") && googleResponse.get("items").isJsonArray()) {
            JsonArray items = googleResponse.getAsJsonArray("items");
            for (int i = 0; i < items.size(); ++i) {
                JsonObject item = items.get(i).getAsJsonObject();
                if (!item.has("link") || item.get("link").isJsonNull()) {
                    continue;
                }
                crawlResults.add(this.crawlPage(item.get("link").getAsString(), null));
            }
        }
        results.put("items", crawlResults);
        listener.onResponse((T) StringUtils.gson.toJson(results));
    }

    /**
     * Crawls every URL in a Bing search response and returns the pages as a JSON string.
     *
     * <p>URLs are read from {@code webPages.value[].url}. When {@code webPages} or its
     * {@code value} array is absent the {@code items} list is empty, and entries without a
     * {@code url} are skipped. Pages that could not be crawled appear as {@code null} entries,
     * because that is what {@code crawlPage} returns for them.
     *
     * @param bingResponse parsed response body
     * @param nextPage URL reported as {@code next_page}
     * @param listener receives the serialized result
     */
    @SuppressWarnings("unchecked")
    private <T> void parseBingResults(JsonObject bingResponse, String nextPage, ActionListener<T> listener) {
        Map<String, Object> results = new HashMap<>();
        results.put("next_page", nextPage);
        List<Map<String, String>> crawlResults = new ArrayList<>();
        JsonArray values = null;
        if (bingResponse.has("webPages") && bingResponse.get("webPages").isJsonObject()) {
            JsonObject webPages = bingResponse.getAsJsonObject("webPages");
            if (webPages.has("value") && webPages.get("value").isJsonArray()) {
                values = webPages.getAsJsonArray("value");
            }
        }
        if (values != null) {
            for (int i = 0; i < values.size(); ++i) {
                JsonObject value = values.get(i).getAsJsonObject();
                if (!value.has("url") || value.get("url").isJsonNull()) {
                    continue;
                }
                crawlResults.add(this.crawlPage(value.get("url").getAsString(), null));
            }
        }
        results.put("items", crawlResults);
        listener.onResponse((T) StringUtils.gson.toJson(results));
    }

    /**
     * Crawls each URL returned by a custom search endpoint and hands the pages to the listener as
     * a JSON string containing {@code next_page} and {@code items}.
     *
     * @param urls result URLs, crawled in list order
     * @param authorization Authorization header value forwarded to each crawl; may be null
     * @param nextPage URL reported as {@code next_page}
     * @param listener receives the serialized result
     */
    @SuppressWarnings("unchecked")
    private <T> void parseCustomResults(List<String> urls, String authorization, String nextPage, ActionListener<T> listener) {
        List<Map<String, String>> pages = new ArrayList<>();
        for (String link : urls) {
            pages.add(this.crawlPage(link, authorization));
        }
        Map<String, Object> payload = new HashMap<>();
        payload.put("next_page", nextPage);
        payload.put("items", pages);
        listener.onResponse((T) StringUtils.gson.toJson(payload));
    }

    /**
     * Fetches a DuckDuckGo HTML result page, crawls every result link, and reports the pages as a
     * JSON string containing {@code next_page} and {@code items}.
     *
     * <p>Results are the {@code results_links} elements under the element with id {@code links};
     * each link is the first {@code a} tag inside the first {@code links_main} element. The whole
     * call fails if the {@code links} container is missing or any result has no such link.
     *
     * @param endpoint full URL of the result page to fetch
     * @param listener receives the serialized result or the failure
     */
    @SuppressWarnings("unchecked")
    private <T> void fetchDuckDuckGoResult(String endpoint, ActionListener<T> listener) {
        try {
            Document doc = Jsoup.connect(endpoint).timeout(10000).get();
            Element linksContainer = doc.getElementById("links");
            if (linksContainer == null) {
                listener.onFailure(new IllegalStateException("Failed to fetch duckduckgo results!"));
                return;
            }
            String nextPage = this.getDDGNextPageLink(endpoint, doc);
            List<Map<String, String>> crawlResults = new ArrayList<>();
            for (Element result : linksContainer.getElementsByClass("results_links")) {
                Element main = result.getElementsByClass("links_main").first();
                Element anchor = main == null ? null : main.getElementsByTag("a").first();
                if (anchor == null) {
                    // Report a missing links_main the same way as a missing anchor, instead of
                    // letting a bare NullPointerException escape.
                    listener.onFailure(new IllegalStateException("Failed to fetch duckduckgo results as no valid link element found!"));
                    return;
                }
                crawlResults.add(this.crawlPage(anchor.attr("href"), null));
            }
            Map<String, Object> results = new HashMap<>();
            results.put("next_page", nextPage);
            results.put("items", crawlResults);
            listener.onResponse((T) StringUtils.gson.toJson(results));
        }
        catch (IOException e) {
            // Log the exception itself so the cause is visible in the logs.
            log.error("Failed to fetch duckduckgo results due to exception!", e);
            listener.onFailure(e);
        }
    }

    /**
     * Derives the next-page URL from the navigation form of a DuckDuckGo result page.
     *
     * <p>The base URL is the part of {@code endpoint} before {@code ?q}; every non-submit input of
     * the form inside {@code div.nav-link} is appended as a query parameter, with spaces in the
     * {@code q} value replaced by {@code +}.
     *
     * @param endpoint URL of the page that produced {@code doc}
     * @param doc parsed result page
     * @return the next-page URL, or {@code null} when the nav div, its form, or a single
     *         {@code ?q} split point cannot be found
     */
    private String getDDGNextPageLink(String endpoint, Document doc) {
        Element navigation = doc.selectFirst("div.nav-link");
        if (navigation == null) {
            log.warn("Failed to find next page link div for duckduckgo");
            return null;
        }
        Element nextForm = navigation.selectFirst("form");
        if (nextForm == null) {
            log.warn("Failed to find next page link form for duckduckgo");
            return null;
        }
        String[] endpointParts = endpoint.split("\\?q");
        if (endpointParts.length != 2) {
            log.warn("Failed to find next page link url for duckduckgo");
            return null;
        }
        StringBuilder link = new StringBuilder(endpointParts[0]);
        // First parameter is introduced by '?', all following ones by '&'.
        String separator = "?";
        for (Element input : nextForm.select("input:not([type=submit])")) {
            String key = input.attr("name");
            String rawValue = input.attr("value");
            String value = "q".equalsIgnoreCase(key) ? rawValue.replaceAll(" ", "+") : rawValue;
            link.append(separator).append(key).append("=").append(value);
            separator = "&";
        }
        return link.toString();
    }

    /**
     * Downloads a page and extracts its URL, title and body text.
     *
     * <p>The request uses a 10 second timeout and the crawler user agent; the Authorization header
     * is sent only when a value is supplied. The title is taken from a {@code title} element inside
     * the body when one exists, otherwise from the document title.
     *
     * @param url page to crawl
     * @param authorization optional Authorization header value; may be null
     * @return a map with {@code url}, {@code title} and {@code content}, or {@code null} when the
     *         page looks like a CAPTCHA page, has no body, or any error occurs
     */
    private Map<String, String> crawlPage(String url, String authorization) {
        try {
            Connection connection = Jsoup.connect(url).timeout(10000).userAgent(USER_AGENT);
            if (authorization != null) {
                connection.header("Authorization", authorization);
            }
            Document doc = connection.get();
            if (this.isCaptchaOrLoginPage(doc)) {
                log.debug("Skipping {} - CAPTCHA required", url);
                return null;
            }
            Element bodyElement = doc.select("body").first();
            if (bodyElement == null) {
                log.error("Failed to crawl link: {}", url);
                return null;
            }
            String title = bodyElement.select("title").text();
            if (title.isEmpty()) {
                // <title> normally lives in <head>, which a body-scoped select never sees.
                title = doc.title();
            }
            String content = bodyElement.text();
            return ImmutableMap.of("url", url, "title", title, "content", content);
        }
        catch (Exception e) {
            // Best effort: a page that cannot be crawled yields null rather than failing the search.
            log.error("Failed to crawl link: {}", url, e);
            return null;
        }
    }

    /**
     * Reports whether a page shows signs of a CAPTCHA challenge.
     *
     * <p>Signals checked: captcha-named inputs, reCAPTCHA containers, captcha images, the phrase
     * "verify you are human" anywhere in the HTML (case-insensitive), and hCaptcha containers.
     *
     * @param doc parsed page
     * @return {@code true} if any signal is present
     */
    private boolean isCaptchaOrLoginPage(Document doc) {
        String[] markerSelectors = {
            "input[name*='captcha'], input[id*='captcha']",
            ".g-recaptcha, div[data-sitekey]",
            "img[src*='captcha'], img[src*='recaptcha']"
        };
        for (String selector : markerSelectors) {
            if (!doc.select(selector).isEmpty()) {
                return true;
            }
        }
        String html = doc.html().toLowerCase(Locale.ROOT);
        if (org.apache.commons.lang3.StringUtils.containsIgnoreCase(html, "verify you are human")) {
            return true;
        }
        return !doc.select(".h-captcha").isEmpty();
    }

    /**
     * Returns the tool type identifier.
     *
     * @return the value of {@code TYPE}
     */
    public String getType() {
        return WebSearchTool.TYPE;
    }

    /**
     * Checks whether the parameters are sufficient to run a search.
     *
     * <p>Requires a non-empty {@code engine}, a non-empty {@code query} (or {@code question}), and a
     * non-empty endpoint (explicit {@code endpoint} or the engine default). In addition: google
     * needs {@code engine_id} and {@code api_key}; bing needs {@code api_key}; custom needs
     * {@code custom_api} and {@code custom_res_url_jsonpath}; duckduckgo needs nothing more.
     * An unsupported engine yields {@code false} rather than an exception.
     *
     * @param parameters tool parameters; {@code null} is treated as invalid
     * @return {@code true} if the parameters are valid for the selected engine
     */
    public boolean validate(Map<String, String> parameters) {
        if (parameters == null) {
            return false;
        }
        String engine = parameters.get("engine");
        if (org.apache.commons.lang3.StringUtils.isEmpty(engine)) {
            return false;
        }
        String query = parameters.getOrDefault("query", parameters.get("question"));
        if (org.apache.commons.lang3.StringUtils.isEmpty(query)) {
            log.warn("Query is empty");
            return false;
        }
        String defaultEndpoint;
        try {
            defaultEndpoint = this.getDefaultEndpoint(engine);
        }
        catch (IllegalArgumentException e) {
            // getDefaultEndpoint rejects unknown engines by throwing; validation reports false.
            log.error("Unsupported search engine: {}", engine);
            return false;
        }
        String endpoint = parameters.getOrDefault("endpoint", defaultEndpoint);
        if (org.apache.commons.lang3.StringUtils.isEmpty(endpoint)) {
            log.warn("Endpoint is empty");
            return false;
        }
        if ("google".equalsIgnoreCase(engine)) {
            boolean hasEngineIdAndApiKey = !org.apache.commons.lang3.StringUtils.isEmpty(parameters.get("engine_id"))
                && !org.apache.commons.lang3.StringUtils.isEmpty(parameters.get("api_key"));
            if (!hasEngineIdAndApiKey) {
                log.warn("Google search engine_id or api_key is empty");
                return false;
            }
            return true;
        }
        if ("duckduckgo".equalsIgnoreCase(engine)) {
            return true;
        }
        if ("bing".equalsIgnoreCase(engine)) {
            // Valid only when an api_key is actually present.
            boolean hasApiKey = !org.apache.commons.lang3.StringUtils.isEmpty(parameters.get("api_key"));
            if (!hasApiKey) {
                log.warn("Bing search api_key is empty");
                return false;
            }
            return true;
        }
        if ("custom".equalsIgnoreCase(engine)) {
            // NOTE(review): run() reads "endpoint", not "custom_api" — confirm this key is still required.
            String customApi = parameters.get("custom_api");
            String customResUrlJsonpath = parameters.get("custom_res_url_jsonpath");
            if (org.apache.commons.lang3.StringUtils.isEmpty(customApi) || org.apache.commons.lang3.StringUtils.isEmpty(customResUrlJsonpath)) {
                log.warn("custom search API is empty or result json path is empty");
                return false;
            }
            return true;
        }
        log.error("Unsupported search engine: {}", engine);
        return false;
    }

    /** Returns the tool name. */
    @Generated
    public String getName() {
        return name;
    }

    /** Sets the tool name. */
    @Generated
    public void setName(String name) {
        this.name = name;
    }

    /** Returns the tool description. */
    @Generated
    public String getDescription() {
        return description;
    }

    /** Sets the tool description. */
    @Generated
    public void setDescription(String description) {
        this.description = description;
    }

    /** Returns the tool version; {@code null} until one is set. */
    @Generated
    public String getVersion() {
        return version;
    }

    /** Sets the tool version. */
    @Generated
    public void setVersion(String version) {
        this.version = version;
    }

    /** Returns the tool attributes map (the live instance, not a copy). */
    @Generated
    public Map<String, Object> getAttributes() {
        return attributes;
    }

    /** Replaces the tool attributes map. */
    @Generated
    public void setAttributes(Map<String, Object> attributes) {
        this.attributes = attributes;
    }

    /** Returns the HTTP client used for search API requests. */
    @Generated
    public CloseableHttpClient getHttpClient() {
        return httpClient;
    }

    /** Replaces the HTTP client used for search API requests. */
    @Generated
    public void setHttpClient(CloseableHttpClient httpClient) {
        this.httpClient = httpClient;
    }

    /** Returns the thread pool supplied at construction. */
    @Generated
    public ThreadPool getThreadPool() {
        return threadPool;
    }

    /**
     * Singleton factory that creates {@link WebSearchTool} instances sharing one thread pool.
     * {@link #init(ThreadPool)} must supply the thread pool before tools created here are run.
     */
    public static class Factory
    implements Tool.Factory<WebSearchTool> {
        // volatile is required for the double-checked locking in getInstance() to publish safely.
        private static volatile Factory INSTANCE;
        private ThreadPool threadPool;

        /**
         * Returns the shared factory, creating it on first use.
         *
         * @return the singleton instance
         */
        public static Factory getInstance() {
            Factory existing = INSTANCE;
            if (existing != null) {
                return existing;
            }
            synchronized (WebSearchTool.class) {
                if (INSTANCE == null) {
                    INSTANCE = new Factory();
                }
                return INSTANCE;
            }
        }

        /**
         * Stores the thread pool handed to every tool created afterwards.
         *
         * @param threadPool thread pool passed to the {@link WebSearchTool} constructor
         */
        public void init(ThreadPool threadPool) {
            this.threadPool = threadPool;
        }

        /**
         * Creates a new tool bound to the configured thread pool.
         *
         * @param map creation parameters; not used by this factory
         * @return a new {@link WebSearchTool}
         */
        public WebSearchTool create(Map<String, Object> map) {
            return new WebSearchTool(this.threadPool);
        }

        /** Returns {@link WebSearchTool#DEFAULT_DESCRIPTION}. */
        public String getDefaultDescription() {
            return WebSearchTool.DEFAULT_DESCRIPTION;
        }

        /** Returns {@link WebSearchTool#TYPE}. */
        public String getDefaultType() {
            return WebSearchTool.TYPE;
        }

        /** Returns the default tool version, {@code "1.0"}. */
        public String getDefaultVersion() {
            return "1.0";
        }

        /** Returns {@link WebSearchTool#DEFAULT_ATTRIBUTES}. */
        public Map<String, Object> getDefaultAttributes() {
            return DEFAULT_ATTRIBUTES;
        }
    }
}

