<?php
/**
 * Created by PhpStorm.
 * User: turgutsaricam
 * Date: 24/08/16
 * Time: 23:50
 */

namespace WPCCrawler\Objects\Crawling\Bot;

use DateTime;
use DOMDocument;
use DOMElement;
use DOMNode;
use Exception;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\ConnectException;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\Psr7\Request;
use GuzzleHttp\Psr7\Uri;
use GuzzleHttp\RequestOptions;
use Illuminate\Support\Arr;
use Illuminate\Support\Str;
use InvalidArgumentException;
use Symfony\Component\BrowserKit\Cookie;
use Symfony\Component\BrowserKit\Response;
use Symfony\Component\DomCrawler\Crawler;
use WP_Post;
use WPCCrawler\Environment;
use WPCCrawler\Factory;
use WPCCrawler\Objects\Cache\ResponseCache;
use WPCCrawler\Objects\Crawling\Bot\Objects\AppProxyOptions;
use WPCCrawler\Objects\Crawling\Bot\Objects\AppRequestOptions;
use WPCCrawler\Objects\Crawling\Bot\Objects\CrawlerVariable;
use WPCCrawler\Objects\Enums\InformationMessage;
use WPCCrawler\Objects\Enums\InformationType;
use WPCCrawler\Objects\Events\Base\AbstractCrawlingEvent;
use WPCCrawler\Objects\Events\EventService;
use WPCCrawler\Objects\File\MediaFile;
use WPCCrawler\Objects\Filtering\Explaining\Explainers\FilterSettingExplainer;
use WPCCrawler\Objects\Filtering\Explaining\FilterExplainingService;
use WPCCrawler\Objects\Filtering\Filter\FilterList;
use WPCCrawler\Objects\Filtering\FilterDependencyProvider\FilterDependencyProvider;
use WPCCrawler\Objects\Html\ElementCreator;
use WPCCrawler\Objects\Informing\Information;
use WPCCrawler\Objects\Informing\Informer;
use WPCCrawler\Objects\Json\JsonToHtmlConverter;
use WPCCrawler\Objects\OptionsBox\OptionsBoxService;
use WPCCrawler\Objects\Settings\Enums\SettingInnerKey;
use WPCCrawler\Objects\Settings\Enums\SettingKey;
use WPCCrawler\Objects\Settings\Factory\HtmlManip\AbstractHtmlManipKeyFactory;
use WPCCrawler\Objects\Settings\SettingService;
use WPCCrawler\Objects\Traits\FindAndReplaceTrait;
use WPCCrawler\Objects\Traits\SettingsTrait;
use WPCCrawler\Utils;
use WPCCrawler\WPCCrawler;

abstract class AbstractBot {

    use FindAndReplaceTrait;
    use SettingsTrait {
        setSettings as traitSetSettings;
    }

    // TODO: PHP DOMDocument fails if there are HTML tags inside script elements. Put in a more generic way, it
    //  considers strings as HTML code. So, we need to remove HTML tags existing inside script elements. Simply, find
    //  "<script" and remove the HTML code existing until "</script>". But be careful about script elements existing
    //  inside a script element. In other words, remove HTML tags until the "</script>" that closes the first found
    //  "<script", not the script elements inside the script element. For example,
    //  "<script>var x = '<script></script>'</script>" should become "<script>var x = ''</script>", not
    //  "<script>var x = '<script></script>'". After implementing this, add it as a general setting so that the user
    //  can disable this feature if they want.
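
    /*
     * A minimal illustrative sketch of the simple (non-nested) case of the clean-up described in the TODO above. It is
     * not part of the plugin, and the $html variable is hypothetical. It strips stray HTML tags from each script body
     * before the markup reaches DOMDocument, keeping only the content up to the first "</script>". The nested case the
     * TODO mentions would additionally need depth-aware matching.
     *
     *     $clean = preg_replace_callback(
     *         '#(<script\b[^>]*>)(.*?)(</script>)#is',
     *         static function (array $m): string {
     *             // Keep the <script> tags themselves, drop any HTML tags found inside the script body.
     *             return $m[1] . strip_tags($m[2]) . $m[3];
     *         },
     *         $html
     *     );
     */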
/** @var string */ private $selectAllRegex = '^.*$'; /** @var AppClient|null */ protected $client; // /** @var array */ private $generalSettings; /** @var array */ private $defaultGeneralSettings; /** @var array */ private $botSettings; // /** @var bool */ private $useUtf8; /** @var bool */ private $convertEncodingToUtf8; /** @var bool */ private $allowCookies; /** @var bool */ private $disableSslVerification; /** @var string */ private $httpAccept; /** @var string */ private $httpUserAgent; /** @var array<string, string>|null The request headers as key-value pairs */ private $requestHeaders = null; /** @var int */ private $connectionTimeout; // /** @var array */ private $proxyList; /** @var array */ public $preparedProxyList = []; /** @var array */ public $httpProxies = []; /** @var array */ public $httpsProxies = []; /** @var int Maximum number of trial counts for proxies */ private $proxyTryLimit; // /** @var int|null */ private $siteId; /** @var WP_Post|null The site (WP Content Crawler site) which is being crawled */ private $site; /** @var string|null Stores the content of the latest response */ private $latestResponseContent; /** @var bool Stores whether the last response has been retrieved from cache or not. */ private $isLatestResponseFromCache = false; /** @var bool */ private $responseCacheEnabled = false; /** * @var Response|null Stores the response of the latest request. If the response was retrieved from the cache, this * will be null. */ private $latestResponse = null; /** * @var Exception|null Exception thrown for the latest request. If no exception is thrown for the latest request, * this is null. */ private $latestRequestException = null; /** * @param array $settings Settings for the site to be crawled * @param int|null $siteId ID of the site. * @param bool|null $useUtf8 If null, settings will be used to decide whether utf8 should be used or * not. If bool, it will be used directly without considering settings. In * other words, bool overrides the settings. * @param bool|null $convertEncodingToUtf8 True if encoding of the response should be converted to UTF8 when there * is a different encoding. If null, settings will be used to decide. If * bool, it will be used directly without considering settings. In other * words, bool overrides the settings. This is applicable only if $useUtf8 * is found as true. */ public function __construct($settings, $siteId = null, $useUtf8 = null, $convertEncodingToUtf8 = null) { if($siteId) $this->siteId = $siteId; // Get general settings $this->generalSettings = SettingService::getAllGeneralSettings(); // Get the default settings $this->defaultGeneralSettings = Factory::generalSettingsController()->getDefaultGeneralSettings(); $this->setSettings( $settings, Factory::postService()->getSingleMetaKeys(), true, $useUtf8, $convertEncodingToUtf8 ); } /** * @return Crawler|null * @since 1.11.0 */ public abstract function getCrawler(): ?Crawler; /** * @param Crawler|null $crawler * @since 1.11.0 */ public abstract function setCrawler(?Crawler $crawler): void; /** * Prepares proxies */ public function prepareProxies(): void { // Get the proxy list if the user wants to use proxy. if(!$this->getSettingForCheckbox(SettingKey::WPCC_USE_PROXY)) return; $this->proxyList = array_filter(array_map(function($proxy) { return trim($proxy); }, explode("\n", $this->getSetting(SettingKey::WPCC_PROXIES, "", true)))); // If there is no proxy, no need to proceed. 
if(!$this->proxyList) return; $tcp = "tcp://"; $http = "http://"; $https = "https://"; // Prepare proxy lists foreach ($this->proxyList as $proxy) { // If the proxy is for http, add it into httpProxies. if (Str::startsWith($proxy, $http)) { $this->httpProxies[] = $proxy; // If the proxy is for https, add it into httpsProxies. } else if (Str::startsWith($proxy, $https)) { $this->httpsProxies[] = $proxy; // Otherwise, add them to both. } else { // Get the protocol string preg_match("/^[a-z]+:\/\//i", $proxy, $matches); // If no match is found, prepend tcp if (!$matches || empty($matches)) { $proxy = $tcp . $proxy; } // Add it to the proxy lists $this->httpProxies[] = $proxy; $this->httpsProxies[] = $proxy; } $this->preparedProxyList[] = $proxy; } $this->httpProxies = array_unique($this->httpProxies); $this->httpsProxies = array_unique($this->httpsProxies); // Shuffle prepared proxy list if the user prefers it. if($this->getSettingForCheckbox(SettingKey::WPCC_PROXY_RANDOMIZE)) { shuffle($this->preparedProxyList); // Make sure the indices start from 0 and goes up 1 by 1 $this->preparedProxyList = array_values($this->preparedProxyList); } /** * Modify the proxy list. * * @param array $preparedProxyList Proxy list, prepared according to the settings * @param AbstractBot $bot The bot itself * * @return array preparedProxyList Modified proxy list * @since 1.6.3 */ $this->preparedProxyList = apply_filters('wpcc/bot/proxy-list', $this->preparedProxyList, $this); } /** * Creates a client to be used to perform browser actions * * @param AppProxyOptions|null $proxy The proxy to be used by the client */ public function createClient(?AppProxyOptions $proxy = null): void { $this->client = $this->onCreateAppClient(); $config = [ RequestOptions::COOKIES => $this->allowCookies, RequestOptions::VERIFY => $this->disableSslVerification === false, ]; if($this->connectionTimeout) { $config[RequestOptions::CONNECT_TIMEOUT] = $this->connectionTimeout; $config[RequestOptions::TIMEOUT] = $this->connectionTimeout; } // Set the proxy if($proxy) { $config[RequestOptions::PROXY] = [ $proxy->getProtocol() => $proxy->getUrl() ]; } $this->client->setClient($this->onCreateGuzzleClient($config)); if($this->httpAccept) $this->client->setServerParameter("HTTP_ACCEPT", $this->httpAccept); if($this->httpUserAgent) $this->client->setServerParameter('HTTP_USER_AGENT', $this->httpUserAgent); // Add the request headers $requestHeaders = $this->getRequestHeaders(); foreach($requestHeaders as $headerName => $headerValue) { $this->client->setServerParameter($headerName, $headerValue); } /** * Modify the client that will be used to make requests. * * @param AppClient $client The client * @param AbstractBot $bot The bot itself * * @return AppClient Modified client * @since 1.6.3 * @since 1.12.0 Uses AppClient class instead of Goutte's Client class */ $this->client = apply_filters('wpcc/bot/client', $this->client, $this); } /** * @return AppClient A new {@link AppClient} * @since 1.14.0 */ protected function onCreateAppClient(): AppClient { return new AppClient(); } /** * @param array $config Configuration of {@link Client} * @return Client A new {@link Client} * @since 1.14.0 */ protected function onCreateGuzzleClient(array $config): Client { return new Client($config); } /** * Creates a new Client and prepares it by adding Accept and User-Agent headers and enabling cookies. * Some other routines can also be done here. 
* * @return AppClient */ public function getClient(): AppClient { if ($this->client === null) { $this->createClient(); } /** @var AppClient $client */ $client = $this->client; return $client; } public function getSiteUrl(): ?string { return $this->getSetting(SettingKey::MAIN_PAGE_URL, null); } /** * Set cookies of the browser client using the settings * * @param string $url Full URL for which the cookies should be set */ private function setCookies($url): void { // Try to get the cookies specified for this site $cookies = $this->getArraySetting(SettingKey::COOKIES); if(!$cookies) return; // Get cookie domain $urlParts = parse_url($url); if (!is_array($urlParts)) return; $defaultDomain = Utils::array_get($urlParts, 'host'); if (!$defaultDomain) return; $isSecure = strpos($url, "https") !== false; // Add each cookie to this client foreach($cookies as $cookieData) { $key = $cookieData[SettingInnerKey::KEY] ?? null; $value = $cookieData[SettingInnerKey::VALUE] ?? null; if ($key === null || $value === null) continue; $rawDomain = $cookieData[SettingInnerKey::DOMAIN] ?? null; $domain = is_string($rawDomain) && $rawDomain ? ltrim(trim($rawDomain), '.') : $defaultDomain; $domain = $domain ?: $defaultDomain; $this->getClient()->getCookieJar()->set(new Cookie( $key, $value, null, null, $domain, $isSecure, true, true )); } } /** * @return array<string, string> See {@link $requestHeaders} * @since 1.12.0 */ private function getRequestHeaders(): array { if ($this->requestHeaders === null) { $this->requestHeaders = $this->createRequestHeaders() ?? []; } return $this->requestHeaders; } /** * @return array<string, string>|null The request headers, retrieved from {@link SettingKey::REQUEST_HEADERS} * setting, as key-value pairs. * @since 1.12.0 */ private function createRequestHeaders(): ?array { $headersSetting = $this->getSetting(SettingKey::REQUEST_HEADERS); if (!is_array($headersSetting) || !$headersSetting) return null; $result = []; foreach($headersSetting as $data) { $name = $data[SettingInnerKey::KEY] ?? null; $value = $data[SettingInnerKey::VALUE] ?? null; if ($name === null || $name === '' || $value === null) continue; // The HTTP headers must be prefixed with "HTTP_" $result['HTTP_' . $name] = $value; } return $result; } /** * @param array $settings * @param array $singleKeys * @param bool $prepare * @param bool|null $useUtf8 See {@link initVariables()} * @param bool|null $convertEncodingToUtf8 See {@link initVariables()} * @since 1.14.0 */ public function setSettings($settings, $singleKeys = [], $prepare = true, $useUtf8 = null, $convertEncodingToUtf8 = null): void { $this->traitSetSettings($settings, $singleKeys, $prepare); $this->initVariables( $useUtf8 ?? $this->useUtf8, $convertEncodingToUtf8 ?? $this->convertEncodingToUtf8 ); } /** * Invalidates {@link $requestHeaders} and {@link $client} so that their getters will return fresh values * * @param bool|null $useUtf8 If null, settings will be used to decide whether utf8 should be used or * not. If bool, it will be used directly without considering settings. In * other words, bool overrides the settings. * @param bool|null $convertEncodingToUtf8 True if encoding of the response should be converted to UTF8 when there * is a different encoding. If null, settings will be used to decide. If * bool, it will be used directly without considering settings. In other * words, bool overrides the settings. This is applicable only if $useUtf8 * is found as true. 
* @since 1.14.0 */ protected function initVariables($useUtf8 = null, $convertEncodingToUtf8 = null): void { $this->requestHeaders = null; // Decide which settings we should use. $this->botSettings = $this->getSetting(SettingKey::DO_NOT_USE_GENERAL_SETTINGS) ? $this->getSettings() : $this->generalSettings; /* * */ $this->useUtf8 = $useUtf8 !== null ? (bool) $useUtf8 : $this->getSettingForCheckbox(SettingKey::WPCC_MAKE_SURE_ENCODING_UTF8); $this->convertEncodingToUtf8 = $convertEncodingToUtf8 !== null ? (bool) $convertEncodingToUtf8 : $this->getSettingForCheckbox(SettingKey::WPCC_CONVERT_CHARSET_TO_UTF8); // Set client settings by using user's preferences. $this->allowCookies = $this->getSettingForCheckbox(SettingKey::WPCC_HTTP_ALLOW_COOKIES); $this->disableSslVerification = $this->getSettingForCheckbox(SettingKey::WPCC_DISABLE_SSL_VERIFICATION); // Set the ACCEPT and USER_AGENT. If these settings do not exist, use default values. $this->httpAccept = $this->getSetting(SettingKey::WPCC_HTTP_ACCEPT); $this->httpUserAgent = $this->getSetting(SettingKey::WPCC_HTTP_USER_AGENT); $this->connectionTimeout = $this->getSetting(SettingKey::WPCC_CONNECTION_TIMEOUT, 0, true); $this->connectionTimeout = !is_numeric($this->connectionTimeout) ? 0 : (int) $this->connectionTimeout; $this->proxyTryLimit = $this->getSetting(SettingKey::WPCC_PROXY_TRY_LIMIT, 0, true); $this->proxyTryLimit = !is_numeric($this->proxyTryLimit) ? 0 : (int) $this->proxyTryLimit; // Prepare the proxies $this->prepareProxies(); $this->createClient(); } /** * @param string $url Target URL * @param AppRequestOptions|null $options Request options. If this is `null`, a `GET` request is made to the * target URL. * @param array|null $findAndReplaces Find and replaces to be applied to raw response content. For the * format of this value, see {@see FindAndReplaceTrait::findAndReplace}. * Default: null * @return Crawler|null */ public function request($url, ?AppRequestOptions $options = null, $findAndReplaces = null): ?Crawler { $proxyList = $this->preparedProxyList; $protocol = Str::startsWith($url, "https") ? "https" : "http"; $proxyUrl = $proxyList && isset($proxyList[0]) ? $proxyList[0] : false; $tryCount = 0; $options = $options ?? new AppRequestOptions(); do { try { // Make the request and get the response text. If the method succeeded, the response text will be // available in $this->latestResponseContent $options->setProxy(is_string($proxyUrl) ? new AppProxyOptions($proxyUrl, $protocol) : null ); $responseText = $this->getResponseText($url, $options); if (!$responseText) return null; // Assign it as the latest response content $this->latestResponseContent = $responseText; // If there are find-and-replace options that should be applied to raw response text, apply them. if($findAndReplaces) { $this->latestResponseContent = $this->findAndReplace($findAndReplaces, $this->latestResponseContent, false); } /** * Modify the response content. * * @param string $latestResponseContent Response content after the previously-set find-and-replace settings are applied * @param string $url The URL that sent the response * @param AbstractBot $bot The bot itself * * @return string Modified response content * @since 1.7.1 */ $this->latestResponseContent = apply_filters('wpcc/bot/response-content', $this->latestResponseContent, $url, $this); // Try to get the HTML content. If this causes an error, we'll catch it and return null. 
$crawler = $this->createCrawler($this->latestResponseContent, $url); // Try to get the HTML from the crawler to see if it can do it. Otherwise, it will throw an // InvalidArgumentException, which we will catch. $crawler->html(); // Inject some variables that might be needed to be retrieved by the user $this->injectVariablesToCrawler($crawler, $url); return $crawler; } catch (ConnectException $e) { // If the URL cannot be fetched, try another proxy, if exists. $this->latestRequestException = $e; $tryCount++; // Break the loop if there is no proxy list or it is empty. // Stop if we've reached the try limit. // If the next proxy does not exist, break the loop. if(!$proxyList || ($this->proxyTryLimit > 0 && $tryCount >= $this->proxyTryLimit) || !isset($proxyList[$tryCount])) { $msgProxyUrl = $proxyUrl ? (sprintf(_wpcc('Last tried proxy: %1$s'), $proxyUrl) . ', ') : ''; Informer::add(Information::fromInformationMessage( InformationMessage::CONNECTION_ERROR, $msgProxyUrl . sprintf(_wpcc('URL: %1$s, Message: %2$s'), $url, $e->getMessage()), InformationType::INFO )->setException($e)->addAsLog()); break; } // Get the next proxy $proxyUrl = $proxyList[$tryCount]; } catch (RequestException $e) { // If the URL cannot be fetched, then just return null. $this->latestRequestException = $e; Informer::add(Information::fromInformationMessage( InformationMessage::REQUEST_ERROR, sprintf(_wpcc('URL: %1$s, Message: %2$s'), $url, $e->getMessage()), InformationType::INFO )->setException($e)->addAsLog()); break; } catch (InvalidArgumentException $e) { // If the HTML could not be retrieved, then just return null. $this->latestRequestException = $e; Informer::add(Information::fromInformationMessage( InformationMessage::HTML_COULD_NOT_BE_RETRIEVED_ERROR, sprintf(_wpcc('URL: %1$s, Message: %2$s'), $url, $e->getMessage()), InformationType::INFO )->setException($e)->addAsLog()); break; } catch (Exception $e) { // If there is an error, return null. $this->latestRequestException = $e; Informer::add(Information::fromInformationMessage( InformationMessage::ERROR, sprintf(_wpcc('URL: %1$s, Message: %2$s'), $url, $e->getMessage()), InformationType::INFO )->setException($e)->addAsLog()); break; } } while(true); return null; } /** * Enable/disable response caching * * @param bool $enabled Enable or disable the response cache. True to enable. * @param bool $clearOld True if all previously-created response caches should be cleared. 
* @return self * @since 1.8.0 * @since 1.14.0 Returns the instance */ public function setResponseCacheEnabled(bool $enabled, bool $clearOld = false): self { $this->responseCacheEnabled = $enabled; // Delete all response cache if the cache is disabled if ($clearOld) ResponseCache::getInstance()->deleteAll(); return $this; } /** * @return bool See {@link responseCacheEnabled} * @since 1.14.0 */ public function isResponseCacheEnabled(): bool { return $this->responseCacheEnabled; } /** * @return bool */ public function isLatestResponseFromCache(): bool { return $this->isLatestResponseFromCache; } /** * @param Crawler $crawler The crawler to which the variables will be injected * @param string $currentUrl The URL from which the crawler is created * @since 1.13.0 */ protected function injectVariablesToCrawler(Crawler $crawler, string $currentUrl): void { $bodyNode = $crawler->filter('body')->first()->getNode(0); if (!($bodyNode instanceof DOMNode)) return; $nowStr = (string) current_time('mysql'); $htmlCode = Utils::view('partials.crawler-variables') ->with('variables', [ new CrawlerVariable(_wpcc('Page URL'), 'wpcc-page-url', $currentUrl), new CrawlerVariable(_wpcc('Current date'), 'wpcc-current-date', $nowStr), ]) ->render(); (new ElementCreator()) ->create($bodyNode, ElementCreator::LOCATION_INSIDE_BOTTOM, $htmlCode); } /** * Makes a request to the given URL with the given method considering the cookies and using given proxy. Then, * returns the response text. * * @param string $url Target URL * @param AppRequestOptions $options The request options * @return false|string * @since 1.8.0 * @since 1.14.0 Remove $method, $proxyUrl, and $protocol params. Add $options param. */ protected function getResponseText($url, AppRequestOptions $options) { $this->latestResponse = null; $this->latestRequestException = null; // If caching is enabled, try to get the response from cache. $this->isLatestResponseFromCache = false; if ($this->isResponseCacheEnabled()) { $response = ResponseCache::getInstance()->get($options->getMethod(), $url); if ($response) { $this->isLatestResponseFromCache = true; return $response; } } // If there is a proxy, create a new client with the proxy settings. $proxy = $options->getProxy(); if($proxy) { $this->createClient($proxy); } $this->setCookies($url); /** * Fires before any request is made. * * @param AbstractBot $bot * @param string $url * @since 1.6.3 */ do_action('wpcc/before_request', $this, $url); $this->getClient()->request( $options->getMethod(), $url, [], [], [], $options->getBody() ); // Get the response and its HTTP status code $this->latestResponse = $this->getClient()->getInternalResponse(); /** * Fires just after a request is made. * * @param AbstractBot $bot * @param string $url * @since 1.6.3 */ do_action('wpcc/after_request', $this, $url, $this->latestResponse); $status = $this->latestResponse->getStatusCode(); switch($status) { // Do not proceed if the target URL is not found. case 404: Informer::add(Information::fromInformationMessage( InformationMessage::URL_NOT_FOUND, "Target URL ({$url}) is not found ({$status}).", InformationType::INFO)->addAsLog() ); return false; } // Do not proceed if there was a server error. if($status >= 500 && $status < 600) { Informer::add(Information::fromInformationMessage( InformationMessage::REMOTE_SERVER_ERROR, "Server error for URL ({$url}). Status: {$status}", InformationType::INFO)->addAsLog() ); return false; } $content = $this->latestResponse->getContent(); // If caching enabled, cache the response. 
if ($this->isResponseCacheEnabled()) { ResponseCache::getInstance()->save($options->getMethod(), $url, $content); } // Return the content of the response return $content; } /** * Throws a dummy {@link \GuzzleHttp\Exception\ConnectException} * * @noinspection PhpUnusedPrivateMethodInspection*/ private function throwDummyConnectException(): void { // @phpstan-ignore-line throw new ConnectException("Dummy exception.", new Request("GET", "httpabc")); } /** * First, makes the replacements provided, then replaces relative URLs in a crawler's HTML with direct URLs. * * @param Crawler $crawler Crawler for the page for which the replacements will be done * @param array|null $findAndReplaces An array of arrays. Inner array should have: * "regex": bool If this key exists, then search will be performed as regular expression. If not, a * normal search will be done. * "find": string What to find * "replace": string Replacement for what is found * @param bool $applyGeneralReplacements True if you want to apply the replacements inserted in general settings * page * @return Crawler A new crawler with replacements done */ public function makeInitialReplacements(Crawler $crawler, ?array $findAndReplaces = null, bool $applyGeneralReplacements = false): Crawler { $html = $crawler->html(); // First, apply general replacements if($applyGeneralReplacements) { $findAndReplacesGeneral = Utils::getOptionUnescaped(SettingKey::WPCC_FIND_REPLACE); if (is_array($findAndReplacesGeneral)) { $html = $this->findAndReplace($findAndReplacesGeneral, $html); } } // Find and replace what user wants. if($findAndReplaces) { $html = $this->findAndReplace($findAndReplaces, $html); } return new Crawler($html); } /** * Applies HTML manipulations to the given {@link Crawler} * * @param Crawler $crawler The crawler that will be manipulated * @param AbstractHtmlManipKeyFactory $keyFactory The key factory that will be used to retrieve the * keys of the manipulation settings * @param bool $applyGeneralReplacements See {@link makeInitialReplacements()} * @return Crawler The manipulated crawler * @since 1.14.0 */ public function applyHtmlManipulationSettings(Crawler $crawler, AbstractHtmlManipKeyFactory $keyFactory, bool $applyGeneralReplacements = false): Crawler { $findAndReplacesForFirstLoad = $this->getArraySetting($keyFactory->getFindReplaceFirstLoadKey(), null); // Make initial replacements $crawler = $this->makeInitialReplacements($crawler, $findAndReplacesForFirstLoad, $applyGeneralReplacements); // Apply HTML manipulations $this->applyFindAndReplaceInElementAttributes($crawler, $keyFactory->getFindReplaceElementAttributesKey()); $this->applyExchangeElementAttributeValues($crawler, $keyFactory->getExchangeElementAttributesKey()); $this->applyRemoveElementAttributes($crawler, $keyFactory->getRemoveElementAttributesKey()); $this->applyFindAndReplaceInElementHTML($crawler, $keyFactory->getFindReplaceElementHtmlKey()); $this->applyConvertJsonToHtml($crawler, $keyFactory->getConvertJsonToHtmlKey()); $this->applyConvertJsonToHtmlAuto($crawler, $keyFactory->getConvertJsonToHtmlAutoKey()); return $crawler; } /** * Resolves relative URLs * * @param Crawler|null $crawler * @param null|string $fallbackBaseUrl If a base URL is not found in the crawler, this URL will be used as the base. */ public function resolveRelativeUrls(?Crawler $crawler, ?string $fallbackBaseUrl = null): void { if (!$crawler) return; // If there is a base URL defined in the HTML, use that to resolve the relative URLs. 
$baseHref = $this->extractData($crawler, 'base', 'href', null, true, true); // If the base URL does not exist, use the fallback URL. if (!$baseHref || !is_string($baseHref)) $baseHref = $fallbackBaseUrl; // Stop if there is no base URL. if (!$baseHref) return; // Create a URI for the base URL $baseUri = new Uri($baseHref); // Define the attributes whose values will be resolved // https://html.spec.whatwg.org/#dynamic-changes-to-base-urls $attributes = ['src', 'href', 'cite', 'ping']; // Resolve the values of the attributes foreach($attributes as $attr) { $this->resolveRelativeUrlForAttribute($crawler, $baseUri, $attr); } } /* * HTML MANIPULATION */ /** * Applies changes configured in "find and replace in element attributes" option. * * @param Crawler|null $crawler The crawler on which the changes will be done * @param string $optionKey The key that stores the options for "find and replace in element attributes" input's * values */ public function applyFindAndReplaceInElementAttributes(?Crawler $crawler, string $optionKey): void { if (!$crawler) return; $data = $this->getSetting($optionKey); if(!$data) return; foreach($data as $item) { $this->findAndReplaceInElementAttribute( $crawler, [Utils::array_get($item, SettingInnerKey::SELECTOR)], Utils::array_get($item, SettingInnerKey::ATTRIBUTE), Utils::array_get($item, SettingInnerKey::FIND), Utils::array_get($item, SettingInnerKey::REPLACE), isset($item[SettingInnerKey::REGEX]) ); } } /** * Applies changes configured in "exchange element attributes" option. * * @param Crawler|null $crawler The crawler on which the changes will be done * @param string $optionKey The key that stores the options for "exchange element attributes" input's values */ public function applyExchangeElementAttributeValues(?Crawler $crawler, string $optionKey): void { if (!$crawler) return; $data = $this->getSetting($optionKey); if(!$data) return; foreach($data as $item) { $this->exchangeElementAttributeValues( $crawler, [Utils::array_get($item, SettingInnerKey::SELECTOR)], Utils::array_get($item, SettingInnerKey::ATTRIBUTE_1), Utils::array_get($item, SettingInnerKey::ATTRIBUTE_2) ); } } /** * Applies changes configured in "remove element attributes" option. * * @param Crawler|null $crawler The crawler on which the changes will be done * @param string $optionKey The key that stores the options for "remove element attributes" input's values */ public function applyRemoveElementAttributes(?Crawler $crawler, string $optionKey): void { if (!$crawler) return; $data = $this->getSetting($optionKey); if(!$data) return; foreach($data as $item) { $this->removeElementAttributes( $crawler, [Utils::array_get($item, SettingInnerKey::SELECTOR)], Utils::array_get($item, SettingInnerKey::ATTRIBUTE) ); } } /** * Applies changes configured in "find and replace in element HTML" option. 
* * @param Crawler|null $crawler The crawler on which the changes will be done * @param string $optionKey The key that stores the options for "find and replace in HTML" input's values */ public function applyFindAndReplaceInElementHTML(?Crawler $crawler, string $optionKey): void { if (!$crawler) return; $data = $this->getSetting($optionKey); if(!$data) return; foreach($data as $item) { $this->findAndReplaceInElementHTML( $crawler, [Utils::array_get($item, SettingInnerKey::SELECTOR)], Utils::array_get($item, SettingInnerKey::FIND), Utils::array_get($item, SettingInnerKey::REPLACE), isset($item[SettingInnerKey::REGEX]) ); } } /** * Finds JSON strings via the CSS selectors configured in a "convert JSON to HTML" setting, converts them to HTML, * and appends them as the last child of the `body` element of the given crawler * * @param Crawler|null $crawler The crawler on which the changes will be done * @param string $settingName The key that stores the options for "convert JSON to HTML" setting's values * @since 1.14.0 */ public function applyConvertJsonToHtml(?Crawler $crawler, string $settingName): void { if (!$crawler) { return; } $selectors = $this->getArraySetting($settingName); if (!$selectors) return; $this->convertJsonToHtml($crawler, $selectors); } /** * Finds JSON strings automatically, if enabled by a "convert JSON to HTML automatically" setting, converts them to * HTML, and appends them as the last child of the `body` element of the given crawler * * @param Crawler|null $crawler The crawler on which the changes will be done * @param string $settingName The key that stores the value for "convert JSON to HTML automatically" setting * @since 1.14.0 */ public function applyConvertJsonToHtmlAuto(?Crawler $crawler, string $settingName): void { if (!$crawler || !$this->getSettingForCheckbox($settingName)) { return; } $this->convertJsonToHtmlAuto($crawler); } /* * */ /** * Removes the items with a 'start' position less than the given pos value. * * @param array $itemsArray An array of items. Each item in the array should have 'start' key and its value. * @param int $pos The reference DOM position. The elements with a 'start' position less than this will be removed. */ public function removeItemsBeforePos(&$itemsArray, $pos): void { if(!$pos) return; foreach($itemsArray as $key => $item) { if($item["start"] < $pos) { unset($itemsArray[$key]); } } } /** * @param Crawler|null $crawler The crawler from which the elements will be removed * @param array|string $selectors A selector or an array of selectors for the elements to be removed. This can also * be an array of arrays, where each inner array contains the selector in "selector" * key. 
*/ public function removeElementsFromCrawler(?Crawler $crawler, $selectors = []): void { if (!$crawler) return; $results = $this->getElementsFromCrawler($crawler, $selectors); if (!$results) return; foreach($results as $node) { $this->removeNode($node); } } /** * Immediately apply all filters of a filter setting * * @param string $settingKey Key of the setting that stores filters * @param FilterDependencyProvider $provider Provider that will inject the dependencies * @since 1.11.0 */ public function applyFilterSetting(string $settingKey, FilterDependencyProvider $provider): void { $list = FilterList::fromJson($this->getSetting($settingKey, null)); if (!$list) return; $list->applyAll($provider); } /** * Remove a node from its document * * @param Crawler $node * @since 1.11.0 */ public function removeNode($node): void { try { foreach ($node as $child) { if ($child->parentNode === null) { continue; } $child->parentNode->removeChild($child); } } catch(Exception $e) { Informer::addError($e->getMessage())->setException($e)->addAsLog(); } } /** * @param Crawler|null $crawler The crawler from which the elements will be retrieved * @param array|string $selectors A selector or an array of selectors for the elements to be retrieved. This can * also be an array of arrays, where each inner array contains the selector in * "selector" key. * @return Crawler[]|null */ public function getElementsFromCrawler($crawler, $selectors = []): ?array { if(empty($selectors) || !$crawler) return null; if(!is_array($selectors)) $selectors = [$selectors]; $results = []; foreach ($selectors as $selectorData) { if (!$selectorData) continue; // Get the selector $selector = is_array($selectorData) ? Utils::array_get($selectorData, SettingInnerKey::SELECTOR) : $selectorData; // If there is no selector, continue with the next one. if (!$selector) continue; // Remove each item found by the selector try { $crawler->filter($selector)->each(function ($node) use (&$results) { /** @var Crawler $node */ $results[] = $node; }); } catch(Exception $e) { Informer::addError($selector . " - " . $e->getMessage())->setException($e)->addAsLog(); } } return $results ?: null; } /** * Replace the values of two attributes of each element found via selectors. E.g. * "<img src='srcVal' data-src='dataSrcVal'>" becomes "<img src='dataSrcVal' data-src='srcVal'>" * * @param Crawler|null $crawler * @param array|string $selectors * @param string|null $firstAttrName Name of the first attribute. E.g. "src" * @param string|null $secondAttrName Name of the seconds attribute. E.g. "data-src" */ public function exchangeElementAttributeValues($crawler, $selectors, $firstAttrName, $secondAttrName): void { if(empty($selectors) || !$crawler || $firstAttrName === null || $firstAttrName === '' || $secondAttrName === null || $secondAttrName === '') return; if(!is_array($selectors)) $selectors = [$selectors]; foreach ($selectors as $selector) { if (!$selector) continue; try { $crawler->filter($selector)->each(function ($node) use (&$firstAttrName, &$secondAttrName) { /** @var Crawler $node */ /** @var DOMElement $child */ $child = $node->getNode(0); // Get values of the attributes $firstAttrVal = $child->getAttribute($firstAttrName); $secondAttrVal = $child->getAttribute($secondAttrName); // Exchange the values if($secondAttrVal !== "") { $child->setAttribute($firstAttrName, $secondAttrVal); $child->setAttribute($secondAttrName, $firstAttrVal); } }); } catch(Exception $e) { Informer::addError($selector . " - " . 
$e->getMessage())->setException($e)->addAsLog(); } } } /** * Modify a node with a callback. * * @param Crawler|null $crawler The crawler in which the elements will be searched for * @param array|string $selectors Selectors to be used to find the elements. * @param callable|null $callback A callback that takes only one argument, which is the found node, e.g. * function(Crawler $node) {} */ public function modifyElementWithCallback($crawler, $selectors, $callback): void { if(empty($selectors) || !$crawler || !is_callable($callback)) return; if(!is_array($selectors)) $selectors = [$selectors]; foreach ($selectors as $selector) { if (!$selector) continue; try { $crawler->filter($selector)->each(function ($node) use (&$callback) { /** @var Crawler $node */ call_user_func($callback, $node); }); } catch(Exception $e) { Informer::addError($selector . " - " . $e->getMessage())->setException($e)->addAsLog(); } } } /** * Remove an attribute of the elements found via selectors. * * @param Crawler|null $crawler * @param array|string $selectors * @param string|null $attrName Name of the attribute. E.g. "src". You can set more than one attribute by writing * the attributes comma-separated. E.g. "src,data-src,width,height" */ public function removeElementAttributes($crawler, $selectors, $attrName): void { if(empty($selectors) || !$attrName || !$crawler) return; if(!is_array($selectors)) $selectors = [$selectors]; // Prepare the attribute names $attrNames = array_map(function($name) { return trim($name); }, array_filter(explode(",", $attrName))); foreach ($selectors as $selector) { if (!$selector) continue; try { $crawler->filter($selector)->each(function ($node) use (&$attrNames) { /** @var Crawler $node */ /** @var DOMElement $child */ $child = $node->getNode(0); // Remove the attribute foreach($attrNames as $attrName) $child->removeAttribute($attrName); }); } catch(Exception $e) { Informer::addError($selector . " - " . $e->getMessage())->setException($e)->addAsLog(); } } } /** * Find and replace in the value of an attribute of the elements found via selectors. * * @param Crawler|null $crawler * @param array|string $selectors * @param string|null $attrName Name of the attribute. E.g. "src" * @param string|null $find * @param string $replace * @param bool $regex True if find and replace strings should be considered as regular expressions. */ public function findAndReplaceInElementAttribute($crawler, $selectors, $attrName, $find, $replace, $regex = false): void { if(empty($selectors) || !$attrName || !$crawler) return; // If the "find" is empty, assume the user wants to find everything. if($find === null || (!$find && $find !== "0")) { $find = $this->selectAllRegex; $regex = true; } if(!is_array($selectors)) $selectors = [$selectors]; foreach ($selectors as $selector) { if (!$selector) continue; try { $crawler->filter($selector)->each(function ($node) use (&$attrName, &$find, &$replace, &$regex) { /** @var Crawler $node */ /** @var DOMElement $child */ $child = $node->getNode(0); // Get value of the attribute $val = $child->getAttribute($attrName); // Find and replace in the attribute's value and set the new attribute value $child->setAttribute($attrName, $this->findAndReplaceSingle($find, $replace, $val, $regex)); }); } catch(Exception $e) { Informer::addError("{$selector}, {$attrName} - " . $e->getMessage())->setException($e)->addAsLog(); } } } /** * Find and replace in an element's HTML code. 
* * @param Crawler|null $crawler * @param array|string $selectors * @param string|null $find * @param string $replace * @param bool $regex True if find and replace strings should be considered as regular expressions. */ public function findAndReplaceInElementHTML($crawler, $selectors, $find, $replace, $regex = false): void { if(empty($selectors) || !$crawler) return; // If the "find" is empty, assume the user wants to find everything. if($find === null || (!$find && $find !== "0")) { $find = $this->selectAllRegex; $regex = true; } if(!is_array($selectors)) $selectors = [$selectors]; foreach ($selectors as $selector) { if (!$selector) continue; try { $crawler->filter($selector)->each(function ($node) use (&$find, &$replace, &$regex) { /** @var Crawler $node */ $firstHtml = Utils::getNodeHTML($node); $html = $this->findAndReplaceSingle($find, $replace, $firstHtml, $regex, false); // If there is no change, continue with the next one. if ($html === $firstHtml) return; if(mb_strpos($html, "<html") !== false || mb_strpos($html, "<body") !== false) return; $this->replaceElement($node, $html); }); } catch(Exception $e) { Informer::addError("{$selector} - " . $e->getMessage())->setException($e)->addAsLog(); } } } /** * Finds JSON strings via the CSS selectors, converts them to HTML, and appends them as the last child of the `body` * element of the given crawler * * @param Crawler|null $crawler The crawler on which the changes will be done * @param array[] $selectors CSS selector data * @since 1.14.0 */ public function convertJsonToHtml(?Crawler $crawler, ?array $selectors): void { if (!$crawler) { return; } $jsonStrings = $this->extractValuesWithMultipleSelectorData($crawler, $selectors, 'text'); if (!is_array($jsonStrings)) { return; } $jsonStrings = Arr::flatten($jsonStrings); JsonToHtmlConverter::fromJsonIntoCrawler($crawler, $jsonStrings); } /** * Finds JSON strings automatically, converts them to HTML, and appends them as the last child of the `body` element * of the given crawler * * @param Crawler|null $crawler The crawler on which the changes will be done * @since 1.14.0 */ public function convertJsonToHtmlAuto(?Crawler $crawler): void { if (!$crawler) { return; } JsonToHtmlConverter::fromCrawlerIntoCrawlerAuto($crawler); } /** * Replaces an element with another element * * @param Crawler $node The element that will be replaced * @param string $html The HTML code of the new element * @return DOMNode[]|null If a replacement is made, returns the new elements. Otherwise, null. This returns an * array of new nodes, because the replacement HTML code might contain multiple nodes. * @since 1.12.0 */ public function replaceElement(Crawler $node, string $html): ?array { $child = $node->getNode(0); if (!$child) return null; // Create a dummy crawler so that we can get the manipulated HTML as DOMElement. We are able to add // a DOMElement to the document, but not an HTML string directly. $dummyCrawler = $this->createDummyCrawler($html); // Get the child element as DOMElement from the dummy crawler. $childrenContainer = $dummyCrawler->filter('body > div')->first() ->getNode(0); if (!$childrenContainer) return null; // Import the new child element to the main crawler's document. This is vital, because DOMElement::replaceChild // requires the new child to be in the same document. 
/** @var DOMNode|null $parentNode */ $parentNode = $child->parentNode; if (!$parentNode) return null; /** @var DOMDocument|null $doc */ $doc = $parentNode->ownerDocument; if (!$doc) return null; /** @var DOMNode[] $newNodes */ $newNodes = []; foreach($childrenContainer->childNodes as $childNode) { if (!($childNode instanceof DOMNode)) continue; // Import the child into the document first $newChildNode = $doc->importNode($childNode, true); if (!($newChildNode instanceof DOMNode)) continue; // Move the imported child as the previous sibling of the child $parentNode->insertBefore($newChildNode, $child); $newNodes[] = $newChildNode; } // Now that all the new nodes are added as previous siblings to the current child, we can remove the current // child to complete the replacement. $parentNode->removeChild($child); return $newNodes; } /** * Get values for a selector setting. This applies the options box configurations as well. * * @param Crawler|null $crawler See {@link AbstractBot::extractValuesWithSelectorData} * @param string $settingName Name of the setting from which the selector data will be retrieved * @param string $defaultAttr See {@link AbstractBot::extractValuesWithSelectorData} * @param false|null|string $contentType See {@link AbstractBot::extractData} * @param bool $singleResult See {@link AbstractBot::extractData} * @param bool $trim See {@link AbstractBot::extractData} * @return array|mixed|null If there are no results, returns null. If $singleResult is true, returns a single * result. Otherwise, returns an array. If $singleResult is false, returns an array of * arrays, where each inner array is the result of a single selector data. */ public function extractValuesForSelectorSetting(?Crawler $crawler, string $settingName, $defaultAttr, $contentType = false, $singleResult = false, $trim = true) { if (!$crawler) return null; $selectors = $this->getArraySetting($settingName); if (!$selectors) return null; return $this->extractValuesWithMultipleSelectorData( $crawler, $selectors, $defaultAttr, $contentType, $singleResult, $trim ); } /** * Extract values from the crawler using selector data. * * @param Crawler|null $crawler The crawler from which the data should be extracted * @param array[]|null $selectors An array of selector data that have these keys: "selector" (optional), * "attr" (optional), "options_box" (optional). * @param string $defaultAttr Attribute value that will be used if the attribute is not found in the * settings * @param false|null|string $contentType See {@link AbstractBot::extractData} * @param bool $singleResult See {@link AbstractBot::extractData} * @param bool $trim See {@link AbstractBot::extractData} * @return array|null|string See {@link AbstractBot::extractData} * @since 1.14.0 */ public function extractValuesWithMultipleSelectorData(?Crawler $crawler, ?array $selectors, $defaultAttr, $contentType = false, $singleResult = false, $trim = true) { if (!$selectors) return null; $results = []; // TODO: If there is no selector but options box options, they might be applied. For example, if there are // templates, the user might want to define something, without using a selector. If this is done, it must be // applicable in every setting having an options box, not just here. 
foreach($selectors as $data) { // Get the result for this selector data $result = $this->extractValuesWithSelectorData($crawler, $data, $defaultAttr, $contentType, $singleResult, $trim); if ($result === null) { continue; } $results[] = $result; // One match is enough if ($singleResult) break; } if (!$results) return null; return $singleResult ? $results[0] : $results; } /** * Extract values from the crawler using selector data. * * @param Crawler|null $crawler The crawler from which the data should be extracted * @param array|null $data Selector data that have these keys: "selector" (optional), "attr" * (optional), "options_box" (optional). * @param string $defaultAttr Attribute value that will be used if the attribute is not found in the * settings * @param false|null|string $contentType See {@link AbstractBot::extractData} * @param bool $singleResult See {@link AbstractBot::extractData} * @param bool $trim See {@link AbstractBot::extractData} * @return array|null|string See {@link AbstractBot::extractData} * @since 1.8.0 */ public function extractValuesWithSelectorData(?Crawler $crawler, $data, $defaultAttr, $contentType = false, $singleResult = false, $trim = true) { if (!$crawler || $data === null) return null; $selector = Utils::array_get($data, SettingInnerKey::SELECTOR); $attr = Utils::array_get($data, SettingInnerKey::ATTRIBUTE); if (!$attr) $attr = $defaultAttr; $result = $this->extractData($crawler, $selector, $attr, $contentType, $singleResult, $trim); if ($result === null) { return null; } // Apply options box settings $optionsBoxApplier = OptionsBoxService::getInstance()->createApplierFromSelectorData($data); if ($optionsBoxApplier) { $result = is_array($result) ? $optionsBoxApplier->applyToArray($result, $contentType ? 'data' : null) : $optionsBoxApplier->apply($result); } return $result; } /** * Extracts specified data from the crawler * * @param Crawler|null $crawler * @param array|string $selectors A single selector as string or more than one selector as array * @param string|string[] $dataType "text", "html", "href" or attribute of the element (e.g. "content") * @param string|null|false $contentType Type of found content. This will be included as "type" in resultant * array. * @param bool $singleResult True if you want a single result, false if you want all matches. If true, * the first match will be returned. * @param bool $trim True if you want each match trimmed, false otherwise. * @return array|null|string If found, the result. Otherwise, null. If there is a valid content * type, then the result will include an array including the position of * the found value in the crawler HTML. If the content type is null or * false, then just the found value will be included. <p><p> If there are * more than one dataType: * <li>If more than one match is found, then the "data" value will be an * array.</li> * <li>If only one match is found, then the data will be a string.</li> */ public function extractData(?Crawler $crawler, $selectors, $dataType, $contentType, bool $singleResult, bool $trim) { // Check if the selectors are empty. If so, do not bother. if(empty($selectors) || !$crawler) return null; // If the selectors is not an array, make it one. if(!is_array($selectors)) $selectors = [$selectors]; // If the data type is not an array, make it one. 
if(!is_array($dataType)) { $dataType = [$dataType]; } else { // Make sure each type in the data type array is unique $dataType = array_unique($dataType); } $crawlerHtml = $crawler->html(); $results = []; foreach($selectors as $selector) { if(!$selector) continue; if($singleResult && !empty($results)) break; $offset = 0; try { $crawler->filter($selector)->each(function($node) use ($dataType, $singleResult, $trim, $contentType, &$results, &$offset, &$crawlerHtml) { /** @var Crawler $node */ // If single result is needed and we have found one, then do not continue. if($singleResult && !empty($results)) return; $value = null; foreach ($dataType as $dt) { try { $val = null; switch ($dt) { case "text": $val = $node->text(); break; case "html": $val = Utils::getNodeHTML($node); break; default: $val = $node->attr($dt); break; } if($val !== null) { if($trim) $val = trim($val); if($val !== '') { if(!$value) $value = []; $value[$dt] = $val; } } } catch (InvalidArgumentException $e) { } } try { if($value) { if ($contentType) { $html = Utils::getNodeHTML($node); $start = mb_strpos($crawlerHtml, $html, $offset); $results[] = [ "type" => $contentType, "data" => sizeof($value) == 1 ? array_values($value)[0] : $value, "start" => $start, "end" => $start + mb_strlen($html) ]; $offset = $start + 1; } else { $results[] = sizeof($value) == 1 ? array_values($value)[0] : $value; } } } catch(InvalidArgumentException $e) { } }); } catch(Exception $e) { Informer::addError("{$selector} - " . $e->getMessage())->setException($e)->addAsLog(); } } // Return the results if($singleResult && !empty($results)) { return $results[0]; } else if(!empty($results)) { return $results; } return null; } /** * Modify media elements in the crawler. This method finds the elements that belongs to the given media file and * modifies those elements with the given callback. In fact, the modification is done by the callback itself. This * method only finds the elements. * * @param Crawler $crawler The crawler in which the media file will be searched for * @param MediaFile $mediaFile The media file * @param callable $callback A callback that takes a MediaFile and a DOMElement instance and returns void. E.g. * function(MediaFile $mediaFile, DOMElement $domElement) {} * @since 1.8.0 */ public function modifyMediaElement($crawler, $mediaFile, $callback): void { // Set media alt and title in the elements having this media's local URL as their 'src' value $this->modifyElementWithCallback($crawler, '[src^="' . $mediaFile->getLocalUrl() . '"]', function($node) use (&$mediaFile, &$callback) { /** @var Crawler $node */ /** @var DOMElement $child */ $child = $node->getNode(0); call_user_func($callback, $mediaFile, $child); } ); } /** * Notify the users via email if no value is found via one of the supplied CSS selectors. * * @param string $url The URL * @param Crawler $crawler The crawler in which selectors will be looked for * @param array $selectors CSS selectors. Each inner array should have <b>selector</b> and * <b>attr</b> keys. * @param string $lastEmailDateMetaKey Post meta key that stores the last time a similar email sent. * @param bool $bypassInactiveNotifications True if you want to run this method even if notifications are not * activated in settings. */ protected function notifyUser($url, $crawler, $selectors, $lastEmailDateMetaKey, $bypassInactiveNotifications = false): void { if(!$bypassInactiveNotifications && !SettingService::isNotificationActive()) return; // Check if the defined interval has passed. 
$this->addSingleKey($lastEmailDateMetaKey); $lastEmailDate = $this->getSetting($lastEmailDateMetaKey); $emailIntervalInSeconds = SettingService::getEmailNotificationInterval() * 60; if($lastEmailDate) { $lastEmailDate = strtotime($lastEmailDate); if(time() - $lastEmailDate < $emailIntervalInSeconds) return; } $this->loadSiteIfPossible(); // Get the email addresses that can be sent notifications $emailAddresses = SettingService::getNotificationEmails(); if(!$emailAddresses) return; $messagesEmptyValue = []; // Check each selector for existence. foreach($selectors as $selectorData) { $selector = Utils::getValueFromArray($selectorData, SettingInnerKey::SELECTOR, false); if(!$selector) continue; $attr = Utils::getValueFromArray($selectorData, SettingInnerKey::ATTRIBUTE, "text"); $data = $this->extractData($crawler, $selector, $attr, null, false, true); // If no value is found by the selector, add a new message string including selector's details. if($data === null) { $messagesEmptyValue[] = $selector . " | " . $attr; } } // If there are messages, send them to the email addresses. if(!empty($messagesEmptyValue)) { // We will send HTML. add_filter('wp_mail_content_type', function() { return 'text/html'; }); $siteName = $this->site ? " (" . $this->site->post_title . ") " : ''; $subject = _wpcc("Empty CSS selectors found") . $siteName . " - " . _wpcc("WP Content Crawler"); // Prepare the body $body = Utils::view('emails.notification-empty-value')->with([ 'url' => $url, 'messagesEmptyValue' => $messagesEmptyValue, 'site' => $this->site ])->render(); /** * Fires just before notification emails are sent * * @param AbstractBot $bot The bot itself * @param string $url URL of the page in which at least a value is found to be empty * @param Crawler $crawler The crawler in which selectors will be looked for * @param array $selectors CSS selectors that were used to find empty-valued elements * @param string $lastEmailDateMetaKey Post meta key that stores the last time a similar email sent. * @param array $emailAddresses Email addresses to which a notification email should be sent * @param string $subject Subject of the notification email * @param string $body Body of the notification email * @since 1.6.3 */ do_action('wpcc/notification/before_notify', $this, $url, $crawler, $selectors, $lastEmailDateMetaKey, $emailAddresses, $subject, $body); // Send emails foreach($emailAddresses as $to) { wp_mail($to, $subject, $body); } /** * Fires just after notification emails are sent * * @param AbstractBot $bot The bot itself * @param string $url URL of the page in which at least a value is found to be empty * @param Crawler $crawler The crawler in which selectors will be looked for * @param array $selectors CSS selectors that were used to find empty-valued elements * @param string $lastEmailDateMetaKey Post meta key that stores the last time a similar email sent. * @param array $emailAddresses Email addresses to which a notification email should be sent * @param string $subject Subject of the notification email * @param string $body Body of the notification email * @since 1.6.3 */ do_action('wpcc/notification/after_notify', $this, $url, $crawler, $selectors, $lastEmailDateMetaKey, $emailAddresses, $subject, $body); } // Update last email sending date as now. if($this->siteId) Utils::savePostMeta($this->siteId, $lastEmailDateMetaKey, (new DateTime())->format(Environment::mysqlDateFormat())); } /* * */ /** * Creates a crawler with the right encoding. 
* * @param string $html * @param string $url * @return Crawler */ public function createCrawler($html, $url): Crawler { if($this->useUtf8) { // Check if charset is defined as meta Content-Type. If so, replace it. // The regex below is taken from Symfony\Component\DomCrawler\Crawler::addContent /** @noinspection RegExpRedundantEscape */ $regexCharset = '/\<meta[^\>]+charset *= *["\']?([a-zA-Z\-0-9_:.]+)/i'; if(preg_match($regexCharset, $html, $matches)) { // Change only if it is not already utf-8 $charset = $matches[1]; if(strtolower($charset) !== "utf-8") { // Convert the encoding from the defined charset to UTF-8 if it is required if ($this->convertEncodingToUtf8) { // Get available encodings $availableEncodings = array_map('strtolower', mb_list_encodings()); // Make sure the encoding exists in available encodings. if (in_array(strtolower($charset), $availableEncodings)) { $html = mb_convert_encoding($html, "UTF-8", $charset); // Now match again to get the right positions after converting the encoding. I'm not sure if the // positions might change after converting the encoding. Hence, to be on the safe side, we're // matching again. preg_match($regexCharset, $html, $matches); // Otherwise, we cannot convert the encoding. Inform the user. } else { Informer::addError(sprintf(_wpcc('Encoding %1$s does not exist in available encodings.'), $charset)) ->addAsLog(); } } if ($matches) { $pos0 = stripos($html, $matches[0]); $pos1 = $pos0 + stripos($matches[0], $matches[1]); $html = substr_replace($html, "UTF-8", $pos1, strlen($matches[1])); } } // Otherwise } else { // Make sure the charset is UTF-8 /** @noinspection HtmlRequiredTitleElement */ $html = $this->findAndReplaceSingle( '(<head>|<head\s[^>]+>)', '$1 <meta charset="UTF-8" />', $html, true ); } } /* * PREPARE THE HTML */ // Remove chars that come before the first "<" $posFirstLessThanChar = mb_strpos($html, "<"); if (is_int($posFirstLessThanChar)) { $html = mb_substr($html, $posFirstLessThanChar); } // Remove chars that come after the last ">" $posLastGreaterThanChar = mb_strrpos($html, ">"); if (is_int($posLastGreaterThanChar)) { $html = mb_substr($html, 0, $posLastGreaterThanChar + 1); } /* * CREATE THE CRAWLER */ $crawler = new Crawler(null, $url); $crawler->addContent($html); return $crawler; } /** * Creates a dummy Crawler from an HTML. * * @param string|null $html * @return Crawler */ public function createDummyCrawler(?string $html): Crawler { $html = $html !== null ? $html : ''; /** @noinspection HtmlRequiredTitleElement */ /** @noinspection HtmlRequiredLangAttribute */ $html = "<html><head><meta charset='utf-8'></head><body><div>" . $html . 
"</div></body></html>"; return new Crawler($html); } /** * Gets the content from a dummy crawler created by {@link createDummyCrawler} * * @param Crawler $dummyCrawler * @return string */ public function getContentFromDummyCrawler($dummyCrawler): string { $divWrappedHtml = Utils::getNodeHTML($dummyCrawler->filter('body > div')->first()); return mb_substr($divWrappedHtml, 5, mb_strlen($divWrappedHtml) - 11); } /** * @return int|null Site ID for which this bot is created */ public function getSiteId(): ?int { return $this->siteId; } /** * @return WP_Post|null See {@link $site} * @since 1.11.0 */ public function getSite(): ?WP_Post { $this->loadSiteIfPossible(); return $this->site; } /** * @return string|null See {@link $latestResponseContent} */ public function getLatestResponseContent(): ?string { return $this->latestResponseContent; } /** * @return Response|null See {@link $latestResponse} * @since 1.11.0 */ public function getLatestResponse(): ?Response { return $this->latestResponse; } /** * @return Exception|null See {@link $latestRequestException} * @since 1.11.0 */ public function getLatestRequestException(): ?Exception { return $this->latestRequestException; } /* * PROTECTED METHODS */ /** * Trigger an {@link AbstractCrawlingEvent} * * @param string $eventClass Class name of an {@link AbstractCrawlingEvent} * @return $this * @throws Exception See {@link AbstractCrawlingEvent::notify()} * @since 1.11.0 */ protected function triggerEvent(string $eventClass): self { $event = EventService::getInstance()->getEvent($eventClass); if ($event instanceof AbstractCrawlingEvent) { $event->notify(); } return $this; } /** * Initialize filters defined in a filter setting. This creates the filters and registers them to their events so * that they will be executed when the events are triggered. * * @param string $settingKey Key of the setting that stores the filter details. One * of the constants defined in {@link SettingKey}. * @param string $defaultConditionEventCls Name of an {@link AbstractEvent} class that is * registered in {@link EventService}. This will be * provided to {@link FilterList::subscribeAll()}. * @param FilterDependencyProvider $provider Provider that will inject the dependencies * @param string|null $name Name of the setting. This will be used when explaining * the filter setting. * @since 1.11.0 */ protected function initializeFilterSetting(string $settingKey, string $defaultConditionEventCls, FilterDependencyProvider $provider, ?string $name = null): void { $list = FilterList::fromJson($this->getSetting($settingKey, null)); if (!$list) return; // If this is a test, add the filter list to the filter explaining service so that the explanations of the // filters will be added to the response. if (WPCCrawler::isDoingGeneralTest()) { FilterExplainingService::getInstance()->addFilterSettingExplainer(new FilterSettingExplainer( $name ?: _wpcc('(No name)'), $list )); } $defaultConditionEvent = EventService::getInstance()->getEvent($defaultConditionEventCls); if (!$defaultConditionEvent) { Informer::addError(_wpcc('Filters could not be registered because the default event does not exist.')) ->addAsLog(); return; } $list->subscribeAll($provider, $defaultConditionEvent); } /* * PRIVATE METHODS */ /** * Sets {@link $site} variable if there is a valid {@link $siteId}. 
*/ private function loadSiteIfPossible(): void { if(!$this->site && $this->siteId) { $this->site = get_post($this->siteId); } } /** * @param Crawler|null $crawler The crawler in which the changes will be applied * @param Uri $baseUri Base URI that is retrieved by using <base> tag's href attribute * @param string $attr Target attribute. E.g. 'href', or 'cite', or 'ping', or 'src' */ private function resolveRelativeUrlForAttribute(?Crawler $crawler, Uri $baseUri, string $attr): void { if (!$crawler) return; $crawler->filter('[' . $attr . ']')->each(function ($node) use (&$attr, &$baseUri) { /** @var Crawler $node */ /** @var DOMElement $child */ $child = $node->getNode(0); // Get value of the attribute $val = $child->getAttribute($attr); // If there is no value, stop. if (!$val) return; $resolved = Utils::resolveUrl($baseUri, $val); if (!$resolved) return; // Set the new attribute value as the resolved URI $child->setAttribute($attr, $resolved); }); } }
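
/*
 * Illustrative usage sketch; this is not shipped with the plugin, and the subclass name and the $settings/$siteId
 * values are hypothetical. AbstractBot is abstract, so a concrete bot must implement getCrawler()/setCrawler(); the
 * typical flow is then: construct the bot with the site's settings, call request() to fetch and parse a page, and read
 * values from the returned crawler, e.g. with extractData().
 *
 *     class ExampleBot extends AbstractBot {
 *         private $crawler = null; // Crawler|null
 *         public function getCrawler(): ?Crawler { return $this->crawler; }
 *         public function setCrawler(?Crawler $crawler): void { $this->crawler = $crawler; }
 *     }
 *
 *     $bot = new ExampleBot($settings, $siteId);
 *     $crawler = $bot->request('https://example.com/sample-post');
 *     if ($crawler !== null) {
 *         $title = $bot->extractData($crawler, 'h1', 'text', null, true, true);
 *     }
 */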