# Copyright 2023 Reid Swanson.
#
# This file is part of scrachy.
#
# scrachy is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# scrachy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with scrachy. If not, see <https://www.gnu.org/licenses/>.
"""
The addons provided by Scrachy.
"""
from __future__ import annotations
# Python Modules
import importlib
import logging
from types import ModuleType
from typing import Optional
# 3rd Party Modules
from scrapy.exceptions import NotConfigured
from scrapy.settings import Settings
# Project Modules
from scrachy.settings.defaults import fingerprinter, storage
from scrachy.settings.defaults import filter as cache_filter
from scrachy.utils.imports import get_import_path
from scrachy.utils.request import DEFAULT_SCRACHY_FINGERPRINTER_VERSION
log = logging.getLogger(__name__)
[docs]
def try_import(module_name: str, addon_name):
"""
Try importing a module by name and raise a
:class:`scrapy.exceptions.NotConfigured` error if it can't be found.
:param module_name: The full path to the module.
:param addon_name: The name of the addon.
"""
try:
importlib.import_module(module_name)
except ModuleNotFoundError:
raise NotConfigured(f"The {addon_name} addon requires the module: {module_name}")
[docs]
class ScrachyAddon:
"""
The base class for all Scrachy addons.
"""
[docs]
def __init__(self, settings_module: Optional[ModuleType] = None):
self.settings_module = settings_module
def update_settings(self, settings: Settings):
settings.setmodule(self.settings_module, 'addon')
[docs]
class BlacklistPolicyAddon(ScrachyAddon):
def __int__(self):
super().__init__()
[docs]
class DynamicHashRequestFingerprinterAddon(ScrachyAddon):
[docs]
def __init__(self):
super().__init__(fingerprinter)
def update_settings(self, settings: Settings):
super().update_settings(settings)
fp = get_import_path(settings.get('REQUEST_FINGERPRINTER_CLASS'))
if fp.startswith('scrachy'):
settings['REQUEST_FINGERPRINTER_IMPLEMENTATION'] = DEFAULT_SCRACHY_FINGERPRINTER_VERSION
[docs]
class AlchemyCacheStorageAddon(ScrachyAddon):
[docs]
def __init__(self):
super().__init__(storage)
def update_settings(self, settings: Settings):
def check_import(m: str):
try_import(m, self.__class__.__name__)
check_import('bs4')
check_import('w3lib')
check_import('sqlalchemy')
super().update_settings(settings)
[docs]
class CachedResponseFilterAddon(ScrachyAddon):
[docs]
def __init__(self):
super().__init__(cache_filter)