Skip to content

Advanced Usage

Under Construction

This whole page is currently under construction.

Not worth your time at this point.

Under Construction

Just making sure the previous warning was not missed.

This page is not worth your time at this point.

Overview

This page covers the advanced internals of recipe-scrapers, including abstract classes, schema.org parsing, utility functions, exception handling, and the plugin system. These components are primarily useful for contributors and developers looking to extend the library's functionality.

Core Components

Abstract Base Classes

Classes

AbstractScraper

Source code in recipe_scrapers/_abstract.py
class AbstractScraper:
    """Base scraper: parses the page once and exposes the recipe accessors.

    Most accessors are placeholders that raise NotImplementedError; a few
    (canonical URL, language, links, ingredient groups, instruction list and
    JSON export) have working defaults built on the parsed page.
    """

    page_data: str

    _opengraph_cls = OpenGraph
    _schema_cls = SchemaOrg

    def __init__(self, html: str, url: str):
        """Parse ``html`` and, once per class, wrap methods with the plugins.

        Args:
            html: Raw page markup; stored on ``page_data``.
            url: URL of the page; stored on ``url``.
        """
        self.page_data = html
        self.url = url
        self.soup = BeautifulSoup(self.page_data, "html.parser")
        self.opengraph = self._opengraph_cls(self.soup)
        self.schema = self._schema_cls(self.page_data)

        scraper_cls = self.__class__
        if hasattr(scraper_cls, "plugins_initialized"):
            return

        # attach the plugins as instructed in settings.PLUGINS
        for method_name, _ in inspect.getmembers(self, inspect.ismethod):
            wrapped = getattr(scraper_cls, method_name)
            for plugin in reversed(settings.PLUGINS):
                if plugin.should_run(self.host(), method_name):
                    wrapped = plugin.run(wrapped)
            setattr(scraper_cls, method_name, wrapped)
        scraper_cls.plugins_initialized = True

    def author(self):
        """Author of the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def canonical_url(self):
        """Canonical or original URL of the recipe.

        Uses the page's ``<link rel="canonical">`` href, resolved against the
        scraper's URL; falls back to the scraper's URL when no such link exists.
        """
        canonical_link = self.soup.find("link", {"rel": "canonical", "href": True})
        if not canonical_link:
            return self.url
        return urljoin(self.url, canonical_link["href"])

    def site_name(self):
        """Name of the website.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    @classmethod
    def host(cls) -> str:
        """Host domain of the recipe URL.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def language(self):
        """Language the recipe is written in.

        Candidates are collected, in order, from the ``lang`` attribute of the
        ``<html>`` tag and from the first comma-separated entry of a
        ``Content-Language`` ``<meta http-equiv>`` tag. When more than one
        candidate exists, ``"en"`` is dropped; the first remaining one wins.

        Raises:
            ElementNotFoundInHtml: if no candidate language was found.
        """
        # A dict keeps insertion order and collapses duplicate candidates.
        candidates = {}

        html_tag = self.soup.find("html", {"lang": True})
        if html_tag:
            candidates[html_tag.get("lang")] = True

        # Deprecated: check for a meta http-equiv header
        # See: https://www.w3.org/International/questions/qa-http-and-lang
        meta_tag = self.soup.find(
            "meta",
            {
                "http-equiv": lambda x: x and x.lower() == "content-language",
                "content": True,
            },
        )
        if meta_tag:
            first_entry = meta_tag.get("content").split(",", 1)[0]
            if first_entry:
                candidates[first_entry] = True

        # If other languages exist, remove 'en' commonly generated by HTML editors
        if len(candidates) > 1:
            candidates.pop("en", None)

        if not candidates:
            raise ElementNotFoundInHtml("Could not find language.")
        return next(iter(candidates))

    def title(self):
        """Title of the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def ingredients(self):
        """Ingredients of the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def ingredient_groups(self) -> list[IngredientGroup]:
        """List of ingredient groups.

        Default: one group with no purpose that holds every ingredient.
        """
        everything = IngredientGroup(purpose=None, ingredients=self.ingredients())
        return [everything]

    def instructions(self) -> str:
        """Instructions to prepare the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def instructions_list(self) -> list[str]:
        """Instructions to prepare the recipe as a list.

        Splits ``instructions()`` on newlines and drops empty entries.
        """
        return list(filter(None, self.instructions().split("\n")))

    def category(self):
        """Category of the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def yields(self):
        """Total servings or items in the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def description(self):
        """Description of the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def total_time(self):
        """Total time needed to prepare and cook the recipe in minutes.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def cook_time(self):
        """Cooking time in minutes.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def prep_time(self):
        """Preparation time in minutes.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def cuisine(self):
        """Cuisine of the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def cooking_method(self):
        """The method of cooking the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def ratings(self):
        """Ratings of the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def ratings_count(self):
        """Total number of ratings of the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def equipment(self):
        """Equipment needed for the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def reviews(self):
        """Reviews of the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def nutrients(self):
        """Nutrients of the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def dietary_restrictions(self):
        """The specified dietary restrictions or guidelines for which this recipe is suitable.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def image(self):
        """An image URL for the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def keywords(self):
        """Keywords or tags used to describe the recipe.

        Base placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError("This should be implemented.")

    def links(self):
        """Links found in the recipe.

        Returns the attribute dict of every ``<a href=...>`` on the page whose
        href is neither empty nor a bare ``#``.
        """
        invalid_href = {"#", ""}
        found = []
        for anchor in self.soup.find_all("a", href=True):
            if anchor["href"] in invalid_href:
                continue
            found.append(anchor.attrs)
        return found

    def to_json(self):
        """Recipe information in JSON format.

        Calls every public callable attribute (except ``soup``, ``links`` and
        ``to_json``) and stores the result under the attribute's name.
        Ingredient groups are stored as their ``__dict__``. Attributes whose
        call raises are omitted.
        """
        excluded = ["soup", "links", "to_json"]
        public_method_names = []
        for attr_name in dir(self):
            if not callable(getattr(self, attr_name)):
                continue
            if attr_name.startswith("_") or attr_name in excluded:
                continue
            public_method_names.append(attr_name)

        json_dict = {}
        for attr_name in public_method_names:
            try:
                value = getattr(self, attr_name)()
                if attr_name == "ingredient_groups":
                    value = [group.__dict__ for group in value]
            except Exception:
                # Best effort: a field that fails to resolve is left out.
                continue
            json_dict[attr_name] = value
        return json_dict
Functions
author()

Author of the recipe.

Source code in recipe_scrapers/_abstract.py
def author(self):
    """Author of the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
canonical_url()

Canonical or original URL of the recipe.

Source code in recipe_scrapers/_abstract.py
def canonical_url(self):
    """Canonical or original URL of the recipe.

    Uses the page's ``<link rel="canonical">`` href, resolved against the
    scraper's URL; falls back to the scraper's URL when no such link exists.
    """
    canonical_link = self.soup.find("link", {"rel": "canonical", "href": True})
    if not canonical_link:
        return self.url
    return urljoin(self.url, canonical_link["href"])
category()

Category of the recipe.

Source code in recipe_scrapers/_abstract.py
def category(self):
    """Category of the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
cook_time()

Cooking time in minutes.

Source code in recipe_scrapers/_abstract.py
def cook_time(self):
    """Cooking time in minutes.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
cooking_method()

The method of cooking the recipe

Source code in recipe_scrapers/_abstract.py
def cooking_method(self):
    """The method of cooking the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
cuisine()

Cuisine of the recipe.

Source code in recipe_scrapers/_abstract.py
def cuisine(self):
    """Cuisine of the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
description()

Description of the recipe.

Source code in recipe_scrapers/_abstract.py
def description(self):
    """Description of the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
dietary_restrictions()

The specified dietary restrictions or guidelines for which this recipe is suitable

Source code in recipe_scrapers/_abstract.py
def dietary_restrictions(self):
    """The specified dietary restrictions or guidelines for which this recipe is suitable.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
equipment()

Equipment needed for the recipe.

Source code in recipe_scrapers/_abstract.py
def equipment(self):
    """Equipment needed for the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
host() classmethod

Host domain of the recipe URL.

Source code in recipe_scrapers/_abstract.py
@classmethod
def host(cls) -> str:
    """Host domain of the recipe URL.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
image()

An image URL for the recipe.

Source code in recipe_scrapers/_abstract.py
def image(self):
    """An image URL for the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
ingredient_groups()

List of ingredient groups.

Source code in recipe_scrapers/_abstract.py
def ingredient_groups(self) -> list[IngredientGroup]:
    """List of ingredient groups.

    Default: one group with no purpose that holds every ingredient.
    """
    everything = IngredientGroup(purpose=None, ingredients=self.ingredients())
    return [everything]
ingredients()

Ingredients of the recipe.

Source code in recipe_scrapers/_abstract.py
def ingredients(self):
    """Ingredients of the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
instructions()

Instructions to prepare the recipe.

Source code in recipe_scrapers/_abstract.py
def instructions(self) -> str:
    """Instructions to prepare the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
instructions_list()

Instructions to prepare the recipe as a list.

Source code in recipe_scrapers/_abstract.py
def instructions_list(self) -> list[str]:
    """Instructions to prepare the recipe as a list.

    Splits ``instructions()`` on newlines and drops empty entries.
    """
    return list(filter(None, self.instructions().split("\n")))
keywords()

Keywords or tags used to describe the recipe

Source code in recipe_scrapers/_abstract.py
def keywords(self):
    """Keywords or tags used to describe the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
language()

Language the recipe is written in.

Source code in recipe_scrapers/_abstract.py
def language(self):
    """Language the recipe is written in.

    Candidates are collected, in order, from the ``lang`` attribute of the
    ``<html>`` tag and from the first comma-separated entry of a
    ``Content-Language`` ``<meta http-equiv>`` tag. When more than one
    candidate exists, ``"en"`` is dropped; the first remaining one wins.

    Raises:
        ElementNotFoundInHtml: if no candidate language was found.
    """
    # A dict keeps insertion order and collapses duplicate candidates.
    candidates = {}

    html_tag = self.soup.find("html", {"lang": True})
    if html_tag:
        candidates[html_tag.get("lang")] = True

    # Deprecated: check for a meta http-equiv header
    # See: https://www.w3.org/International/questions/qa-http-and-lang
    meta_tag = self.soup.find(
        "meta",
        {
            "http-equiv": lambda x: x and x.lower() == "content-language",
            "content": True,
        },
    )
    if meta_tag:
        first_entry = meta_tag.get("content").split(",", 1)[0]
        if first_entry:
            candidates[first_entry] = True

    # If other languages exist, remove 'en' commonly generated by HTML editors
    if len(candidates) > 1:
        candidates.pop("en", None)

    if not candidates:
        raise ElementNotFoundInHtml("Could not find language.")
    return next(iter(candidates))

links()

Links found in the recipe.

Source code in recipe_scrapers/_abstract.py
def links(self):
    """Links found in the recipe.

    Returns the attribute dict of every ``<a href=...>`` on the page whose
    href is neither empty nor a bare ``#``.
    """
    invalid_href = {"#", ""}
    found = []
    for anchor in self.soup.find_all("a", href=True):
        if anchor["href"] in invalid_href:
            continue
        found.append(anchor.attrs)
    return found
nutrients()

Nutrients of the recipe.

Source code in recipe_scrapers/_abstract.py
def nutrients(self):
    """Nutrients of the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
prep_time()

Preparation time in minutes.

Source code in recipe_scrapers/_abstract.py
def prep_time(self):
    """Preparation time in minutes.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
ratings()

Ratings of the recipe.

Source code in recipe_scrapers/_abstract.py
def ratings(self):
    """Ratings of the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
ratings_count()

Total number of ratings of the recipe.

Source code in recipe_scrapers/_abstract.py
def ratings_count(self):
    """Total number of ratings of the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
reviews()

Reviews of the recipe.

Source code in recipe_scrapers/_abstract.py
def reviews(self):
    """Reviews of the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
site_name()

Name of the website.

Source code in recipe_scrapers/_abstract.py
def site_name(self):
    """Name of the website.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
title()

Title of the recipe.

Source code in recipe_scrapers/_abstract.py
def title(self):
    """Title of the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
to_json()

Recipe information in JSON format.

Source code in recipe_scrapers/_abstract.py
def to_json(self):
    """Recipe information in JSON format.

    Calls every public callable attribute (except ``soup``, ``links`` and
    ``to_json``) and stores the result under the attribute's name.
    Ingredient groups are stored as their ``__dict__``. Attributes whose
    call raises are omitted.
    """
    excluded = ["soup", "links", "to_json"]
    public_method_names = []
    for attr_name in dir(self):
        if not callable(getattr(self, attr_name)):
            continue
        if attr_name.startswith("_") or attr_name in excluded:
            continue
        public_method_names.append(attr_name)

    json_dict = {}
    for attr_name in public_method_names:
        try:
            value = getattr(self, attr_name)()
            if attr_name == "ingredient_groups":
                value = [group.__dict__ for group in value]
        except Exception:
            # Best effort: a field that fails to resolve is left out.
            continue
        json_dict[attr_name] = value
    return json_dict
total_time()

Total time needed to prepare and cook the recipe in minutes.

Source code in recipe_scrapers/_abstract.py
def total_time(self):
    """Total time needed to prepare and cook the recipe in minutes.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
yields()

Total servings or items in the recipe.

Source code in recipe_scrapers/_abstract.py
def yields(self):
    """Total servings or items in the recipe.

    Base placeholder: always raises NotImplementedError.
    """
    raise NotImplementedError("This should be implemented.")
Schema.org Parser

Classes

Functions

Utility Functions

Classes

Functions

change_keys(obj, convert)

Recursively goes through the dictionary obj and replaces keys with the convert function

Useful for fixing incorrect property keys, e.g. in JSON-LD dictionaries

Credit: StackOverflow user 'baldr' (https://web.archive.org/web/20201022163147/https://stackoverflow.com/questions/11700705/python-recursively-replace-character-in-keys-of-nested-dictionary/33668421)

Note: with modifications applied.

Source code in recipe_scrapers/_utils.py
def change_keys(obj, convert):
    """
    Return a copy of ``obj`` in which every dictionary key, at any nesting
    depth, has been passed through ``convert``.

    Dictionaries are rebuilt with converted keys; lists, sets and tuples are
    rebuilt as the same type with their items processed recursively; any other
    value is returned unchanged.

    Useful for fixing incorrect property keys, e.g. in JSON-LD dictionaries

    Credit: StackOverflow user 'baldr'
    (https://web.archive.org/web/20201022163147/https://stackoverflow.com/questions/11700705/python-recursively-replace-character-in-keys-of-nested-dictionary/33668421)

    Note: with modifications applied.
    """
    if isinstance(obj, dict):
        renamed = {}
        for key, value in obj.items():
            # Convert the key before descending, matching comprehension order.
            new_key = convert(key)
            renamed[new_key] = change_keys(value, convert)
        return renamed

    if isinstance(obj, (list, set, tuple)):
        container = type(obj)
        return container(change_keys(item, convert) for item in obj)

    return obj

get_yields(element)

Returns a string of servings or items. If the recipe is for a number of items (not servings), it returns "x item(s)" where x is the quantity. This function handles cases where the yield is in dozens, such as "4 dozen cookies", returning "4 dozen" instead of "4 servings". It additionally accommodates yields specified in batches (e.g., "2 batches of brownies"), returning the yield as stated. :param element: Should be BeautifulSoup.TAG, in some cases not feasible and will then be text. :return: The number of servings, items, dozen, batches, etc...

Source code in recipe_scrapers/_utils.py
def get_yields(element):
    """
    Returns a string of servings or items. If the recipe is for a number of items (not servings),
    it returns "x item(s)" where x is the quantity. This function handles cases where the yield is in dozens,
    such as "4 dozen cookies", returning "4 dozen" instead of "4 servings". Additionally
    accommodates yields specified in batches (e.g., "2 batches of brownies"), returning the yield as stated.
    :param element: Should be BeautifulSoup.TAG, in some cases not feasible and will then be text.
    :return: The number of servings, items, dozen, batches, etc...
    :raises ElementNotFoundInHtml: if element is None.
    :raises ValueError: if the text taken from element is empty.
    """
    if element is None:
        raise ElementNotFoundInHtml(element)

    serve_text = element if isinstance(element, str) else element.get_text()
    if not serve_text:
        raise ValueError("Cannot extract yield information from empty string")

    # When the "to" pattern matches, continue with the text that follows the
    # first piece captured by that pattern.
    if SERVE_REGEX_TO.search(serve_text):
        separator = SERVE_REGEX_TO.split(serve_text, 2)[1]
        serve_text = serve_text.split(separator, 2)[1]

    matched = SERVE_REGEX_NUMBER.search(serve_text).groupdict().get("items") or 0
    lowered_text = serve_text.lower()

    # Among all yield types mentioned in the text, prefer the longest name.
    best_match = None
    best_match_length = 0
    for singular, plural in RECIPE_YIELD_TYPES:
        has_singular = singular in lowered_text
        if not (has_singular or plural in lowered_text):
            continue
        match_length = len(singular) if has_singular else len(plural)
        if match_length <= best_match_length:
            continue
        best_match_length = match_length
        best_match = f"{matched} {singular if int(matched) == 1 else plural}"

    if best_match:
        return best_match

    if SERVE_REGEX_ITEMS.search(serve_text) is not None:
        return f"{matched} item{'s' if int(matched) != 1 else ''}"

    return f"{matched} serving{'s' if int(matched) != 1 else ''}"
Exception Handling

Classes

ElementNotFoundInHtml

Bases: RecipeScrapersExceptions

Error when we cannot locate the HTML element on the page

Source code in recipe_scrapers/_exceptions.py
class ElementNotFoundInHtml(RecipeScrapersExceptions):
    """Error when we cannot locate the HTML element on the page.

    The value passed in is kept on ``element``; the message text is fixed.
    """

    def __init__(self, element):
        self.element = element
        super().__init__(
            "Element not found in html (self.soup.find returned None). Check traceback."
        )

FieldNotProvidedByWebsiteException

Bases: StaticValueException

Error when, as far as we know, the website does not provide this info for any recipes.

Source code in recipe_scrapers/_exceptions.py
class FieldNotProvidedByWebsiteException(StaticValueException):
    """Error when, as far as we know, the website does not provide this info for any recipes.

    Adds nothing to StaticValueException beyond a distinct type.
    """

FillPluginException

Bases: RecipeScrapersExceptions

Inability to locate an element on a page by using a fill plugin

Source code in recipe_scrapers/_exceptions.py
class FillPluginException(RecipeScrapersExceptions):
    """Inability to locate an element on a page by using a fill plugin.

    Takes exactly one argument, the message, and forwards it to the base class.
    """

    def __init__(self, message):
        super().__init__(message)

NoSchemaFoundInWildMode

Bases: RecipeScrapersExceptions

The scraper was unable to locate schema.org metadata within the webpage.

Source code in recipe_scrapers/_exceptions.py
class NoSchemaFoundInWildMode(RecipeScrapersExceptions):
    """The scraper was unable to locate schema.org metadata within the webpage.

    The offending URL is kept on ``url`` and included in the message.
    """

    def __init__(self, url):
        self.url = url
        super().__init__(f"No Recipe Schema found at {self.url}.")

OpenGraphException

Bases: FillPluginException

Unable to locate element on the page using OpenGraph metadata

Source code in recipe_scrapers/_exceptions.py
class OpenGraphException(FillPluginException):
    """Unable to locate element on the page using OpenGraph metadata.

    Adds nothing to FillPluginException beyond a distinct type.
    """

RecipeSchemaNotFound

Bases: SchemaOrgException

No recipe schema metadata found on the page

Source code in recipe_scrapers/_exceptions.py
class RecipeSchemaNotFound(SchemaOrgException):
    """No recipe schema metadata found on the page.

    The offending URL is kept on ``url`` and included in the message.
    """

    def __init__(self, url):
        self.url = url
        super().__init__(f"No Recipe Schema found at {self.url}.")

SchemaOrgException

Bases: FillPluginException

Error in parsing or missing portion of the Schema.org data on the page

Source code in recipe_scrapers/_exceptions.py
class SchemaOrgException(FillPluginException):
    """Error in parsing or missing portion of the Schema.org data on the page.

    Adds nothing to FillPluginException beyond a distinct type.
    """

StaticValueException

Bases: RecipeScrapersExceptions

Error to communicate that the scraper is returning a fixed/static value.

Source code in recipe_scrapers/_exceptions.py
class StaticValueException(RecipeScrapersExceptions):
    """Error to communicate that the scraper is returning a fixed/static value.

    ``return_value`` is keyword-only; it is kept on the instance and quoted in
    the message.
    """

    def __init__(self, *, return_value):
        self.return_value = return_value
        super().__init__(
            f"Suggested return value {return_value} is not from recipe source."
        )

WebsiteNotImplementedError

Bases: RecipeScrapersExceptions

Error when website is not supported by this library.

Source code in recipe_scrapers/_exceptions.py
class WebsiteNotImplementedError(RecipeScrapersExceptions):
    """Error when website is not supported by this library.

    The domain is kept on ``domain`` and included in the message.
    """

    def __init__(self, domain):
        self.domain = domain
        super().__init__(f"Website ({self.domain}) not supported.")
Plugin System

Classes

ExceptionHandlingPlugin

Bases: PluginInterface

Plugin that is used only if settings.SUPPRESS_EXCEPTIONS is set to True.

The outer-most plugin and decorator.

If ANY of the methods listed raises ANY kind of exception, silence it and return the respective value from settings.ON_EXCEPTION_RETURN_VALUES

If settings.SUPPRESS_EXCEPTIONS is set to False, this plugin is ignored and does nothing. (In other words, exceptions are not handled and bubble up, possibly crashing the program; it is left to the end user to handle them on their own.)

Source code in recipe_scrapers/plugins/exception_handling.py
class ExceptionHandlingPlugin(PluginInterface):
    """
    Plugin that only takes effect while settings.SUPPRESS_EXCEPTIONS is True.

    The outer-most plugin and decorator.

    When a listed method raises ANY exception, the exception is logged at
    info level and swallowed, and the method's entry in
    settings.ON_EXCEPTION_RETURN_VALUES (None when absent) is returned.

    While settings.SUPPRESS_EXCEPTIONS is False the wrapped method is called
    directly, so exceptions propagate to the caller untouched.
    """

    run_on_hosts = ("*",)
    run_on_methods = (
        "title",
        "total_time",
        "yields",
        "image",
        "ingredients",
        "instructions",
        "ratings",
        "reviews",
        "links",
        "language",
        "nutrients",
    )

    @classmethod
    def run(cls, decorated):
        """Return ``decorated`` wrapped with the exception-silencing behavior."""

        @functools.wraps(decorated)
        def decorated_method_wrapper(self, *args, **kwargs):
            # The setting is read on every call, so it can be toggled at runtime.
            if not settings.SUPPRESS_EXCEPTIONS:
                return decorated(self, *args, **kwargs)

            logger.setLevel(settings.LOG_LEVEL)
            class_name = self.__class__.__name__
            method_name = decorated.__name__
            logger.debug(
                f"Decorating: {class_name}.{method_name}() with ExceptionHandlingPlugin"
            )

            try:
                return decorated(self, *args, **kwargs)
            except Exception as e:
                logger.info(
                    f"ExceptionHandlingPlugin silenced exception: {str(e)} in {class_name}.{method_name}()"
                )
                return settings.ON_EXCEPTION_RETURN_VALUES.get(method_name)

        return decorated_method_wrapper

HTMLTagStripperPlugin

Bases: PluginInterface

Run the output from the methods listed through the stripper function defined above.

It is intended to strip away HTML tags (e.g. `<html><tags></tags></html>`) seen inside the strings. We do not want them.

Source code in recipe_scrapers/plugins/html_tags_stripper.py
class HTMLTagStripperPlugin(PluginInterface):
    """
    Run the output from the methods listed through the stripper function
    defined above.

    It is intended to strip away <html><tags></tags></html> seen inside the strings.
    We do not want them.

    A list result is stripped item by item; any other result is passed to
    the stripper as a whole.
    """

    # Every other plugin in this module declares ``run_on_hosts``; declare it
    # here too so the host filter is explicit rather than inherited.
    # NOTE(review): presumably this is the attribute PluginInterface.should_run
    # consults -- confirm against the interface.
    run_on_hosts = ("*",)
    # Kept for backward compatibility with code that reads the old name.
    decorate_hosts = run_on_hosts
    run_on_methods = ("title", "instructions", "ingredients")

    @classmethod
    def run(cls, decorated):
        """Return ``decorated`` wrapped so its result is run through ``stripper``."""

        @functools.wraps(decorated)
        def decorated_method_wrapper(self, *args, **kwargs):
            logger.setLevel(settings.LOG_LEVEL)
            class_name = self.__class__.__name__
            method_name = decorated.__name__
            logger.debug(
                f"Decorating: {class_name}.{method_name}() with HTMLTagStripperPlugin plugin."
            )

            decorated_func_result = decorated(self, *args, **kwargs)

            # isinstance also covers list subclasses, which the exact-type
            # check would have handed to the stripper as a single value.
            if isinstance(decorated_func_result, list):
                return [stripper(item) for item in decorated_func_result]
            return stripper(decorated_func_result)

        return decorated_method_wrapper

NormalizeStringPlugin

Bases: PluginInterface

Explicitly run the output from the methods listed through normalize_string

Source code in recipe_scrapers/plugins/normalize_string.py
class NormalizeStringPlugin(PluginInterface):
    """
    Explicitly run the output from the methods listed through normalize_string
    """

    # Every other plugin in this module declares ``run_on_hosts``; declare it
    # here too so the host filter is explicit rather than inherited.
    # NOTE(review): presumably this is the attribute PluginInterface.should_run
    # consults -- confirm against the interface.
    run_on_hosts = ("*",)
    # Kept for backward compatibility with code that reads the old name.
    decorate_hosts = run_on_hosts
    run_on_methods = ("title",)

    @classmethod
    def run(cls, decorated):
        """Return ``decorated`` wrapped so its result goes through ``normalize_string``."""

        @functools.wraps(decorated)
        def decorated_method_wrapper(self, *args, **kwargs):
            logger.setLevel(settings.LOG_LEVEL)
            class_name = self.__class__.__name__
            method_name = decorated.__name__
            logger.debug(
                f"Decorating: {class_name}.{method_name}() with NormalizeStringPlugin"
            )

            return normalize_string(decorated(self, *args, **kwargs))

        return decorated_method_wrapper

OpenGraphFillPlugin

Bases: PluginInterface

If any of the methods listed is invoked on a scraper class that happens not to be implemented, attempt to return results by checking for OpenGraph metadata.

Source code in recipe_scrapers/plugins/opengraph_fill.py
class OpenGraphFillPlugin(PluginInterface):
    """
    If any of the methods listed is invoked on a scraper class
    that happens not to be implemented, attempt to return results
    by checking for OpenGraph metadata.

    The fallback runs when the wrapped method raises FillPluginException or
    NotImplementedError. If the scraper's ``opengraph`` object has no soup or
    no method of the same name, the original exception is re-raised.
    """

    run_on_hosts = ("*",)
    run_on_methods = (
        "site_name",
        "image",
    )

    @classmethod
    def run(cls, decorated):
        """Return ``decorated`` wrapped with the OpenGraph fallback."""

        @functools.wraps(decorated)
        def decorated_method_wrapper(self, *args, **kwargs):
            logger.setLevel(settings.LOG_LEVEL)
            class_name = self.__class__.__name__
            method_name = decorated.__name__
            logger.debug(
                f"Decorating: {class_name}.{method_name}() with OpenGraphFillPlugin"
            )

            try:
                return decorated(self, *args, **kwargs)
            except (FillPluginException, NotImplementedError):
                # Default to None: without it a missing attribute raised
                # AttributeError here, hiding the original exception and
                # making the re-raise below unreachable.
                function = getattr(self.opengraph, method_name, None)
                if self.opengraph.soup and function:
                    logger.info(
                        f"{class_name}.{method_name}() seems not to be implemented but OpenGraph metadata may be available. Attempting to return result from OpenGraph."
                    )
                    return function(*args, **kwargs)
                raise

        return decorated_method_wrapper

OpenGraphImageFetchPlugin

Bases: PluginInterface

If the .image() method of a scraper raises an exception or returns nothing, try to fetch the recipe image from the og:image metadata on the page.

Apply to .image() method on all scrapers if plugin is active.

Source code in recipe_scrapers/plugins/opengraph_image_fetch.py
class OpenGraphImageFetchPlugin(PluginInterface):
    """
    Fall back to the page's og:image when a scraper's .image() raises or
    returns a falsy value.

    Apply to .image() method on all scrapers if plugin is active.
    """

    run_on_hosts = ("*",)
    run_on_methods = ("image",)

    @classmethod
    def run(cls, decorated):
        """Return ``decorated`` wrapped with the og:image fallback."""

        @functools.wraps(decorated)
        def decorated_method_wrapper(self, *args, **kwargs):
            logger.setLevel(settings.LOG_LEVEL)
            class_name = self.__class__.__name__
            method_name = decorated.__name__
            logger.debug(
                f"Decorating: {class_name}.{method_name}() with OpenGraphImageFetchPlugin"
            )

            try:
                image = decorated(self, *args, **kwargs)
            except Exception:
                # Any failure is treated the same as "no image found".
                image = None

            if image:
                return image

            logger.info(
                f"{class_name}.{method_name}() did not manage to find recipe image. OpenGraphImageFetchPlugin will attempt to do its magic."
            )
            og_tag = self.soup.find("meta", {"property": "og:image", "content": True})
            return og_tag.get("content") if og_tag else None

        return decorated_method_wrapper

SchemaOrgFillPlugin

Bases: PluginInterface

If any of the methods listed is invoked on a scraper class that happens not to be implemented and Schema.org is available, attempt to return the results from the schema available.

Source code in recipe_scrapers/plugins/schemaorg_fill.py
class SchemaOrgFillPlugin(PluginInterface):
    """
    If any of the listed methods is invoked on a scraper class that does
    not implement it and Schema.org data is available, attempt to return
    the result from the available schema instead.
    """

    run_on_hosts = ("*",)
    run_on_methods = (
        "author",
        "site_name",
        "title",
        "category",
        "total_time",
        "yields",
        "image",
        "ingredients",
        "instructions",
        "ratings",
        "reviews",
        "links",
        "language",
        "nutrients",
        "cooking_method",
        "cuisine",
        "description",
        "cook_time",
        "prep_time",
        "keywords",
        "ratings_count",
        "dietary_restrictions",
    )

    @classmethod
    def run(cls, decorated):
        """
        Wrap ``decorated`` so that a ``FillPluginException`` or
        ``NotImplementedError`` is answered by the same-named method on
        ``self.schema``.

        Raises:
            RecipeSchemaNotFound: if ``self.schema.data`` is empty.
            FillPluginException / NotImplementedError: the original error is
                re-raised when ``self.schema`` has no usable method of the
                same name.
        """

        @functools.wraps(decorated)
        def decorated_method_wrapper(self, *args, **kwargs):
            logger.setLevel(settings.LOG_LEVEL)
            class_name = self.__class__.__name__
            method_name = decorated.__name__
            logger.debug(
                f"Decorating: {class_name}.{method_name}() with SchemaOrgFillPlugin"
            )
            try:
                return decorated(self, *args, **kwargs)
            except (FillPluginException, NotImplementedError):
                # Use a default so a schema without this method does not
                # raise AttributeError and mask the scraper's own exception.
                function = getattr(self.schema, method_name, None)
                if not self.schema.data:
                    raise RecipeSchemaNotFound(url=self.url)
                if not function:
                    # Nothing to fall back to: surface the original error.
                    raise
                logger.info(
                    f"{class_name}.{method_name}() seems to not be implemented but .schema is available! Attempting to return result from SchemaOrg."
                )
                return function(*args, **kwargs)

        return decorated_method_wrapper

StaticValueExceptionHandlingPlugin

Bases: PluginInterface

Handles cases where a scraper indicates that it returns a static value -- perhaps because the website never provides info for that method at all (communicated by FieldNotProvidedByWebsiteException), or because for some reason it is easier or more convenient to define statically (communicated by StaticValueException).

Objects of StaticValueException and subclasses include a return value, so we return that to the caller instead after emitting a suitable warning for use by developers/users.

Source code in recipe_scrapers/plugins/static_values.py
class StaticValueExceptionHandlingPlugin(PluginInterface):
    """
    Converts "static value" exceptions raised by a scraper method into a
    warning plus a normal return value.

    A scraper signals a static value either with
    FieldNotProvidedByWebsiteException (the website never provides that
    field) or with StaticValueException (the value is simply defined
    statically). Both carry a ``return_value``; the wrapper emits the
    matching warning and hands that value back to the caller.
    """

    BUG_REPORT_LINK = "https://github.com/hhursev/recipe-scrapers/issues"

    run_on_hosts = ("*",)
    run_on_methods = (
        "author",
        "site_name",
        "language",
        "cuisine",
        "cooking_method",
        "total_time",
        "yields",
    )

    @classmethod
    def run(cls, decorated):
        """Return ``decorated`` wrapped with static-value exception handling."""

        @functools.wraps(decorated)
        def decorated_method_wrapper(self, *args, **kwargs):
            try:
                result = decorated(self, *args, **kwargs)
            except FieldNotProvidedByWebsiteException as not_provided:
                # Handled before the StaticValueException clause, as in the
                # original ordering of the handlers.
                host = self.host()
                field = decorated.__name__
                link = StaticValueExceptionHandlingPlugin.BUG_REPORT_LINK
                text = (
                    f"{host} doesn't seem to support the {field} field. "
                    f"If you know this to be untrue for some recipe, please submit a bug report at {link}"
                )
                warnings.warn(text, FieldNotProvidedByWebsiteWarning)
                return not_provided.return_value
            except StaticValueException as static_value:
                host = self.host()
                field = decorated.__name__
                link = StaticValueExceptionHandlingPlugin.BUG_REPORT_LINK
                text = (
                    f"{host} returns a constant value from the {field} field. "
                    f"If you believe we can and should determine that dynamically, please submit a bug report at {link}"
                )
                warnings.warn(text, StaticValueWarning)
                return static_value.return_value
            else:
                return result

        return decorated_method_wrapper