# vrobbler/tests/foods_tests/test_recipe_scraper.py

import pytest
from foods.sources.rscraper import (
    RecipeScraperService,
)


# HTML fixture: a page whose <head> embeds a schema.org "Recipe" object as
# JSON-LD. It carries a name, a Person author, three ingredients, a single
# HowToStep instruction, a totalTime of "PT30M" (ISO 8601 duration; the tests
# expect this to surface as 30) and a recipeYield of "4 servings".
RECIPE_HTML_WITH_SCHEMA = """
<!DOCTYPE html>
<html>
<head>
    <script type="application/ld+json">
    {
        "@context": "https://schema.org/",
        "@type": "Recipe",
        "name": "Test Recipe",
        "author": {
            "@type": "Person",
            "name": "Test Author"
        },
        "recipeIngredient": ["1 cup flour", "2 eggs", "1/2 cup sugar"],
        "recipeInstructions": [
            {
                "@type": "HowToStep",
                "text": "Mix ingredients together"
            }
        ],
        "totalTime": "PT30M",
        "recipeYield": "4 servings"
    }
    </script>
</head>
<body>
    <h1>Test Recipe</h1>
</body>
</html>
"""

# HTML fixture: an ordinary blog page containing neither a JSON-LD script nor
# any microdata attributes. Serves as the negative case for recipe detection.
RECIPE_HTML_WITHOUT_SCHEMA = """
<!DOCTYPE html>
<html>
<head>
    <title>Not a Recipe Page</title>
</head>
<body>
    <h1>Welcome to My Blog</h1>
    <p>This is just a regular blog post about cooking.</p>
</body>
</html>
"""

# HTML fixture: the recipe is described with HTML microdata (itemscope /
# itemtype / itemprop attributes on the <body> and its children) instead of a
# JSON-LD script. It declares a name, a Person author, two ingredients and
# one instruction text.
RECIPE_HTML_WITH_MICRODATA = """
<!DOCTYPE html>
<html>
<head>
    <title>Test Recipe</title>
</head>
<body itemscope itemtype="http://schema.org/Recipe">
    <h1 itemprop="name">Microdata Recipe</h1>
    <div itemprop="author" itemscope itemtype="http://schema.org/Person">
        <span itemprop="name">Test Author</span>
    </div>
    <div itemprop="recipeIngredient">1 cup flour</div>
    <div itemprop="recipeIngredient">2 eggs</div>
    <div itemprop="recipeInstructions">
        <div itemprop="text">Mix all ingredients</div>
    </div>
</body>
</html>
"""


class TestRecipeScraperService:
    """Tests for RecipeScraperService.

    Covers recipe detection (``is_recipe``), field extraction (``scrape``),
    serving-count parsing (``parse_servings``) and tag collection
    (``extract_tags``).
    """

    @pytest.fixture
    def scraper(self):
        """Provide a newly constructed RecipeScraperService to each test."""
        service = RecipeScraperService()
        return service

    # --- is_recipe --------------------------------------------------------

    def test_is_recipe_with_valid_schema(self, scraper):
        """is_recipe returns True for the JSON-LD recipe page."""
        page_url = "https://example.com/recipe"
        assert scraper.is_recipe(RECIPE_HTML_WITH_SCHEMA, page_url) is True

    def test_is_recipe_without_schema(self, scraper):
        """is_recipe returns False for the plain blog page."""
        page_url = "https://example.com/blog"
        assert scraper.is_recipe(RECIPE_HTML_WITHOUT_SCHEMA, page_url) is False

    def test_is_recipe_with_microdata(self, scraper):
        """is_recipe returns True for the microdata recipe page."""
        page_url = "https://example.com/recipe"
        assert scraper.is_recipe(RECIPE_HTML_WITH_MICRODATA, page_url) is True

    # --- scrape -----------------------------------------------------------

    def test_scrape_returns_title(self, scraper):
        """The scraped "title" matches the JSON-LD recipe name."""
        recipe = scraper.scrape(
            RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
        )
        assert recipe["title"] == "Test Recipe"

    def test_scrape_returns_ingredients(self, scraper):
        """All three ingredients from the JSON-LD page are returned."""
        recipe = scraper.scrape(
            RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
        )
        ingredients = recipe["ingredients"]
        assert len(ingredients) == 3
        assert "1 cup flour" in ingredients

    def test_scrape_returns_instructions(self, scraper):
        """Instructions are non-empty and contain the HowToStep text."""
        recipe = scraper.scrape(
            RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
        )
        assert len(recipe["instructions"]) > 0
        assert "Mix ingredients together" in recipe["instructions"]

    def test_scrape_returns_yields(self, scraper):
        """The scraped "yields" matches the page's recipeYield."""
        recipe = scraper.scrape(
            RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
        )
        assert recipe["yields"] == "4 servings"

    def test_scrape_returns_total_time(self, scraper):
        """A totalTime of "PT30M" is reported as 30."""
        recipe = scraper.scrape(
            RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
        )
        assert recipe["total_time"] == 30

    def test_scrape_returns_url(self, scraper):
        """The URL passed to scrape is echoed back under "url"."""
        recipe = scraper.scrape(
            RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
        )
        assert recipe["url"] == "https://example.com/recipe"

    def test_scrape_raises_on_invalid_html(self, scraper):
        """Scraping an empty document raises ValueError."""
        empty_page = ""
        with pytest.raises(ValueError):
            scraper.scrape(empty_page, "https://example.com/recipe")

    def test_scrape_handles_missing_optional_fields(self, scraper):
        """A recipe with only a name yields empty ingredient/instruction lists."""
        name_only_page = """
        <!DOCTYPE html>
        <html>
        <head>
            <script type="application/ld+json">
            {
                "@context": "https://schema.org/",
                "@type": "Recipe",
                "name": "Minimal Recipe"
            }
            </script>
        </head>
        <body></body>
        </html>
        """
        recipe = scraper.scrape(name_only_page, "https://example.com/minimal")
        assert recipe["title"] == "Minimal Recipe"
        assert recipe["ingredients"] == []
        assert recipe["instructions"] == []

    # --- parse_servings ---------------------------------------------------

    def test_parse_servings(self, scraper):
        """Serving counts are pulled out of free text; blank input gives None."""
        numeric_cases = (
            ("4 servings", 4),
            ("6 people", 6),
            ("2", 2),
            ("serves 8", 8),
        )
        for raw, expected in numeric_cases:
            assert scraper.parse_servings(raw) == expected

        # Missing or empty input must come back as None.
        for blank in (None, ""):
            assert scraper.parse_servings(blank) is None

    # --- extract_tags -----------------------------------------------------

    def test_extract_tags_from_cuisine(self, scraper):
        """A single cuisine string becomes a tag."""
        tags = scraper.extract_tags({"cuisine": "Italian"})
        assert "Italian" in tags

    def test_extract_tags_from_cuisine_list(self, scraper):
        """Every entry of a cuisine list becomes a tag."""
        tags = scraper.extract_tags({"cuisine": ["Italian", "Mexican"]})
        for cuisine in ("Italian", "Mexican"):
            assert cuisine in tags

    def test_extract_tags_from_dietary(self, scraper):
        """The dietary value becomes a tag."""
        tags = scraper.extract_tags({"dietary": "Gluten-Free"})
        assert "Gluten-Free" in tags

    def test_extract_tags_from_course(self, scraper):
        """The course value becomes a tag."""
        tags = scraper.extract_tags({"course": "Dessert"})
        assert "Dessert" in tags

    def test_extract_tags_from_keywords(self, scraper):
        """A comma-separated keywords string is split into individual tags."""
        tags = scraper.extract_tags({"keywords": "easy, quick, healthy"})
        for keyword in ("easy", "quick", "healthy"):
            assert keyword in tags

    def test_extract_tags_from_keywords_list(self, scraper):
        """Each entry of a keywords list becomes a tag."""
        tags = scraper.extract_tags({"keywords": ["comfort food", "winter"]})
        for keyword in ("comfort food", "winter"):
            assert keyword in tags