# File: vrobbler/tests/foods_tests/test_recipe_scraper.py
# Repository viewer metadata: 187 lines, 5.9 KiB, Python

import pytest
from foods.sources.rscraper import (
RecipeScraperService,
)
# Fixture page carrying a schema.org Recipe as a JSON-LD <script> in <head>.
# The JSON-LD declares the name "Test Recipe", a Person author, three
# ingredients, one HowToStep instruction, totalTime "PT30M" and recipeYield
# "4 servings"; the scrape tests in this module assert against those values.
RECIPE_HTML_WITH_SCHEMA = """
<!DOCTYPE html>
<html>
<head>
<script type="application/ld+json">
{
"@context": "https://schema.org/",
"@type": "Recipe",
"name": "Test Recipe",
"author": {
"@type": "Person",
"name": "Test Author"
},
"recipeIngredient": ["1 cup flour", "2 eggs", "1/2 cup sugar"],
"recipeInstructions": [
{
"@type": "HowToStep",
"text": "Mix ingredients together"
}
],
"totalTime": "PT30M",
"recipeYield": "4 servings"
}
</script>
</head>
<body>
<h1>Test Recipe</h1>
</body>
</html>
"""
# Fixture page with no recipe markup at all: only a <title>, a heading and a
# paragraph. Used as the negative case, where is_recipe is expected to be False.
RECIPE_HTML_WITHOUT_SCHEMA = """
<!DOCTYPE html>
<html>
<head>
<title>Not a Recipe Page</title>
</head>
<body>
<h1>Welcome to My Blog</h1>
<p>This is just a regular blog post about cooking.</p>
</body>
</html>
"""
# Fixture page that marks up a recipe with HTML microdata (itemscope /
# itemtype="http://schema.org/Recipe" / itemprop attributes) instead of
# JSON-LD. Used to check that is_recipe also reports True for this markup.
RECIPE_HTML_WITH_MICRODATA = """
<!DOCTYPE html>
<html>
<head>
<title>Test Recipe</title>
</head>
<body itemscope itemtype="http://schema.org/Recipe">
<h1 itemprop="name">Microdata Recipe</h1>
<div itemprop="author" itemscope itemtype="http://schema.org/Person">
<span itemprop="name">Test Author</span>
</div>
<div itemprop="recipeIngredient">1 cup flour</div>
<div itemprop="recipeIngredient">2 eggs</div>
<div itemprop="recipeInstructions">
<div itemprop="text">Mix all ingredients</div>
</div>
</body>
</html>
"""
class TestRecipeScraperService:
    """Tests for ``RecipeScraperService``.

    Covers recipe detection (``is_recipe``), field extraction (``scrape``),
    serving-count parsing (``parse_servings``) and tag extraction
    (``extract_tags``) against the HTML fixtures defined in this module.
    """

    # URL passed to the scraper together with the recipe fixture pages.
    RECIPE_URL = "https://example.com/recipe"

    @pytest.fixture
    def scraper(self):
        """Provide a fresh ``RecipeScraperService`` instance for each test."""
        return RecipeScraperService()

    @pytest.fixture
    def scraped_recipe(self, scraper):
        """Return the result of scraping the JSON-LD fixture page.

        Function-scoped, so every test still performs its own scrape; this
        only removes the duplicated call from the individual tests.
        """
        return scraper.scrape(RECIPE_HTML_WITH_SCHEMA, self.RECIPE_URL)

    # ------------------------------------------------------------------
    # is_recipe
    # ------------------------------------------------------------------

    def test_is_recipe_with_valid_schema(self, scraper):
        """A page with a JSON-LD Recipe is expected to be detected."""
        result = scraper.is_recipe(RECIPE_HTML_WITH_SCHEMA, self.RECIPE_URL)
        assert result is True

    def test_is_recipe_without_schema(self, scraper):
        """A page without any recipe markup is expected to be rejected."""
        result = scraper.is_recipe(
            RECIPE_HTML_WITHOUT_SCHEMA, "https://example.com/blog"
        )
        assert result is False

    def test_is_recipe_with_microdata(self, scraper):
        """A page with schema.org microdata is expected to be detected."""
        result = scraper.is_recipe(RECIPE_HTML_WITH_MICRODATA, self.RECIPE_URL)
        assert result is True

    # ------------------------------------------------------------------
    # scrape
    # ------------------------------------------------------------------

    def test_scrape_returns_title(self, scraped_recipe):
        """The scraped title matches the JSON-LD ``name``."""
        assert scraped_recipe["title"] == "Test Recipe"

    def test_scrape_returns_ingredients(self, scraped_recipe):
        """All three ``recipeIngredient`` entries are returned."""
        ingredients = scraped_recipe["ingredients"]
        assert len(ingredients) == 3
        assert "1 cup flour" in ingredients

    def test_scrape_returns_instructions(self, scraped_recipe):
        """The HowToStep text appears in the scraped instructions."""
        instructions = scraped_recipe["instructions"]
        assert len(instructions) > 0
        assert "Mix ingredients together" in instructions

    def test_scrape_returns_yields(self, scraped_recipe):
        """The scraped yield matches the JSON-LD ``recipeYield``."""
        assert scraped_recipe["yields"] == "4 servings"

    def test_scrape_returns_total_time(self, scraped_recipe):
        """``totalTime`` of ``PT30M`` is expected to be reported as 30."""
        assert scraped_recipe["total_time"] == 30

    def test_scrape_returns_url(self, scraped_recipe):
        """The URL given to ``scrape`` is echoed back in the result."""
        assert scraped_recipe["url"] == self.RECIPE_URL

    def test_scrape_raises_on_invalid_html(self, scraper):
        """Scraping an empty document is expected to raise ``ValueError``."""
        with pytest.raises(ValueError):
            scraper.scrape("", self.RECIPE_URL)

    def test_scrape_handles_missing_optional_fields(self, scraper):
        """A Recipe with only a ``name`` yields empty ingredient/instruction lists."""
        minimal_html = """
<!DOCTYPE html>
<html>
<head>
<script type="application/ld+json">
{
"@context": "https://schema.org/",
"@type": "Recipe",
"name": "Minimal Recipe"
}
</script>
</head>
<body></body>
</html>
"""
        result = scraper.scrape(minimal_html, "https://example.com/minimal")
        assert result["title"] == "Minimal Recipe"
        assert result["ingredients"] == []
        assert result["instructions"] == []

    # ------------------------------------------------------------------
    # parse_servings
    # ------------------------------------------------------------------

    # Parametrized so each input is reported as its own test case instead of
    # the first failing assertion hiding the remaining ones.
    @pytest.mark.parametrize(
        ("raw", "expected"),
        [
            ("4 servings", 4),
            ("6 people", 6),
            ("2", 2),
            ("serves 8", 8),
        ],
    )
    def test_parse_servings(self, scraper, raw, expected):
        """The serving count is extracted from free-form yield text."""
        assert scraper.parse_servings(raw) == expected

    @pytest.mark.parametrize("raw", [None, ""])
    def test_parse_servings_returns_none_without_input(self, scraper, raw):
        """``None`` and the empty string are expected to produce ``None``."""
        assert scraper.parse_servings(raw) is None

    # ------------------------------------------------------------------
    # extract_tags
    # ------------------------------------------------------------------

    def test_extract_tags_from_cuisine(self, scraper):
        """A single ``cuisine`` string becomes a tag."""
        tags = scraper.extract_tags({"cuisine": "Italian"})
        assert "Italian" in tags

    def test_extract_tags_from_cuisine_list(self, scraper):
        """Every entry of a ``cuisine`` list becomes a tag."""
        tags = scraper.extract_tags({"cuisine": ["Italian", "Mexican"]})
        assert "Italian" in tags
        assert "Mexican" in tags

    def test_extract_tags_from_dietary(self, scraper):
        """A ``dietary`` value becomes a tag."""
        tags = scraper.extract_tags({"dietary": "Gluten-Free"})
        assert "Gluten-Free" in tags

    def test_extract_tags_from_course(self, scraper):
        """A ``course`` value becomes a tag."""
        tags = scraper.extract_tags({"course": "Dessert"})
        assert "Dessert" in tags

    def test_extract_tags_from_keywords(self, scraper):
        """A comma-separated ``keywords`` string is split into individual tags."""
        tags = scraper.extract_tags({"keywords": "easy, quick, healthy"})
        assert "easy" in tags
        assert "quick" in tags
        assert "healthy" in tags

    def test_extract_tags_from_keywords_list(self, scraper):
        """Every entry of a ``keywords`` list becomes a tag."""
        tags = scraper.extract_tags({"keywords": ["comfort food", "winter"]})
        assert "comfort food" in tags
        assert "winter" in tags