# Tests for RecipeScraperService: recipe detection, scraping of schema.org
# recipe data, servings parsing, and tag extraction.
import pytest

from foods.sources.rscraper import (
    RecipeScraperService,
)


# HTML page embedding a schema.org Recipe as JSON-LD: a name, a Person author,
# three ingredients, one HowToStep instruction, a PT30M total time and a
# "4 servings" yield.
RECIPE_HTML_WITH_SCHEMA = """
<!DOCTYPE html>
<html>
<head>
<script type="application/ld+json">
{
"@context": "https://schema.org/",
"@type": "Recipe",
"name": "Test Recipe",
"author": {
"@type": "Person",
"name": "Test Author"
},
"recipeIngredient": ["1 cup flour", "2 eggs", "1/2 cup sugar"],
"recipeInstructions": [
{
"@type": "HowToStep",
"text": "Mix ingredients together"
}
],
"totalTime": "PT30M",
"recipeYield": "4 servings"
}
</script>
</head>
<body>
<h1>Test Recipe</h1>
</body>
</html>
"""

# Plain HTML page with no JSON-LD script and no microdata attributes; used as
# the negative case for recipe detection.
RECIPE_HTML_WITHOUT_SCHEMA = """
<!DOCTYPE html>
<html>
<head>
<title>Not a Recipe Page</title>
</head>
<body>
<h1>Welcome to My Blog</h1>
<p>This is just a regular blog post about cooking.</p>
</body>
</html>
"""

# HTML page describing a schema.org Recipe through microdata attributes
# (itemscope/itemtype/itemprop) instead of JSON-LD: a name, a Person author,
# two ingredients and one instruction text.
RECIPE_HTML_WITH_MICRODATA = """
<!DOCTYPE html>
<html>
<head>
<title>Test Recipe</title>
</head>
<body itemscope itemtype="http://schema.org/Recipe">
<h1 itemprop="name">Microdata Recipe</h1>
<div itemprop="author" itemscope itemtype="http://schema.org/Person">
<span itemprop="name">Test Author</span>
</div>
<div itemprop="recipeIngredient">1 cup flour</div>
<div itemprop="recipeIngredient">2 eggs</div>
<div itemprop="recipeInstructions">
<div itemprop="text">Mix all ingredients</div>
</div>
</body>
</html>
"""


class TestRecipeScraperService:
    """Tests for RecipeScraperService.

    Covers ``is_recipe`` detection, the fields returned by ``scrape``,
    ``parse_servings`` and ``extract_tags``.
    """

    @pytest.fixture
    def scraper(self):
        """Return a new RecipeScraperService instance."""
        return RecipeScraperService()

    def test_is_recipe_with_valid_schema(self, scraper):
        """A page with a JSON-LD Recipe is detected as a recipe."""
        result = scraper.is_recipe(
            RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
        )
        assert result is True

    def test_is_recipe_without_schema(self, scraper):
        """A page without any recipe markup is not detected as a recipe."""
        result = scraper.is_recipe(
            RECIPE_HTML_WITHOUT_SCHEMA, "https://example.com/blog"
        )
        assert result is False

    def test_is_recipe_with_microdata(self, scraper):
        """A page with microdata Recipe markup is detected as a recipe."""
        result = scraper.is_recipe(
            RECIPE_HTML_WITH_MICRODATA, "https://example.com/recipe"
        )
        assert result is True

    def test_scrape_returns_title(self, scraper):
        """The scraped title matches the JSON-LD ``name``."""
        result = scraper.scrape(RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe")
        assert result["title"] == "Test Recipe"

    def test_scrape_returns_ingredients(self, scraper):
        """All three JSON-LD ingredients are returned."""
        result = scraper.scrape(RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe")
        assert len(result["ingredients"]) == 3
        assert "1 cup flour" in result["ingredients"]

    def test_scrape_returns_instructions(self, scraper):
        """The HowToStep text appears in the scraped instructions."""
        result = scraper.scrape(RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe")
        assert len(result["instructions"]) > 0
        assert "Mix ingredients together" in result["instructions"]

    def test_scrape_returns_yields(self, scraper):
        """The scraped yield matches the JSON-LD ``recipeYield``."""
        result = scraper.scrape(RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe")
        assert result["yields"] == "4 servings"

    def test_scrape_returns_total_time(self, scraper):
        """A ``PT30M`` total time is reported as 30 (presumably minutes)."""
        result = scraper.scrape(RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe")
        assert result["total_time"] == 30

    def test_scrape_returns_url(self, scraper):
        """The URL passed to ``scrape`` is echoed back in the result."""
        result = scraper.scrape(RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe")
        assert result["url"] == "https://example.com/recipe"

    def test_scrape_raises_on_invalid_html(self, scraper):
        """Scraping an empty HTML string raises ValueError."""
        with pytest.raises(ValueError):
            scraper.scrape("", "https://example.com/recipe")

    def test_scrape_handles_missing_optional_fields(self, scraper):
        """A recipe with only a name yields empty ingredients and instructions."""
        # The HTML lines are kept flush-left so the string content matches the
        # fixture text exactly; only the surrounding Python is indented.
        minimal_html = """
<!DOCTYPE html>
<html>
<head>
<script type="application/ld+json">
{
"@context": "https://schema.org/",
"@type": "Recipe",
"name": "Minimal Recipe"
}
</script>
</head>
<body></body>
</html>
"""
        result = scraper.scrape(minimal_html, "https://example.com/minimal")
        assert result["title"] == "Minimal Recipe"
        assert result["ingredients"] == []
        assert result["instructions"] == []

    def test_parse_servings(self, scraper):
        """Servings are parsed from several phrasings; empty input gives None."""
        assert scraper.parse_servings("4 servings") == 4
        assert scraper.parse_servings("6 people") == 6
        assert scraper.parse_servings("2") == 2
        assert scraper.parse_servings("serves 8") == 8
        assert scraper.parse_servings(None) is None
        assert scraper.parse_servings("") is None

    def test_extract_tags_from_cuisine(self, scraper):
        """A single cuisine string becomes a tag."""
        recipe_data = {"cuisine": "Italian"}
        tags = scraper.extract_tags(recipe_data)
        assert "Italian" in tags

    def test_extract_tags_from_cuisine_list(self, scraper):
        """Every entry of a cuisine list becomes a tag."""
        recipe_data = {"cuisine": ["Italian", "Mexican"]}
        tags = scraper.extract_tags(recipe_data)
        assert "Italian" in tags
        assert "Mexican" in tags

    def test_extract_tags_from_dietary(self, scraper):
        """A dietary value becomes a tag."""
        recipe_data = {"dietary": "Gluten-Free"}
        tags = scraper.extract_tags(recipe_data)
        assert "Gluten-Free" in tags

    def test_extract_tags_from_course(self, scraper):
        """A course value becomes a tag."""
        recipe_data = {"course": "Dessert"}
        tags = scraper.extract_tags(recipe_data)
        assert "Dessert" in tags

    def test_extract_tags_from_keywords(self, scraper):
        """A comma-separated keywords string is split into individual tags."""
        recipe_data = {"keywords": "easy, quick, healthy"}
        tags = scraper.extract_tags(recipe_data)
        assert "easy" in tags
        assert "quick" in tags
        assert "healthy" in tags

    def test_extract_tags_from_keywords_list(self, scraper):
        """Every entry of a keywords list becomes a tag."""
        recipe_data = {"keywords": ["comfort food", "winter"]}
        tags = scraper.extract_tags(recipe_data)
        assert "comfort food" in tags
        assert "winter" in tags