[foods] Add recipe website parsing
This commit is contained in:
14
PROJECT.org
14
PROJECT.org
@ -92,7 +92,7 @@ fetching and simple saving.
|
||||
:LOGBOOK:
|
||||
CLOCK: [2025-07-09 Wed 09:55]--[2025-07-09 Wed 10:15] => 0:20
|
||||
:END:
|
||||
* Backlog [4/27]
|
||||
* Backlog [6/30]
|
||||
** TODO [#C] Create small utility to clean up tracks scrobbled with wonky playback times :vrobbler:personal:bug:music:scrobbles:
|
||||
** TODO [#C] Move to using the more robust mopidy-webhooks package from PyPI :utility:improvement:
|
||||
:PROPERTIES:
|
||||
@ -458,7 +458,17 @@ Turns out we're not looking up music tracks properly, again.
|
||||
:PROPERTIES:
|
||||
:ID: e16228b2-b062-bd00-32e6-b2353e6406e9
|
||||
:END:
|
||||
** TODO [#A] Videos are scrobbling duplicates again :vrobbler:bug:videos:scrobbles:
|
||||
** TODO Add sentiment parsing for Scrobbles with notes :vrobbler:project:scrobbles:sentiment:
|
||||
** TODO Check opencode about a way to present stats like movies per month :vrobbler:scrobbles:stats:personal:project:
|
||||
** DONE Add recipe parsing for food lookups :vrobbler:foods:project:feature:personal:
|
||||
:PROPERTIES:
|
||||
:ID: 86456c78-247b-fb63-7ae8-d6d17e7666b1
|
||||
:END:
|
||||
** DONE [#A] Videos are scrobbling duplicates again :vrobbler:bug:videos:scrobbles:
|
||||
:PROPERTIES:
|
||||
:ID: a46884fe-7ef1-410a-316b-7ac6d7599331
|
||||
:END:
|
||||
<2026-03-06 Fri>
|
||||
** DONE Fix board games not saving BGG id on lookup :vrobbler:bug:boardgames:
|
||||
:PROPERTIES:
|
||||
:ID: 506c2965-51d6-6cb9-fc4f-4f0468d2d62f
|
||||
|
||||
17
scripts/README.org
Normal file
17
scripts/README.org
Normal file
@ -0,0 +1,17 @@
|
||||
#+title: Readme
|
||||
|
||||
This directory is a collection of helpful utility scripts and simple gut-check tests for various functional pieces.
|
||||
|
||||
* test_recipe_scraper.py
|
||||
Asserts recipe detection for several URLs by making real requests out to the internet, whereas our test suite mocks return values.
|
||||
|
||||
#+begin_src sh
|
||||
python ../manage.py shell < ../scripts/test_recipe_scraper.py
|
||||
#+end_src
|
||||
|
||||
#+RESULTS:
|
||||
| Eagerly | running | all | tasks |
|
||||
| Connected | to | sqlite@db.sqlite3 | |
|
||||
| Checking: | https://cookingwithmike.com/quinoa-meatloaf/ | | |
|
||||
| Checking: | https://www.kingarthurbaking.com/recipes/overnight-sourdough-waffles-recipe | | |
|
||||
| Checking: | https://dirt.fyi/article/2026/02/25-years-of-ipod-brain?src=longreads | | |
|
||||
21
scripts/test_recipe_scraper.py
Normal file
21
scripts/test_recipe_scraper.py
Normal file
@ -0,0 +1,21 @@
|
||||
import requests
|
||||
from foods.sources.rscraper import (
|
||||
RecipeScraperService,
|
||||
)
|
||||
|
||||
|
||||
test_urls = {
|
||||
"https://cookingwithmike.com/quinoa-meatloaf/": True,
|
||||
"https://www.kingarthurbaking.com/recipes/overnight-sourdough-waffles-recipe": True,
|
||||
"https://dirt.fyi/article/2026/02/25-years-of-ipod-brain?src=longreads": False,
|
||||
"https://tastesbetterfromscratch.com/belgian-waffles/": True,
|
||||
}
|
||||
|
||||
# Gut-check against live sites: each URL maps to whether it should be detected
# as a recipe page.
for k, v in test_urls.items():
    # Announce the URL before fetching so a hang or HTTP error is attributable.
    print("Checking: ", k)
    # Bound the request so one unresponsive site cannot stall the whole run.
    html = requests.get(k, timeout=30).text
    detected = RecipeScraperService.is_recipe(html, k)
    assert detected == v, f"{k}: expected is_recipe={v}, got {detected}"
|
||||
0
tests/foods_tests/__init__.py
Normal file
0
tests/foods_tests/__init__.py
Normal file
198
tests/foods_tests/test_recipe_scraper.py
Normal file
198
tests/foods_tests/test_recipe_scraper.py
Normal file
@ -0,0 +1,198 @@
|
||||
import pytest
|
||||
from foods.sources.rscraper import (
|
||||
RecipeScraperService,
|
||||
)
|
||||
|
||||
|
||||
RECIPE_HTML_WITH_SCHEMA = """
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org/",
|
||||
"@type": "Recipe",
|
||||
"name": "Test Recipe",
|
||||
"author": {
|
||||
"@type": "Person",
|
||||
"name": "Test Author"
|
||||
},
|
||||
"recipeIngredient": ["1 cup flour", "2 eggs", "1/2 cup sugar"],
|
||||
"recipeInstructions": [
|
||||
{
|
||||
"@type": "HowToStep",
|
||||
"text": "Mix ingredients together"
|
||||
}
|
||||
],
|
||||
"totalTime": "PT30M",
|
||||
"recipeYield": "4 servings"
|
||||
}
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Test Recipe</h1>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
RECIPE_HTML_WITHOUT_SCHEMA = """
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Not a Recipe Page</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Welcome to My Blog</h1>
|
||||
<p>This is just a regular blog post about cooking.</p>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
RECIPE_HTML_WITH_MICRODATA = """
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test Recipe</title>
|
||||
</head>
|
||||
<body itemscope itemtype="http://schema.org/Recipe">
|
||||
<h1 itemprop="name">Microdata Recipe</h1>
|
||||
<div itemprop="author" itemscope itemtype="http://schema.org/Person">
|
||||
<span itemprop="name">Test Author</span>
|
||||
</div>
|
||||
<div itemprop="recipeIngredient">1 cup flour</div>
|
||||
<div itemprop="recipeIngredient">2 eggs</div>
|
||||
<div itemprop="recipeInstructions">
|
||||
<div itemprop="text">Mix all ingredients</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
class TestRecipeScraperService:
|
||||
@pytest.fixture
|
||||
def scraper(self):
|
||||
return RecipeScraperService()
|
||||
|
||||
def test_is_recipe_with_valid_schema(self, scraper):
|
||||
result = scraper.is_recipe(
|
||||
RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
|
||||
)
|
||||
assert result is True
|
||||
|
||||
def test_is_recipe_without_schema(self, scraper):
|
||||
result = scraper.is_recipe(
|
||||
RECIPE_HTML_WITHOUT_SCHEMA, "https://example.com/blog"
|
||||
)
|
||||
assert result is False
|
||||
|
||||
def test_is_recipe_with_microdata(self, scraper):
|
||||
result = scraper.is_recipe(
|
||||
RECIPE_HTML_WITH_MICRODATA, "https://example.com/recipe"
|
||||
)
|
||||
assert result is True
|
||||
|
||||
def test_scrape_returns_title(self, scraper):
|
||||
result = scraper.scrape(
|
||||
RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
|
||||
)
|
||||
assert result["title"] == "Test Recipe"
|
||||
|
||||
def test_scrape_returns_ingredients(self, scraper):
|
||||
result = scraper.scrape(
|
||||
RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
|
||||
)
|
||||
assert len(result["ingredients"]) == 3
|
||||
assert "1 cup flour" in result["ingredients"]
|
||||
|
||||
def test_scrape_returns_instructions(self, scraper):
|
||||
result = scraper.scrape(
|
||||
RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
|
||||
)
|
||||
assert len(result["instructions"]) > 0
|
||||
assert "Mix ingredients together" in result["instructions"]
|
||||
|
||||
def test_scrape_returns_yields(self, scraper):
|
||||
result = scraper.scrape(
|
||||
RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
|
||||
)
|
||||
assert result["yields"] == "4 servings"
|
||||
|
||||
def test_scrape_returns_total_time(self, scraper):
|
||||
result = scraper.scrape(
|
||||
RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
|
||||
)
|
||||
assert result["total_time"] == 30
|
||||
|
||||
def test_scrape_returns_url(self, scraper):
|
||||
result = scraper.scrape(
|
||||
RECIPE_HTML_WITH_SCHEMA, "https://example.com/recipe"
|
||||
)
|
||||
assert result["url"] == "https://example.com/recipe"
|
||||
|
||||
def test_scrape_raises_on_invalid_html(self, scraper):
|
||||
with pytest.raises(ValueError):
|
||||
scraper.scrape("", "https://example.com/recipe")
|
||||
|
||||
def test_scrape_handles_missing_optional_fields(self, scraper):
|
||||
minimal_html = """
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org/",
|
||||
"@type": "Recipe",
|
||||
"name": "Minimal Recipe"
|
||||
}
|
||||
</script>
|
||||
</head>
|
||||
<body></body>
|
||||
</html>
|
||||
"""
|
||||
result = scraper.scrape(minimal_html, "https://example.com/minimal")
|
||||
assert result["title"] == "Minimal Recipe"
|
||||
assert result["ingredients"] == []
|
||||
assert result["instructions"] == []
|
||||
|
||||
def test_parse_servings(self, scraper):
|
||||
assert scraper.parse_servings("4 servings") == 4
|
||||
assert scraper.parse_servings("6 people") == 6
|
||||
assert scraper.parse_servings("2") == 2
|
||||
assert scraper.parse_servings("serves 8") == 8
|
||||
assert scraper.parse_servings(None) is None
|
||||
assert scraper.parse_servings("") is None
|
||||
|
||||
def test_extract_tags_from_cuisine(self, scraper):
|
||||
recipe_data = {"cuisine": "Italian"}
|
||||
tags = scraper.extract_tags(recipe_data)
|
||||
assert "Italian" in tags
|
||||
|
||||
def test_extract_tags_from_cuisine_list(self, scraper):
|
||||
recipe_data = {"cuisine": ["Italian", "Mexican"]}
|
||||
tags = scraper.extract_tags(recipe_data)
|
||||
assert "Italian" in tags
|
||||
assert "Mexican" in tags
|
||||
|
||||
def test_extract_tags_from_dietary(self, scraper):
|
||||
recipe_data = {"dietary": "Gluten-Free"}
|
||||
tags = scraper.extract_tags(recipe_data)
|
||||
assert "Gluten-Free" in tags
|
||||
|
||||
def test_extract_tags_from_course(self, scraper):
|
||||
recipe_data = {"course": "Dessert"}
|
||||
tags = scraper.extract_tags(recipe_data)
|
||||
assert "Dessert" in tags
|
||||
|
||||
def test_extract_tags_from_keywords(self, scraper):
|
||||
recipe_data = {"keywords": "easy, quick, healthy"}
|
||||
tags = scraper.extract_tags(recipe_data)
|
||||
assert "easy" in tags
|
||||
assert "quick" in tags
|
||||
assert "healthy" in tags
|
||||
|
||||
def test_extract_tags_from_keywords_list(self, scraper):
|
||||
recipe_data = {"keywords": ["comfort food", "winter"]}
|
||||
tags = scraper.extract_tags(recipe_data)
|
||||
assert "comfort food" in tags
|
||||
assert "winter" in tags
|
||||
135
tests/foods_tests/test_usda.py
Normal file
135
tests/foods_tests/test_usda.py
Normal file
@ -0,0 +1,135 @@
|
||||
import pytest
|
||||
from unittest.mock import patch
|
||||
from foods.sources.usda import (
|
||||
USDAFoodAPI,
|
||||
NutritionCalculator,
|
||||
)
|
||||
|
||||
|
||||
class TestUSDAFoodAPI:
    """Unit tests for ``USDAFoodAPI.extract_nutrients``."""

    @pytest.fixture
    def usda_api(self):
        """Yield a ``USDAFoodAPI`` built with a dummy key.

        The patch targets ``foods.sources.usda`` -- the same module path the
        class is imported from -- so the mock lands on the module object the
        class actually reads. Yielding (rather than returning) keeps the
        patch active for the duration of the test.
        """
        with patch("foods.sources.usda.settings") as mock_settings:
            mock_settings.USDA_API_KEY = "test_api_key"
            yield USDAFoodAPI(api_key="test_api_key")

    def test_extract_nutrients_with_nutrient_number(self, usda_api):
        """Flat entries keyed by ``nutrientNumber`` map to named nutrients."""
        food_data = {
            "description": "Test Food",
            "foodNutrients": [
                {
                    "nutrientNumber": "203",
                    "nutrientName": "Protein",
                    "value": 10.0,
                },
                {
                    "nutrientNumber": "204",
                    "nutrientName": "Total lipid (fat)",
                    "value": 5.0,
                },
                {
                    "nutrientNumber": "205",
                    "nutrientName": "Carbohydrate, by difference",
                    "value": 20.0,
                },
                {
                    "nutrientNumber": "208",
                    "nutrientName": "Energy",
                    "value": 150.0,
                },
                {
                    "nutrientNumber": "269",
                    "nutrientName": "Sugars, total",
                    "value": 5.0,
                },
            ],
        }
        result = usda_api.extract_nutrients(food_data)
        assert result["protein"] == 10.0
        assert result["fat"] == 5.0
        assert result["carbohydrates"] == 20.0
        assert result["calories"] == 150.0
        assert result["sugar"] == 5.0

    def test_extract_nutrients_with_nested_nutrient(self, usda_api):
        """Entries carrying a nested ``nutrient`` object are also understood."""
        food_data = {
            "description": "Test Food",
            "foodNutrients": [
                {
                    "nutrient": {"id": 203, "name": "Protein"},
                    "value": 10.0,
                },
            ],
        }
        result = usda_api.extract_nutrients(food_data)
        assert result["protein"] == 10.0

    def test_extract_nutrients_with_empty_nutrients(self, usda_api):
        """An empty nutrient list yields zeroed values."""
        food_data = {"description": "Test Food", "foodNutrients": []}
        result = usda_api.extract_nutrients(food_data)
        assert result["protein"] == 0
        assert result["calories"] == 0

    def test_extract_nutrients_with_no_nutrients_key(self, usda_api):
        """A payload without ``foodNutrients`` yields zeroed values."""
        food_data = {"description": "Test Food"}
        result = usda_api.extract_nutrients(food_data)
        assert result["protein"] == 0
|
||||
|
||||
|
||||
class TestNutritionCalculator:
    """Unit tests for ingredient parsing and unit conversion helpers."""

    @pytest.fixture
    def calculator(self):
        """Yield a ``NutritionCalculator`` with ``USDAFoodAPI`` mocked out.

        The patch targets ``foods.sources.usda`` -- the same module path the
        calculator is imported from -- so the calculator under test sees the
        mock. Yielding keeps the patch active while the test body runs.
        """
        with patch("foods.sources.usda.USDAFoodAPI"):
            yield NutritionCalculator()

    def test_parse_ingredient_with_fraction(self, calculator):
        result = calculator.parse_ingredient("1/2 cup flour")
        assert result["quantity"] == 0.5
        assert result["unit"] == "cup"
        assert result["ingredient"] == "flour"

    def test_parse_ingredient_with_mixed_number(self, calculator):
        result = calculator.parse_ingredient("1 1/2 cups sugar")
        assert result["quantity"] == 1.5
        assert result["unit"] == "cups"
        assert result["ingredient"] == "sugar"

    def test_parse_ingredient_with_decimal(self, calculator):
        result = calculator.parse_ingredient("0.5 tsp salt")
        assert result["quantity"] == 0.5
        assert result["unit"] == "tsp"
        assert result["ingredient"] == "salt"

    def test_parse_ingredient_with_whole_number(self, calculator):
        result = calculator.parse_ingredient("3 eggs")
        assert result["quantity"] == 3
        assert result["unit"] is None
        assert result["ingredient"] == "eggs"

    def test_parse_ingredient_with_no_quantity(self, calculator):
        # A line without a leading amount is expected to count as quantity 1.
        result = calculator.parse_ingredient("salt to taste")
        assert result["quantity"] == 1

    def test_clean_ingredient_name_removes_modifiers(self, calculator):
        result = calculator._clean_ingredient_name("fresh chopped onions")
        assert "fresh" not in result.lower()
        assert "chopped" not in result.lower()

    def test_clean_ingredient_name_removes_parentheses(self, calculator):
        result = calculator._clean_ingredient_name("flour (sifted)")
        assert "(" not in result
        assert ")" not in result

    def test_convert_to_grams_cup(self, calculator):
        result = calculator._convert_to_grams(2, "cups", "flour")
        assert result == 480

    def test_convert_to_grams_tablespoon(self, calculator):
        result = calculator._convert_to_grams(3, "tbsp", "olive oil")
        assert result == 45

    def test_convert_to_grams_unknown_unit(self, calculator):
        result = calculator._convert_to_grams(1, "unknown", "something")
        assert result == 100
|
||||
@ -0,0 +1,131 @@
|
||||
# Generated by Django 4.2.29 on 2026-03-05 17:01
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("foods", "0004_remove_food_run_time_seconds_and_more"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="carbohydrates",
|
||||
field=models.DecimalField(
|
||||
blank=True, decimal_places=2, max_digits=10, null=True
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="cook_time_minutes",
|
||||
field=models.IntegerField(blank=True, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="fat",
|
||||
field=models.DecimalField(
|
||||
blank=True, decimal_places=2, max_digits=10, null=True
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="fiber",
|
||||
field=models.DecimalField(
|
||||
blank=True, decimal_places=2, max_digits=10, null=True
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="ingredients",
|
||||
field=models.TextField(blank=True, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="instructions",
|
||||
field=models.TextField(blank=True, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="prep_time_minutes",
|
||||
field=models.IntegerField(blank=True, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="protein",
|
||||
field=models.DecimalField(
|
||||
blank=True, decimal_places=2, max_digits=10, null=True
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="servings",
|
||||
field=models.IntegerField(blank=True, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="sodium",
|
||||
field=models.DecimalField(
|
||||
blank=True, decimal_places=2, max_digits=10, null=True
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="source_site",
|
||||
field=models.CharField(blank=True, max_length=100, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="source_url",
|
||||
field=models.URLField(blank=True, null=True, unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="sugar",
|
||||
field=models.DecimalField(
|
||||
blank=True, decimal_places=2, max_digits=10, null=True
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="total_time_minutes",
|
||||
field=models.IntegerField(blank=True, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="food",
|
||||
name="yield_text",
|
||||
field=models.CharField(blank=True, max_length=255, null=True),
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="food",
|
||||
index=models.Index(
|
||||
fields=["source_url"], name="foods_food_source__f42f6f_idx"
|
||||
),
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="food",
|
||||
index=models.Index(
|
||||
fields=["allrecipe_id"], name="foods_food_allreci_3e7a6c_idx"
|
||||
),
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="food",
|
||||
index=models.Index(
|
||||
fields=["description"], name="foods_food_descrip_fccd4d_idx"
|
||||
),
|
||||
),
|
||||
migrations.RenameField(
|
||||
model_name="food",
|
||||
old_name="allrecipe_image",
|
||||
new_name="recipe_image",
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name="foodcategory",
|
||||
name="allrecipe_image",
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="foodcategory",
|
||||
name="category_image",
|
||||
field=models.ImageField(blank=True, null=True, upload_to="food/category/"),
|
||||
),
|
||||
]
|
||||
@ -1,5 +1,6 @@
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
from typing import Optional, Tuple
|
||||
from uuid import uuid4
|
||||
|
||||
from django.apps import apps
|
||||
@ -10,6 +11,8 @@ from imagekit.models import ImageSpecField
|
||||
from imagekit.processors import ResizeToFit
|
||||
from scrobbles.dataclasses import BaseLogData, WithPeopleLogData
|
||||
from scrobbles.mixins import ScrobblableConstants, ScrobblableMixin
|
||||
from foods.sources.rscraper import RecipeScraperService
|
||||
from foods.sources.usda import NutritionCalculator
|
||||
|
||||
BNULL = {"blank": True, "null": True}
|
||||
|
||||
@ -23,15 +26,15 @@ class FoodLogData(BaseLogData, WithPeopleLogData):
|
||||
class FoodCategory(TimeStampedModel):
|
||||
uuid = models.UUIDField(default=uuid4, editable=False, **BNULL)
|
||||
name = models.CharField(max_length=255)
|
||||
allrecipe_image = models.ImageField(upload_to="food/recipe/", **BNULL)
|
||||
allrecipe_image_small = ImageSpecField(
|
||||
source="recipe_image",
|
||||
category_image = models.ImageField(upload_to="food/category/", **BNULL)
|
||||
category_image_small = ImageSpecField(
|
||||
source="category_image",
|
||||
processors=[ResizeToFit(100, 100)],
|
||||
format="JPEG",
|
||||
options={"quality": 60},
|
||||
)
|
||||
allrecipe_image_medium = ImageSpecField(
|
||||
source="recipe_image",
|
||||
category_image_medium = ImageSpecField(
|
||||
source="category_image",
|
||||
processors=[ResizeToFit(300, 300)],
|
||||
format="JPEG",
|
||||
options={"quality": 75},
|
||||
@ -39,6 +42,7 @@ class FoodCategory(TimeStampedModel):
|
||||
allrecipe_id = models.CharField(max_length=255, **BNULL)
|
||||
description = models.TextField(**BNULL)
|
||||
|
||||
@classmethod
def find_or_create(cls, title: str) -> "FoodCategory":
    """Return the category whose name is ``title``, creating it if missing.

    The model stores its label in ``name``; it has no ``title`` field, so
    the lookup must go through ``name``. ``filter().first()`` is used rather
    than ``get_or_create`` because ``name`` is not declared unique and
    duplicate rows would otherwise raise ``MultipleObjectsReturned``.
    """
    category = cls.objects.filter(name=title).first()
    if category is None:
        category = cls.objects.create(name=title)
    return category
|
||||
|
||||
@ -47,17 +51,42 @@ class FoodCategory(TimeStampedModel):
|
||||
|
||||
|
||||
class Food(ScrobblableMixin):
|
||||
description = models.TextField(**BNULL)
|
||||
# Recipe source tracking
|
||||
source_url = models.URLField(null=True, blank=True, unique=True)
|
||||
source_site = models.CharField(max_length=100, null=True, blank=True)
|
||||
|
||||
# Recipe data
|
||||
description = models.TextField(**BNULL) # Recipe title
|
||||
ingredients = models.TextField(**BNULL) # JSON or newline-separated
|
||||
instructions = models.TextField(**BNULL) # JSON or newline-separated
|
||||
prep_time_minutes = models.IntegerField(**BNULL)
|
||||
cook_time_minutes = models.IntegerField(**BNULL)
|
||||
total_time_minutes = models.IntegerField(**BNULL)
|
||||
servings = models.IntegerField(**BNULL)
|
||||
yield_text = models.CharField(
|
||||
max_length=255, **BNULL
|
||||
) # e.g., "8 calzones"
|
||||
|
||||
# Nutrition (per serving)
|
||||
calories = models.IntegerField(**BNULL)
|
||||
allrecipe_image = models.ImageField(upload_to="food/recipe/", **BNULL)
|
||||
allrecipe_image_small = ImageSpecField(
|
||||
source="allrecipe_image",
|
||||
protein = models.DecimalField(max_digits=10, decimal_places=2, **BNULL)
|
||||
fat = models.DecimalField(max_digits=10, decimal_places=2, **BNULL)
|
||||
carbohydrates = models.DecimalField(
|
||||
max_digits=10, decimal_places=2, **BNULL
|
||||
)
|
||||
fiber = models.DecimalField(max_digits=10, decimal_places=2, **BNULL)
|
||||
sugar = models.DecimalField(max_digits=10, decimal_places=2, **BNULL)
|
||||
sodium = models.DecimalField(max_digits=10, decimal_places=2, **BNULL)
|
||||
|
||||
recipe_image = models.ImageField(upload_to="food/recipe/", **BNULL)
|
||||
recipe_image_small = ImageSpecField(
|
||||
source="recipe_image",
|
||||
processors=[ResizeToFit(100, 100)],
|
||||
format="JPEG",
|
||||
options={"quality": 60},
|
||||
)
|
||||
allrecipe_image_medium = ImageSpecField(
|
||||
source="allrecipe_image",
|
||||
recipe_image_medium = ImageSpecField(
|
||||
source="recipe_image",
|
||||
processors=[ResizeToFit(300, 300)],
|
||||
format="JPEG",
|
||||
options={"quality": 75},
|
||||
@ -68,6 +97,13 @@ class Food(ScrobblableMixin):
|
||||
FoodCategory, on_delete=models.DO_NOTHING, **BNULL
|
||||
)
|
||||
|
||||
class Meta:
|
||||
indexes = [
|
||||
models.Index(fields=["source_url"]),
|
||||
models.Index(fields=["allrecipe_id"]),
|
||||
models.Index(fields=["description"]),
|
||||
]
|
||||
|
||||
def get_absolute_url(self) -> str:
|
||||
return reverse("foods:food_detail", kwargs={"slug": self.uuid})
|
||||
|
||||
@ -91,8 +127,8 @@ class Food(ScrobblableMixin):
|
||||
@property
|
||||
def primary_image_url(self) -> str:
|
||||
url = ""
|
||||
if self.allrecipe_image:
|
||||
url = self.allrecipe_image.url
|
||||
if self.recipe_image:
|
||||
url = self.recipe_image.url
|
||||
return url
|
||||
|
||||
@property
|
||||
@ -100,21 +136,155 @@ class Food(ScrobblableMixin):
|
||||
return FoodLogData
|
||||
|
||||
@classmethod
|
||||
def find_or_create(cls, allrecipe_id: str) -> "Food":
|
||||
food = cls.objects.filter(allrecipe_id=allrecipe_id).first()
|
||||
def find_or_create_from_recipe(
|
||||
cls, url: str, category=None
|
||||
) -> Tuple["Food", bool]:
|
||||
"""
|
||||
Scrape a recipe URL and create/update Food instance.
|
||||
Uses django-taggit for tags and existing FoodCategory for category.
|
||||
"""
|
||||
# Check if URL already exists
|
||||
existing = cls.objects.filter(source_url=url).first()
|
||||
if existing:
|
||||
return existing, False
|
||||
|
||||
if not food:
|
||||
food_dict = get_food_from_allrecipe_id(allrecipe_id)
|
||||
# category_dict = {}
|
||||
# Scrape the recipe
|
||||
scraper = RecipeScraperService()
|
||||
recipe_data = scraper.scrape_url(url)
|
||||
|
||||
# category, _created = FoodCategory.objects.get_or_create(
|
||||
# **category_dict
|
||||
# )
|
||||
food = Food.objects.create(**food_dict)
|
||||
# for category_id in category_ids:
|
||||
# food.category.add(category_id)
|
||||
# Download image
|
||||
recipe_image = None
|
||||
if recipe_data.get("image"):
|
||||
recipe_image = scraper.download_image(recipe_data["image"])
|
||||
|
||||
return food
|
||||
# Extract tags
|
||||
tags = scraper.extract_tags(recipe_data)
|
||||
|
||||
# Calculate nutrition
|
||||
nutrition = recipe_data.get("nutrition")
|
||||
if not nutrition:
|
||||
calculator = NutritionCalculator()
|
||||
servings = (
|
||||
scraper.parse_servings(recipe_data.get("yields", "1")) or 1
|
||||
)
|
||||
nutrition = calculator.calculate_nutrition(
|
||||
recipe_data.get("ingredients", []), servings
|
||||
)
|
||||
else:
|
||||
servings = (
|
||||
scraper.parse_servings(recipe_data.get("yields", "1")) or 1
|
||||
)
|
||||
|
||||
# Get or create category (if not provided)
|
||||
if not category and recipe_data.get("category"):
|
||||
category, _ = FoodCategory.objects.get_or_create(
|
||||
name=recipe_data["category"]
|
||||
)
|
||||
|
||||
# Create Food instance
|
||||
food = cls.objects.create(
|
||||
title=recipe_data.get("title"),
|
||||
source_url=url,
|
||||
source_site=recipe_data.get("site"),
|
||||
ingredients=json.dumps(recipe_data.get("ingredients", [])),
|
||||
instructions=json.dumps(recipe_data.get("instructions", [])),
|
||||
prep_time_minutes=recipe_data.get("prep_time"),
|
||||
cook_time_minutes=recipe_data.get("cook_time"),
|
||||
total_time_minutes=recipe_data.get("total_time"),
|
||||
servings=servings,
|
||||
yield_text=recipe_data.get("yields"),
|
||||
# cuisine=recipe_data.get("cuisine"),
|
||||
# course=recipe_data.get("course"),
|
||||
calories=int(nutrition.get("calories", 0)) if nutrition else None,
|
||||
protein=nutrition.get("protein") if nutrition else None,
|
||||
fat=nutrition.get("fat") if nutrition else None,
|
||||
carbohydrates=(
|
||||
nutrition.get("carbohydrates") if nutrition else None
|
||||
),
|
||||
fiber=nutrition.get("fiber") if nutrition else None,
|
||||
category=category,
|
||||
)
|
||||
|
||||
# Add tags via django-taggit
|
||||
if tags:
|
||||
food.genre.add(*tags)
|
||||
|
||||
# Save image if downloaded
|
||||
if recipe_image:
|
||||
food.recipe_image.save(recipe_image.name, recipe_image, save=True)
|
||||
|
||||
if "allrecipes.com" in url:
|
||||
food.allrecipe_id = url.split("/recipe/")[-1].split("/")[0]
|
||||
food.save()
|
||||
|
||||
return food, True
|
||||
|
||||
@classmethod
def find_or_create_from_ingredients(
    cls,
    ingredients: list[str],
    title: str,
    servings: int = 1,
    category=None,
) -> Tuple["Food", bool]:
    """Build a Food from a bare ingredient list when there is no source URL.

    Returns ``(food, created)``. If a Food whose description contains
    ``title`` (case-insensitive) already exists it is returned untouched
    with ``created=False``. Otherwise nutrition is computed through
    ``NutritionCalculator`` and a new row is created.
    """
    match = cls.objects.filter(description__icontains=title).first()
    if match:
        return match, False

    nutrition = NutritionCalculator().calculate_nutrition(ingredients, servings)

    def macro(key):
        # Missing/empty nutrition leaves every macro column NULL.
        return nutrition.get(key) if nutrition else None

    serialized = json.dumps(ingredients)
    if nutrition:
        calories = int(nutrition.get("calories", 0))
    else:
        calories = None

    created_food = cls.objects.create(
        description=title,
        ingredients=serialized,
        servings=servings,
        calories=calories,
        protein=macro("protein"),
        fat=macro("fat"),
        carbohydrates=macro("carbohydrates"),
        fiber=macro("fiber"),
        category=category,
    )
    return created_food, True
|
||||
|
||||
def refresh_nutrition(self) -> bool:
    """Recompute the stored nutrition values from the saved ingredient list.

    Returns True when the values were recalculated and saved, and False when
    there are no ingredients or any step of the recalculation raised.
    """
    if not self.ingredients:
        return False

    macro_fields = ("protein", "fat", "carbohydrates", "fiber")
    try:
        parsed = json.loads(self.ingredients)
        nutrition = NutritionCalculator().calculate_nutrition(
            parsed, self.servings or 1
        )

        self.calories = int(nutrition.get("calories", 0))
        for field_name in macro_fields:
            setattr(self, field_name, nutrition.get(field_name))

        # Only touch the nutrition columns.
        self.save(update_fields=["calories", *macro_fields])
    except Exception:
        return False
    return True
|
||||
|
||||
def scrobbles(self, user_id):
|
||||
Scrobble = apps.get_model("scrobbles", "Scrobble")
|
||||
|
||||
182
vrobbler/apps/foods/sources/rscraper.py
Normal file
182
vrobbler/apps/foods/sources/rscraper.py
Normal file
@ -0,0 +1,182 @@
|
||||
# vrobbler/apps/foods/sources/rscraper.py
|
||||
from recipe_scrapers import scrape_html
|
||||
from recipe_scrapers._exceptions import (
|
||||
NoSchemaFoundInWildMode,
|
||||
SchemaOrgException,
|
||||
)
|
||||
import requests
|
||||
from typing import Dict, Optional, List
|
||||
from django.core.files.base import ContentFile
|
||||
|
||||
|
||||
class RecipeScraperService:
|
||||
def __init__(self):
    """Create the shared HTTP session and set its User-Agent header."""
    session = requests.Session()
    user_agent = "Mozilla/5.0 (compatible; Vrobbler/0.3)"
    session.headers.update({"User-Agent": user_agent})
    self.session = session
|
||||
|
||||
def scrape(self, html: str, org_url: str) -> Dict:
    """Scrape recipe data from HTML.

    Args:
        html: The HTML content of the page
        org_url: The original URL the HTML was fetched from

    Returns:
        A dict of recipe fields. ``title`` is read directly from the
        scraper and ``url`` echoes ``org_url``; every other field goes
        through ``_safe_call`` and falls back to its default (``None`` or
        an empty list) when the scraper cannot provide it.

    Raises:
        ValueError: If ``scrape_html`` fails for this page. The original
            exception is chained as ``__cause__``.
    """
    try:
        scraper = scrape_html(html, org_url=org_url, supported_only=False)
    except Exception as e:
        # Chain the cause so the underlying parser error stays visible.
        raise ValueError(f"Failed to scrape {org_url}: {str(e)}") from e

    return {
        "title": scraper.title(),
        "ingredients": self._safe_call(scraper, "ingredients", []),
        "instructions": self._safe_call(scraper, "instructions_list", []),
        "yields": self._safe_call(scraper, "yields"),
        "nutrition": self._safe_call(scraper, "nutrition"),
        "course": self._safe_call(scraper, "course"),
        "dietary": self._safe_call(scraper, "dietary", []),
        "prep_time": self._safe_call(scraper, "prep_time"),
        "total_time": self._safe_call(scraper, "total_time"),
        "image": self._safe_call(scraper, "image"),
        "cook_time": self._safe_call(scraper, "cook_time"),
        "cuisine": self._safe_call(scraper, "cuisine"),
        "site": self._safe_call(scraper, "host"),
        "url": org_url,
        "category": self._safe_call(scraper, "category"),
        "keywords": self._safe_call(scraper, "keywords"),
    }
|
||||
|
||||
def scrape_url(self, url: str) -> Dict:
    """Fetch ``url`` with the service's session and scrape the response body.

    Args:
        url: The recipe page to download.

    Returns:
        The dict produced by ``scrape`` for the downloaded HTML.

    Raises:
        requests.HTTPError: If the site answers with a non-200 status.
        ValueError: Propagated from ``scrape`` when the HTML cannot be parsed.
    """
    # Same 10 second bound the other requests in this class use.
    response = self.session.get(url, timeout=10)
    if response.status_code != 200:
        raise requests.HTTPError(
            f"Recipe website returned non 200 response: {response.status_code}",
            response=response,
        )
    return self.scrape(response.text, org_url=url)
|
||||
|
||||
@classmethod
def is_recipe(cls, html: str, org_url: str) -> bool:
    """Check if HTML contains a recipe.

    Args:
        html: The HTML content to check
        org_url: The original URL the HTML was fetched from

    Returns:
        True when ``scrape_html`` succeeds and the resulting scraper exposes
        a schema; False otherwise. Never raises: any parsing failure
        (including ``NoSchemaFoundInWildMode``) is reported as "not a recipe".
    """
    try:
        scraper = scrape_html(html, org_url=org_url, supported_only=False)
        return scraper.schema is not None
    except Exception:
        # Deliberately broad: this is a yes/no probe over arbitrary pages.
        return False
|
||||
|
||||
@classmethod
def is_recipe_url(cls, url: str) -> bool:
    """Check if a URL points to a recipe by fetching and checking the HTML.

    Args:
        url: The page to probe.

    Returns:
        True when the page downloads with status 200 and ``is_recipe``
        accepts its HTML; False for non-200 responses and for any error
        raised while fetching.
    """
    # Local import: the module has no top-level logger to rely on.
    import logging

    logger = logging.getLogger(__name__)

    try:
        response = requests.get(
            url,
            timeout=10,
            headers={"User-Agent": "Mozilla/5.0 (compatible; Vrobbler/0.3)"},
        )
        if response.status_code != 200:
            logger.debug("Recipe website returned non 200 response")
            return False
        return cls.is_recipe(response.text, url)
    except Exception as e:
        # Best-effort probe: report the failure and treat it as "not a recipe".
        logger.warning("Could not check %s for a recipe: %s", url, e)
        return False
|
||||
|
||||
def _safe_call(self, scraper, method, default=None):
|
||||
"""
|
||||
Safely call a scraper method, returning default on error.
|
||||
Handles both missing methods (AttributeError) and methods that return None.
|
||||
"""
|
||||
try:
|
||||
# Check if method exists before calling
|
||||
if not hasattr(scraper, method):
|
||||
return default
|
||||
|
||||
result = getattr(scraper, method)()
|
||||
return result if result else default
|
||||
except (
|
||||
AttributeError,
|
||||
TypeError,
|
||||
KeyError,
|
||||
ValueError,
|
||||
SchemaOrgException,
|
||||
):
|
||||
# AttributeError: method doesn't exist on this scraper class
|
||||
# TypeError: method exists but can't be called
|
||||
# KeyError: method exists but data structure is unexpected
|
||||
return default
|
||||
|
||||
def download_image(self, image_url: str) -> Optional[ContentFile]:
    """Download recipe image and return as Django ContentFile.

    Args:
        image_url: Address of the image; a falsy value short-circuits.

    Returns:
        A ``ContentFile`` holding the response body, named after the last
        path segment of the URL (query string removed) when that segment
        ends in a known image extension, otherwise ``recipe_image.jpg``.
        None when no URL is given or anything fails.
    """
    if not image_url:
        return None

    try:
        response = self.session.get(image_url, timeout=10)
        response.raise_for_status()

        filename = image_url.split("/")[-1].split("?")[0]
        # Compare case-insensitively so names such as "PHOTO.JPG" keep
        # their original file name instead of being replaced.
        if not filename.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
            filename = "recipe_image.jpg"

        return ContentFile(response.content, name=filename)
    except Exception as exc:
        # Best-effort: a missing image must not break the recipe import,
        # but record why it was skipped.
        logger.warning("Could not download recipe image %s: %s", image_url, exc)
        return None
|
||||
|
||||
def parse_servings(self, yield_text: str) -> Optional[int]:
    """Return the first run of digits in ``yield_text`` as an integer.

    Returns None when the text is empty/None or contains no digits.
    """
    import re

    if yield_text:
        found = re.search(r"(\d+)", yield_text)
        if found:
            return int(found.group(1))
    return None
|
||||
|
||||
def extract_tags(self, recipe_data: Dict) -> List[str]:
    """Extract all tags from recipe metadata.

    Reads the ``cuisine``, ``dietary``, ``course`` and ``keywords`` entries
    of ``recipe_data``. Each may be a string or a collection of strings; a
    ``keywords`` string is additionally split on commas. Values are
    stripped, and blank or non-string entries are ignored.

    Args:
        recipe_data: Recipe dictionary such as the one built by ``scrape``.

    Returns:
        The unique tags as a list, in no particular order.
    """

    def _clean(value, split_commas=False):
        # Normalize one metadata value into a list of non-empty tag strings.
        if isinstance(value, str):
            candidates = value.split(",") if split_commas else [value]
        elif isinstance(value, (list, tuple, set)):
            candidates = value
        else:
            return []
        # Non-string items (e.g. None inside a keyword list) used to raise
        # AttributeError on .strip(); skip them instead.
        return [
            item.strip()
            for item in candidates
            if isinstance(item, str) and item.strip()
        ]

    tags = set()
    for field in ("cuisine", "dietary", "course"):
        tags.update(_clean(recipe_data.get(field)))
    tags.update(_clean(recipe_data.get("keywords"), split_commas=True))

    return list(tags)
|
||||
547
vrobbler/apps/foods/sources/usda.py
Normal file
547
vrobbler/apps/foods/sources/usda.py
Normal file
@ -0,0 +1,547 @@
|
||||
from django.conf import settings
|
||||
import requests
|
||||
from typing import Dict, List, Optional
|
||||
from functools import lru_cache
|
||||
from typing import List, Dict
|
||||
import re
|
||||
from fractions import Fraction
|
||||
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class USDAFoodAPI:
    """Small client for the USDA FoodData Central (FDC) REST API."""

    BASE_URL = "https://api.nal.usda.gov/fdc/v1"

    # Seconds to wait for the API before giving up; without a timeout a
    # stalled connection would block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    # USDA nutrient *numbers* -> key in the dictionary returned by
    # extract_nutrients(). The previous map listed "269" twice (fiber and
    # sugar), so fiber could never be filled in.
    NUTRIENT_NUMBERS = {
        "203": "protein",
        "204": "fat",
        "205": "carbohydrates",
        "208": "calories",
        "291": "fiber",
        "269": "sugar",
        "307": "sodium",
    }

    # FDC nutrient *ids* for the same nutrients, used when an entry only
    # carries a nested ``nutrient.id`` (2000 is total sugars, not sodium).
    NUTRIENT_IDS = {
        "1003": "protein",
        "1004": "fat",
        "1005": "carbohydrates",
        "1008": "calories",
        "1079": "fiber",
        "2000": "sugar",
        "1093": "sodium",
    }

    def __init__(self, api_key: Optional[str] = None):
        """Create a client.

        Args:
            api_key: FDC API key. Falls back to ``settings.USDA_API_KEY``.

        Raises:
            ValueError: If no key is passed and none is configured.
        """
        self.api_key = api_key or getattr(settings, "USDA_API_KEY", None)
        if not self.api_key:
            raise ValueError(
                "USDA API key not found. Set USDA_API_KEY in your Django settings."
            )
        self.session = requests.Session()

    def _get_base_params(self) -> Dict:
        """Return base parameters including the API key."""
        return {"api_key": self.api_key}

    # NOTE(review): lru_cache on a method keys on ``self`` and keeps up to
    # 100 instances alive for the cache's lifetime; callers also receive the
    # cached list object itself, so they should not mutate it.
    @lru_cache(maxsize=100)
    def search_foods(
        self, query: str, page_size: int = 50, page_number: int = 1
    ) -> List[Dict]:
        """Search for food items by keyword.

        Args:
            query: Free-text search term.
            page_size: Number of results requested per page.
            page_number: Page of results to request.

        Returns:
            The ``foods`` list of the JSON response (empty when absent).

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        url = f"{self.BASE_URL}/foods/search"
        params = {
            **self._get_base_params(),
            "query": query,
            "pageSize": page_size,
            "pageNumber": page_number,
            "includeNutrientCodes": ",".join(self.NUTRIENT_NUMBERS),
        }
        response = self.session.get(
            url, params=params, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.json().get("foods", [])

    def get_food_details(self, fdc_id: int) -> Dict:
        """Retrieve detailed nutrient data for a specific food item.

        Args:
            fdc_id: FDC identifier of the food.

        Returns:
            The decoded JSON response.

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        url = f"{self.BASE_URL}/food/{fdc_id}"
        params = {
            **self._get_base_params(),
            # Request every nutrient that extract_nutrients() reports.
            "nutrients": list(self.NUTRIENT_NUMBERS),
        }
        response = self.session.get(
            url, params=params, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.json()

    def get_multiple_foods(self, fdc_ids: List[int]) -> List[Dict]:
        """Retrieve details for multiple food items.

        Args:
            fdc_ids: FDC identifiers to look up.

        Returns:
            The ``foods`` list of the JSON response (empty when absent).

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        url = f"{self.BASE_URL}/foods"
        params = {
            **self._get_base_params(),
            "fdcIds": fdc_ids,
            "nutrients": list(self.NUTRIENT_NUMBERS),
        }
        response = self.session.get(
            url, params=params, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.json().get("foods", [])

    @classmethod
    def _nutrient_key(cls, entry: Dict) -> Optional[str]:
        """Map one ``foodNutrients`` entry to a result key, or None."""
        nested = entry.get("nutrient")
        if "nutrientNumber" in entry:
            return cls.NUTRIENT_NUMBERS.get(str(entry.get("nutrientNumber", "")))
        if isinstance(nested, dict):
            # Prefer the nutrient number; fall back to the numeric id.
            by_number = cls.NUTRIENT_NUMBERS.get(str(nested.get("number", "")))
            return by_number or cls.NUTRIENT_IDS.get(str(nested.get("id", "")))
        if "number" in entry:
            return cls.NUTRIENT_NUMBERS.get(str(entry.get("number", "")))
        return None

    def extract_nutrients(self, food_data: Dict) -> Dict:
        """Extract key nutrients into a clean dictionary.

        Three entry layouts are recognised inside ``foodNutrients``: a flat
        ``nutrientNumber`` field, a nested ``nutrient`` dict (``number`` or
        ``id``), and a flat ``number`` field. The amount is read from
        ``value`` and, if that is missing, from ``amount``.

        Args:
            food_data: A food dictionary as returned by the API.

        Returns:
            A dict with the keys protein, fat, carbohydrates, calories,
            fiber, sugar and sodium; nutrients that are absent or not
            numeric stay at 0.
        """
        nutrients = {
            "protein": 0,
            "fat": 0,
            "carbohydrates": 0,
            "calories": 0,
            "fiber": 0,
            "sugar": 0,
            "sodium": 0,
        }
        food_nutrients = food_data.get("foodNutrients", [])

        if not food_nutrients:
            logger.warning(
                "No nutrients found for food: %s",
                food_data.get("description", "unknown"),
            )
            return nutrients

        for entry in food_nutrients:
            if not isinstance(entry, dict):
                continue

            key = self._nutrient_key(entry)
            if key is None:
                continue

            value = entry.get("value")
            if value is None:
                value = entry.get("amount")
            if value is None:
                continue

            try:
                nutrients[key] = float(value)
            except (TypeError, ValueError):
                # Leave the default in place for non-numeric amounts.
                pass

        return nutrients

    def get_food_summary(self, fdc_id: int) -> Dict:
        """Return a simplified summary of a food item.

        Fetches the details for ``fdc_id`` and returns its identifying
        fields, serving size fields and the extracted nutrients.
        """
        details = self.get_food_details(fdc_id)
        return {
            "fdcId": details.get("fdcId"),
            "description": details.get("description"),
            "dataType": details.get("dataType"),
            "brandOwner": details.get("brandOwner"),
            "servingSize": details.get("servingSize"),
            "servingSizeUnit": details.get("servingSizeUnit"),
            "nutrients": self.extract_nutrients(details),
        }
|
||||
|
||||
|
||||
class NutritionCalculator:
    """Calculate recipe nutrition from ingredients using USDA API."""

    # Common units to strip from ingredient names
    UNITS = [
        "cup", "cups", "c",
        "tablespoon", "tablespoons", "tbsp", "tb", "t",
        "teaspoon", "teaspoons", "tsp", "ts",
        "ounce", "ounces", "oz",
        "fluid ounce", "fluid ounces", "fl oz",
        "pound", "pounds", "lb", "lbs",
        "gram", "grams", "g",
        "kilogram", "kilograms", "kg",
        "milliliter", "milliliters", "ml",
        "liter", "liters", "l",
        "piece", "pieces", "whole", "large", "medium", "small",
        "can", "cans", "bottle", "bottles", "jar", "jars",
        "package", "packages", "pkg", "box", "boxes",
        "slice", "slices", "clove", "cloves", "stick", "sticks",
        "pinch", "dash", "handful", "bunch", "sprig", "sprigs",
    ]

    # Common modifiers to strip (don't affect nutrition much)
    MODIFIERS = [
        "all-purpose", "all purpose", "ap", "bread", "cake",
        "self-rising", "self rising",
        "granulated", "powdered", "confectioners", "brown", "white", "raw",
        "extra virgin", "virgin", "light", "dark", "heavy", "whipping",
        "fresh", "dried", "frozen", "canned", "jarred", "bottled",
        "chopped", "diced", "minced", "sliced", "grated", "shredded",
        "crushed", "melted", "softened",
        "room temperature", "cold", "warm", "hot",
        "packed", "sifted", "sieved", "ground", "whole",
        "halved", "quartered", "peeled", "unpeeled", "seeded", "pitted",
        "cored", "trimmed",
        "optional", "to taste", "as needed", "for garnish", "for serving",
    ]

    # Approximate grams per one unit. Volume units assume a water-like
    # density (1 cup = 240 g); this is a simplification, not a density table.
    GRAMS_PER_UNIT = {
        "cup": 240, "cups": 240, "c": 240,
        "tablespoon": 15, "tablespoons": 15, "tbsp": 15, "tb": 15,
        "teaspoon": 5, "teaspoons": 5, "tsp": 5, "ts": 5,
        "ounce": 28, "ounces": 28, "oz": 28,
        "fluid ounce": 30, "fluid ounces": 30, "fl oz": 30,
        "pound": 454, "pounds": 454, "lb": 454, "lbs": 454,
        "gram": 1, "grams": 1, "g": 1,
        "kilogram": 1000, "kilograms": 1000, "kg": 1000,
        "milliliter": 1, "milliliters": 1, "ml": 1,
        "liter": 1000, "liters": 1000, "l": 1000,
    }

    def __init__(self):
        """Create the USDA client and pre-compile the unit pattern."""
        self.usda = USDAFoodAPI()
        self._unit_pattern = self._build_unit_pattern()

    def _build_unit_pattern(self) -> re.Pattern:
        """Build regex pattern for matching units."""
        # Longest alternatives first so multi-word and plural forms are
        # tried before their shorter prefixes.
        sorted_units = sorted(self.UNITS, key=len, reverse=True)
        unit_str = "|".join(re.escape(u) for u in sorted_units)
        return re.compile(rf"\b({unit_str})\b", re.IGNORECASE)

    def parse_ingredient(self, ingredient_text: str) -> Dict:
        """
        Parse ingredient string into quantity, unit, and clean ingredient name.

        Returns a dict with the keys ``quantity``, ``unit``, ``ingredient``
        and ``original`` (the stripped input text).

        Examples:
            "2 cups all-purpose flour" → quantity 2.0, unit 'cups', ingredient 'flour'
            "1 1/2 tsp salt" → quantity 1.5, unit 'tsp', ingredient 'salt'
            "1/4 cup olive oil" → quantity 0.25, unit 'cup', ingredient 'olive oil'
            "3 large eggs" → quantity 3.0, unit 'large', ingredient 'eggs'
        """
        text = ingredient_text.strip()

        # Parse quantity (handles whole numbers, fractions, and mixed numbers)
        quantity, remaining = self._parse_quantity(text)

        # Parse unit from remaining text
        unit, remaining = self._parse_unit(remaining)

        # Clean the ingredient name
        ingredient_name = self._clean_ingredient_name(remaining)

        return {
            "quantity": quantity,
            "unit": unit,
            "ingredient": ingredient_name,
            "original": text,
        }

    def _parse_quantity(self, text: str) -> tuple:
        """
        Extract quantity from start of ingredient text.

        Handles: "2", "1/2", "1 1/2", "0.5", etc.

        Returns a ``(quantity, remaining_text)`` tuple. The quantity
        defaults to 1 when none is found. A fraction with a zero
        denominator is consumed but ignored (the whole part, or 1, is used)
        instead of raising ZeroDivisionError.
        """
        text = text.strip()

        # Pattern for mixed numbers: "1 1/2", "2 3/4"
        mixed_match = re.match(r"^(\d+)\s+(\d+)/(\d+)", text)
        if mixed_match:
            whole = int(mixed_match.group(1))
            numerator = int(mixed_match.group(2))
            denominator = int(mixed_match.group(3))
            quantity = whole + (numerator / denominator) if denominator else whole
            return quantity, text[mixed_match.end():].strip()

        # Pattern for simple fractions: "1/2", "3/4"
        fraction_match = re.match(r"^(\d+)/(\d+)", text)
        if fraction_match:
            numerator = int(fraction_match.group(1))
            denominator = int(fraction_match.group(2))
            quantity = numerator / denominator if denominator else 1
            return quantity, text[fraction_match.end():].strip()

        # Pattern for decimals: "0.5", "1.5"
        decimal_match = re.match(r"^(\d+\.?\d*)", text)
        if decimal_match:
            quantity = float(decimal_match.group(1))
            return quantity, text[decimal_match.end():].strip()

        # No quantity found
        return 1, text

    def _parse_unit(self, text: str) -> tuple:
        """Extract a leading unit from text.

        Returns ``(unit, remaining_text)`` with the unit lower-cased, or
        ``(None, text)`` when the text does not start with a known unit.
        """
        text = text.strip()

        match = self._unit_pattern.match(text)
        if match:
            unit = match.group(1).lower()
            remaining = text[match.end():].strip()
            return unit, remaining

        return None, text

    def _clean_ingredient_name(self, text: str) -> str:
        """
        Clean ingredient name for better USDA matching.

        Removes modifiers, extra whitespace, and normalizes text.
        """
        text = text.strip()

        # Remove parenthetical notes: "flour (sifted)" → "flour"
        text = re.sub(r"\s*\([^)]*\)", "", text)

        # Remove common modifiers
        for modifier in sorted(self.MODIFIERS, key=len, reverse=True):
            # Use word boundaries to avoid partial matches
            pattern = rf"\b{re.escape(modifier)}\b"
            text = re.sub(pattern, "", text, flags=re.IGNORECASE)

        # Remove extra whitespace
        text = re.sub(r"\s+", " ", text)

        # Remove leading/trailing punctuation left behind by the removals
        text = text.strip(" ,;:.")

        # Remove leading "of" (e.g., "cup of flour" → "flour")
        text = re.sub(r"^of\s+", "", text, flags=re.IGNORECASE)

        return text.strip()

    def _search_queries(self, ingredient_name: str) -> List[str]:
        """Return the de-duplicated search terms to try, in priority order."""
        # Strategy 1: Direct search
        candidates = [ingredient_name]

        # Strategy 2: Singular form. Drop exactly one trailing "s";
        # rstrip("s") would remove every trailing "s" ("grass" -> "gra").
        # Words ending in "ss" are left alone.
        if ingredient_name.endswith("s") and not ingredient_name.endswith("ss"):
            candidates.append(ingredient_name[:-1])

        # Strategy 3: Remove common suffixes
        candidates.append(
            re.sub(
                r"\s+(fresh|dried|ground|chopped|sliced)$",
                "",
                ingredient_name,
                flags=re.IGNORECASE,
            )
        )

        # Strategy 4: Just the last word (sometimes works for simple ingredients)
        words = ingredient_name.split()
        if len(words) > 1:
            candidates.append(words[-1])

        queries = []
        for candidate in candidates:
            # Skip terms too short to search for and repeats of earlier ones.
            if len(candidate) >= 2 and candidate not in queries:
                queries.append(candidate)
        return queries

    def _find_usda_match(self, ingredient_name: str) -> Optional[Dict]:
        """
        Find best USDA match for an ingredient name.

        Tries multiple search strategies for better matching and returns
        the first result of the first query that yields any, or None.
        A failing search is logged and the next strategy is tried.
        """
        for query in self._search_queries(ingredient_name):
            try:
                results = self.usda.search_foods(query, page_size=5)
                if results:
                    logger.debug(f"Found {query}: {results[0].get('description')}")
                    return results[0]
            except Exception as e:
                logger.warning(f"USDA search failed for '{query}': {e}")
                continue

        return None

    def _convert_to_grams(
        self, quantity: float, unit: Optional[str], ingredient_name: str
    ) -> float:
        """
        Convert quantity to grams for more accurate nutrition calculation.

        This is simplified - production code should use a proper conversion table.

        Known units use GRAMS_PER_UNIT. For an unknown or missing unit the
        per-unit weight is guessed from the ingredient name: 125 g for
        flour/sugar/rice/grain, 220 g for oil/butter/fat, 240 g for
        milk/water/broth, otherwise 100 g.
        """
        name = ingredient_name.lower()

        # Fallback weight, only used when the unit is not in the table.
        if any(word in name for word in ("flour", "sugar", "rice", "grain")):
            fallback = 125  # Dry ingredients
        elif any(word in name for word in ("oil", "butter", "fat")):
            fallback = 220  # Dense liquids
        elif any(word in name for word in ("milk", "water", "broth")):
            fallback = 240  # Liquids
        else:
            fallback = 100

        return quantity * self.GRAMS_PER_UNIT.get(unit, fallback)

    def calculate_nutrition(self, ingredients: List[str], servings: int = 1) -> Dict:
        """
        Calculate total nutrition for a recipe from ingredient list.

        Args:
            ingredients: Raw ingredient lines, e.g. "2 cups flour".
            servings: Number of servings to divide the totals by. When it
                is missing or not positive the totals are left undivided.

        Returns:
            Nutrition per serving (calories, protein, fat, carbohydrates,
            fiber, sugar, sodium) plus the metadata keys ``_matched``,
            ``_total`` and ``_unmatched``.
        """
        totals = {
            "calories": 0,
            "protein": 0,
            "fat": 0,
            "carbohydrates": 0,
            "fiber": 0,
            "sugar": 0,
            "sodium": 0,
        }
        nutrient_keys = tuple(totals)

        matched_count = 0
        unmatched = []

        for ingredient in ingredients:
            parsed = self.parse_ingredient(ingredient)

            # First try the normalized "quantity unit name" text. Leave out
            # a missing unit or name; str(None) used to put the literal
            # word "None" into the search query.
            query_parts = [str(parsed["quantity"])]
            if parsed["unit"]:
                query_parts.append(parsed["unit"])
            if parsed["ingredient"]:
                query_parts.append(parsed["ingredient"])

            match = self._find_usda_match(" ".join(query_parts))
            if not match:
                match = self._find_usda_match(parsed["ingredient"])

            if not match:
                unmatched.append(parsed["original"])
                continue

            try:
                nutrients = self.usda.extract_nutrients(match)
                # Convert to grams for more accurate calculation
                grams = self._convert_to_grams(
                    parsed["quantity"],
                    parsed["unit"],
                    parsed["ingredient"],
                )

                # USDA data is typically per 100g, so scale accordingly
                multiplier = grams / 100

                for key in nutrient_keys:
                    totals[key] += nutrients.get(key, 0) * multiplier

                matched_count += 1
            except Exception as exc:
                # Best-effort: one bad ingredient must not abort the recipe.
                logger.warning(
                    "Could not add nutrition for '%s': %s", parsed["original"], exc
                )
                unmatched.append(parsed["original"])

        # Divide by servings for per-serving nutrition
        if servings and servings > 0:
            for key in nutrient_keys:
                totals[key] = round(totals[key] / servings, 2)

        # Add metadata
        totals["_matched"] = matched_count
        totals["_total"] = len(ingredients)
        totals["_unmatched"] = unmatched

        return totals
|
||||
@ -5,6 +5,7 @@ from typing import Any, Optional
|
||||
|
||||
import pendulum
|
||||
import pytz
|
||||
import requests
|
||||
from beers.models import Beer
|
||||
from boardgames.models import BoardGame, BoardGameDesigner, BoardGameLocation
|
||||
from books.constants import READCOMICSONLINE_URL
|
||||
@ -13,6 +14,8 @@ from books.utils import parse_readcomicsonline_uri
|
||||
from bricksets.models import BrickSet
|
||||
from dateutil.parser import parse
|
||||
from django.utils import timezone
|
||||
from foods.models import Food
|
||||
from foods.sources.rscraper import RecipeScraperService
|
||||
from locations.constants import LOCATION_PROVIDERS
|
||||
from locations.models import GeoLocation
|
||||
from music.constants import JELLYFIN_POST_KEYS, MOPIDY_POST_KEYS
|
||||
@ -81,7 +84,10 @@ def mopidy_scrobble_media(post_data: dict, user_id: int) -> Scrobble:
|
||||
|
||||
log = {}
|
||||
try:
|
||||
log = {"mopidy_source": post_data.get("mopidy_uri", "").split(":")[0]}
|
||||
log = {
|
||||
"mopidy_source": post_data.get("mopidy_uri", "").split(":")[0],
|
||||
"raw_data": post_data,
|
||||
}
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
@ -544,6 +550,40 @@ def email_scrobble_board_game(
|
||||
return scrobbles_created
|
||||
|
||||
|
||||
def scrobble_from_recipe_website(
    url: str,
    user_id: int,
    source: str = "Vrobbler",
    action: Optional[str] = None,
) -> Scrobble:
    """Create or update a food scrobble for the recipe found at ``url``.

    The food is looked up (or created) via
    ``Food.find_or_create_from_recipe``; the scrobble is stamped with the
    current time and the given ``source``. When ``action`` is ``"stop"``
    the scrobble is force-finished before it is returned.
    """
    recipe_food, was_created = Food.find_or_create_from_recipe(url)

    scrobble_dict = dict(
        user_id=user_id,
        timestamp=timezone.now(),
        playback_position_seconds=0,
        source=source,
    )

    log_context = {
        "food_id": recipe_food.id,
        "user_id": user_id,
        "scrobble_dict": scrobble_dict,
        "media_type": Scrobble.MediaType.FOOD,
        "created_bool": was_created,
    }
    logger.info(
        "[vrobbler-scrobble] recipe scrobble request received",
        extra=log_context,
    )

    result = Scrobble.create_or_update(recipe_food, user_id, scrobble_dict)
    if action == "stop":
        # Caller asked for the scrobble to be closed immediately.
        result.stop(force_finish=True)
    return result
|
||||
|
||||
|
||||
def manual_scrobble_from_url(
|
||||
url: str,
|
||||
user_id: int,
|
||||
@ -554,6 +594,12 @@ def manual_scrobble_from_url(
|
||||
we want to scrobble as a media type in and of itself. This checks whether
|
||||
we know about the content type, and routes it to the appropriate media
|
||||
scrobbler. Otherwise, return nothing."""
|
||||
|
||||
if RecipeScraperService.is_recipe_url(url):
|
||||
return scrobble_from_recipe_website(
|
||||
url, user_id, source=source, action=action
|
||||
)
|
||||
|
||||
content_key = ""
|
||||
domain = extract_domain(url)
|
||||
|
||||
@ -906,6 +952,12 @@ def manual_scrobble_webpage(
|
||||
source: str = "Bookmarklet",
|
||||
action: Optional[str] = None,
|
||||
):
|
||||
|
||||
if RecipeScraperService.is_recipe_url(url):
|
||||
return scrobble_from_recipe_website(
|
||||
url, user_id, source=source, action=action
|
||||
)
|
||||
|
||||
webpage = WebPage.find_or_create({"url": url})
|
||||
|
||||
scrobble_dict = {
|
||||
|
||||
@ -58,6 +58,7 @@ DUMP_REQUEST_DATA = (
|
||||
os.getenv("VROBBLER_DUMP_REQUEST_DATA", "false").lower() in TRUTHY
|
||||
)
|
||||
|
||||
USDA_API_KEY = os.getenv("VROBBLER_USDA_API_KEY")
|
||||
THESPORTSDB_API_KEY = os.getenv("VROBBLER_THESPORTSDB_API_KEY", "2")
|
||||
THEAUDIODB_API_KEY = os.getenv("VROBBLER_THEAUDIODB_API_KEY", "2")
|
||||
PODCASTINDEX_API_KEY = os.getenv("VROBBLER_PODCASTINDEX_API_KEY", "")
|
||||
|
||||
Reference in New Issue
Block a user