Squashed commit of the following:

commit 36403ecbae
Author: smilerz <smilerz@gmail.com>
Date:   Fri Sep 1 12:04:04 2023 -0500

    update migration for new Automation Types

commit 4620ebaf30
Author: smilerz <smilerz@gmail.com>
Date:   Fri Sep 1 07:49:10 2023 -0500

    add Name and Instruction automation to YouTube importer

commit c907da84c1
Author: smilerz <smilerz@gmail.com>
Date:   Fri Sep 1 07:45:32 2023 -0500

    remove old commented automation code

commit 9b5e39415e
Author: smilerz <smilerz@gmail.com>
Date:   Fri Sep 1 07:37:36 2023 -0500

    test for automations applied during url import
    renamed TITLE_REPLACE to NAME_REPLACE

commit 2679a22464
Author: smilerz <smilerz@gmail.com>
Date:   Thu Aug 31 15:29:59 2023 -0500

    added tests for regex_replace

commit 8bae21025b
Author: smilerz <smilerz@gmail.com>
Date:   Thu Aug 31 13:51:46 2023 -0500

    updated Automation Modal and translations

commit 4120adc546
Author: smilerz <smilerz@gmail.com>
Date:   Thu Aug 31 13:12:41 2023 -0500

    applied regex_replace automation to food and unit automations
    updated automation documentation

commit 30c891abfc
Author: smilerz <smilerz@gmail.com>
Date:   Thu Aug 31 12:46:34 2023 -0500

    migrate regex_replace functions to AutomationEngine
    create TITLE_REPLACE, UNIT_REPLACE and FOOD_REPLACE automation types
    create migration for new types

commit b8317c2c29
Author: smilerz <smilerz@gmail.com>
Date:   Wed Aug 30 20:44:40 2023 -0500

    move transpose words to AutomationEngine
    create tests for transpose words

commit 39253cfd02
Author: smilerz <smilerz@gmail.com>
Date:   Wed Aug 30 17:03:29 2023 -0500

    refactor never_unit automation to AutomationEngine
    create tests for never_unit

commit 7c0b8b151c
Author: smilerz <smilerz@gmail.com>
Date:   Wed Aug 30 11:21:06 2023 -0500

    update ingredient parser to use AutomationEngine for unit, keyword, food
    update test_ingredient_parser tests to accommodate changes

commit 8e1b8923af
Author: smilerz <smilerz@gmail.com>
Date:   Mon Aug 28 16:44:35 2023 -0500

    keyword and unit Automations refactored to AutomationEngine
    keyword and unit automation tests added

commit 52eb876a08
Author: smilerz <smilerz@gmail.com>
Date:   Mon Aug 28 15:03:19 2023 -0500

    food_alias tests added

commit a820b9c09e
Author: smilerz <smilerz@gmail.com>
Date:   Sat Aug 26 12:37:16 2023 -0500

    create AutomationEngine class
    create food_automation method
    refactor food automations to use AutomationEngine
Committed by smilerz on 2023-09-12 09:46:08 -05:00
commit 768a5ea237 (parent c72bf57ccb)
15 changed files with 625 additions and 546 deletions

cookbook/helper/recipe_url_import.py

@@ -2,7 +2,6 @@ import re
 import traceback
 from html import unescape
 
-from django.core.cache import caches
 from django.utils.dateparse import parse_duration
 from django.utils.translation import gettext as _
 from isodate import parse_duration as iso_parse_duration
@@ -10,13 +9,37 @@ from isodate.isoerror import ISO8601Error
 from pytube import YouTube
 from recipe_scrapers._utils import get_host_name, get_minutes
 
+from cookbook.helper.automation_helper import AutomationEngine
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.models import Automation, Keyword, PropertyType
 
 
 def get_from_scraper(scrape, request):
     # converting the scrape_me object to the existing json format based on ld+json
-    recipe_json = {}
+    recipe_json = {
+        'steps': [],
+        'internal': True
+    }
+    keywords = []
+
+    # assign source URL
+    try:
+        source_url = scrape.canonical_url()
+    except Exception:
+        try:
+            source_url = scrape.url
+        except Exception:
+            pass
+    if source_url:
+        recipe_json['source_url'] = source_url
+        try:
+            keywords.append(source_url.replace('http://', '').replace('https://', '').split('/')[0])
+        except Exception:
+            recipe_json['source_url'] = ''
+    automation_engine = AutomationEngine(request, source=recipe_json.get('source_url'))
+
+    # assign recipe name
     try:
         recipe_json['name'] = parse_name(scrape.title()[:128] or None)
     except Exception:
@@ -30,6 +53,10 @@ def get_from_scraper(scrape, request):
     if isinstance(recipe_json['name'], list) and len(recipe_json['name']) > 0:
         recipe_json['name'] = recipe_json['name'][0]
+    recipe_json['name'] = automation_engine.apply_regex_replace_automation(recipe_json['name'], Automation.NAME_REPLACE)
 
+    # assign recipe description
+    # TODO notify user about limit if reached - >256 description will be truncated
     try:
         description = scrape.description() or None
     except Exception:
@@ -40,8 +67,10 @@ def get_from_scraper(scrape, request):
     except Exception:
         description = ''
 
-    recipe_json['internal'] = True
+    recipe_json['description'] = parse_description(description)
+    recipe_json['description'] = automation_engine.apply_regex_replace_automation(recipe_json['description'], Automation.DESCRIPTION_REPLACE)
 
+    # assign servings attributes
     try:
         # dont use scrape.yields() as this will always return "x servings" or "x items", should be improved in scrapers directly
         servings = scrape.schema.data.get('recipeYield') or 1
@@ -51,6 +80,7 @@ def get_from_scraper(scrape, request):
     recipe_json['servings'] = parse_servings(servings)
     recipe_json['servings_text'] = parse_servings_text(servings)
 
+    # assign time attributes
     try:
         recipe_json['working_time'] = get_minutes(scrape.prep_time()) or 0
     except Exception:
@@ -75,6 +105,7 @@ def get_from_scraper(scrape, request):
     except Exception:
         pass
 
+    # assign image
     try:
         recipe_json['image'] = parse_image(scrape.image()) or None
     except Exception:
@@ -85,7 +116,7 @@ def get_from_scraper(scrape, request):
     except Exception:
         recipe_json['image'] = ''
 
-    keywords = []
+    # assign keywords
     try:
         if scrape.schema.data.get("keywords"):
             keywords += listify_keywords(scrape.schema.data.get("keywords"))
@@ -110,20 +141,6 @@ def get_from_scraper(scrape, request):
     except Exception:
         pass
 
-    try:
-        source_url = scrape.canonical_url()
-    except Exception:
-        try:
-            source_url = scrape.url
-        except Exception:
-            pass
-    if source_url:
-        recipe_json['source_url'] = source_url
-        try:
-            keywords.append(source_url.replace('http://', '').replace('https://', '').split('/')[0])
-        except Exception:
-            recipe_json['source_url'] = ''
-
     try:
         if scrape.author():
             keywords.append(scrape.author())
@@ -131,13 +148,13 @@ def get_from_scraper(scrape, request):
         pass
 
     try:
-        recipe_json['keywords'] = parse_keywords(list(set(map(str.casefold, keywords))), request.space)
+        recipe_json['keywords'] = parse_keywords(list(set(map(str.casefold, keywords))), request)
     except AttributeError:
         recipe_json['keywords'] = keywords
 
     ingredient_parser = IngredientParser(request, True)
-    recipe_json['steps'] = []
 
+    # assign steps
     try:
         for i in parse_instructions(scrape.instructions()):
             recipe_json['steps'].append({'instruction': i, 'ingredients': [], 'show_ingredients_table': request.user.userpreference.show_step_ingredients, })
@@ -146,25 +163,10 @@ def get_from_scraper(scrape, request):
     if len(recipe_json['steps']) == 0:
         recipe_json['steps'].append({'instruction': '', 'ingredients': [], })
 
-    parsed_description = parse_description(description)
-    # TODO notify user about limit if reached
-    # limits exist to limit the attack surface for dos style attacks
-    automations = Automation.objects.filter(
-        type=Automation.DESCRIPTION_REPLACE,
-        space=request.space,
-        disabled=False).only(
-        'param_1',
-        'param_2',
-        'param_3').all().order_by('order')[
-        :512]
-    for a in automations:
-        if re.match(a.param_1, (recipe_json['source_url'])[:512]):
-            parsed_description = re.sub(a.param_2, a.param_3, parsed_description, count=1)
-
-    if len(parsed_description) > 256:  # split at 256 as long descriptions don't look good on recipe cards
-        recipe_json['steps'][0]['instruction'] = f'*{parsed_description}* \n\n' + recipe_json['steps'][0]['instruction']
+    if len(recipe_json['description']) > 256:  # split at 256 as long descriptions don't look good on recipe cards
+        recipe_json['steps'][0]['instruction'] = f"*{recipe_json['description']}* \n\n" + recipe_json['steps'][0]['instruction']
     else:
-        recipe_json['description'] = parsed_description[:512]
+        recipe_json['description'] = recipe_json['description'][:512]
 
     try:
         for x in scrape.ingredients():
@@ -205,19 +207,9 @@ def get_from_scraper(scrape, request):
             traceback.print_exc()
             pass
 
-    if 'source_url' in recipe_json and recipe_json['source_url']:
-        automations = Automation.objects.filter(
-            type=Automation.INSTRUCTION_REPLACE,
-            space=request.space,
-            disabled=False).only(
-            'param_1',
-            'param_2',
-            'param_3').order_by('order').all()[
-            :512]
-        for a in automations:
-            if re.match(a.param_1, (recipe_json['source_url'])[:512]):
-                for s in recipe_json['steps']:
-                    s['instruction'] = re.sub(a.param_2, a.param_3, s['instruction'])
+    for s in recipe_json['steps']:
+        s['instruction'] = automation_engine.apply_regex_replace_automation(s['instruction'], Automation.INSTRUCTION_REPLACE)
+        # re.sub(a.param_2, a.param_3, s['instruction'])
 
     return recipe_json
@@ -267,11 +259,14 @@ def get_from_youtube_scraper(url, request):
         ]
     }
 
-    # TODO add automation here
     try:
+        automation_engine = AutomationEngine(request, source=url)
         video = YouTube(url=url)
-        default_recipe_json['name'] = video.title
+        default_recipe_json['name'] = automation_engine.apply_regex_replace_automation(video.title, Automation.NAME_REPLACE)
         default_recipe_json['image'] = video.thumbnail_url
-        default_recipe_json['steps'][0]['instruction'] = video.description
+        default_recipe_json['steps'][0]['instruction'] = automation_engine.apply_regex_replace_automation(video.description, Automation.INSTRUCTION_REPLACE)
     except Exception:
         pass
@@ -410,18 +405,19 @@ def parse_time(recipe_time):
     return recipe_time
 
 
-def parse_keywords(keyword_json, space):
+def parse_keywords(keyword_json, request):
     keywords = []
-    keyword_aliases = {}
+    automation_engine = AutomationEngine(request)
+    # keyword_aliases = {}
     # retrieve keyword automation cache if it exists, otherwise build from database
-    KEYWORD_CACHE_KEY = f'automation_keyword_alias_{space.pk}'
-    if c := caches['default'].get(KEYWORD_CACHE_KEY, None):
-        keyword_aliases = c
-        caches['default'].touch(KEYWORD_CACHE_KEY, 30)
-    else:
-        for a in Automation.objects.filter(space=space, disabled=False, type=Automation.KEYWORD_ALIAS).only('param_1', 'param_2').order_by('order').all():
-            keyword_aliases[a.param_1.lower()] = a.param_2
-        caches['default'].set(KEYWORD_CACHE_KEY, keyword_aliases, 30)
+    # KEYWORD_CACHE_KEY = f'automation_keyword_alias_{space.pk}'
+    # if c := caches['default'].get(KEYWORD_CACHE_KEY, None):
+    #     keyword_aliases = c
+    #     caches['default'].touch(KEYWORD_CACHE_KEY, 30)
+    # else:
+    #     for a in Automation.objects.filter(space=space, disabled=False, type=Automation.KEYWORD_ALIAS).only('param_1', 'param_2').order_by('order').all():
+    #         keyword_aliases[a.param_1.lower()] = a.param_2
+    #     caches['default'].set(KEYWORD_CACHE_KEY, keyword_aliases, 30)
 
     # keywords as list
     for kw in keyword_json:
@@ -429,12 +425,13 @@ def parse_keywords(keyword_json, space):
         # if alias exists use that instead
         if len(kw) != 0:
-            if keyword_aliases:
-                try:
-                    kw = keyword_aliases[kw.lower()]
-                except KeyError:
-                    pass
-            if k := Keyword.objects.filter(name=kw, space=space).first():
+            # if keyword_aliases:
+            #     try:
+            #         kw = keyword_aliases[kw.lower()]
+            #     except KeyError:
+            #         pass
+            automation_engine.apply_keyword_automation(kw)
+            if k := Keyword.objects.filter(name=kw, space=request.space).first():
                 keywords.append({'label': str(k), 'name': k.name, 'id': k.id})
             else:
                 keywords.append({'label': kw, 'name': kw})