Squashed commit of the following:

commit 36403ecbae
Author: smilerz <smilerz@gmail.com>
Date:   Fri Sep 1 12:04:04 2023 -0500

    update migration for new Automation Types

commit 4620ebaf30
Author: smilerz <smilerz@gmail.com>
Date:   Fri Sep 1 07:49:10 2023 -0500

    add Name and Instruction automation to YouTube importer

commit c907da84c1
Author: smilerz <smilerz@gmail.com>
Date:   Fri Sep 1 07:45:32 2023 -0500

    remove old commented automation code

commit 9b5e39415e
Author: smilerz <smilerz@gmail.com>
Date:   Fri Sep 1 07:37:36 2023 -0500

    test for automations applied during url import
    renamed TITLE_REPLACE to NAME_REPLACE

commit 2679a22464
Author: smilerz <smilerz@gmail.com>
Date:   Thu Aug 31 15:29:59 2023 -0500

    added tests for regex_replace

commit 8bae21025b
Author: smilerz <smilerz@gmail.com>
Date:   Thu Aug 31 13:51:46 2023 -0500

    updated Automation Modal and translations

commit 4120adc546
Author: smilerz <smilerz@gmail.com>
Date:   Thu Aug 31 13:12:41 2023 -0500

    applied regex_replace automation to food and unit automations
    updated automation documentation

commit 30c891abfc
Author: smilerz <smilerz@gmail.com>
Date:   Thu Aug 31 12:46:34 2023 -0500

    migrate regex_replace functions to AutomationEngine
    create TITLE_REPLACE, UNIT_REPLACE and FOOD_REPLACE automation types
    create migration for new types

commit b8317c2c29
Author: smilerz <smilerz@gmail.com>
Date:   Wed Aug 30 20:44:40 2023 -0500

    move transpose words to AutomationEngine
    create tests for transpose words

commit 39253cfd02
Author: smilerz <smilerz@gmail.com>
Date:   Wed Aug 30 17:03:29 2023 -0500

    refactor never_unit automation to AutomationEngine
    create tests for never_unit

commit 7c0b8b151c
Author: smilerz <smilerz@gmail.com>
Date:   Wed Aug 30 11:21:06 2023 -0500

    update ingredient parser to use AutomationEngine for unit, keyword, food
    update test_ingredient_parser tests to accommodate changes

commit 8e1b8923af
Author: smilerz <smilerz@gmail.com>
Date:   Mon Aug 28 16:44:35 2023 -0500

    keyword and unit Automations refactored to AutomationEngine
    keyword and unit automation tests added

commit 52eb876a08
Author: smilerz <smilerz@gmail.com>
Date:   Mon Aug 28 15:03:19 2023 -0500

    food_alias tests added

commit a820b9c09e
Author: smilerz <smilerz@gmail.com>
Date:   Sat Aug 26 12:37:16 2023 -0500

    create AutomationEngine class
    create food_automation method
    refactor food automations to use AutomationEngine
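
The hunks below replace the inline, per-type regex automation blocks with a shared AutomationEngine. As a reading aid, here is a minimal sketch of the engine's regex-replace surface as this diff exercises it; the method body is inferred from the inline code being removed, and the real implementation lives in cookbook/helper/automation_helper.py and may differ:

import re

from cookbook.models import Automation


class AutomationEngine:
    # Sketch only: constructor arguments taken from the call sites in this diff.
    def __init__(self, request, source=None):
        self.request = request
        self.source = source or ''

    def apply_regex_replace_automation(self, text, automation_type):
        # Mirrors the removed inline blocks: enabled automations of the given
        # type are ordered, capped at 512, and applied only when param_1
        # matches the recipe's source URL; param_2/param_3 drive the re.sub().
        # (The removed DESCRIPTION_REPLACE block additionally passed count=1.)
        automations = Automation.objects.filter(
            type=automation_type, space=self.request.space, disabled=False,
        ).only('param_1', 'param_2', 'param_3').order_by('order')[:512]
        for a in automations:
            if re.match(a.param_1, self.source[:512]):
                text = re.sub(a.param_2, a.param_3, text)
        return text

Worked example with hypothetical data, assuming an enabled Automation(type=Automation.NAME_REPLACE, param_1='https://example.com', param_2=' - Example Site$', param_3=''):

engine = AutomationEngine(request, source='https://example.com/recipes/brownies')
engine.apply_regex_replace_automation('Fudgy Brownies - Example Site', Automation.NAME_REPLACE)
# -> 'Fudgy Brownies'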
@@ -2,7 +2,6 @@ import re
 import traceback
 from html import unescape

-from django.core.cache import caches
 from django.utils.dateparse import parse_duration
 from django.utils.translation import gettext as _
 from isodate import parse_duration as iso_parse_duration
@@ -10,13 +9,37 @@ from isodate.isoerror import ISO8601Error
 from pytube import YouTube
 from recipe_scrapers._utils import get_host_name, get_minutes

+from cookbook.helper.automation_helper import AutomationEngine
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.models import Automation, Keyword, PropertyType


 def get_from_scraper(scrape, request):
     # converting the scrape_me object to the existing json format based on ld+json
-    recipe_json = {}
+
+    recipe_json = {
+        'steps': [],
+        'internal': True
+    }
+    keywords = []
+
+    # assign source URL
+    try:
+        source_url = scrape.canonical_url()
+    except Exception:
+        try:
+            source_url = scrape.url
+        except Exception:
+            pass
+    if source_url:
+        recipe_json['source_url'] = source_url
+        try:
+            keywords.append(source_url.replace('http://', '').replace('https://', '').split('/')[0])
+        except Exception:
+            recipe_json['source_url'] = ''
+
+    automation_engine = AutomationEngine(request, source=recipe_json.get('source_url'))
+    # assign recipe name
     try:
         recipe_json['name'] = parse_name(scrape.title()[:128] or None)
     except Exception:
@@ -30,6 +53,10 @@ def get_from_scraper(scrape, request):
     if isinstance(recipe_json['name'], list) and len(recipe_json['name']) > 0:
         recipe_json['name'] = recipe_json['name'][0]
+
+    recipe_json['name'] = automation_engine.apply_regex_replace_automation(recipe_json['name'], Automation.NAME_REPLACE)
+
+    # assign recipe description
     # TODO notify user about limit if reached - >256 description will be truncated
     try:
         description = scrape.description() or None
     except Exception:
@@ -40,8 +67,10 @@ def get_from_scraper(scrape, request):
     except Exception:
         description = ''

-    recipe_json['internal'] = True
     recipe_json['description'] = parse_description(description)
+    recipe_json['description'] = automation_engine.apply_regex_replace_automation(recipe_json['description'], Automation.DESCRIPTION_REPLACE)
+
+    # assign servings attributes
     try:
         # dont use scrape.yields() as this will always return "x servings" or "x items", should be improved in scrapers directly
         servings = scrape.schema.data.get('recipeYield') or 1
@@ -51,6 +80,7 @@ def get_from_scraper(scrape, request):
     recipe_json['servings'] = parse_servings(servings)
     recipe_json['servings_text'] = parse_servings_text(servings)

+    # assign time attributes
     try:
         recipe_json['working_time'] = get_minutes(scrape.prep_time()) or 0
     except Exception:
@@ -75,6 +105,7 @@ def get_from_scraper(scrape, request):
     except Exception:
         pass

+    # assign image
     try:
         recipe_json['image'] = parse_image(scrape.image()) or None
     except Exception:
@@ -85,7 +116,7 @@ def get_from_scraper(scrape, request):
     except Exception:
         recipe_json['image'] = ''

-    keywords = []
+    # assign keywords
     try:
         if scrape.schema.data.get("keywords"):
             keywords += listify_keywords(scrape.schema.data.get("keywords"))
@@ -110,20 +141,6 @@ def get_from_scraper(scrape, request):
     except Exception:
         pass

-    try:
-        source_url = scrape.canonical_url()
-    except Exception:
-        try:
-            source_url = scrape.url
-        except Exception:
-            pass
-    if source_url:
-        recipe_json['source_url'] = source_url
-        try:
-            keywords.append(source_url.replace('http://', '').replace('https://', '').split('/')[0])
-        except Exception:
-            recipe_json['source_url'] = ''
-
     try:
         if scrape.author():
             keywords.append(scrape.author())
@@ -131,13 +148,13 @@ def get_from_scraper(scrape, request):
         pass

     try:
-        recipe_json['keywords'] = parse_keywords(list(set(map(str.casefold, keywords))), request.space)
+        recipe_json['keywords'] = parse_keywords(list(set(map(str.casefold, keywords))), request)
     except AttributeError:
         recipe_json['keywords'] = keywords

     ingredient_parser = IngredientParser(request, True)

-    recipe_json['steps'] = []
+    # assign steps
     try:
         for i in parse_instructions(scrape.instructions()):
             recipe_json['steps'].append({'instruction': i, 'ingredients': [], 'show_ingredients_table': request.user.userpreference.show_step_ingredients, })
@@ -146,25 +163,10 @@ def get_from_scraper(scrape, request):
     if len(recipe_json['steps']) == 0:
         recipe_json['steps'].append({'instruction': '', 'ingredients': [], })

-    parsed_description = parse_description(description)
-    # TODO notify user about limit if reached
-    # limits exist to limit the attack surface for dos style attacks
-    automations = Automation.objects.filter(
-        type=Automation.DESCRIPTION_REPLACE,
-        space=request.space,
-        disabled=False).only(
-        'param_1',
-        'param_2',
-        'param_3').all().order_by('order')[
-        :512]
-    for a in automations:
-        if re.match(a.param_1, (recipe_json['source_url'])[:512]):
-            parsed_description = re.sub(a.param_2, a.param_3, parsed_description, count=1)
-
-    if len(parsed_description) > 256: # split at 256 as long descriptions don't look good on recipe cards
-        recipe_json['steps'][0]['instruction'] = f'*{parsed_description}* \n\n' + recipe_json['steps'][0]['instruction']
+    if len(recipe_json['description']) > 256: # split at 256 as long descriptions don't look good on recipe cards
+        recipe_json['steps'][0]['instruction'] = f"*{recipe_json['description']}* \n\n" + recipe_json['steps'][0]['instruction']
     else:
-        recipe_json['description'] = parsed_description[:512]
+        recipe_json['description'] = recipe_json['description'][:512]

     try:
         for x in scrape.ingredients():
@@ -205,19 +207,9 @@ def get_from_scraper(scrape, request):
         traceback.print_exc()
         pass

-    if 'source_url' in recipe_json and recipe_json['source_url']:
-        automations = Automation.objects.filter(
-            type=Automation.INSTRUCTION_REPLACE,
-            space=request.space,
-            disabled=False).only(
-            'param_1',
-            'param_2',
-            'param_3').order_by('order').all()[
-            :512]
-        for a in automations:
-            if re.match(a.param_1, (recipe_json['source_url'])[:512]):
-                for s in recipe_json['steps']:
-                    s['instruction'] = re.sub(a.param_2, a.param_3, s['instruction'])
+    for s in recipe_json['steps']:
+        s['instruction'] = automation_engine.apply_regex_replace_automation(s['instruction'], Automation.INSTRUCTION_REPLACE)
+        # re.sub(a.param_2, a.param_3, s['instruction'])

     return recipe_json

@@ -267,11 +259,14 @@ def get_from_youtube_scraper(url, request):
         ]
     }

-    # TODO add automation here
     try:
+        automation_engine = AutomationEngine(request, source=url)
         video = YouTube(url=url)
-        default_recipe_json['name'] = video.title
+        default_recipe_json['name'] = automation_engine.apply_regex_replace_automation(video.title, Automation.NAME_REPLACE)
         default_recipe_json['image'] = video.thumbnail_url
-        default_recipe_json['steps'][0]['instruction'] = video.description
+        default_recipe_json['steps'][0]['instruction'] = automation_engine.apply_regex_replace_automation(video.description, Automation.INSTRUCTION_REPLACE)
+
     except Exception:
         pass

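The last two hunks move keyword-alias resolution behind the engine as well. A hypothetical sketch of apply_keyword_automation, assuming it reproduces the 30-second per-space cache over KEYWORD_ALIAS automations that parse_keywords previously managed inline (left commented out in this squashed state):

from django.core.cache import caches

from cookbook.models import Automation


class AutomationEngine:  # continuing the sketch above

    def apply_keyword_automation(self, keyword):
        # Assumed to mirror the removed inline cache: alias pairs live for
        # 30 seconds per space, keyed param_1 (lowercased) -> param_2.
        cache_key = f'automation_keyword_alias_{self.request.space.pk}'
        if aliases := caches['default'].get(cache_key, None):
            caches['default'].touch(cache_key, 30)
        else:
            aliases = {}
            for a in Automation.objects.filter(
                    space=self.request.space, disabled=False,
                    type=Automation.KEYWORD_ALIAS).only('param_1', 'param_2').order_by('order'):
                aliases[a.param_1.lower()] = a.param_2
            caches['default'].set(cache_key, aliases, 30)
        return aliases.get(keyword.lower(), keyword)

Note that the call site in the hunk below discards the return value; whether the engine rewrites the keyword in place or returns the alias is a detail of automation_helper.py not visible in this diff.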
@@ -410,18 +405,19 @@ def parse_time(recipe_time):
     return recipe_time


-def parse_keywords(keyword_json, space):
+def parse_keywords(keyword_json, request):
     keywords = []
-    keyword_aliases = {}
+    automation_engine = AutomationEngine(request)
+    # keyword_aliases = {}
     # retrieve keyword automation cache if it exists, otherwise build from database
-    KEYWORD_CACHE_KEY = f'automation_keyword_alias_{space.pk}'
-    if c := caches['default'].get(KEYWORD_CACHE_KEY, None):
-        keyword_aliases = c
-        caches['default'].touch(KEYWORD_CACHE_KEY, 30)
-    else:
-        for a in Automation.objects.filter(space=space, disabled=False, type=Automation.KEYWORD_ALIAS).only('param_1', 'param_2').order_by('order').all():
-            keyword_aliases[a.param_1.lower()] = a.param_2
-        caches['default'].set(KEYWORD_CACHE_KEY, keyword_aliases, 30)
+    # KEYWORD_CACHE_KEY = f'automation_keyword_alias_{space.pk}'
+    # if c := caches['default'].get(KEYWORD_CACHE_KEY, None):
+    #     keyword_aliases = c
+    #     caches['default'].touch(KEYWORD_CACHE_KEY, 30)
+    # else:
+    #     for a in Automation.objects.filter(space=space, disabled=False, type=Automation.KEYWORD_ALIAS).only('param_1', 'param_2').order_by('order').all():
+    #         keyword_aliases[a.param_1.lower()] = a.param_2
+    #     caches['default'].set(KEYWORD_CACHE_KEY, keyword_aliases, 30)

     # keywords as list
     for kw in keyword_json:
@@ -429,12 +425,13 @@ def parse_keywords(keyword_json, space):
         # if alias exists use that instead

         if len(kw) != 0:
-            if keyword_aliases:
-                try:
-                    kw = keyword_aliases[kw.lower()]
-                except KeyError:
-                    pass
-            if k := Keyword.objects.filter(name=kw, space=space).first():
+            # if keyword_aliases:
+            #     try:
+            #         kw = keyword_aliases[kw.lower()]
+            #     except KeyError:
+            #         pass
+            automation_engine.apply_keyword_automation(kw)
+            if k := Keyword.objects.filter(name=kw, space=request.space).first():
                 keywords.append({'label': str(k), 'name': k.name, 'id': k.id})
             else:
                 keywords.append({'label': kw, 'name': kw})
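
With this signature change, callers hand parse_keywords the whole request instead of just a space, so the engine can scope automations to request.space. A hypothetical call under the new signature:

keywords = parse_keywords(['dessert', 'Chocolate'], request)
# Keywords already present in request.space resolve to
# {'label': ..., 'name': ..., 'id': ...}; unknown ones fall through
# as {'label': kw, 'name': kw}.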