create AutomationEngine class

create food_automation method
refactor food automations to use AutomationEngine
This commit is contained in:
smilerz 2023-08-26 12:37:16 -05:00
parent c72bf57ccb
commit a820b9c09e
No known key found for this signature in database
GPG Key ID: 39444C7606D47126
7 changed files with 144 additions and 231 deletions

View File

@ -0,0 +1,57 @@
from django.core.cache import caches
from django.db.models import Q
from django.db.models.functions import Lower
from cookbook.models import Automation, Food, Ingredient, Unit
class AutomationEngine():
    """Apply the automations configured for a space to imported values.

    Only food alias automations are implemented so far; the keyword, unit,
    transpose and regex-replace hooks return their input unchanged.

    When ``use_cache`` is true the alias table is loaded once per engine
    instance (from the ``default`` Django cache if present, otherwise from
    the database) and reused for every later lookup on that instance.
    """

    # Class-level defaults. ``None`` means "not loaded yet" for the lookup
    # tables; the loaded tables are stored on the instance, not on the class.
    request = None
    use_cache = None
    food_aliases = None
    unit_aliases = None
    never_unit = None
    transpose_words = None
    description_replace = None
    instruction_replace = None

    def __init__(self, request, use_cache=True):
        """
        :param request: request object; ``request.space`` scopes every automation lookup
        :param use_cache: when true, load the alias tables once and reuse them
        """
        self.request = request
        self.use_cache = use_cache

    def apply_keyword_automation(self, keyword):
        """Placeholder: return ``keyword`` unchanged (not implemented yet)."""
        return keyword

    def apply_unit_automation(self, unit):
        """Placeholder: return ``unit`` unchanged (not implemented yet)."""
        return unit

    def apply_food_automation(self, food):
        """
        Apply food alias automations to the passed food name.

        :param food: food name as string; surrounding whitespace is stripped
        :return: the alias target (``param_2``) of the matching automation,
            or the stripped food name when no automation matches
        """
        food = food.strip()

        if not self.use_cache:
            # Caching disabled: never keep an alias table, always ask the database below.
            self.food_aliases = {}
        elif self.food_aliases is None:
            # First lookup on this instance: load the alias table exactly once.
            # Previously every later call fell into the reset branch and threw
            # the loaded table away, so the cache only ever served one lookup.
            self.food_aliases = {}
            FOOD_CACHE_KEY = f'automation_food_alias_{self.request.space.pk}'
            if c := caches['default'].get(FOOD_CACHE_KEY, None):
                self.food_aliases = c
                # Extend the lifetime of the shared cache entry by 30 seconds.
                caches['default'].touch(FOOD_CACHE_KEY, 30)
            else:
                for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.FOOD_ALIAS).only('param_1', 'param_2').order_by('order').all():
                    self.food_aliases[a.param_1.lower()] = a.param_2
                caches['default'].set(FOOD_CACHE_KEY, self.food_aliases, 30)

        if self.food_aliases:
            # Alias keys are stored lower-cased, so the lookup is case-insensitive.
            try:
                return self.food_aliases[food.lower()]
            except KeyError:
                return food
        else:
            # No alias table available (caching disabled or table empty): query directly.
            if automation := Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1__iexact=food, disabled=False).order_by('order').first():
                return automation.param_2
        return food

    def apply_transpose_automation(self, string):
        """Placeholder: return ``string`` unchanged (not implemented yet)."""
        return string

    def apply_regex_replace_automation(self, string):
        """Placeholder: return ``string`` unchanged (not implemented yet)."""
        return string

View File

@ -6,16 +6,18 @@ from django.core.cache import caches
from django.db.models import Q from django.db.models import Q
from django.db.models.functions import Lower from django.db.models.functions import Lower
from cookbook.helper.automation_helper import AutomationEngine
from cookbook.models import Automation, Food, Ingredient, Unit from cookbook.models import Automation, Food, Ingredient, Unit
class IngredientParser: class IngredientParser:
request = None request = None
ignore_rules = False ignore_rules = False
food_aliases = {} # food_aliases = {}
unit_aliases = {} unit_aliases = {}
never_unit = {} never_unit = {}
transpose_words = {} transpose_words = {}
automation = None
def __init__(self, request, cache_mode, ignore_automations=False): def __init__(self, request, cache_mode, ignore_automations=False):
""" """
@ -26,15 +28,16 @@ class IngredientParser:
""" """
self.request = request self.request = request
self.ignore_rules = ignore_automations self.ignore_rules = ignore_automations
self.automation = AutomationEngine(self.request, use_cache=cache_mode)
if cache_mode: if cache_mode:
FOOD_CACHE_KEY = f'automation_food_alias_{self.request.space.pk}' # FOOD_CACHE_KEY = f'automation_food_alias_{self.request.space.pk}'
if c := caches['default'].get(FOOD_CACHE_KEY, None): # if c := caches['default'].get(FOOD_CACHE_KEY, None):
self.food_aliases = c # self.food_aliases = c
caches['default'].touch(FOOD_CACHE_KEY, 30) # caches['default'].touch(FOOD_CACHE_KEY, 30)
else: # else:
for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.FOOD_ALIAS).only('param_1', 'param_2').order_by('order').all(): # for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.FOOD_ALIAS).only('param_1', 'param_2').order_by('order').all():
self.food_aliases[a.param_1.lower()] = a.param_2 # self.food_aliases[a.param_1.lower()] = a.param_2
caches['default'].set(FOOD_CACHE_KEY, self.food_aliases, 30) # caches['default'].set(FOOD_CACHE_KEY, self.food_aliases, 30)
UNIT_CACHE_KEY = f'automation_unit_alias_{self.request.space.pk}' UNIT_CACHE_KEY = f'automation_unit_alias_{self.request.space.pk}'
if c := caches['default'].get(UNIT_CACHE_KEY, None): if c := caches['default'].get(UNIT_CACHE_KEY, None):
@ -65,29 +68,29 @@ class IngredientParser:
i += 1 i += 1
caches['default'].set(TRANSPOSE_WORDS_CACHE_KEY, self.transpose_words, 30) caches['default'].set(TRANSPOSE_WORDS_CACHE_KEY, self.transpose_words, 30)
else: else:
self.food_aliases = {} # self.food_aliases = {}
self.unit_aliases = {} self.unit_aliases = {}
self.never_unit = {} self.never_unit = {}
self.transpose_words = {} self.transpose_words = {}
def apply_food_automation(self, food): # def apply_food_automation(self, food):
""" # """
Apply food alias automations to passed food # Apply food alias automations to passed food
:param food: unit as string # :param food: unit as string
:return: food as string (possibly changed by automation) # :return: food as string (possibly changed by automation)
""" # """
if self.ignore_rules: # if self.ignore_rules:
return food # return food
else: # else:
if self.food_aliases: # if self.food_aliases:
try: # try:
return self.food_aliases[food.lower()] # return self.food_aliases[food.lower()]
except KeyError: # except KeyError:
return food # return food
else: # else:
if automation := Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1__iexact=food, disabled=False).order_by('order').first(): # if automation := Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1__iexact=food, disabled=False).order_by('order').first():
return automation.param_2 # return automation.param_2
return food # return food
def apply_unit_automation(self, unit): def apply_unit_automation(self, unit):
""" """
@ -130,7 +133,7 @@ class IngredientParser:
if not food: if not food:
return None return None
if len(food) > 0: if len(food) > 0:
f, created = Food.objects.get_or_create(name=self.apply_food_automation(food), space=self.request.space) f, created = Food.objects.get_or_create(name=self.automation.apply_food_automation(food), space=self.request.space)
return f return f
return None return None
@ -397,7 +400,7 @@ class IngredientParser:
if unit: if unit:
unit = self.apply_unit_automation(unit.strip()) unit = self.apply_unit_automation(unit.strip())
food = self.apply_food_automation(food.strip()) food = self.automation.apply_food_automation(food)
if len(food) > Food._meta.get_field('name').max_length: # test if food name is to long if len(food) > Food._meta.get_field('name').max_length: # test if food name is to long
# try splitting it at a space and taking only the first arg # try splitting it at a space and taking only the first arg
if len(food.split()) > 1 and len(food.split()[0]) < Food._meta.get_field('name').max_length: if len(food.split()) > 1 and len(food.split()[0]) < Food._meta.get_field('name').max_length:

View File

@ -1,191 +0,0 @@
# import json
# import re
# from json import JSONDecodeError
# from urllib.parse import unquote
# from bs4 import BeautifulSoup
# from bs4.element import Tag
# from recipe_scrapers import scrape_html, scrape_me
# from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
# from recipe_scrapers._utils import get_host_name, normalize_string
# from cookbook.helper import recipe_url_import as helper
# from cookbook.helper.scrapers.scrapers import text_scraper
# def get_recipe_from_source(text, url, request):
# def build_node(k, v):
# if isinstance(v, dict):
# node = {
# 'name': k,
# 'value': k,
# 'children': get_children_dict(v)
# }
# elif isinstance(v, list):
# node = {
# 'name': k,
# 'value': k,
# 'children': get_children_list(v)
# }
# else:
# node = {
# 'name': k + ": " + normalize_string(str(v)),
# 'value': normalize_string(str(v))
# }
# return node
# def get_children_dict(children):
# kid_list = []
# for k, v in children.items():
# kid_list.append(build_node(k, v))
# return kid_list
# def get_children_list(children):
# kid_list = []
# for kid in children:
# if type(kid) == list:
# node = {
# 'name': "unknown list",
# 'value': "unknown list",
# 'children': get_children_list(kid)
# }
# kid_list.append(node)
# elif type(kid) == dict:
# for k, v in kid.items():
# kid_list.append(build_node(k, v))
# else:
# kid_list.append({
# 'name': normalize_string(str(kid)),
# 'value': normalize_string(str(kid))
# })
# return kid_list
# recipe_tree = []
# parse_list = []
# soup = BeautifulSoup(text, "html.parser")
# html_data = get_from_html(soup)
# images = get_images_from_source(soup, url)
# text = unquote(text)
# scrape = None
# if url and not text:
# try:
# scrape = scrape_me(url_path=url, wild_mode=True)
# except(NoSchemaFoundInWildMode):
# pass
# if not scrape:
# try:
# parse_list.append(remove_graph(json.loads(text)))
# if not url and 'url' in parse_list[0]:
# url = parse_list[0]['url']
# scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
# except JSONDecodeError:
# for el in soup.find_all('script', type='application/ld+json'):
# el = remove_graph(el)
# if not url and 'url' in el:
# url = el['url']
# if type(el) == list:
# for le in el:
# parse_list.append(le)
# elif type(el) == dict:
# parse_list.append(el)
# for el in soup.find_all(type='application/json'):
# el = remove_graph(el)
# if type(el) == list:
# for le in el:
# parse_list.append(le)
# elif type(el) == dict:
# parse_list.append(el)
# scrape = text_scraper(text, url=url)
# recipe_json = helper.get_from_scraper(scrape, request)
# # TODO: DEPRECATE recipe_tree & html_data. first validate it isn't used anywhere
# for el in parse_list:
# temp_tree = []
# if isinstance(el, Tag):
# try:
# el = json.loads(el.string)
# except TypeError:
# continue
# for k, v in el.items():
# if isinstance(v, dict):
# node = {
# 'name': k,
# 'value': k,
# 'children': get_children_dict(v)
# }
# elif isinstance(v, list):
# node = {
# 'name': k,
# 'value': k,
# 'children': get_children_list(v)
# }
# else:
# node = {
# 'name': k + ": " + normalize_string(str(v)),
# 'value': normalize_string(str(v))
# }
# temp_tree.append(node)
# if '@type' in el and el['@type'] == 'Recipe':
# recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
# else:
# recipe_tree += [{'name': 'json', 'children': temp_tree}]
# return recipe_json, recipe_tree, html_data, images
# def get_from_html(soup):
# INVISIBLE_ELEMS = ('style', 'script', 'head', 'title')
# html = []
# for s in soup.strings:
# if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)):
# html.append(s)
# return html
# def get_images_from_source(soup, url):
# sources = ['src', 'srcset', 'data-src']
# images = []
# img_tags = soup.find_all('img')
# if url:
# site = get_host_name(url)
# prot = url.split(':')[0]
# urls = []
# for img in img_tags:
# for src in sources:
# try:
# urls.append(img[src])
# except KeyError:
# pass
# for u in urls:
# u = u.split('?')[0]
# filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
# if filename:
# if (('http' not in u) and (url)):
# # sometimes an image source can be relative
# # if it is provide the base url
# u = '{}://{}{}'.format(prot, site, u)
# if 'http' in u:
# images.append(u)
# return images
# def remove_graph(el):
# # recipes type might be wrapped in @graph type
# if isinstance(el, Tag):
# try:
# el = json.loads(el.string)
# if '@graph' in el:
# for x in el['@graph']:
# if '@type' in x and x['@type'] == 'Recipe':
# el = x
# except (TypeError, JSONDecodeError):
# pass
# return el

View File

@ -149,6 +149,7 @@ def get_from_scraper(scrape, request):
parsed_description = parse_description(description) parsed_description = parse_description(description)
# TODO notify user about limit if reached # TODO notify user about limit if reached
# limits exist to limit the attack surface for dos style attacks # limits exist to limit the attack surface for dos style attacks
# TODO migrate to AutomationEngine
automations = Automation.objects.filter( automations = Automation.objects.filter(
type=Automation.DESCRIPTION_REPLACE, type=Automation.DESCRIPTION_REPLACE,
space=request.space, space=request.space,
@ -206,6 +207,7 @@ def get_from_scraper(scrape, request):
pass pass
if 'source_url' in recipe_json and recipe_json['source_url']: if 'source_url' in recipe_json and recipe_json['source_url']:
# TODO migrate to AutomationEngine
automations = Automation.objects.filter( automations = Automation.objects.filter(
type=Automation.INSTRUCTION_REPLACE, type=Automation.INSTRUCTION_REPLACE,
space=request.space, space=request.space,
@ -414,6 +416,7 @@ def parse_keywords(keyword_json, space):
keywords = [] keywords = []
keyword_aliases = {} keyword_aliases = {}
# retrieve keyword automation cache if it exists, otherwise build from database # retrieve keyword automation cache if it exists, otherwise build from database
# TODO migrate to AutomationEngine
KEYWORD_CACHE_KEY = f'automation_keyword_alias_{space.pk}' KEYWORD_CACHE_KEY = f'automation_keyword_alias_{space.pk}'
if c := caches['default'].get(KEYWORD_CACHE_KEY, None): if c := caches['default'].get(KEYWORD_CACHE_KEY, None):
keyword_aliases = c keyword_aliases = c

View File

@ -1,22 +1,65 @@
import pytest
from django.contrib import auth
from django.urls import reverse
from django_scopes import scopes_disabled
from cookbook.forms import ImportExportBase
from cookbook.helper.ingredient_parser import IngredientParser
from cookbook.models import ExportLog, Automation
import json import json
import os import os
import pytest import pytest
from django.contrib import auth
from django.urls import reverse from django.urls import reverse
from django_scopes import scope, scopes_disabled
from cookbook.forms import ImportExportBase
from cookbook.helper.ingredient_parser import IngredientParser
from cookbook.helper.recipe_search import RecipeSearch
from cookbook.models import Automation, ExportLog, Food, Recipe
from cookbook.tests.conftest import validate_recipe from cookbook.tests.conftest import validate_recipe
from cookbook.tests.factories import FoodFactory, RecipeFactory
IMPORT_SOURCE_URL = 'api_recipe_from_source' IMPORT_SOURCE_URL = 'api_recipe_from_source'
# TODO test case sensitive match, assert update value
# TODO test case insensitive match, assert update value
# TODO test no match, assert not update value
# TODO test accent insensitive match, assert not update value
@pytest.fixture
def obj_1(space_1, u1_s1):
    """Create a non-running ExportLog in ``space_1`` whose creator is the user resolved from ``u1_s1``."""
    return ExportLog.objects.create(
        type=ImportExportBase.DEFAULT,
        running=False,
        created_by=auth.get_user(u1_s1),
        space=space_1,
        exported_recipes=10,
        total_recipes=10,
    )
@pytest.mark.parametrize("arg", [
    ['a_u', 302],
    ['g1_s1', 302],
    ['u1_s1', 200],
    ['a1_s1', 200],
    ['u1_s2', 404],
    ['a1_s2', 404],
])
def test_keyword_automation(arg):
    """Placeholder for keyword automation coverage; always passes.

    ``arg`` must be accepted because the parametrize decorator declares it;
    pytest refuses to collect a parametrized test that lacks the argument.
    The value is not used yet.
    """
    assert True
def test_unit_automation():
    """Placeholder for unit automation coverage; always passes."""
    assert True
def test_food_automation():
    """Placeholder for food automation coverage; always passes."""
    assert True
def test_description_replace_automation():
    """Placeholder for description-replace automation coverage; always passes."""
    assert True
def test_instruction_replace_automation():
    """Placeholder for instruction-replace automation coverage; always passes."""
    assert True
def test_never_unit_automation():
    """Placeholder for never-unit automation coverage; always passes."""
    assert True
def test_transpose_automation():
    """Placeholder for transpose automation coverage; always passes."""
    assert True
# for some reason this tests cant run due to some kind of encoding issue, needs to be fixed # for some reason this tests cant run due to some kind of encoding issue, needs to be fixed
# def test_description_replace_automation(u1_s1, space_1): # def test_description_replace_automation(u1_s1, space_1):
# if 'cookbook' in os.getcwd(): # if 'cookbook' in os.getcwd():

View File

@ -3,7 +3,6 @@ from django.contrib import auth
from django.urls import reverse from django.urls import reverse
from cookbook.forms import ImportExportBase from cookbook.forms import ImportExportBase
from cookbook.helper.ingredient_parser import IngredientParser
from cookbook.models import ExportLog from cookbook.models import ExportLog

View File

@ -1,4 +1,3 @@
import pytest import pytest
from django.contrib import auth from django.contrib import auth
from django.urls import reverse from django.urls import reverse