From a820b9c09e89993a0802ef85ab7d9a67e2173100 Mon Sep 17 00:00:00 2001 From: smilerz Date: Sat, 26 Aug 2023 12:37:16 -0500 Subject: [PATCH] create AutomationEngine class create food_automation method refactor food automations to use AutomationEngine --- cookbook/helper/automation_helper.py | 57 ++++++ cookbook/helper/ingredient_parser.py | 63 ++++--- cookbook/helper/recipe_html_import.py | 191 -------------------- cookbook/helper/recipe_url_import.py | 3 + cookbook/tests/other/test_automations.py | 59 +++++- cookbook/tests/other/test_export.py | 1 - cookbook/tests/other/test_makenow_filter.py | 1 - 7 files changed, 144 insertions(+), 231 deletions(-) create mode 100644 cookbook/helper/automation_helper.py delete mode 100644 cookbook/helper/recipe_html_import.py diff --git a/cookbook/helper/automation_helper.py b/cookbook/helper/automation_helper.py new file mode 100644 index 00000000..cd224f0b --- /dev/null +++ b/cookbook/helper/automation_helper.py @@ -0,0 +1,57 @@ +from django.core.cache import caches +from django.db.models import Q +from django.db.models.functions import Lower + +from cookbook.models import Automation, Food, Ingredient, Unit + + +class AutomationEngine(): + request = None + use_cache = None + food_aliases = None + unit_aliases = None + never_unit = None + transpose_words = None + description_replace = None + instruction_replace = None + + def __init__(self, request, use_cache=True): + self.request = request + self.use_cache = use_cache + + def apply_keyword_automation(self, keyword): + return keyword + + def apply_unit_automation(self, unit): + return unit + + def apply_food_automation(self, food): + food = food.strip() + if self.use_cache and self.food_aliases is None: + self.food_aliases = {} + FOOD_CACHE_KEY = f'automation_food_alias_{self.request.space.pk}' + if c := caches['default'].get(FOOD_CACHE_KEY, None): + self.food_aliases = c + caches['default'].touch(FOOD_CACHE_KEY, 30) + else: + for a in 
Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.FOOD_ALIAS).only('param_1', 'param_2').order_by('order').all(): + self.food_aliases[a.param_1.lower()] = a.param_2 + caches['default'].set(FOOD_CACHE_KEY, self.food_aliases, 30) + elif not self.use_cache:  # keep aliases loaded by an earlier call; only the uncached mode uses an empty map + self.food_aliases = {} + + if self.food_aliases: + try: + return self.food_aliases[food.lower()] + except KeyError: + return food + else: + if automation := Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1__iexact=food, disabled=False).order_by('order').first(): + return automation.param_2 + return food + + def apply_transpose_automation(self, string): + return string + + def apply_regex_replace_automation(self, string): + return string diff --git a/cookbook/helper/ingredient_parser.py b/cookbook/helper/ingredient_parser.py index 57b70f44..1db2c9ed 100644 --- a/cookbook/helper/ingredient_parser.py +++ b/cookbook/helper/ingredient_parser.py @@ -6,16 +6,18 @@ from django.core.cache import caches from django.db.models import Q from django.db.models.functions import Lower +from cookbook.helper.automation_helper import AutomationEngine from cookbook.models import Automation, Food, Ingredient, Unit class IngredientParser: request = None ignore_rules = False - food_aliases = {} + # food_aliases = {} unit_aliases = {} never_unit = {} transpose_words = {} + automation = None def __init__(self, request, cache_mode, ignore_automations=False): """ @@ -26,15 +28,16 @@ class IngredientParser: """ self.request = request self.ignore_rules = ignore_automations + self.automation = AutomationEngine(self.request, use_cache=cache_mode) if cache_mode: - FOOD_CACHE_KEY = f'automation_food_alias_{self.request.space.pk}' - if c := caches['default'].get(FOOD_CACHE_KEY, None): - self.food_aliases = c - caches['default'].touch(FOOD_CACHE_KEY, 30) - else: - for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.FOOD_ALIAS).only('param_1', 
'param_2').order_by('order').all(): - self.food_aliases[a.param_1.lower()] = a.param_2 - caches['default'].set(FOOD_CACHE_KEY, self.food_aliases, 30) + # FOOD_CACHE_KEY = f'automation_food_alias_{self.request.space.pk}' + # if c := caches['default'].get(FOOD_CACHE_KEY, None): + # self.food_aliases = c + # caches['default'].touch(FOOD_CACHE_KEY, 30) + # else: + # for a in Automation.objects.filter(space=self.request.space, disabled=False, type=Automation.FOOD_ALIAS).only('param_1', 'param_2').order_by('order').all(): + # self.food_aliases[a.param_1.lower()] = a.param_2 + # caches['default'].set(FOOD_CACHE_KEY, self.food_aliases, 30) UNIT_CACHE_KEY = f'automation_unit_alias_{self.request.space.pk}' if c := caches['default'].get(UNIT_CACHE_KEY, None): @@ -65,29 +68,29 @@ class IngredientParser: i += 1 caches['default'].set(TRANSPOSE_WORDS_CACHE_KEY, self.transpose_words, 30) else: - self.food_aliases = {} + # self.food_aliases = {} self.unit_aliases = {} self.never_unit = {} self.transpose_words = {} - def apply_food_automation(self, food): - """ - Apply food alias automations to passed food - :param food: unit as string - :return: food as string (possibly changed by automation) - """ - if self.ignore_rules: - return food - else: - if self.food_aliases: - try: - return self.food_aliases[food.lower()] - except KeyError: - return food - else: - if automation := Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1__iexact=food, disabled=False).order_by('order').first(): - return automation.param_2 - return food + # def apply_food_automation(self, food): + # """ + # Apply food alias automations to passed food + # :param food: unit as string + # :return: food as string (possibly changed by automation) + # """ + # if self.ignore_rules: + # return food + # else: + # if self.food_aliases: + # try: + # return self.food_aliases[food.lower()] + # except KeyError: + # return food + # else: + # if automation := 
Automation.objects.filter(space=self.request.space, type=Automation.FOOD_ALIAS, param_1__iexact=food, disabled=False).order_by('order').first(): + # return automation.param_2 + # return food def apply_unit_automation(self, unit): """ @@ -130,7 +133,7 @@ class IngredientParser: if not food: return None if len(food) > 0: - f, created = Food.objects.get_or_create(name=self.apply_food_automation(food), space=self.request.space) + f, created = Food.objects.get_or_create(name=food if self.ignore_rules else self.automation.apply_food_automation(food), space=self.request.space) return f return None @@ -397,7 +400,7 @@ class IngredientParser: if unit: unit = self.apply_unit_automation(unit.strip()) - food = self.apply_food_automation(food.strip()) + food = food.strip() if self.ignore_rules else self.automation.apply_food_automation(food) if len(food) > Food._meta.get_field('name').max_length: # test if food name is to long # try splitting it at a space and taking only the first arg if len(food.split()) > 1 and len(food.split()[0]) < Food._meta.get_field('name').max_length: diff --git a/cookbook/helper/recipe_html_import.py b/cookbook/helper/recipe_html_import.py deleted file mode 100644 index 95f115b7..00000000 --- a/cookbook/helper/recipe_html_import.py +++ /dev/null @@ -1,191 +0,0 @@ -# import json -# import re -# from json import JSONDecodeError -# from urllib.parse import unquote - -# from bs4 import BeautifulSoup -# from bs4.element import Tag -# from recipe_scrapers import scrape_html, scrape_me -# from recipe_scrapers._exceptions import NoSchemaFoundInWildMode -# from recipe_scrapers._utils import get_host_name, normalize_string - -# from cookbook.helper import recipe_url_import as helper -# from cookbook.helper.scrapers.scrapers import text_scraper - - -# def get_recipe_from_source(text, url, request): -# def build_node(k, v): -# if isinstance(v, dict): -# node = { -# 'name': k, -# 'value': k, -# 'children': get_children_dict(v) -# } -# elif isinstance(v, list): -# node = { -# 'name': k, -# 'value': k, -# 'children': 
get_children_list(v) -# } -# else: -# node = { -# 'name': k + ": " + normalize_string(str(v)), -# 'value': normalize_string(str(v)) -# } -# return node - -# def get_children_dict(children): -# kid_list = [] -# for k, v in children.items(): -# kid_list.append(build_node(k, v)) -# return kid_list - -# def get_children_list(children): -# kid_list = [] -# for kid in children: -# if type(kid) == list: -# node = { -# 'name': "unknown list", -# 'value': "unknown list", -# 'children': get_children_list(kid) -# } -# kid_list.append(node) -# elif type(kid) == dict: -# for k, v in kid.items(): -# kid_list.append(build_node(k, v)) -# else: -# kid_list.append({ -# 'name': normalize_string(str(kid)), -# 'value': normalize_string(str(kid)) -# }) -# return kid_list - -# recipe_tree = [] -# parse_list = [] -# soup = BeautifulSoup(text, "html.parser") -# html_data = get_from_html(soup) -# images = get_images_from_source(soup, url) -# text = unquote(text) -# scrape = None - -# if url and not text: -# try: -# scrape = scrape_me(url_path=url, wild_mode=True) -# except(NoSchemaFoundInWildMode): -# pass - -# if not scrape: -# try: -# parse_list.append(remove_graph(json.loads(text))) -# if not url and 'url' in parse_list[0]: -# url = parse_list[0]['url'] -# scrape = text_scraper("", url=url) - -# except JSONDecodeError: -# for el in soup.find_all('script', type='application/ld+json'): -# el = remove_graph(el) -# if not url and 'url' in el: -# url = el['url'] -# if type(el) == list: -# for le in el: -# parse_list.append(le) -# elif type(el) == dict: -# parse_list.append(el) -# for el in soup.find_all(type='application/json'): -# el = remove_graph(el) -# if type(el) == list: -# for le in el: -# parse_list.append(le) -# elif type(el) == dict: -# parse_list.append(el) -# scrape = text_scraper(text, url=url) - -# recipe_json = helper.get_from_scraper(scrape, request) - -# # TODO: DEPRECATE recipe_tree & html_data. 
first validate it isn't used anywhere -# for el in parse_list: -# temp_tree = [] -# if isinstance(el, Tag): -# try: -# el = json.loads(el.string) -# except TypeError: -# continue - -# for k, v in el.items(): -# if isinstance(v, dict): -# node = { -# 'name': k, -# 'value': k, -# 'children': get_children_dict(v) -# } -# elif isinstance(v, list): -# node = { -# 'name': k, -# 'value': k, -# 'children': get_children_list(v) -# } -# else: -# node = { -# 'name': k + ": " + normalize_string(str(v)), -# 'value': normalize_string(str(v)) -# } -# temp_tree.append(node) - -# if '@type' in el and el['@type'] == 'Recipe': -# recipe_tree += [{'name': 'ld+json', 'children': temp_tree}] -# else: -# recipe_tree += [{'name': 'json', 'children': temp_tree}] - -# return recipe_json, recipe_tree, html_data, images - - -# def get_from_html(soup): -# INVISIBLE_ELEMS = ('style', 'script', 'head', 'title') -# html = [] -# for s in soup.strings: -# if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)): -# html.append(s) -# return html - - -# def get_images_from_source(soup, url): -# sources = ['src', 'srcset', 'data-src'] -# images = [] -# img_tags = soup.find_all('img') -# if url: -# site = get_host_name(url) -# prot = url.split(':')[0] - -# urls = [] -# for img in img_tags: -# for src in sources: -# try: -# urls.append(img[src]) -# except KeyError: -# pass - -# for u in urls: -# u = u.split('?')[0] -# filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u) -# if filename: -# if (('http' not in u) and (url)): -# # sometimes an image source can be relative -# # if it is provide the base url -# u = '{}://{}{}'.format(prot, site, u) -# if 'http' in u: -# images.append(u) -# return images - - -# def remove_graph(el): -# # recipes type might be wrapped in @graph type -# if isinstance(el, Tag): -# try: -# el = json.loads(el.string) -# if '@graph' in el: -# for x in el['@graph']: -# if '@type' in x and x['@type'] == 'Recipe': -# el = x -# except (TypeError, JSONDecodeError): -# 
pass -# return el diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py index b84c9f65..4a703b0b 100644 --- a/cookbook/helper/recipe_url_import.py +++ b/cookbook/helper/recipe_url_import.py @@ -149,6 +149,7 @@ def get_from_scraper(scrape, request): parsed_description = parse_description(description) # TODO notify user about limit if reached # limits exist to limit the attack surface for dos style attacks + # TODO migrate to AutomationEngine automations = Automation.objects.filter( type=Automation.DESCRIPTION_REPLACE, space=request.space, @@ -206,6 +207,7 @@ def get_from_scraper(scrape, request): pass if 'source_url' in recipe_json and recipe_json['source_url']: + # TODO migrate to AutomationEngine automations = Automation.objects.filter( type=Automation.INSTRUCTION_REPLACE, space=request.space, @@ -414,6 +416,7 @@ def parse_keywords(keyword_json, space): keywords = [] keyword_aliases = {} # retrieve keyword automation cache if it exists, otherwise build from database + # TODO migrate to AutomationEngine KEYWORD_CACHE_KEY = f'automation_keyword_alias_{space.pk}' if c := caches['default'].get(KEYWORD_CACHE_KEY, None): keyword_aliases = c diff --git a/cookbook/tests/other/test_automations.py b/cookbook/tests/other/test_automations.py index df908fdb..41175086 100644 --- a/cookbook/tests/other/test_automations.py +++ b/cookbook/tests/other/test_automations.py @@ -1,22 +1,65 @@ -import pytest -from django.contrib import auth -from django.urls import reverse -from django_scopes import scopes_disabled - -from cookbook.forms import ImportExportBase -from cookbook.helper.ingredient_parser import IngredientParser -from cookbook.models import ExportLog, Automation import json import os import pytest +from django.contrib import auth from django.urls import reverse +from django_scopes import scope, scopes_disabled +from cookbook.forms import ImportExportBase +from cookbook.helper.ingredient_parser import IngredientParser +from 
cookbook.helper.recipe_search import RecipeSearch +from cookbook.models import Automation, ExportLog, Food, Recipe from cookbook.tests.conftest import validate_recipe +from cookbook.tests.factories import FoodFactory, RecipeFactory IMPORT_SOURCE_URL = 'api_recipe_from_source' +# TODO test case sensitive match, assert update value +# TODO test case insensitive match, assert update value +# TODO test no match, assert not update value +# TODO test accent insensitive match, assert not update value + +@pytest.fixture +def obj_1(space_1, u1_s1): + return ExportLog.objects.create(type=ImportExportBase.DEFAULT, running=False, created_by=auth.get_user(u1_s1), space=space_1, exported_recipes=10, total_recipes=10) + + +@pytest.mark.parametrize("arg", [ + ['a_u', 302], + ['g1_s1', 302], + ['u1_s1', 200], + ['a1_s1', 200], + ['u1_s2', 404], + ['a1_s2', 404], +]) +def test_keyword_automation(arg):  # the parametrize decorator requires a matching 'arg' parameter + assert True == True + + +def test_unit_automation(): + assert True == True + + +def test_food_automation(): + assert True == True + + +def test_description_replace_automation(): + assert True == True + + +def test_instruction_replace_automation(): + assert True == True + + +def test_never_unit_automation(): + assert True == True + + +def test_transpose_automation(): + assert True == True # for some reason this tests cant run due to some kind of encoding issue, needs to be fixed # def test_description_replace_automation(u1_s1, space_1): # if 'cookbook' in os.getcwd(): diff --git a/cookbook/tests/other/test_export.py b/cookbook/tests/other/test_export.py index 995c0cce..4508d714 100644 --- a/cookbook/tests/other/test_export.py +++ b/cookbook/tests/other/test_export.py @@ -3,7 +3,6 @@ from django.contrib import auth from django.urls import reverse from cookbook.forms import ImportExportBase -from cookbook.helper.ingredient_parser import IngredientParser from cookbook.models import ExportLog diff --git a/cookbook/tests/other/test_makenow_filter.py 
b/cookbook/tests/other/test_makenow_filter.py index 9e43df50..a5780626 100644 --- a/cookbook/tests/other/test_makenow_filter.py +++ b/cookbook/tests/other/test_makenow_filter.py @@ -1,4 +1,3 @@ - import pytest from django.contrib import auth from django.urls import reverse