From 33ac00e294487e96443015fd435c243551e0e4e4 Mon Sep 17 00:00:00 2001 From: vabene1111 Date: Mon, 19 Feb 2024 09:41:45 +0100 Subject: [PATCH] improved importer functions unit and category --- cookbook/forms.py | 2 +- cookbook/helper/open_data_importer.py | 127 ++++++++++++++++++++++---- cookbook/models.py | 44 ++++++++- 3 files changed, 151 insertions(+), 22 deletions(-) diff --git a/cookbook/forms.py b/cookbook/forms.py index ba716c0a..40237cae 100644 --- a/cookbook/forms.py +++ b/cookbook/forms.py @@ -10,7 +10,7 @@ from django_scopes import scopes_disabled from django_scopes.forms import SafeModelChoiceField, SafeModelMultipleChoiceField from hcaptcha.fields import hCaptchaField -from .models import Comment, Food, InviteLink, Keyword, Recipe, RecipeBook, RecipeBookEntry, SearchPreference, Space, Storage, Sync, User, UserPreference +from .models import Comment, InviteLink, Keyword, Recipe, SearchPreference, Space, Storage, Sync, User, UserPreference class SelectWidget(widgets.Select): diff --git a/cookbook/helper/open_data_importer.py b/cookbook/helper/open_data_importer.py index a517a260..ef1ede0b 100644 --- a/cookbook/helper/open_data_importer.py +++ b/cookbook/helper/open_data_importer.py @@ -19,71 +19,157 @@ class OpenDataImporter: def _update_slug_cache(self, object_class, datatype): self.slug_id_cache[datatype] = dict(object_class.objects.filter(space=self.request.space, open_data_slug__isnull=False).values_list('open_data_slug', 'id', )) + @staticmethod + def _is_obj_identical(field_list, obj, existing_obj): + """ + checks if the obj meant for import is identical to an already existing one + :param field_list: list of field names to check + :type field_list: list[str] + :param obj: object meant for import + :type obj: Object + :param existing_obj: object already in DB + :type existing_obj: Object + :return: if objects are identical + :rtype: bool + """ + for field in field_list: + if getattr(obj, field) != existing_obj[field]: + return False + return True + + @staticmethod + def _merge_if_conflicting(model_type, obj, existing_data_slugs, existing_data_names): + """ + sometimes there might be two objects conflicting for open data import (one has the slug, the other the name) + this function checks if that is the case and merges the two objects if possible + :param model_type: type of model to check/merge + :type model_type: Model + :param obj: object that should be created/updated + :type obj: Model + :param existing_data_slugs: dict of open data slugs mapped to objects + :type existing_data_slugs: dict + :param existing_data_names: dict of names mapped to objects + :type existing_data_names: dict + :return: true if merge was successful or not necessary else false + :rtype: bool + """ + if obj.open_data_slug in existing_data_slugs and obj.name in existing_data_names and existing_data_slugs[obj.open_data_slug]['pk'] != existing_data_names[obj.name]['pk']: + try: + source_obj = model_type.objects.get(pk=existing_data_slugs[obj.open_data_slug]['pk']) + del existing_data_slugs[obj.open_data_slug] + source_obj.merge_into(model_type.objects.get(pk=existing_data_names[obj.name]['pk'])) + return True + except RuntimeError: + return False # in the edge case (e.g. parent/child) that an object cannot be merged don't update it for now + else: + return True + + @staticmethod + def _get_existing_obj(obj, existing_data_slugs, existing_data_names): + """ + gets the existing object from slug or name cache + :param obj: object that should be found + :type obj: Model + :param existing_data_slugs: dict of open data slugs mapped to objects + :type existing_data_slugs: dict + :param existing_data_names: dict of names mapped to objects + :type existing_data_names: dict + :return: existing object + :rtype: dict + """ + existing_obj = None + if obj.open_data_slug in existing_data_slugs: + existing_obj = existing_data_slugs[obj.open_data_slug] + elif obj.name in existing_data_names: + existing_obj = existing_data_names[obj.name] + + return existing_obj + def import_units(self): datatype = 'unit' + model_type = Unit + field_list = ['name', 'plural_name', 'base_unit', 'open_data_slug'] - existing_data = {} - for obj in Unit.objects.filter(space=self.request.space, open_data_slug__isnull=False).values('pk', 'name', 'open_data_slug'): - existing_data[obj['open_data_slug']] = obj + existing_data_slugs = {} + existing_data_names = {} + for obj in model_type.objects.filter(space=self.request.space).values('pk', *field_list): + existing_data_slugs[obj['open_data_slug']] = obj + existing_data_names[obj['name']] = obj update_list = [] create_list = [] for u in list(self.data[datatype].keys()): - obj = Unit( + obj = model_type( name=self.data[datatype][u]['name'], plural_name=self.data[datatype][u]['plural_name'], base_unit=self.data[datatype][u]['base_unit'].lower() if self.data[datatype][u]['base_unit'] != '' else None, open_data_slug=u, space=self.request.space ) - if obj.open_data_slug in existing_data: - obj.pk = existing_data[obj.open_data_slug]['pk'] - update_list.append(obj) + if obj.open_data_slug in existing_data_slugs or obj.name in existing_data_names: + if not self._merge_if_conflicting(model_type, obj, existing_data_slugs, existing_data_names): + continue # if conflicting objects exist and cannot be merged skip object + + existing_obj = self._get_existing_obj(obj, existing_data_slugs, existing_data_names) + + if not self._is_obj_identical(field_list, obj, existing_obj): + obj.pk = existing_obj['pk'] + update_list.append(obj) else: create_list.append(obj) total_count = 0 if self.update_existing and len(update_list) > 0: - Unit.objects.bulk_update(update_list, ('name', 'plural_name', 'base_unit', 'open_data_slug')) + model_type.objects.bulk_update(update_list, field_list) total_count += len(update_list) if len(create_list) > 0: - Unit.objects.bulk_create(create_list, update_conflicts=True, update_fields=('open_data_slug',), unique_fields=('space', 'name',)) + model_type.objects.bulk_create(create_list, update_conflicts=True, update_fields=field_list, unique_fields=('space', 'name',)) total_count += len(create_list) return total_count def import_category(self): datatype = 'category' + model_type = SupermarketCategory + field_list = ['name', 'open_data_slug'] - existing_data = {} - for obj in SupermarketCategory.objects.filter(space=self.request.space, open_data_slug__isnull=False).values('pk', 'name', 'open_data_slug'): - existing_data[obj['open_data_slug']] = obj + existing_data_slugs = {} + existing_data_names = {} + for obj in model_type.objects.filter(space=self.request.space).values('pk', *field_list): + existing_data_slugs[obj['open_data_slug']] = obj + existing_data_names[obj['name']] = obj update_list = [] create_list = [] for k in list(self.data[datatype].keys()): - obj = SupermarketCategory( + obj = model_type( name=self.data[datatype][k]['name'], open_data_slug=k, space=self.request.space ) - if obj.open_data_slug in existing_data: - obj.pk = existing_data[obj.open_data_slug]['pk'] - update_list.append(obj) + if obj.open_data_slug in existing_data_slugs or obj.name in existing_data_names: + if not self._merge_if_conflicting(model_type, obj, existing_data_slugs, existing_data_names): + continue # if conflicting objects exist and cannot be merged skip object + + existing_obj = self._get_existing_obj(obj, existing_data_slugs, existing_data_names) + + if not self._is_obj_identical(field_list, obj, existing_obj): + obj.pk = existing_obj['pk'] + update_list.append(obj) else: create_list.append(obj) total_count = 0 if self.update_existing and len(update_list) > 0: - SupermarketCategory.objects.bulk_update(update_list, ('name', 'open_data_slug')) + model_type.objects.bulk_update(update_list, field_list) total_count += len(update_list) if len(create_list) > 0: - SupermarketCategory.objects.bulk_create(create_list, update_conflicts=True, update_fields=('open_data_slug',), unique_fields=('space', 'name',)) + model_type.objects.bulk_create(create_list, update_conflicts=True, update_fields=field_list, unique_fields=('space', 'name',)) total_count += len(create_list) return total_count @@ -202,10 +288,13 @@ class OpenDataImporter: 'supermarket_category_id': self.slug_id_cache['category'][self.data[datatype][k]['store_category']], 'fdc_id': re.sub(r'\D', '', self.data[datatype][k]['fdc_id']) if self.data[datatype][k]['fdc_id'] != '' else None, 'open_data_slug': k, - 'properties_food_unit_id': unit_g.id, + 'properties_food_unit_id': None, 'space': self.request.space.id, } + if unit_g: + obj['properties_food_unit_id'] = unit_g.id + if obj['open_data_slug'] in existing_data or obj['name'] in existing_data_names: # rather rare edge cases object A has the slug and object B has the name which would lead to uniqueness errors if obj['open_data_slug'] in existing_data and obj['name'] in existing_data_names and existing_data[obj['open_data_slug']]['pk'] != existing_data_names[obj['name']]['pk']: diff --git a/cookbook/models.py b/cookbook/models.py index 0f2636a1..6a6ce7a2 100644 --- a/cookbook/models.py +++ b/cookbook/models.py @@ -209,6 +209,27 @@ class TreeModel(MP_Node): abstract = True +class MergeModelMixin: + + def merge_into(self, target): + """ + very simple merge function that replaces the current instance with the target instance + :param target: target object + :return: target with data merged + """ + + if self == target: + raise ValueError('Cannot merge an object with itself') + + if getattr(self, 'space', 0) != getattr(target, 'space', 0): + raise RuntimeError('Cannot merge objects from different spaces') + + if hasattr(self, 'get_descendants_and_self') and target in callable(getattr(self, 'get_descendants_and_self')): + raise RuntimeError('Cannot merge parent (source) with child (target) object') + + # TODO copy field values + + class PermissionModelMixin: @staticmethod def get_space_key(): @@ -512,7 +533,7 @@ class Sync(models.Model, PermissionModelMixin): return self.path -class SupermarketCategory(models.Model, PermissionModelMixin): +class SupermarketCategory(models.Model, PermissionModelMixin, MergeModelMixin): name = models.CharField(max_length=128, validators=[MinLengthValidator(1)]) description = models.TextField(blank=True, null=True) open_data_slug = models.CharField(max_length=128, null=True, blank=True, default=None) @@ -523,6 +544,14 @@ class SupermarketCategory(models.Model, PermissionModelMixin): def __str__(self): return self.name + def merge_into(self, target): + super().merge_into(target) + + Food.objects.filter(supermarket_category=self).update(supermarket_category=target) + SupermarketCategoryRelation.objects.filter(category=self).update(category=target) + self.delete() + return target + class Meta: constraints = [ models.UniqueConstraint(fields=['space', 'name'], name='smc_unique_name_per_space'), @@ -597,7 +626,7 @@ class Keyword(ExportModelOperationsMixin('keyword'), TreeModel, PermissionModelM indexes = (Index(fields=['id', 'name']),) -class Unit(ExportModelOperationsMixin('unit'), models.Model, PermissionModelMixin): +class Unit(ExportModelOperationsMixin('unit'), models.Model, PermissionModelMixin, MergeModelMixin): name = models.CharField(max_length=128, validators=[MinLengthValidator(1)]) plural_name = models.CharField(max_length=128, null=True, blank=True, default=None) description = models.TextField(blank=True, null=True) @@ -607,6 +636,17 @@ class Unit(ExportModelOperationsMixin('unit'), models.Model, PermissionModelMixi space = models.ForeignKey(Space, on_delete=models.CASCADE) objects = ScopedManager(space='space') + def merge_into(self, target): + super().merge_into(target) + + Ingredient.objects.filter(unit=self).update(unit=target) + ShoppingListEntry.objects.filter(unit=self).update(unit=target) + Food.objects.filter(properties_food_unit=self).update(properties_food_unit=target) + Food.objects.filter(preferred_unit=self).update(preferred_unit=target) + Food.objects.filter(preferred_shopping_unit=self).update(preferred_shopping_unit=target) + self.delete() + return target + def __str__(self): return self.name