almost all unit conversion working

This commit is contained in:
vabene1111
2024-02-19 17:20:04 +01:00
parent 0725fb0f2b
commit b109e28b0c
6 changed files with 283 additions and 128 deletions

View File

@ -1,8 +1,22 @@
import traceback
from collections import defaultdict
from decimal import Decimal
from cookbook.models import (Food, FoodProperty, Property, PropertyType, Supermarket,
SupermarketCategory, SupermarketCategoryRelation, Unit, UnitConversion)
import re
class OpenDataImportResponse:
total_created = 0
total_updated = 0
total_untouched = 0
total_errored = 0
def to_dict(self):
return {'total_created': self.total_created, 'total_updated': self.total_updated, 'total_untouched': self.total_untouched, 'total_errored': self.total_errored}
class OpenDataImporter:
request = None
data = {}
@ -33,7 +47,12 @@ class OpenDataImporter:
:rtype: bool
"""
for field in field_list:
if getattr(obj, field) != existing_obj[field]:
if isinstance(getattr(obj, field), float) or isinstance(getattr(obj, field), Decimal):
if abs(float(getattr(obj, field)) - float(existing_obj[field])) > 0.001: # convert both to float and check if basically equal
print(f'comparing FLOAT {obj} failed because field {field} is not equal ({getattr(obj, field)} != {existing_obj[field]})')
return False
elif getattr(obj, field) != existing_obj[field]:
print(f'comparing {obj} failed because field {field} is not equal ({getattr(obj, field)} != {existing_obj[field]})')
return False
return True
@ -86,6 +105,7 @@ class OpenDataImporter:
return existing_obj
def import_units(self):
od_response = OpenDataImportResponse()
datatype = 'unit'
model_type = Unit
field_list = ['name', 'plural_name', 'base_unit', 'open_data_slug']
@ -109,6 +129,7 @@ class OpenDataImporter:
)
if obj.open_data_slug in existing_data_slugs or obj.name in existing_data_names:
if not self._merge_if_conflicting(model_type, obj, existing_data_slugs, existing_data_names):
od_response.total_errored += 1
continue # if conflicting objects exist and cannot be merged skip object
existing_obj = self._get_existing_obj(obj, existing_data_slugs, existing_data_names)
@ -116,21 +137,23 @@ class OpenDataImporter:
if not self._is_obj_identical(field_list, obj, existing_obj):
obj.pk = existing_obj['pk']
update_list.append(obj)
else:
od_response.total_untouched += 1
else:
create_list.append(obj)
total_count = 0
if self.update_existing and len(update_list) > 0:
model_type.objects.bulk_update(update_list, field_list)
total_count += len(update_list)
od_response.total_updated += len(update_list)
if len(create_list) > 0:
model_type.objects.bulk_create(create_list, update_conflicts=True, update_fields=field_list, unique_fields=('space', 'name',))
total_count += len(create_list)
od_response.total_created += len(create_list)
return total_count
return od_response
def import_category(self):
od_response = OpenDataImportResponse()
datatype = 'category'
model_type = SupermarketCategory
field_list = ['name', 'open_data_slug']
@ -153,6 +176,7 @@ class OpenDataImporter:
if obj.open_data_slug in existing_data_slugs or obj.name in existing_data_names:
if not self._merge_if_conflicting(model_type, obj, existing_data_slugs, existing_data_names):
od_response.total_errored += 1
continue # if conflicting objects exist and cannot be merged skip object
existing_obj = self._get_existing_obj(obj, existing_data_slugs, existing_data_names)
@ -160,86 +184,114 @@ class OpenDataImporter:
if not self._is_obj_identical(field_list, obj, existing_obj):
obj.pk = existing_obj['pk']
update_list.append(obj)
else:
od_response.total_untouched += 1
else:
create_list.append(obj)
total_count = 0
if self.update_existing and len(update_list) > 0:
model_type.objects.bulk_update(update_list, field_list)
total_count += len(update_list)
od_response.total_updated += len(update_list)
if len(create_list) > 0:
model_type.objects.bulk_create(create_list, update_conflicts=True, update_fields=field_list, unique_fields=('space', 'name',))
total_count += len(create_list)
od_response.total_created += len(create_list)
return total_count
return od_response
def import_property(self):
od_response = OpenDataImportResponse()
datatype = 'property'
model_type = PropertyType
field_list = ['name', 'unit', 'fdc_id', 'open_data_slug']
existing_data = {}
for obj in PropertyType.objects.filter(space=self.request.space, open_data_slug__isnull=False).values('pk', 'name', 'open_data_slug'):
existing_data[obj['open_data_slug']] = obj
existing_data_slugs = {}
existing_data_names = {}
for obj in model_type.objects.filter(space=self.request.space).values('pk', *field_list):
existing_data_slugs[obj['open_data_slug']] = obj
existing_data_names[obj['name']] = obj
update_list = []
create_list = []
for k in list(self.data[datatype].keys()):
obj = PropertyType(
obj = model_type(
name=self.data[datatype][k]['name'],
unit=self.data[datatype][k]['unit'],
fdc_id=self.data[datatype][k]['fdc_id'],
open_data_slug=k,
space=self.request.space
)
if obj.open_data_slug in existing_data:
obj.pk = existing_data[obj.open_data_slug]['pk']
update_list.append(obj)
if obj.open_data_slug in existing_data_slugs or obj.name in existing_data_names:
if not self._merge_if_conflicting(model_type, obj, existing_data_slugs, existing_data_names):
od_response.total_errored += 1
continue # if conflicting objects exist and cannot be merged skip object
existing_obj = self._get_existing_obj(obj, existing_data_slugs, existing_data_names)
if not self._is_obj_identical(field_list, obj, existing_obj):
obj.pk = existing_obj['pk']
update_list.append(obj)
else:
od_response.total_untouched += 1
else:
create_list.append(obj)
total_count = 0
if self.update_existing and len(update_list) > 0:
PropertyType.objects.bulk_update(update_list, ('name', 'fdc_id', 'unit', 'open_data_slug'))
total_count += len(update_list)
model_type.objects.bulk_update(update_list, field_list)
od_response.total_updated += len(update_list)
if len(create_list) > 0:
PropertyType.objects.bulk_create(create_list, update_conflicts=True, update_fields=('open_data_slug',), unique_fields=('space', 'name',))
total_count += len(create_list)
model_type.objects.bulk_create(create_list, update_conflicts=True, update_fields=field_list, unique_fields=('space', 'name',))
od_response.total_created += len(create_list)
return total_count
return od_response
def import_supermarket(self):
od_response = OpenDataImportResponse()
datatype = 'store'
model_type = Supermarket
field_list = ['name', 'open_data_slug']
existing_data = {}
for obj in Supermarket.objects.filter(space=self.request.space, open_data_slug__isnull=False).values('pk', 'name', 'open_data_slug'):
existing_data[obj['open_data_slug']] = obj
existing_data_slugs = {}
existing_data_names = {}
for obj in model_type.objects.filter(space=self.request.space).values('pk', *field_list):
existing_data_slugs[obj['open_data_slug']] = obj
existing_data_names[obj['name']] = obj
update_list = []
create_list = []
self._update_slug_cache(SupermarketCategory, 'category')
for k in list(self.data[datatype].keys()):
obj = Supermarket(
obj = model_type(
name=self.data[datatype][k]['name'],
open_data_slug=k,
space=self.request.space
)
if obj.open_data_slug in existing_data:
obj.pk = existing_data[obj.open_data_slug]['pk']
update_list.append(obj)
if obj.open_data_slug in existing_data_slugs or obj.name in existing_data_names:
if not self._merge_if_conflicting(model_type, obj, existing_data_slugs, existing_data_names):
od_response.total_errored += 1
continue # if conflicting objects exist and cannot be merged skip object
existing_obj = self._get_existing_obj(obj, existing_data_slugs, existing_data_names)
if not self._is_obj_identical(field_list, obj, existing_obj):
obj.pk = existing_obj['pk']
update_list.append(obj)
else:
od_response.total_untouched += 1
else:
create_list.append(obj)
total_count = 0
if self.update_existing and len(update_list) > 0:
Supermarket.objects.bulk_update(update_list, ('name', 'open_data_slug'))
total_count += len(update_list)
model_type.objects.bulk_update(update_list, field_list)
od_response.total_updated += len(update_list)
if len(create_list) > 0:
Supermarket.objects.bulk_create(create_list, unique_fields=('space', 'name',), update_conflicts=True, update_fields=('open_data_slug',))
total_count += len(create_list)
model_type.objects.bulk_create(create_list, update_conflicts=True, update_fields=field_list, unique_fields=('space', 'name',))
od_response.total_created += len(create_list)
# always add open data slug if matching supermarket is found, otherwise relation might fail
self._update_slug_cache(Supermarket, 'store')
@ -259,10 +311,22 @@ class OpenDataImporter:
SupermarketCategoryRelation.objects.bulk_create(relations, ignore_conflicts=True, unique_fields=('supermarket', 'category',))
return total_count
return od_response
def import_food(self):
od_response = OpenDataImportResponse()
datatype = 'food'
model_type = Food
field_list = ['name', 'open_data_slug']
existing_data_slugs = {}
existing_data_names = {}
for obj in model_type.objects.filter(space=self.request.space).values('pk', *field_list):
existing_data_slugs[obj['open_data_slug']] = obj
existing_data_names[obj['name']] = obj
update_list = []
create_list = []
self._update_slug_cache(Unit, 'unit')
self._update_slug_cache(PropertyType, 'property')
@ -270,140 +334,161 @@ class OpenDataImporter:
unit_g = Unit.objects.filter(space=self.request.space, base_unit__iexact='g').first()
existing_data = {}
existing_data_names = {}
for obj in Food.objects.filter(space=self.request.space).values('pk', 'name', 'open_data_slug'):
if 'open_data_slug' in obj:
existing_data[obj['open_data_slug']] = obj
existing_data_names[obj['name']] = obj
update_list = []
create_list = []
for k in list(self.data[datatype].keys()):
obj = {
obj_dict = {
'name': self.data[datatype][k]['name'],
'plural_name': self.data[datatype][k]['plural_name'] if self.data[datatype][k]['plural_name'] != '' else None,
'supermarket_category_id': self.slug_id_cache['category'][self.data[datatype][k]['store_category']],
'fdc_id': re.sub(r'\D', '', self.data[datatype][k]['fdc_id']) if self.data[datatype][k]['fdc_id'] != '' else None,
'open_data_slug': k,
'properties_food_unit_id': None,
'space': self.request.space.id,
'space_id': self.request.space.id,
}
if unit_g:
obj['properties_food_unit_id'] = unit_g.id
obj_dict['properties_food_unit_id'] = unit_g.id
if obj['open_data_slug'] in existing_data or obj['name'] in existing_data_names:
# rather rare edge cases object A has the slug and object B has the name which would lead to uniqueness errors
if obj['open_data_slug'] in existing_data and obj['name'] in existing_data_names and existing_data[obj['open_data_slug']]['pk'] != existing_data_names[obj['name']]['pk']:
# TODO this fails if objects are parent/children of a tree
# TODO this does not merge nicely (with data), write better merge method (or use generalized one if available when reading this)
source_obj = Food.objects.get(pk=existing_data[obj['open_data_slug']]['pk'])
del existing_data[obj['open_data_slug']]
source_obj.merge_into(Food.objects.get(pk=existing_data_names[obj['name']]['pk']))
obj = model_type(**obj_dict)
if obj['open_data_slug'] in existing_data:
obj['pk'] = existing_data[obj['open_data_slug']]['pk']
elif obj['name'] in existing_data_names:
obj['pk'] = existing_data_names[obj['name']]['pk']
if obj.open_data_slug in existing_data_slugs or obj.name in existing_data_names:
if not self._merge_if_conflicting(model_type, obj, existing_data_slugs, existing_data_names):
od_response.total_errored += 1
continue # if conflicting objects exist and cannot be merged skip object
obj['space'] = self.request.space
obj = Food(**obj)
update_list.append(obj)
existing_obj = self._get_existing_obj(obj, existing_data_slugs, existing_data_names)
if not self._is_obj_identical(field_list, obj, existing_obj):
obj.pk = existing_obj['pk']
update_list.append(obj)
else:
od_response.total_untouched += 1
else:
create_list.append({'data': obj})
create_list.append(obj_dict)
total_count = 0
if self.update_existing and len(update_list) > 0:
Food.objects.bulk_update(update_list, ['name', 'plural_name', 'properties_food_unit_id', 'supermarket_category_id', 'fdc_id', 'open_data_slug', ])
total_count += len(update_list)
model_type.objects.bulk_update(update_list, field_list)
od_response.total_updated += len(update_list)
if len(create_list) > 0:
Food.load_bulk(create_list, None)
total_count += len(create_list)
model_type.objects.bulk_create(create_list, update_conflicts=True, update_fields=field_list, unique_fields=('space', 'name',))
od_response.total_created += len(create_list)
# --------------- PROPERTY STUFF -----------------------
model_type = Property
field_list = ['property_type_id', 'property_amount', 'open_data_food_slug']
existing_data_slugs = {}
existing_data_property_types = {}
for obj in model_type.objects.filter(space=self.request.space).values('pk', *field_list):
existing_data_slugs[obj['open_data_food_slug']] = obj
existing_data_property_types[obj['property_type_id']] = obj
update_list = []
create_list = []
self._update_slug_cache(Food, 'food')
valid_food_ids = []
for f in self.slug_id_cache['food'].values():
valid_food_ids.append(f)
food_property_list = []
# alias_list = []
for k in list(self.data['food'].keys()):
for fp in self.data['food'][k]['properties']['type_values']:
obj = model_type(
property_type_id=self.slug_id_cache['property'][fp['property_type']],
property_amount=fp['property_value'],
open_data_food_slug=k,
space=self.request.space,
)
for k in list(self.data[datatype].keys()):
for fp in self.data[datatype][k]['properties']['type_values']:
# try catch here because sometimes key "k" is not set for the food cache
try:
if self.slug_id_cache['food'][k] == 2652:
pass
food_property_list.append(Property(
property_type_id=self.slug_id_cache['property'][fp['property_type']],
property_amount=fp['property_value'],
import_food_id=self.slug_id_cache['food'][k],
space=self.request.space,
))
except KeyError:
print(str(k) + ' is not in self.slug_id_cache["food"]')
if obj.open_data_food_slug in existing_data_slugs and obj.property_type_id in existing_data_property_types and existing_data_slugs[obj.open_data_food_slug] == existing_data_property_types[obj.property_type_id]:
existing_obj = existing_data_slugs[obj.open_data_food_slug]
Property.objects.bulk_create(food_property_list, ignore_conflicts=True, unique_fields=('space', 'import_food_id', 'property_type',))
if not self._is_obj_identical(field_list, obj, existing_obj):
obj.pk = existing_obj['pk']
update_list.append(obj)
else:
create_list.append(obj)
if self.update_existing and len(update_list) > 0:
model_type.objects.bulk_update(update_list, field_list)
if len(create_list) > 0:
model_type.objects.bulk_create(create_list, ignore_conflicts=True, unique_fields=('space', 'open_data_food_slug', 'property_type',))
linked_properties = list(model_type.objects.filter(space=self.request.space).values_list('id', flat=True).all())
property_food_relation_list = []
for p in Property.objects.filter(space=self.request.space, import_food_id__isnull=False).values_list('import_food_id', 'id', ):
# temporary fix to delete old, unlinked properties that were previously imported
# TODO find better solution (clearing import_food_id, not doing that in the first place?
if p[0] not in valid_food_ids:
Property.objects.filter(import_food_id=p[0]).delete()
else:
property_food_relation_list.append(Food.properties.through(food_id=p[0], property_id=p[1]))
for p in model_type.objects.filter(space=self.request.space, open_data_food_slug__isnull=False).values_list('open_data_food_slug', 'id', ):
# slug_id_cache should always exist, don't create relations for already linked properties (ignore_conflicts would do that as well but this is more performant)
if p[0] in self.slug_id_cache['food'] and p[1] not in linked_properties:
property_food_relation_list.append(Food.properties.through(food_id=self.slug_id_cache['food'][p[0]], property_id=p[1]))
FoodProperty.objects.bulk_create(property_food_relation_list, ignore_conflicts=True, unique_fields=('food_id', 'property_id',))
return total_count
return od_response
def import_conversion(self):
od_response = OpenDataImportResponse()
datatype = 'conversion'
model_type = UnitConversion
field_list = ['base_amount', 'base_unit_id', 'converted_amount', 'converted_unit_id', 'food_id', 'open_data_slug']
self._update_slug_cache(Food, 'food')
self._update_slug_cache(Unit, 'unit')
existing_data = {}
for obj in UnitConversion.objects.filter(space=self.request.space, open_data_slug__isnull=False).values('pk', 'open_data_slug'):
existing_data[obj['open_data_slug']] = obj
existing_data_slugs = {}
existing_data_foods = defaultdict(list)
for obj in model_type.objects.filter(space=self.request.space).values('pk', *field_list):
existing_data_slugs[obj['open_data_slug']] = obj
existing_data_foods[obj['food_id']].append(obj)
update_list = []
create_list = []
for k in list(self.data[datatype].keys()):
# try catch here because sometimes key "k" is not set for he food cache
# try catch here because sometimes key "k" is not set for the food cache
try:
obj = UnitConversion(
base_amount=self.data[datatype][k]['base_amount'],
obj = model_type(
base_amount=Decimal(self.data[datatype][k]['base_amount']),
base_unit_id=self.slug_id_cache['unit'][self.data[datatype][k]['base_unit']],
converted_amount=self.data[datatype][k]['converted_amount'],
converted_amount=Decimal(self.data[datatype][k]['converted_amount']),
converted_unit_id=self.slug_id_cache['unit'][self.data[datatype][k]['converted_unit']],
food_id=self.slug_id_cache['food'][self.data[datatype][k]['food']],
open_data_slug=k,
space=self.request.space,
created_by=self.request.user,
created_by_id=self.request.user.id,
)
if obj.open_data_slug in existing_data:
obj.pk = existing_data[obj.open_data_slug]['pk']
update_list.append(obj)
if obj.open_data_slug in existing_data_slugs:
existing_obj = existing_data_slugs[obj.open_data_slug]
if not self._is_obj_identical(field_list, obj, existing_obj):
obj.pk = existing_obj['pk']
update_list.append(obj)
else:
od_response.total_untouched += 1
else:
create_list.append(obj)
except KeyError:
print(str(k) + ' is not in self.slug_id_cache["food"]')
matching_existing_found = False
if obj.food_id in existing_data_foods:
for edf in existing_data_foods[obj.food_id]:
if obj.base_unit_id == edf['base_unit_id'] and obj.converted_unit_id == edf['converted_unit_id']:
matching_existing_found = True
if not self._is_obj_identical(field_list, obj, edf):
obj.pk = edf['pk']
update_list.append(obj)
else:
od_response.total_untouched += 1
if not matching_existing_found:
create_list.append(obj)
except KeyError as e:
traceback.print_exc()
od_response.total_errored += 1
print(self.data[datatype][k]['food'] + ' is not in self.slug_id_cache["food"]')
total_count = 0
if self.update_existing and len(update_list) > 0:
UnitConversion.objects.bulk_update(update_list, ('base_unit', 'base_amount', 'converted_unit', 'converted_amount', 'food',))
total_count += len(update_list)
od_response.total_updated = model_type.objects.bulk_update(update_list, field_list)
od_response.total_errored += len(update_list) - od_response.total_updated
if len(create_list) > 0:
UnitConversion.objects.bulk_create(create_list, ignore_conflicts=True, unique_fields=('space', 'base_unit', 'converted_unit', 'food', 'open_data_slug'))
total_count += len(create_list)
objs_created = model_type.objects.bulk_create(create_list, ignore_conflicts=True, unique_fields=('space', 'base_unit', 'converted_unit', 'food', 'open_data_slug'))
od_response.total_created = len(objs_created)
od_response.total_errored += len(create_list) - od_response.total_created
return total_count
return od_response