diff --git a/seed/models/columns.py b/seed/models/columns.py index 39e42d35b3..725e641c81 100644 --- a/seed/models/columns.py +++ b/seed/models/columns.py @@ -1126,6 +1126,7 @@ def create_mappings(mappings, organization, user, import_file_id=None): "from_units": mapping.get("from_units"), "to_field": mapping["to_field"], "to_table_name": mapping["to_table_name"], + "to_data_type": mapping.get("to_data_type"), } ) else: diff --git a/seed/static/seed/js/controllers/mapping_controller.js b/seed/static/seed/js/controllers/mapping_controller.js index 86e88c4b9e..5f293d9818 100644 --- a/seed/static/seed/js/controllers/mapping_controller.js +++ b/seed/static/seed/js/controllers/mapping_controller.js @@ -250,6 +250,7 @@ angular.module('SEED.controller.mapping', []).controller('mapping_controller', [ $scope.import_file = import_file_payload.import_file; $scope.import_file.matching_finished = false; $scope.suggested_mappings = suggested_mappings_payload.suggested_column_mappings; + $scope.mapping_error_messages = null; $scope.raw_columns = raw_columns_payload.raw_columns; $scope.mappable_property_columns = suggested_mappings_payload.property_columns; @@ -702,6 +703,7 @@ angular.module('SEED.controller.mapping', []).controller('mapping_controller', [ * after saving column mappings, deletes unmatched buildings */ $scope.remap_buildings = () => { + $scope.mapping_error_messages = null; mapping_service.save_mappings($scope.import_file.id, $scope.get_mappings()).then((mapping_result) => { if (mapping_result.status === 'error' || mapping_result.status === 'warning') { return; @@ -750,7 +752,10 @@ angular.module('SEED.controller.mapping', []).controller('mapping_controller', [ progress_key, 0, 1, - $scope.get_cached_mapped_buildings, + (response) => { + $scope.check_mapping_for_nulls(); + $scope.get_cached_mapped_buildings(response); + }, () => {}, $scope.import_file ); @@ -760,6 +765,17 @@ angular.module('SEED.controller.mapping', []).controller('mapping_controller', [ }); }; + 
$scope.check_mapping_for_nulls = () => { + $scope.checking_for_nulls = true; + data_quality_service.check_mapping_for_nulls($scope.organization.id, $scope.import_file.id) + .then((response) => { + $scope.mapping_error_messages = response.status === 'warning' ? response.message : null; + }) + .finally(() => { + $scope.checking_for_nulls = false; + }); + }; + $scope.get_cached_mapped_buildings = ({ unique_id }) => { cache_entry_service.get_cache_entry(unique_id) .then($scope.set_mapped_buildings) @@ -899,6 +915,7 @@ angular.module('SEED.controller.mapping', []).controller('mapping_controller', [ col.suggestion_column_name = cached_col.to_field; col.suggestion_table_name = cached_col.to_table_name; col.from_units = cached_col.from_units; + col.data_type = cached_col.to_data_type; // If available, use display_name, else use raw field name. const mappable_column = _.find($scope.mappable_property_columns.concat($scope.mappable_taxlot_columns), { column_name: cached_col.to_field, table_name: cached_col.to_table_name }); diff --git a/seed/static/seed/js/services/data_quality_service.js b/seed/static/seed/js/services/data_quality_service.js index 1c5a9bb262..27259c0993 100644 --- a/seed/static/seed/js/services/data_quality_service.js +++ b/seed/static/seed/js/services/data_quality_service.js @@ -95,6 +95,10 @@ angular.module('SEED.service.data_quality', []).factory('data_quality_service', return deferred.promise; }; + data_quality_factory.check_mapping_for_nulls = (org_id, import_file_id) => $http + .post(`/api/v3/import_files/${import_file_id}/verify_data_type_mapping/?organization=${org_id}`) + .then((response) => response.data); + return data_quality_factory; } ]); diff --git a/seed/static/seed/partials/mapping.html b/seed/static/seed/partials/mapping.html index e64667afa0..dce56b56f7 100644 --- a/seed/static/seed/partials/mapping.html +++ b/seed/static/seed/partials/mapping.html @@ -226,11 +226,10 @@
-
+
+ + +
Mapped Fields class="pull-right btn btn-primary" ng-click="open_data_upload_modal(import_file.dataset)" ng-hide="import_file.matching_done" - translate > - Save Mappings + + Save Mappings
diff --git a/seed/tests/test_import_file_views.py b/seed/tests/test_import_file_views.py index 6e7fea3b93..436bd36c13 100644 --- a/seed/tests/test_import_file_views.py +++ b/seed/tests/test_import_file_views.py @@ -1060,6 +1060,17 @@ def test_unmatch(self): # # verify that the coparent id is now in the view # self.assertTrue(prop.exists()) + def test_verify_data_type_mapping(self): + self.assertEqual(self.import_file.mapping_error_messages, None) + url = reverse("api:v3:import_files-verify-data-type-mapping", args=[self.import_file.pk]) + url += f"?organization_id={self.org.pk}" + resp = self.client.post(url, content_type="application/json") + self.assertEqual(resp.status_code, 200) + # request modifies import_file + self.import_file.refresh_from_db() + exp_errs = "Blank values detected in columns: [ ENERGY STAR Score, Gross Floor Area, Recent Sale Date ]. Review import file for data type mismatches or click Save Mappings to import as displayed below." + self.assertEqual(self.import_file.mapping_error_messages, exp_errs) + class TestImportFileViewSetPermissions(AccessLevelBaseTestCase, DataMappingBaseTestCase): def setUp(self): diff --git a/seed/utils/import_file.py b/seed/utils/import_file.py index 015ddfaf20..aa511c43be 100644 --- a/seed/utils/import_file.py +++ b/seed/utils/import_file.py @@ -1,7 +1,18 @@ import json import logging -from seed.models import Column, ColumnMapping, ImportFile +from django.db.models import Count, Q + +from seed.models import ( + DATA_STATE_DELETE, + DATA_STATE_IMPORT, + DATA_STATE_UNKNOWN, + Column, + ColumnMapping, + ImportFile, + PropertyState, + TaxLotState, +) def get_import_file_table_mappings(import_file_id): @@ -55,3 +66,73 @@ def get_import_file_table_mappings(import_file_id): result.setdefault("", {})[from_field] = mapping_data return result + + +def verify_data_types(org_id, import_file_id): + """ + Verify that non-text columns don't contain null values, indicative of data type mapping errors. 
+ + Checks all mapped columns with numeric/date data types (excluding string and extra_data fields) + to identify null values that may result from failed type parsing. For example, if a column + is mapped to a numeric field but contains non-numeric text, the parsing will fail and store + null, indicating a potential mapping mistake. + + If blank values are detected, sets a warning message on the import file's + mapping_error_messages field with a list of affected columns. + """ + import_file = ImportFile.objects.filter(id=import_file_id, import_record__super_organization_id=org_id).first() + if not import_file: + return + + import_file.mapping_error_messages = None + import_file.save() + + mapped_cols = import_file.get_cached_mapped_columns + if not mapped_cols: + return + + propertystate_ids = list( + PropertyState.objects.filter(import_file=import_file) + .exclude(data_state__in=[DATA_STATE_UNKNOWN, DATA_STATE_IMPORT, DATA_STATE_DELETE]) + .values_list("id", flat=True) + ) + taxlotstate_ids = list( + TaxLotState.objects.filter(import_file=import_file) + .exclude(data_state__in=[DATA_STATE_UNKNOWN, DATA_STATE_IMPORT, DATA_STATE_DELETE]) + .values_list("id", flat=True) + ) + + if not propertystate_ids and not taxlotstate_ids: + return + + # {column_name: display_name, ...} for canonical cols with numeric (non-text) data types + column_map = dict( + Column.objects.filter(organization_id=org_id, is_extra_data=False, derived_column_id__isnull=True) + .exclude(data_type__in=["string", "None"]) + .exclude(table_name="") + .values_list("column_name", "display_name") + ) + # Check columns that are within import file's mapping AND column_map + canonical_column_names = set(column_map.keys()) + property_column_names = [col_name for table, col_name in mapped_cols if table == "PropertyState" and col_name in canonical_column_names] + taxlot_column_names = [col_name for table, col_name in mapped_cols if table == "TaxLotState" and col_name in canonical_column_names] + + 
columns_with_blanks = set() + + # create aggregations to check if null values exist for the selected column names + # run query against import record inventory and count results + if property_column_names: + property_null_checks = {f"{field}_null": Count("id", filter=Q(**{f"{field}__isnull": True})) for field in property_column_names} + property_counts = PropertyState.objects.filter(id__in=propertystate_ids).aggregate(**property_null_checks) + columns_with_blanks.update([column_map[field] for field in property_column_names if property_counts[f"{field}_null"]]) + + if taxlot_column_names: + taxlot_null_checks = {f"{field}_null": Count("id", filter=Q(**{f"{field}__isnull": True})) for field in taxlot_column_names} + taxlot_counts = TaxLotState.objects.filter(id__in=taxlotstate_ids).aggregate(**taxlot_null_checks) + columns_with_blanks.update([column_map[field] for field in taxlot_column_names if taxlot_counts[f"{field}_null"]]) + + if columns_with_blanks: + col_string = ", ".join(sorted(columns_with_blanks)) + err_msg = f"Blank values detected in columns: [ {col_string} ]. Review import file for data type mismatches or click Save Mappings to import as displayed below." 
+ import_file.mapping_error_messages = err_msg + import_file.save() diff --git a/seed/views/v3/import_files.py b/seed/views/v3/import_files.py index 387af65680..0c727531f2 100644 --- a/seed/views/v3/import_files.py +++ b/seed/views/v3/import_files.py @@ -57,6 +57,7 @@ ) from seed.utils.api import OrgMixin, api_endpoint from seed.utils.api_schema import AutoSchemaHelper, swagger_auto_schema_org_query_param +from seed.utils.import_file import verify_data_types _log = logging.getLogger(__name__) @@ -1171,6 +1172,25 @@ def system_meter_upload(self, request, pk): status=status.HTTP_200_OK, ) + @swagger_auto_schema(manual_parameters=[AutoSchemaHelper.query_org_id_field()]) + @action(detail=True, methods=["POST"]) + def verify_data_type_mapping(self, request, pk): + """ + Verify that non-text columns don't contain null values, indicative of data type mapping errors. + """ + org_id = self.get_organization(request) + + try: + import_file = ImportFile.objects.get(pk=pk, import_record__super_organization_id=org_id) + except ImportFile.DoesNotExist: + return JsonResponse({"status": "error", "message": "No such resource."}, status=status.HTTP_400_BAD_REQUEST) + verify_data_types(org_id, import_file.id) + import_file.refresh_from_db() + + warnings = import_file.mapping_error_messages + response_status = "warning" if warnings else "success" + return JsonResponse({"status": response_status, "message": warnings}) + def get_conversion_factor(type_name, unit, _kbtu_thermal_conversion_factors, _kgal_water_conversion_factors): thermal_conversion_factor = _kbtu_thermal_conversion_factors.get(type_name, {}).get(unit, None)