From 099eb14eb3034e9fb7b5b35db0cf45a28f2a6cd6 Mon Sep 17 00:00:00 2001 From: cre-os Date: Tue, 28 Jan 2025 18:19:46 +0100 Subject: [PATCH 1/2] wip --- src/xml2db/document.py | 11 +---- src/xml2db/xml_converter.py | 91 +++++++++++++++++++++++++++++-------- 2 files changed, 74 insertions(+), 28 deletions(-) diff --git a/src/xml2db/document.py b/src/xml2db/document.py index d0a4f97..43c0de8 100644 --- a/src/xml2db/document.py +++ b/src/xml2db/document.py @@ -179,16 +179,7 @@ def _extract_node( else key ) if content_key in content: - if model_table.columns[key].data_type in ["decimal", "float"]: - val = [float(v) for v in content[content_key]] - elif model_table.columns[key].data_type == "integer": - val = [int(v) for v in content[content_key]] - elif model_table.columns[key].data_type == "boolean": - val = [ - v == "true" or v == "1" for v in content[content_key] - ] - else: - val = content[content_key] + val = content[content_key] if len(val) == 1: record[key] = val[0] diff --git a/src/xml2db/xml_converter.py b/src/xml2db/xml_converter.py index ab54a7c..bc850b8 100644 --- a/src/xml2db/xml_converter.py +++ b/src/xml2db/xml_converter.py @@ -302,6 +302,39 @@ def _transform_node(self, node_type: str, content: dict) -> tuple: child_key, val = list(content.items())[0] content = {"type": [child_key], "value": val} + # convert some simple types to python types + if node_type in self.model.tables: + table = self.model.tables[node_type] + for key in table.columns: + content_key = ( + ( + f"{key[:-5]}__attr" + if table.columns[key].has_suffix + else f"{key}__attr" + ) + if table.columns[key].is_attr + else key + ) + if content_key in content: + if table.columns[key].data_type in ["decimal", "float"]: + content[content_key] = [float(v) for v in content[content_key]] + elif table.columns[key].data_type in [ + "integer", + "int", + "nonPositiveInteger", + "nonNegativeInteger", + "positiveInteger", + "negativeInteger", + "short", + "byte", + "long", + ]: + content[content_key] = [int(v) for v in content[content_key]] + elif table.columns[key].data_type == "boolean": + content[content_key] = [ + v == "true" or v == "1" for v in content[content_key] + ] + return node_type, content def _compute_hash_deduplicate(self, node: tuple, hash_maps: dict) -> tuple: @@ -459,25 +492,47 @@ def check_transformed_node(node_type, element): attributes = {} text_content = None if field_type == "col": - if rel.is_attr: - if rel.has_suffix and f"{rel_name[:-5]}__attr" in content: - attributes[rel.name_chain[-1][0][:-5]] = content[ - f"{rel_name[:-5]}__attr" - ][0] - elif not rel.has_suffix and f"{rel_name}__attr" in content: - attributes[rel.name_chain[-1][0]] = content[ - f"{rel_name}__attr" - ][0] - elif rel_name in content: - if rel.is_content: - text_content = content[rel_name][0] + content_key = ( + ( + f"{rel_name[:-5]}__attr" + if rel.has_suffix + else f"{rel_name}__attr" + ) + if rel.is_attr + else rel_name + ) + if content_key in content: + if rel.data_type in [ + "decimal", + "float", + ]: # remove trailing ".0" for decimal and float + val = ( + value.rstrip("0").rstrip(".") if "." in value else value + for value in str(content[content_key]).split(",") + ) + elif isinstance(content[content_key], datetime.datetime): + val = content[content_key].isoformat(timespec="milliseconds") else: - for field_value in content[rel_name]: - child = etree.Element(rel.name_chain[-1][0]) - if isinstance(field_value, datetime): - field_value = field_value.isoformat() - child.text = str(field_value).encode("utf-8") - children.append(child) + val = str(content[content_key]) + if rel.is_attr: + if rel.has_suffix: + attributes[rel.name_chain[-1][0][:-5]] = content[ + f"{rel_name[:-5]}__attr" + ][0] + elif not rel.has_suffix and f"{rel_name}__attr" in content: + attributes[rel.name_chain[-1][0]] = content[ + f"{rel_name}__attr" + ][0] + elif rel_name in content: + if rel.is_content: + text_content = content[rel_name][0] + else: + for field_value in content[rel_name]: + child = etree.Element(rel.name_chain[-1][0]) + if isinstance(field_value, datetime): + field_value = field_value.isoformat() + child.text = str(field_value).encode("utf-8") + children.append(child) elif field_type == "rel1": if rel_name in content: child = self._make_xml_node( From e92969fa191ea83ec4e68863d8a043f5508a21cf Mon Sep 17 00:00:00 2001 From: cre-os Date: Tue, 4 Feb 2025 09:27:57 +0100 Subject: [PATCH 2/2] move conversion before hashing --- src/xml2db/document.py | 12 ++--------- src/xml2db/xml_converter.py | 41 ++++++++++++++++--------------------- 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/src/xml2db/document.py b/src/xml2db/document.py index 43c0de8..5471737 100644 --- a/src/xml2db/document.py +++ b/src/xml2db/document.py @@ -327,15 +327,7 @@ def _build_node(node_type: str, node_pk: int) -> tuple: if rel.is_attr else rel_name ) - if rel.data_type in [ - "decimal", - "float", - ]: # remove trailing ".0" for decimal and float - content[content_key] = [ - value.rstrip("0").rstrip(".") if "." in value else value - for value in str(record[rel_name]).split(",") - ] - elif isinstance(record[rel_name], datetime.datetime): + if isinstance(record[rel_name], datetime.datetime): content[content_key] = [ record[rel_name].isoformat(timespec="milliseconds") ] @@ -345,7 +337,7 @@ def _build_node(node_type: str, node_pk: int) -> tuple: 0 ] if "," in str(record[rel_name]) - else [str(record[rel_name])] + else [record[rel_name]] ) elif ( field_type == "rel1" diff --git a/src/xml2db/xml_converter.py b/src/xml2db/xml_converter.py index bc850b8..1dbbf8a 100644 --- a/src/xml2db/xml_converter.py +++ b/src/xml2db/xml_converter.py @@ -506,33 +506,28 @@ def check_transformed_node(node_type, element): "decimal", "float", ]: # remove trailing ".0" for decimal and float - val = ( - value.rstrip("0").rstrip(".") if "." in value else value - for value in str(content[content_key]).split(",") - ) - elif isinstance(content[content_key], datetime.datetime): - val = content[content_key].isoformat(timespec="milliseconds") + val = str(content[content_key][0]) + val = [val.rstrip("0").rstrip(".") if "." in val else val] + elif isinstance(content[content_key][0], datetime): + val = [ + content[content_key][0].isoformat(timespec="milliseconds") + ] else: - val = str(content[content_key]) + val = content[content_key] if rel.is_attr: if rel.has_suffix: - attributes[rel.name_chain[-1][0][:-5]] = content[ - f"{rel_name[:-5]}__attr" - ][0] - elif not rel.has_suffix and f"{rel_name}__attr" in content: - attributes[rel.name_chain[-1][0]] = content[ - f"{rel_name}__attr" - ][0] - elif rel_name in content: - if rel.is_content: - text_content = content[rel_name][0] + attributes[rel.name_chain[-1][0][:-5]] = val[0] else: - for field_value in content[rel_name]: - child = etree.Element(rel.name_chain[-1][0]) - if isinstance(field_value, datetime): - field_value = field_value.isoformat() - child.text = str(field_value).encode("utf-8") - children.append(child) + attributes[rel.name_chain[-1][0]] = val[0] + elif rel.is_content: + text_content = val[0] + else: + for field_value in val: + child = etree.Element(rel.name_chain[-1][0]) + if isinstance(field_value, datetime): + field_value = field_value.isoformat() + child.text = str(field_value).encode("utf-8") + children.append(child) elif field_type == "rel1": if rel_name in content: child = self._make_xml_node(