From d57a28e4fb3654063bdef3c3fa4d2286cc4bbc09 Mon Sep 17 00:00:00 2001 From: Marek Madejski Date: Tue, 24 Jan 2023 13:54:43 +0100 Subject: [PATCH] Update ChangeEventHeaderUtility.py More Pythonic; added type annotations. --- README.md | 4 +-- python/util/ChangeEventHeaderUtility.py | 36 +++++++++++-------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 86460e3..1b28f5c 100644 --- a/README.md +++ b/README.md @@ -23,12 +23,12 @@ officially supported languages have well-supported Avro libraries: |C++|[Apache Avro C++](https://avro.apache.org/docs/current/api/cpp/html/index.html)| |Dart|[avro-dart](https://github.com/sqs/avro-dart) (last updated 2012)| |Go|[goavro](https://github.com/linkedin/goavro)| -|Java|[Apache Avro Java](https://avro.apache.org/docs/1.10.2/gettingstartedjava.html)| +|Java|[Apache Avro Java](https://avro.apache.org/docs/current/getting-started-java/)| |Kotlin|[avro4k](https://github.com/avro-kotlin/avro4k)| |Node|[avro-js](https://www.npmjs.com/package/avro-js)| |Objective C|[ObjectiveAvro](https://github.com/jlawton/ObjectiveAvro) (but read [this](https://stackoverflow.com/questions/57216446/data-serialisation-in-objective-c-avro-alternative))| |PHP|[avro-php](https://github.com/wikimedia/avro-php)| -|Python|[Apache Avro Python](https://avro.apache.org/docs/current/gettingstartedpython.html)| +|Python|[Apache Avro Python](https://avro.apache.org/docs/current/getting-started-python/)| |Ruby|[AvroTurf](https://github.com/dasch/avro_turf)| ## Documentation, Blog Post and Videos diff --git a/python/util/ChangeEventHeaderUtility.py b/python/util/ChangeEventHeaderUtility.py index eb98ab9..bfb0adf 100644 --- a/python/util/ChangeEventHeaderUtility.py +++ b/python/util/ChangeEventHeaderUtility.py @@ -9,16 +9,16 @@ from bitstring import BitArray -def process_bitmap(avro_schema: Schema, bitmap_fields: list): +def process_bitmap(avro_schema: Schema, bitmap_fields: list[str]) -> list[str]: fields = [] - if len(bitmap_fields) != 0: + if bitmap_fields: # replace top field level bitmap with list of fields if bitmap_fields[0].startswith("0x"): bitmap = bitmap_fields[0] - fields = fields + get_fieldnames_from_bitstring(bitmap, avro_schema) + fields += get_fieldnames_from_bitstring(bitmap, avro_schema) bitmap_fields.remove(bitmap) # replace parentPos-nested Nulled BitMap with list of fields too - if len(bitmap_fields) != 0 and "-" in str(bitmap_fields[-1]): + if bitmap_fields and "-" in str(bitmap_fields[-1]): for bitmap_field in bitmap_fields: if bitmap_field is not None and "-" in str(bitmap_field): bitmap_strings = bitmap_field.split("-") @@ -31,33 +31,29 @@ def process_bitmap(avro_schema: Schema, bitmap_fields: list): parent_field_name = parent_field.name # interpret the child field names from mapping of parentFieldPos -> childFieldbitMap full_field_names = get_fieldnames_from_bitstring(bitmap_strings[1], child_schema) - full_field_names = append_parent_name(parent_field_name, full_field_names) - if len(full_field_names) > 0: + append_parent_name(parent_field_name, full_field_names) + if full_field_names: # when all nested fields under a compound got nulled out at once by customer, we recognize the top level field instead of trying to list every single nested field - fields = fields + full_field_names + fields += full_field_names return fields -def convert_hexbinary_to_bitset(bitmap): +def convert_hexbinary_to_bitset(bitmap: str) -> str: bit_array = BitArray(hex=bitmap[2:]) binary_string = bit_array.bin return binary_string[::-1] -def append_parent_name(parent_field_name, full_field_names): +def append_parent_name(parent_field_name: str, full_field_names: list[str]): for index in range(len(full_field_names)): - full_field_names[index] = parent_field_name + "." + full_field_names[index] - return full_field_names + full_field_names[index] = f'{parent_field_name}.{full_field_names[index]}' -def get_fieldnames_from_bitstring(bitmap, avro_schema: Schema): - bitmap_field_name = [] +def get_fieldnames_from_bitstring(bitmap: str, avro_schema: Schema) -> list[str]: fields_list = list(avro_schema.fields) binary_string = convert_hexbinary_to_bitset(bitmap) indexes = find('1', binary_string) - for index in indexes: - bitmap_field_name.append(fields_list[index].name) - return bitmap_field_name + return [fields_list[index].name for index in indexes] # Get the value type of an "optional" schema, which is a union of [null, valueSchema] @@ -66,13 +62,13 @@ def get_value_schema(parent_field): schemas = parent_field.schemas if len(schemas) == 2 and schemas[0].type == 'null': return schemas[1] - if len(schemas) == 2 and schemas[0].type == 'string': + elif len(schemas) == 2 and schemas[0].type == 'string': return schemas[1] - if len(schemas) == 3 and schemas[0].type == 'null' and schemas[1].type == 'string': + elif len(schemas) == 3 and schemas[0].type == 'null' and schemas[1].type == 'string': return schemas[2] return parent_field # Find the positions of 1 in the bit string -def find(to_find, binary_string): - return [i for i, x in enumerate(binary_string) if x == to_find] \ No newline at end of file +def find(to_find: str, binary_string: str): + return (i for i, x in enumerate(binary_string) if x == to_find)