From 225de1f1700a011e225fc42b830b6e82cd9792a2 Mon Sep 17 00:00:00 2001 From: AndreasG Date: Tue, 19 Jan 2021 14:02:35 +0200 Subject: [PATCH 1/2] Add additional flag to ignore tables in data comparison --- pgdatadiff/main.py | 6 ++++-- pgdatadiff/pgdatadiff.py | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pgdatadiff/main.py b/pgdatadiff/main.py index db41464..eff46a2 100644 --- a/pgdatadiff/main.py +++ b/pgdatadiff/main.py @@ -1,6 +1,6 @@ """ Usage: - pgdatadiff --firstdb= --seconddb= [--only-data|--only-sequences] [--count-only] [--chunk-size=] + pgdatadiff --firstdb= --seconddb= [--only-data|--only-sequences] [--count-only] [--chunk-size=] [--exclude-tables=] pgdatadiff --version Options: @@ -10,6 +10,7 @@ --seconddb=postgres://postgres:password@localhost/seconddb The connection string of the second DB --only-data Only compare data, exclude sequences --only-sequences Only compare seqences, exclude data + --exclude-tables="" Exclude tables from data comparison Must be a comma separated string [default: empty string] --count-only Do a quick test based on counts alone --chunk-size=10000 The chunk size when comparing data [default: 10000] """ @@ -33,7 +34,8 @@ def main(): differ = DBDiff(first_db_connection_string, second_db_connection_string, chunk_size=arguments['--chunk-size'], - count_only=arguments['--count-only']) + count_only=arguments['--count-only'], + exclude_tables=arguments['--exclude-tables']) if not arguments['--only-sequences']: if differ.diff_all_table_data(): diff --git a/pgdatadiff/pgdatadiff.py b/pgdatadiff/pgdatadiff.py index 1bb9be1..cfaff37 100644 --- a/pgdatadiff/pgdatadiff.py +++ b/pgdatadiff/pgdatadiff.py @@ -1,6 +1,6 @@ import warnings -from fabulous.color import bold, green, red +from fabulous.color import bold, green, red, yellow from halo import Halo from sqlalchemy import exc as sa_exc from sqlalchemy.engine import create_engine @@ -19,7 +19,7 @@ def make_session(connection_string): class DBDiff(object): - def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False): + def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False, exclude_tables=""): firstsession, firstengine = make_session(firstdb) secondsession, secondengine = make_session(seconddb) self.firstsession = firstsession @@ -32,6 +32,7 @@ def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False): self.secondinspector = inspect(secondengine) self.chunk_size = int(chunk_size) self.count_only = count_only + self.exclude_tables = exclude_tables.split(',') def diff_table_data(self, tablename): try: @@ -142,6 +143,9 @@ def diff_all_table_data(self): tables = sorted( self.firstinspector.get_table_names(schema="public")) for table in tables: + if table in self.exclude_tables: + print(bold(yellow(f"Ignoring table {table}"))) + continue with Halo( text=f"Analysing table {table}. " f"[{tables.index(table) + 1}/{len(tables)}]", From 43fbf168ae17436fa5616510438e02b7b235f2d0 Mon Sep 17 00:00:00 2001 From: AndreasG Date: Tue, 9 Feb 2021 13:32:46 +0200 Subject: [PATCH 2/2] Add argument to specify schema name --- pgdatadiff/main.py | 6 ++++-- pgdatadiff/pgdatadiff.py | 9 +++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pgdatadiff/main.py b/pgdatadiff/main.py index eff46a2..c111f23 100644 --- a/pgdatadiff/main.py +++ b/pgdatadiff/main.py @@ -1,6 +1,6 @@ """ Usage: - pgdatadiff --firstdb= --seconddb= [--only-data|--only-sequences] [--count-only] [--chunk-size=] [--exclude-tables=] + pgdatadiff --firstdb= --seconddb= [--schema=] [--only-data|--only-sequences] [--count-only] [--chunk-size=] [--exclude-tables=] pgdatadiff --version Options: @@ -8,6 +8,7 @@ --version Show version. --firstdb=postgres://postgres:password@localhost/firstdb The connection string of the first DB --seconddb=postgres://postgres:password@localhost/seconddb The connection string of the second DB + --schema="public" The schema of tables in comparison --only-data Only compare data, exclude sequences --only-sequences Only compare seqences, exclude data --exclude-tables="" Exclude tables from data comparison Must be a comma separated string [default: empty string] @@ -35,7 +36,8 @@ def main(): differ = DBDiff(first_db_connection_string, second_db_connection_string, chunk_size=arguments['--chunk-size'], count_only=arguments['--count-only'], - exclude_tables=arguments['--exclude-tables']) + exclude_tables=arguments['--exclude-tables'], + schema=arguments['--schema']) if not arguments['--only-sequences']: if differ.diff_all_table_data(): diff --git a/pgdatadiff/pgdatadiff.py b/pgdatadiff/pgdatadiff.py index cfaff37..6f59f13 100644 --- a/pgdatadiff/pgdatadiff.py +++ b/pgdatadiff/pgdatadiff.py @@ -19,7 +19,7 @@ def make_session(connection_string): class DBDiff(object): - def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False, exclude_tables=""): + def __init__(self, firstdb, seconddb, schema, chunk_size=10000, count_only=False, exclude_tables=""): firstsession, firstengine = make_session(firstdb) secondsession, secondengine = make_session(seconddb) self.firstsession = firstsession @@ -33,6 +33,7 @@ def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False, exclud self.chunk_size = int(chunk_size) self.count_only = count_only self.exclude_tables = exclude_tables.split(',') + self.schema = schema or 'public' def diff_table_data(self, tablename): try: @@ -62,7 +63,7 @@ def diff_table_data(self, tablename): SELECT md5(array_agg(md5((t.*)::varchar))::varchar) FROM ( SELECT * - FROM {tablename} + FROM {self.schema}.{tablename} ORDER BY {pk} limit :row_limit offset :row_offset ) AS t; """ @@ -91,7 +92,7 @@ def get_all_sequences(self): self.firstsession.execute(GET_SEQUENCES_SQL).fetchall()] def diff_sequence(self, seq_name): - GET_SEQUENCES_VALUE_SQL = f"SELECT last_value FROM {seq_name};" + GET_SEQUENCES_VALUE_SQL = f"SELECT last_value FROM {self.schema}.{seq_name};" try: firstvalue = \ @@ -141,7 +142,7 @@ def diff_all_table_data(self): with warnings.catch_warnings(): warnings.simplefilter("ignore", category=sa_exc.SAWarning) tables = sorted( - self.firstinspector.get_table_names(schema="public")) + self.firstinspector.get_table_names(schema=self.schema)) for table in tables: if table in self.exclude_tables: print(bold(yellow(f"Ignoring table {table}")))