diff --git a/pgdatadiff/main.py b/pgdatadiff/main.py index db41464..c111f23 100644 --- a/pgdatadiff/main.py +++ b/pgdatadiff/main.py @@ -1,6 +1,6 @@ """ Usage: - pgdatadiff --firstdb= --seconddb= [--only-data|--only-sequences] [--count-only] [--chunk-size=] + pgdatadiff --firstdb= --seconddb= [--schema=] [--only-data|--only-sequences] [--count-only] [--chunk-size=] [--exclude-tables=] pgdatadiff --version Options: @@ -8,8 +8,10 @@ --version Show version. --firstdb=postgres://postgres:password@localhost/firstdb The connection string of the first DB --seconddb=postgres://postgres:password@localhost/seconddb The connection string of the second DB + --schema="public" The schema of tables in comparison --only-data Only compare data, exclude sequences --only-sequences Only compare seqences, exclude data + --exclude-tables="" Exclude tables from data comparison Must be a comma separated string [default: empty string] --count-only Do a quick test based on counts alone --chunk-size=10000 The chunk size when comparing data [default: 10000] """ @@ -33,7 +35,9 @@ def main(): differ = DBDiff(first_db_connection_string, second_db_connection_string, chunk_size=arguments['--chunk-size'], - count_only=arguments['--count-only']) + count_only=arguments['--count-only'], + exclude_tables=arguments['--exclude-tables'], + schema=arguments['--schema']) if not arguments['--only-sequences']: if differ.diff_all_table_data(): diff --git a/pgdatadiff/pgdatadiff.py b/pgdatadiff/pgdatadiff.py index 1bb9be1..6f59f13 100644 --- a/pgdatadiff/pgdatadiff.py +++ b/pgdatadiff/pgdatadiff.py @@ -1,6 +1,6 @@ import warnings -from fabulous.color import bold, green, red +from fabulous.color import bold, green, red, yellow from halo import Halo from sqlalchemy import exc as sa_exc from sqlalchemy.engine import create_engine @@ -19,7 +19,7 @@ def make_session(connection_string): class DBDiff(object): - def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False): + def __init__(self, firstdb, seconddb, schema, chunk_size=10000, count_only=False, exclude_tables=""): firstsession, firstengine = make_session(firstdb) secondsession, secondengine = make_session(seconddb) self.firstsession = firstsession @@ -32,6 +32,8 @@ def __init__(self, firstdb, seconddb, chunk_size=10000, count_only=False): self.secondinspector = inspect(secondengine) self.chunk_size = int(chunk_size) self.count_only = count_only + self.exclude_tables = exclude_tables.split(',') + self.schema = schema or 'public' def diff_table_data(self, tablename): try: @@ -61,7 +63,7 @@ def diff_table_data(self, tablename): SELECT md5(array_agg(md5((t.*)::varchar))::varchar) FROM ( SELECT * - FROM {tablename} + FROM {self.schema}.{tablename} ORDER BY {pk} limit :row_limit offset :row_offset ) AS t; """ @@ -90,7 +92,7 @@ def get_all_sequences(self): self.firstsession.execute(GET_SEQUENCES_SQL).fetchall()] def diff_sequence(self, seq_name): - GET_SEQUENCES_VALUE_SQL = f"SELECT last_value FROM {seq_name};" + GET_SEQUENCES_VALUE_SQL = f"SELECT last_value FROM {self.schema}.{seq_name};" try: firstvalue = \ @@ -140,8 +142,11 @@ def diff_all_table_data(self): with warnings.catch_warnings(): warnings.simplefilter("ignore", category=sa_exc.SAWarning) tables = sorted( - self.firstinspector.get_table_names(schema="public")) + self.firstinspector.get_table_names(schema=self.schema)) for table in tables: + if table in self.exclude_tables: + print(bold(yellow(f"Ignoring table {table}"))) + continue with Halo( text=f"Analysing table {table}. " f"[{tables.index(table) + 1}/{len(tables)}]",