|
| 1 | +#coding:utf-8 |
| 2 | + |
| 3 | +""" |
| 4 | +ID: issue-8061 |
| 5 | +ISSUE: https://github.com/FirebirdSQL/firebird/pull/8061 |
| 6 | +TITLE: UNNEST subqueries invalidation. Examples when unnesting can NOT be used. |
| 7 | +DESCRIPTION: |
| 8 | + Test uses DDL and data from employee DB but they have been extracted to .sql and stored in |
| 9 | + files/standard_sample_databases.zip (file: "sample-DB_-_firebird.sql"). |
| 10 | + Default names of all constraints were replaced in order to easy find appropriate table. |
| 11 | +
|
| 12 | + Examples for this test based on |
| 13 | + 1) https://blogs.oracle.com/optimizer/post/optimizer-transformations-subquery-unesting-part-2 |
| 14 | + (paragraph "Validity of Unnesting") |
| 15 | + 2) https://jonathanlewis.wordpress.com/2007/02/26/subquery-with-or/ |
| 16 | +NOTES: |
| 17 | + 1. One need to change config parameter SubQueryConversion to 'true' when check FB 5.x. |
| 18 | + 2. Commits: |
| 19 | + 6.x: |
| 20 | + 22.03.2025 10:47 |
| 21 | + https://github.com/FirebirdSQL/firebird/commit/fc12c0ef392fec9c83d41bc17da3dc233491498c |
| 22 | + (Unnest IN/ANY/EXISTS subqueries and optimize them using semi-join algorithm (#8061)) |
| 23 | + 5.x |
| 24 | + 31.07.2024 09:46 |
| 25 | + https://github.com/FirebirdSQL/firebird/commit/4943b3faece209caa93cc9573803677019582f1c |
| 26 | + (Added support for semi/anti and outer joins to hash join algorithm ...) |
| 27 | + Also: |
| 28 | + 14.09.2024 09:24 |
| 29 | + https://github.com/FirebirdSQL/firebird/commit/5fa4ae611d18fd4ce9aac1c8dbc79e5fea2bc1f2 |
| 30 | + (Fix bug #8252: Incorrect subquery unnesting with complex dependencies) |
| 31 | + |
| 32 | + Checked on 6.0.0.735, 5.0.3.1647 |
| 33 | +""" |
| 34 | + |
| 35 | +import pytest |
| 36 | +import zipfile |
| 37 | +from pathlib import Path |
| 38 | +from firebird.qa import * |
| 39 | +from firebird.driver import driver_config, connect |
| 40 | + |
| 41 | +db = db_factory() |
| 42 | +# Hash Join (semi) (keys: 1, total key length: 4) |
| 43 | +substitutions = [(r'Hash Join \(semi\) \(keys: \d+, total key length: \d+\)', 'Hash Join (semi)'), (r'record length: \d+', 'record length: NN')] |
| 44 | + |
| 45 | +act = python_act('db', substitutions = substitutions) |
| 46 | + |
| 47 | +tmp_sql = temp_file('gh_8061.tmp.sql') |
| 48 | + |
| 49 | +query_map = { |
| 50 | + 1000 : ( |
| 51 | + """ |
| 52 | + select c3.cust_no |
| 53 | + from customer c3 |
| 54 | + where exists ( |
| 55 | + select s3.cust_no |
| 56 | + from sales s3 |
| 57 | + where s3.cust_no = c3.cust_no and |
| 58 | + exists ( |
| 59 | + select x.emp_no |
| 60 | + from employee x |
| 61 | + where |
| 62 | + x.job_country = c3.country |
| 63 | + ) |
| 64 | + ) |
| 65 | + """ |
| 66 | + ,""" |
| 67 | + Subqueries that are correlated to non-parent; for example, |
| 68 | + subquery SQ3 is contained by SQ2 (parent of SQ3) and SQ2 in turn is contained |
| 69 | + by SQ1 and SQ3 is correlated to tables defined in SQ1. |
| 70 | + """ |
| 71 | + ) |
| 72 | + ,2000 : ( |
| 73 | + """ |
| 74 | + select c3.cust_no |
| 75 | + from customer c3 |
| 76 | + where exists ( |
| 77 | + select s3.cust_no |
| 78 | + from sales s3 |
| 79 | + where s3.cust_no = c3.cust_no |
| 80 | + group by s3.cust_no |
| 81 | + ) |
| 82 | + """ |
| 83 | + ,""" |
| 84 | + A group-by subquery is correlated; in this case, unnesting implies doing join |
| 85 | + after group-by. Changing the given order of the two operations may not be always legal. |
| 86 | + """ |
| 87 | + ) |
| 88 | + ,3000 : ( |
| 89 | + """ |
| 90 | + select s1.cust_no |
| 91 | + from sales s1 |
| 92 | + where exists ( |
| 93 | + select 1 from customer c1 where s1.cust_no = c1.cust_no |
| 94 | + union all |
| 95 | + select 1 from employee x1 where s1.sales_rep = x1.emp_no |
| 96 | + ) |
| 97 | + """ |
| 98 | + ,""" |
| 99 | + For disjunctive subqueries, the outer columns in the connecting |
| 100 | + or correlating conditions are not the same. |
| 101 | + """ |
| 102 | + ) |
| 103 | + ,4000 : ( |
| 104 | + """ |
| 105 | + select x1.emp_no |
| 106 | + from employee x1 |
| 107 | + where |
| 108 | + ( |
| 109 | + x1.job_country = 'USA' or |
| 110 | + exists ( |
| 111 | + select 1 |
| 112 | + from sales s1 |
| 113 | + where s1.sales_rep = x1.emp_no |
| 114 | + ) |
| 115 | + ) |
| 116 | + """ |
| 117 | + ,'An `OR` condition in compound WHERE expression, see https://jonathanlewis.wordpress.com/2007/02/26/subquery-with-or/' |
| 118 | + ) |
| 119 | +} |
| 120 | + |
| 121 | +#----------------------------------------------------------- |
| 122 | + |
| 123 | +def replace_leading(source, char="."): |
| 124 | + stripped = source.lstrip() |
| 125 | + return char * (len(source) - len(stripped)) + stripped |
| 126 | + |
| 127 | +#----------------------------------------------------------- |
| 128 | + |
| 129 | +@pytest.mark.version('>=5.0.1') |
| 130 | +def test_1(act: Action, tmp_sql: Path, capsys): |
| 131 | + employee_data_sql = zipfile.Path(act.files_dir / 'standard_sample_databases.zip', at='sample-DB_-_firebird.sql') |
| 132 | + tmp_sql.write_bytes(employee_data_sql.read_bytes()) |
| 133 | + |
| 134 | + act.isql(switches = ['-q'], charset='utf8', input_file = tmp_sql, combine_output = True) |
| 135 | + |
| 136 | + if act.return_code == 0: |
| 137 | + |
| 138 | + srv_cfg = driver_config.register_server(name = f'srv_cfg_8061', config = '') |
| 139 | + db_cfg_name = f'db_cfg_8061' |
| 140 | + db_cfg_object = driver_config.register_database(name = db_cfg_name) |
| 141 | + db_cfg_object.server.value = srv_cfg.name |
| 142 | + db_cfg_object.database.value = str(act.db.db_path) |
| 143 | + if act.is_version('<6'): |
| 144 | + db_cfg_object.config.value = f""" |
| 145 | + SubQueryConversion = true |
| 146 | + """ |
| 147 | + |
| 148 | + with connect(db_cfg_name, user = act.db.user, password = act.db.password) as con: |
| 149 | + cur = con.cursor() |
| 150 | + for q_idx, q_tuple in query_map.items(): |
| 151 | + test_sql, qry_comment = q_tuple[:2] |
| 152 | + ps = cur.prepare(test_sql) |
| 153 | + print(q_idx) |
| 154 | + print(test_sql) |
| 155 | + print(qry_comment) |
| 156 | + print( '\n'.join([replace_leading(s) for s in ps.detailed_plan.split('\n')]) ) |
| 157 | + ps.free() |
| 158 | + |
| 159 | + else: |
| 160 | + # If retcode !=0 then we can print the whole output of failed gbak: |
| 161 | + print('Initial script failed, check output:') |
| 162 | + for line in act.clean_stdout.splitlines(): |
| 163 | + print(line) |
| 164 | + act.reset() |
| 165 | + |
| 166 | + act.expected_stdout = f""" |
| 167 | + 1000 |
| 168 | + {query_map[1000][0]} |
| 169 | + {query_map[1000][1]} |
| 170 | + Sub-query |
| 171 | + ....-> Filter |
| 172 | + ........-> Table "EMPLOYEE" as "X" Full Scan |
| 173 | + Sub-query |
| 174 | + ....-> Filter (preliminary) |
| 175 | + ........-> Filter |
| 176 | + ............-> Table "SALES" as "S3" Access By ID |
| 177 | + ................-> Bitmap |
| 178 | + ....................-> Index "SALES_CUSTOMER_FK_CUST_NO" Range Scan (full match) |
| 179 | + Select Expression |
| 180 | + ....-> Filter |
| 181 | + ........-> Table "CUSTOMER" as "C3" Full Scan |
| 182 | +
|
| 183 | + 2000 |
| 184 | + {query_map[2000][0]} |
| 185 | + {query_map[2000][1]} |
| 186 | + Sub-query |
| 187 | + ....-> Aggregate |
| 188 | + ........-> Filter |
| 189 | + ............-> Table "SALES" as "S3" Access By ID |
| 190 | + ................-> Index "SALES_CUSTOMER_FK_CUST_NO" Range Scan (full match) |
| 191 | + Select Expression |
| 192 | + ....-> Filter |
| 193 | + ........-> Table "CUSTOMER" as "C3" Full Scan |
| 194 | +
|
| 195 | + 3000 |
| 196 | + {query_map[3000][0]} |
| 197 | + {query_map[3000][1]} |
| 198 | + Sub-query |
| 199 | + ....-> Union |
| 200 | + ........-> Filter |
| 201 | + ............-> Table "CUSTOMER" as "C1" Access By ID |
| 202 | + ................-> Bitmap |
| 203 | + ....................-> Index "CUSTOMER_PK" Unique Scan |
| 204 | + ........-> Filter |
| 205 | + ............-> Table "EMPLOYEE" as "X1" Access By ID |
| 206 | + ................-> Bitmap |
| 207 | + ....................-> Index "EMPLOYEE_PK" Unique Scan |
| 208 | + Select Expression |
| 209 | + ....-> Filter |
| 210 | + ........-> Table "SALES" as "S1" Full Scan |
| 211 | +
|
| 212 | + 4000 |
| 213 | + {query_map[4000][0]} |
| 214 | + {query_map[4000][1]} |
| 215 | + Sub-query |
| 216 | + ....-> Filter |
| 217 | + ........-> Table "SALES" as "S1" Access By ID |
| 218 | + ............-> Bitmap |
| 219 | + ................-> Index "SALES_EMPLOYEE_FK_SALES_REP" Range Scan (full match) |
| 220 | + Select Expression |
| 221 | + ....-> Filter |
| 222 | + ........-> Table "EMPLOYEE" as "X1" Full Scan |
| 223 | + """ |
| 224 | + act.stdout = capsys.readouterr().out |
| 225 | + assert act.clean_stdout == act.clean_expected_stdout |
0 commit comments