diff --git a/CHANGELOG.md b/CHANGELOG.md index bfb8d7e..2d30d56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [0.98.2](https://github.com/5app/dare/compare/v0.98.1...v0.98.2) (2025-08-14) + + +### Performance Improvements + +* **cte:** subquery optimisation, fixes [#413](https://github.com/5app/dare/issues/413) ([#416](https://github.com/5app/dare/issues/416)) ([fd7822f](https://github.com/5app/dare/commit/fd7822f5bd5cacead549160bc152f8a52d3fd298)) + ## [0.98.1](https://github.com/5app/dare/compare/v0.98.0...v0.98.1) (2025-08-08) diff --git a/README.md b/README.md index e4faedc..2dfd4de 100644 --- a/README.md +++ b/README.md @@ -1382,6 +1382,26 @@ The approach also supports multiple field definitions in the key, i.e. > } > // SELECT name FROM users WHERE MATCH(name, email) AGAINST ('Andrew' IN BOOLEAN MODE) > ``` +## Performance with LIMIT'ed nested queries + +Nested subqueries generated via Dare do not take advantage of restricted datasets through SQL `LIMIT` - at least this was the case with MySQL's InnoDB tables. + +To address this in MySQL 8, and other databases which support Common Table Expressions (CTE), Dare will by default apply filtering and limiting via a CTE which is INNER JOINed to the base table on its rowid. + +By default, the rules defined in `applyCTELimitFiltering` enable this feature for all databases (with the exception of MySQL 5.*), and for requests with a limit of up to 10k records. + +The rules around when to apply the CTE can be adjusted, e.g. + +```js +const dare = new Dare(options); +dare.applyCTELimitFiltering = (options) => { + return options.limit < 10_000; +} +``` + +To enable/disable the CTE, have the function return a truthy/falsy value. 
+ + ## Multiple joins/filters on the same table diff --git a/package-lock.json b/package-lock.json index 3d33eaa..3bbec16 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "dare", - "version": "0.98.1", + "version": "0.98.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "dare", - "version": "0.98.1", + "version": "0.98.2", "license": "MIT", "dependencies": { "semver-compare": "^1.0.0", diff --git a/package.json b/package.json index cecb51e..b2aa107 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "dare", - "version": "0.98.1", + "version": "0.98.2", "description": "Database to REST, REST to Database", "type": "module", "main": "./src/index.js", diff --git a/src/format/reducer_conditions.js b/src/format/reducer_conditions.js index a39ae03..bdb2fe2 100644 --- a/src/format/reducer_conditions.js +++ b/src/format/reducer_conditions.js @@ -51,7 +51,7 @@ export default function reduceConditions( if ( value && typeof value === 'object' && - !(value instanceof Date) && + !(value instanceof Date) && !Array.isArray(value) && key_definition?.type !== 'json' && !Buffer.isBuffer(value) diff --git a/src/format_request.js b/src/format_request.js index 3f83b8b..0fe681c 100644 --- a/src/format_request.js +++ b/src/format_request.js @@ -8,7 +8,8 @@ import limitClause from './format/limit_clause.js'; import joinHandler from './format/join_handler.js'; import getFieldAttributes from './utils/field_attributes.js'; import extend from './utils/extend.js'; -import buildQuery from './get.js'; +import buildQuery, {generateSQLSelect} from './get.js'; +import toArray from './utils/toArray.js'; /** * @import {Sql} from 'sql-template-tag' @@ -515,8 +516,12 @@ async function format_request(options, dareInstance) { // Create sub_query const sub_query = buildQuery(options, dareInstance); + // Create the SQL + const sql_sub_query = generateSQLSelect(sub_query); - sql_where_conditions = [SQL`${sql_negate} EXISTS 
(${sub_query})`]; + sql_where_conditions = [ + SQL`${sql_negate} EXISTS (${sql_sub_query})`, + ]; } else { /* * Whilst patch and delete will throw an ER_UPDATE_TABLE_USED error @@ -535,12 +540,13 @@ async function format_request(options, dareInstance) { options.parent = null; // Do not add superfluous joins const sub_query = buildQuery(options, dareInstance); + const sql_sub_query = generateSQLSelect(sub_query); sql_where_conditions = [ SQL`${raw(parentReferences[0])} ${sql_negate} IN ( SELECT ${join(options.fields.map(field => raw(String(field))))} FROM ( - ${sub_query} + ${sql_sub_query} ) AS ${raw(options.sql_alias)}_tmp ) `, @@ -555,12 +561,3 @@ async function format_request(options, dareInstance) { return options; } - -function toArray(a) { - if (typeof a === 'string') { - a = a.split(',').map(s => s.trim()); - } else if (!Array.isArray(a)) { - a = [a]; - } - return a; -} diff --git a/src/get.js b/src/get.js index 5dc805d..d1e0356 100644 --- a/src/get.js +++ b/src/get.js @@ -19,8 +19,15 @@ export default function buildQuery(opts, dareInstance) { const {is_subquery} = opts; // Traverse the Request Object - const {fields, has_many_join, sql_joins, sql_filter, groupby, orderby} = - traverse(opts, is_subquery, dareInstance); + const { + fields, + has_many_join, + has_sub_queries, + sql_joins, + sql_filter, + groupby, + orderby, + } = traverse(opts, is_subquery, dareInstance); // Get the root tableID const {sql_table, sql_alias} = opts; @@ -154,22 +161,59 @@ export default function buildQuery(opts, dareInstance) { } // Put it all together - let sql = SQL`SELECT ${join(sql_fields)} + return { + sql_fields, + sql_table, + sql_alias, + sql_joins, + sql_filter, + sql_groupby, + sql_orderby, + limit: opts.limit, + start: opts.start, + alias, + has_sub_queries, + }; +} + +/** + * Generate a SQL SELECT statement + * @param {object} opts - Options for generating the SQL statement + * @param {Sql} [opts.sql_cte] - Common Table Expression (CTE) to use + * @param {Array} 
opts.sql_fields - Fields to select + * @param {string} opts.sql_table - The table to select from + * @param {string} opts.sql_alias - Alias for the table + * @param {Array} opts.sql_joins - Joins to include in the query + * @param {Array} opts.sql_filter - Filters to apply to the query + * @param {Array} opts.sql_groupby - Group by fields + * @param {Array} opts.sql_orderby - Order by fields + * @param {number} [opts.limit] - Limit the number of results + * @param {number} [opts.start] - Offset for the results + * @returns {Sql} - The SQL statement + */ +export function generateSQLSelect({ + sql_cte, + sql_fields, + sql_table, + sql_alias, + sql_joins, + sql_filter, + sql_groupby, + sql_orderby, + limit, + start, +}) { + return SQL` + ${sql_cte ? SQL`WITH ${sql_cte}` : empty} + SELECT ${join(sql_fields)} FROM ${raw(sql_table)} ${raw(sql_alias)} ${optionalJoin(sql_joins, '\n', '')} ${optionalJoin(sql_filter, ' AND ', 'WHERE ')} ${optionalJoin(sql_groupby, ',', 'GROUP BY ')} ${optionalJoin(sql_orderby, ',', 'ORDER BY ')} - ${opts.limit ? SQL`LIMIT ${raw(opts.limit)}` : empty} - ${opts.start ? SQL`OFFSET ${raw(opts.start)}` : empty} + ${limit ? SQL`LIMIT ${raw(String(limit))}` : empty} + ${start ? SQL`OFFSET ${raw(String(start))}` : empty} `; - - if (alias) { - // Wrap the whole thing in an alias - sql = SQL`(${sql}) AS "${raw(alias)}"`; - } - - return sql; } function traverse(item, is_subquery, dareInstance) { @@ -204,6 +248,7 @@ function traverse(item, is_subquery, dareInstance) { fields, list, has_many_join: false, + has_sub_queries: false, }; // Things to change if this isn't the root. 
@@ -249,9 +294,13 @@ function traverse(item, is_subquery, dareInstance) { // Make the sub-query const sub_query = buildQuery(item, dareInstance); + const sql_sub_query = SQL`(${generateSQLSelect(sub_query)}) AS "${raw(sub_query.alias)}"`; // Add the formatted field - fields.push(sub_query); + fields.push(sql_sub_query); + + // Mark as having sub queries + resp.has_sub_queries = true; // The rest has been handled in the sub-query return resp; diff --git a/src/index.js b/src/index.js index 9fed867..afd76a2 100644 --- a/src/index.js +++ b/src/index.js @@ -1,9 +1,11 @@ import SQL, {raw, join, empty, bulk} from 'sql-template-tag'; -import getHandler from './get.js'; +import buildQuery, {generateSQLSelect} from './get.js'; import DareError from './utils/error.js'; +import toArray from './utils/toArray.js'; + import validateBody from './utils/validate_body.js'; import getFieldAttributes from './utils/field_attributes.js'; @@ -184,7 +186,7 @@ Dare.prototype.execute = async requestQuery => { * Engine, database engine * @type {Engine} */ -Dare.prototype.engine = 'mysql:5.7.40'; +Dare.prototype.engine = 'mysql:8.0.40'; // Rowid, name of primary key field used in grouping operation: MySQL uses _rowid /** @type {string} */ @@ -362,6 +364,26 @@ Dare.prototype.after = function (resp) { return resp; }; +/** + * Determine whether to use CTE LIMIT Filtering + * @param {QueryOptions} options - Query options + * @returns {boolean} Whether to use CTE LIMIT Filtering + */ +Dare.prototype.applyCTELimitFiltering = function (options) { + + // Cancel for old mysql + if (this.engine.startsWith('mysql:5')) { + return false; + } + + // Cancel if limit is beyond a certain threshold + if (options.limit > 10_000) { + return false; + } + + return true; +}; + /** * Use * Creates a new instance of Dare and merges new options with the base options @@ -483,10 +505,37 @@ Dare.prototype.get = async function get(table, fields, filter, options = {}) { const req = await 
dareInstance.format_request(dareInstance.options); - const query = getHandler(req, dareInstance); + // Build the query + const query = buildQuery(req, dareInstance); + + // Where the query has sub queries (has_sub_queries=true) and is not grouped by anything other than id, generate a CTE query + if ( + query.has_sub_queries && + (!opts.groupby || toArray(opts.groupby).join('') === 'id') && + this.applyCTELimitFiltering(req) + ) { + // Create a new formatted query, selecting just the id field + opts.fields = ['id']; + const cteInstance = this.use(opts); + const cteRequest = await cteInstance.format_request( + cteInstance.options + ); + const cteQuery = buildQuery(cteRequest, cteInstance); + const sql_query = generateSQLSelect(cteQuery); + query.sql_joins.unshift( + SQL`JOIN cte ON (cte.id = ${raw(query.sql_alias)}.${raw(dareInstance.rowid)})` + ); + query.sql_cte = SQL`cte AS (${sql_query})`; + + // Disable repeating the start (offset), it is already applied within the CTE + query.start = undefined; + } + + // Generate the SQL SELECT statement from the query (including the CTE, when set) + const sql_query = generateSQLSelect(query); // Execute the query - const sql_response = await dareInstance.sql(query); + const sql_response = await dareInstance.sql(sql_query); if (sql_response === undefined) { return; @@ -549,10 +598,11 @@ Dare.prototype.getCount = async function getCount(table, filter, options = {}) { const req = await dareInstance.format_request(dareInstance.options); - const query = getHandler(req, dareInstance); + const query = buildQuery(req, dareInstance); + const sql_query = generateSQLSelect(query); // Execute the query - const [resp] = await dareInstance.sql(query); + const [resp] = await dareInstance.sql(sql_query); /* * Return the count @@ -737,7 +787,8 @@ Dare.prototype.post = async function post(table, body, options = {}) { } // Assign the query - sql_query = getHandler(getRequest, getInstance); + const query = buildQuery(getRequest, getInstance); + sql_query = generateSQLSelect(query); fields.push(...walkRequestGetField(getRequest)); } else { @@ -988,9 +1039,7 @@ 
function prepareSQLSet({ }); // Replace value with a question using any mapped fieldName - assignments.push( - SQL`${raw(sql_field)} = ${value}` - ); + assignments.push(SQL`${raw(sql_field)} = ${value}`); } return join(assignments, ', '); @@ -1140,7 +1189,9 @@ function formatInputValue({ } // Format the field - const sql_field = (sql_alias ? `${sql_alias}.` : '') + dareInstance.identifierWrapper(field); + const sql_field = + (sql_alias ? `${sql_alias}.` : '') + + dareInstance.identifierWrapper(field); /** * Format the set value diff --git a/src/utils/toArray.js b/src/utils/toArray.js new file mode 100644 index 0000000..558af53 --- /dev/null +++ b/src/utils/toArray.js @@ -0,0 +1,17 @@ +/** + * ToArray - if a value is not already an Array, make it so: comma-separated strings are split and trimmed, anything else is wrapped in an Array + * @param {*} a - The input value to convert to an array + * @returns {Array} - The converted array + * @example + * toArray('a,b,c') // ['a', 'b', 'c'] + * toArray(['a', 'b', 'c']) // ['a', 'b', 'c'] + * toArray(1) // [1] + */ +export default function toArray(a) { + if (typeof a === 'string') { + a = a.split(',').map(s => s.trim()); + } else if (!Array.isArray(a)) { + a = [a]; + } + return a; +} diff --git a/test/integration/json.spec.js b/test/integration/json.spec.js index c2eb344..86d9676 100644 --- a/test/integration/json.spec.js +++ b/test/integration/json.spec.js @@ -164,7 +164,6 @@ describe('Working with JSON DataType', () => { }); it('JSON fields should be patchable with a setFunction definition', async function () { - if (DB_ENGINE?.startsWith('postgres')) { this.skip(); return; @@ -174,7 +173,7 @@ describe('Working with JSON DataType', () => { dare.options.models.users.schema.settings.patch = { setFunction({sql_field, value}) { return SQL`JSON_MERGE_PATCH(${raw(sql_field)}, ${value})`; - } + }, }; // Insert intial settings diff --git a/test/specs/filter_reducer.spec.js 
b/test/specs/filter_reducer.spec.js @@ -134,7 +134,7 @@ describe('Filter Reducer', () => { }, }, `(a.jsonSettings->? IN (?,?,?))`, - ['$.key', '"a"', '"b"', 1], + ['$.key', 'a', 'b', 1], ], ]; diff --git a/test/specs/format_request.spec.js b/test/specs/format_request.spec.js index abb54d8..55f62fd 100644 --- a/test/specs/format_request.spec.js +++ b/test/specs/format_request.spec.js @@ -451,7 +451,7 @@ describe('format_request', () => { ], [ // Should always expand datetime fields - {'datetime': '1981-12-05..1981-12-06'}, + {datetime: '1981-12-05..1981-12-06'}, 'a.datetime BETWEEN ? AND ?', ['1981-12-05T00:00:00', '1981-12-06T23:59:59'], noCondOperators, diff --git a/test/specs/get-subquery.spec.js b/test/specs/get-subquery.spec.js index c5becfb..24b0c3b 100644 --- a/test/specs/get-subquery.spec.js +++ b/test/specs/get-subquery.spec.js @@ -39,16 +39,16 @@ describe('get - subquery', () => { it('should write one to many requests with a subquery', async () => { dare.sql = ({sql}) => { const expected = ` - + WITH cte AS (SELECT a.id FROM assets a LIMIT 1) SELECT a.name AS "asset_name", ( SELECT COUNT(c.id) FROM assetCollections b LEFT JOIN collections c ON (c.id = b.collection_id) WHERE b.asset_id = a.id - LIMIT 1 ) AS "collection_count" FROM assets a + JOIN cte ON (cte.id = a._rowid) GROUP BY a._rowid LIMIT 1 @@ -79,16 +79,16 @@ describe('get - subquery', () => { it('should export the response in the format given', async () => { dare.sql = ({sql}) => { const expected = ` - + WITH cte AS (SELECT a.id FROM assets a LIMIT 1) SELECT a.name AS "asset_name", ( SELECT COUNT(c.id) FROM assetCollections b LEFT JOIN collections c ON (c.id = b.collection_id) WHERE b.asset_id = a.id - LIMIT 1 ) AS "collections.count" FROM assets a + JOIN cte ON (cte.id = a._rowid) GROUP BY a._rowid LIMIT 1 @@ -122,16 +122,17 @@ describe('get - subquery', () => { it('should concatinate many expressions into an array using JSON_ARRAYAGG', async () => { dare.sql = ({sql}) => { const expected = ` - + 
WITH cte AS (SELECT a.id FROM assets a LIMIT 1) SELECT a.name AS "name", ( - SELECT JSON_ARRAYAGG(IF(c._rowid IS NOT NULL, JSON_ARRAY(c.id, c.name), NULL)) + SELECT JSON_ARRAYAGG(CASE WHEN(c._rowid IS NOT NULL) THEN (JSON_ARRAY(c.id, c.name)) ELSE NULL END) FROM assetCollections b LEFT JOIN collections c ON (c.id = b.collection_id) WHERE b.asset_id = a.id LIMIT 1 ) AS "collections[id,name]" FROM assets a + JOIN cte ON (cte.id = a._rowid) GROUP BY a._rowid LIMIT 1 @@ -162,15 +163,16 @@ describe('get - subquery', () => { it('should concatinate many expressions into an array using JSON_ARRAYAGG', async () => { dare.sql = ({sql}) => { const expected = ` - + WITH cte AS (SELECT a.id FROM assets a LIMIT 1) SELECT a.name AS "name", ( - SELECT JSON_ARRAYAGG(IF(b._rowid IS NOT NULL, JSON_ARRAY(b.id, b.color), NULL)) + SELECT JSON_ARRAYAGG(CASE WHEN (b._rowid IS NOT NULL) THEN (JSON_ARRAY(b.id, b.color)) ELSE NULL END) FROM assetCollections b WHERE b.color = ? AND b.asset_id = a.id LIMIT 1 ) AS "assetCollections[id,color]" FROM assets a + JOIN cte ON (cte.id = a._rowid) GROUP BY a._rowid LIMIT 1 @@ -240,7 +242,7 @@ describe('get - subquery', () => { dare.sql = ({sql}) => { const expected = ` SELECT a.name AS "name", - JSON_ARRAYAGG(IF(c._rowid IS NOT NULL, JSON_ARRAY(c.id, c.name), NULL)) AS "collections[id,name]" + JSON_ARRAYAGG(CASE WHEN(c._rowid IS NOT NULL) THEN (JSON_ARRAY(c.id, c.name)) ELSE NULL END) AS "collections[id,name]" FROM assets a LEFT JOIN assetCollections b ON(b.asset_id = a.id) LEFT JOIN collections c ON (c.id = b.collection_id) @@ -271,7 +273,7 @@ describe('get - subquery', () => { it('should *not* subquery a table off a join with a possible set of values', async () => { dare.sql = ({sql}) => { const expected = ` - SELECT a.name AS "name", JSON_ARRAYAGG(IF(b._rowid IS NOT NULL, JSON_ARRAY(COUNT(d.id)), NULL)) AS "assetCollections[collections.descendents]" + SELECT a.name AS "name", JSON_ARRAYAGG(CASE WHEN (b._rowid IS NOT NULL) THEN 
(JSON_ARRAY(COUNT(d.id))) ELSE NULL END) AS "assetCollections[collections.descendents]" FROM assets a LEFT JOIN assetCollections b ON(b.asset_id = a.id) LEFT JOIN collections c ON(c.id = b.collection_id) @@ -307,6 +309,7 @@ describe('get - subquery', () => { it('should aggregate single field requests in a subquery, aka without group_concat', async () => { dare.sql = ({sql}) => { const expected = ` + WITH cte AS (SELECT a.id FROM users a GROUP BY a.id ORDER BY a.name LIMIT 1) SELECT a.id,a.name,a.created_time, ( SELECT JSON_ARRAY(b.id, b.email) @@ -316,10 +319,10 @@ describe('get - subquery', () => { LIMIT 1 ) AS "email_id,email" FROM users a + JOIN cte ON (cte.id = a._rowid) GROUP BY a.id ORDER BY a.name LIMIT 1`; - expectSQLEqual(sql, expected); return Promise.resolve([{}]); @@ -377,7 +380,50 @@ describe('get - subquery', () => { }); }); - describe(`Disparities`, () => { + describe('applyCTELimitFiltering', async () => { + + it('should not add CTE on resultsets over 10k rows, by default', async () => { + dare.sql = ({sql}) => { + const expected = ` + SELECT a.id, + ( + SELECT b.email + FROM userEmails b + WHERE + b.user_id = a.id + LIMIT 1 + ) AS "email" + FROM users a + GROUP BY a._rowid + LIMIT 10001`; + + expectSQLEqual(sql, expected); + + return Promise.resolve([{}]); + }; + + dare.options = { + models: { + userEmails: { + schema: {user_id: ['users.id']}, + }, + }, + }; + + return dare.get({ + table: 'users', + fields: [ + 'id', + { + email: 'userEmails.email', + }, + ], + limit: 10_001 + }); + }); + }); + + describe(`Engine Disparities`, () => { it('MySQL 8 fails to correctly count the items in this scenario', async () => { /* * See Bug report: https://bugs.mysql.com/bug.php?id=109585 @@ -401,5 +447,46 @@ describe('get - subquery', () => { limit: 3, }); }); + it('MySQL 5.* does not support CTE', async () => { + + const dareInst = dare.use({engine: 'mysql:5.7.0'}); + + dareInst.sql = ({sql}) => { + const expected = ` + SELECT a.id, + ( + SELECT b.email + FROM 
userEmails b + WHERE + b.user_id = a.id + LIMIT 1 + ) AS "email" + FROM users a + GROUP BY a._rowid + LIMIT 1`; + + expectSQLEqual(sql, expected); + + return Promise.resolve([{}]); + }; + + dareInst.options = { + models: { + userEmails: { + schema: {user_id: ['users.id']}, + }, + }, + }; + + return dareInst.get({ + table: 'users', + fields: [ + 'id', + { + email: 'userEmails.email', + }, + ], + }); + }); }); }); diff --git a/test/specs/patch.spec.js b/test/specs/patch.spec.js index b7d65f6..9caeb32 100644 --- a/test/specs/patch.spec.js +++ b/test/specs/patch.spec.js @@ -173,7 +173,6 @@ describe('patch', () => { }); it('should apply schema.field.setFunction', () => { - dare.options.models = { test: { schema: {