From 1f4d8c45bf4f703627ee4018ed549c167c6fa690 Mon Sep 17 00:00:00 2001 From: Yaashi Madan Date: Tue, 18 Nov 2025 13:56:33 +0530 Subject: [PATCH 1/5] =?UTF-8?q?Add=20Site=E2=86=92Project=20and=20locale?= =?UTF-8?q?=20migration=20script?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../migrate-sites-to-projects-and-locales.mjs | 411 ++++++++++++++++++ 1 file changed, 411 insertions(+) create mode 100644 packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs diff --git a/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs b/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs new file mode 100644 index 000000000..7c2f34c98 --- /dev/null +++ b/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs @@ -0,0 +1,411 @@ +#!/usr/bin/env node +/* + * Migrate Sites to Projects and set locales. + * + * Behavior: + * - Process ALL sites unless filtered by --orgId/--projectId/--domain/--siteId + * - Project key = (organizationId + registrable domain) + * - If site.projectId exists → skip + * - Otherwise find-or-create Project, set site.projectId + * - If site.language/region are unset → detect via locale-detect (network by default) + * - Fallback to en/US + * - Writes directly to DynamoDB table specified by env DYNAMO_TABLE_NAME_DATA (default spacecat-services-data) + * - Concurrency default: 10 (configurable via --concurrency) + * - Reporting with --report emits CSV and JSON under ./reports/ + * + * Required env: + * - AWS_REGION (e.g., us-east-1) + * - AWS credentials via default chain (env or shared profile) + */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { + createDataAccess, +} from '../src/service/index.js'; + +import { + detectLocale, + hasText, + isNonEmptyArray, +} from '@adobe/spacecat-shared-utils'; + +// ----------------------------- +// Small CLI arg parser +// ----------------------------- +function parseArgs(argv) { + const args = { + orgId: undefined, + projectId: undefined, + domain: undefined, + siteId: undefined, + apply: false, + dryRun: false, + report: false, + yes: false, + noNetwork: false, + concurrency: 10, + }; + + for (let i = 2; i < argv.length; i += 1) { + const a = argv[i]; + if (a === '--apply') args.apply = true; + else if (a === '--dry-run') args.dryRun = true; + else if (a === '--report') args.report = true; + else if (a === '--yes' || a === '--force' || a === '-y') args.yes = true; + else if (a === '--no-network') args.noNetwork = true; + else if (a.startsWith('--orgId=')) args.orgId = a.split('=')[1]; + else if (a.startsWith('--projectId=')) args.projectId = a.split('=')[1]; + else if (a.startsWith('--domain=')) args.domain = a.split('=')[1]; + else if (a.startsWith('--siteId=')) args.siteId = a.split('=')[1]; + else if (a.startsWith('--concurrency=')) args.concurrency = parseInt(a.split('=')[1], 10); + } + + if (!args.apply && !args.dryRun) { + // default to dry-run unless apply explicitly set + args.dryRun = true; + } + + return args; +} + +// ----------------------------- +// Utilities +// ----------------------------- +function nowStamp() { + const d = new Date(); + return d.toISOString().replace(/[:.]/g, '-'); +} + +function ensureDir(dir) { + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } +} + +function toRegistrableDomain(baseURL) { + try { + const parsedBaseURL = new URL(baseURL); + const { hostname } = parsedBaseURL; + const parts = hostname.split('.'); + + // If hostname has only 1–2 parts, just use it as-is + if (parts.length <= 2) { + return hostname; + } + + // Reference semantics (see onboarding helper): + // - Consider the first two labels as potential subdomains + // - If a label is 2–3 characters long, drop it + // (e.g. www.adobe.com → adobe.com, fr.adobe.com → adobe.com) + for (let i = 0; i < Math.min(parts.length, 2); i += 1) { + const part = parts[i]; + if (part.length === 2 || part.length === 3) { + parts[i] = null; + } + } + + // Join remaining parts back to form the project/domain key + return parts.filter(Boolean).join('.'); + } catch { + return undefined; + } +} + +async function detectSiteLocale(site, { noNetwork }) { + const baseUrl = site.getBaseURL(); + try { + if (noNetwork) { + // Avoid network by supplying empty HTML and headers + const res = await detectLocale({ baseUrl, html: '', headers: {} }); + return { + language: res.language || 'en', + region: res.region || 'US', + }; + } + const res = await detectLocale({ baseUrl }); + return { + language: res.language || 'en', + region: res.region || 'US', + }; + } catch { + return { language: 'en', region: 'US' }; + } +} + +function csvRow(fields) { + const escape = (v) => { + if (v === undefined || v === null) return ''; + const s = String(v); + if (/[",\n]/.test(s)) return `"${s.replace(/"/g, '""')}"`; + return s; + }; + return fields.map(escape).join(','); +} + +// Simple concurrency limiter +async function mapWithConcurrency(items, limit, mapper) { + const results = []; + let inFlight = 0; + let idx = 0; + return new Promise((resolve, reject) => { + const next = () => { + if (idx >= items.length && inFlight === 0) { + resolve(results); + return; + } + while (inFlight < limit && idx < items.length) { + const currentIndex = idx++; + inFlight += 1; + Promise.resolve(mapper(items[currentIndex], currentIndex)) + .then((r) => { results[currentIndex] = r; }) + .catch(reject) + .finally(() => { inFlight -= 1; next(); }); + } + }; + next(); + }); +} + +// ----------------------------- +// Main +// ----------------------------- +async function main() { + const args = parseArgs(process.argv); + const { + orgId, projectId, domain, siteId, apply, dryRun, report, yes, noNetwork, concurrency, + } = args; + + const env = { + DYNAMO_TABLE_NAME_DATA: process.env.DYNAMO_TABLE_NAME_DATA || 'spacecat-services-data', + }; + + console.log(`[start] table=${env.DYNAMO_TABLE_NAME_DATA} apply=${apply} dryRun=${dryRun} report=${report} concurrency=${concurrency} noNetwork=${noNetwork}`); + + const dataAccess = createDataAccess( + { + tableNameData: env.DYNAMO_TABLE_NAME_DATA, + }, + console, + ); + + // EntityRegistry exposes collections keyed by entity name (e.g. "Site", "Project") + // We alias them here for clarity. + const { + Project: ProjectCollection, + Site: SiteCollection, + } = dataAccess; + + // Resolve target sites + let sites = []; + + if (hasText(siteId)) { + const site = await SiteCollection.findById(siteId); + if (site) sites = [site]; + } else if (hasText(projectId)) { + sites = await SiteCollection.allByProjectId(projectId); + } else if (hasText(orgId)) { + try { + if (SiteCollection.allByOrganizationId) { + sites = await SiteCollection.allByOrganizationId(orgId); + } else { + // Fallback: fetch all and filter (should rarely happen) + const allSites = await SiteCollection.all(); + sites = allSites.filter((s) => s.getOrganizationId && s.getOrganizationId() === orgId); + } + } catch { + const allSites = await SiteCollection.all(); + sites = allSites.filter((s) => s.getOrganizationId && s.getOrganizationId() === orgId); + } + if (hasText(domain)) { + const dLower = domain.toLowerCase(); + sites = sites.filter((s) => (toRegistrableDomain(s.getBaseURL()) || '').toLowerCase() === dLower); + } + } else if (hasText(domain)) { + const allSites = await SiteCollection.all(); + const dLower = domain.toLowerCase(); + sites = allSites.filter((s) => (toRegistrableDomain(s.getBaseURL()) || '').toLowerCase() === dLower); + } else { + sites = await SiteCollection.all(); + } + + if (!isNonEmptyArray(sites)) { + console.log('[done] no matching sites'); + return; + } + + // Group projects cache per org for faster lookup + const projectsCacheByOrg = new Map(); // orgId -> Array + + async function findOrCreateProjectForSite(site) { + const org = site.getOrganizationId(); + const projName = toRegistrableDomain(site.getBaseURL()); + if (!hasText(org) || !hasText(projName)) { + return null; + } + let orgProjects = projectsCacheByOrg.get(org); + if (!orgProjects) { + try { + if (ProjectCollection.allByOrganizationId) { + orgProjects = await ProjectCollection.allByOrganizationId(org); + } else { + // Fallback: load all, then filter in-memory + const allProjects = await ProjectCollection.all(); + orgProjects = allProjects.filter((p) => p.getOrganizationId && p.getOrganizationId() === org); + } + } catch { + const allProjects = await ProjectCollection.all(); + orgProjects = allProjects.filter((p) => p.getOrganizationId && p.getOrganizationId() === org); + } + projectsCacheByOrg.set(org, orgProjects); + } + let project = orgProjects.find((p) => p.getProjectName && p.getProjectName() === projName); + if (!project && apply) { + project = await ProjectCollection.create({ + organizationId: org, + projectName: projName, + }); + // refresh cache + orgProjects.push(project); + } + return project; + } + + const changes = []; + + const processOne = async (site) => { + const before = { + siteId: site.getId(), + baseURL: site.getBaseURL(), + organizationId: site.getOrganizationId(), + projectId: site.getProjectId?.() || site.record?.projectId, + language: site.getLanguage?.() || site.record?.language, + region: site.getRegion?.() || site.record?.region, + }; + + if (before.projectId) { + changes.push({ + siteId: before.siteId, + organizationId: before.organizationId, + baseURL: before.baseURL, + projectId: before.projectId, + language: before.language || '', + region: before.region || '', + action: 'skipped:project-present', + }); + return { action: 'skipped' }; + } + + const project = await findOrCreateProjectForSite(site); + const projectIdNew = project?.getId(); + + let langNew = before.language; + let regionNew = before.region; + if (!langNew || !regionNew) { + const detected = await detectSiteLocale(site, { noNetwork }); + langNew = before.language || detected.language || 'en'; + regionNew = before.region || detected.region || 'US'; + } + + // Record change summary + changes.push({ + siteId: before.siteId, + organizationId: before.organizationId, + baseURL: before.baseURL, + projectId: projectIdNew || before.projectId || '', + language: langNew || '', + region: regionNew || '', + action: apply ? 'updated' : 'planned', + }); + + if (!apply) { + return { action: 'planned' }; + } + + // Apply updates (idempotent: only set fields that are unset) + if (projectIdNew && !before.projectId && site.setProjectId) { + site.setProjectId(projectIdNew); + } + if (!before.language && site.setLanguage) { + site.setLanguage(langNew); + } + if (!before.region && site.setRegion) { + site.setRegion(regionNew); + } + if (site.setUpdatedBy) { + site.setUpdatedBy('system'); + } + await site.save(); + return { action: 'updated' }; + }; + + if (apply && !yes) { + console.log(`About to process ${sites.length} site(s). Use --yes to proceed or run with --dry-run first.`); + return; + } + + await mapWithConcurrency(sites, Number.isFinite(concurrency) && concurrency > 0 ? concurrency : 10, processOne); + + // Reporting + if (report) { + const __filename = fileURLToPath(import.meta.url); + const __dirname = path.dirname(__filename); + const outDir = path.resolve(__dirname, '../../../reports'); + ensureDir(outDir); + + const stamp = nowStamp(); + const csvPath = path.join(outDir, `site-project-migration-${stamp}.csv`); + const jsonPath = path.join(outDir, `site-project-migration-${stamp}.json`); + + const header = [ + 'siteId', + 'organizationId', + 'baseURL', + 'projectId', + 'language', + 'region', + 'action', + ]; + const rows = [csvRow(header)]; + for (const c of changes) { + rows.push(csvRow([ + c.siteId, + c.organizationId, + c.baseURL, + c.projectId, + c.language, + c.region, + c.action, + ])); + } + fs.writeFileSync(csvPath, `${rows.join('\n')}\n`, 'utf8'); + fs.writeFileSync( + jsonPath, + `${JSON.stringify( + { + generatedAt: new Date().toISOString(), + changes, // already contains final projectId, language, region, action + }, + null, + 2, + )}\n`, + 'utf8', + ); + console.log(`[report] ${csvPath}`); + console.log(`[report] ${jsonPath}`); + } + + const updated = changes.filter((c) => c.action === 'updated').length; + const planned = changes.filter((c) => c.action === 'planned').length; + const skipped = changes.filter((c) => c.action?.startsWith('skipped')).length; + console.log(`[done] sites=${sites.length} updated=${updated} planned=${planned} skipped=${skipped}`); +} + +main().catch((e) => { + console.error('[fatal]', e); + process.exit(1); +}); + + From c6eec37de23ae31dbb5ef1012359454746be3e51 Mon Sep 17 00:00:00 2001 From: Yaashi Madan Date: Tue, 18 Nov 2025 14:07:50 +0530 Subject: [PATCH 2/5] fix: lint issues --- .../migrate-sites-to-projects-and-locales.mjs | 176 ++++++++++++------ 1 file changed, 117 insertions(+), 59 deletions(-) diff --git a/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs b/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs index 7c2f34c98..eaa1947cb 100644 --- a/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs +++ b/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs @@ -1,37 +1,52 @@ #!/usr/bin/env node /* - * Migrate Sites to Projects and set locales. + * Copyright 2024 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/* + * CLI to migrate Sites to Projects and set locales. * * Behavior: - * - Process ALL sites unless filtered by --orgId/--projectId/--domain/--siteId - * - Project key = (organizationId + registrable domain) - * - If site.projectId exists → skip - * - Otherwise find-or-create Project, set site.projectId - * - If site.language/region are unset → detect via locale-detect (network by default) - * - Fallback to en/US - * - Writes directly to DynamoDB table specified by env DYNAMO_TABLE_NAME_DATA (default spacecat-services-data) - * - Concurrency default: 10 (configurable via --concurrency) - * - Reporting with --report emits CSV and JSON under ./reports/ + * - Process ALL sites unless filtered by --orgId/--projectId/--domain/--siteId. + * - Project key = (organizationId + registrable domain). + * - If site.projectId exists → skip. + * - Otherwise find-or-create Project, then set site.projectId. + * - If site.language/region are unset → detect via locale-detect (network by default), + * with fallback to en/US. + * - Writes to the DynamoDB table in env DYNAMO_TABLE_NAME_DATA + * (default: spacecat-services-data). + * - Concurrency default: 10 (configurable via --concurrency). + * - Reporting with --report emits CSV and JSON under ./reports/. * * Required env: - * - AWS_REGION (e.g., us-east-1) - * - AWS credentials via default chain (env or shared profile) + * - AWS_REGION (e.g., us-east-1). + * - AWS credentials via default chain (env or shared profile). */ +/* eslint-disable no-console */ + import fs from 'node:fs'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; -import { - createDataAccess, -} from '../src/service/index.js'; - import { detectLocale, hasText, isNonEmptyArray, } from '@adobe/spacecat-shared-utils'; +import { + createDataAccess, +} from '../src/service/index.js'; + // ----------------------------- // Small CLI arg parser // ----------------------------- @@ -56,11 +71,22 @@ function parseArgs(argv) { else if (a === '--report') args.report = true; else if (a === '--yes' || a === '--force' || a === '-y') args.yes = true; else if (a === '--no-network') args.noNetwork = true; - else if (a.startsWith('--orgId=')) args.orgId = a.split('=')[1]; - else if (a.startsWith('--projectId=')) args.projectId = a.split('=')[1]; - else if (a.startsWith('--domain=')) args.domain = a.split('=')[1]; - else if (a.startsWith('--siteId=')) args.siteId = a.split('=')[1]; - else if (a.startsWith('--concurrency=')) args.concurrency = parseInt(a.split('=')[1], 10); + else if (a.startsWith('--orgId=')) { + const [, value] = a.split('='); + args.orgId = value; + } else if (a.startsWith('--projectId=')) { + const [, value] = a.split('='); + args.projectId = value; + } else if (a.startsWith('--domain=')) { + const [, value] = a.split('='); + args.domain = value; + } else if (a.startsWith('--siteId=')) { + const [, value] = a.split('='); + args.siteId = value; + } else if (a.startsWith('--concurrency=')) { + const [, value] = a.split('='); + args.concurrency = parseInt(value, 10); + } } if (!args.apply && !args.dryRun) { @@ -147,26 +173,31 @@ function csvRow(fields) { // Simple concurrency limiter async function mapWithConcurrency(items, limit, mapper) { - const results = []; - let inFlight = 0; - let idx = 0; - return new Promise((resolve, reject) => { - const next = () => { - if (idx >= items.length && inFlight === 0) { - resolve(results); - return; + const results = new Array(items.length); + const workers = []; + let currentIndex = 0; + + const worker = async () => { + // eslint-disable-next-line no-constant-condition + while (true) { + if (currentIndex >= items.length) { + break; } - while (inFlight < limit && idx < items.length) { - const currentIndex = idx++; - inFlight += 1; - Promise.resolve(mapper(items[currentIndex], currentIndex)) - .then((r) => { results[currentIndex] = r; }) - .catch(reject) - .finally(() => { inFlight -= 1; next(); }); - } - }; - next(); - }); + const index = currentIndex; + currentIndex += 1; + // eslint-disable-next-line no-await-in-loop + const result = await mapper(items[index], index); + results[index] = result; + } + }; + + const workerCount = Math.min(limit, items.length); + for (let i = 0; i < workerCount; i += 1) { + workers.push(worker()); + } + + await Promise.all(workers); + return results; } // ----------------------------- @@ -182,7 +213,11 @@ async function main() { DYNAMO_TABLE_NAME_DATA: process.env.DYNAMO_TABLE_NAME_DATA || 'spacecat-services-data', }; - console.log(`[start] table=${env.DYNAMO_TABLE_NAME_DATA} apply=${apply} dryRun=${dryRun} report=${report} concurrency=${concurrency} noNetwork=${noNetwork}`); + console.log( + `[start] table=${env.DYNAMO_TABLE_NAME_DATA} apply=${apply} ` + + `dryRun=${dryRun} report=${report} concurrency=${concurrency} ` + + `noNetwork=${noNetwork}`, + ); const dataAccess = createDataAccess( { @@ -213,20 +248,28 @@ async function main() { } else { // Fallback: fetch all and filter (should rarely happen) const allSites = await SiteCollection.all(); - sites = allSites.filter((s) => s.getOrganizationId && s.getOrganizationId() === orgId); + sites = allSites.filter( + (site) => site.getOrganizationId && site.getOrganizationId() === orgId, + ); } } catch { const allSites = await SiteCollection.all(); - sites = allSites.filter((s) => s.getOrganizationId && s.getOrganizationId() === orgId); + sites = allSites.filter( + (site) => site.getOrganizationId && site.getOrganizationId() === orgId, + ); } if (hasText(domain)) { const dLower = domain.toLowerCase(); - sites = sites.filter((s) => (toRegistrableDomain(s.getBaseURL()) || '').toLowerCase() === dLower); + sites = sites.filter( + (site) => (toRegistrableDomain(site.getBaseURL()) || '').toLowerCase() === dLower, + ); } } else if (hasText(domain)) { const allSites = await SiteCollection.all(); const dLower = domain.toLowerCase(); - sites = allSites.filter((s) => (toRegistrableDomain(s.getBaseURL()) || '').toLowerCase() === dLower); + sites = allSites.filter( + (site) => (toRegistrableDomain(site.getBaseURL()) || '').toLowerCase() === dLower, + ); } else { sites = await SiteCollection.all(); } @@ -253,15 +296,21 @@ async function main() { } else { // Fallback: load all, then filter in-memory const allProjects = await ProjectCollection.all(); - orgProjects = allProjects.filter((p) => p.getOrganizationId && p.getOrganizationId() === org); + orgProjects = allProjects.filter( + (project) => project.getOrganizationId && project.getOrganizationId() === org, + ); } } catch { const allProjects = await ProjectCollection.all(); - orgProjects = allProjects.filter((p) => p.getOrganizationId && p.getOrganizationId() === org); + orgProjects = allProjects.filter( + (project) => project.getOrganizationId && project.getOrganizationId() === org, + ); } projectsCacheByOrg.set(org, orgProjects); } - let project = orgProjects.find((p) => p.getProjectName && p.getProjectName() === projName); + let project = orgProjects.find( + (candidate) => candidate.getProjectName && candidate.getProjectName() === projName, + ); if (!project && apply) { project = await ProjectCollection.create({ organizationId: org, @@ -342,17 +391,23 @@ async function main() { }; if (apply && !yes) { - console.log(`About to process ${sites.length} site(s). Use --yes to proceed or run with --dry-run first.`); + console.log( + `About to process ${sites.length} site(s). ` + + 'Use --yes to proceed or run with --dry-run first.', + ); return; } - await mapWithConcurrency(sites, Number.isFinite(concurrency) && concurrency > 0 ? concurrency : 10, processOne); + const effectiveConcurrency = Number.isFinite(concurrency) && concurrency > 0 + ? concurrency + : 10; + await mapWithConcurrency(sites, effectiveConcurrency, processOne); // Reporting if (report) { - const __filename = fileURLToPath(import.meta.url); - const __dirname = path.dirname(__filename); - const outDir = path.resolve(__dirname, '../../../reports'); + const filename = fileURLToPath(import.meta.url); + const dirname = path.dirname(filename); + const outDir = path.resolve(dirname, '../../../reports'); ensureDir(outDir); const stamp = nowStamp(); @@ -397,15 +452,18 @@ async function main() { console.log(`[report] ${jsonPath}`); } - const updated = changes.filter((c) => c.action === 'updated').length; - const planned = changes.filter((c) => c.action === 'planned').length; - const skipped = changes.filter((c) => c.action?.startsWith('skipped')).length; - console.log(`[done] sites=${sites.length} updated=${updated} planned=${planned} skipped=${skipped}`); + const updated = changes.filter((change) => change.action === 'updated').length; + const planned = changes.filter((change) => change.action === 'planned').length; + const skipped = changes.filter( + (change) => change.action && change.action.startsWith('skipped'), + ).length; + console.log( + `[done] sites=${sites.length} ` + + `updated=${updated} planned=${planned} skipped=${skipped}`, + ); } main().catch((e) => { console.error('[fatal]', e); process.exit(1); }); - - From 83fdf88b4548c60383715f9259dc105e0e2da609 Mon Sep 17 00:00:00 2001 From: Yaashi Madan Date: Tue, 18 Nov 2025 14:15:50 +0530 Subject: [PATCH 3/5] fix: updated script --- .../migrate-sites-to-projects-and-locales.mjs | 25 ++++--------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs b/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs index eaa1947cb..8425b6087 100644 --- a/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs +++ b/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs @@ -1,6 +1,6 @@ #!/usr/bin/env node /* - * Copyright 2024 Adobe. All rights reserved. + * Copyright 2025 Adobe. All rights reserved. * This file is licensed to you under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. You may obtain a copy * of the License at http://www.apache.org/licenses/LICENSE-2.0 @@ -28,7 +28,7 @@ * * Required env: * - AWS_REGION (e.g., us-east-1). - * - AWS credentials via default chain (env or shared profile). + * - AWS credentials via default chain (env). */ /* eslint-disable no-console */ @@ -47,9 +47,7 @@ import { createDataAccess, } from '../src/service/index.js'; -// ----------------------------- -// Small CLI arg parser -// ----------------------------- + function parseArgs(argv) { const args = { orgId: undefined, @@ -90,7 +88,6 @@ function parseArgs(argv) { } if (!args.apply && !args.dryRun) { - // default to dry-run unless apply explicitly set args.dryRun = true; } @@ -117,15 +114,10 @@ function toRegistrableDomain(baseURL) { const { hostname } = parsedBaseURL; const parts = hostname.split('.'); - // If hostname has only 1–2 parts, just use it as-is if (parts.length <= 2) { return hostname; } - // Reference semantics (see onboarding helper): - // - Consider the first two labels as potential subdomains - // - If a label is 2–3 characters long, drop it - // (e.g. www.adobe.com → adobe.com, fr.adobe.com → adobe.com) for (let i = 0; i < Math.min(parts.length, 2); i += 1) { const part = parts[i]; if (part.length === 2 || part.length === 3) { @@ -133,7 +125,6 @@ function toRegistrableDomain(baseURL) { } } - // Join remaining parts back to form the project/domain key return parts.filter(Boolean).join('.'); } catch { return undefined; @@ -144,7 +135,6 @@ async function detectSiteLocale(site, { noNetwork }) { const baseUrl = site.getBaseURL(); try { if (noNetwork) { - // Avoid network by supplying empty HTML and headers const res = await detectLocale({ baseUrl, html: '', headers: {} }); return { language: res.language || 'en', @@ -171,7 +161,6 @@ function csvRow(fields) { return fields.map(escape).join(','); } -// Simple concurrency limiter async function mapWithConcurrency(items, limit, mapper) { const results = new Array(items.length); const workers = []; @@ -226,14 +215,13 @@ async function main() { console, ); - // EntityRegistry exposes collections keyed by entity name (e.g. "Site", "Project") - // We alias them here for clarity. + const { Project: ProjectCollection, Site: SiteCollection, } = dataAccess; - // Resolve target sites + let sites = []; if (hasText(siteId)) { @@ -246,7 +234,6 @@ async function main() { if (SiteCollection.allByOrganizationId) { sites = await SiteCollection.allByOrganizationId(orgId); } else { - // Fallback: fetch all and filter (should rarely happen) const allSites = await SiteCollection.all(); sites = allSites.filter( (site) => site.getOrganizationId && site.getOrganizationId() === orgId, @@ -294,7 +281,6 @@ async function main() { if (ProjectCollection.allByOrganizationId) { orgProjects = await ProjectCollection.allByOrganizationId(org); } else { - // Fallback: load all, then filter in-memory const allProjects = await ProjectCollection.all(); orgProjects = allProjects.filter( (project) => project.getOrganizationId && project.getOrganizationId() === org, @@ -316,7 +302,6 @@ async function main() { organizationId: org, projectName: projName, }); - // refresh cache orgProjects.push(project); } return project; From 06d2c0ffdd5bc8308581e985ebda81480af59dbc Mon Sep 17 00:00:00 2001 From: Yaashi Madan Date: Tue, 18 Nov 2025 14:21:25 +0530 Subject: [PATCH 4/5] fix: lint issues --- .../scripts/migrate-sites-to-projects-and-locales.mjs | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs b/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs index 8425b6087..55809361e 100644 --- a/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs +++ b/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs @@ -47,7 +47,6 @@ import { createDataAccess, } from '../src/service/index.js'; - function parseArgs(argv) { const args = { orgId: undefined, @@ -215,13 +214,11 @@ async function main() { console, ); - const { Project: ProjectCollection, Site: SiteCollection, } = dataAccess; - let sites = []; if (hasText(siteId)) { From 4093697780ee2e7aad36fc90a4e03bd1279554a3 Mon Sep 17 00:00:00 2001 From: Yaashi Madan Date: Wed, 19 Nov 2025 14:39:11 +0530 Subject: [PATCH 5/5] fix: resolved comments --- .../migrate-sites-to-projects-and-locales.mjs | 79 ++++--------------- 1 file changed, 15 insertions(+), 64 deletions(-) diff --git a/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs b/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs index 55809361e..3d9499f91 100644 --- a/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs +++ b/packages/spacecat-shared-data-access/scripts/migrate-sites-to-projects-and-locales.mjs @@ -160,34 +160,6 @@ function csvRow(fields) { return fields.map(escape).join(','); } -async function mapWithConcurrency(items, limit, mapper) { - const results = new Array(items.length); - const workers = []; - let currentIndex = 0; - - const worker = async () => { - // eslint-disable-next-line no-constant-condition - while (true) { - if (currentIndex >= items.length) { - break; - } - const index = currentIndex; - currentIndex += 1; - // eslint-disable-next-line no-await-in-loop - const result = await mapper(items[index], index); - results[index] = result; - } - }; - - const workerCount = Math.min(limit, items.length); - for (let i = 0; i < workerCount; i += 1) { - workers.push(worker()); - } - - await Promise.all(workers); - return results; -} - // ----------------------------- // Main // ----------------------------- @@ -227,21 +199,7 @@ async function main() { } else if (hasText(projectId)) { sites = await SiteCollection.allByProjectId(projectId); } else if (hasText(orgId)) { - try { - if (SiteCollection.allByOrganizationId) { - sites = await SiteCollection.allByOrganizationId(orgId); - } else { - const allSites = await SiteCollection.all(); - sites = allSites.filter( - (site) => site.getOrganizationId && site.getOrganizationId() === orgId, - ); - } - } catch { - const allSites = await SiteCollection.all(); - sites = allSites.filter( - (site) => site.getOrganizationId && site.getOrganizationId() === orgId, - ); - } + sites = await SiteCollection.allByOrganizationId(orgId); if (hasText(domain)) { const dLower = domain.toLowerCase(); sites = sites.filter( @@ -274,21 +232,7 @@ async function main() { } let orgProjects = projectsCacheByOrg.get(org); if (!orgProjects) { - try { - if (ProjectCollection.allByOrganizationId) { - orgProjects = await ProjectCollection.allByOrganizationId(org); - } else { - const allProjects = await ProjectCollection.all(); - orgProjects = allProjects.filter( - (project) => project.getOrganizationId && project.getOrganizationId() === org, - ); - } - } catch { - const allProjects = await ProjectCollection.all(); - orgProjects = allProjects.filter( - (project) => project.getOrganizationId && project.getOrganizationId() === org, - ); - } + orgProjects = await ProjectCollection.allByOrganizationId(org); projectsCacheByOrg.set(org, orgProjects); } let project = orgProjects.find( @@ -356,19 +300,25 @@ async function main() { } // Apply updates (idempotent: only set fields that are unset) + let hasChanges = false; if (projectIdNew && !before.projectId && site.setProjectId) { site.setProjectId(projectIdNew); + hasChanges = true; } if (!before.language && site.setLanguage) { site.setLanguage(langNew); + hasChanges = true; } if (!before.region && site.setRegion) { site.setRegion(regionNew); + hasChanges = true; } - if (site.setUpdatedBy) { + if (hasChanges && site.setUpdatedBy) { site.setUpdatedBy('system'); } - await site.save(); + if (hasChanges) { + await site.save(); + } return { action: 'updated' }; }; @@ -380,10 +330,11 @@ async function main() { return; } - const effectiveConcurrency = Number.isFinite(concurrency) && concurrency > 0 - ? concurrency - : 10; - await mapWithConcurrency(sites, effectiveConcurrency, processOne); + // eslint-disable-next-line no-restricted-syntax + for (const site of sites) { + // eslint-disable-next-line no-await-in-loop + await processOne(site); + } // Reporting if (report) {