From 56b73a40866a339f67b0f517cbf84a06cb22f539 Mon Sep 17 00:00:00 2001 From: Alexandru Tudoran Date: Mon, 27 Oct 2025 17:20:10 +0200 Subject: [PATCH 1/5] Tud OnBoarding Url store --- .../models/audit-url/audit-url.collection.js | 108 ++++++ .../src/models/audit-url/audit-url.model.js | 82 +++++ .../src/models/audit-url/audit-url.schema.js | 97 ++++++ .../src/models/audit-url/index.d.ts | 47 +++ .../src/models/audit-url/index.js | 20 ++ .../src/models/base/entity.registry.js | 3 + .../src/models/index.js | 1 + .../test/fixtures/audit-urls.fixture.js | 72 ++++ .../test/fixtures/index.fixtures.js | 2 + .../test/it/audit-url/audit-url.test.js | 307 ++++++++++++++++++ .../test/it/util/db.js | 10 +- .../audit-url/audit-url.collection.test.js | 205 ++++++++++++ .../models/audit-url/audit-url.model.test.js | 189 +++++++++++ 13 files changed, 1142 insertions(+), 1 deletion(-) create mode 100644 packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js create mode 100644 packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js create mode 100644 packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js create mode 100644 packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts create mode 100644 packages/spacecat-shared-data-access/src/models/audit-url/index.js create mode 100644 packages/spacecat-shared-data-access/test/fixtures/audit-urls.fixture.js create mode 100644 packages/spacecat-shared-data-access/test/it/audit-url/audit-url.test.js create mode 100644 packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js create mode 100644 packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js new file mode 100644 index 000000000..ceeb6e1f5 --- /dev/null +++ 
b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2024 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+import { hasText } from '@adobe/spacecat-shared-utils';
+
+import BaseCollection from '../base/base.collection.js';
+
+/**
+ * AuditUrlCollection - A collection class responsible for managing AuditUrl entities.
+ * Extends the BaseCollection to provide specific methods for interacting with AuditUrl records.
+ *
+ * @class AuditUrlCollection
+ * @extends BaseCollection
+ */
+class AuditUrlCollection extends BaseCollection {
+  /**
+   * Finds an audit URL by site ID and URL.
+   * This is a convenience method for looking up a specific URL.
+   *
+   * @param {string} siteId - The site ID.
+   * @param {string} url - The URL to find.
+   * @returns {Promise<AuditUrl|null>} The first match, or null when nothing matched.
+   * @throws {Error} If siteId or url fails the hasText check.
+   */
+  async findBySiteIdAndUrl(siteId, url) {
+    if (!hasText(siteId) || !hasText(url)) {
+      throw new Error('Both siteId and url are required');
+    }
+
+    const results = await this.allBySiteIdAndUrl(siteId, url);
+    return results.length > 0 ? results[0] : null;
+  }
+
+  /**
+   * Gets all audit URLs for a site that have a specific audit type enabled.
+   * Note: This performs filtering after retrieval since audits is an array.
+   *
+   * @param {string} siteId - The site ID.
+   * @param {string} auditType - The audit type to filter by.
+   * @param {object} [options={}] - Query options, forwarded unchanged to allBySiteId.
+   *   NOTE(review): they take effect before the in-memory filter, so a limit presumably
+   *   caps the URLs examined rather than the matches returned - confirm with callers.
+   * @returns {Promise<AuditUrl[]>} The site's audit URLs that have auditType enabled.
+   * @throws {Error} If siteId or auditType fails the hasText check.
+   */
+  async allBySiteIdAndAuditType(siteId, auditType, options = {}) {
+    if (!hasText(siteId) || !hasText(auditType)) {
+      throw new Error('Both siteId and auditType are required');
+    }
+
+    const siteUrls = await this.allBySiteId(siteId, options);
+    return siteUrls.filter((auditUrl) => auditUrl.isAuditEnabled(auditType));
+  }
+
+  /**
+   * Removes all audit URLs for a specific site.
+   * Useful for cleanup operations.
+   *
+   * @param {string} siteId - The site ID.
+   * @returns {Promise<void>}
+   * @throws {Error} If siteId fails the hasText check.
+   */
+  async removeForSiteId(siteId) {
+    if (!hasText(siteId)) {
+      throw new Error('SiteId is required');
+    }
+
+    const urlsToRemove = await this.allBySiteId(siteId);
+    const idsToRemove = urlsToRemove.map((auditUrl) => auditUrl.getId());
+
+    // Nothing matched: skip the removeByIds call instead of passing an empty list.
+    if (idsToRemove.length > 0) {
+      await this.removeByIds(idsToRemove);
+    }
+  }
+
+  /**
+   * Removes audit URLs by source for a specific site.
+   * For example, remove all 'sitemap' sourced URLs.
+   *
+   * @param {string} siteId - The site ID.
+   * @param {string} source - The source to filter by.
+   * @returns {Promise<void>}
+   * @throws {Error} If siteId or source fails the hasText check.
+   */
+  async removeForSiteIdAndSource(siteId, source) {
+    if (!hasText(siteId) || !hasText(source)) {
+      throw new Error('Both siteId and source are required');
+    }
+
+    const urlsToRemove = await this.allBySiteIdAndSource(siteId, source);
+    const idsToRemove = urlsToRemove.map((auditUrl) => auditUrl.getId());
+
+    // Nothing matched: skip the removeByIds call instead of passing an empty list.
+    if (idsToRemove.length > 0) {
+      await this.removeByIds(idsToRemove);
+    }
+  }
+}
+
+export default AuditUrlCollection;
diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js
new file mode 100644
index 000000000..499b338ed
--- /dev/null
+++ b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2024 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+import BaseModel from '../base/base.model.js';
+
+/**
+ * Reads the audits list of a model, preferring the getAudits accessor and falling
+ * back to the plain audits property when the accessor is not defined.
+ *
+ * @param {AuditUrl} model - The model to read from.
+ * @returns {string[]} The audits list, or an empty array when none is set.
+ */
+function readAudits(model) {
+  return (model.getAudits ? model.getAudits() : model.audits) || [];
+}
+
+/**
+ * Stores an audits list on a model, preferring the setAudits accessor and falling
+ * back to assigning the plain audits property when the accessor is not defined.
+ *
+ * @param {AuditUrl} model - The model to update.
+ * @param {string[]} audits - The list to store.
+ * @returns {void}
+ */
+function writeAudits(model, audits) {
+  if (model.setAudits) {
+    model.setAudits(audits);
+  } else {
+    // eslint-disable-next-line no-param-reassign
+    model.audits = audits;
+  }
+}
+
+/**
+ * AuditUrl - A class representing an AuditUrl entity.
+ * Provides methods to access and manipulate AuditUrl-specific data.
+ *
+ * @class AuditUrl
+ * @extends BaseModel
+ */
+class AuditUrl extends BaseModel {
+  static DEFAULT_SOURCE = 'manual';
+
+  /**
+   * Checks if this URL is enabled for a specific audit type.
+   * @param {string} auditType - The audit type to check.
+   * @returns {boolean} True if the audit is enabled for this URL.
+   */
+  isAuditEnabled(auditType) {
+    return readAudits(this).includes(auditType);
+  }
+
+  /**
+   * Adds an audit type to the audits array if not already present.
+   * The stored array is replaced by a new one rather than mutated in place.
+   * @param {string} auditType - The audit type to add.
+   * @returns {this} The current instance for chaining.
+   */
+  enableAudit(auditType) {
+    const audits = readAudits(this);
+    if (!audits.includes(auditType)) {
+      writeAudits(this, [...audits, auditType]);
+    }
+    return this;
+  }
+
+  /**
+   * Removes an audit type from the audits array.
+   * The filtered array is always written back, even when nothing was removed.
+   * @param {string} auditType - The audit type to remove.
+   * @returns {this} The current instance for chaining.
+   */
+  disableAudit(auditType) {
+    writeAudits(this, readAudits(this).filter((audit) => audit !== auditType));
+    return this;
+  }
+
+  /**
+   * Checks if this URL was manually created by a user.
+   * @returns {boolean} True if the source equals AuditUrl.DEFAULT_SOURCE.
+   */
+  isManualSource() {
+    const source = this.getSource ? this.getSource() : this.source;
+    return source === AuditUrl.DEFAULT_SOURCE;
+  }
+}
+
+export default AuditUrl;
diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js
new file mode 100644
index 000000000..3ee09a614
--- /dev/null
+++ b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2024 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. 
You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/* c8 ignore start */ + +import { + isIsoDate, + isValidUrl, + isValidUUID, +} from '@adobe/spacecat-shared-utils'; + +import SchemaBuilder from '../base/schema.builder.js'; +import AuditUrl from './audit-url.model.js'; +import AuditUrlCollection from './audit-url.collection.js'; + +/* +Schema Doc: https://electrodb.dev/en/modeling/schema/ +Attribute Doc: https://electrodb.dev/en/modeling/attributes/ +Indexes Doc: https://electrodb.dev/en/modeling/indexes/ + +Data Access Patterns: +1. Get all URLs for a site: allBySiteId(siteId) +2. Get all URLs for a site by source: allBySiteIdAndSource(siteId, source) +3. Get a specific URL: allBySiteIdAndUrl(siteId, url) +4. 
Get URLs by audit type: allBySiteIdAndAuditType(siteId, auditType) - filtered in code
+
+Indexes:
+- siteId + url: via belongs_to. NOTE(review): presumably a GSI, not the primary key - confirm
+- bySiteIdAndSource: siteId + source (GSI) - for querying by source
+*/
+
+const schema = new SchemaBuilder(AuditUrl, AuditUrlCollection)
+  .addReference('belongs_to', 'Site', ['url']) // NOTE(review): ['url'] is presumably the sort key
+  .addAttribute('url', {
+    type: 'string',
+    required: true,
+    validate: (value) => isValidUrl(value),
+  })
+  .addAttribute('source', {
+    type: 'string',
+    required: true,
+    default: AuditUrl.DEFAULT_SOURCE, // 'manual'
+  })
+  .addAttribute('audits', {
+    type: 'list',
+    items: {
+      type: 'string',
+    },
+    required: true,
+    default: [], // no audit types enabled unless the caller supplies some
+  })
+  .addAttribute('createdAt', {
+    type: 'string',
+    required: true,
+    readOnly: true,
+    default: () => new Date().toISOString(),
+    validate: (value) => isIsoDate(value),
+  })
+  .addAttribute('updatedAt', {
+    type: 'string',
+    required: true,
+    readOnly: true,
+    watch: '*', // ElectroDB: run the setter below whenever any attribute changes
+    default: () => new Date().toISOString(),
+    set: () => new Date().toISOString(), // ignores the incoming value, stamps the current time
+    validate: (value) => isIsoDate(value),
+  })
+  .addAttribute('createdBy', {
+    type: 'string',
+    required: true,
+    readOnly: true,
+    default: 'system',
+  })
+  .addAttribute('updatedBy', {
+    type: 'string',
+    required: true,
+    watch: '*',
+    default: 'system',
+    set: (value) => value, // identity setter: stores the supplied value unchanged
+  })
+  // Add a second GSI for querying by siteId and source
+  .addIndex(
+    { composite: ['siteId'] },
+    { composite: ['source'] },
+  );
+
+export default schema.build();
+
diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts b/packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts
new file mode 100644
index 000000000..47c08367c
--- /dev/null
+++ b/packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2024 Adobe. All rights reserved. 
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+import type { BaseCollection, BaseModel, Site } from '../index';
+
+export interface AuditUrl extends BaseModel { // one URL of a site plus its enabled audit types
+  getAudits(): string[];
+  getCreatedAt(): string;
+  getCreatedBy(): string;
+  getSite(): Promise<Site>;
+  getSiteId(): string;
+  getSource(): string;
+  getUrl(): string;
+  setAudits(audits: string[]): AuditUrl;
+  setCreatedBy(createdBy: string): AuditUrl;
+  setSiteId(siteId: string): AuditUrl;
+  setSource(source: string): AuditUrl;
+  setUrl(url: string): AuditUrl;
+  isAuditEnabled(auditType: string): boolean;
+  enableAudit(auditType: string): AuditUrl; // returns the same instance for chaining
+  disableAudit(auditType: string): AuditUrl; // returns the same instance for chaining
+  isManualSource(): boolean;
+}
+
+export interface AuditUrlCollection extends BaseCollection<AuditUrl> {
+  allBySiteId(siteId: string): Promise<AuditUrl[]>;
+  allBySiteIdAndSource(siteId: string, source: string): Promise<AuditUrl[]>;
+  allBySiteIdAndSourceAndUrl(siteId: string, source: string, url: string): Promise<AuditUrl[]>;
+  allBySiteIdAndUrl(siteId: string, url: string): Promise<AuditUrl[]>;
+  findBySiteId(siteId: string): Promise<AuditUrl | null>;
+  findBySiteIdAndSource(siteId: string, source: string): Promise<AuditUrl | null>;
+  findBySiteIdAndSourceAndUrl(siteId: string, source: string, url: string): Promise<AuditUrl | null>;
+  findBySiteIdAndUrl(siteId: string, url: string): Promise<AuditUrl | null>;
+  allBySiteIdAndAuditType(siteId: string, auditType: string, options?: object): Promise<AuditUrl[]>;
+  removeForSiteId(siteId: string): Promise<void>;
+  removeForSiteIdAndSource(siteId: string, source: string): Promise<void>;
+}
+
diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/index.js b/packages/spacecat-shared-data-access/src/models/audit-url/index.js new file mode 100644 index 000000000..a373bb8e6 --- /dev/null +++ b/packages/spacecat-shared-data-access/src/models/audit-url/index.js @@ -0,0 +1,20 @@ +/* + * Copyright 2024 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +import AuditUrl from './audit-url.model.js'; +import AuditUrlCollection from './audit-url.collection.js'; + +export { + AuditUrl, + AuditUrlCollection, +}; + diff --git a/packages/spacecat-shared-data-access/src/models/base/entity.registry.js b/packages/spacecat-shared-data-access/src/models/base/entity.registry.js index c3b0fdceb..558253c91 100755 --- a/packages/spacecat-shared-data-access/src/models/base/entity.registry.js +++ b/packages/spacecat-shared-data-access/src/models/base/entity.registry.js @@ -16,6 +16,7 @@ import { collectionNameToEntityName, decapitalize } from '../../util/util.js'; import ApiKeyCollection from '../api-key/api-key.collection.js'; import AsyncJobCollection from '../async-job/async-job.collection.js'; import AuditCollection from '../audit/audit.collection.js'; +import AuditUrlCollection from '../audit-url/audit-url.collection.js'; import ConfigurationCollection from '../configuration/configuration.collection.js'; import ExperimentCollection from '../experiment/experiment.collection.js'; import EntitlementCollection from 
'../entitlement/entitlement.collection.js'; @@ -44,6 +45,7 @@ import TrialUserActivityCollection from '../trial-user-activity/trial-user-activ import ApiKeySchema from '../api-key/api-key.schema.js'; import AsyncJobSchema from '../async-job/async-job.schema.js'; import AuditSchema from '../audit/audit.schema.js'; +import AuditUrlSchema from '../audit-url/audit-url.schema.js'; import ConfigurationSchema from '../configuration/configuration.schema.js'; import EntitlementSchema from '../entitlement/entitlement.schema.js'; import FixEntitySchema from '../fix-entity/fix-entity.schema.js'; @@ -141,6 +143,7 @@ class EntityRegistry { EntityRegistry.registerEntity(ApiKeySchema, ApiKeyCollection); EntityRegistry.registerEntity(AsyncJobSchema, AsyncJobCollection); EntityRegistry.registerEntity(AuditSchema, AuditCollection); +EntityRegistry.registerEntity(AuditUrlSchema, AuditUrlCollection); EntityRegistry.registerEntity(ConfigurationSchema, ConfigurationCollection); EntityRegistry.registerEntity(EntitlementSchema, EntitlementCollection); EntityRegistry.registerEntity(FixEntitySchema, FixEntityCollection); diff --git a/packages/spacecat-shared-data-access/src/models/index.js b/packages/spacecat-shared-data-access/src/models/index.js index 43fb11612..3a65f2039 100755 --- a/packages/spacecat-shared-data-access/src/models/index.js +++ b/packages/spacecat-shared-data-access/src/models/index.js @@ -13,6 +13,7 @@ export * from './api-key/index.js'; export * from './async-job/index.js'; export * from './audit/index.js'; +export * from './audit-url/index.js'; export * from './base/index.js'; export * from './configuration/index.js'; export * from './entitlement/index.js'; diff --git a/packages/spacecat-shared-data-access/test/fixtures/audit-urls.fixture.js b/packages/spacecat-shared-data-access/test/fixtures/audit-urls.fixture.js new file mode 100644 index 000000000..45efcd0cf --- /dev/null +++ b/packages/spacecat-shared-data-access/test/fixtures/audit-urls.fixture.js @@ -0,0 +1,72 @@ 
+/*
+ * Copyright 2024 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+const auditUrls = [ // AuditUrl seed records; the IT suite asserts per-site and per-source counts
+  {
+    siteId: '5d6d4439-6659-46c2-b646-92d110fa5a52', // site 0
+    url: 'https://example0.com/page-1',
+    source: 'manual',
+    audits: ['accessibility', 'broken-backlinks'],
+    createdAt: '2025-10-27T12:00:00.000Z',
+    createdBy: 'user@example.com',
+  },
+  {
+    siteId: '5d6d4439-6659-46c2-b646-92d110fa5a52', // site 0
+    url: 'https://example0.com/page-2',
+    source: 'sitemap',
+    audits: ['accessibility'],
+    createdAt: '2025-10-27T12:00:00.000Z',
+    createdBy: 'system',
+  },
+  {
+    siteId: '5d6d4439-6659-46c2-b646-92d110fa5a52', // site 0
+    url: 'https://example0.com/page-3',
+    source: 'manual',
+    audits: ['broken-backlinks', 'lhs-mobile'],
+    createdAt: '2025-10-27T12:00:00.000Z',
+    createdBy: 'user@example.com',
+  },
+  {
+    siteId: '78fec9c7-2141-4600-b7b1-ea5c78752b91', // site 1
+    url: 'https://example1.com/page-1',
+    source: 'manual',
+    audits: ['accessibility', 'lhs-mobile'],
+    createdAt: '2025-10-27T12:00:00.000Z',
+    createdBy: 'admin@example.com',
+  },
+  {
+    siteId: '78fec9c7-2141-4600-b7b1-ea5c78752b91', // site 1
+    url: 'https://example1.com/page-2',
+    source: 'sitemap',
+    audits: [],
+    createdAt: '2025-10-27T12:00:00.000Z',
+    createdBy: 'system',
+  },
+  {
+    siteId: '56a691db-d32e-4308-ac99-a21de0580557', // site 2
+    url: 'https://example2.com/page-1',
+    source: 'manual',
+    audits: ['accessibility'],
+    createdAt: '2025-10-27T12:00:00.000Z',
+    createdBy: 'user@example.com',
+  },
+  {
+    siteId: '56a691db-d32e-4308-ac99-a21de0580557', // site 2
+    url: 'https://example2.com/assets/document.pdf',
+    source: 'manual',
+    audits: ['broken-backlinks'],
+    createdAt: '2025-10-27T12:00:00.000Z',
+    createdBy: 'user@example.com',
+  },
+];
+
+export default auditUrls;
diff --git a/packages/spacecat-shared-data-access/test/fixtures/index.fixtures.js b/packages/spacecat-shared-data-access/test/fixtures/index.fixtures.js
index 25323697d..79410e7fb 100644
--- a/packages/spacecat-shared-data-access/test/fixtures/index.fixtures.js
+++ b/packages/spacecat-shared-data-access/test/fixtures/index.fixtures.js
@@ -13,6 +13,7 @@
 import apiKeys from './api-keys.fixtures.js';
 import asyncJobs from './async-jobs.fixture.js';
 import audits from './audits.fixture.js';
+import auditUrls from './audit-urls.fixture.js';
 import configurations from './configurations.fixture.js';
 import experiments from './experiments.fixture.js';
 import importJobs from './import-jobs.fixture.js';
@@ -41,6 +42,7 @@ export default {
   apiKeys,
   asyncJobs,
   audits,
+  auditUrls,
   configurations,
   experiments,
   fixEntities,
diff --git a/packages/spacecat-shared-data-access/test/it/audit-url/audit-url.test.js b/packages/spacecat-shared-data-access/test/it/audit-url/audit-url.test.js
new file mode 100644
index 000000000..b50f9eefe
--- /dev/null
+++ b/packages/spacecat-shared-data-access/test/it/audit-url/audit-url.test.js
@@ -0,0 +1,307 @@
+/*
+ * Copyright 2024 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. 
You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+/* eslint-env mocha */
+
+import { expect, use } from 'chai';
+import chaiAsPromised from 'chai-as-promised';
+
+import { getDataAccess } from '../util/db.js';
+import { seedDatabase } from '../util/seed.js';
+import { sanitizeTimestamps } from '../../../src/util/util.js';
+
+use(chaiAsPromised);
+
+function checkAuditUrl(auditUrl) {
+  expect(auditUrl).to.be.an('object');
+  expect(auditUrl.getId()).to.be.a('string');
+  expect(auditUrl.getSiteId()).to.be.a('string');
+  expect(auditUrl.getUrl()).to.be.a('string');
+  expect(auditUrl.getSource()).to.be.a('string');
+  expect(auditUrl.getAudits()).to.be.an('array');
+  expect(auditUrl.getCreatedAt()).to.be.a('string');
+  expect(auditUrl.getCreatedBy()).to.be.a('string');
+}
+
+describe('AuditUrl IT', () => {
+  let sampleData;
+  let AuditUrl;
+
+  before(async () => {
+    sampleData = await seedDatabase();
+
+    const dataAccess = getDataAccess();
+    AuditUrl = dataAccess.AuditUrl;
+  });
+
+  it('finds one audit URL by id', async () => {
+    const auditUrl = await AuditUrl.findById(sampleData.auditUrls[0].getId());
+
+    expect(auditUrl).to.be.an('object');
+    expect(
+      sanitizeTimestamps(auditUrl.toJSON()),
+    ).to.eql(
+      sanitizeTimestamps(sampleData.auditUrls[0].toJSON()),
+    );
+  });
+
+  it('gets all audit URLs for a site', async () => {
+    const site = sampleData.sites[0];
+
+    const auditUrls = await AuditUrl.allBySiteId(site.getId());
+
+    expect(auditUrls).to.be.an('array');
+    expect(auditUrls.length).to.equal(3);
+
+    auditUrls.forEach((auditUrl) => {
+      checkAuditUrl(auditUrl);
+      expect(auditUrl.getSiteId()).to.equal(site.getId());
+    });
+  });
+
+  it('gets all audit URLs for a site by source', async () => {
+    const site = sampleData.sites[0];
+    const source = 'manual';
+
+    const auditUrls = await AuditUrl.allBySiteIdAndSource(site.getId(), source);
+
+    expect(auditUrls).to.be.an('array');
+    expect(auditUrls.length).to.equal(2);
+
+    auditUrls.forEach((auditUrl) => {
+      checkAuditUrl(auditUrl);
+      expect(auditUrl.getSiteId()).to.equal(site.getId());
+      expect(auditUrl.getSource()).to.equal(source);
+    });
+  });
+
+  it('finds an audit URL by site ID and URL', async () => {
+    const site = sampleData.sites[0];
+    const url = 'https://example0.com/page-1';
+
+    const auditUrl = await AuditUrl.findBySiteIdAndUrl(site.getId(), url);
+
+    expect(auditUrl).to.be.an('object');
+    checkAuditUrl(auditUrl);
+    expect(auditUrl.getSiteId()).to.equal(site.getId());
+    expect(auditUrl.getUrl()).to.equal(url);
+  });
+
+  it('returns null when audit URL not found', async () => {
+    const site = sampleData.sites[0];
+    const url = 'https://example0.com/nonexistent';
+
+    const auditUrl = await AuditUrl.findBySiteIdAndUrl(site.getId(), url);
+
+    expect(auditUrl).to.be.null;
+  });
+
+  it('creates a new audit URL', async () => {
+    const site = sampleData.sites[0];
+    const data = {
+      siteId: site.getId(),
+      url: 'https://example0.com/new-page',
+      source: 'manual',
+      audits: ['accessibility', 'broken-backlinks'],
+      createdBy: 'test@example.com',
+    };
+
+    const auditUrl = await AuditUrl.create(data);
+
+    checkAuditUrl(auditUrl);
+    expect(auditUrl.getSiteId()).to.equal(data.siteId);
+    expect(auditUrl.getUrl()).to.equal(data.url);
+    expect(auditUrl.getSource()).to.equal(data.source);
+    expect(auditUrl.getAudits()).to.deep.equal(data.audits);
+    expect(auditUrl.getCreatedBy()).to.equal(data.createdBy);
+  });
+
+  it('creates an audit URL with default values', async () => {
+    const site = sampleData.sites[0];
+    const data = {
+      siteId: site.getId(),
+      url: 'https://example0.com/default-page',
+      createdBy: 'test@example.com',
+    };
+
+    const auditUrl = await AuditUrl.create(data);
+
+    checkAuditUrl(auditUrl);
+    expect(auditUrl.getSource()).to.equal('manual'); // Default
+    expect(auditUrl.getAudits()).to.deep.equal([]); // Default
+  });
+
+  it('updates an audit URL', async () => {
+    const auditUrl = await AuditUrl.findById(sampleData.auditUrls[0].getId());
+
+    auditUrl.setAudits(['accessibility']);
+    auditUrl.setUpdatedBy('updater@example.com');
+
+    const updated = await auditUrl.save();
+
+    expect(updated.getAudits()).to.deep.equal(['accessibility']);
+    expect(updated.getUpdatedBy()).to.equal('updater@example.com');
+  });
+
+  it('removes an audit URL', async () => {
+    const site = sampleData.sites[0];
+    const data = {
+      siteId: site.getId(),
+      url: 'https://example0.com/to-delete',
+      source: 'manual',
+      audits: ['accessibility'],
+      createdBy: 'test@example.com',
+    };
+
+    const auditUrl = await AuditUrl.create(data);
+    const id = auditUrl.getId();
+
+    await auditUrl.remove();
+
+    const deleted = await AuditUrl.findById(id);
+    expect(deleted).to.be.null;
+  });
+
+  describe('Custom Methods', () => {
+    it('checks if an audit is enabled', async () => {
+      const auditUrl = await AuditUrl.findById(sampleData.auditUrls[0].getId());
+
+      expect(auditUrl.isAuditEnabled('accessibility')).to.be.true;
+      expect(auditUrl.isAuditEnabled('lhs-mobile')).to.be.false;
+    });
+
+    it('enables an audit', async () => {
+      const auditUrl = await AuditUrl.findById(sampleData.auditUrls[0].getId());
+      const originalAudits = auditUrl.getAudits();
+
+      auditUrl.enableAudit('lhs-mobile');
+
+      expect(auditUrl.getAudits()).to.include('lhs-mobile');
+      expect(auditUrl.getAudits().length).to.equal(originalAudits.length + 1);
+    });
+
+    it('does not duplicate audits when enabling', async () => {
+      const auditUrl = await AuditUrl.findById(sampleData.auditUrls[0].getId());
+      const originalLength = auditUrl.getAudits().length;
+
+      auditUrl.enableAudit('accessibility'); // Already enabled
+
+      expect(auditUrl.getAudits().length).to.equal(originalLength);
+    });
+
+    it('disables an audit', async () => {
+      const auditUrl = await AuditUrl.findById(sampleData.auditUrls[0].getId());
+
+      auditUrl.disableAudit('accessibility');
+
+      expect(auditUrl.getAudits()).to.not.include('accessibility');
+    });
+
+    it('checks if source is manual', async () => {
+      const manualUrl = await AuditUrl.findById(sampleData.auditUrls[0].getId());
+      const sitemapUrl = await AuditUrl.findById(sampleData.auditUrls[1].getId());
+
+      expect(manualUrl.isManualSource()).to.be.true;
+      expect(sitemapUrl.isManualSource()).to.be.false;
+    });
+  });
+
+  describe('Collection Methods', () => {
+    it('gets all audit URLs by audit type', async () => {
+      const site = sampleData.sites[0];
+
+      const auditUrls = await AuditUrl.allBySiteIdAndAuditType(
+        site.getId(),
+        'accessibility',
+      );
+
+      expect(auditUrls).to.be.an('array');
+      // Earlier tests in this suite add URLs for this site, so only the seeded minimum is stable
+      expect(auditUrls.length).to.be.at.least(2);
+
+      auditUrls.forEach((auditUrl) => {
+        expect(auditUrl.isAuditEnabled('accessibility')).to.be.true;
+      });
+    });
+
+    it('removes all audit URLs for a site', async () => {
+      const site = sampleData.sites[2];
+
+      // Verify URLs exist
+      let auditUrls = await AuditUrl.allBySiteId(site.getId());
+      expect(auditUrls.length).to.be.greaterThan(0);
+
+      // Remove all
+      await AuditUrl.removeForSiteId(site.getId());
+
+      // Verify removed
+      auditUrls = await AuditUrl.allBySiteId(site.getId());
+      expect(auditUrls.length).to.equal(0);
+    });
+
+    it('removes audit URLs by source', async () => {
+      const site = sampleData.sites[0];
+
+      // Remove all manual URLs
+      await AuditUrl.removeForSiteIdAndSource(site.getId(), 'manual');
+
+      // Verify only sitemap URLs remain (the seeded sitemap URL must survive the removal)
+      const auditUrls = await AuditUrl.allBySiteId(site.getId());
+      expect(auditUrls.length).to.be.greaterThan(0);
+      const remainingSources = auditUrls.map((auditUrl) => auditUrl.getSource());
+      expect(remainingSources).to.not.include('manual');
+    });
+  });
+
+  describe('Validation', () => {
+    it('rejects invalid UUID for siteId', async () => {
+      const data = {
+        siteId: 'invalid-uuid',
+        url: 'https://example.com/page',
+        createdBy: 'test@example.com',
+      };
+
+      await expect(AuditUrl.create(data)).to.be.rejected;
+    });
+
+    it('rejects invalid URL format', async () => {
+      const site = sampleData.sites[0];
+      const data = {
+        siteId: site.getId(),
+        url: 'not-a-valid-url',
+        createdBy: 'test@example.com',
+      };
+
+      await expect(AuditUrl.create(data)).to.be.rejected;
+    });
+
+    it('requires siteId', async () => {
+      const data = {
+        url: 'https://example.com/page',
+        createdBy: 'test@example.com',
+      };
+
+      await expect(AuditUrl.create(data)).to.be.rejected;
+    });
+
+    it('requires url', async () => {
+      const site = sampleData.sites[0];
+      const data = {
+        siteId: site.getId(),
+        createdBy: 'test@example.com',
+      };
+
+      await expect(AuditUrl.create(data)).to.be.rejected;
+    });
+  });
+});
+
diff --git a/packages/spacecat-shared-data-access/test/it/util/db.js b/packages/spacecat-shared-data-access/test/it/util/db.js
index 9e93d3344..90255d898 100755
--- a/packages/spacecat-shared-data-access/test/it/util/db.js
+++ b/packages/spacecat-shared-data-access/test/it/util/db.js
@@ -75,7 +75,15 @@ const getDynamoClients = (config = {}) => {
   return { dbClient, docClient };
 };
 
-export const getDataAccess = (config, logger = console) => {
+// Test logger: warn/error still reach the console so failures stay visible, while info
+// and debug are dropped because the ElectroDB debug output is massive (memory issues).
+const minimalLogger = {
+  ...console,
+  info: () => {},
+  debug: () => {},
+};
+
+export const getDataAccess = (config, logger = minimalLogger) => {
   const { dbClient } = getDynamoClients(config);
   return createDataAccess(TEST_DA_CONFIG, logger, dbClient);
 };
diff --git a/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js 
b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js new file mode 100644 index 000000000..c4c1e9472 --- /dev/null +++ b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js @@ -0,0 +1,205 @@ +/* + * Copyright 2024 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/* eslint-env mocha */ + +import { expect, use as chaiUse } from 'chai'; +import chaiAsPromised from 'chai-as-promised'; +import { stub } from 'sinon'; +import sinonChai from 'sinon-chai'; + +import AuditUrl from '../../../../src/models/audit-url/audit-url.model.js'; +import { createElectroMocks } from '../../util.js'; + +chaiUse(chaiAsPromised); +chaiUse(sinonChai); + +describe('AuditUrlCollection', () => { + let instance; + let mockElectroService; + let mockEntityRegistry; + let mockLogger; + let model; + let schema; + + const mockRecord = { + auditUrlId: 'au12345', + siteId: 'site12345', + url: 'https://example.com/page', + source: 'manual', + audits: ['accessibility'], + }; + + beforeEach(() => { + ({ + mockElectroService, + mockEntityRegistry, + mockLogger, + collection: instance, + model, + schema, + } = createElectroMocks(AuditUrl, mockRecord)); + }); + + describe('constructor', () => { + it('initializes the AuditUrlCollection instance correctly', () => { + expect(instance).to.be.an('object'); + expect(instance.electroService).to.equal(mockElectroService); + 
expect(instance.entityRegistry).to.equal(mockEntityRegistry); + expect(instance.schema).to.equal(schema); + expect(instance.log).to.equal(mockLogger); + expect(model).to.be.an('object'); + }); + }); + + describe('findBySiteIdAndUrl', () => { + it('throws an error if siteId is not provided', async () => { + await expect(instance.findBySiteIdAndUrl()).to.be.rejectedWith('Both siteId and url are required'); + }); + + it('throws an error if url is not provided', async () => { + await expect(instance.findBySiteIdAndUrl('site123')).to.be.rejectedWith('Both siteId and url are required'); + }); + + it('returns the audit URL when found', async () => { + instance.allBySiteIdAndUrl = stub().resolves([model]); + + const result = await instance.findBySiteIdAndUrl('site123', 'https://example.com/page'); + + expect(result).to.equal(model); + expect(instance.allBySiteIdAndUrl).to.have.been.calledOnceWith('site123', 'https://example.com/page'); + }); + + it('returns null when audit URL is not found', async () => { + instance.allBySiteIdAndUrl = stub().resolves([]); + + const result = await instance.findBySiteIdAndUrl('site123', 'https://example.com/page'); + + expect(result).to.be.null; + }); + }); + + describe('allBySiteIdAndAuditType', () => { + it('throws an error if siteId is not provided', async () => { + await expect(instance.allBySiteIdAndAuditType()).to.be.rejectedWith('Both siteId and auditType are required'); + }); + + it('throws an error if auditType is not provided', async () => { + await expect(instance.allBySiteIdAndAuditType('site123')).to.be.rejectedWith('Both siteId and auditType are required'); + }); + + it('filters URLs by audit type', async () => { + const mockModel1 = Object.create(AuditUrl.prototype); + mockModel1.audits = ['accessibility', 'seo']; + mockModel1.isAuditEnabled = (type) => mockModel1.audits.includes(type); + + const mockModel2 = Object.create(AuditUrl.prototype); + mockModel2.audits = ['broken-backlinks']; + mockModel2.isAuditEnabled = (type) => 
mockModel2.audits.includes(type); + + const mockModel3 = Object.create(AuditUrl.prototype); + mockModel3.audits = ['accessibility']; + mockModel3.isAuditEnabled = (type) => mockModel3.audits.includes(type); + + instance.allBySiteId = stub().resolves([mockModel1, mockModel2, mockModel3]); + + const result = await instance.allBySiteIdAndAuditType('site123', 'accessibility'); + + expect(result).to.be.an('array'); + expect(result).to.have.length(2); + expect(result).to.include(mockModel1); + expect(result).to.include(mockModel3); + expect(result).to.not.include(mockModel2); + }); + + it('returns empty array when no URLs match the audit type', async () => { + const mockModel = Object.create(AuditUrl.prototype); + mockModel.audits = ['seo']; + mockModel.isAuditEnabled = (type) => mockModel.audits.includes(type); + + instance.allBySiteId = stub().resolves([mockModel]); + + const result = await instance.allBySiteIdAndAuditType('site123', 'accessibility'); + + expect(result).to.be.an('array'); + expect(result).to.have.length(0); + }); + + it('passes pagination options to allBySiteId', async () => { + instance.allBySiteId = stub().resolves([]); + const options = { limit: 50, cursor: 'abc123' }; + + await instance.allBySiteIdAndAuditType('site123', 'accessibility', options); + + expect(instance.allBySiteId).to.have.been.calledOnceWith('site123', options); + }); + }); + + describe('removeForSiteId', () => { + it('throws an error if siteId is not provided', async () => { + await expect(instance.removeForSiteId()).to.be.rejectedWith('SiteId is required'); + }); + + it('removes all audit URLs for a given siteId', async () => { + const siteId = 'site12345'; + instance.allBySiteId = stub().resolves([model]); + + await instance.removeForSiteId(siteId); + + expect(instance.allBySiteId).to.have.been.calledOnceWith(siteId); + expect(mockElectroService.entities.auditUrl.delete).to.have.been.calledOnceWith([{ auditUrlId: 'au12345' }]); + }); + + it('does not call remove when there are no 
audit URLs', async () => { + const siteId = 'site12345'; + instance.allBySiteId = stub().resolves([]); + + await instance.removeForSiteId(siteId); + + expect(instance.allBySiteId).to.have.been.calledOnceWith(siteId); + expect(mockElectroService.entities.auditUrl.delete).to.not.have.been.called; + }); + }); + + describe('removeForSiteIdAndSource', () => { + it('throws an error if siteId is not provided', async () => { + await expect(instance.removeForSiteIdAndSource()).to.be.rejectedWith('Both siteId and source are required'); + }); + + it('throws an error if source is not provided', async () => { + await expect(instance.removeForSiteIdAndSource('site123')).to.be.rejectedWith('Both siteId and source are required'); + }); + + it('removes all audit URLs for a given siteId and source', async () => { + const siteId = 'site12345'; + const source = 'manual'; + instance.allBySiteIdAndSource = stub().resolves([model]); + + await instance.removeForSiteIdAndSource(siteId, source); + + expect(instance.allBySiteIdAndSource).to.have.been.calledOnceWith(siteId, source); + expect(mockElectroService.entities.auditUrl.delete).to.have.been.calledOnceWith([{ auditUrlId: 'au12345' }]); + }); + + it('does not call remove when there are no matching audit URLs', async () => { + const siteId = 'site12345'; + const source = 'sitemap'; + instance.allBySiteIdAndSource = stub().resolves([]); + + await instance.removeForSiteIdAndSource(siteId, source); + + expect(instance.allBySiteIdAndSource).to.have.been.calledOnceWith(siteId, source); + expect(mockElectroService.entities.auditUrl.delete).to.not.have.been.called; + }); + }); +}); + diff --git a/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js new file mode 100644 index 000000000..8b74ed7fe --- /dev/null +++ b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js @@ -0,0 +1,189 @@ 
+/* + * Copyright 2024 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/* eslint-env mocha */ + +import { expect, use as chaiUse } from 'chai'; +import chaiAsPromised from 'chai-as-promised'; +import sinonChai from 'sinon-chai'; + +import AuditUrl from '../../../../src/models/audit-url/audit-url.model.js'; +import { createElectroMocks } from '../../util.js'; + +chaiUse(chaiAsPromised); +chaiUse(sinonChai); + +describe('AuditUrlModel', () => { + let instance; + let mockRecord; + + beforeEach(() => { + mockRecord = { + auditUrlId: 'au12345', + siteId: 'site12345', + url: 'https://example.com/page', + source: 'manual', + audits: ['accessibility', 'broken-backlinks'], + createdAt: '2025-10-27T12:00:00.000Z', + createdBy: 'user@example.com', + updatedAt: '2025-10-27T12:00:00.000Z', + updatedBy: 'user@example.com', + }; + + ({ + model: instance, + } = createElectroMocks(AuditUrl, mockRecord)); + }); + + describe('constructor', () => { + it('initializes the AuditUrl instance correctly', () => { + expect(instance).to.be.an('object'); + expect(instance.record).to.deep.equal(mockRecord); + }); + }); + + describe('DEFAULT_SOURCE', () => { + it('has the correct default source value', () => { + expect(AuditUrl.DEFAULT_SOURCE).to.equal('manual'); + }); + }); + + describe('isAuditEnabled', () => { + it('returns true when audit is enabled', () => { + expect(instance.isAuditEnabled('accessibility')).to.be.true; + 
expect(instance.isAuditEnabled('broken-backlinks')).to.be.true; + }); + + it('returns false when audit is not enabled', () => { + expect(instance.isAuditEnabled('lhs-mobile')).to.be.false; + expect(instance.isAuditEnabled('seo')).to.be.false; + }); + + it('handles empty audits array', () => { + instance.record.audits = []; + expect(instance.isAuditEnabled('accessibility')).to.be.false; + }); + + it('handles undefined audits', () => { + instance.record.audits = undefined; + expect(instance.isAuditEnabled('accessibility')).to.be.false; + }); + + it('works with direct property access when getAudits is not available', () => { + const plainObj = Object.create(AuditUrl.prototype); + plainObj.audits = ['accessibility']; + expect(plainObj.isAuditEnabled('accessibility')).to.be.true; + }); + }); + + describe('enableAudit', () => { + it('adds audit to the list when not present', () => { + instance.enableAudit('lhs-mobile'); + expect(instance.getAudits()).to.include('lhs-mobile'); + }); + + it('does not add duplicate audits', () => { + const originalLength = instance.getAudits().length; + instance.enableAudit('accessibility'); // Already exists + expect(instance.getAudits().length).to.equal(originalLength); + }); + + it('returns the instance for method chaining', () => { + const result = instance.enableAudit('seo'); + expect(result).to.equal(instance); + }); + + it('works when starting with empty audits array', () => { + instance.record.audits = []; + instance.enableAudit('accessibility'); + expect(instance.getAudits()).to.deep.equal(['accessibility']); + }); + + it('works with direct property access', () => { + const plainObj = Object.create(AuditUrl.prototype); + plainObj.audits = []; + plainObj.enableAudit('accessibility'); + expect(plainObj.audits).to.deep.equal(['accessibility']); + }); + }); + + describe('disableAudit', () => { + it('removes audit from the list when present', () => { + instance.disableAudit('accessibility'); + 
expect(instance.getAudits()).to.not.include('accessibility'); + }); + + it('does nothing if audit is not in the list', () => { + const originalLength = instance.getAudits().length; + instance.disableAudit('seo'); // Not in list + expect(instance.getAudits().length).to.equal(originalLength); + }); + + it('returns the instance for method chaining', () => { + const result = instance.disableAudit('accessibility'); + expect(result).to.equal(instance); + }); + + it('handles removing all audits', () => { + instance.disableAudit('accessibility'); + instance.disableAudit('broken-backlinks'); + expect(instance.getAudits()).to.deep.equal([]); + }); + + it('works with direct property access', () => { + const plainObj = Object.create(AuditUrl.prototype); + plainObj.audits = ['accessibility', 'seo']; + plainObj.disableAudit('accessibility'); + expect(plainObj.audits).to.deep.equal(['seo']); + }); + }); + + describe('isManualSource', () => { + it('returns true for manual source', () => { + instance.record.source = 'manual'; + expect(instance.isManualSource()).to.be.true; + }); + + it('returns false for non-manual source', () => { + instance.record.source = 'sitemap'; + expect(instance.isManualSource()).to.be.false; + }); + + it('returns false for other sources', () => { + instance.record.source = 'api'; + expect(instance.isManualSource()).to.be.false; + }); + + it('works with direct property access', () => { + const plainObj = Object.create(AuditUrl.prototype); + plainObj.source = 'manual'; + expect(plainObj.isManualSource()).to.be.true; + + plainObj.source = 'sitemap'; + expect(plainObj.isManualSource()).to.be.false; + }); + }); + + describe('method chaining', () => { + it('allows chaining enableAudit and disableAudit', () => { + instance + .enableAudit('seo') + .enableAudit('lhs-mobile') + .disableAudit('accessibility'); + + expect(instance.isAuditEnabled('seo')).to.be.true; + expect(instance.isAuditEnabled('lhs-mobile')).to.be.true; + 
expect(instance.isAuditEnabled('accessibility')).to.be.false; + }); + }); +}); + From 976ba3ee6b7774e2fdc6cbe6725553e33a9b2068 Mon Sep 17 00:00:00 2001 From: Alexandru Tudoran Date: Fri, 14 Nov 2025 15:14:29 +0200 Subject: [PATCH 2/5] feat(audit-url): add rank and traffic fields with sorting support - Add rank and traffic fields to AuditUrl schema (optional, nullable) - Implement sortAuditUrls static method for sorting by multiple fields - Add allBySiteIdSorted and allBySiteIdAndSourceSorted methods - Update allBySiteIdAndAuditType to support sorting - Add TypeScript definitions for new fields and methods - Add 20 comprehensive unit tests for sorting functionality - All tests passing (1147 total) - Code coverage: 98.01% for audit-url module --- .../models/audit-url/audit-url.collection.js | 149 ++++++++++- .../src/models/audit-url/audit-url.model.js | 1 - .../src/models/audit-url/audit-url.schema.js | 12 +- .../src/models/audit-url/index.d.ts | 8 +- .../src/models/audit-url/index.js | 1 - .../test/it/audit-url/audit-url.test.js | 1 - .../audit-url/audit-url.collection.test.js | 240 +++++++++++++++++- .../models/audit-url/audit-url.model.test.js | 3 +- 8 files changed, 392 insertions(+), 23 deletions(-) diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js index ceeb6e1f5..24ddd9f92 100644 --- a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js +++ b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js @@ -22,6 +22,72 @@ import BaseCollection from '../base/base.collection.js'; * @extends BaseCollection */ class AuditUrlCollection extends BaseCollection { + /** + * Sorts audit URLs by a specified field. + * @param {Array} auditUrls - Array of AuditUrl objects to sort. + * @param {string} sortBy - Field to sort by ('rank', 'traffic', 'url', 'createdAt', 'updatedAt'). 
+ * @param {string} sortOrder - Sort order ('asc' or 'desc'). Default: 'asc'. + * @returns {Array} Sorted array of AuditUrl objects. + * @private + */ + static sortAuditUrls(auditUrls, sortBy = 'rank', sortOrder = 'asc') { + if (!auditUrls || auditUrls.length === 0) { + return auditUrls; + } + + const sorted = [...auditUrls].sort((a, b) => { + let aValue; + let bValue; + + // Get values using getter methods if available + switch (sortBy) { + case 'rank': + aValue = a.getRank ? a.getRank() : a.rank; + bValue = b.getRank ? b.getRank() : b.rank; + break; + case 'traffic': + aValue = a.getTraffic ? a.getTraffic() : a.traffic; + bValue = b.getTraffic ? b.getTraffic() : b.traffic; + break; + case 'url': + aValue = a.getUrl ? a.getUrl() : a.url; + bValue = b.getUrl ? b.getUrl() : b.url; + break; + case 'createdAt': + aValue = a.getCreatedAt ? a.getCreatedAt() : a.createdAt; + bValue = b.getCreatedAt ? b.getCreatedAt() : b.createdAt; + break; + case 'updatedAt': + aValue = a.getUpdatedAt ? a.getUpdatedAt() : a.updatedAt; + bValue = b.getUpdatedAt ? b.getUpdatedAt() : b.updatedAt; + break; + default: + return 0; + } + + // Handle null/undefined values (push to end) + if (aValue == null && bValue == null) return 0; + if (aValue == null) return 1; + if (bValue == null) return -1; + + // Compare values + let comparison = 0; + if (typeof aValue === 'string' && typeof bValue === 'string') { + comparison = aValue.localeCompare(bValue); + } else if (aValue < bValue) { + comparison = -1; + } else if (aValue > bValue) { + comparison = 1; + } else { + comparison = 0; + } + + return sortOrder === 'desc' ? -comparison : comparison; + }); + + return sorted; + } + /** * Finds an audit URL by site ID and URL. * This is a convenience method for looking up a specific URL. @@ -45,7 +111,7 @@ class AuditUrlCollection extends BaseCollection { * * @param {string} siteId - The site ID. * @param {string} auditType - The audit type to filter by. 
- * @param {object} [options={}] - Query options (limit, cursor). + * @param {object} [options={}] - Query options (limit, cursor, sortBy, sortOrder). * @returns {Promise<{items: AuditUrl[], cursor?: string}>} Paginated results. */ async allBySiteIdAndAuditType(siteId, auditType, options = {}) { @@ -53,15 +119,91 @@ class AuditUrlCollection extends BaseCollection { throw new Error('Both siteId and auditType are required'); } + const { sortBy, sortOrder, ...queryOptions } = options; + // Get all URLs for the site - const allUrls = await this.allBySiteId(siteId, options); + const allUrls = await this.allBySiteId(siteId, queryOptions); // Filter by audit type - const filtered = allUrls.filter((auditUrl) => auditUrl.isAuditEnabled(auditType)); + let filtered = allUrls.filter((auditUrl) => auditUrl.isAuditEnabled(auditType)); + + // Apply sorting if requested + if (sortBy) { + filtered = AuditUrlCollection.sortAuditUrls(filtered, sortBy, sortOrder); + } return filtered; } + /** + * Gets all audit URLs for a site with sorting support. + * @param {string} siteId - The site ID. + * @param {object} [options={}] - Query options (limit, cursor, sortBy, sortOrder). + * @returns {Promise<{items: AuditUrl[], cursor?: string}>} Paginated and sorted results. + */ + async allBySiteIdSorted(siteId, options = {}) { + if (!hasText(siteId)) { + throw new Error('SiteId is required'); + } + + const { sortBy, sortOrder, ...queryOptions } = options; + + // Get all URLs for the site + const result = await this.allBySiteId(siteId, queryOptions); + + // Handle both array and paginated result formats + const items = Array.isArray(result) ? result : (result.items || []); + + // Apply sorting if requested + const sortedItems = sortBy + ? 
AuditUrlCollection.sortAuditUrls(items, sortBy, sortOrder) : items; + + // Return in the same format as received + if (Array.isArray(result)) { + return sortedItems; + } + + return { + items: sortedItems, + cursor: result.cursor, + }; + } + + /** + * Gets all audit URLs for a site by source with sorting support. + * @param {string} siteId - The site ID. + * @param {string} source - The source to filter by. + * @param {object} [options={}] - Query options (limit, cursor, sortBy, sortOrder). + * @returns {Promise<{items: AuditUrl[], cursor?: string}>} Paginated and sorted results. + */ + async allBySiteIdAndSourceSorted(siteId, source, options = {}) { + if (!hasText(siteId) || !hasText(source)) { + throw new Error('Both siteId and source are required'); + } + + const { sortBy, sortOrder, ...queryOptions } = options; + + // Get all URLs for the site and source + const result = await this.allBySiteIdAndSource(siteId, source, queryOptions); + + // Handle both array and paginated result formats + const items = Array.isArray(result) ? result : (result.items || []); + + // Apply sorting if requested + const sortedItems = sortBy + ? AuditUrlCollection.sortAuditUrls(items, sortBy, sortOrder) : items; + + // Return in the same format as received + if (Array.isArray(result)) { + return sortedItems; + } + + return { + items: sortedItems, + cursor: result.cursor, + }; + } + /** * Removes all audit URLs for a specific site. * Useful for cleanup operations. 
@@ -105,4 +247,3 @@ class AuditUrlCollection extends BaseCollection { } export default AuditUrlCollection; - diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js index 499b338ed..b270b1291 100644 --- a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js +++ b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js @@ -79,4 +79,3 @@ class AuditUrl extends BaseModel { } export default AuditUrl; - diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js index 3ee09a614..8aba18e0e 100644 --- a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js +++ b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js @@ -15,7 +15,6 @@ import { isIsoDate, isValidUrl, - isValidUUID, } from '@adobe/spacecat-shared-utils'; import SchemaBuilder from '../base/schema.builder.js'; @@ -58,6 +57,16 @@ const schema = new SchemaBuilder(AuditUrl, AuditUrlCollection) required: true, default: [], }) + .addAttribute('rank', { + type: 'number', + required: false, + default: null, + }) + .addAttribute('traffic', { + type: 'number', + required: false, + default: null, + }) .addAttribute('createdAt', { type: 'string', required: true, @@ -94,4 +103,3 @@ const schema = new SchemaBuilder(AuditUrl, AuditUrlCollection) ); export default schema.build(); - diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts b/packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts index 47c08367c..e6d5c7aac 100644 --- a/packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts +++ b/packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts @@ -16,12 +16,16 @@ export interface AuditUrl extends BaseModel { getAudits(): string[]; getCreatedAt(): 
string; getCreatedBy(): string; + getRank(): number | null; + getTraffic(): number | null; getSite(): Promise; getSiteId(): string; getSource(): string; getUrl(): string; setAudits(audits: string[]): AuditUrl; setCreatedBy(createdBy: string): AuditUrl; + setRank(rank: number | null): AuditUrl; + setTraffic(traffic: number | null): AuditUrl; setSiteId(siteId: string): AuditUrl; setSource(source: string): AuditUrl; setUrl(url: string): AuditUrl; @@ -36,11 +40,13 @@ export interface AuditUrlCollection extends BaseCollection { allBySiteIdAndSource(siteId: string, source: string): Promise; allBySiteIdAndSourceAndUrl(siteId: string, source: string, url: string): Promise; allBySiteIdAndUrl(siteId: string, url: string): Promise; + allBySiteIdSorted(siteId: string, options?: { limit?: number; cursor?: string; sortBy?: string; sortOrder?: string }): Promise<{ items: AuditUrl[]; cursor?: string }>; + allBySiteIdAndSourceSorted(siteId: string, source: string, options?: { limit?: number; cursor?: string; sortBy?: string; sortOrder?: string }): Promise<{ items: AuditUrl[]; cursor?: string }>; findBySiteId(siteId: string): Promise; findBySiteIdAndSource(siteId: string, source: string): Promise; findBySiteIdAndSourceAndUrl(siteId: string, source: string, url: string): Promise; findBySiteIdAndUrl(siteId: string, url: string): Promise; - allBySiteIdAndAuditType(siteId: string, auditType: string, options?: object): Promise; + allBySiteIdAndAuditType(siteId: string, auditType: string, options?: { limit?: number; cursor?: string; sortBy?: string; sortOrder?: string }): Promise; removeForSiteId(siteId: string): Promise; removeForSiteIdAndSource(siteId: string, source: string): Promise; } diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/index.js b/packages/spacecat-shared-data-access/src/models/audit-url/index.js index a373bb8e6..013ba5b1f 100644 --- a/packages/spacecat-shared-data-access/src/models/audit-url/index.js +++ 
b/packages/spacecat-shared-data-access/src/models/audit-url/index.js @@ -17,4 +17,3 @@ export { AuditUrl, AuditUrlCollection, }; - diff --git a/packages/spacecat-shared-data-access/test/it/audit-url/audit-url.test.js b/packages/spacecat-shared-data-access/test/it/audit-url/audit-url.test.js index b50f9eefe..e8cf91660 100644 --- a/packages/spacecat-shared-data-access/test/it/audit-url/audit-url.test.js +++ b/packages/spacecat-shared-data-access/test/it/audit-url/audit-url.test.js @@ -304,4 +304,3 @@ describe('AuditUrl IT', async () => { }); }); }); - diff --git a/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js index c4c1e9472..b241941cd 100644 --- a/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js +++ b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js @@ -18,6 +18,7 @@ import { stub } from 'sinon'; import sinonChai from 'sinon-chai'; import AuditUrl from '../../../../src/models/audit-url/audit-url.model.js'; +import AuditUrlCollection from '../../../../src/models/audit-url/audit-url.collection.js'; import { createElectroMocks } from '../../util.js'; chaiUse(chaiAsPromised); @@ -72,18 +73,18 @@ describe('AuditUrlCollection', () => { it('returns the audit URL when found', async () => { instance.allBySiteIdAndUrl = stub().resolves([model]); - + const result = await instance.findBySiteIdAndUrl('site123', 'https://example.com/page'); - + expect(result).to.equal(model); expect(instance.allBySiteIdAndUrl).to.have.been.calledOnceWith('site123', 'https://example.com/page'); }); it('returns null when audit URL is not found', async () => { instance.allBySiteIdAndUrl = stub().resolves([]); - + const result = await instance.findBySiteIdAndUrl('site123', 'https://example.com/page'); - + expect(result).to.be.null; }); }); @@ -111,9 +112,9 @@ 
describe('AuditUrlCollection', () => { mockModel3.isAuditEnabled = (type) => mockModel3.audits.includes(type); instance.allBySiteId = stub().resolves([mockModel1, mockModel2, mockModel3]); - + const result = await instance.allBySiteIdAndAuditType('site123', 'accessibility'); - + expect(result).to.be.an('array'); expect(result).to.have.length(2); expect(result).to.include(mockModel1); @@ -127,9 +128,9 @@ describe('AuditUrlCollection', () => { mockModel.isAuditEnabled = (type) => mockModel.audits.includes(type); instance.allBySiteId = stub().resolves([mockModel]); - + const result = await instance.allBySiteIdAndAuditType('site123', 'accessibility'); - + expect(result).to.be.an('array'); expect(result).to.have.length(0); }); @@ -137,9 +138,9 @@ describe('AuditUrlCollection', () => { it('passes pagination options to allBySiteId', async () => { instance.allBySiteId = stub().resolves([]); const options = { limit: 50, cursor: 'abc123' }; - + await instance.allBySiteIdAndAuditType('site123', 'accessibility', options); - + expect(instance.allBySiteId).to.have.been.calledOnceWith('site123', options); }); }); @@ -201,5 +202,222 @@ describe('AuditUrlCollection', () => { expect(mockElectroService.entities.auditUrl.delete).to.not.have.been.called; }); }); -}); + describe('sortAuditUrls', () => { + it('returns empty array when input is empty', () => { + const result = AuditUrlCollection.sortAuditUrls([]); + expect(result).to.deep.equal([]); + }); + + it('returns null when input is null', () => { + const result = AuditUrlCollection.sortAuditUrls(null); + expect(result).to.be.null; + }); + + it('sorts by rank in ascending order', () => { + const url1 = { getRank: () => 1, getUrl: () => 'url1' }; + const url2 = { getRank: () => 3, getUrl: () => 'url2' }; + const url3 = { getRank: () => 2, getUrl: () => 'url3' }; + + const result = AuditUrlCollection.sortAuditUrls([url2, url1, url3], 'rank', 'asc'); + + expect(result[0]).to.equal(url1); + expect(result[1]).to.equal(url3); + 
expect(result[2]).to.equal(url2); + }); + + it('sorts by rank in descending order', () => { + const url1 = { getRank: () => 1, getUrl: () => 'url1' }; + const url2 = { getRank: () => 3, getUrl: () => 'url2' }; + const url3 = { getRank: () => 2, getUrl: () => 'url3' }; + + const result = AuditUrlCollection.sortAuditUrls([url1, url3, url2], 'rank', 'desc'); + + expect(result[0]).to.equal(url2); + expect(result[1]).to.equal(url3); + expect(result[2]).to.equal(url1); + }); + + it('sorts by traffic in ascending order', () => { + const url1 = { getTraffic: () => 100, getUrl: () => 'url1' }; + const url2 = { getTraffic: () => 300, getUrl: () => 'url2' }; + const url3 = { getTraffic: () => 200, getUrl: () => 'url3' }; + + const result = AuditUrlCollection.sortAuditUrls([url2, url1, url3], 'traffic', 'asc'); + + expect(result[0]).to.equal(url1); + expect(result[1]).to.equal(url3); + expect(result[2]).to.equal(url2); + }); + + it('sorts by url alphabetically', () => { + const url1 = { getUrl: () => 'https://a.com' }; + const url2 = { getUrl: () => 'https://c.com' }; + const url3 = { getUrl: () => 'https://b.com' }; + + const result = AuditUrlCollection.sortAuditUrls([url2, url1, url3], 'url', 'asc'); + + expect(result[0]).to.equal(url1); + expect(result[1]).to.equal(url3); + expect(result[2]).to.equal(url2); + }); + + it('handles null values by pushing them to the end', () => { + const url1 = { getRank: () => 1, getUrl: () => 'url1' }; + const url2 = { getRank: () => null, getUrl: () => 'url2' }; + const url3 = { getRank: () => 2, getUrl: () => 'url3' }; + + const result = AuditUrlCollection.sortAuditUrls([url2, url1, url3], 'rank', 'asc'); + + expect(result[0]).to.equal(url1); + expect(result[1]).to.equal(url3); + expect(result[2]).to.equal(url2); + }); + + it('handles objects without getter methods', () => { + const url1 = { rank: 1, url: 'url1' }; + const url2 = { rank: 3, url: 'url2' }; + const url3 = { rank: 2, url: 'url3' }; + + const result = 
AuditUrlCollection.sortAuditUrls([url2, url1, url3], 'rank', 'asc'); + + expect(result[0]).to.equal(url1); + expect(result[1]).to.equal(url3); + expect(result[2]).to.equal(url2); + }); + }); + + describe('allBySiteIdSorted', () => { + it('throws an error if siteId is not provided', async () => { + await expect(instance.allBySiteIdSorted()).to.be.rejectedWith('SiteId is required'); + }); + + it('returns sorted URLs when sortBy is provided', async () => { + const url1 = { getRank: () => 1, getUrl: () => 'url1' }; + const url2 = { getRank: () => 3, getUrl: () => 'url2' }; + const url3 = { getRank: () => 2, getUrl: () => 'url3' }; + + instance.allBySiteId = stub().resolves({ items: [url2, url1, url3], cursor: 'cursor123' }); + + const result = await instance.allBySiteIdSorted('site-123', { sortBy: 'rank', sortOrder: 'asc' }); + + expect(result.items).to.be.an('array').with.lengthOf(3); + expect(result.items[0]).to.equal(url1); + expect(result.items[1]).to.equal(url3); + expect(result.items[2]).to.equal(url2); + expect(result.cursor).to.equal('cursor123'); + }); + + it('returns unsorted URLs when sortBy is not provided', async () => { + const url1 = { getUrl: () => 'url1' }; + const url2 = { getUrl: () => 'url2' }; + + instance.allBySiteId = stub().resolves({ items: [url2, url1] }); + + const result = await instance.allBySiteIdSorted('site-123', {}); + + expect(result.items).to.deep.equal([url2, url1]); + }); + + it('handles array result format', async () => { + const url1 = { getRank: () => 1, getUrl: () => 'url1' }; + const url2 = { getRank: () => 2, getUrl: () => 'url2' }; + + instance.allBySiteId = stub().resolves([url2, url1]); + + const result = await instance.allBySiteIdSorted('site-123', { sortBy: 'rank', sortOrder: 'asc' }); + + expect(result).to.be.an('array').with.lengthOf(2); + expect(result[0]).to.equal(url1); + expect(result[1]).to.equal(url2); + }); + + it('passes query options to allBySiteId', async () => { + instance.allBySiteId = stub().resolves({ 
items: [] }); + + await instance.allBySiteIdSorted('site-123', { limit: 10, cursor: 'abc', sortBy: 'rank' }); + + expect(instance.allBySiteId).to.have.been.calledOnceWith('site-123', { limit: 10, cursor: 'abc' }); + }); + }); + + describe('allBySiteIdAndSourceSorted', () => { + it('throws an error if siteId is not provided', async () => { + await expect(instance.allBySiteIdAndSourceSorted()).to.be.rejectedWith('Both siteId and source are required'); + }); + + it('throws an error if source is not provided', async () => { + await expect(instance.allBySiteIdAndSourceSorted('site-123')).to.be.rejectedWith('Both siteId and source are required'); + }); + + it('returns sorted URLs when sortBy is provided', async () => { + const url1 = { getRank: () => 1, getUrl: () => 'url1' }; + const url2 = { getRank: () => 3, getUrl: () => 'url2' }; + const url3 = { getRank: () => 2, getUrl: () => 'url3' }; + + instance.allBySiteIdAndSource = stub().resolves({ items: [url2, url1, url3], cursor: 'cursor123' }); + + const result = await instance.allBySiteIdAndSourceSorted('site-123', 'manual', { sortBy: 'rank', sortOrder: 'asc' }); + + expect(result.items).to.be.an('array').with.lengthOf(3); + expect(result.items[0]).to.equal(url1); + expect(result.items[1]).to.equal(url3); + expect(result.items[2]).to.equal(url2); + expect(result.cursor).to.equal('cursor123'); + }); + + it('returns unsorted URLs when sortBy is not provided', async () => { + const url1 = { getUrl: () => 'url1' }; + const url2 = { getUrl: () => 'url2' }; + + instance.allBySiteIdAndSource = stub().resolves({ items: [url2, url1] }); + + const result = await instance.allBySiteIdAndSourceSorted('site-123', 'sitemap', {}); + + expect(result.items).to.deep.equal([url2, url1]); + }); + + it('handles array result format', async () => { + const url1 = { getRank: () => 1, getUrl: () => 'url1' }; + const url2 = { getRank: () => 2, getUrl: () => 'url2' }; + + instance.allBySiteIdAndSource = stub().resolves([url2, url1]); + + const 
result = await instance.allBySiteIdAndSourceSorted('site-123', 'manual', { sortBy: 'rank', sortOrder: 'asc' }); + + expect(result).to.be.an('array').with.lengthOf(2); + expect(result[0]).to.equal(url1); + expect(result[1]).to.equal(url2); + }); + + it('passes query options to allBySiteIdAndSource', async () => { + instance.allBySiteIdAndSource = stub().resolves({ items: [] }); + + await instance.allBySiteIdAndSourceSorted('site-123', 'manual', { limit: 10, cursor: 'abc', sortBy: 'rank' }); + + expect(instance.allBySiteIdAndSource).to.have.been.calledOnceWith('site-123', 'manual', { limit: 10, cursor: 'abc' }); + }); + }); + + describe('allBySiteIdAndAuditType with sorting', () => { + it('applies sorting when sortBy is provided', async () => { + const mockModel1 = Object.create(AuditUrl.prototype); + mockModel1.audits = ['accessibility']; + mockModel1.isAuditEnabled = (type) => mockModel1.audits.includes(type); + mockModel1.getRank = () => 2; + + const mockModel2 = Object.create(AuditUrl.prototype); + mockModel2.audits = ['accessibility']; + mockModel2.isAuditEnabled = (type) => mockModel2.audits.includes(type); + mockModel2.getRank = () => 1; + + instance.allBySiteId = stub().resolves([mockModel1, mockModel2]); + + const result = await instance.allBySiteIdAndAuditType('site123', 'accessibility', { sortBy: 'rank', sortOrder: 'asc' }); + + expect(result).to.be.an('array').with.lengthOf(2); + expect(result[0]).to.equal(mockModel2); + expect(result[1]).to.equal(mockModel1); + }); + }); +}); diff --git a/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js index 8b74ed7fe..e90b6ca97 100644 --- a/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js +++ b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js @@ -167,7 +167,7 @@ describe('AuditUrlModel', () => { const plainObj = 
Object.create(AuditUrl.prototype); plainObj.source = 'manual'; expect(plainObj.isManualSource()).to.be.true; - + plainObj.source = 'sitemap'; expect(plainObj.isManualSource()).to.be.false; }); @@ -186,4 +186,3 @@ describe('AuditUrlModel', () => { }); }); }); - From 6ea2abbc10af2fe774d60c3c2ffaf813ed8d7e53 Mon Sep 17 00:00:00 2001 From: Alexandru Tudoran Date: Fri, 14 Nov 2025 15:24:42 +0200 Subject: [PATCH 3/5] chore: trigger PR update From 1827a40cae01765354b0c42a0162e01dcb351859 Mon Sep 17 00:00:00 2001 From: Alexandru Tudoran Date: Fri, 14 Nov 2025 15:31:43 +0200 Subject: [PATCH 4/5] fix(ci): increase Node.js memory limit to 4GB - Add NODE_OPTIONS with --max-old-space-size=4096 to prevent heap out of memory errors - Fixes FATAL ERROR: JavaScript heap out of memory in CI pipeline --- .github/workflows/main.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index e3decef50..eb9f56ac8 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -10,6 +10,7 @@ on: [push] env: CI_BUILD_NUM: ${{ github.run_id }} CI_BRANCH: ${{ github.ref_name }} + NODE_OPTIONS: --max-old-space-size=4096 jobs: test: From 483b27a26966c72ea12f127efe2b87ede92e0b48 Mon Sep 17 00:00:00 2001 From: Alexandru Tudoran Date: Fri, 14 Nov 2025 16:40:16 +0200 Subject: [PATCH 5/5] feat(url-store): add platformType support for offsite URLs Add platformType field to AuditUrl schema to categorize URLs as primary-site or offsite platforms (Wikipedia, YouTube, social media, etc.). 
Changes: - Add platformType attribute with 11 supported platform types - Add GSI for efficient querying by siteId and platformType - Add collection methods: allBySiteIdAndPlatform(), allOffsiteUrls() - Add model helper methods: isOffsitePlatform(), isPlatformType() - Export PLATFORM_TYPES constant - Update TypeScript definitions - Add 33 comprehensive unit tests Platform types supported: primary-site, wikipedia, youtube-channel, reddit-community, facebook-page, twitter-profile, linkedin-company, instagram-account, tiktok-account, github-org, medium-publication All methods support sorting and pagination. --- .../models/audit-url/audit-url.collection.js | 76 +++++++++ .../src/models/audit-url/audit-url.model.js | 21 +++ .../src/models/audit-url/audit-url.schema.js | 29 ++++ .../src/models/audit-url/index.d.ts | 20 +++ .../src/models/audit-url/index.js | 2 + .../audit-url/audit-url.collection.test.js | 158 ++++++++++++++++++ .../models/audit-url/audit-url.model.test.js | 95 +++++++++++ 7 files changed, 401 insertions(+) diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js index 24ddd9f92..cca0390f4 100644 --- a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js +++ b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.collection.js @@ -244,6 +244,82 @@ class AuditUrlCollection extends BaseCollection { await this.removeByIds(idsToRemove); } } + + /** + * Gets all audit URLs for a site filtered by platform type with sorting support. + * @param {string} siteId - The site ID. + * @param {string} platformType - The platform type to filter by. + * @param {object} [options={}] - Query options (limit, cursor, sortBy, sortOrder). + * @returns {Promise<{items: AuditUrl[], cursor?: string}>} Paginated and sorted results. 
+   */
+  async allBySiteIdAndPlatform(siteId, platformType, options = {}) {
+    if (!hasText(siteId) || !hasText(platformType)) {
+      throw new Error('Both siteId and platformType are required');
+    }
+
+    const { sortBy, sortOrder, ...queryOptions } = options;
+
+    // Query by siteId and platformType; sortBy/sortOrder are not forwarded to the query
+    const result = await this.queryItems(
+      { siteId, platformType },
+      queryOptions,
+      'gsi2', // NOTE(review): hard-coded index name; confirm it is the siteId+platformType GSI
+    );
+
+    // Handle both array and paginated result formats
+    const items = Array.isArray(result) ? result : (result.items || []);
+
+    // Sort in memory, and only the items returned by this one query
+    const sortedItems = sortBy
+      ? AuditUrlCollection.sortAuditUrls(items, sortBy, sortOrder) : items;
+
+    // Mirror the shape the query returned: bare array in, bare array out
+    if (Array.isArray(result)) {
+      return sortedItems;
+    }
+
+    return {
+      items: sortedItems,
+      cursor: result.cursor,
+    };
+  }
+
+  /**
+   * Gets all offsite platform URLs for a site (excludes primary-site URLs).
+   * @param {string} siteId - The site ID.
+   * @param {object} [options={}] - Query options (limit, cursor, sortBy, sortOrder).
+   * @returns {Promise<AuditUrl[]|{items: AuditUrl[], cursor?: string}>} Offsite URLs only.
+   */
+  async allOffsiteUrls(siteId, options = {}) {
+    if (!hasText(siteId)) {
+      throw new Error('SiteId is required');
+    }
+
+    const { sortBy, sortOrder, ...queryOptions } = options;
+
+    // Fetch the site's URLs; limit/cursor apply here, before the offsite filter below
+    const result = await this.allBySiteId(siteId, queryOptions);
+
+    // Handle both array and paginated result formats
+    const items = Array.isArray(result) ? result : (result.items || []);
+
+    // Keep offsite URLs only; items without an isOffsitePlatform method are dropped
+    const offsiteItems = items.filter((url) => url.isOffsitePlatform && url.isOffsitePlatform());
+
+    // Sort in memory what survived the filter
+    const sortedItems = sortBy
+      ?
AuditUrlCollection.sortAuditUrls(offsiteItems, sortBy, sortOrder) : offsiteItems;
+
+    // Return in the same format as received
+    if (Array.isArray(result)) {
+      return sortedItems;
+    }
+
+    return {
+      items: sortedItems,
+      cursor: result.cursor,
+    };
+  }
 }
 
 export default AuditUrlCollection;
diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js
index b270b1291..a44c9d607 100644
--- a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js
+++ b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.model.js
@@ -21,6 +21,25 @@ import BaseModel from '../base/base.model.js';
  */
 class AuditUrl extends BaseModel {
   static DEFAULT_SOURCE = 'manual';
+
+  /**
+   * Valid platform types for URL classification. Declared on the model, and re-exported by
+   * the schema, because the schema module imports this model: importing the schema back
+   * from here would create a circular import.
+   */
+  static PLATFORM_TYPES = {
+    PRIMARY_SITE: 'primary-site',
+    WIKIPEDIA: 'wikipedia',
+    YOUTUBE_CHANNEL: 'youtube-channel',
+    REDDIT_COMMUNITY: 'reddit-community',
+    FACEBOOK_PAGE: 'facebook-page',
+    TWITTER_PROFILE: 'twitter-profile',
+    LINKEDIN_COMPANY: 'linkedin-company',
+    INSTAGRAM_ACCOUNT: 'instagram-account',
+    TIKTOK_ACCOUNT: 'tiktok-account',
+    GITHUB_ORG: 'github-org',
+    MEDIUM_PUBLICATION: 'medium-publication',
+  };
 
   /**
    * Checks if this URL is enabled for a specific audit type.
@@ -76,6 +95,26 @@ class AuditUrl extends BaseModel {
     const source = this.getSource ? this.getSource() : this.source;
     return source === AuditUrl.DEFAULT_SOURCE;
   }
+
+  /**
+   * Checks if this URL represents an offsite platform (not the primary site).
+   * @returns {boolean} True if this is an offsite platform URL; false if the type is unset.
+   */
+  isOffsitePlatform() {
+    const platformType = this.getPlatformType ? this.getPlatformType() : this.platformType;
+    // Coerce so an unset platform type yields false rather than undefined or null
+    return Boolean(platformType) && platformType !== AuditUrl.PLATFORM_TYPES.PRIMARY_SITE;
+  }
+
+  /**
+   * Checks if this URL is of a specific platform type.
+   * @param {string} type - The platform type to check.
+   * @returns {boolean} True if the URL matches the specified platform type.
+   */
+  isPlatformType(type) {
+    const platformType = this.getPlatformType ? this.getPlatformType() : this.platformType;
+    return platformType === type;
+  }
 }
 
 export default AuditUrl;
diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js
index 8aba18e0e..24d87dc6e 100644
--- a/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js
+++ b/packages/spacecat-shared-data-access/src/models/audit-url/audit-url.schema.js
@@ -21,6 +21,12 @@ import SchemaBuilder from '../base/schema.builder.js';
 import AuditUrl from './audit-url.model.js';
 import AuditUrlCollection from './audit-url.collection.js';
 
+// Valid platform types for URL classification. The map is defined on the model and only
+// re-exported here, so the model never has to import this module (a circular import).
+export const { PLATFORM_TYPES } = AuditUrl;
+
+const VALID_PLATFORM_TYPES = Object.values(PLATFORM_TYPES);
+
 /*
 Schema Doc: https://electrodb.dev/en/modeling/schema/
 Attribute Doc: https://electrodb.dev/en/modeling/attributes/
@@ -67,6 +73,13 @@ const schema = new SchemaBuilder(AuditUrl, AuditUrlCollection)
     required: false,
     default: null,
   })
+  .addAttribute('platformType', {
+    type: 'string',
+    required: false,
+    default: PLATFORM_TYPES.PRIMARY_SITE,
+    set: (value) => value?.toLowerCase(),
+    validate: (value) => !value || VALID_PLATFORM_TYPES.includes(value),
+  })
   .addAttribute('createdAt', {
     type: 'string',
     required: true,
@@ -100,6 +113,11 @@ const schema = new SchemaBuilder(AuditUrl, AuditUrlCollection)
   .addIndex(
     { composite: ['siteId'] },
     { composite: ['source'] },
+  )
+  // Add a third GSI for querying by siteId and platformType
+  .addIndex(
+    { composite: ['siteId'] },
+    {
composite: ['platformType'] }, ); export default schema.build(); diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts b/packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts index e6d5c7aac..6f625a010 100644 --- a/packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts +++ b/packages/spacecat-shared-data-access/src/models/audit-url/index.d.ts @@ -12,12 +12,27 @@ import type { BaseCollection, BaseModel, Site } from '../index'; +export const PLATFORM_TYPES: { + PRIMARY_SITE: 'primary-site'; + WIKIPEDIA: 'wikipedia'; + YOUTUBE_CHANNEL: 'youtube-channel'; + REDDIT_COMMUNITY: 'reddit-community'; + FACEBOOK_PAGE: 'facebook-page'; + TWITTER_PROFILE: 'twitter-profile'; + LINKEDIN_COMPANY: 'linkedin-company'; + INSTAGRAM_ACCOUNT: 'instagram-account'; + TIKTOK_ACCOUNT: 'tiktok-account'; + GITHUB_ORG: 'github-org'; + MEDIUM_PUBLICATION: 'medium-publication'; +}; + export interface AuditUrl extends BaseModel { getAudits(): string[]; getCreatedAt(): string; getCreatedBy(): string; getRank(): number | null; getTraffic(): number | null; + getPlatformType(): string; getSite(): Promise; getSiteId(): string; getSource(): string; @@ -26,6 +41,7 @@ export interface AuditUrl extends BaseModel { setCreatedBy(createdBy: string): AuditUrl; setRank(rank: number | null): AuditUrl; setTraffic(traffic: number | null): AuditUrl; + setPlatformType(platformType: string): AuditUrl; setSiteId(siteId: string): AuditUrl; setSource(source: string): AuditUrl; setUrl(url: string): AuditUrl; @@ -33,6 +49,8 @@ export interface AuditUrl extends BaseModel { enableAudit(auditType: string): AuditUrl; disableAudit(auditType: string): AuditUrl; isManualSource(): boolean; + isOffsitePlatform(): boolean; + isPlatformType(type: string): boolean; } export interface AuditUrlCollection extends BaseCollection { @@ -42,6 +60,8 @@ export interface AuditUrlCollection extends BaseCollection { allBySiteIdAndUrl(siteId: string, url: string): Promise; 
allBySiteIdSorted(siteId: string, options?: { limit?: number; cursor?: string; sortBy?: string; sortOrder?: string }): Promise<{ items: AuditUrl[]; cursor?: string }>; allBySiteIdAndSourceSorted(siteId: string, source: string, options?: { limit?: number; cursor?: string; sortBy?: string; sortOrder?: string }): Promise<{ items: AuditUrl[]; cursor?: string }>; + allBySiteIdAndPlatform(siteId: string, platformType: string, options?: { limit?: number; cursor?: string; sortBy?: string; sortOrder?: string }): Promise<{ items: AuditUrl[]; cursor?: string }>; + allOffsiteUrls(siteId: string, options?: { limit?: number; cursor?: string; sortBy?: string; sortOrder?: string }): Promise<{ items: AuditUrl[]; cursor?: string }>; findBySiteId(siteId: string): Promise; findBySiteIdAndSource(siteId: string, source: string): Promise; findBySiteIdAndSourceAndUrl(siteId: string, source: string, url: string): Promise; diff --git a/packages/spacecat-shared-data-access/src/models/audit-url/index.js b/packages/spacecat-shared-data-access/src/models/audit-url/index.js index 013ba5b1f..7fc65ff91 100644 --- a/packages/spacecat-shared-data-access/src/models/audit-url/index.js +++ b/packages/spacecat-shared-data-access/src/models/audit-url/index.js @@ -12,8 +12,10 @@ import AuditUrl from './audit-url.model.js'; import AuditUrlCollection from './audit-url.collection.js'; +import { PLATFORM_TYPES } from './audit-url.schema.js'; export { AuditUrl, AuditUrlCollection, + PLATFORM_TYPES, }; diff --git a/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js index b241941cd..b5673e082 100644 --- a/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js +++ b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.collection.test.js @@ -420,4 +420,162 @@ describe('AuditUrlCollection', () => { 
expect(result[1]).to.equal(mockModel1); }); }); + + describe('allBySiteIdAndPlatform', () => { + it('throws an error if siteId is not provided', async () => { + await expect(instance.allBySiteIdAndPlatform()).to.be.rejectedWith('Both siteId and platformType are required'); + }); + + it('throws an error if platformType is not provided', async () => { + await expect(instance.allBySiteIdAndPlatform('site123')).to.be.rejectedWith('Both siteId and platformType are required'); + }); + + it('queries URLs by siteId and platformType using GSI', async () => { + instance.queryItems = stub().resolves([model]); + + const result = await instance.allBySiteIdAndPlatform('site123', 'youtube-channel'); + + expect(result).to.be.an('array').with.lengthOf(1); + expect(result[0]).to.equal(model); + expect(instance.queryItems).to.have.been.calledOnceWith( + { siteId: 'site123', platformType: 'youtube-channel' }, + {}, + 'gsi2', + ); + }); + + it('passes through query options', async () => { + instance.queryItems = stub().resolves({ items: [model], cursor: 'cursor123' }); + + const result = await instance.allBySiteIdAndPlatform('site123', 'wikipedia', { limit: 10, cursor: 'abc' }); + + expect(result.items).to.be.an('array').with.lengthOf(1); + expect(result.cursor).to.equal('cursor123'); + expect(instance.queryItems).to.have.been.calledOnceWith( + { siteId: 'site123', platformType: 'wikipedia' }, + { limit: 10, cursor: 'abc' }, + 'gsi2', + ); + }); + + it('applies sorting when sortBy is provided', async () => { + const mockModel1 = Object.create(AuditUrl.prototype); + mockModel1.getTraffic = () => 1000; + + const mockModel2 = Object.create(AuditUrl.prototype); + mockModel2.getTraffic = () => 5000; + + instance.queryItems = stub().resolves([mockModel1, mockModel2]); + + const result = await instance.allBySiteIdAndPlatform('site123', 'reddit-community', { sortBy: 'traffic', sortOrder: 'desc' }); + + expect(result).to.be.an('array').with.lengthOf(2); + expect(result[0]).to.equal(mockModel2); 
// 5000 traffic first + expect(result[1]).to.equal(mockModel1); // 1000 traffic second + }); + + it('applies sorting to paginated results', async () => { + const mockModel1 = Object.create(AuditUrl.prototype); + mockModel1.getRank = () => 2; + + const mockModel2 = Object.create(AuditUrl.prototype); + mockModel2.getRank = () => 1; + + instance.queryItems = stub().resolves({ items: [mockModel1, mockModel2], cursor: 'next123' }); + + const result = await instance.allBySiteIdAndPlatform('site123', 'facebook-page', { sortBy: 'rank', sortOrder: 'asc' }); + + expect(result.items).to.be.an('array').with.lengthOf(2); + expect(result.items[0]).to.equal(mockModel2); // rank 1 first + expect(result.items[1]).to.equal(mockModel1); // rank 2 second + expect(result.cursor).to.equal('next123'); + }); + }); + + describe('allOffsiteUrls', () => { + it('throws an error if siteId is not provided', async () => { + await expect(instance.allOffsiteUrls()).to.be.rejectedWith('SiteId is required'); + }); + + it('returns only offsite platform URLs', async () => { + const mockPrimaryUrl = Object.create(AuditUrl.prototype); + mockPrimaryUrl.isOffsitePlatform = () => false; + + const mockYoutubeUrl = Object.create(AuditUrl.prototype); + mockYoutubeUrl.isOffsitePlatform = () => true; + + const mockWikiUrl = Object.create(AuditUrl.prototype); + mockWikiUrl.isOffsitePlatform = () => true; + + instance.allBySiteId = stub().resolves([mockPrimaryUrl, mockYoutubeUrl, mockWikiUrl]); + + const result = await instance.allOffsiteUrls('site123'); + + expect(result).to.be.an('array').with.lengthOf(2); + expect(result).to.include(mockYoutubeUrl); + expect(result).to.include(mockWikiUrl); + expect(result).to.not.include(mockPrimaryUrl); + }); + + it('passes through query options', async () => { + instance.allBySiteId = stub().resolves({ items: [], cursor: 'cursor123' }); + + const result = await instance.allOffsiteUrls('site123', { limit: 10, cursor: 'abc' }); + + expect(result.items).to.be.an('array'); + 
expect(result.cursor).to.equal('cursor123'); + expect(instance.allBySiteId).to.have.been.calledOnceWith('site123', { limit: 10, cursor: 'abc' }); + }); + + it('applies sorting when sortBy is provided', async () => { + const mockUrl1 = Object.create(AuditUrl.prototype); + mockUrl1.isOffsitePlatform = () => true; + mockUrl1.getTraffic = () => 1000; + + const mockUrl2 = Object.create(AuditUrl.prototype); + mockUrl2.isOffsitePlatform = () => true; + mockUrl2.getTraffic = () => 5000; + + instance.allBySiteId = stub().resolves([mockUrl1, mockUrl2]); + + const result = await instance.allOffsiteUrls('site123', { sortBy: 'traffic', sortOrder: 'desc' }); + + expect(result).to.be.an('array').with.lengthOf(2); + expect(result[0]).to.equal(mockUrl2); // 5000 traffic first + expect(result[1]).to.equal(mockUrl1); // 1000 traffic second + }); + + it('applies sorting to paginated results', async () => { + const mockUrl1 = Object.create(AuditUrl.prototype); + mockUrl1.isOffsitePlatform = () => true; + mockUrl1.getRank = () => 2; + + const mockUrl2 = Object.create(AuditUrl.prototype); + mockUrl2.isOffsitePlatform = () => true; + mockUrl2.getRank = () => 1; + + instance.allBySiteId = stub().resolves({ items: [mockUrl1, mockUrl2], cursor: 'next123' }); + + const result = await instance.allOffsiteUrls('site123', { sortBy: 'rank', sortOrder: 'asc' }); + + expect(result.items).to.be.an('array').with.lengthOf(2); + expect(result.items[0]).to.equal(mockUrl2); // rank 1 first + expect(result.items[1]).to.equal(mockUrl1); // rank 2 second + expect(result.cursor).to.equal('next123'); + }); + + it('returns empty array when no offsite URLs exist', async () => { + const mockPrimaryUrl1 = Object.create(AuditUrl.prototype); + mockPrimaryUrl1.isOffsitePlatform = () => false; + + const mockPrimaryUrl2 = Object.create(AuditUrl.prototype); + mockPrimaryUrl2.isOffsitePlatform = () => false; + + instance.allBySiteId = stub().resolves([mockPrimaryUrl1, mockPrimaryUrl2]); + + const result = await 
instance.allOffsiteUrls('site123'); + + expect(result).to.be.an('array').with.lengthOf(0); + }); + }); }); diff --git a/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js index e90b6ca97..8d16af3d0 100644 --- a/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js +++ b/packages/spacecat-shared-data-access/test/unit/models/audit-url/audit-url.model.test.js @@ -185,4 +185,99 @@ describe('AuditUrlModel', () => { expect(instance.isAuditEnabled('accessibility')).to.be.false; }); }); + + describe('PLATFORM_TYPES', () => { + it('exposes PLATFORM_TYPES as a static property', () => { + expect(AuditUrl.PLATFORM_TYPES).to.be.an('object'); + expect(AuditUrl.PLATFORM_TYPES.PRIMARY_SITE).to.equal('primary-site'); + expect(AuditUrl.PLATFORM_TYPES.WIKIPEDIA).to.equal('wikipedia'); + expect(AuditUrl.PLATFORM_TYPES.YOUTUBE_CHANNEL).to.equal('youtube-channel'); + }); + }); + + describe('isOffsitePlatform', () => { + it('returns false for primary-site platform type', () => { + instance.record.platformType = 'primary-site'; + expect(instance.isOffsitePlatform()).to.be.false; + }); + + it('returns true for youtube-channel platform type', () => { + instance.record.platformType = 'youtube-channel'; + expect(instance.isOffsitePlatform()).to.be.true; + }); + + it('returns true for wikipedia platform type', () => { + instance.record.platformType = 'wikipedia'; + expect(instance.isOffsitePlatform()).to.be.true; + }); + + it('returns true for reddit-community platform type', () => { + instance.record.platformType = 'reddit-community'; + expect(instance.isOffsitePlatform()).to.be.true; + }); + + it('returns false when platformType is undefined', () => { + delete instance.record.platformType; + expect(instance.isOffsitePlatform()).to.be.false; + }); + + it('returns false when platformType is null', () => { + instance.record.platformType 
= null; + expect(instance.isOffsitePlatform()).to.be.false; + }); + + it('works with getPlatformType getter', () => { + instance.getPlatformType = () => 'facebook-page'; + expect(instance.isOffsitePlatform()).to.be.true; + }); + }); + + describe('isPlatformType', () => { + it('returns true when platform type matches', () => { + instance.record.platformType = 'youtube-channel'; + expect(instance.isPlatformType('youtube-channel')).to.be.true; + }); + + it('returns false when platform type does not match', () => { + instance.record.platformType = 'youtube-channel'; + expect(instance.isPlatformType('wikipedia')).to.be.false; + }); + + it('returns false when platformType is undefined', () => { + delete instance.record.platformType; + expect(instance.isPlatformType('youtube-channel')).to.be.false; + }); + + it('returns false when platformType is null', () => { + instance.record.platformType = null; + expect(instance.isPlatformType('youtube-channel')).to.be.false; + }); + + it('works with getPlatformType getter', () => { + instance.getPlatformType = () => 'twitter-profile'; + expect(instance.isPlatformType('twitter-profile')).to.be.true; + expect(instance.isPlatformType('linkedin-company')).to.be.false; + }); + + it('handles all platform types correctly', () => { + const platformTypes = [ + 'primary-site', + 'wikipedia', + 'youtube-channel', + 'reddit-community', + 'facebook-page', + 'twitter-profile', + 'linkedin-company', + 'instagram-account', + 'tiktok-account', + 'github-org', + 'medium-publication', + ]; + + platformTypes.forEach((type) => { + instance.record.platformType = type; + expect(instance.isPlatformType(type)).to.be.true; + }); + }); + }); });