Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 67 additions & 42 deletions src/tasks/opportunity-status-processor/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,44 @@ import { OPPORTUNITY_DEPENDENCY_MAP } from './opportunity-dependency-map.js';
const TASK_TYPE = 'opportunity-status-processor';
const AUDIT_WORKER_LOG_GROUP = '/aws/lambda/spacecat-services--audit-worker';

/**
* Toggles the www subdomain in a hostname
* @param {string} hostname - The hostname to toggle
* @returns {string} The hostname with www toggled
*/
function toggleWWWHostname(hostname) {
return hostname.startsWith('www.') ? hostname.replace('www.', '') : `www.${hostname}`;
}

/**
* Checks if RUM is available for a domain by attempting to get a domainkey
* Tries both with and without www prefix
* @param {string} domain - The domain to check
* @param {object} context - The context object with env and log
* @returns {Promise<boolean>} True if RUM is available, false otherwise
*/
async function isRUMAvailable(domain, context) {
const { log } = context;
const rumClient = RUMAPIClient.createFrom(context);

try {
const rumClient = RUMAPIClient.createFrom(context);

// Attempt to get domainkey - if this succeeds, RUM is available
await rumClient.retrieveDomainkey(domain);

log.info(`RUM is available for domain: ${domain}`);
return true;
} catch (error) {
log.info(`RUM is not available for domain: ${domain}. Reason: ${error.message}`);
return false;
log.warn(`RUM is not available for domain: ${domain}. Reason: ${error.message}`);
}

// Try with www-toggled domain
const wwwToggledDomain = toggleWWWHostname(domain);
try {
await rumClient.retrieveDomainkey(wwwToggledDomain);
log.info(`RUM is available for domain: ${wwwToggledDomain}`);
return true;
} catch (error) {
log.warn(`RUM not available for ${wwwToggledDomain}: ${error.message}`);
}
return false;
}

/**
Expand Down Expand Up @@ -469,47 +486,55 @@ export async function runOpportunityStatusProcessor(message, context) {
const needsScraping = requiredDependencies.has('scraping');
const needsGSC = requiredDependencies.has('GSC');

// Only check data sources that are needed
if (siteUrl && (needsRUM || needsGSC || needsScraping)) {
try {
const resolvedUrl = await resolveCanonicalUrl(siteUrl);
log.info(`Resolved URL: ${resolvedUrl}`);
const domain = new URL(resolvedUrl).hostname;

if (needsRUM) {
rumAvailable = await isRUMAvailable(domain, context);
if (!siteUrl) {
log.warn('No siteUrl provided, skipping RUM, GSC, and scraping checks');
} else {
let resolvedUrl = null;
if (needsRUM || needsGSC) {
resolvedUrl = await resolveCanonicalUrl(siteUrl);
log.info(`Resolved URL: ${resolvedUrl || 'null'} for ${siteUrl}`);

if (!resolvedUrl) {
log.warn(`Could not resolve canonical URL for: ${siteUrl}. Site may be unreachable.`);
if (slackContext) {
await say(env, log, slackContext, `:warning: Could not resolve canonical URL for \`${siteUrl}\`. Site may be unreachable.`);
}
}
}

if (needsGSC) {
gscConfigured = await isGSCConfigured(resolvedUrl, context);
}
if (needsRUM) {
const urlToCheck = resolvedUrl || siteUrl;
const domain = new URL(urlToCheck).hostname;
rumAvailable = await isRUMAvailable(domain, context);
}

if (needsScraping) {
const scrapingCheck = await isScrapingAvailable(siteUrl, context);
scrapingAvailable = scrapingCheck.available;

// Send Slack notification with scraping statistics if available
if (scrapingCheck.stats && slackContext) {
const { completed, failed, total } = scrapingCheck.stats;
const statsMessage = `:mag: *Scraping Statistics for ${siteUrl}*\n`
+ `✅ Completed: ${completed}\n`
+ `❌ Failed: ${failed}\n`
+ `📊 Total: ${total}`;

if (failed > 0) {
await say(
env,
log,
slackContext,
`${statsMessage}\n:information_source: _${failed} failed URLs will be retried on re-onboarding._`,
);
} else {
await say(env, log, slackContext, statsMessage);
}
if (needsGSC && resolvedUrl) {
gscConfigured = await isGSCConfigured(resolvedUrl, context);
}

if (needsScraping) {
const scrapingCheck = await isScrapingAvailable(siteUrl, context);
scrapingAvailable = scrapingCheck.available;

// Send Slack notification with scraping statistics if available
if (scrapingCheck.stats && slackContext) {
const { completed, failed, total } = scrapingCheck.stats;
const statsMessage = `:mag: *Scraping Statistics for ${siteUrl}*\n`
+ `✅ Completed: ${completed}\n`
+ `❌ Failed: ${failed}\n`
+ `📊 Total: ${total}`;

if (failed > 0) {
await say(
env,
log,
slackContext,
`${statsMessage}\n:information_source: _${failed} failed URLs will be retried on re-onboarding._`,
);
} else {
await say(env, log, slackContext, statsMessage);
}
}
} catch (error) {
log.warn(`Could not resolve canonical URL or parse siteUrl for data source checks: ${siteUrl}`, error);
}
}

Expand Down
24 changes: 24 additions & 0 deletions test/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -203,5 +203,29 @@ describe('Index Tests', () => {
expect(resp.status).to.equal(200);
expect(directContext.log.info.calledWith(sinon.match(/Received message with type: dummy/))).to.be.true;
});

it('detects wrapped SQS events with messageId in context.invocation.event.Records', async () => {
// Test case where event.Records is not present, but context.invocation.event.Records is
// This covers lines 111-112 in src/index.js
const wrappedSqsContext = {
...context,
invocation: {
event: {
Records: [{
messageId: 'test-message-id-123',
body: JSON.stringify({
type: 'dummy',
siteId: 'wrapped-site',
}),
}],
},
},
};

// Pass an empty event object (no top-level Records)
const resp = await main({}, wrappedSqsContext);
expect(resp.status).to.equal(200);
expect(wrappedSqsContext.log.info.calledWith(sinon.match(/Received message with type: dummy/))).to.be.true;
});
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,12 @@ describe('Opportunity Status Processor', () => {

// Mock fetch for robots.txt and HEAD requests
global.fetch = sandbox.stub();
global.fetch.resolves({
global.fetch.callsFake((url) => Promise.resolve({
ok: true,
status: 200,
url, // Include URL for resolveCanonicalUrl
text: sandbox.stub().resolves('User-agent: *\nAllow: /'),
});
}));

// Mock context
context = new MockContextBuilder()
Expand Down Expand Up @@ -268,9 +269,9 @@ describe('Opportunity Status Processor', () => {
mockSite.getOpportunities.resolves(mockOpportunities);

// For this test, we'll just verify that the error is handled gracefully
// The actual resolveCanonicalUrl function will throw an error for invalid URLs
// resolveCanonicalUrl returns null for invalid URLs, triggering log.warn
await runOpportunityStatusProcessor(message, context);
expect(context.log.warn.calledWith('Could not resolve canonical URL or parse siteUrl for data source checks: invalid-url', sinon.match.any)).to.be.true;
expect(context.log.warn.calledWith(sinon.match(/Could not resolve canonical URL for: invalid-url/))).to.be.true;
expect(mockSite.getOpportunities.called).to.be.true;
});

Expand Down Expand Up @@ -348,6 +349,7 @@ describe('Opportunity Status Processor', () => {
info: sinon.stub(),
error: sinon.stub(),
warn: sinon.stub(),
debug: sinon.stub(),
},
env: {
RUM_ADMIN_KEY: 'test-admin-key',
Expand Down Expand Up @@ -399,13 +401,13 @@ describe('Opportunity Status Processor', () => {

await runOpportunityStatusProcessor(testMessage, testContext);

// Verify error handling for localhost URLs
expect(testContext.log.warn.calledWith(`Could not resolve canonical URL or parse siteUrl for data source checks: ${testCase.url}`, sinon.match.any)).to.be.true;
// Verify error handling for localhost URLs (now uses log.warn for null resolvedUrl)
expect(testContext.log.warn.calledWith(sinon.match(/Could not resolve canonical URL for.*Site may be unreachable/))).to.be.true;
}));
});

it('should handle RUM success scenarios', async () => {
// Test RUM available (success case) - use a simple URL that should resolve quickly
// Test RUM available (success case) - tries original domain first
mockRUMClient.retrieveDomainkey.resolves('test-domain-key');
const RUMAPIClient = await import('@adobe/spacecat-shared-rum-api-client');
const createFromStub = sinon.stub(RUMAPIClient.default, 'createFrom').returns(mockRUMClient);
Expand All @@ -420,30 +422,138 @@ describe('Opportunity Status Processor', () => {
},
};

const testMockSite = {
getOpportunities: sinon.stub().resolves([]),
};

const testContext = {
...mockContext,
dataAccess: {
Site: {
findById: sinon.stub().resolves({
getOpportunities: sinon.stub().resolves([]),
}),
findById: sinon.stub().resolves(testMockSite),
},
SiteTopPage: {
allBySiteIdAndSourceAndGeo: sinon.stub().resolves([]),
},
Configuration: {
findLatest: sinon.stub().resolves({}),
},
},
};

await runOpportunityStatusProcessor(testMessage, testContext);

// Verify RUM was checked successfully - this should cover lines 26-37
// Verify RUM was checked successfully - now tries original domain first
expect(createFromStub.calledWith(testContext)).to.be.true;
expect(mockRUMClient.retrieveDomainkey.calledWith('example.com')).to.be.true;
expect(mockRUMClient.retrieveDomainkey.callCount).to.be.at.least(1);
expect(mockRUMClient.retrieveDomainkey.getCall(0).args[0]).to.equal('example.com');
expect(testContext.log.info.calledWith('RUM is available for domain: example.com')).to.be.true;

createFromStub.restore();
});

it('should try toggled domain when original domain fails', async () => {
// Test fallback to toggled domain when original fails
const mockRUMClientWithFallback = {
retrieveDomainkey: sinon.stub(),
};
// First call (example.com) fails, second call (www.example.com) succeeds
mockRUMClientWithFallback.retrieveDomainkey
.onFirstCall().rejects(new Error('404 Not Found'))
.onSecondCall().resolves('test-domain-key');

const RUMAPIClient = await import('@adobe/spacecat-shared-rum-api-client');
const createFromStub = sinon.stub(RUMAPIClient.default, 'createFrom').returns(mockRUMClientWithFallback);

const testMessage = {
siteId: 'test-site-id',
siteUrl: 'https://example.com',
organizationId: 'test-org-id',
taskContext: {
auditTypes: ['cwv'],
slackContext: null,
},
};

const testMockSite = {
getOpportunities: sinon.stub().resolves([]),
};

const testContext = {
...mockContext,
dataAccess: {
Site: {
findById: sinon.stub().resolves(testMockSite),
},
SiteTopPage: {
allBySiteIdAndSourceAndGeo: sinon.stub().resolves([]),
},
Configuration: {
findLatest: sinon.stub().resolves({}),
},
},
};

await runOpportunityStatusProcessor(testMessage, testContext);

// Verify both attempts were made - original first, then toggled
expect(mockRUMClientWithFallback.retrieveDomainkey.callCount).to.equal(2);
expect(mockRUMClientWithFallback.retrieveDomainkey.getCall(0).args[0]).to.equal('example.com');
expect(mockRUMClientWithFallback.retrieveDomainkey.getCall(1).args[0]).to.equal('www.example.com');
expect(testContext.log.info.calledWith('RUM is available for domain: www.example.com')).to.be.true;

createFromStub.restore();
});

it('should handle both domain variations failing', async () => {
// Test when both original and toggled domain fail
const mockRUMClientFailBoth = {
retrieveDomainkey: sinon.stub().rejects(new Error('404 Not Found')),
};

const RUMAPIClient = await import('@adobe/spacecat-shared-rum-api-client');
const createFromStub = sinon.stub(RUMAPIClient.default, 'createFrom').returns(mockRUMClientFailBoth);

const testMessage = {
siteId: 'test-site-id',
siteUrl: 'https://example.com',
organizationId: 'test-org-id',
taskContext: {
auditTypes: ['cwv'],
slackContext: null,
},
};

const testMockSite = {
getOpportunities: sinon.stub().resolves([]),
};

const testContext = {
...mockContext,
dataAccess: {
Site: {
findById: sinon.stub().resolves(testMockSite),
},
SiteTopPage: {
allBySiteIdAndSourceAndGeo: sinon.stub().resolves([]),
},
Configuration: {
findLatest: sinon.stub().resolves({}),
},
},
};

await runOpportunityStatusProcessor(testMessage, testContext);

// Verify both attempts were made - original first, then toggled
expect(mockRUMClientFailBoth.retrieveDomainkey.callCount).to.equal(2);
expect(mockRUMClientFailBoth.retrieveDomainkey.getCall(0).args[0]).to.equal('example.com');
expect(mockRUMClientFailBoth.retrieveDomainkey.getCall(1).args[0]).to.equal('www.example.com');
expect(testContext.log.warn.calledWith(sinon.match(/RUM not available for www.example.com/))).to.be.true;

createFromStub.restore();
});

it('should handle opportunities with different types and localhost URLs', async () => {
// Test opportunities with different types when using localhost URLs
const testCases = [
Expand Down Expand Up @@ -1028,7 +1138,7 @@ describe('Opportunity Status Processor', () => {

await runOpportunityStatusProcessor(message, context);

expect(context.log.warn.calledWithMatch('Could not resolve canonical URL')).to.be.true;
expect(context.log.warn.calledWithMatch('Could not resolve canonical URL for: not-a-valid-url')).to.be.true;
});

it('should handle opportunities with missing getData method', async () => {
Expand Down