Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions __tests__/fixtures/malformed-jsonld.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<!DOCTYPE html>
<html>
<head>
<title>Test Page with Malformed JSON-LD</title>
<!-- Test fixture: both JSON-LD script tags below are intentionally invalid JSON. Do not correct them. -->
<!-- First malformed JSON-LD: simple invalid JSON (a bare word, not a JSON value) -->
<script type="application/ld+json">invalid</script>

<!-- Second malformed JSON-LD: missing comma after the "author" property -->
<script type="application/ld+json">
{
"@context": "http://schema.org",
"@type": "Article",
"headline": "Test Article",
"author": "John Doe"
"datePublished": "2023-01-01"
}
</script>
</head>
<body>
<h1>Test Page</h1>
<p>This page contains malformed JSON-LD</p>
</body>
</html>
24 changes: 24 additions & 0 deletions __tests__/fixtures/two-scripts-mixed.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<!DOCTYPE html>
<html>
<head>
<title>Two JSON-LD Scripts: Invalid then Valid</title>

<!-- Test fixture: the order of the two script tags matters; the invalid one must stay first. -->
<!-- First: Invalid JSON (a bare word, intentionally unparseable) -->
<script type="application/ld+json">invalid</script>

<!-- Second: Valid JSON-LD describing a schema.org Article -->
<script type="application/ld+json">
{
"@context": "http://schema.org",
"@type": "Article",
"headline": "This Article Should Be Found",
"author": "Jane Doe",
"datePublished": "2023-06-15"
}
</script>
</head>
<body>
<h1>Test Page</h1>
<p>First script has invalid JSON, second script has valid JSON-LD</p>
</body>
</html>
111 changes: 111 additions & 0 deletions __tests__/malformed-jsonld.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
const fs = require('fs')
const { structuredDataTest } = require('../index')

describe('Malformed JSON-LD', () => {
test('should report parsing error when JSON-LD is malformed', async () => {
  const html = fs.readFileSync('__tests__/fixtures/malformed-jsonld.html')

  // Whether the call resolves or rejects, we want the result payload;
  // on rejection it is read from the error's `res` property.
  const result = await structuredDataTest(html).catch(err => err.res)

  // A JSON parsing problem must surface as at least one failed test
  expect(result.failed.length).toBeGreaterThan(0)

  // ...and one of those failures must mention JSON-LD parsing in its description
  const mentionsJsonLdParsing = failure =>
    failure.description && failure.description.includes('JSON-LD parsing')
  expect(result.failed.find(mentionsJsonLdParsing)).toBeDefined()
})

test('should reject promise with VALIDATION_FAILED when JSON-LD is malformed', async () => {
  const html = fs.readFileSync('__tests__/fixtures/malformed-jsonld.html')

  // Shape the rejection value is required to match
  const expectedRejection = {
    type: 'VALIDATION_FAILED',
    message: 'Validation failed'
  }

  // The promise must reject (not resolve) with a matching error object
  const pending = structuredDataTest(html)
  await expect(pending).rejects.toMatchObject(expectedRejection)
})

test('should not crash when encountering malformed JSON-LD', async () => {
  const html = fs.readFileSync('__tests__/fixtures/malformed-jsonld.html')

  // Await the call directly. Wrapping it in `expect(async () => ...).not.toThrow()`
  // can never fail: an async function returns a promise instead of throwing
  // synchronously, and that promise was left floating, so the assertion inside
  // the catch handler was not tied to this test's outcome.
  try {
    await structuredDataTest(html)
  } catch (err) {
    // A rejection is acceptable only if it is the VALIDATION_FAILED error,
    // not an unexpected crash.
    expect(err.type).toBe('VALIDATION_FAILED')
  }
})

test('should still pass when JSON-LD is valid', async () => {
  // Inline document with a single, well-formed JSON-LD Article
  const validHtml = `
<!DOCTYPE html>
<html>
<head>
<script type="application/ld+json">
{
"@context": "http://schema.org",
"@type": "Article",
"headline": "Valid Article",
"author": "John Doe",
"datePublished": "2023-01-01"
}
</script>
</head>
<body><h1>Test</h1></body>
</html>
`

  // Other validations may still fail and reject; read the result either way
  const result = await structuredDataTest(validHtml).catch(err => err.res)

  // No failed test may mention JSON-LD parsing in its description
  const mentionsJsonLdParsing = failure =>
    failure.description && failure.description.includes('JSON-LD parsing')
  expect(result.failed.find(mentionsJsonLdParsing)).toBeUndefined()
})

test('should process valid JSON-LD from 2nd script even when 1st script has invalid JSON', async () => {
  const html = fs.readFileSync('__tests__/fixtures/two-scripts-mixed.html')

  // Read the result whether the call resolves or rejects
  const result = await structuredDataTest(html).catch(err => err.res)

  // Exactly one parsing failure is expected, and it must point at script #1
  const parsingFailures = result.failed.filter(
    failure => failure.group === 'JSON-LD Parsing Errors'
  )
  expect(parsingFailures.length).toBe(1)
  expect(parsingFailures[0].test).toBe('JSON-LD script tag #1')
  expect(parsingFailures[0].description).toContain('Unexpected token')

  // The Article schema from the 2nd script must be the only schema detected
  expect(result.schemas).toContain('Article')
  expect(result.schemas.length).toBe(1)

  // At least one passing test must belong to the Article schema
  const passedForArticle = result.passed.filter(entry => entry.schema === 'Article')
  expect(passedForArticle.length).toBeGreaterThan(0)

  // The extracted data must be the content of the 2nd (valid) script
  expect(result.structuredData.jsonld.Article).toBeDefined()
  expect(result.structuredData.jsonld.Article.length).toBe(1)
  const extracted = result.structuredData.jsonld.Article[0]
  expect(extracted.headline).toBe('This Article Should Be Found')
  expect(extracted.author).toBe('Jane Doe')
  expect(extracted.datePublished).toBe('2023-06-15')

  // Mixed outcome: neither list may be empty
  expect(result.passed.length).toBeGreaterThan(0)
  expect(result.failed.length).toBeGreaterThan(0)
})
})
60 changes: 60 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,26 @@ const _structuredDataTest = async (structuredData, options) => {
let testsOptional = [] // Optional tests (regardless if passed or failed; they do not count towards either)
let testsSkipped = [] // Only that were skipped

// Turn any recorded JSON-LD parsing errors into failing test entries.
if (structuredData.jsonLdErrors && structuredData.jsonLdErrors.length > 0) {
  const parsingGroup = 'JSON-LD Parsing Errors'

  structuredData.jsonLdErrors.forEach(({ index, error }) => {
    // Only queued on `tests` here; per the original note, the later test loop
    // (not visible in this hunk) is what adds the entry to testsFailed.
    tests.push({
      test: `JSON-LD script tag #${index}`,
      type: 'jsonld',
      group: parsingGroup,
      description: `JSON-LD parsing failed: ${error}`,
      error,
      passed: false,
      expect: true,
      autoDetected: true
    })

    // Register the group name once, no matter how many errors there are
    if (!testGroups.includes(parsingGroup)) {
      testGroups.push(parsingGroup)
    }
  })
}

// Combine schemas found with any schemas specified.
let arrayOfSchemas = []
if (auto === true) {
Expand Down Expand Up @@ -391,9 +411,49 @@ const structuredDataTestString = async (input, options) => {
return structuredDataTestHtml(html, options)
}

/**
 * Scan raw HTML for JSON-LD script tags whose contents are not valid JSON.
 *
 * Script tags are located with a regular expression rather than an HTML
 * parser, so this is a best-effort check. Every matched JSON-LD script tag
 * is counted (1-based, in document order), including empty ones, so `index`
 * is the tag's position among the JSON-LD script tags this function matched.
 *
 * @param {string|Buffer} html - Markup to scan; non-string values are
 *   coerced to a string by the regex engine.
 * @returns {Array<{index: number, error: string, content: string}>} One
 *   entry per script whose content failed `JSON.parse`: the tag position,
 *   the parser's message, and the first 100 characters of the content
 *   (suffixed with '...' when truncated).
 */
const __detectJsonLdErrors = (html) => {
  const errors = []

  // Match <script ... type=application/ld+json ...>...</script>.
  // - `\stype` requires whitespace before the attribute name, so attributes
  //   such as `data-type="application/ld+json"` do not qualify a script.
  // - Whitespace around `=` and unquoted values are valid HTML and accepted.
  // - `</script >` (whitespace before `>`) is a valid end tag and accepted.
  const jsonLdRegex = /<script\b(?=[^>]*\stype\s*=\s*(?:["']application\/ld\+json["']|application\/ld\+json(?=[\s>])))[^>]*>([\s\S]*?)<\/script\s*>/gi
  let match
  let index = 0

  while ((match = jsonLdRegex.exec(html)) !== null) {
    const jsonContent = match[1].trim()
    index++

    // Empty script tags are counted but are not reported as errors
    if (!jsonContent) continue

    try {
      JSON.parse(jsonContent)
    } catch (e) {
      // Parsing failed: record where, why, and a short excerpt
      errors.push({
        index,
        error: e.message,
        content: jsonContent.substring(0, 100) + (jsonContent.length > 100 ? '...' : '')
      })
    }
  }

  return errors
}

/**
 * Run the structured data tests against an HTML document.
 *
 * The raw markup is first scanned for JSON-LD scripts that fail to parse;
 * any findings are attached to the parsed data as `jsonLdErrors` before it
 * is handed to `_structuredDataTest`.
 *
 * @param {string|Buffer} html - Markup to test (NOTE(review): the tests in
 *   this change pass the Buffer returned by `fs.readFileSync`).
 * @param {object} [options] - Options forwarded to `_structuredDataTest`,
 *   merged after `html` so a caller-supplied `html` key takes precedence.
 * @returns {Promise<*>} Whatever `_structuredDataTest` resolves or rejects with.
 */
const structuredDataTestHtml = async (html, options) => {
  // Scan the raw markup before the extractor runs
  const parseFailures = __detectJsonLdErrors(html)

  const structuredData = __transformStructuredData(WAE().parse(html))

  // The property is only set when there is something to report
  if (parseFailures.length > 0) {
    structuredData.jsonLdErrors = parseFailures
  }

  return _structuredDataTest(structuredData, { html, ...options })
}

Expand Down
Loading