iaincollins · jessealama · Oct 29, 2025 · Oct 29, 2025
diff --git a/__tests__/fixtures/malformed-jsonld.html b/__tests__/fixtures/malformed-jsonld.html
@@ -0,0 +1,23 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <title>Test Page with Malformed JSON-LD</title>
+  <!-- First malformed JSON-LD block: the bare word "invalid" is not valid JSON -->
+  <script type="application/ld+json">invalid</script>
+
+  <!-- Second malformed JSON-LD block: no comma between "author" and "datePublished" -->
+  <script type="application/ld+json">
+  {
+    "@context": "http://schema.org",
+    "@type": "Article",
+    "headline": "Test Article",
+    "author": "John Doe"
+    "datePublished": "2023-01-01"
+  }
+  </script>
+</head>
+<body>
+  <h1>Test Page</h1>
+  <p>This page contains malformed JSON-LD</p>
+</body>
+</html>
diff --git a/__tests__/fixtures/two-scripts-mixed.html b/__tests__/fixtures/two-scripts-mixed.html
@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <title>Two JSON-LD Scripts: Invalid then Valid</title>
+
+  <!-- First script: the bare word "invalid" is not valid JSON -->
+  <script type="application/ld+json">invalid</script>
+
+  <!-- Second script: well-formed JSON-LD describing an Article -->
+  <script type="application/ld+json">
+  {
+    "@context": "http://schema.org",
+    "@type": "Article",
+    "headline": "This Article Should Be Found",
+    "author": "Jane Doe",
+    "datePublished": "2023-06-15"
+  }
+  </script>
+</head>
+<body>
+  <h1>Test Page</h1>
+  <p>First script has invalid JSON, second script has valid JSON-LD</p>
+</body>
+</html>
diff --git a/__tests__/malformed-jsonld.js b/__tests__/malformed-jsonld.js
@@ -0,0 +1,111 @@
+const fs = require('fs')
+const { structuredDataTest } = require('../index')
+
+describe('Malformed JSON-LD', () => {
+  test('should report parsing error when JSON-LD is malformed', async () => {
+    const markup = fs.readFileSync('__tests__/fixtures/malformed-jsonld.html')
+
+    // The run may resolve or reject; on rejection the report is read from err.res
+    let report
+    try {
+      report = await structuredDataTest(markup)
+    } catch (err) {
+      report = err.res
+    }
+
+    // At least one failure is expected, and one of them must describe the JSON-LD parsing problem
+    expect(report.failed.length).toBeGreaterThan(0)
+    const isParsingFailure = ({ description }) => description && description.includes('JSON-LD parsing')
+    const parsingFailure = report.failed.find(isParsingFailure)
+    expect(parsingFailure).toBeDefined()
+  })
+
+  test('should reject promise with VALIDATION_FAILED when JSON-LD is malformed', async () => {
+    const fixturePath = '__tests__/fixtures/malformed-jsonld.html'
+    const expectedRejection = { type: 'VALIDATION_FAILED', message: 'Validation failed' }
+
+    // The promise has to reject, and the rejection value has to carry both
+    // properties listed above (toMatchObject tolerates additional ones)
+    const pending = structuredDataTest(fs.readFileSync(fixturePath))
+    await expect(pending).rejects.toMatchObject(expectedRejection)
+  })
+
+  test('should not crash when encountering malformed JSON-LD', async () => {
+    const html = fs.readFileSync('__tests__/fixtures/malformed-jsonld.html')
+
+    // A crash would surface as a rejection whose value is not the
+    // VALIDATION_FAILED error (e.g. a TypeError), so assert on the rejection
+    // itself. Handing an async function to expect(...).not.toThrow() cannot
+    // detect that: the function returns a promise instead of throwing, and a
+    // failing assertion inside it only rejects that promise, which nothing
+    // awaits, so the test would pass regardless of the outcome.
+    await expect(structuredDataTest(html)).rejects.toMatchObject({ type: 'VALIDATION_FAILED' })
+  })
+
+  test('should still pass when JSON-LD is valid', async () => {
+    const validHtml = `
+      <!DOCTYPE html>
+      <html>
+      <head>
+        <script type="application/ld+json">
+        {
+          "@context": "http://schema.org",
+          "@type": "Article",
+          "headline": "Valid Article",
+          "author": "John Doe",
+          "datePublished": "2023-01-01"
+        }
+        </script>
+      </head>
+      <body><h1>Test</h1></body>
+      </html>
+    `
+
+    // Use the report whether the run resolves or rejects (on rejection it is read from err.res)
+    const report = await structuredDataTest(validHtml).catch(({ res }) => res)
+
+    // No failed entry may mention a JSON-LD parsing problem;
+    // Array#find yields undefined exactly when nothing matches
+    const parsingFailure = report.failed.find(
+      ({ description }) => description && description.includes('JSON-LD parsing')
+    )
+    expect(parsingFailure).toBeUndefined()
+  })
+
+  test('should process valid JSON-LD from 2nd script even when 1st script has invalid JSON', async () => {
+    const markup = fs.readFileSync('__tests__/fixtures/two-scripts-mixed.html')
+
+    // Work on the report whether the run resolved or rejected
+    // (on rejection it is read from err.res)
+    const report = await structuredDataTest(markup).catch(({ res }) => res)
+
+    // Script #1 is the only unparseable one, so exactly one parsing error is expected
+    const parsingErrors = report.failed.filter(({ group }) => group === 'JSON-LD Parsing Errors')
+    expect(parsingErrors.length).toBe(1)
+    const firstError = parsingErrors[0]
+    expect(firstError.test).toBe('JSON-LD script tag #1')
+    expect(firstError.description).toContain('Unexpected token')
+
+    // Article, declared by script #2, must be the one and only detected schema
+    expect(report.schemas).toContain('Article')
+    expect(report.schemas.length).toBe(1)
+
+    // The Article markup from script #2 must have produced passing tests
+    const articlePasses = report.passed.filter(({ schema }) => schema === 'Article')
+    expect(articlePasses.length).toBeGreaterThan(0)
+
+    // The extracted Article must carry the values written in script #2;
+    // element [0] is only read after the presence and length checks
+    const { jsonld } = report.structuredData
+    expect(jsonld.Article).toBeDefined()
+    expect(jsonld.Article.length).toBe(1)
+    const article = jsonld.Article[0]
+    expect(article.headline).toBe('This Article Should Be Found')
+    expect(article.author).toBe('Jane Doe')
+    expect(article.datePublished).toBe('2023-06-15')
+
+    // A mixed outcome is expected: some tests pass and some fail
+    expect(report.passed.length).toBeGreaterThan(0)
+    expect(report.failed.length).toBeGreaterThan(0)
+  })
+})
diff --git a/index.js b/index.js
@@ -34,6 +34,26 @@ const _structuredDataTest = async (structuredData, options) => {
     let testsOptional = [] // Optional tests (regardless if passed or failed; they do not count towards either)
     let testsSkipped = [] // Only that were skipped
 
+    // Check for JSON-LD parsing errors and add them as failed tests
+    if (structuredData.jsonLdErrors && structuredData.jsonLdErrors.length > 0) {
+      structuredData.jsonLdErrors.forEach((error, i) => {
+        const errorTest = {
+          test: `JSON-LD script tag #${error.index}`,
+          type: 'jsonld',
+          group: 'JSON-LD Parsing Errors',
+          description: `JSON-LD parsing failed: ${error.error}`,
+          error: error.error,
+          passed: false,
+          expect: true,
+          autoDetected: true
+        }
+        // Only add to tests array; will be added to testsFailed later in the test loop
+        tests.push(errorTest)
+        if (!testGroups.includes('JSON-LD Parsing Errors'))
+          testGroups.push('JSON-LD Parsing Errors')
+      })
+    }
+
     // Combine schemas found with any schemas specified.
     let arrayOfSchemas = []
     if (auto === true) {
@@ -391,9 +411,49 @@ const structuredDataTestString = async (input, options) => {
   return structuredDataTestHtml(html, options)
 }
 
+// Scans raw HTML for <script type="application/ld+json"> blocks and returns one
+// { index, error, content } entry per block whose non-empty body fails JSON.parse:
+// index is the 1-based position among all JSON-LD script tags found (empty ones are
+// counted but never reported), error is the JSON.parse message, and content is the
+// trimmed body cut to 100 characters (with '...' appended when it was longer).
+const __detectJsonLdErrors = (html) => {
+  const errors = []
+  // Accept what HTML accepts for the type attribute: double-quoted, single-quoted
+  // or unquoted values, whitespace around "=", and whitespace in "</script >".
+  // "type" must follow whitespace so that e.g. data-type="..." is not mistaken for it.
+  const jsonLdRegex = /<script\b(?:[^>]*?\s)?type\s*=\s*(?:"application\/ld\+json"|'application\/ld\+json'|application\/ld\+json(?=[\s>]))[^>]*>([\s\S]*?)<\/script\s*>/gi
+  let match
+  let index = 0
+
+  while ((match = jsonLdRegex.exec(html)) !== null) {
+    index++
+    const jsonContent = match[1].trim()
+    if (!jsonContent) continue
+    try {
+      JSON.parse(jsonContent)
+    } catch (e) {
+      errors.push({
+        index,
+        error: e.message,
+        content: jsonContent.substring(0, 100) + (jsonContent.length > 100 ? '...' : '')
+      })
+    }
+  }
+  return errors
+}
+
 const structuredDataTestHtml = async (html, options) => {
+  // Scan the raw HTML for JSON-LD script blocks that fail JSON.parse, before running the extractor
+  const jsonLdErrors = __detectJsonLdErrors(html)
+
   let structuredData = WAE().parse(html)
   structuredData = __transformStructuredData(structuredData)
+
+  // Attach the errors only when some were found, so this code never sets an empty jsonLdErrors array
+  if (jsonLdErrors.length > 0) {
+    structuredData.jsonLdErrors = jsonLdErrors
+  }
+
   return _structuredDataTest(structuredData, { html, ...options })
 }