Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,41 @@ npx @agentgram/ax-score https://example.com
-u, --upload Upload results to AgentGram hosted API
--api-url <url> API endpoint for uploading results
--api-key <key> API key for authentication (or set AGENTGRAM_API_KEY env var)
-r, --repeat <n> Run the audit N times and report score stability (default: 1)
```

### Repeat-run stability checks

Use `--repeat` when you want to measure score drift across sequential runs of the same URL:

```bash
npx @agentgram/ax-score https://example.com --repeat 3
```

The CLI keeps the usual report shape and adds a `Stability` block that lists the per-run scores together with the aggregate mean, min–max range, delta (`max - min`), and population variance.

### Programmatic Usage

```typescript
import { runAudit } from '@agentgram/ax-score';
import { runAudit, runRepeatedAudit } from '@agentgram/ax-score';

const report = await runAudit({
const singleRun = await runAudit({
url: 'https://example.com',
timeout: 30000,
verbose: false,
});
console.log(`Score: ${report.score}`);

const repeatedRun = await runRepeatedAudit(
{
url: 'https://example.com',
timeout: 30000,
verbose: false,
},
3
);

console.log(`Single-run score: ${singleRun.score}`);
console.log(repeatedRun.stability);
```

---
Expand Down
26 changes: 25 additions & 1 deletion docs/json-output-contract.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The output follows the `AXReport` TypeScript interface defined in `src/types.ts`
| `categories` | `AXCategory[]` | Array of category scores |
| `audits` | `Record<string, AuditResult>` | Map of audit ID to audit result |
| `recommendations` | `Recommendation[]` | Actionable recommendations sorted by impact |
| `stability` | `StabilityResult \| undefined` | Present when the audit is run with repeat mode |

---

Expand Down Expand Up @@ -70,6 +71,18 @@ The output follows the `AXReport` TypeScript interface defined in `src/types.ts`
| `message` | `string` | The audit description explaining the issue |
| `impact` | `number` | Potential score improvement (higher = better) |

## `StabilityResult`

| Field | Type | Description |
| ---------- | ---------- | -------------------------------------------------------- |
| `runs` | `number` | Number of sequential audit runs |
| `scores` | `number[]` | Overall score from each run, in execution order |
| `min` | `number` | Lowest overall score across the repeated runs |
| `max` | `number` | Highest overall score across the repeated runs |
| `mean` | `number` | Mean overall score across the repeated runs |
| `delta` | `number` | `max - min`, useful for quick drift checks |
| `variance` | `number` | Population variance across the repeated overall scores |

---

## Audit IDs
Expand Down Expand Up @@ -181,10 +194,21 @@ There are 18 audits organized into 6 categories:
"message": "Rate limit headers inform AI agents about request quotas...",
"impact": 3
}
]
],
"stability": {
"runs": 3,
"scores": [62, 58, 61],
"min": 58,
"max": 62,
"mean": 60.33,
"delta": 4,
"variance": 2.89
}
}
```

When `runAudit()` is used directly, or the CLI runs without `--repeat`, the `stability` field is omitted.

---

## Score Interpretation
Expand Down
35 changes: 27 additions & 8 deletions src/bin/ax-score.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/env node

import { Command } from 'commander';
import { Command, InvalidArgumentError } from 'commander';
import ora from 'ora';
import { runAudit } from '../runner.js';
import { runRepeatedAudit } from '../runner.js';
import { renderReport } from '../reporter/cli.js';
import { renderJSON } from '../reporter/json.js';
import { uploadReport } from '../upload.js';
Expand All @@ -15,6 +15,7 @@ interface CliOptions {
upload: boolean;
apiUrl: string;
apiKey?: string;
repeat: number;
}

const DEFAULT_API_URL = 'https://agentgram.co/api/v1/ax-score/scan';
Expand All @@ -34,15 +35,25 @@ program
.option('-u, --upload', 'Upload results to AgentGram hosted API', false)
.option('--api-url <url>', 'API endpoint for uploading results', DEFAULT_API_URL)
.option('--api-key <key>', 'API key for authentication (or set AGENTGRAM_API_KEY env var)')
.option(
'-r, --repeat <n>',
'Run the audit N times and report score stability',
parsePositiveInteger,
1
)
.action(async (url: string, options: CliOptions) => {
const spinner = ora(`Auditing ${url}...`).start();
const repeat = options.repeat;
const spinner = ora(
repeat > 1 ? `Auditing ${url} (${repeat} runs)...` : `Auditing ${url}...`
).start();

try {
const report = await runAudit({
const config = {
url,
timeout: parseInt(options.timeout, 10),
verbose: options.verbose,
});
};
const report = await runRepeatedAudit(config, repeat);

spinner.stop();

Expand Down Expand Up @@ -74,9 +85,7 @@ program
uploadSpinner.succeed('Results uploaded successfully.');
} catch (uploadError) {
uploadSpinner.fail('Failed to upload results.');
console.error(
uploadError instanceof Error ? uploadError.message : String(uploadError)
);
console.error(uploadError instanceof Error ? uploadError.message : String(uploadError));
// Upload failure is non-fatal: still exit based on score
}
}
Expand All @@ -90,3 +99,13 @@ program
});

program.parse();

/**
 * Commander argument parser for `--repeat`: converts the raw option value
 * into a positive integer.
 *
 * The entire value must be a plain decimal integer. `Number.parseInt` on its
 * own stops at the first non-digit character, so inputs such as `3abc` or
 * `1.5` would otherwise be silently accepted as 3 and 1.
 *
 * @param value - Raw option value as typed on the command line.
 * @returns The parsed integer, always >= 1.
 * @throws InvalidArgumentError if the value is not a positive decimal integer.
 */
function parsePositiveInteger(value: string): number {
  const trimmed = value.trim();

  // Validate the whole string up front; parseInt alone tolerates trailing junk.
  if (!/^\+?\d+$/.test(trimmed)) {
    throw new InvalidArgumentError('repeat must be a positive integer');
  }

  const parsed = Number.parseInt(trimmed, 10);

  // isSafeInteger also rejects digit strings too large to represent exactly.
  if (!Number.isSafeInteger(parsed) || parsed < 1) {
    throw new InvalidArgumentError('repeat must be a positive integer');
  }

  return parsed;
}
3 changes: 2 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Main API
export { runAudit } from './runner.js';
export { runAudit, runRepeatedAudit } from './runner.js';

// Types
export type {
Expand All @@ -11,6 +11,7 @@ export type {
AuditDetails,
AuditRef,
SiteType,
StabilityResult,
} from './types.js';

// Base classes (for extensibility)
Expand Down
15 changes: 15 additions & 0 deletions src/reporter/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ function renderCategory(category: AXCategory): string {
return ` ${label.padEnd(8)} ${category.title.padEnd(20)} ${score}`;
}

/**
 * Format a numeric stability metric for display: whole numbers are printed
 * unchanged, any other value is fixed to two decimal places.
 */
function formatMetric(value: number): string {
  if (Number.isInteger(value)) {
    return String(value);
  }

  return value.toFixed(2);
}

/**
* Render an AX report as a rich CLI output.
*/
Expand All @@ -48,6 +52,17 @@ export function renderReport(report: AXReport): string {
lines.push(renderCategory(category));
}

if (report.stability) {
const s = report.stability;
lines.push('');
lines.push(chalk.bold(' Stability:'));
lines.push(` Runs: ${s.runs} Scores: [${s.scores.join(', ')}]`);
lines.push(
` Mean: ${formatMetric(s.mean)} Range: ${formatMetric(s.min)}-${formatMetric(s.max)} ` +
`(delta ${formatMetric(s.delta)}) Variance: ${formatMetric(s.variance)}`
);
}

if (report.recommendations.length > 0) {
lines.push('');
lines.push(chalk.bold(' Top Fixes:'));
Expand Down
Loading