Skip to content

Commit

Permalink
tests(v1): added for v1, improved ci
Browse files Browse the repository at this point in the history
  • Loading branch information
rafaelsideguide committed Sep 17, 2024
1 parent a0189ac commit d0c2133
Show file tree
Hide file tree
Showing 5 changed files with 343 additions and 9 deletions.
68 changes: 68 additions & 0 deletions .github/workflows/test-suite.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Runs the end-to-end test suite (apps/test-suite) against a locally started
# API + workers for every pull request that targets `main`.
name: Test Suite Pre Deploy
on:
  pull_request:
    branches:
      - main

# Workflow-level environment: every step in every job sees these variables.
# NOTE(review): publishing credentials (PYPI_*, NPM_TOKEN, CRATES_IO_TOKEN)
# are exposed here as well — confirm the test run actually needs them.
env:
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
  FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
  HOST: ${{ secrets.HOST }}
  LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
  LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
  POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
  POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
  NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
  PORT: ${{ secrets.PORT }}
  REDIS_URL: ${{ secrets.REDIS_URL }}
  SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }}
  SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
  SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
  SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
  TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
  PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
  PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
  NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
  CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
  SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
  USE_DB_AUTHENTICATION: ${{ secrets.USE_DB_AUTHENTICATION }}
  ENV: ${{ secrets.ENV }}

jobs:
  pre-deploy-test-suite:
    name: Test Suite
    runs-on: ubuntu-latest
    services:
      # Redis service container, published on the runner at port 6379.
      redis:
        image: redis
        ports:
          - 6379:6379
    steps:
      - uses: actions/checkout@v3
      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: "20"
      - name: Install pnpm
        run: npm install -g pnpm
      - name: Install API dependencies
        run: pnpm install
        working-directory: ./apps/api
      # The API and the workers are launched in the background (`&`) so the
      # job can move on to the following steps while they keep running.
      # NOTE(review): nothing waits for the server to become ready before the
      # tests start — confirm the dependency install below gives it enough time.
      - name: Start the application
        run: npm start &
        working-directory: ./apps/api
        id: start_app
      - name: Start workers
        run: npm run workers &
        working-directory: ./apps/api
        id: start_workers
      - name: Install test-suite dependencies
        run: pnpm install
        working-directory: ./apps/test-suite
      - name: Run E2E tests
        run: |
          npm run test:suite
        working-directory: ./apps/test-suite
153 changes: 153 additions & 0 deletions apps/test-suite/data/map.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
[
{
"website": "https://www.vellum.ai/llm-leaderboard",
"expected_min_num_of_pages": 1,
"expected_mapped_pages": ["https://www.vellum.ai/llm-leaderboard"]
},
{
"website": "https://openai.com/news",
"expected_min_num_of_pages": 4,
"expected_mapped_pages": [
"https://openai.com/news/company/",
"https://openai.com/news/research/",
"https://openai.com/news/safety-and-alignment/",
"https://openai.com/news/stories/"
]
},
{
"website": "https://www.framer.com/pricing",
"expected_min_num_of_pages": 1,
"expected_not_mapped_pages": [
"https://www.framer.com/features/navigation/",
"https://www.framer.com/contact/",
"https://www.framer.com/add-ons/",
"https://www.framer.com/free-saas-ui-kit/",
"https://www.framer.com/help/",
"https://www.framer.com/features/effects/",
"https://www.framer.com/enterprise/",
"https://www.framer.com/templates/"
]
},
{
"website": "https://mendable.ai/pricing",
"expected_min_num_of_pages": 1,
"expected_not_mapped_pages": [
"https://mendable.ai/",
"https://mendable.ai/blog",
"https://mendable.ai/signin",
"https://mendable.ai/signup",
"https://mendable.ai",
"https://mendable.ai/usecases/sales-enablement",
"https://mendable.ai/usecases/documentation",
"https://mendable.ai/usecases/cs-enablement",
"https://mendable.ai/usecases/productcopilot",
"https://mendable.ai/security"
],
"notes": "This one should not go backwards, but it does!"
},
{
"website": "https://agentops.ai/blog",
"expected_min_num_of_pages": 6,
"expected_mapped_pages": [
"https://www.agentops.ai/blog/effortless-hr-management-with-saas",
"https://www.agentops.ai/blog/streamlining-hr-with-saas",
"https://www.agentops.ai/blog/simplify-hr-with-modern-saas-solutions",
"https://www.agentops.ai/blog/efficient-hr-operations-with-saas",
"https://www.agentops.ai/blog/hr-made-simple-with-saas",
"https://agentops.ai/blog"
],
"expected_not_mapped_pages": [
"https://agentops.ai/about-us",
"https://agentops.ai/contact-us"
]
},
{
"website": "https://en.wikipedia.org/wiki/T._N._Seshan",
"expected_min_num_of_pages": 1,
"expected_not_mapped_pages": [
"https://en.wikipedia.org/wiki/Wikipedia:Contents",
"https://en.wikipedia.org/wiki/Wikipedia:Contact_us",
"https://en.wikipedia.org/wiki/V._S._Ramadevi",
"https://en.wikipedia.org/wiki/Wikipedia:About",
"https://en.wikipedia.org/wiki/Help:Introduction",
"https://en.wikipedia.org/wiki/H._D._Deve_Gowda",
"https://en.wikipedia.org/wiki/File:T.N._Seshan_in_1994.jpg"
]
},

{
"website": "https://ycombinator.com/companies",
"expected_min_num_of_pages": 20,
"expected_mapped_pages": [
"https://www.ycombinator.com/companies/industry/elearning",
"https://www.ycombinator.com/companies/industry/computer-vision",
"https://www.ycombinator.com/companies/industry/health-tech",
"https://www.ycombinator.com/companies/industry/education",
"https://www.ycombinator.com/companies/industry/robotics",
"https://www.ycombinator.com/companies/industry/hardware",
"https://www.ycombinator.com/companies/industry/saas",
"https://www.ycombinator.com/companies/industry/hard-tech",
"https://www.ycombinator.com/companies/industry/developer-tools",
"https://www.ycombinator.com/companies/industry/entertainment",
"https://www.ycombinator.com/companies/industry/finance",
"https://www.ycombinator.com/companies/industry/generative-ai",
"https://www.ycombinator.com/companies/industry/machine-learning"
]
},
{
"website": "https://firecrawl.dev",
"expected_min_num_of_pages": 2,
"expected_mapped_pages": [
"https://www.firecrawl.dev/",
"https://www.firecrawl.dev/pricing"
]
},
{
"website": "https://fly.io/docs/gpus/gpu-quickstart",
"expected_min_num_of_pages": 1,
"expected_not_mapped_pages": [
"https://fly.io/docs/getting-started/",
"https://fly.io/docs/hands-on/",
"https://fly.io/docs/about/support/",
"https://fly.io/docs/blueprints/going-to-production-with-healthcare-apps/",
"https://fly.io/docs/machines/flyctl/fly-machine-update/",
"https://fly.io/docs/blueprints/review-apps-guide/",
"https://fly.io/docs/blueprints/supercronic/"
],
"notes": "This one should not go backwards, but it does!"
},
{
"website": "https://www.instructables.com/circuits",
"expected_min_num_of_pages": 12,
"expected_mapped_pages": [
"https://www.instructables.com/circuits/",
"https://www.instructables.com/circuits/apple/projects/",
"https://www.instructables.com/circuits/art/projects/",
"https://www.instructables.com/circuits/electronics/projects/",
"https://www.instructables.com/circuits/microsoft/projects/",
"https://www.instructables.com/circuits/microcontrollers/projects/",
"https://www.instructables.com/circuits/community/",
"https://www.instructables.com/circuits/leds/projects/",
"https://www.instructables.com/circuits/gadgets/projects/",
"https://www.instructables.com/circuits/arduino/projects/",
"https://www.instructables.com/circuits/lasers/projects/",
"https://www.instructables.com/circuits/clocks/projects/"
]
},
{
"website": "https://richmondconfidential.org",
"expected_min_num_of_pages": 20,
"expected_mapped_pages": [
"https://richmondconfidential.org/2009/10/13/salesians-star-guard-has-a-big-impact/",
"https://richmondconfidential.org/2009/10/13/on-team-of-beginners-oilers-old-hand-stands-out/",
"https://richmondconfidential.org/2009/10/19/point-richmond-clockmaker-turns-clutter-into-crafts/",
"https://richmondconfidential.org/2009/10/13/profile-maurice-cathy/",
"https://richmondconfidential.org/2009/10/13/soul-food-rescue-mission-rebuilds-diets-and-lives/",
"https://richmondconfidential.org/2009/10/21/in-tough-economy-pain-trickles-to-the-bottom/",
"https://richmondconfidential.org/2009/10/19/richmond-homicide-map/",
"https://richmondconfidential.org/2009/10/13/rough-roads-for-richmonds-cab-drivers/",
"https://richmondconfidential.org/2009/10/13/before-napa-there-was-winehaven/",
"https://richmondconfidential.org/2009/10/13/family-calls-for-end-to-violence-at-memorial-for-slain-woman-friend/"
]
}
]
13 changes: 8 additions & 5 deletions apps/test-suite/tests/crawl.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ describe("Crawling Checkup (E2E)", () => {
describe("Crawling website tests with a dataset", () => {
it("Should crawl the website and verify the response", async () => {
let passedTests = 0;
let maxAttempts = 15;
const startTime = new Date().getTime();
const date = new Date();
const logsDir = `logs/${date.getMonth() + 1}-${date.getDate()}-${date.getFullYear()}`;
Expand All @@ -37,23 +38,24 @@ describe("Crawling Checkup (E2E)", () => {
for (const websiteData of websitesData) {
try {
const crawlResponse = await request(TEST_URL || "")
.post("/v0/crawl")
.post("/v1/crawl")
.set("Content-Type", "application/json")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.send({ url: websiteData.website, pageOptions: { onlyMainContent: true }, crawlerOptions: { limit: 100, returnOnlyUrls: true }});
.send({ url: websiteData.website, limit: 100 });

const jobId = crawlResponse.body.jobId;
const id = crawlResponse.body.id;
let completedResponse: any;
let isFinished = false;

while (!isFinished) {
while (!isFinished && maxAttempts > 0) {
completedResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${jobId}`)
.get(`/v1/crawl/${id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);

isFinished = completedResponse.body.status === "completed";

if (!isFinished) {
maxAttempts--;
await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
}
}
Expand Down Expand Up @@ -117,6 +119,7 @@ describe("Crawling Checkup (E2E)", () => {
}

passedTests++;
maxAttempts = 15;
} catch (error) {
console.error(`Error processing ${websiteData.website}: ${error}`);
errorLog.push({
Expand Down
110 changes: 110 additions & 0 deletions apps/test-suite/tests/map.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import request from "supertest";
import dotenv from "dotenv";
import { WebsiteScrapeError } from "../utils/types";
import { logErrors } from "../utils/log";

import websitesData from "../data/map.json";
import "dotenv/config";

import fs from 'fs';
dotenv.config();

// Base URL of the API instance under test; the map request in this file is sent here.
// NOTE(review): the port (3002) is hard-coded — presumably it must match the port
// the API is started on; confirm against the server configuration.
const TEST_URL = "http://127.0.0.1:3002";

/**
 * End-to-end check of the `POST /v1/map` endpoint.
 *
 * Every entry of `../data/map.json` is mapped once and validated against
 * three expectations:
 *   1. at least `expected_min_num_of_pages` links are returned,
 *   2. every URL in `expected_mapped_pages` (if any) is among the links,
 *   3. no URL in `expected_not_mapped_pages` (if any) is among the links.
 *
 * A site counts as passed only when all three hold. The test as a whole
 * succeeds when at least 90% of the sites pass.
 */
describe("Map Checkup (E2E)", () => {
  beforeAll(() => {
    // Fail fast: every request below authenticates with this key.
    if (!process.env.TEST_API_KEY) {
      throw new Error("TEST_API_KEY is not set");
    }
  });

  describe("Map website tests with a dataset", () => {
    it("Should map the website and verify the response", async () => {
      let passedTests = 0;
      const startTime = new Date().getTime();
      const date = new Date();
      const logsDir = `logs/${date.getMonth() + 1}-${date.getDate()}-${date.getFullYear()}`;

      const errorLogFileName = `${logsDir}/run.log_${new Date().toTimeString().split(' ')[0]}`;
      const errorLog: WebsiteScrapeError[] = [];

      // Extracts the URL from one entry of the response's `links` array.
      // Accepts both a plain URL string and an object carrying a `url`
      // property, so the comparison below works for either response shape.
      const toUrl = (link: unknown): unknown =>
        typeof link === "string"
          ? link
          : (link as { url?: unknown } | null | undefined)?.url;

      // Records one failed expectation and prints only that new entry
      // (instead of re-printing the whole accumulated log every time).
      const recordFailure = (entry: WebsiteScrapeError): void => {
        errorLog.push(entry);
        console.log('Error: ', entry);
      };

      for (const websiteData of websitesData) {
        try {
          const mapResponse = await request(TEST_URL)
            .post("/v1/map")
            .set("Content-Type", "application/json")
            .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
            .send({ url: websiteData.website, limit: 100 });

          // A response without a `links` array (e.g. an error payload) cannot
          // be validated; raise a descriptive error that the catch below logs.
          const links: unknown = mapResponse.body.links;
          if (!Array.isArray(links)) {
            throw new Error(
              `Unexpected /v1/map response (HTTP ${mapResponse.status}): "links" is not an array`
            );
          }
          const mappedUrls = links.map(toUrl);
          const mappedUrlSet = new Set(mappedUrls);

          // check how many webpages were mapped successfully
          // compares with expected_min_num_of_pages
          if (links.length < websiteData.expected_min_num_of_pages) {
            recordFailure({
              website: websiteData.website,
              prompt: 'MAP',
              expected_output: `SUCCESS: ${websiteData.expected_min_num_of_pages}`,
              actual_output: `FAILURE: ${links.length}`,
              error: `Expected at least ${websiteData.expected_min_num_of_pages} webpages, but got ${links.length}`
            });
            continue;
          }

          // checks if mapped pages contain every entry of expected_mapped_pages
          const expectedPages = websiteData.expected_mapped_pages ?? [];
          if (expectedPages.some(page => !mappedUrlSet.has(page))) {
            recordFailure({
              website: websiteData.website,
              prompt: 'MAP',
              expected_output: `SUCCESS: ${websiteData.expected_mapped_pages}`,
              actual_output: `FAILURE: ${mappedUrls}`,
              error: `Expected mapped pages to contain ${websiteData.expected_mapped_pages}, but got ${mappedUrls}`
            });
            continue;
          }

          // checks if mapped pages contain none of expected_not_mapped_pages
          const forbiddenPages = websiteData.expected_not_mapped_pages ?? [];
          if (forbiddenPages.some(page => mappedUrlSet.has(page))) {
            recordFailure({
              website: websiteData.website,
              prompt: 'MAP',
              expected_output: `SUCCESS: ${websiteData.expected_not_mapped_pages}`,
              actual_output: `FAILURE: ${mappedUrls}`,
              error: `Expected mapped pages to not contain ${websiteData.expected_not_mapped_pages}, but got ${mappedUrls}`
            });
            continue;
          }

          passedTests++;
        } catch (error) {
          // Request or validation errors count as a failure for this site
          // only; the loop carries on with the next one.
          console.error(`Error processing ${websiteData.website}: ${error}`);
          errorLog.push({
            website: websiteData.website,
            prompt: 'MAP',
            expected_output: 'SUCCESS',
            actual_output: 'FAILURE',
            error: `Error processing ${websiteData.website}: ${error}`
          });
        }
      }

      const score = (passedTests / websitesData.length) * 100;
      const endTime = new Date().getTime();
      const timeTaken = (endTime - startTime) / 1000;
      console.log(`Score: ${score}%`);

      // NOTE(review): the third argument is passed as a constant 0 — its
      // meaning is defined by logErrors in ../utils/log; confirm there.
      await logErrors(errorLog, timeTaken, 0, score, websitesData.length);

      // Only local runs persist the failures to a dated file under logs/.
      if (process.env.ENV === "local" && errorLog.length > 0) {
        if (!fs.existsSync(logsDir)) {
          fs.mkdirSync(logsDir, { recursive: true });
        }
        fs.writeFileSync(errorLogFileName, JSON.stringify(errorLog, null, 2));
      }

      expect(score).toBeGreaterThanOrEqual(90);
    }, 350000); // 350 seconds timeout
  });
});
Loading

0 comments on commit d0c2133

Please sign in to comment.