diff --git a/.env.example b/.env.example index f5febb2..2aa3586 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,9 @@ -POSTGRES_URL= -POSTGRES_URL_NON_POOLING= -POSTGRES_USER= -POSTGRES_HOST= -POSTGRES_PASSWORD= -POSTGRES_DATABASE= \ No newline at end of file +OPENAI_API_KEY="your_api_key_here" +POSTGRES_URL="..." +POSTGRES_PRISMA_URL="..." +POSTGRES_URL_NO_SSL="..." +POSTGRES_URL_NON_POOLING="..." +POSTGRES_USER="..." +POSTGRES_HOST="..." +POSTGRES_PASSWORD="..." +POSTGRES_DATABASE="..." diff --git a/app/actions.ts b/app/actions.ts index 464cd05..d845c6e 100644 --- a/app/actions.ts +++ b/app/actions.ts @@ -1,183 +1,186 @@ "use server"; -import { Config, configSchema, explanationsSchema, Result } from "@/lib/types"; -import { openai } from "@ai-sdk/openai"; -import { sql } from "@vercel/postgres"; -import { generateObject } from "ai"; -import { z } from "zod"; +import { createClient } from "@vercel/postgres"; +import { Result, explanationSchema, Config, configSchema } from "@/lib/types"; +import { generateObject } from 'ai'; +import { openai } from '@ai-sdk/openai'; +import { z } from 'zod'; + +/** + * Executes a SQL query and returns the result data + * @param {string} query - The SQL query to execute + * @returns {Promise} Array of query results + * @throws {Error} If query is not a SELECT statement or table doesn't exist + */ -export const generateQuery = async (input: string) => { - "use server"; - try { - const result = await generateObject({ - model: openai("gpt-4o"), - system: `You are a SQL (postgres) and data visualization expert. Your job is to help the user write a SQL query to retrieve the data they need. The table schema is as follows: - - unicorns ( - id SERIAL PRIMARY KEY, - company VARCHAR(255) NOT NULL UNIQUE, - valuation DECIMAL(10, 2) NOT NULL, - date_joined DATE, - country VARCHAR(255) NOT NULL, - city VARCHAR(255) NOT NULL, - industry VARCHAR(255) NOT NULL, - select_investors TEXT NOT NULL - ); +export const generateChartConfig = async ( + results: Result[], + userQuery: string, +) => { + 'use server'; - Only retrieval queries are allowed. + try { + const { object: config } = await generateObject({ + model: openai('gpt-4o'), + system: 'You are a data visualization expert.', + prompt: `Given the following data from a SQL query result, generate the chart config that best visualises the data and answers the users query. + For multiple groups use multi-lines. - For things like industry, company names and other string fields, use the ILIKE operator and convert both the search term and the field to lowercase using LOWER() function. For example: LOWER(industry) ILIKE LOWER('%search_term%'). + Here is an example complete config: + export const chartConfig = { + type: "pie", + xKey: "month", + yKeys: ["sales", "profit", "expenses"], + colors: { + sales: "#4CAF50", // Green for sales + profit: "#2196F3", // Blue for profit + expenses: "#F44336" // Red for expenses + }, + legend: true + } - Note: select_investors is a comma-separated list of investors. Trim whitespace to ensure you're grouping properly. Note, some fields may be null or have onnly one value. - When answering questions about a specific field, ensure you are selecting the identifying column (ie. what is Vercel's valuation would select company and valuation'). + User Query: + ${userQuery} - The industries available are: - - healthcare & life sciences - - consumer & retail - - financial services - - enterprise tech - - insurance - - media & entertainment - - industrials - - health + Data: + ${JSON.stringify(results, null, 2)}`, + schema: configSchema, + }); - If the user asks for a category that is not in the list, infer based on the list above. + // Override with shadcn theme colors + const colors: Record = {}; + config.yKeys.forEach((key, index) => { + colors[key] = `hsl(var(--chart-${index + 1}))`; + }); - Note: valuation is in billions of dollars so 10b would be 10.0. - Note: if the user asks for a rate, return it as a decimal. For example, 0.1 would be 10%. + const updatedConfig = { ...config, colors }; + return { config: updatedConfig }; + } catch (e) { + console.error(e); + throw new Error('Failed to generate chart suggestion'); + } +} - If the user asks for 'over time' data, return by year. +export const explainQuery = async (input: string, sqlQuery: string) => { + 'use server'; + try { + const result = await generateObject({ + model: openai('gpt-4o'), + system: `You are a SQL (postgres) expert. ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY + prompt: `Explain the SQL query you generated to retrieve the data the user wanted. Assume the user is not an expert in SQL. Break down the query into steps. Be concise. - When searching for UK or USA, write out United Kingdom or United States respectively. + User Query: + ${input} - EVERY QUERY SHOULD RETURN QUANTITATIVE DATA THAT CAN BE PLOTTED ON A CHART! There should always be at least two columns. If the user asks for a single column, return the column and the count of the column. If the user asks for a rate, return the rate as a decimal. For example, 0.1 would be 10%. - `, - prompt: `Generate the query necessary to retrieve the data the user wants: ${input}`, - schema: z.object({ - query: z.string(), - }), + Generated SQL Query: + ${sqlQuery}`, + schema: explanationSchema, + output: 'array', }); - return result.object.query; + return result.object; } catch (e) { console.error(e); - throw new Error("Failed to generate query"); + throw new Error('Failed to generate query'); } }; -export const getCompanies = async (query: string) => { +export const runGeneratedSQLQuery = async (query: string) => { "use server"; - // Check if the query is a SELECT statement + // Ensure the query is a SELECT statement. Otherwise, throw an error if ( !query.trim().toLowerCase().startsWith("select") || query.trim().toLowerCase().includes("drop") || query.trim().toLowerCase().includes("delete") || - query.trim().toLowerCase().includes("insert") + query.trim().toLowerCase().includes("insert") || + query.trim().toLowerCase().includes("update") || + query.trim().toLowerCase().includes("alter") || + query.trim().toLowerCase().includes("truncate") || + query.trim().toLowerCase().includes("create") || + query.trim().toLowerCase().includes("grant") || + query.trim().toLowerCase().includes("revoke") ) { throw new Error("Only SELECT queries are allowed"); } + // Create client with the same connection string as seed script + const client = createClient({ + connectionString: process.env.POSTGRES_URL || process.env.DATABASE_URL + }); + let data: any; try { - data = await sql.query(query); + await client.connect(); + data = await client.query(query); } catch (e: any) { if (e.message.includes('relation "unicorns" does not exist')) { console.log( "Table does not exist, creating and seeding it with dummy data now...", ); - // throw error throw Error("Table does not exist"); } else { throw e; } + } finally { + // Always close the connection + await client.end(); } return data.rows as Result[]; }; -export const explainQuery = async (input: string, sqlQuery: string) => { - "use server"; +export const generateQuery = async (input: string) => { + 'use server'; try { const result = await generateObject({ - model: openai("gpt-4o"), - schema: z.object({ - explanations: explanationsSchema, - }), - system: `You are a SQL (postgres) expert. Your job is to explain to the user write a SQL query you wrote to retrieve the data they asked for. The table schema is as follows: - unicorns ( - id SERIAL PRIMARY KEY, - company VARCHAR(255) NOT NULL UNIQUE, - valuation DECIMAL(10, 2) NOT NULL, - date_joined DATE, - country VARCHAR(255) NOT NULL, - city VARCHAR(255) NOT NULL, - industry VARCHAR(255) NOT NULL, - select_investors TEXT NOT NULL - ); - - When you explain you must take a section of the query, and then explain it. Each "section" should be unique. So in a query like: "SELECT * FROM unicorns limit 20", the sections could be "SELECT *", "FROM UNICORNS", "LIMIT 20". - If a section doesnt have any explanation, include it, but leave the explanation empty. - - `, - prompt: `Explain the SQL query you generated to retrieve the data the user wanted. Assume the user is not an expert in SQL. Break down the query into steps. Be concise. + model: openai('gpt-4o'), + system: `You are a SQL (postgres) and data visualization expert. Your job is to help the user write a SQL query to retrieve the data they need. The table schema is as follows: - User Query: - ${input} + unicorns ( + id SERIAL PRIMARY KEY, + company VARCHAR(255) NOT NULL UNIQUE, + valuation DECIMAL(10, 2) NOT NULL, + date_joined DATE, + country VARCHAR(255) NOT NULL, + city VARCHAR(255) NOT NULL, + industry VARCHAR(255) NOT NULL, + select_investors TEXT NOT NULL + ); - Generated SQL Query: - ${sqlQuery}`, - }); - return result.object; - } catch (e) { - console.error(e); - throw new Error("Failed to generate query"); - } -}; + Only retrieval queries are allowed. -export const generateChartConfig = async ( - results: Result[], - userQuery: string, -) => { - "use server"; - const system = `You are a data visualization expert. `; + For things like industry, company names and other string fields, use the ILIKE operator and convert both the search term and the field to lowercase using LOWER() function. For example: LOWER(industry) ILIKE LOWER('%search_term%'). - try { - const { object: config } = await generateObject({ - model: openai("gpt-4o"), - system, - prompt: `Given the following data from a SQL query result, generate the chart config that best visualises the data and answers the users query. - For multiple groups use multi-lines. + Note: select_investors is a comma-separated list of investors. Trim whitespace to ensure you're grouping properly. Note, some fields may be null or have only one value. + When answering questions about a specific field, ensure you are selecting the identifying column (ie. what is Vercel's valuation would select company and valuation'). - Here is an example complete config: - export const chartConfig = { - type: "pie", - xKey: "month", - yKeys: ["sales", "profit", "expenses"], - colors: { - sales: "#4CAF50", // Green for sales - profit: "#2196F3", // Blue for profit - expenses: "#F44336" // Red for expenses - }, - legend: true - } + The industries available are: + - healthcare & life sciences + - consumer & retail + - financial services + - enterprise tech + - insurance + - media & entertainment + - industrials + - health - User Query: - ${userQuery} + If the user asks for a category that is not in the list, infer based on the list above. - Data: - ${JSON.stringify(results, null, 2)}`, - schema: configSchema, - }); + Note: valuation is in billions of dollars so 10b would be 10.0. + Note: if the user asks for a rate, return it as a decimal. For example, 0.1 would be 10%. - const colors: Record = {}; - config.yKeys.forEach((key, index) => { - colors[key] = `hsl(var(--chart-${index + 1}))`; - }); + If the user asks for 'over time' data, return by year. - const updatedConfig: Config = { ...config, colors }; - return { config: updatedConfig }; + When searching for UK or USA, write out United Kingdom or United States respectively. + + EVERY QUERY SHOULD RETURN QUANTITATIVE DATA THAT CAN BE PLOTTED ON A CHART! There should always be at least two columns. If the user asks for a single column, return the column and the count of the column. If the user asks for a rate, return the rate as a decimal. For example, 0.1 would be 10%.`, + prompt: `Generate the query necessary to retrieve the data the user wants: ${input}`, + schema: z.object({ + query: z.string(), + }), + }); + return result.object.query; } catch (e) { - // @ts-expect-errore - console.error(e.message); - throw new Error("Failed to generate chart suggestion"); + console.error(e); + throw new Error('Failed to generate query'); } }; \ No newline at end of file diff --git a/app/page.tsx b/app/page.tsx index 263fe6e..87d33ff 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -2,53 +2,69 @@ import { useState } from "react"; import { motion, AnimatePresence } from "framer-motion"; -import { generateChartConfig, generateQuery, getCompanies } from "./actions"; -import { Config, Result } from "@/lib/types"; + import { Loader2 } from "lucide-react"; import { toast } from "sonner"; + +import { Config, Result } from "@/lib/types"; +import { runGeneratedSQLQuery, generateQuery, generateChartConfig } from "./actions"; + +import { Header } from "@/components/header"; +import { QueryViewer } from "@/components/query-viewer"; import { ProjectInfo } from "@/components/project-info"; -import { UnicornTable } from "@/components/unicorn-table"; +import { Results } from "@/components/results"; +import { Search } from "@/components/search"; import { SuggestedQueries } from "@/components/suggested-queries"; -import { QueryViewer } from "@/components/query-viewer"; -import { UnicornSearch } from "@/components/unicorn-search"; -import { Header } from "@/components/header"; export default function Page() { const [inputValue, setInputValue] = useState(""); + const [submitted, setSubmitted] = useState(false); - const [results, setResults] = useState([]); - const [columns, setColumns] = useState([]); - const [activeQuery, setActiveQuery] = useState(""); const [loading, setLoading] = useState(false); const [loadingStep, setLoadingStep] = useState(1); + + const [activeQuery, setActiveQuery] = useState(""); + const [results, setResults] = useState([]); + const [columns, setColumns] = useState([]); const [chartConfig, setChartConfig] = useState(null); + const handleSubmit = async (suggestion?: string) => { + clearExistingData(); + const question = suggestion ?? inputValue; if (inputValue.length === 0 && !suggestion) return; - clearExistingData(); + + if (question.trim()) { setSubmitted(true); } + setLoading(true); setLoadingStep(1); setActiveQuery(""); + try { const query = await generateQuery(question); + if (query === undefined) { toast.error("An error occurred. Please try again."); setLoading(false); return; } + setActiveQuery(query); setLoadingStep(2); - const companies = await getCompanies(query); + + const companies = await runGeneratedSQLQuery(query); const columns = companies.length > 0 ? Object.keys(companies[0]) : []; setResults(companies); setColumns(columns); + setLoading(false); - const generation = await generateChartConfig(companies, question); - setChartConfig(generation.config); + + const { config } = await generateChartConfig(companies, question); + setChartConfig(config); } catch (e) { toast.error("An error occurred. Please try again."); setLoading(false); @@ -88,7 +104,7 @@ export default function Page() { >
-
) : ( - { const activeQueryCutoff = 100; - const [queryExplanations, setQueryExplanations] = useState< - QueryExplanation[] | null - >(); const [loadingExplanation, setLoadingExplanation] = useState(false); + const [queryExplanations, setQueryExplanations] = useState(); const [queryExpanded, setQueryExpanded] = useState(activeQuery.length > activeQueryCutoff); const handleExplainQuery = async () => { setQueryExpanded(true); setLoadingExplanation(true); - const { explanations } = await explainQuery(inputValue, activeQuery); + + const explanations = await explainQuery(inputValue, activeQuery); setQueryExplanations(explanations); + setLoadingExplanation(false); }; diff --git a/components/unicorn-table.tsx b/components/results.tsx similarity index 99% rename from components/unicorn-table.tsx rename to components/results.tsx index 07274f0..f791f65 100644 --- a/components/unicorn-table.tsx +++ b/components/results.tsx @@ -11,7 +11,7 @@ import { } from "./ui/table"; import { Tabs, TabsContent, TabsList, TabsTrigger } from "./ui/tabs"; -export const UnicornTable = ({ +export const Results = ({ results, columns, chartConfig, diff --git a/components/unicorn-search.tsx b/components/search.tsx similarity index 88% rename from components/unicorn-search.tsx rename to components/search.tsx index 6a99b9b..1b446d6 100644 --- a/components/unicorn-search.tsx +++ b/components/search.tsx @@ -1,8 +1,8 @@ -import { Search } from "lucide-react"; +import { Search as SearchIcon } from "lucide-react"; import { Button } from "./ui/button"; import { Input } from "./ui/input"; -export const UnicornSearch = ({ +export const Search = ({ handleSubmit, inputValue, setInputValue, @@ -32,7 +32,7 @@ export const UnicornSearch = ({ onChange={(e) => setInputValue(e.target.value)} className="pr-10 text-base" /> - +
{submitted ? ( diff --git a/lib/seed.ts b/lib/seed.ts index 556473b..842b87d 100644 --- a/lib/seed.ts +++ b/lib/seed.ts @@ -1,23 +1,71 @@ -import { sql } from '@vercel/postgres'; +import { createClient } from '@vercel/postgres'; import fs from 'fs'; -import csv from 'csv-parser'; import path from 'path'; import "dotenv/config" +// Create client instead of using default pool +const client = createClient({ + connectionString: process.env.POSTGRES_URL || process.env.DATABASE_URL +}); + function parseDate(dateString: string): string { + if (!dateString) { + console.warn(`Date string is undefined or empty`); + return new Date().toISOString().split('T')[0]; // Return today's date as fallback + } + const parts = dateString.split('/'); if (parts.length === 3) { - const day = parts[0].padStart(2, '0'); - const month = parts[1].padStart(2, '0'); - const year = parts[2]; + const month = parts[0].padStart(2, '0'); + const day = parts[1].padStart(2, '0'); + let year = parts[2]; + + // Handle 2-digit years - convert to 4-digit + if (year.length === 2) { + const twoDigitYear = parseInt(year); + // If year is 00-30, it's 20XX; if 31-99, it's 19XX + if (twoDigitYear <= 30) { + year = (2000 + twoDigitYear).toString(); + } else { + year = (1900 + twoDigitYear).toString(); + } + } + return `${year}-${month}-${day}`; } console.warn(`Could not parse date: ${dateString}`); throw Error(); } +function parseCSVLine(line: string): string[] { + const result: string[] = []; + let current = ''; + let inQuotes = false; + + for (let i = 0; i < line.length; i++) { + const char = line[i]; + + if (char === '"') { + inQuotes = !inQuotes; + } else if (char === ',' && !inQuotes) { + result.push(current.trim()); + current = ''; + } else { + current += char; + } + } + + // Add the last field + result.push(current.trim()); + + return result; +} + export async function seed() { - const createTable = await sql` + // Connect to the client + await client.connect(); + + const createTable = await client.sql` CREATE TABLE IF NOT EXISTS unicorns ( id SERIAL PRIMARY KEY, company VARCHAR(255) NOT NULL UNIQUE, @@ -32,42 +80,83 @@ export async function seed() { console.log(`Created "unicorns" table`); - const results: any[] = []; + // Read the raw file and process it manually const csvFilePath = path.join(process.cwd(), 'unicorns.csv'); + const rawContent = fs.readFileSync(csvFilePath, 'utf8'); + const lines = rawContent.split('\n'); + + // We know the headers are at line 3 and data starts at line 4 + const headerLineIndex = 3; + const dataLines = lines.slice(headerLineIndex + 1).filter(line => line.trim()); + + console.log(`Processing ${dataLines.length} data lines`); + + let processedCount = 0; + let errorCount = 0; + + for (const line of dataLines) { + try { + // Parse the CSV line properly handling quoted fields + const columns = parseCSVLine(line); + + // Skip lines that don't have enough columns or start with empty values + if (columns.length < 7 || !columns[1]) continue; + + const company = columns[1]; + const valuationStr = columns[2].replace('$', '').replace(',', '').trim(); + const valuation = parseFloat(valuationStr); + const dateJoined = columns[3]; + const country = columns[4]; + const city = columns[5] || ''; + const industry = columns[6]; + const investors = columns[7] || ''; + + if (!company || isNaN(valuation)) continue; + + const formattedDate = parseDate(dateJoined); - await new Promise((resolve, reject) => { - fs.createReadStream(csvFilePath) - .pipe(csv()) - .on('data', (data) => results.push(data)) - .on('end', resolve) - .on('error', reject); - }); - - for (const row of results) { - const formattedDate = parseDate(row['Date Joined']); - - await sql` - INSERT INTO unicorns (company, valuation, date_joined, country, city, industry, select_investors) - VALUES ( - ${row.Company}, - ${parseFloat(row['Valuation ($B)'].replace('$', '').replace(',', ''))}, - ${formattedDate}, - ${row.Country}, - ${row.City}, - ${row.Industry}, - ${row['Select Investors']} - ) - ON CONFLICT (company) DO NOTHING; - `; + await client.sql` + INSERT INTO unicorns (company, valuation, date_joined, country, city, industry, select_investors) + VALUES ( + ${company}, + ${valuation}, + ${formattedDate}, + ${country}, + ${city}, + ${industry}, + ${investors} + ) + ON CONFLICT (company) DO NOTHING; + `; + + processedCount++; + + if (processedCount <= 10) { + console.log(`āœ“ Inserted: ${company} - $${valuation}B (${formattedDate})`); + } else if (processedCount % 100 === 0) { + console.log(`Processed ${processedCount} records...`); + } + } catch (error) { + errorCount++; + if (errorCount <= 5) { + console.warn(`Error processing line: ${line.substring(0, 100)}...`); + console.warn(error.message); + } + } } - console.log(`Seeded ${results.length} unicorns`); + console.log(`\nšŸŽ‰ Successfully seeded ${processedCount} unicorns`); + if (errorCount > 0) { + console.log(`āš ļø Skipped ${errorCount} rows due to errors`); + } + + // Close the connection + await client.end(); return { createTable, - unicorns: results, + unicorns: processedCount, }; } - seed().catch(console.error); \ No newline at end of file diff --git a/lib/types.ts b/lib/types.ts index 764a9a6..970500c 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -1,4 +1,4 @@ -import { z } from "zod"; +import { z } from 'zod'; export type Unicorn = { id: number; @@ -13,40 +13,57 @@ export type Unicorn = { export type Result = Record; -export const explanationSchema = z.object({ - section: z.string(), - explanation: z.string(), -}); -export const explanationsSchema = z.array(explanationSchema); - -export type QueryExplanation = z.infer; - -// Define the schema for chart configuration export const configSchema = z .object({ description: z .string() .describe( - "Describe the chart. What is it showing? What is interesting about the way the data is displayed?", + 'Describe the chart. What is it showing? What is interesting about the way the data is displayed?', ), - takeaway: z.string().describe("What is the main takeaway from the chart?"), - type: z.enum(["bar", "line", "area", "pie"]).describe("Type of chart"), + takeaway: z.string().describe('What is the main takeaway from the chart?'), + type: z.enum(['bar', 'line', 'area', 'pie']).describe('Type of chart'), title: z.string(), - xKey: z.string().describe("Key for x-axis or category"), - yKeys: z.array(z.string()).describe("Key(s) for y-axis values this is typically the quantitative column"), - multipleLines: z.boolean().describe("For line charts only: whether the chart is comparing groups of data.").optional(), - measurementColumn: z.string().describe("For line charts only: key for quantitative y-axis column to measure against (eg. values, counts etc.)").optional(), - lineCategories: z.array(z.string()).describe("For line charts only: Categories used to compare different lines or data series. Each category represents a distinct line in the chart.").optional(), + xKey: z.string().describe('Key for x-axis or category'), + yKeys: z + .array(z.string()) + .describe( + 'Key(s) for y-axis values this is typically the quantitative column', + ), + multipleLines: z + .boolean() + .describe( + 'For line charts only: whether the chart is comparing groups of data.', + ) + .optional(), + measurementColumn: z + .string() + .describe( + 'For line charts only: key for quantitative y-axis column to measure against (eg. values, counts etc.)', + ) + .optional(), + lineCategories: z + .array(z.string()) + .describe( + 'For line charts only: Categories used to compare different lines or data series. Each category represents a distinct line in the chart.', + ) + .optional(), colors: z .record( - z.string().describe("Any of the yKeys"), - z.string().describe("Color value in CSS format (e.g., hex, rgb, hsl)"), + z.string().describe('Any of the yKeys'), + z.string().describe('Color value in CSS format (e.g., hex, rgb, hsl)'), ) - .describe("Mapping of data keys to color values for chart elements") + .describe('Mapping of data keys to color values for chart elements') .optional(), - legend: z.boolean().describe("Whether to show legend"), + legend: z.boolean().describe('Whether to show legend'), }) - .describe("Chart configuration object"); - + .describe('Chart configuration object'); export type Config = z.infer; + +export const explanationSchema = z.object({ + section: z.string(), + explanation: z.string(), +}); + +export type QueryExplanation = z.infer; + diff --git a/package.json b/package.json index cee0c65..abb0c9e 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "name": "postgres-starter", + "name": "natural-language-postgresql", "repository": "https://github.com/vercel/examples.git", "license": "MIT", "version": "0.0.0",