diff --git a/.eslintignore b/.eslintignore new file mode 100644 index 0000000..e69de29 diff --git a/.eslintrc.js b/.eslintrc.js new file mode 100644 index 0000000..e69de29 diff --git a/eslint.config.js b/eslint.config.js new file mode 100644 index 0000000..e69de29 diff --git a/package.json b/package.json index e218fae..949e837 100644 --- a/package.json +++ b/package.json @@ -52,6 +52,7 @@ "@tabler/icons-react": "3.34.1", "@tanstack/react-form": "1.20.0", "@tanstack/react-query": "5.87.4", + "@types/html-to-text": "9.0.4", "ai": "5.0.44", "better-auth": "1.3.9", "class-variance-authority": "0.7.1", @@ -61,6 +62,7 @@ "drizzle-kit": "0.31.4", "drizzle-orm": "0.44.5", "geist": "1.5.1", + "html-to-text": "9.0.5", "jotai": "2.14.0", "lucide-react": "0.544.0", "motion": "12.23.12", @@ -93,6 +95,7 @@ "@typescript-eslint/types": "8.43.0", "@typescript-eslint/utils": "8.43.0", "@vitest/ui": "3.2.4", + "dotenv": "17.2.2", "eslint": "9.35.0", "eslint-config-next": "15.5.3", "eslint-config-prettier": "10.1.8", @@ -104,6 +107,7 @@ "prettier": "3.6.2", "prettier-plugin-tailwindcss": "0.6.14", "tailwindcss": "4.1.13", + "tsx": "4.20.5", "tw-animate-css": "1.3.8", "typescript": "5.9.2", "typescript-eslint": "8.43.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7aa8ac6..cd1b0cf 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -92,6 +92,9 @@ importers: '@tanstack/react-query': specifier: 5.87.4 version: 5.87.4(react@19.1.1) + '@types/html-to-text': + specifier: 9.0.4 + version: 9.0.4 ai: specifier: 5.0.44 version: 5.0.44(zod@4.1.8) @@ -119,6 +122,9 @@ importers: geist: specifier: 1.5.1 version: 1.5.1(next@15.5.3(@opentelemetry/api@1.9.0)(react-dom@19.1.1(react@19.1.1))(react@19.1.1)) + html-to-text: + specifier: 9.0.5 + version: 9.0.5 jotai: specifier: 2.14.0 version: 2.14.0(@babel/template@7.27.2)(@types/react@19.1.13)(react@19.1.1) @@ -210,6 +216,9 @@ importers: '@vitest/ui': specifier: 3.2.4 version: 3.2.4(vitest@3.2.4) + dotenv: + specifier: 17.2.2 + version: 17.2.2 
eslint: specifier: 9.35.0 version: 9.35.0(jiti@2.5.1) @@ -243,6 +252,9 @@ importers: tailwindcss: specifier: 4.1.13 version: 4.1.13 + tsx: + specifier: 4.20.5 + version: 4.20.5 tw-animate-css: specifier: 1.3.8 version: 1.3.8 @@ -2285,6 +2297,9 @@ packages: '@rushstack/eslint-patch@1.12.0': resolution: {integrity: sha512-5EwMtOqvJMMa3HbmxLlF74e+3/HhwBTMcvt3nqVJgGCozO6hzIPOBlwm8mGVNR9SN2IJpxSnlxczyDjcn7qIyw==} + '@selderee/plugin-htmlparser2@0.11.0': + resolution: {integrity: sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ==} + '@shikijs/core@3.12.2': resolution: {integrity: sha512-L1Safnhra3tX/oJK5kYHaWmLEBJi1irASwewzY3taX5ibyXyMkkSDZlq01qigjryOBwrXSdFgTiZ3ryzSNeu7Q==} @@ -2656,6 +2671,9 @@ packages: '@types/hast@3.0.4': resolution: {integrity: sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==} + '@types/html-to-text@9.0.4': + resolution: {integrity: sha512-pUY3cKH/Nm2yYrEmDlPR1mR7yszjGx4DrwPjQ702C4/D5CwHuZTgZdIdwPkRbcuhs7BAh2L5rg3CL5cbRiGTCQ==} + '@types/http-errors@2.0.5': resolution: {integrity: sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==} @@ -3613,6 +3631,10 @@ packages: deep-is@0.1.4: resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==} + deepmerge@4.3.1: + resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==} + engines: {node: '>=0.10.0'} + define-data-property@1.1.4: resolution: {integrity: sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==} engines: {node: '>= 0.4'} @@ -3675,13 +3697,30 @@ packages: dom-accessibility-api@0.6.3: resolution: {integrity: sha512-7ZgogeTnjuHbo+ct10G9Ffp0mif17idi0IyWNVA/wcwcm7NPOD/WEHVP3n7n3MhXqxoIYm8d6MuZohYWIZ4T3w==} + dom-serializer@2.0.0: + resolution: {integrity: 
sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==} + + domelementtype@2.3.0: + resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==} + + domhandler@5.0.3: + resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==} + engines: {node: '>= 4'} + dompurify@3.2.6: resolution: {integrity: sha512-/2GogDQlohXPZe6D6NOgQvXLPSYBqIWMnZ8zzOhn09REE4eyAzb+Hed3jhoM9OkuaJ8P6ZGTTVWQKAi8ieIzfQ==} + domutils@3.2.2: + resolution: {integrity: sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==} + dotenv@16.6.1: resolution: {integrity: sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==} engines: {node: '>=12'} + dotenv@17.2.2: + resolution: {integrity: sha512-Sf2LSQP+bOlhKWWyhFsn0UsfdK/kCWRv1iuA2gXAwt3dyNabr6QSj00I2V10pidqz69soatm9ZwZvpQMTIOd5Q==} + engines: {node: '>=12'} + drizzle-kit@0.31.4: resolution: {integrity: sha512-tCPWVZWZqWVx2XUsVpJRnH9Mx0ClVOf5YUHerZ5so1OKSlqww4zy1R5ksEdGRcO3tM3zj0PYN6V48TbQCL1RfA==} hasBin: true @@ -3809,6 +3848,10 @@ packages: resolution: {integrity: sha512-d4lC8xfavMeBjzGr2vECC3fsGXziXZQyJxD868h2M/mBI3PwAuODxAkLkq5HYuvrPYcUtiLzsTo8U3PgX3Ocww==} engines: {node: '>=10.13.0'} + entities@4.5.0: + resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==} + engines: {node: '>=0.12'} + entities@6.0.1: resolution: {integrity: sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==} engines: {node: '>=0.12'} @@ -4381,12 +4424,19 @@ packages: resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==} engines: {node: '>=18'} + html-to-text@9.0.5: + resolution: {integrity: 
sha512-qY60FjREgVZL03vJU6IfMV4GDjGBIoOyvuFdpBDIX9yTlDw0TjxVBQp+P8NvpdIXNJvfWBTNul7fsAQJq2FNpg==} + engines: {node: '>=14'} + html-url-attributes@3.0.1: resolution: {integrity: sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ==} html-void-elements@3.0.0: resolution: {integrity: sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==} + htmlparser2@8.0.2: + resolution: {integrity: sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==} + http-errors@2.0.0: resolution: {integrity: sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==} engines: {node: '>= 0.8'} @@ -4747,6 +4797,9 @@ packages: layout-base@2.0.1: resolution: {integrity: sha512-dp3s92+uNI1hWIpPGH3jK2kxE2lMjdXdr+DH8ynZHpd6PUlH6x6cbuXnoMmiNumznqaNO31xu9e79F0uuZ0JFg==} + leac@0.6.0: + resolution: {integrity: sha512-y+SqErxb8h7nE/fiEX07jsbuhrpO9lL8eca7/Y1nuWV2moNlXhyd59iDGcRf6moVyDMbmTNzL40SUyrFU/yDpg==} + levn@0.4.1: resolution: {integrity: sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==} engines: {node: '>= 0.8.0'} @@ -5337,6 +5390,9 @@ packages: parse5@7.3.0: resolution: {integrity: sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==} + parseley@0.12.1: + resolution: {integrity: sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw==} + parseurl@1.3.3: resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==} engines: {node: '>= 0.8'} @@ -5369,6 +5425,9 @@ packages: resolution: {integrity: sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==} engines: {node: '>= 14.16'} + peberminta@0.9.0: + resolution: {integrity: 
sha512-XIxfHpEuSJbITd1H3EeQwpcZbTLHc+VVr8ANI9t5sit565tsI4/xK3KWTUFE2e6QiangUkh3B0jihzmGnNrRsQ==} + pg-cloudflare@1.2.7: resolution: {integrity: sha512-YgCtzMH0ptvZJslLM1ffsY4EuGaU0cx4XSdXLRFae8bPP4dS5xL1tNB3k2o/N64cHJpwU7dxKli/nZ2lUa5fLg==} @@ -5916,6 +5975,9 @@ packages: secure-json-parse@4.0.0: resolution: {integrity: sha512-dxtLJO6sc35jWidmLxo7ij+Eg48PM/kleBsxpC8QJE0qJICe+KawkDQmvCMZUr9u7WKVHgMW6vy3fQ7zMiFZMA==} + selderee@0.11.0: + resolution: {integrity: sha512-5TF+l7p4+OsnP8BCCvSyZiSPc4x4//p5uPwK8TCnVPJYRmU2aYKMpOXvw8zM5a5JvuuCGN1jmsMwuU2W02ukfA==} + semver@6.3.1: resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==} hasBin: true @@ -8989,6 +9051,11 @@ snapshots: '@rushstack/eslint-patch@1.12.0': {} + '@selderee/plugin-htmlparser2@0.11.0': + dependencies: + domhandler: 5.0.3 + selderee: 0.11.0 + '@shikijs/core@3.12.2': dependencies: '@shikijs/types': 3.12.2 @@ -9394,6 +9461,8 @@ snapshots: dependencies: '@types/unist': 3.0.3 + '@types/html-to-text@9.0.4': {} + '@types/http-errors@2.0.5': {} '@types/js-cookie@2.2.7': {} @@ -10425,6 +10494,8 @@ snapshots: deep-is@0.1.4: {} + deepmerge@4.3.1: {} + define-data-property@1.1.4: dependencies: es-define-property: 1.0.1 @@ -10475,12 +10546,32 @@ snapshots: dom-accessibility-api@0.6.3: {} + dom-serializer@2.0.0: + dependencies: + domelementtype: 2.3.0 + domhandler: 5.0.3 + entities: 4.5.0 + + domelementtype@2.3.0: {} + + domhandler@5.0.3: + dependencies: + domelementtype: 2.3.0 + dompurify@3.2.6: optionalDependencies: '@types/trusted-types': 2.0.7 + domutils@3.2.2: + dependencies: + dom-serializer: 2.0.0 + domelementtype: 2.3.0 + domhandler: 5.0.3 + dotenv@16.6.1: {} + dotenv@17.2.2: {} + drizzle-kit@0.31.4: dependencies: '@drizzle-team/brocli': 0.10.2 @@ -10525,6 +10616,8 @@ snapshots: graceful-fs: 4.2.11 tapable: 2.2.3 + entities@4.5.0: {} + entities@6.0.1: {} error-stack-parser@2.1.4: @@ -11327,10 +11420,25 @@ snapshots: dependencies: 
whatwg-encoding: 3.1.1 + html-to-text@9.0.5: + dependencies: + '@selderee/plugin-htmlparser2': 0.11.0 + deepmerge: 4.3.1 + dom-serializer: 2.0.0 + htmlparser2: 8.0.2 + selderee: 0.11.0 + html-url-attributes@3.0.1: {} html-void-elements@3.0.0: {} + htmlparser2@8.0.2: + dependencies: + domelementtype: 2.3.0 + domhandler: 5.0.3 + domutils: 3.2.2 + entities: 4.5.0 + http-errors@2.0.0: dependencies: depd: 2.0.0 @@ -11693,6 +11801,8 @@ snapshots: layout-base@2.0.1: {} + leac@0.6.0: {} + levn@0.4.1: dependencies: prelude-ls: 1.2.1 @@ -12468,6 +12578,11 @@ snapshots: dependencies: entities: 6.0.1 + parseley@0.12.1: + dependencies: + leac: 0.6.0 + peberminta: 0.9.0 + parseurl@1.3.3: {} path-data-parser@0.1.0: {} @@ -12489,6 +12604,8 @@ snapshots: pathval@2.0.1: {} + peberminta@0.9.0: {} + pg-cloudflare@1.2.7: optional: true @@ -13086,6 +13203,10 @@ snapshots: secure-json-parse@4.0.0: {} + selderee@0.11.0: + dependencies: + parseley: 0.12.1 + semver@6.3.1: {} semver@7.7.2: {} diff --git a/src/app/api/parse-job/route.ts b/src/app/api/parse-job/route.ts new file mode 100644 index 0000000..49b3435 --- /dev/null +++ b/src/app/api/parse-job/route.ts @@ -0,0 +1,343 @@ +import { NextResponse } from 'next/server' +import { serverConfig } from '~/config/server-config' +import { convert } from 'html-to-text'; + +// Test mock data + + + + +interface JobAnalysisResult { + extractedSkills: ExtractedSkill[]; + requiredSkills: Skill[]; + preferredSkills: Skill[]; + experienceLevel?: ExperienceLevel; + salaryRange?: { + min?: number; + max?: number; + }; + keyTerms: string[]; + confidence: number; + summary?: string; +} + +interface ExtractedSkill { + name: string; + confidence: number; + category: SkillCategory; + synonyms?: string[]; + context?: string; +} + +interface Skill { + name: string; + proficiencyScore?: number; + required?: boolean; + category?: SkillCategory; +} + +type SkillCategory = 'technical' | 'soft' | 'domain' | 'language' | 'certification'; +type ExperienceLevel = 
/**
 * Client for the OpenRouter chat-completions endpoint.
 *
 * It builds a prompt from a job posting, sends it to the configured model and
 * converts the model's JSON reply into a `JobAnalysisResult`.
 */
export class JobAnalysisService {
  private readonly aiModel = 'gpt-3.5-turbo'
  private readonly apiUrl = 'https://openrouter.ai/api/v1/chat/completions'
  // Not read anywhere yet; kept for the retry support announced in the NOTE below.
  private readonly maxRetries = 3
  private readonly retryDelay = 1000 // 1 second

  /**
   * @throws Error when `serverConfig.ai.openRouterApiKey` is falsy, so a
   *   misconfigured deployment fails before any request is attempted.
   */
  constructor() {
    if (!serverConfig.ai.openRouterApiKey) {
      throw new Error('OpenRouter API key not configured')
    }
  }

  // NOTE: caching retries later

  /**
   * Ask the model to analyse one job posting.
   *
   * @param jobDescription Plain-text description of the job.
   * @param jobTitle Optional title, added to the prompt when present.
   * @returns The structured analysis parsed from the model's reply.
   * @throws Error when the HTTP call fails, the reply carries no message
   *   content, or the content is not a JSON object. Every failure is logged
   *   and then rethrown unchanged.
   */
  async analyzeJobPosting(jobDescription: string, jobTitle?: string): Promise<JobAnalysisResult> {
    try {
      const prompt = this.buildAnalysisPrompt(jobDescription, jobTitle)
      const response = await fetch(this.apiUrl, {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${serverConfig.ai.openRouterApiKey}`,
          'Content-Type': 'application/json',
          'HTTP-Referer': process.env.NEXT_PUBLIC_APP_URL || 'http://localhost:3000',
          'X-Title': 'Interview Management System',
        },
        body: JSON.stringify({
          model: this.aiModel,
          messages: [{role: 'user', content: prompt}],
          temperature: 0.1,
          max_tokens: 2000,
        }),
      })

      if (!response.ok) {
        const errorText = await response.text()
        throw new Error(
          `AI API call failed: ${String(response.status)} ${response.statusText} - ${errorText}`,
        )
      }

      // Every level is optional: a 2xx payload without `choices` must end in
      // the "Invalid AI response format" error below, not in a TypeError.
      type AIResponse = {
        choices?: Array<{
          message?: {
            content?: string
          }
        }>
      }
      const data = (await response.json()) as AIResponse
      const content = data.choices?.[0]?.message?.content
      if (!content) {
        throw new Error('Invalid AI response format')
      }
      return this.parseAIResponse(content)
    } catch (error) {
      console.error('Error in job analysis:', error)
      throw error
    }
  }

  /** Build the single user message that describes the expected JSON shape. */
  private buildAnalysisPrompt(jobDescription: string, jobTitle?: string): string {
    return `
You are an expert job analysis system. Analyze the following job posting and extract structured information in JSON format.

${jobTitle ? `Job Title: ${jobTitle}` : ''}

Job Description:
${jobDescription}

Please extract and return the following information in valid JSON format:

{
  "extractedSkills": [{"name": "string", "confidence": "number", "category": "string"}],
  "requiredSkills": [{"name": "string", "required": "boolean", "category": "string"}],
  "preferredSkills": [{"name": "string", "required": "boolean", "category": "string"}],
  "experienceLevel": "string",
  "salaryRange": {"min": "number | null", "max": "number | null"},
  "keyTerms": ["string"],
  "confidence": "number",
  "summary": "string"
}

Return ONLY the JSON object.`.trim()
  }

  /**
   * Turn the model's text reply into a `JobAnalysisResult`.
   *
   * Markdown code fences are removed, and if the reply wraps the object in
   * prose only the outermost `{ ... }` is parsed. Missing list fields become
   * empty arrays and a missing `confidence` becomes 0, so callers can rely on
   * the non-optional fields of the result type.
   *
   * @throws Error('AI returned invalid JSON format') when the reply cannot be
   *   parsed or is not a JSON object.
   */
  private parseAIResponse(response: string): JobAnalysisResult {
    try {
      const withoutFences = response
        .replace(/```json\s*/g, '')
        .replace(/```\s*/g, '')
        .trim()

      const firstBrace = withoutFences.indexOf('{')
      const lastBrace = withoutFences.lastIndexOf('}')
      const jsonText =
        firstBrace !== -1 && lastBrace > firstBrace
          ? withoutFences.slice(firstBrace, lastBrace + 1)
          : withoutFences

      const parsed: unknown = JSON.parse(jsonText)
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        throw new Error('AI response is not a JSON object')
      }

      // Only the top-level shape is checked here; element contents are trusted.
      const candidate = parsed as Partial<JobAnalysisResult>
      return {
        ...candidate,
        extractedSkills: Array.isArray(candidate.extractedSkills) ? candidate.extractedSkills : [],
        requiredSkills: Array.isArray(candidate.requiredSkills) ? candidate.requiredSkills : [],
        preferredSkills: Array.isArray(candidate.preferredSkills) ? candidate.preferredSkills : [],
        keyTerms: Array.isArray(candidate.keyTerms) ? candidate.keyTerms : [],
        confidence: typeof candidate.confidence === 'number' ? candidate.confidence : 0,
      }
    } catch (err) {
      console.error('Failed to parse AI response', {response, error: err})
      throw new Error('AI returned invalid JSON format')
    }
  }
}
/**
 * Translate a public Greenhouse job-board URL into the matching
 * `boards-api.greenhouse.io` JSON endpoint.
 *
 * Accepted path shapes are `/<company>/jobs/<id>` and `/jobs/<id>`; the latter
 * uses the board name `greenhouse`. A URL without a scheme is treated as
 * `https://`.
 *
 * @param url URL supplied by the caller (untrusted).
 * @returns The API URL, or `''` when `url` does not point at `greenhouse.io`
 *   or one of its subdomains.
 * @throws Error when the host is Greenhouse but the path carries no job id.
 */
export function turnUrlToJsonUrl(url: string): string {
  if (typeof url !== 'string') {
    return '';
  }

  const candidate = url.trim();
  const withScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(candidate) ? candidate : `https://${candidate}`;

  let parsed: URL;
  try {
    parsed = new URL(withScheme);
  } catch {
    return '';
  }

  // Compare the parsed hostname rather than searching the raw string, so that
  // "notgreenhouse.io" or "evil.example/?x=greenhouse.io/a/jobs/1" are rejected.
  const host = parsed.hostname.toLowerCase();
  if (host !== 'greenhouse.io' && !host.endsWith('.greenhouse.io')) {
    return '';
  }

  // Match on the path only: query string and fragment can never leak into the
  // company name or job id.
  const match = /^\/(?:([^/]+)\/)?jobs\/(\d+)/.exec(parsed.pathname);
  if (!match) {
    throw new Error("Could not parse the company name or job ID from the Greenhouse URL.");
  }

  // No company segment means the URL had the bare "/jobs/<id>" form.
  const companyName = match[1] ?? 'greenhouse';
  const jobId = match[2];

  return `https://boards-api.greenhouse.io/v1/boards/${companyName}/jobs/${jobId}?questions=true&pay_transparency=true`;
}
/** Named entities that can survive the first pass when the input was escaped twice. */
const RESIDUAL_NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['nbsp', ' '], ['amp', '&'], ['lt', '<'], ['gt', '>'], ['quot', '"'], ['apos', "'"],
  ['hellip', '...'], ['mdash', '—'], ['ndash', '–'],
  ['rsquo', "'"], ['lsquo', "'"], ['rdquo', '"'], ['ldquo', '"'],
  ['copy', '©'], ['reg', '®'], ['trade', '™'], ['deg', '°'],
  ['plusmn', '±'], ['times', '×'], ['divide', '÷'],
]);

/**
 * Decode entity references left in already-converted text.
 *
 * One regex pass handles named and numeric references together, so the `&`
 * produced by `&amp;` is never re-read as the start of another entity.
 * Numeric references are decoded to their character (not deleted); invalid
 * code points are dropped and unknown names are left as they are.
 */
function decodeResidualEntities(text: string): string {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (whole: string, body: string) => {
    if (!body.startsWith('#')) {
      return RESIDUAL_NAMED_ENTITIES.get(body) ?? whole;
    }
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isValid =
      Number.isInteger(codePoint) &&
      codePoint > 0 &&
      codePoint <= 0x10ffff &&
      !(codePoint >= 0xd800 && codePoint <= 0xdfff);
    return isValid ? String.fromCodePoint(codePoint) : '';
  });
}

/**
 * Normalise line endings, collapse runs of spaces/tabs, trim every line and
 * allow at most one blank line between paragraphs.
 */
function normalizeWhitespace(text: string): string {
  return text
    .replace(/\r\n?/g, '\n')
    .replace(/\u00a0/g, ' ')
    .replace(/[ \t]{2,}/g, ' ')
    // Spaces and tabs only: with the `m` flag, `\s` would also match the
    // newlines themselves and join "a\n\nb" into "ab".
    .replace(/^[ \t]+|[ \t]+$/gm, '')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}

/**
 * Convert an HTML fragment to plain text suitable for a prompt.
 *
 * Pass 1 runs html-to-text with formatting tuned for job descriptions.
 * Pass 2 removes any markup-looking text that is still present (for example
 * when the input was HTML-escaped before it reached this function) and
 * decodes leftover entity references.
 * Pass 3 normalises whitespace.
 *
 * Note that pass 2 also removes literal text shaped like a tag, e.g.
 * `"a <b> c"`.
 *
 * @param html HTML source; any falsy or non-string value yields `''`.
 * @returns Plain text with paragraphs separated by a single blank line.
 */
export function stripHtmlTags(html: string): string {
  if (!html || typeof html !== 'string') {
    return '';
  }

  const oneBreak = { leadingLineBreaks: 1, trailingLineBreaks: 1 };
  const headingBreaks = { leadingLineBreaks: 2, trailingLineBreaks: 1 };

  // First pass: Use html-to-text for comprehensive conversion
  const basicText = convert(html, {
    wordwrap: false,
    preserveNewlines: false,
    selectors: [
      // Remove unwanted elements completely
      ...['script', 'style', 'noscript', 'iframe', 'object', 'embed'].map((selector) => ({
        selector,
        format: 'skip',
      })),

      // Convert structural elements to text with spacing
      ...['h1', 'h2', 'h3'].map((selector) => ({ selector, format: 'block', options: headingBreaks })),
      ...['h4', 'h5', 'h6', 'p', 'div'].map((selector) => ({ selector, format: 'block', options: oneBreak })),
      { selector: 'br', format: 'lineBreak' },

      // Lists: `itemPrefix` is documented as an option of the list formatter,
      // so it is set on `ul` rather than on an `li` block.
      // NOTE(review): the previous `li` block used zero line breaks, which
      // presumably ran consecutive items together — confirm against real postings.
      { selector: 'ul', format: 'unorderedList', options: { itemPrefix: '• ', ...oneBreak } },
      { selector: 'ol', format: 'orderedList', options: oneBreak },

      // Inline elements - just extract text (this drops link targets on purpose)
      ...[
        'strong', 'b', 'em', 'i', 'span', 'a', 'code', 'kbd',
        'samp', 'var', 'mark', 'small', 'sub', 'sup',
      ].map((selector) => ({ selector, format: 'inline' })),
    ],
  });

  // Second pass: strip leftover tags. Repeat while something changes, so
  // fragments such as "<scr<b>ipt>" do not leave a new tag behind. Each
  // successful pass shortens the string, so the loop always terminates.
  let cleanText = basicText;
  let previous: string;
  do {
    previous = cleanText;
    cleanText = cleanText.replace(/<[^>]*>/g, '');
  } while (cleanText !== previous);

  cleanText = decodeResidualEntities(cleanText)
    // Typographic punctuation is flattened to ASCII, as the previous code did.
    .replace(/…/g, '...')
    .replace(/[’‘]/g, "'")
    .replace(/[”“]/g, '"');

  // Third pass: Clean up whitespace and formatting
  return normalizeWhitespace(cleanText);
}
/** Type guard: a JSON object (not null, not an array). */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Return the first candidate that is a non-blank string, or `''` when none is. */
function firstText(...candidates: unknown[]): string {
  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate.trim() !== '') {
      return candidate;
    }
  }
  return '';
}

/**
 * Build the plain-text job description from a job-board JSON payload.
 *
 * The HTML body is taken from `content`, `description` or `job_description`
 * (first non-blank string wins) and passed through `stripHtmlTags`. A short
 * header with title, company and location is prepended when available.
 *
 * NOTE(review): if the upstream API returns `content` HTML-escaped, it is the
 * regex fallback inside `stripHtmlTags` that removes the tags, and block
 * structure is lost — verify against a real response.
 *
 * @param data Parsed JSON of unknown shape.
 * @returns Header plus description, or `''` when no usable description exists.
 */
export function extractAndClean(data: unknown): string {
  if (!isJsonObject(data)) {
    return '';
  }

  const htmlToConvert = firstText(data.content, data.description, data.job_description);
  if (!htmlToConvert) {
    return '';
  }

  const descriptionText = stripHtmlTags(htmlToConvert);
  if (!descriptionText) {
    // Markup with no text is treated the same as a missing description.
    return '';
  }

  const title = firstText(data.title);
  const company = firstText(data.company, data.company_name);
  const location = isJsonObject(data.location) ? firstText(data.location.name) : '';

  const header = [
    title,
    company ? `Company: ${company}` : '',
    location ? `Location: ${location}` : ''
  ].filter(Boolean).join('\n');

  return `${header}\n\n${descriptionText}`.trim();
}

/**
 * POST handler: analyse a job posting given either a Greenhouse URL (`url`)
 * or raw text (`job_description`), with an optional `job_title`.
 *
 * Responses:
 * - 200 with the analysis result;
 * - 400 when the body is not a JSON object, neither input is usable, the URL
 *   cannot be processed, or the posting yields no description;
 * - 500 when the analysis itself fails.
 */
export async function POST(request: Request) {
  const missingInput = { error: 'Either job_description or a valid Greenhouse URL is required' }

  let body: unknown
  try {
    body = await request.json()
  } catch {
    // A malformed body is the client's mistake, not a server failure.
    return NextResponse.json({ error: 'Request body must be valid JSON' }, { status: 400 })
  }
  if (!isJsonObject(body)) {
    return NextResponse.json(missingInput, { status: 400 })
  }

  try {
    let jobDescription: string
    let jobTitle = typeof body.job_title === 'string' ? body.job_title : undefined

    // Check if we have a URL that might be a Greenhouse job posting
    if (typeof body.url === 'string' && body.url) {
      try {
        const jsonUrl = turnUrlToJsonUrl(body.url)
        if (!jsonUrl) {
          throw new Error('Not a valid Greenhouse URL')
        }
        const response = await fetch(jsonUrl)
        if (!response.ok) {
          throw new Error(`Failed to fetch job data: ${response.statusText}`)
        }
        const data: unknown = await response.json()
        jobDescription = extractAndClean(data)
        // Use the title from Greenhouse if available
        if (isJsonObject(data) && typeof data.title === 'string' && data.title) {
          jobTitle = data.title
        }
      } catch (error) {
        console.error('Error processing Greenhouse URL:', error)
        return NextResponse.json(
          { error: 'Failed to process job URL' },
          { status: 400 }
        )
      }
    } else if (typeof body.job_description === 'string' && body.job_description.trim() !== '') {
      jobDescription = body.job_description
    } else {
      return NextResponse.json(missingInput, { status: 400 })
    }

    // Do not pay for a model call that has nothing to analyse.
    if (jobDescription.trim() === '') {
      return NextResponse.json(
        { error: 'No job description could be extracted from the posting' },
        { status: 400 }
      )
    }

    const jobAnalysisService = new JobAnalysisService()
    const result = await jobAnalysisService.analyzeJobPosting(jobDescription, jobTitle)

    return NextResponse.json(result)
  } catch (error) {
    console.error('Error processing job analysis:', error)
    return NextResponse.json(
      { error: 'Failed to analyze job posting' },
      { status: 500 }
    )
  }
}
// Input contract: a URL to fetch, raw description text, or both.
// The refinement rejects payloads where neither field carries a value.
const jobInputSchema = z
  .object({
    job_url: z.string().url().optional(),
    job_description: z.string().optional(),
  })
  .refine(
    ({ job_url, job_description }) => job_url || job_description,
    { message: 'Either job_url or job_description must be provided' }
  )

/**
 * Public procedure that parses a job posting.
 *
 * `job_url` takes precedence; `job_description` is used only when no URL was
 * sent. Both paths delegate to `parseJobPosting`.
 */
export default publicBase
  .input(jobInputSchema)
  .handler(async ({ input }) => {
    const { job_url: jobUrl, job_description: jobDescription } = input

    if (jobUrl) {
      return await parseJobPosting(jobUrl)
    }

    if (jobDescription) {
      return await parseJobPosting(undefined, jobDescription)
    }

    // Only reachable if the schema refinement above is bypassed.
    throw new Error('Invalid input')
  })
/** One skill named in a posting, optionally with required experience. */
type JobSkill = {
  name: string
  years?: number
  level?: 'beginner' | 'intermediate' | 'expert'
}

/** Structured form of a job posting as produced by `parseJobPosting`. */
type ParsedJob = {
  title: string
  company?: string
  location?: string
  description: string
  skills: JobSkill[]
  salaryRange?: {
    min?: number
    max?: number
    currency?: string
  }
  employmentType?: string
  experienceLevel?: string
  requirements?: string[]
  responsibilities?: string[]
  benefits?: string[]
}

/**
 * Parse job posting from URL or raw description and store it.
 *
 * When `url` is given its response body replaces `description`. The parsing
 * step is still a placeholder: title and skills are hard-coded.
 *
 * NOTE(review): `url` is fetched from the server as supplied by the caller.
 * Only the scheme is checked here; restricting the reachable hosts is left to
 * the caller or the network layer.
 *
 * @param url Optional http(s) URL of the posting.
 * @param description Optional raw text, used when `url` is absent.
 * @returns The parsed job that was inserted into the `jobs` table.
 * @throws Error when the URL is invalid or not http(s), the fetch returns a
 *   non-2xx status, or no non-blank text is available.
 */
export async function parseJobPosting(url?: string, description?: string): Promise<ParsedJob> {
  // If URL is provided, fetch the content first
  let jobText = description
  if (url) {
    let target: URL
    try {
      target = new URL(url)
    } catch {
      throw new Error('Invalid job URL')
    }
    if (target.protocol !== 'http:' && target.protocol !== 'https:') {
      throw new Error(`Unsupported job URL protocol: ${target.protocol}`)
    }

    const response = await fetch(target)
    if (!response.ok) {
      // Without this check an error page would be stored as the description.
      throw new Error(`Failed to fetch job posting: ${String(response.status)} ${response.statusText}`)
    }
    jobText = await response.text()
  }

  if (jobText === undefined || jobText.trim() === '') {
    throw new Error('No job description provided')
  }

  // Here you can integrate with vercel/openai to parse the job
  // This is a placeholder implementation
  const parsedJob: ParsedJob = {
    title: 'Software Engineer', // Extract from text
    description: jobText,
    skills: [
      { name: 'TypeScript', level: 'intermediate' },
      { name: 'React' },
      { name: 'Node.js' }
    ],
    // Add other fields as needed
  }

  // Store in database
  await db.insert(jobs).values({
    title: parsedJob.title,
    description: parsedJob.description,
    metadata: parsedJob // Store full parsed data
  })

  return parsedJob
}
/**
 * Internal representation of a job posting.
 *
 * Only `title`, `description` and `skills` are required; the remaining fields
 * are filled in when the source provides them.
 */
export type Job = {
  title: string
  company?: string
  location?: string
  description: string
  skills: {
    name: string
    years?: number
    level?: 'beginner' | 'intermediate' | 'expert'
  }[]
  salaryRange?: {
    min?: number
    max?: number
    currency?: string
  }
  employmentType?: string
  experienceLevel?: string
  requirements?: string[]
  responsibilities?: string[]
  benefits?: string[]
  // Free-form extra data; values must be narrowed before use.
  metadata?: Record<string, unknown>
}