Skip to content

Commit

Permalink
Merge pull request #422 from Klimatbyran/staging
Browse files Browse the repository at this point in the history
New production release
  • Loading branch information
Greenheart authored Dec 9, 2024
2 parents 3900b48 + e99ca46 commit d14ba54
Show file tree
Hide file tree
Showing 12 changed files with 87 additions and 167 deletions.
17 changes: 16 additions & 1 deletion src/lib/saveUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ const recursiveOmit = <T extends Object>(
return obj
}

export const askDiff = async (before: any, after: any) => {
const askDiff = async (before: any, after: any) => {
if (!before || !after) return 'NO_CHANGES'
return await askPrompt(
`What is changed between these two json values? Please respond in clear text with markdown formatting.
Expand All @@ -70,3 +70,18 @@ NEVER REPEAT UNCHANGED VALUES OR UNCHANGED YEARS! If nothing important has chang
})
)
}

/**
 * Ask for a human-readable diff between two values and decide whether the
 * resulting save needs manual approval.
 *
 * Approval is required when there is an existing company (any truthy
 * `existingCompany`) or when the diff reports changes. The diff counts as
 * "no changes" when it is empty or contains the `NO_CHANGES` marker, which
 * `askDiff` also returns when either side is missing.
 *
 * @param existingCompany - The previously stored company, if any; only its truthiness is used.
 * @param before - The previously stored value.
 * @param after - The newly extracted value.
 * @returns `diff` is the diff text, or `''` when nothing changed;
 *   `requiresApproval` is always a strict boolean.
 */
export async function diffChanges<T>({
  existingCompany,
  before,
  after,
}: {
  existingCompany: any
  before: T
  after: T
}) {
  const diff = await askDiff(before, after)

  // Coerce explicitly: `diff && ...` would evaluate to the falsy operand itself
  // ('' or undefined) rather than `false`, and that value would then leak into
  // `requiresApproval`. An `undefined` flag is indistinguishable from "not set"
  // for consumers that apply a default to a missing `requiresApproval`.
  const hasChanges = Boolean(diff) && !diff.includes('NO_CHANGES')
  const requiresApproval = Boolean(existingCompany) || hasChanges

  return { diff: hasChanges ? diff : '', requiresApproval }
}
10 changes: 1 addition & 9 deletions src/lib/wikidata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,5 @@ export async function getWikidataEntities(ids: EntityId[]) {
res.json()
)

const companies = Object.values(entities).filter(
(entity: any) => entity.claims.P5991
)

// Prioritise companies which include "carbon footprint" (P5991)
// Otherwise fall back to returning the top results and hope for the best
// IDEA: Maybe we could make a qualified guess here, for example by filtering the data for certain keywords
// related to companies?
return companies.length ? companies : Object.values(entities)
return Object.values(entities)
}
12 changes: 0 additions & 12 deletions src/prompts/feedback.ts

This file was deleted.

57 changes: 0 additions & 57 deletions src/prompts/parsePDF.ts

This file was deleted.

54 changes: 0 additions & 54 deletions src/prompts/reflect.ts

This file was deleted.

13 changes: 9 additions & 4 deletions src/workers/diffGoals.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { DiscordJob, DiscordWorker } from '../lib/DiscordWorker'
import { defaultMetadata, askDiff } from '../lib/saveUtils'
import { defaultMetadata, diffChanges } from '../lib/saveUtils'
import saveToAPI from './saveToAPI'

export class DiffGoalsJob extends DiscordJob {
Expand All @@ -20,15 +20,20 @@ const diffGoals = new DiscordWorker<DiffGoalsJob>('diffGoals', async (job) => {
metadata,
}

const diff = await askDiff(existingCompany?.goals, goals)
const requiresApproval = diff && !diff.includes('NO_CHANGES')
const { diff, requiresApproval } = await diffChanges({
existingCompany,
before: existingCompany?.goals,
after: goals,
})

job.log('Diff:' + diff)

await saveToAPI.queue.add(companyName + ' goals', {
...job.data,
body,
diff,
requiresApproval,
apiSubEndpoint: 'goals',
requiresApproval: Boolean(existingCompany),

// Remove duplicated job data that should be part of the body from now on
goals: undefined,
Expand Down
13 changes: 9 additions & 4 deletions src/workers/diffIndustry.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { DiscordJob, DiscordWorker } from '../lib/DiscordWorker'
import { defaultMetadata, askDiff } from '../lib/saveUtils'
import { defaultMetadata, diffChanges } from '../lib/saveUtils'
import saveToAPI from './saveToAPI'

export class DiffIndustryJob extends DiscordJob {
Expand All @@ -22,15 +22,20 @@ const diffIndustry = new DiscordWorker<DiffIndustryJob>(
metadata,
}

const diff = await askDiff(existingCompany?.industry, industry)
const requiresApproval = diff && !diff.includes('NO_CHANGES')
const { diff, requiresApproval } = await diffChanges({
existingCompany,
before: existingCompany?.industry,
after: industry,
})

job.log('Diff:' + diff)

await saveToAPI.queue.add(companyName + ' industry', {
...job.data,
body,
diff,
requiresApproval,
apiSubEndpoint: 'industry',
requiresApproval: Boolean(existingCompany),

// Remove duplicated job data that should be part of the body from now on
industry: undefined,
Expand Down
13 changes: 9 additions & 4 deletions src/workers/diffInitiatives.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { DiscordJob, DiscordWorker } from '../lib/DiscordWorker'
import { defaultMetadata, askDiff } from '../lib/saveUtils'
import { defaultMetadata, diffChanges } from '../lib/saveUtils'
import saveToAPI from './saveToAPI'

export class DiffInitiativesJob extends DiscordJob {
Expand All @@ -22,15 +22,20 @@ const diffInitiatives = new DiscordWorker<DiffInitiativesJob>(
metadata,
}

const diff = await askDiff(existingCompany?.initiatives, initiatives)
const requiresApproval = diff && !diff.includes('NO_CHANGES')
const { diff, requiresApproval } = await diffChanges({
existingCompany,
before: existingCompany?.initiatives,
after: initiatives,
})

job.log('Diff:' + diff)

await saveToAPI.queue.add(companyName + ' initiatives', {
...job.data,
body,
diff,
requiresApproval,
apiSubEndpoint: 'initiatives',
requiresApproval: Boolean(existingCompany),

// Remove duplicated job data that should be part of the body from now on
initiatives: undefined,
Expand Down
17 changes: 9 additions & 8 deletions src/workers/diffReportingPeriods.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { DiscordJob, DiscordWorker } from '../lib/DiscordWorker'
import { defaultMetadata, askDiff } from '../lib/saveUtils'
import { defaultMetadata, diffChanges } from '../lib/saveUtils'
import { getReportingPeriodDates } from '../lib/reportingPeriodDates'
import saveToAPI from './saveToAPI'

Expand Down Expand Up @@ -82,12 +82,13 @@ const diffReportingPeriods = new DiscordWorker<DiffReportingPeriodsJob>(

// NOTE: Maybe only keep properties in existingCompany.reportingPeriods, e.g. the relevant economy properties, or the relevant emissions properties
// This could improve accuracy of the diff
const diff = await askDiff(
existingCompany?.reportingPeriods,
updatedReportingPeriods
)
job.log('diff: ' + diff)
const requiresApproval = diff && !diff.includes('NO_CHANGES')
const { diff, requiresApproval } = await diffChanges({
existingCompany,
before: existingCompany?.reportingPeriods,
after: reportingPeriods,
})

job.log('Diff:' + diff)

const body = {
reportingPeriods: updatedReportingPeriods,
Expand All @@ -98,8 +99,8 @@ const diffReportingPeriods = new DiscordWorker<DiffReportingPeriodsJob>(
...job.data,
body,
diff,
requiresApproval,
apiSubEndpoint: 'reporting-periods',
requiresApproval: Boolean(existingCompany),

// Remove duplicated job data that should be part of the body from now on
scope12: undefined,
Expand Down
36 changes: 26 additions & 10 deletions src/workers/guessWikidata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,16 @@ class GuessWikidataJob extends DiscordJob {
}
}

const insignificantWords = new Set(['ab', 'the', 'and', 'inc', 'co', 'publ'])
// Lower-case words that are stripped from a company name before the simplified
// Wikidata search, because they carry little identifying information
// (articles/conjunctions and company-form words such as "ab", "inc", "aktiebolag").
// Lookups must be done with a lower-cased word, since all entries are lower-case.
const insignificantWords = new Set([
'ab',
'the',
'and',
'inc',
'co',
'publ',
'aktiebolag',
'aktiebolaget',
])

const guessWikidata = new DiscordWorker<GuessWikidataJob>(
'guessWikidata',
Expand All @@ -36,9 +45,8 @@ const guessWikidata = new DiscordWorker<GuessWikidataJob>(

if (retry === 0) {
const simplifiedCompanyName = companyName
.toLowerCase()
.split(/\s+/)
.filter((word) => !insignificantWords.has(word))
.filter((word) => !insignificantWords.has(word.toLowerCase()))
.join(' ')

return getWikidataSearchResults({
Expand Down Expand Up @@ -70,6 +78,20 @@ const guessWikidata = new DiscordWorker<GuessWikidataJob>(
throw new Error(`No Wikidata entry for "${companyName}"`)
}

const orderedResults = results
.toSorted((a, b) => {
// Move companies which include "carbon footprint" (P5991) to the start of the search results
// IDEA: Maybe we could make a qualified guess here, for example by ordering search results which include certain keywords
// Or do a string similarity search.
const hasEmissions = (e: any) => Boolean(e?.claims?.P5991)
return (hasEmissions(a) ? 0 : 1) - (hasEmissions(b) ? 0 : 1)
})
.map((e) => {
// Exclude claims to reduce the number of input + output tokens since we are primarily interested in the wikidataId.
// If we want to find other data like the company logo, we could filter the search results based on the wikidataId and extract the relevant property `PXXXXX` for the logo, which is probably standardised by wikidata
return { ...e, claims: undefined }
})

const response = await ask(
[
{
Expand All @@ -83,13 +105,7 @@ const guessWikidata = new DiscordWorker<GuessWikidataJob>(
},
{
role: 'user',
content: JSON.stringify(
// NOTE: Exclude claims to reduce the number of input + output tokens since we are primarily interested in the wikidataId.
// If we want to find other data like the company logo, we could filter the search results based on the wikidataId and extract the relevant property `PXXXXX` for the logo, which is probably standardised by wikidata
results.map((e) => ({ ...e, claims: undefined })),
null,
2
),
content: JSON.stringify(orderedResults, null, 2),
},
Array.isArray(job.stacktrace)
? { role: 'user', content: job.stacktrace.join('\n') }
Expand Down
6 changes: 6 additions & 0 deletions src/workers/nlmExtractTables.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ const searchTerms = [
'FTE',
'fiscal year',
'summary',
'utsläpp',
'anställda',
'inkomster',
'omsättning',
'växthusgas',
'koldioxid',
]
const nlmExtractTables = new DiscordWorker(
'nlmExtractTables',
Expand Down
6 changes: 2 additions & 4 deletions src/workers/saveToAPI.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export const saveToAPI = new DiscordWorker<SaveToApiJob>(
wikidata,
approved,
requiresApproval = true,
diff,
diff = '',
body,
apiSubEndpoint,
} = job.data
Expand All @@ -37,9 +37,7 @@ export const saveToAPI = new DiscordWorker<SaveToApiJob>(
// If approval is required and not yet approved, send approval request
const buttonRow = discord.createButtonRow(job.id!)
await job.sendMessage({
content: `## ${apiSubEndpoint}\n\nNew changes need approval for ${wikidataId}\n\n${
diff || ''
}`,
content: `## ${apiSubEndpoint}\n\nNew changes need approval for ${wikidataId}\n\n${diff}`,
components: [buttonRow],
})

Expand Down

0 comments on commit d14ba54

Please sign in to comment.