Groq Llama 3.1 #1817

Open · wants to merge 1 commit into base: main
app/api/chat/google/route.ts (1 change: 0 additions & 1 deletion)

```diff
@@ -44,7 +44,6 @@ export async function POST(request: Request) {
     return new Response(readableStream, {
       headers: { "Content-Type": "text/plain" }
     })
-
   } catch (error: any) {
     let errorMessage = error.message || "An unexpected error occurred"
     const errorCode = error.status || 500
```
components/chat/chat-helpers/index.ts (7 changes: 5 additions & 2 deletions)

```diff
@@ -208,9 +208,12 @@ export const handleHostedChat = async (
 
   let draftMessages = await buildFinalMessages(payload, profile, chatImages)
 
-  let formattedMessages : any[] = []
+  let formattedMessages: any[] = []
   if (provider === "google") {
-    formattedMessages = await adaptMessagesForGoogleGemini(payload, draftMessages)
+    formattedMessages = await adaptMessagesForGoogleGemini(
+      payload,
+      draftMessages
+    )
   } else {
     formattedMessages = draftMessages
   }
```
db/files.ts (5 changes: 4 additions & 1 deletion)

```diff
@@ -94,7 +94,10 @@ export const createFile = async (
   let validFilename = fileRecord.name.replace(/[^a-z0-9.]/gi, "_").toLowerCase()
   const extension = file.name.split(".").pop()
   const extensionIndex = validFilename.lastIndexOf(".")
-  const baseName = validFilename.substring(0, (extensionIndex < 0) ? undefined : extensionIndex)
+  const baseName = validFilename.substring(
+    0,
+    extensionIndex < 0 ? undefined : extensionIndex
+  )
   const maxBaseNameLength = 100 - (extension?.length || 0) - 1
   if (baseName.length > maxBaseNameLength) {
     fileRecord.name = baseName.substring(0, maxBaseNameLength) + "." + extension
```
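For context on the reflowed db/files.ts logic: it sanitizes the stored filename and truncates the base name so the final name (base plus dot plus extension) stays within 100 characters. A standalone sketch of that behavior, where the `sanitizeFileName` wrapper is hypothetical and added only for illustration:

```ts
// Hypothetical standalone rendering of the createFile sanitization logic above;
// the real code operates on fileRecord and file objects.
function sanitizeFileName(name: string): string {
  let validFilename = name.replace(/[^a-z0-9.]/gi, "_").toLowerCase()
  const extension = name.split(".").pop()
  const extensionIndex = validFilename.lastIndexOf(".")
  // substring(0, undefined) keeps the whole string when there is no dot
  const baseName = validFilename.substring(
    0,
    extensionIndex < 0 ? undefined : extensionIndex
  )
  const maxBaseNameLength = 100 - (extension?.length || 0) - 1
  return baseName.length > maxBaseNameLength
    ? baseName.substring(0, maxBaseNameLength) + "." + extension
    : validFilename
}

// sanitizeFileName("My Report (final).PDF") === "my_report__final_.pdf"
```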
lib/build-prompt.ts (54 changes: 26 additions & 28 deletions)

```diff
@@ -184,36 +184,35 @@ function buildRetrievalText(fileItems: Tables<"file_items">[]) {
 }
 
 function adaptSingleMessageForGoogleGemini(message: any) {
-
   let adaptedParts = []
 
   let rawParts = []
-  if(!Array.isArray(message.content)) {
-    rawParts.push({type: 'text', text: message.content})
+  if (!Array.isArray(message.content)) {
+    rawParts.push({ type: "text", text: message.content })
   } else {
     rawParts = message.content
   }
 
-  for(let i = 0; i < rawParts.length; i++) {
+  for (let i = 0; i < rawParts.length; i++) {
     let rawPart = rawParts[i]
 
-    if(rawPart.type == 'text') {
-      adaptedParts.push({text: rawPart.text})
-    } else if(rawPart.type === 'image_url') {
+    if (rawPart.type == "text") {
+      adaptedParts.push({ text: rawPart.text })
+    } else if (rawPart.type === "image_url") {
       adaptedParts.push({
         inlineData: {
           data: getBase64FromDataURL(rawPart.image_url.url),
-          mimeType: getMediaTypeFromDataURL(rawPart.image_url.url),
+          mimeType: getMediaTypeFromDataURL(rawPart.image_url.url)
         }
       })
     }
   }
 
-  let role = 'user'
-  if(["user", "system"].includes(message.role)) {
-    role = 'user'
-  } else if(message.role === 'assistant') {
-    role = 'model'
+  let role = "user"
+  if (["user", "system"].includes(message.role)) {
+    role = "user"
+  } else if (message.role === "assistant") {
+    role = "model"
   }
 
   return {
```

Review comment (Contributor), on `rawParts.push({type: 'text', text: message.content})`: Why do you touch Google Gemini code sections with a PR that addresses Groq Llama 3.1?
```diff
@@ -222,39 +221,38 @@ function adaptSingleMessageForGoogleGemini(message: any) {
   }
 }
 
-function adaptMessagesForGeminiVision(
-  messages: any[]
-) {
+function adaptMessagesForGeminiVision(messages: any[]) {
   // Gemini Pro Vision cannot process multiple messages
   // Reformat, using all texts and last visual only
 
   const basePrompt = messages[0].parts[0].text
   const baseRole = messages[0].role
-  const lastMessage = messages[messages.length-1]
-  const visualMessageParts = lastMessage.parts;
-  let visualQueryMessages = [{
-    role: "user",
-    parts: [
-      `${baseRole}:\n${basePrompt}\n\nuser:\n${visualMessageParts[0].text}\n\n`,
-      visualMessageParts.slice(1)
-    ]
-  }]
+  const lastMessage = messages[messages.length - 1]
+  const visualMessageParts = lastMessage.parts
+  let visualQueryMessages = [
+    {
+      role: "user",
+      parts: [
+        `${baseRole}:\n${basePrompt}\n\nuser:\n${visualMessageParts[0].text}\n\n`,
+        visualMessageParts.slice(1)
+      ]
+    }
+  ]
   return visualQueryMessages
 }
 
 export async function adaptMessagesForGoogleGemini(
   payload: ChatPayload,
-  messages: any[]
+  messages: any[]
 ) {
   let geminiMessages = []
   for (let i = 0; i < messages.length; i++) {
     let adaptedMessage = adaptSingleMessageForGoogleGemini(messages[i])
     geminiMessages.push(adaptedMessage)
   }
 
-  if(payload.chatSettings.model === "gemini-pro-vision") {
+  if (payload.chatSettings.model === "gemini-pro-vision") {
     geminiMessages = adaptMessagesForGeminiVision(geminiMessages)
   }
   return geminiMessages
 }
```

Review comment (Contributor), on `if (payload.chatSettings.model === "gemini-pro-vision") {`: These look like automatic formatting changes, which disturb the focus of the PR here.
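For reference, a small sketch of the shape transformation `adaptSingleMessageForGoogleGemini` performs on a text-only OpenAI-style history (the sample messages are illustrative, not part of the PR):

```ts
// Illustrative before/after shapes for the Gemini adapter above.
// Sample data is hypothetical; only the mapping rules come from the code.
const openAiStyleMessages = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "Hello!" },
  { role: "assistant", content: "Hi! How can I help?" }
]

// Per the adapter: "system" and "user" both map to role "user",
// "assistant" maps to "model", and string content becomes one text part.
const expectedGeminiMessages = [
  { role: "user", parts: [{ text: "You are a helpful assistant." }] },
  { role: "user", parts: [{ text: "Hello!" }] },
  { role: "model", parts: [{ text: "Hi! How can I help?" }] }
]
```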
lib/chat-setting-limits.ts (8 changes: 7 additions & 1 deletion)

```diff
@@ -47,7 +47,7 @@ export const CHAT_SETTING_LIMITS: Record<LLMID, ChatSettingLimits> = {
   },
 
   // GOOGLE MODELS
-
+
   "gemini-1.5-flash": {
     MIN_TEMPERATURE: 0.0,
     MAX_TEMPERATURE: 1.0,
@@ -112,6 +112,12 @@ export const CHAT_SETTING_LIMITS: Record<LLMID, ChatSettingLimits> = {
     MAX_TOKEN_OUTPUT_LENGTH: 8192,
     MAX_CONTEXT_LENGTH: 8192
   },
+  "llama-3.1-405b-reasoning": {
+    MIN_TEMPERATURE: 0.0,
+    MAX_TEMPERATURE: 1.0,
+    MAX_TOKEN_OUTPUT_LENGTH: 8192,
+    MAX_CONTEXT_LENGTH: 8192
+  },
   "mixtral-8x7b-32768": {
     MIN_TEMPERATURE: 0.0,
     MAX_TEMPERATURE: 1.0,
```
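A quick sketch of how an entry like this is typically consumed, clamping a requested temperature into the model's allowed range (the `clampTemperature` helper and the import paths are hypothetical, for illustration only):

```ts
// Hypothetical consumer of CHAT_SETTING_LIMITS; import paths are assumed.
import { CHAT_SETTING_LIMITS } from "@/lib/chat-setting-limits"
import { LLMID } from "@/types"

function clampTemperature(modelId: LLMID, requested: number): number {
  const limits = CHAT_SETTING_LIMITS[modelId]
  return Math.min(
    Math.max(requested, limits.MIN_TEMPERATURE),
    limits.MAX_TEMPERATURE
  )
}

// clampTemperature("llama-3.1-405b-reasoning", 1.4) === 1.0
```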
lib/models/llm/google-llm-list.ts (7 changes: 6 additions & 1 deletion)

```diff
@@ -44,4 +44,9 @@ const GEMINI_PRO_VISION: LLM = {
   imageInput: true
 }
 
-export const GOOGLE_LLM_LIST: LLM[] = [GEMINI_PRO, GEMINI_PRO_VISION, GEMINI_1_5_PRO, GEMINI_1_5_FLASH]
+export const GOOGLE_LLM_LIST: LLM[] = [
+  GEMINI_PRO,
+  GEMINI_PRO_VISION,
+  GEMINI_1_5_PRO,
+  GEMINI_1_5_FLASH
+]
```
lib/models/llm/groq-llm-list.ts (16 changes: 16 additions & 0 deletions)

```diff
@@ -32,6 +32,21 @@ const LLaMA3_70B: LLM = {
   }
 }
 
+const LLaMA31_405B: LLM = {
+  modelId: "llama-3.1-405b-reasoning",
+  modelName: "LLaMA31-405b-chat",
+  provider: "groq",
+  hostedId: "llama-3.1-405b-reasoning",
+  platformLink: GROQ_PLATORM_LINK,
+  imageInput: false,
+  pricing: {
+    currency: "USD",
+    unit: "1M tokens",
+    inputCost: 0.59,
+    outputCost: 0.79
+  }
+}
+
 const MIXTRAL_8X7B: LLM = {
   modelId: "mixtral-8x7b-32768",
   modelName: "Mixtral-8x7b-Instruct-v0.1",
@@ -65,6 +80,7 @@ const GEMMA_7B_IT: LLM = {
 export const GROQ_LLM_LIST: LLM[] = [
   LLaMA3_8B,
   LLaMA3_70B,
+  LLaMA31_405B,
   MIXTRAL_8X7B,
   GEMMA_7B_IT
 ]
```
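Since Groq serves its hosted models through an OpenAI-compatible endpoint, a minimal sketch of exercising the newly added model might look like the following (the `baseURL`, the environment variable name, and the model's availability on Groq are assumptions to verify against Groq's documentation):

```ts
// Minimal sketch, assuming Groq's OpenAI-compatible API and the openai SDK.
// Endpoint, env var name, and model availability are assumptions, not from the PR.
import OpenAI from "openai"

const groq = new OpenAI({
  apiKey: process.env.GROQ_API_KEY,
  baseURL: "https://api.groq.com/openai/v1"
})

const completion = await groq.chat.completions.create({
  model: "llama-3.1-405b-reasoning", // the modelId added in this PR
  messages: [{ role: "user", content: "Say hello." }],
  temperature: 0.5, // within the 0.0-1.0 limits added in chat-setting-limits.ts
  max_tokens: 1024 // under MAX_TOKEN_OUTPUT_LENGTH (8192)
})

console.log(completion.choices[0].message.content)
```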