Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified devops-mcp-server/rag/client/devops-rag.db
Binary file not shown.
61 changes: 61 additions & 0 deletions local-rag/auth.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"context"
"fmt"

"cloud.google.com/go/auth/credentials"
)

// getGCPToken resolves Application Default Credentials scoped to
// cloud-platform and returns an access token value together with the
// project ID those credentials report.
//
// The project ID is taken from the credentials first; when that is empty the
// quota project ID is used instead. If both are empty an error is returned.
// Any failure while detecting credentials, resolving either project ID, or
// fetching the token is wrapped and returned with empty string results.
func getGCPToken(ctx context.Context) (tokenValue string, projectID string, err error) {
	adc, err := credentials.DetectDefault(&credentials.DetectOptions{
		Scopes: []string{"https://www.googleapis.com/auth/cloud-platform"},
	})
	if err != nil {
		return "", "", fmt.Errorf("failed to find default credentials: %w", err)
	}

	if projectID, err = adc.ProjectID(ctx); err != nil {
		return "", "", fmt.Errorf("failed to get project ID: %w", err)
	}

	// No project on the credentials themselves: fall back to the quota project.
	if projectID == "" {
		if projectID, err = adc.QuotaProjectID(ctx); err != nil {
			return "", "", fmt.Errorf("failed to get quota project ID: %w", err)
		}
		if projectID == "" {
			return "", "", fmt.Errorf("no Project ID found in Application Default Credentials. " +
				"This can happen if credentials are user-based or the project hasn't been explicitly set " +
				"e.g., via gcloud auth application-default set-quota-project")
		}
	}

	// The token is fetched last, only once a project ID is known.
	accessToken, err := adc.TokenProvider.Token(ctx)
	if err != nil {
		return "", "", fmt.Errorf("failed to retrieve access token: %w", err)
	}

	return accessToken.Value, projectID, nil
}
13 changes: 11 additions & 2 deletions local-rag/fetch_docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,28 @@ import (
"path/filepath"
"regexp"
"strings"
"time"

md "github.com/JohannesKaufmann/html-to-markdown"
"github.com/PuerkitoBio/goquery"
"github.com/go-git/go-git/v5"
)

// httpClient is the package-level HTTP client used for outbound fetches.
// Its 30-second Timeout bounds each request as a whole, unlike the
// zero-timeout http.DefaultClient that a bare http.Get would use.
var httpClient = &http.Client{Timeout: 30 * time.Second}

func downloadFile(url, targetDir string) (string, error) {
resp, err := http.Get(url)
resp, err := httpClient.Get(url)
if err != nil {
return "", err
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("bad status: %s", resp.Status)
}

fileName := filepath.Base(url)
filePath := filepath.Join(targetDir, fileName)

Expand Down Expand Up @@ -240,7 +249,7 @@ func downloadWebsites(sources *Source, extractToDir string) error {

log.Printf("Fetching: %s", currentURLBase)

resp, err := http.Get(currentURLBase)
resp, err := httpClient.Get(currentURLBase)
if err != nil {
log.Printf("Error fetching %s: %v", currentURLBase, err)
continue
Expand Down
165 changes: 54 additions & 111 deletions local-rag/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"os"
"path/filepath"

"cloud.google.com/go/auth/credentials"
chromem "github.com/philippgille/chromem-go"
)

Expand All @@ -35,22 +34,15 @@ type Source struct {
URLPattern string `json:"url_pattern,omitempty"`
}

var KNOWLEDGE_RAG_SOURCES = []Source{
var knowledgeRAGSources = []Source{
{
Name: "GCP_DOCS",
Extract: "devsite-content",
Type: "webpage",
ExcludePattern: ".*\\?hl=.+$",
Dir: "GCP_DOCS",
URLs: []string{
"https://cloud.google.com/developer-connect/docs/api/reference/rest",
"https://cloud.google.com/developer-connect/docs/authentication",
"https://cloud.google.com/build/docs/api/reference/rest",
"https://cloud.google.com/deploy/docs/api/reference/rest",
"https://cloud.google.com/artifact-analysis/docs/reference/rest",
"https://cloud.google.com/artifact-registry/docs/reference/rest",
"https://cloud.google.com/infrastructure-manager/docs/reference/rest",
"https://cloud.google.com/docs/buildpacks/stacks",
//"https://cloud.google.com/docs/buildpacks/stacks", // Package list is too large for a single chunk
"https://cloud.google.com/docs/buildpacks/base-images",
"https://cloud.google.com/docs/buildpacks/build-application",
"https://cloud.google.com/docs/buildpacks/python",
Expand All @@ -60,19 +52,6 @@ var KNOWLEDGE_RAG_SOURCES = []Source{
"https://cloud.google.com/docs/buildpacks/ruby",
"https://cloud.google.com/docs/buildpacks/php",
"https://cloud.google.com/build/docs/build-config-file-schema",
"https://cloud.google.com/build/docs/private-pools/use-in-private-network",
"https://cloud.google.com/deploy/docs/config-files",
"https://cloud.google.com/deploy/docs/deploy-app-gke",
"https://cloud.google.com/deploy/docs/deploy-app-run",
"https://cloud.google.com/deploy/docs/overview",
"https://cloud.google.com/build/docs/build-push-docker-image",
"https://cloud.google.com/build/docs/deploy-containerized-application-cloud-run",
"https://cloud.google.com/build/docs/automate-builds",
"https://cloud.google.com/build/docs/configuring-builds/create-basic-configuration",
"https://cloud.google.com/build/docs/automating-builds/create-manage-triggers",
"https://cloud.google.com/build/docs/building/build-containers",
"https://cloud.google.com/build/docs/building/build-nodejs",
"https://cloud.google.com/build/docs/building/build-java",
"https://cloud.google.com/build/docs/deploying-builds/deploy-cloud-run",
"https://cloud.google.com/build/docs/deploying-builds/deploy-gke",
},
Expand All @@ -98,27 +77,6 @@ var KNOWLEDGE_RAG_SOURCES = []Source{
"https://switowski.com/blog/ci-101/",
},
},
{
Name: "cloud_builder_docs",
Extract: "section",
Type: "git_repo",
URLPattern: "\\.md$",
ExcludePattern: ".*(vendor|third_party|.github).*$",
URLs: []string{
"https://github.com/GoogleCloudPlatform/cloud-builders/archive/refs/heads/master.zip",
"https://github.com/GoogleCloudPlatform/cloud-builders-community/archive/refs/heads/master.zip",
},
},
{
Name: "GCP_Terraform_Docs",
Extract: "section",
Type: "git_repo",
URLPattern: "website/docs/.*\\.markdown$",
ExcludePattern: ".*(vendor|third_party|.github).*$",
URLs: []string{
"https://github.com/hashicorp/terraform-provider-google/archive/refs/heads/main.zip",
},
},
}

func processSource(source Source, tmpDir string) {
Expand All @@ -143,69 +101,69 @@ func processSource(source Source, tmpDir string) {
}
}

func main() {
// Initialize the chromem database
ctx := context.Background()

// Use Application Default Credentials to get a TokenSource
scopes := []string{"https://www.googleapis.com/auth/cloud-platform"}
creds, err := credentials.DetectDefault(&credentials.DetectOptions{
Scopes: scopes,
})
if err != nil {
log.Fatalf("Failed to find default credentials: %v", err)
}

projectID, err := creds.ProjectID(ctx)
if err != nil {
log.Fatalf("Failed to get project ID: %v", err)
}
if projectID == "" {
//Try quota project
projectID, err = creds.QuotaProjectID(ctx)
func dbFile() string {
dbFile := os.Getenv("RAG_DB_PATH")
if len(dbFile) == 0 {
pwd, err := os.Getwd()
if err != nil {
log.Fatalf("Failed to get project ID: %v", err)
}
if projectID == "" {
log.Fatalf(`
No Project ID found in Application Default Credentials.
This can happen if credentials are user-based or the project hasn't been explicitly set
e.g., via gcloud auth application-default set-quota-project.
Error:%v`, err)
log.Fatal(err)
}
dbFile = filepath.Join(pwd, "devops-rag.db")
}

// We need an access token
token, err := creds.TokenProvider.Token(ctx)
if err != nil {
log.Fatalf("Failed to retrieve access token: %v", err)
}

return dbFile
}
func setupRAGDB(ctx context.Context, token, projectID string) (*chromem.DB, chromem.EmbeddingFunc, error) {
vertexEmbeddingFunc := chromem.NewEmbeddingFuncVertex(
token.Value,
token,
projectID,
chromem.EmbeddingModelVertexEnglishV4)
db := chromem.NewDB()
dbFile := os.Getenv("RAG_DB_PATH")
dbFile := dbFile()
if len(dbFile) > 0 {
//check if file exists, only import if it does
if _, err := os.Stat(dbFile); os.IsNotExist(err) {
log.Printf("RAG_DB_PATH file does not exist, skipping import: %v", dbFile)
} else {
err := db.ImportFromFile(dbFile, "")
log.Printf("Imported RAG with collections:%d", len(db.ListCollections()))
if err != nil {
log.Fatalf("Unable to import from the RAG DB file:%s - %v", dbFile, err)
return nil, nil, err
}
}
}
collectionKnowledge, err := db.GetOrCreateCollection("knowledge", nil, vertexEmbeddingFunc)
_, err := db.GetOrCreateCollection("knowledge", nil, vertexEmbeddingFunc)
if err != nil {
log.Fatal(err)
return nil, nil, err
}
collectionPattern, err := db.GetOrCreateCollection("pattern", nil, vertexEmbeddingFunc)
_, err = db.GetOrCreateCollection("pattern", nil, vertexEmbeddingFunc)
if err != nil {
log.Fatal(err)
return nil, nil, err
}
return db, vertexEmbeddingFunc, nil
}

// processAllSources runs processSource for every entry in
// knowledgeRAGSources, but only when ragSourceDir contains no entries.
// A non-empty directory is left untouched. The process exits via log.Fatalf
// if the directory cannot be read.
func processAllSources(ragSourceDir string) {
	existing, err := os.ReadDir(ragSourceDir)
	if err != nil {
		log.Fatalf("Unable to read directory: %v", err)
	}

	// Something is already present in the directory: nothing to do.
	if len(existing) != 0 {
		return
	}

	for _, src := range knowledgeRAGSources {
		processSource(src, ragSourceDir)
	}
}

func main() {
ctx := context.Background()

token, projectID, err := getGCPToken(ctx)
if err != nil {
log.Fatalf("GCP Auth failed: %v", err)
}

db, embeddingFunc, err := setupRAGDB(ctx, token, projectID)
if err != nil {
log.Fatalf("Failed to setup RAG DB: %v", err)
}

// Upload local directories
Expand All @@ -214,46 +172,31 @@ func main() {
log.Fatal(err)
}

collectionPattern := db.GetCollection("pattern", embeddingFunc)
patternsDir := filepath.Join(pwd, "patterns")
addDirectoryToRag(ctx, collectionPattern, patternsDir)

collectionKnowledge := db.GetCollection("knowledge", embeddingFunc)
knowledgeDir := filepath.Join(pwd, "knowledge")
addDirectoryToRag(ctx, collectionKnowledge, knowledgeDir)

// Create a temporary directory for downloads
//tmpDir, err := os.MkdirTemp("", "rag-data-")
ragSourceDir, err := os.Getwd()
if err != nil {
log.Fatalf("Unable to get working directory: %v", err)
}
ragSourceDir = ragSourceDir + "/.rag-sources"
//Create dir if it does not exist
if fileStat, err := os.Stat(dbFile); os.IsNotExist(err) {
ragSourceDir := filepath.Join(pwd, ".rag-sources")
if _, err := os.Stat(ragSourceDir); os.IsNotExist(err) {
log.Printf("Dir does not exist: %v", ragSourceDir)
err = os.MkdirAll(ragSourceDir, 0755)
log.Printf("Dir created: %v", fileStat)
if err != nil {
log.Fatal(err)
}
log.Printf("Dir created: %v", ragSourceDir)
}
//defer os.RemoveAll(tmpDir)

// Process data sources if destination is empty
// otherwise we assume last run was successful in
// fetching sources
entries, err := os.ReadDir(ragSourceDir)
if err != nil {
log.Fatalf("Unable to read directory: %v", err)
}
if len(entries) == 0 {
for _, source := range KNOWLEDGE_RAG_SOURCES {
processSource(source, ragSourceDir)
}
}
processAllSources(ragSourceDir)

// Upload all files in the temporary directory to RAG
// Upload all files in the source directory to RAG
addDirectoryToRag(ctx, collectionKnowledge, ragSourceDir)

// Export the database to a file
dbFile := dbFile()
if len(dbFile) > 0 {
log.Printf("Exporting database Knowledge base docs:%d, Pattern docs:%d",
collectionKnowledge.Count(),
Expand Down
Loading
Loading