diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 14d705198..67e40993a 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -75,7 +75,7 @@ jobs: runs-on: ubuntu-latest services: postgres: - image: postgres:15 + image: pgvector/pgvector:pg16 env: POSTGRES_USER: postgres POSTGRES_PASSWORD: password diff --git a/backend/background/jobs/club_embeddings.go b/backend/background/jobs/club_embeddings.go new file mode 100644 index 000000000..c143d2eed --- /dev/null +++ b/backend/background/jobs/club_embeddings.go @@ -0,0 +1,55 @@ +package jobs + +import ( + "context" + "fmt" + "log/slog" + "time" + + "github.com/GenerateNU/sac/backend/background" + "github.com/GenerateNU/sac/backend/entities/models" + "github.com/GenerateNU/sac/backend/search" + + "github.com/GenerateNU/sac/backend/constants" +) + +// Generate club embeddings for clubs that did not receive them when being created or updated. This could occur in the case of +// mock data (which is uploaded to postgres directly, doesn't go through the app), or in the case OpenAI API goes down (service outage, bad api key, etc) +func (j *Jobs) ClubEmbeddings(ctx context.Context) background.JobFunc { + return func() { + t := time.NewTicker(constants.EMBEDDINGS_GENERATION_INTERVAL) + + for range t.C { + func() { + tx := j.db.WithContext(ctx).Begin() + defer func() { + if r := recover(); r != nil { + tx.Rollback() + } + }() + + var club models.Club + if err := tx.Raw("SELECT * FROM clubs WHERE embedding IS NULL FOR UPDATE SKIP LOCKED LIMIT 1").Scan(&club).Error; err != nil { + tx.Rollback() + return + } + + if club.Name == "" && club.Preview == "" && club.Description == "" { // empty club + tx.Rollback() + return + } + + slog.Info(fmt.Sprintf("Generating embeddings for club '%s' (%s)", club.Name, club.ID.String())) + + if err := search.UpsertClubEmbedding(tx, j.search, &club); err != nil { + tx.Rollback() + return + } + + if err := tx.Commit().Error; err != nil { + return + } + }() + } + } +} diff --git a/backend/background/jobs/event_embeddings.go b/backend/background/jobs/event_embeddings.go new file mode 100644 index 000000000..4c3da1a83 --- /dev/null +++ b/backend/background/jobs/event_embeddings.go @@ -0,0 +1,55 @@ +package jobs + +import ( + "context" + "fmt" + "log/slog" + "time" + + "github.com/GenerateNU/sac/backend/background" + "github.com/GenerateNU/sac/backend/entities/models" + "github.com/GenerateNU/sac/backend/search" + + "github.com/GenerateNU/sac/backend/constants" +) + +// Generate event embeddings for events that did not receive them when being created or updated. This could occur in the case of +// mock data (which is uploaded to postgres directly, doesn't go through the app), or in the case OpenAI API goes down (service outage, bad api key, etc) +func (j *Jobs) EventEmbeddings(ctx context.Context) background.JobFunc { + return func() { + t := time.NewTicker(constants.EMBEDDINGS_GENERATION_INTERVAL) + + for range t.C { + func() { + tx := j.db.WithContext(ctx).Begin() + defer func() { + if r := recover(); r != nil { + tx.Rollback() + } + }() + + var event models.Event + if err := tx.Raw("SELECT * FROM events WHERE embedding IS NULL FOR UPDATE SKIP LOCKED LIMIT 1").Scan(&event).Error; err != nil { + tx.Rollback() + return + } + + if event.Name == "" && event.Preview == "" && event.Description == "" { // empty club + tx.Rollback() + return + } + + slog.Info(fmt.Sprintf("Generating embeddings for event '%s' (%s)", event.Name, event.ID.String())) + + if err := search.UpsertEventEmbedding(tx, j.search, &event); err != nil { + tx.Rollback() + return + } + + if err := tx.Commit().Error; err != nil { + return + } + }() + } + } +} diff --git a/backend/background/jobs/jobs.go b/backend/background/jobs/jobs.go index 65ae2ad64..5cf38effb 100644 --- a/backend/background/jobs/jobs.go +++ b/backend/background/jobs/jobs.go @@ -1,6 +1,7 @@ package jobs import ( + "github.com/GenerateNU/sac/backend/config" "github.com/GenerateNU/sac/backend/integrations/email" "gorm.io/gorm" ) @@ -8,8 +9,9 @@ import ( type Jobs struct { db *gorm.DB emailer email.Emailer + search *config.SearchSettings } -func New(db *gorm.DB) *Jobs { - return &Jobs{db: db} +func New(db *gorm.DB, settings *config.Settings) *Jobs { + return &Jobs{db: db, search: &settings.Search} } diff --git a/backend/config/search.go b/backend/config/search.go index 27f40eaf1..08a5e3855 100644 --- a/backend/config/search.go +++ b/backend/config/search.go @@ -1,5 +1,22 @@ package config +import m "github.com/garrettladley/mattress" + type SearchSettings struct { - URI string `env:"URI"` + OpenAIApiKey *m.Secret[string] +} + +type intermediateSearchSettings struct { + OpenAIApiKey string `env:"OPENAI_API_KEY"` +} + +func (i *intermediateSearchSettings) into() (*SearchSettings, error) { + openAiApiKey, err := m.NewSecret(i.OpenAIApiKey) + if err != nil { + return nil, err + } + + return &SearchSettings{ + OpenAIApiKey: openAiApiKey, + }, nil } diff --git a/backend/config/settings.go b/backend/config/settings.go index 03d3946b0..c00593ff1 100644 --- a/backend/config/settings.go +++ b/backend/config/settings.go @@ -34,7 +34,7 @@ type intermediateSettings struct { Google intermediateGoogleOAuthSettings `envPrefix:"SAC_GOOGLE_OAUTH_"` MicrosoftWeb intermediateMicrosoftWebOAuthSettings `envPrefix:"SAC_MICROSOFT_OAUTH_WEB_"` MicrosoftMobile intermediateMicrosoftMobileOAuthSettings `envPrefix:"SAC_MICROSOFT_OAUTH_MOBILE_"` - Search SearchSettings `envPrefix:"SAC_SEARCH_"` + Search intermediateSearchSettings `envPrefix:"SAC_SEARCH_"` } func (i *intermediateSettings) into() (*Settings, error) { @@ -98,6 +98,11 @@ func (i *intermediateSettings) into() (*Settings, error) { return nil, err } + search, err := i.Search.into() + if err != nil { + return nil, err + } + return &Settings{ Application: i.Application, DBCache: *dbCache, @@ -112,7 +117,7 @@ func (i *intermediateSettings) into() (*Settings, error) { MicrosoftMobile: *microsoftMobile, AWS: *aws, Resend: *resend, - Search: i.Search, + Search: *search, }, }, nil } diff --git a/backend/constants/jobs.go b/backend/constants/jobs.go index 49f8757e3..0dfec1e86 100644 --- a/backend/constants/jobs.go +++ b/backend/constants/jobs.go @@ -7,4 +7,6 @@ const ( DELETE_EXPIRED_VERIFICATION_LIMIT int = 100 EMAIL_SENDER_INTERVAL time.Duration = 5 * time.Second MAX_EMAIL_ATTEMPTS int = 3 + + EMBEDDINGS_GENERATION_INTERVAL time.Duration = 1 * time.Second ) diff --git a/backend/constants/search.go b/backend/constants/search.go deleted file mode 100644 index 69bdd8289..000000000 --- a/backend/constants/search.go +++ /dev/null @@ -1,9 +0,0 @@ -package constants - -const ( - CLUBS_INDEX string = "clubs" - EVENTS_INDEX string = "events" - SEARCH_QUERY_DEFAULT_MAX_MEMBERS int = 16384 -) - -var SEARCH_URI string diff --git a/backend/database/db.go b/backend/database/db.go index 4953c52a5..a76b19627 100644 --- a/backend/database/db.go +++ b/backend/database/db.go @@ -5,7 +5,6 @@ import ( "github.com/GenerateNU/sac/backend/config" "github.com/GenerateNU/sac/backend/constants" - "github.com/GenerateNU/sac/backend/database/cache" "github.com/GenerateNU/sac/backend/entities/models" "gorm.io/driver/postgres" "gorm.io/gorm" @@ -18,17 +17,18 @@ func ConfigureDB(settings config.Settings) (*gorm.DB, error) { return nil, err } - cachePlugin := &cache.Caches{ - Conf: &cache.Config{ - Easer: true, - Cacher: cache.NewRedisCacher(settings.DBCache), - TTL: constants.DB_CACHE_TTL, - }, - } - - if err := db.Use(cachePlugin); err != nil { - return nil, err - } + // MAKE REDIS NOT ERROR EVERYTIME CHALLENGE (IMPOSSIBLE) + // cachePlugin := &cache.Caches{ + // Conf: &cache.Config{ + // Easer: true, + // Cacher: cache.NewRedisCacher(settings.DBCache), + // TTL: constants.DB_CACHE_TTL, + // }, + // } + + // if err := db.Use(cachePlugin); err != nil { + // return nil, err + // } if err := CreateSuperUserIfNotExists(settings.SuperUser, db); err != nil { return nil, err diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index b39f27a3d..2ad364f57 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -39,48 +39,7 @@ services: volumes: - redis-limiter-data:/data - opensearch-node1: - image: opensearchproject/opensearch:latest - container_name: opensearch-node1 - environment: - - cluster.name=opensearch-cluster - - node.name=opensearch-node1 - - discovery.type=single-node - - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping - - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM - - DISABLE_SECURITY_PLUGIN=true # - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 # maximum number of open files for the OpenSearch user, set to at least 65536 on modern systems - hard: 65536 - volumes: - - opensearch-data1:/usr/share/opensearch/data - ports: - - 9200:9200 - - 9600:9600 # required for Performance Analyzer - networks: - - opensearch-net - opensearch-dashboards: - image: opensearchproject/opensearch-dashboards:latest - container_name: opensearch-dashboards - ports: - - 5601:5601 - expose: - - "5601" - environment: - OPENSEARCH_HOSTS: '["http://opensearch-node1:9200"]' - DISABLE_SECURITY_DASHBOARDS_PLUGIN: true - networks: - - opensearch-net - volumes: redis-db-cache-data: redis-session-data: - redis-limiter-data: - opensearch-data1: - -networks: - opensearch-net: + redis-limiter-data: \ No newline at end of file diff --git a/backend/entities/clubs/base/service.go b/backend/entities/clubs/base/service.go index 495727065..4197e2d27 100644 --- a/backend/entities/clubs/base/service.go +++ b/backend/entities/clubs/base/service.go @@ -1,9 +1,12 @@ package base import ( + "log/slog" + "github.com/GenerateNU/sac/backend/entities/clubs" "github.com/GenerateNU/sac/backend/entities/models" "github.com/GenerateNU/sac/backend/errs" + "github.com/GenerateNU/sac/backend/search" "github.com/GenerateNU/sac/backend/types" "github.com/GenerateNU/sac/backend/utilities" "github.com/garrettladley/fiberpaginate" @@ -40,7 +43,20 @@ func (c *ClubService) CreateClub(userID uuid.UUID, clubBody CreateClubRequestBod return nil, err } - return CreateClub(c.DB, userID, *club) + result, err := CreateClub(c.DB, userID, *club) + if err != nil { + return nil, err + } + + go func() { + err := search.UpsertClubEmbedding(c.DB, c.Search, result) + + if err != nil { + slog.Error("Upsert club embedding failed") + } + }() + + return result, err } func (c *ClubService) GetClub(id string) (*models.Club, error) { @@ -71,6 +87,19 @@ func (c *ClubService) UpdateClub(id string, clubBody UpdateClubRequestBody) (*mo return nil, err } + result, err := UpdateClub(c.DB, *idAsUUID, *club) + if err != nil { + return nil, err + } + + go func() { + err := search.UpsertClubEmbedding(c.DB, c.Search, result) + + if err != nil { + slog.Error("Upsert club embedding failed") + } + }() + return UpdateClub(c.DB, *idAsUUID, *club) } diff --git a/backend/entities/clubs/base/transactions.go b/backend/entities/clubs/base/transactions.go index f59a9f8da..7618b6c09 100644 --- a/backend/entities/clubs/base/transactions.go +++ b/backend/entities/clubs/base/transactions.go @@ -3,14 +3,12 @@ package base import ( "errors" - "github.com/GenerateNU/sac/backend/constants" "github.com/GenerateNU/sac/backend/database/cache" "github.com/GenerateNU/sac/backend/utilities" "github.com/garrettladley/fiberpaginate" "github.com/GenerateNU/sac/backend/entities/models" "github.com/GenerateNU/sac/backend/entities/users" - search "github.com/GenerateNU/sac/backend/search/base" "github.com/google/uuid" "gorm.io/gorm" "gorm.io/gorm/clause" @@ -61,10 +59,6 @@ func CreateClub(db *gorm.DB, userId uuid.UUID, club models.Club) (*models.Club, return nil, err } - if err := search.Upsert[models.Club](db, constants.CLUBS_INDEX, club.ID.String(), &club); err != nil { - return nil, err - } - return &club, nil } @@ -107,11 +101,6 @@ func UpdateClub(db *gorm.DB, id uuid.UUID, club models.Club) (*models.Club, erro return nil, err } - err = search.Upsert[models.Club](db, constants.CLUBS_INDEX, club.ID.String(), &club) - if err != nil { - return nil, err - } - return &existingClub, tx.Commit().Error } @@ -141,10 +130,5 @@ func DeleteClub(db *gorm.DB, id uuid.UUID) error { return result.Error } - err = search.Delete(db, constants.CLUBS_INDEX, id.String()) - if err != nil { - return err - } - return tx.Commit().Error } diff --git a/backend/entities/events/base/service.go b/backend/entities/events/base/service.go index 41ad3d996..78e3c47aa 100644 --- a/backend/entities/events/base/service.go +++ b/backend/entities/events/base/service.go @@ -2,9 +2,11 @@ package base import ( "errors" + "log/slog" "github.com/GenerateNU/sac/backend/entities/events" "github.com/GenerateNU/sac/backend/entities/models" + "github.com/GenerateNU/sac/backend/search" "github.com/GenerateNU/sac/backend/types" "github.com/GenerateNU/sac/backend/utilities" @@ -65,7 +67,19 @@ func (e *EventService) CreateEvent(body events.CreateEventRequestBody) (*models. return nil, err } - return CreateEvent(e.DB, *body.Into()) + result, err := CreateEvent(e.DB, *body.Into()) + if err != nil { + return nil, err + } + + go func() { + err := search.UpsertEventEmbedding(e.DB, e.Search, result) + + if err != nil { + slog.Error("Upsert club embedding failed") + } + }() + return result, err } func (e *EventService) UpdateEvent(eventID string, eventBody events.UpdateEventRequestBody) ([]models.Event, error) { @@ -83,7 +97,20 @@ func (e *EventService) UpdateEvent(eventID string, eventBody events.UpdateEventR return nil, err } - return UpdateEvent(e.DB, *idAsUUID, *event, *eventBody.UpdateAllInSeries) + result, err := UpdateEvent(e.DB, *idAsUUID, *event, *eventBody.UpdateAllInSeries) + if err != nil { + return nil, err + } + + go func() { + err := search.UpsertEventEmbedding(e.DB, e.Search, &result[0]) + + if err != nil { + slog.Error("Upsert club embedding failed") + } + }() + + return result, err } func (e *EventService) DeleteEvent(eventID string) error { diff --git a/backend/entities/events/base/transactions.go b/backend/entities/events/base/transactions.go index 2eb78afc9..5a57007bf 100644 --- a/backend/entities/events/base/transactions.go +++ b/backend/entities/events/base/transactions.go @@ -5,13 +5,11 @@ import ( "log/slog" "time" - "github.com/GenerateNU/sac/backend/constants" "github.com/GenerateNU/sac/backend/database/cache" "github.com/GenerateNU/sac/backend/entities/events" "github.com/GenerateNU/sac/backend/entities/models" "github.com/garrettladley/fiberpaginate" - search "github.com/GenerateNU/sac/backend/search/base" "github.com/GenerateNU/sac/backend/utilities" "github.com/google/uuid" @@ -41,11 +39,6 @@ func CreateEvent(db *gorm.DB, event models.Event) (*models.Event, error) { return nil, err } - err := search.Upsert[models.Event](db, constants.EVENTS_INDEX, event.ID.String(), &event) - if err != nil { - return nil, err - } - return &event, nil } @@ -66,11 +59,6 @@ func updateEvent(db *gorm.DB, id uuid.UUID, event models.Event) ([]models.Event, return nil, result.Error } - err := search.Upsert[models.Event](db, constants.EVENTS_INDEX, id.String(), &event) - if err != nil { - return nil, err - } - return []models.Event{resultingEvent}, nil } @@ -104,10 +92,5 @@ func DeleteEvent(db *gorm.DB, id uuid.UUID) error { return result.Error } - err := search.Delete(db, constants.EVENTS_INDEX, id.String()) - if err != nil { - return err - } - return nil } diff --git a/backend/entities/models/club.go b/backend/entities/models/club.go index 5aa2015c4..f2ce32e1e 100644 --- a/backend/entities/models/club.go +++ b/backend/entities/models/club.go @@ -35,30 +35,3 @@ type Club struct { Event []Event `gorm:"many2many:club_events;" json:"-"` Notification []Notification `gorm:"polymorphic:Reference;" json:"-"` } - -type ClubSearchDocument struct { - Name string `json:"name"` - Preview string `json:"preview"` - Description string `json:"description"` - NumMembers int `json:"num_members"` - Tags []string `json:"tags"` -} - -func (c *Club) ToSearchDocument() interface{} { - tagIds := make([]string, len(c.Tag)) - for i, tag := range c.Tag { - tagIds[i] = tag.ID.String() - } - - return ClubSearchDocument{ - Name: c.Name, - Preview: c.Preview, - Description: c.Description, - NumMembers: c.NumMembers, - Tags: tagIds, - } -} - -func (c Club) Preload(db *gorm.DB) *gorm.DB { - return db.Preload("Tag") -} diff --git a/backend/entities/models/event.go b/backend/entities/models/event.go index 4defdacea..b9ef3cf55 100644 --- a/backend/entities/models/event.go +++ b/backend/entities/models/event.go @@ -4,7 +4,6 @@ import ( "time" "github.com/google/uuid" - "gorm.io/gorm" ) type EventType string @@ -50,42 +49,3 @@ type Event struct { Tag []Tag `gorm:"many2many:event_tags;" json:"-"` Notification []Notification `gorm:"polymorphic:Reference;" json:"-"` } - -// How Events are represented in the OpenSearch index. -type EventSearchDocument struct { - Name string `json:"name"` - Preview string `json:"preview"` - Description string `json:"description"` - EventType EventType `json:"event_type"` - StartTime time.Time `json:"start_time"` - EndTime time.Time `json:"end_time"` - Clubs []string `json:"clubs"` - Tags []string `json:"tags"` -} - -func (c *Event) ToSearchDocument() interface{} { - tagIds := make([]string, len(c.Tag)) - for i, tag := range c.Tag { - tagIds[i] = tag.ID.String() - } - - clubIds := make([]string, len(c.Clubs)) - for i, club := range c.Clubs { - clubIds[i] = club.ID.String() - } - - return EventSearchDocument{ - Name: c.Name, - Preview: c.Preview, - Description: c.Description, - EventType: c.EventType, - StartTime: c.StartTime, - EndTime: c.EndTime, - Tags: tagIds, - Clubs: clubIds, - } -} - -func (c Event) Preload(db *gorm.DB) *gorm.DB { - return db.Preload("Tag").Preload("Club") -} diff --git a/backend/main.go b/backend/main.go index be739406c..a97f9c188 100644 --- a/backend/main.go +++ b/backend/main.go @@ -9,19 +9,18 @@ import ( "net" "os" "os/signal" + "path/filepath" "syscall" "github.com/GenerateNU/sac/backend/background" "github.com/GenerateNU/sac/backend/background/jobs" "github.com/GenerateNU/sac/backend/config" - "github.com/GenerateNU/sac/backend/constants" "github.com/GenerateNU/sac/backend/database" "github.com/GenerateNU/sac/backend/database/store" "github.com/GenerateNU/sac/backend/integrations" "github.com/GenerateNU/sac/backend/integrations/email" "github.com/GenerateNU/sac/backend/integrations/file" "github.com/GenerateNU/sac/backend/integrations/oauth/soth/sothic" - "github.com/GenerateNU/sac/backend/search" "github.com/GenerateNU/sac/backend/server" "github.com/GenerateNU/sac/backend/telemetry" "github.com/GenerateNU/sac/backend/utilities" @@ -32,14 +31,12 @@ import ( ) func main() { - onlyMigrate, seedSearch, configPath := parseFlags() + onlyMigrate, configPath := parseFlags() config, err := config.GetConfiguration(*configPath) if err != nil { utilities.Exit("Error getting configuration: %s", err.Error()) } - constants.SEARCH_URI = config.Search.URI - if checkServerRunning(config.Application.Host, config.Application.Port) == nil { utilities.Exit("A server is already running on %s:%d.\n", config.Application.Host, config.Application.Port) } @@ -55,15 +52,11 @@ func main() { return } - if *seedSearch { - seedSearchData(db) - } - if err := database.ConnPooling(db); err != nil { utilities.Exit("Error with connection pooling: %s", err.Error()) } - startBackgroundJobs(ctx, db) + startBackgroundJobs(ctx, db, config) stores := store.ConfigureStores(config.RedisLimiter) sothic.Init(config.Session) @@ -79,10 +72,9 @@ func main() { waitForShutdown(app) } -func parseFlags() (onlyMigrate, seedSearch *bool, configPath *string) { +func parseFlags() (onlyMigrate *bool, configPath *string) { onlyMigrate = flag.Bool("only-migrate", false, "Specify if you want to only perform the database migration") - seedSearch = flag.Bool("seed-search", true, "Specify if you want to seed the opensearch nodes.") - configPath = flag.String("config", "", "Specify the path to the config file (.env)") + configPath = flag.String("config", filepath.Join("..", "config", ".env.dev"), "Specify the path to the config file (.env)") flag.Parse() return } @@ -97,19 +89,11 @@ func checkServerRunning(host string, port uint16) error { return nil } -func seedSearchData(db *gorm.DB) { - if err := search.SeedClubs(db); err != nil { - return - } - - if err := search.SeedEvents(db); err != nil { - return - } -} - -func startBackgroundJobs(ctx context.Context, db *gorm.DB) { - jobs := jobs.New(db) +func startBackgroundJobs(ctx context.Context, db *gorm.DB, settings *config.Settings) { + jobs := jobs.New(db, settings) background.Go(jobs.WelcomeSender(ctx)) + background.Go(jobs.ClubEmbeddings(ctx)) + background.Go(jobs.EventEmbeddings(ctx)) } func configureIntegrations(config *config.Integrations) *integrations.Integrations { diff --git a/backend/migrations/20240617161444_create_search_indices.down.sql b/backend/migrations/20240617161444_create_search_indices.down.sql new file mode 100644 index 000000000..d58ddb323 --- /dev/null +++ b/backend/migrations/20240617161444_create_search_indices.down.sql @@ -0,0 +1,21 @@ +BEGIN; + +ALTER TABLE clubs + DROP COLUMN IF EXISTS clubsearch_index_col; + +ALTER TABLE clubs + DROP COLUMN IF EXISTS embedding; + +DROP INDEX IF EXISTS clubsearch_index; + +ALTER TABLE "events" + DROP COLUMN IF EXISTS eventsearch_index_col; + +ALTER TABLE "events" + DROP COLUMN IF EXISTS embedding; + +DROP INDEX IF EXISTS eventsearch_index; + +DROP EXTENSION IF EXISTS vector; + +COMMIT; \ No newline at end of file diff --git a/backend/migrations/20240617161444_create_search_indices.up.sql b/backend/migrations/20240617161444_create_search_indices.up.sql new file mode 100644 index 000000000..a85d5b45c --- /dev/null +++ b/backend/migrations/20240617161444_create_search_indices.up.sql @@ -0,0 +1,25 @@ +BEGIN; + +CREATE EXTENSION vector; + +ALTER TABLE clubs + ADD COLUMN clubsearch_index_col tsvector + GENERATED ALWAYS AS (to_tsvector('english', coalesce(name, '') || ' ' || coalesce(preview, '') || ' ' || coalesce(description, ''))) STORED; + +ALTER TABLE clubs + ADD COLUMN embedding vector(512); + +CREATE INDEX + clubsearch_index ON clubs using GIN(clubsearch_index_col); + +ALTER TABLE "events" + ADD COLUMN eventsearch_index_col tsvector + GENERATED ALWAYS AS (to_tsvector('english', coalesce(name, '') || ' ' || coalesce(preview, '') || ' ' || coalesce(description, ''))) STORED; + +ALTER TABLE "events" + ADD COLUMN embedding vector(512); + +CREATE INDEX + eventsearch_index on "events" using GIN(eventsearch_index_col); + +COMMIT; \ No newline at end of file diff --git a/backend/search/base/service.go b/backend/search/base/service.go index 347e96cb8..00576e823 100644 --- a/backend/search/base/service.go +++ b/backend/search/base/service.go @@ -21,9 +21,9 @@ func NewSearchService(serviceParams types.ServiceParams) SearchServiceInterface } func (s *SearchService) SearchClubs(query search_types.ClubSearchRequest) (*search_types.SearchResult[models.Club], error) { - return Search[models.Club](s.DB, &query) + return SearchClubs(s.DB, s.Search, &query) } func (s *SearchService) SearchEvents(query search_types.EventSearchRequest) (*search_types.SearchResult[models.Event], error) { - return Search[models.Event](s.DB, &query) + return SearchEvents(s.DB, &query) } diff --git a/backend/search/base/transactions.go b/backend/search/base/transactions.go index f97301570..b95eccbe1 100644 --- a/backend/search/base/transactions.go +++ b/backend/search/base/transactions.go @@ -2,112 +2,134 @@ package base import ( "fmt" - "io" - "log/slog" - "net/http" + "strings" + "time" - json "github.com/goccy/go-json" - - "github.com/GenerateNU/sac/backend/constants" - "github.com/GenerateNU/sac/backend/database/cache" + "github.com/GenerateNU/sac/backend/config" + "github.com/GenerateNU/sac/backend/entities/models" + "github.com/GenerateNU/sac/backend/search" "github.com/GenerateNU/sac/backend/search/types" - "github.com/GenerateNU/sac/backend/utilities" "gorm.io/gorm" ) -// Do a GET request to the OpenSearch instance. -func doSearchGetRequest[T, V any](url string, requestBody T) (*V, error) { - payload, err := json.Marshal(requestBody) - if err != nil { - return nil, err - } +// Use this over strings.join so elements can be single quoted if necessary. +func joinQuoted(elems []string, separator string) string { + elemsQuoted := make([]string, len(elems)) - resp, err := utilities.Request(http.MethodGet, fmt.Sprintf("%s%s", constants.SEARCH_URI, url), payload, utilities.JSON()) - if err != nil { - return nil, err + for i, elem := range elems { + elemsQuoted[i] = fmt.Sprintf("'%s'", elem) } - defer resp.Body.Close() - responseBody, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } + return strings.Join(elemsQuoted, separator) +} - var responseData V - err = json.Unmarshal(responseBody, &responseData) - if err != nil { +func SearchClubs(db *gorm.DB, s *config.SearchSettings, query *types.ClubSearchRequest) (*types.SearchResult[models.Club], error) { + var clubs []models.Club + + dbQuery := db.Model(&clubs) + + finalQuery := clubKeywordSearchSQL(query) + + if err := dbQuery.Raw(finalQuery).Scan(&clubs).Error; err != nil { return nil, err } - return &responseData, nil + // return results + return &types.SearchResult[models.Club]{ + Results: clubs, + }, nil } -func Search[T types.Searchable](db *gorm.DB, query types.SearchRequest) (*types.SearchResult[T], error) { - result, err := doSearchGetRequest[types.SearchEndpointRequest, types.SearchEndpointResponse](fmt.Sprintf("/%s/_search", query.Index()), query.ToSearchEndpointRequest()) - if err != nil { - return nil, err +func clubKeywordSearchSQL(query *types.ClubSearchRequest) string { + var whereClauses []string + rankClause := "" + innerJoin := "" + + if query.Search != "" { + whereClauses = + append(whereClauses, + fmt.Sprintf("clubsearch_index_col @@ plainto_tsquery('%s')", query.Search)) + + rankClause = fmt.Sprintf(", RANK () OVER (ORDER BY ts_rank_cd(clubsearch_index_col, plainto_tsquery('%s')) DESC)", query.Search) } - ids := make([]string, len(result.Hits.Hits)) - for i, result := range result.Hits.Hits { - ids[i] = result.Id + if query.MaxMembers != 0 { + whereClauses = + append(whereClauses, + fmt.Sprintf("num_members <= %d", query.MaxMembers)) } - db = cache.SetUseCache(db, true) + if query.MinMembers != 0 { + whereClauses = + append(whereClauses, + fmt.Sprintf("num_members >= %d", query.MinMembers)) + } - var results []T - if err = query.Preload(db).Where("id IN ?", ids).Find(&results).Error; err != nil { - return nil, err + if len(query.Tags) != 0 { + innerJoin = "INNER JOIN club_tags ON clubs.id = club_tags.club_id" + whereClauses = append(whereClauses, + fmt.Sprintf("club_tags.tag_id IN (%s)", joinQuoted(query.Tags, ","))) } - return &types.SearchResult[T]{ - Results: results, - }, nil + return fmt.Sprintf("SELECT *%s FROM clubs %s WHERE %s", rankClause, innerJoin, strings.Join(whereClauses, " AND ")) } -func Upsert[T types.Searchable](db *gorm.DB, index string, uuid string, model types.ToSearchDocument) error { - var elem T +//lint:ignore U1000 Ignore unused function temporarily for debugging +func clubSemanticSearchSQL(embedding []float32) string { + embeddingStr := search.FloatArrayToSql(embedding) - query := cache.SetUseCache(db, true).Model(&elem) + return fmt.Sprintf("SELECT id, RANK () OVER (ORDER BY embedding <=> '[%s]') AS rank FROM clubs ORDER BY embedding <=> '[%s]' LIMIT 20", embeddingStr, embeddingStr) +} - query = elem.Preload(query) +func SearchEvents(db *gorm.DB, query *types.EventSearchRequest) (*types.SearchResult[models.Event], error) { + var events []models.Event - if err := query.Where("id = ?", uuid).Find(&elem).Error; err != nil { - return err - } + dbQuery := db.Model(&events) - doc := model.ToSearchDocument() - requestBody := types.Json{ - "doc": doc, - "doc_as_upsert": true, - } - payload, err := json.Marshal(requestBody) - if err != nil { - return err + var whereClauses []string + var innerJoins []string + + if query.Search != "" { + whereClauses = + append(whereClauses, + fmt.Sprintf("eventsearch_index_col @@ plainto_tsquery('%s')", query.Search)) } - resp, err := utilities.Request(http.MethodPost, fmt.Sprintf("%s/%s/_update/%s", constants.SEARCH_URI, index, uuid), payload, utilities.JSON()) - if err != nil { - return err + if !query.StartTime.IsZero() { + whereClauses = + append(whereClauses, + fmt.Sprintf("start_time >= %s", query.StartTime.Format(time.DateTime))) } - defer resp.Body.Close() - return nil -} + if !query.EndTime.IsZero() { + whereClauses = + append(whereClauses, + fmt.Sprintf("end_time <= %s", query.EndTime.Format(time.DateTime))) + } -func Delete(db *gorm.DB, index string, uuid string) error { - resp, err := utilities.Request(http.MethodDelete, fmt.Sprintf("%s/%s/_doc/%s", constants.SEARCH_URI, index, uuid), nil, utilities.JSON()) - if err != nil { - return err + if len(query.Tags) != 0 { + innerJoins = append(innerJoins, "INNER JOIN event_tags ON events.id = event_tags.event_id") + whereClauses = append(whereClauses, + fmt.Sprintf("event_tags.tag_id IN (%s)", joinQuoted(query.Tags, ","))) } - defer resp.Body.Close() - responseBody, err := io.ReadAll(resp.Body) - if err != nil { - return err + if len(query.Clubs) != 0 { + innerJoins = append(innerJoins, "INNER JOIN club_events ON events.id = club_events.event_id") + whereClauses = append(whereClauses, + fmt.Sprintf("club_events.club_id IN (%s)", joinQuoted(query.Clubs, ","))) } - slog.Info("delete successful", "response", string(responseBody), "index", index, "uuid", uuid) + finalQuery := fmt.Sprintf( + "SELECT * FROM events %s WHERE %s LIMIT 20", + strings.Join(innerJoins, " "), + strings.Join(whereClauses, " AND ")) + + if err := dbQuery.Raw(finalQuery).Scan(&events).Error; err != nil { + return nil, err + } - return nil + // return results + return &types.SearchResult[models.Event]{ + Results: events, + }, nil } diff --git a/backend/search/embeddings.go b/backend/search/embeddings.go new file mode 100644 index 000000000..e04845983 --- /dev/null +++ b/backend/search/embeddings.go @@ -0,0 +1,128 @@ +package search + +import ( + "bytes" + "fmt" + "net/http" + "strconv" + "strings" + + "github.com/GenerateNU/sac/backend/config" + "github.com/GenerateNU/sac/backend/entities/models" + "github.com/GenerateNU/sac/backend/utilities" + "github.com/goccy/go-json" + "github.com/gofiber/fiber/v2" + "gorm.io/gorm" + "gorm.io/gorm/logger" +) + +type CreateEmbeddingRequestBody struct { + Input []string `json:"input"` + Model string `json:"model"` + Dimensions int `json:"dimensions"` +} + +type EmbeddingResponseItem struct { + Embedding []float32 `json:"embedding"` +} + +type CreateEmbeddingResponseBody struct { + Data []EmbeddingResponseItem `json:"data"` +} + +func UpsertClubEmbedding(db *gorm.DB, s *config.SearchSettings, club *models.Club) error { + embedding, err := + CreateEmbedding(s, fmt.Sprintf("%s %s %s", club.Name, club.Preview, club.Description)) + if err != nil { + return err + } + + embeddingStr := FloatArrayToSql(embedding) + + queryString := fmt.Sprintf( + "UPDATE clubs SET embedding = '[%s]' WHERE id = '%s'", embeddingStr, club.ID.String()) + + // Keep stdout/logs clean, don't output 512 floats + session := db.Session(&gorm.Session{Logger: logger.Default.LogMode(logger.Error)}) + + if err := session.Exec(queryString).Error; err != nil { + return err + } + + return nil +} + +func UpsertEventEmbedding(db *gorm.DB, s *config.SearchSettings, event *models.Event) error { + embedding, err := + CreateEmbedding(s, fmt.Sprintf("%s %s %s", event.Name, event.Preview, event.Description)) + if err != nil { + return err + } + + embeddingStr := FloatArrayToSql(embedding) + + queryString := fmt.Sprintf( + "UPDATE events SET embedding = '[%s]' WHERE id = '%s'", embeddingStr, event.ID.String()) + + // Keep stdout/logs clean, don't output 512 floats + session := db.Session(&gorm.Session{Logger: logger.Default.LogMode(logger.Error)}) + + if err := session.Exec(queryString).Error; err != nil { + return err + } + + return nil +} + +func FloatArrayToSql(embedding []float32) string { + embeddingArr := make([]string, len(embedding)) + for i, f := range embedding { + embeddingArr[i] = strconv.FormatFloat(float64(f), 'f', -1, 32) + } + embeddingStr := strings.Join(embeddingArr, ", ") + return embeddingStr +} + +func CreateEmbedding(s *config.SearchSettings, item string) ([]float32, error) { + embeddingBody, err := json.Marshal( + CreateEmbeddingRequestBody{ + Input: []string{item}, + Model: "text-embedding-3-large", + Dimensions: 512, + }) + if err != nil { + return nil, err + } + + req, err := http.NewRequest(fiber.MethodPost, + "https://api.openai.com/v1/embeddings", + bytes.NewBuffer(embeddingBody)) + if err != nil { + return nil, err + } + + req = utilities.ApplyModifiers(req, + utilities.Authorization(s.OpenAIApiKey.Expose()), + utilities.JSON(), + ) + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + + defer resp.Body.Close() + + var embeddingResultBody CreateEmbeddingResponseBody + + err = json.NewDecoder(resp.Body).Decode(&embeddingResultBody) + if err != nil { + return nil, err + } + + if len(embeddingResultBody.Data) < 1 { + return nil, err + } + + return embeddingResultBody.Data[0].Embedding, nil +} diff --git a/backend/search/seed.go b/backend/search/seed.go deleted file mode 100644 index 2408373f0..000000000 --- a/backend/search/seed.go +++ /dev/null @@ -1,125 +0,0 @@ -package search - -import ( - "fmt" - "net/http" - "os" - "strings" - - go_json "github.com/goccy/go-json" - - "github.com/GenerateNU/sac/backend/constants" - "github.com/GenerateNU/sac/backend/entities/models" - search_types "github.com/GenerateNU/sac/backend/search/types" - "github.com/GenerateNU/sac/backend/transactions" - "github.com/GenerateNU/sac/backend/utilities" - "gorm.io/gorm" -) - -// FIXME: For now it is performant enough to just delete the index and redo, but in the future -// we may have to start worrying about updating in place for this, maybe a migration-esque strategy as well. (???) -func SeedClubs(db *gorm.DB) error { - stdout := os.Stdout - - stdout.WriteString("Deleting existing club index...\n") - req, err := http.NewRequest("DELETE", fmt.Sprintf("%s/clubs", constants.SEARCH_URI), nil) - if err != nil { - return err - } - - _, err = http.DefaultClient.Do(req) - if err != nil { - return err - } - - var clubs []models.Club - - if err := transactions.PreloadTag()(db).Find(&clubs).Error; err != nil { - return err - } - - var requestBodyBuilder strings.Builder - - for _, club := range clubs { - indexData := search_types.BulkRequestCreate{} - indexData.Create.Index = "clubs" - indexData.Create.Id = club.ID.String() - indexJson, err := go_json.Marshal(indexData) - if err != nil { - return err - } - - clubData := club.ToSearchDocument() - clubJson, err := go_json.Marshal(clubData) - if err != nil { - return err - } - - requestBodyBuilder.WriteString(fmt.Sprintf("%s\n%s\n", indexJson, clubJson)) - } - - resp, err := utilities.Request(http.MethodPost, fmt.Sprintf("%s/_bulk", constants.SEARCH_URI), []byte(requestBodyBuilder.String()), utilities.JSON()) - if err != nil { - stdout.WriteString(fmt.Sprintf("Error making _bulk request for club seeding: %s\n", err.Error())) - } - defer resp.Body.Close() - - if !utilities.IsOk(resp.StatusCode) { - stdout.WriteString(fmt.Sprintf("Error making _bulk request for club seeding: response returned %d code\n", resp.StatusCode)) - } - - stdout.WriteString("Seeding clubs finished") - - return nil -} - -// FIXME: see fixme comment for SeedClubs() -func SeedEvents(db *gorm.DB) error { - stdout := os.Stdout - - stdout.WriteString("Deleting existing event index...\n") - resp, err := utilities.Request(http.MethodDelete, fmt.Sprintf("%s/events", constants.SEARCH_URI), nil, utilities.JSON()) - if err != nil { - return err - } - defer resp.Body.Close() - var events []models.Event - - if err := db.Preload("Tag").Preload("Clubs").Not("is_draft = ?", true).Not("is_archived = ?", true).Find(&events).Error; err != nil { - return err - } - - var requestBodyBuilder strings.Builder - - for _, event := range events { - indexData := search_types.BulkRequestCreate{} - indexData.Create.Index = "events" - indexData.Create.Id = event.ID.String() - indexJson, err := go_json.Marshal(indexData) - if err != nil { - return err - } - - eventData := event.ToSearchDocument() - eventJson, err := go_json.Marshal(eventData) - if err != nil { - return err - } - - requestBodyBuilder.WriteString(fmt.Sprintf("%s\n%s\n", indexJson, eventJson)) - } - - resp, err = utilities.Request(http.MethodPost, fmt.Sprintf("%s/_bulk", constants.SEARCH_URI), []byte(requestBodyBuilder.String()), utilities.JSON()) - if err != nil { - stdout.WriteString(fmt.Sprintf("Error making _bulk request for event seeding: %s\n", err.Error())) - } - defer resp.Body.Close() - - if !utilities.IsOk(resp.StatusCode) { - stdout.WriteString(fmt.Sprintf("Error making _bulk request for event seeding: response returned %d code\n", resp.StatusCode)) - } - - stdout.WriteString("Seeding events finished") - - return nil -} diff --git a/backend/search/types.go b/backend/search/types.go deleted file mode 100644 index 4151122c4..000000000 --- a/backend/search/types.go +++ /dev/null @@ -1,11 +0,0 @@ -// Types that are needed to interact with OpenSearch's API. -package search - -import ( - "github.com/GenerateNU/sac/backend/entities/models" -) - -// The (successful) response of an API GET /api/v1/search/events request. -type EventSearchResult struct { - Results []models.Event `json:"results"` -} diff --git a/backend/search/types/document.go b/backend/search/types/document.go deleted file mode 100644 index 317e9e981..000000000 --- a/backend/search/types/document.go +++ /dev/null @@ -1,5 +0,0 @@ -package types - -type ToSearchDocument interface { - ToSearchDocument() interface{} -} diff --git a/backend/search/types/request.go b/backend/search/types/request.go index 7a50c598f..fcdf2b8a9 100644 --- a/backend/search/types/request.go +++ b/backend/search/types/request.go @@ -1,36 +1,11 @@ package types import ( - "strings" "time" - "github.com/GenerateNU/sac/backend/constants" "github.com/GenerateNU/sac/backend/entities/models" - "gorm.io/gorm" ) -type SearchRequest interface { - ToSearchEndpointRequest() SearchEndpointRequest - Index() string - Preload(db *gorm.DB) *gorm.DB -} - -// Used for making OpenSearch GET //_search requests. -// OpenSearch's DSL is such that we can't make this a nice static struct with all the fields -// specified; depending on what we are looking to search we'll need to add different clauses. -type SearchEndpointRequest Json - -// Used for responses from OpenSearch GET //_search requests. -type SearchEndpointResponse struct { - Hits struct { - Hits []struct { - Index string `json:"_index"` - Score float32 `json:"_score"` - Id string `json:"_id"` - } `json:"hits"` - } `json:"hits"` -} - // JSON parameters for an API GET /api/v1/search/clubs request. type ClubSearchRequest struct { Search string `json:"search"` @@ -39,63 +14,6 @@ type ClubSearchRequest struct { Tags []string `json:"tags"` } -// To Query DSL JSON -func (q *ClubSearchRequest) ToSearchEndpointRequest() SearchEndpointRequest { - query := BooleanQuery{} - - if q.Search != "" { - query.Must = append(query.Must, Json{ - "query_string": Json{ - "query": q.Search, - }, - }) - } - - maxMembers := 0 - if q.MaxMembers != 0 { - maxMembers = q.MaxMembers - } else { - maxMembers = constants.SEARCH_QUERY_DEFAULT_MAX_MEMBERS // FIXME: may break in the future (will a club ever have more than 16384 members?) - } - - query.Filter = append(query.Filter, Json{ - "range": Json{ - "num_members": Json{ - "lte": maxMembers, - // by default this 0, hence why we dont check it similar to maxMembers - "gte": q.MinMembers, - }, - }, - }) - - if len(q.Tags) != 0 { - query.Filter = append(query.Filter, Json{ - "match": Json{ - "tags": Json{ - "query": strings.Join(q.Tags, " "), - "operator": "or", - "minimum_should_match": 1, - }, - }, - }) - } - - return SearchEndpointRequest{ - "query": Json{ - "bool": query, - }, - } -} - -func (q *ClubSearchRequest) Preload(db *gorm.DB) *gorm.DB { - return db.Model(&models.Club{}).Preload("Tag") -} - -func (q *ClubSearchRequest) Index() string { - return "clubs" -} - -// JSON parameters for an API GET /api/v1/search/events request. type EventSearchRequest struct { Search string `json:"search"` StartTime time.Time `json:"start_time"` @@ -104,82 +22,3 @@ type EventSearchRequest struct { Clubs []string `json:"clubs"` Tags []string `json:"tags"` } - -// To Query DSL JSON -func (q *EventSearchRequest) ToSearchEndpointRequest() SearchEndpointRequest { - query := BooleanQuery{} - - if q.Search != "" { - query.Must = append(query.Must, Json{ - "query_string": Json{ - "query": q.Search, - }, - }) - } - - if !q.StartTime.IsZero() { - query.Filter = append(query.Filter, Json{ - "range": Json{ - "start_time": Json{ - "gte": q.StartTime, - }, - }, - }) - } - - if !q.EndTime.IsZero() { - query.Filter = append(query.Filter, Json{ - "range": Json{ - "end_time": Json{ - "lte": q.EndTime, - }, - }, - }) - } - - if len(q.EventType) != 0 { - query.Filter = append(query.Filter, Json{ - "terms": Json{ - "event_type": q.EventType, - }, - }) - } - - if len(q.Clubs) != 0 { - query.Filter = append(query.Filter, Json{ - "match": Json{ - "tags": Json{ - "query": strings.Join(q.Clubs, " "), - "operator": "or", - "minimum_should_match": 1, - }, - }, - }) - } - - if len(q.Tags) != 0 { - query.Filter = append(query.Filter, Json{ - "match": Json{ - "tags": Json{ - "query": strings.Join(q.Tags, " "), - "operator": "or", - "minimum_should_match": 1, - }, - }, - }) - } - - return SearchEndpointRequest{ - "query": Json{ - "bool": query, - }, - } -} - -func (q *EventSearchRequest) Preload(db *gorm.DB) *gorm.DB { - return db.Model(&models.Event{}).Preload("Tag").Preload("Clubs") -} - -func (q *EventSearchRequest) Index() string { - return "events" -} diff --git a/backend/search/types/utilities.go b/backend/search/types/utilities.go deleted file mode 100644 index 364bf464d..000000000 --- a/backend/search/types/utilities.go +++ /dev/null @@ -1,28 +0,0 @@ -package types - -import ( - "github.com/GenerateNU/sac/backend/entities/models" - "gorm.io/gorm" -) - -type Json map[string]interface{} - -// Used for SearchEndpointRequests. -type BooleanQuery struct { - Must []Json `json:"must"` - Should []Json `json:"should"` - Filter []Json `json:"filter"` -} - -// Used for making OpenSearch /_bulk requests. -type BulkRequestCreate struct { - Create struct { - Index string `json:"_index"` - Id string `json:"_id"` - } `json:"create"` -} - -type Searchable interface { - models.Club | models.Event - Preload(db *gorm.DB) *gorm.DB -} diff --git a/backend/server/server.go b/backend/server/server.go index 0f6565da1..402db57af 100644 --- a/backend/server/server.go +++ b/backend/server/server.go @@ -74,7 +74,7 @@ func Init(db *gorm.DB, stores *store.Stores, integrations integrations.Integrati types.NewServiceParams( db, validate, - &settings.Calendar, + &settings, stores, integrations, ), diff --git a/backend/types/params.go b/backend/types/params.go index 339dc4cd6..a4a769046 100644 --- a/backend/types/params.go +++ b/backend/types/params.go @@ -31,15 +31,17 @@ type ServiceParams struct { DB *gorm.DB Validate *validator.Validate Calendar *config.CalendarSettings + Search *config.SearchSettings Stores *store.Stores Integrations integrations.Integrations } -func NewServiceParams(db *gorm.DB, validate *validator.Validate, calendar *config.CalendarSettings, stores *store.Stores, integrations integrations.Integrations) ServiceParams { +func NewServiceParams(db *gorm.DB, validate *validator.Validate, settings *config.Settings, stores *store.Stores, integrations integrations.Integrations) ServiceParams { return ServiceParams{ DB: db, Validate: validate, - Calendar: calendar, + Calendar: &settings.Calendar, + Search: &settings.Search, Stores: stores, Integrations: integrations, } diff --git a/config/.env.template b/config/.env.template index 9f0f92fc3..7fc4702d0 100644 --- a/config/.env.template +++ b/config/.env.template @@ -49,4 +49,4 @@ SAC_MICROSOFT_OAUTH_WEB_SECRET=test SAC_MICROSOFT_OAUTH_MOBILE_KEY=test -SAC_SEARCH_URI="http://127.0.0.1:9200" \ No newline at end of file +SAC_SEARCH_OPENAI_API_KEY="OPENAI_API_KEY" \ No newline at end of file