75 changes: 1 addition & 74 deletions backend/app/seed_data/seed_data.json
@@ -53,77 +53,4 @@
"is_deleted": false,
"deleted_at": null
}
],
"credentials": [
{
"is_active": true,
"provider": "openai",
"credential": "{\"api_key\": \"sk-proj-GlificI3i5SCxN\"}",
"project_name": "Glific",
"organization_name": "Project Tech4dev",
"deleted_at": null
},
{
"is_active": true,
"provider": "openai",
"credential": "{\"api_key\": \"sk-proj-DalgoI3i5SCxN\"}",
"project_name": "Dalgo",
"organization_name": "Project Tech4dev",
"deleted_at": null
},
{
"is_active": true,
"provider": "langfuse",
"credential": "{\"secret_key\": \"sk-lf-test-glific-secret\", \"public_key\": \"pk-lf-test-glific-public\", \"host\": \"https://cloud.langfuse.com\"}",
"project_name": "Glific",
"organization_name": "Project Tech4dev",
"deleted_at": null
},
{
"is_active": true,
"provider": "langfuse",
"credential": "{\"secret_key\": \"sk-lf-test-dalgo-secret\", \"public_key\": \"pk-lf-test-dalgo-public\", \"host\": \"https://cloud.langfuse.com\"}",
"project_name": "Dalgo",
"organization_name": "Project Tech4dev",
"deleted_at": null
}
],
"assistants": [
{
"assistant_id": "assistant_glific",
"name": "Test Assistant Glific",
"instructions": "Test instructions",
"model": "gpt-4o",
"vector_store_ids": ["vs_glific"],
"temperature": 0.1,
"max_num_results": 20,
"project_name": "Glific",
"organization_name": "Project Tech4dev"
},
{
"assistant_id": "assistant_dalgo",
"name": "Test Assistant Dalgo",
"instructions": "Test instructions",
"model": "gpt-4o",
"vector_store_ids": ["vs_dalgo"],
"temperature": 0.1,
"max_num_results": 20,
"project_name": "Dalgo",
"organization_name": "Project Tech4dev"
}
],
"documents": [
{
"fname": "glific_sample.json",
"object_store_url": "s3://test-bucket/glific_sample.json",
"project_name": "Glific",
"organization_name": "Project Tech4dev"
},
{
"fname": "dalgo_sample.json",
"object_store_url": "s3://test-bucket/dalgo_sample.json",
"project_name": "Dalgo",
"organization_name": "Project Tech4dev"
}
]
}
]}
207 changes: 2 additions & 205 deletions backend/app/seed_data/seed_data.py
@@ -9,15 +9,12 @@

from app.core.db import engine
from app.core import settings
from app.core.security import get_password_hash, encrypt_credentials
from app.core.security import get_password_hash
from app.models import (
APIKey,
Organization,
Project,
User,
Credential,
Assistant,
Document,
)


@@ -51,34 +48,6 @@ class APIKeyData(BaseModel):
deleted_at: Optional[str] = None


class CredentialData(BaseModel):
is_active: bool
provider: str
credential: str
organization_name: str
project_name: str
deleted_at: Optional[str] = None


class AssistantData(BaseModel):
assistant_id: str
name: str
instructions: str
model: str
vector_store_ids: list[str]
temperature: float
max_num_results: int
project_name: str
organization_name: str


class DocumentData(BaseModel):
fname: str
object_store_url: str
organization_name: str
project_name: str


def load_seed_data() -> dict:
"""Load seed data from JSON file."""
json_path = Path(__file__).parent / "seed_data.json"
@@ -217,158 +186,13 @@ def create_api_key(session: Session, api_key_data_raw: dict) -> APIKey:
raise


def create_credential(session: Session, credential_data_raw: dict) -> Credential:
"""Create a credential from data."""
try:
credential_data = CredentialData.model_validate(credential_data_raw)
logging.info(f"Creating credential for provider: {credential_data.provider}")

# Query organization ID by name
organization = session.exec(
select(Organization).where(
Organization.name == credential_data.organization_name
)
).first()
if not organization:
raise ValueError(
f"Organization '{credential_data.organization_name}' not found"
)

# Query organization ID by name
project = session.exec(
select(Project).where(Project.name == credential_data.project_name)
).first()
if not project:
raise ValueError(f"Project '{credential_data.project_name}' not found")

# Encrypt the credential data - convert string to dict first, then encrypt
credential_dict = json.loads(credential_data.credential)
encrypted_credential = encrypt_credentials(credential_dict)

credential = Credential(
is_active=credential_data.is_active,
provider=credential_data.provider,
credential=encrypted_credential,
organization_id=organization.id,
project_id=project.id,
deleted_at=credential_data.deleted_at,
)
session.add(credential)
session.flush() # Ensure ID is assigned
return credential
except Exception as e:
logging.error(f"Error creating credential: {e}")
raise


def create_assistant(session: Session, assistant_data_raw: dict) -> Assistant:
"""Create an assistant from data."""
try:
assistant_data = AssistantData.model_validate(assistant_data_raw)
logging.info(f"Creating assistant: {assistant_data.name}")

# Query organization ID by name
organization = session.exec(
select(Organization).where(
Organization.name == assistant_data.organization_name
)
).first()
if not organization:
raise ValueError(
f"Organization '{assistant_data.organization_name}' not found"
)

# Query project ID by name
project = session.exec(
select(Project).where(Project.name == assistant_data.project_name)
).first()
if not project:
raise ValueError(f"Project '{assistant_data.project_name}' not found")

assistant = Assistant(
assistant_id=assistant_data.assistant_id,
name=assistant_data.name,
instructions=assistant_data.instructions,
model=assistant_data.model,
vector_store_ids=assistant_data.vector_store_ids,
temperature=assistant_data.temperature,
max_num_results=assistant_data.max_num_results,
organization_id=organization.id,
project_id=project.id,
)
session.add(assistant)
session.flush() # Ensure ID is assigned
return assistant
except Exception as e:
logging.error(f"Error creating assistant: {e}")
raise


def create_document(session: Session, document_data_raw: dict) -> Document:
"""Create a document from seed data."""
try:
document_data = DocumentData.model_validate(document_data_raw)
logging.info(f"Creating document: {document_data.fname}")

# Get the organization
organization = session.exec(
select(Organization).where(
Organization.name == document_data.organization_name
)
).first()

if not organization:
raise ValueError(
f"Organization '{document_data.organization_name}' not found"
)

# Get the project
project = session.exec(
select(Project).where(
Project.name == document_data.project_name,
Project.organization_id == organization.id,
)
).first()
if not project:
raise ValueError(
f"Project '{document_data.project_name}' not found in organization '{organization.name}'"
)

users = session.exec(
select(User)
.join(APIKey, APIKey.user_id == User.id)
.where(APIKey.organization_id == organization.id)
).all()

user = users[1]
if not user:
raise ValueError(f"No user found in organization '{organization.name}'")

# Create and store document
document = Document(
fname=document_data.fname,
object_store_url=document_data.object_store_url,
project_id=project.id,
)

session.add(document)
session.flush()
return document

except Exception as e:
logging.error(f"Error creating document: {e}")
raise


def clear_database(session: Session) -> None:
"""Clear all seeded data from the database."""
logging.info("Clearing existing data...")
session.exec(delete(Assistant))
session.exec(delete(APIKey))
session.exec(delete(Project))
session.exec(delete(Organization))
session.exec(delete(User))
session.exec(delete(Credential))
session.commit()
logging.info("Existing data cleared.")

@@ -382,13 +206,8 @@ def seed_database(session: Session) -> None:
- Projects (Glific, Dalgo)
- Users (superuser, test user)
- API Keys for both users
- OpenAI Credentials for both projects (ensures all tests have credentials)
- Langfuse Credentials for both projects (for tracing and observability tests)
- Test Assistants for both projects
- Sample Documents

This seed data is used by the test suite and ensures that all tests
can rely on both OpenAI and Langfuse credentials being available without manual setup.
This seed data is used by the test suite.
"""
logging.info("Starting database seeding...")

@@ -441,28 +260,6 @@ def seed_database(session: Session) -> None:
api_keys.append(api_key)
logging.info(f"Created API key (ID: {api_key.id})")

# Create credentials
credentials = []
for credential_data in seed_data["credentials"]:
credential = create_credential(session, credential_data)
credentials.append(credential)
logging.info(
f"Created credential for provider: {credential.provider} (ID: {credential.id})"
)

# Create assistants
assistants = []
for assistant_data in seed_data.get("assistants", []):
assistant = create_assistant(session, assistant_data)
assistants.append(assistant)
logging.info(f"Created assistant: {assistant.name} (ID: {assistant.id})")

documents = []
for document_data in seed_data.get("documents", []):
document = create_document(session, document_data)
documents.append(document)
logging.info(f"Created document: {document.fname} (ID: {document.id})")

logging.info("Database seeding completed successfully!")
session.commit()
except Exception as e:
2 changes: 1 addition & 1 deletion backend/app/tests/conftest.py
@@ -21,7 +21,7 @@
get_user_test_auth_context,
TestAuthContext,
)
from app.seed_data.seed_data import seed_database
from app.tests.seed_data.seed_data import seed_database


@pytest.fixture(scope="function")