Skip to content

Commit

Permalink
precommit
Browse files: browse the repository at this point in the history
  • Loading branch information
bedanley committed Jan 15, 2025
1 parent 4f20999 commit cd259b9
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 4 deletions.
3 changes: 3 additions & 0 deletions lambda/models/domain_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,10 +317,13 @@ class IngestionType(Enum):

RagDocumentDict: TypeAlias = Dict[str, Any]


class ChunkStrategyType(Enum):
    """Enum for different types of chunking strategies.

    The member's ``.value`` is the string stored under the ``"type"`` key of a
    ``chunk_strategy`` dict, alongside the ``"size"`` and ``"overlap"``
    entries.
    """

    # Chunking driven by an explicit size/overlap pair.
    FIXED = "fixed"


class RagSubDocument(BaseModel):
"""Rag Sub-Document Entity for storing in DynamoDB."""

Expand Down
6 changes: 5 additions & 1 deletion lambda/repository/lambda_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,11 @@ def ingest_documents(event: dict, context: dict) -> dict:
source=doc_source,
subdocs=ids,
username=username,
chunk_strategy={"type": ChunkStrategyType.FIXED.value, "size": str(chunk_size), "overlap": str(chunk_overlap)},
chunk_strategy={
"type": ChunkStrategyType.FIXED.value,
"size": str(chunk_size),
"overlap": str(chunk_overlap),
},
ingestion_type=IngestionType.MANUAL,
)
doc_repo.save(doc_entity)
Expand Down
8 changes: 6 additions & 2 deletions lambda/repository/pipeline_ingest_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from utilities.validation import validate_chunk_params, validate_model_name, validate_repository_type, ValidationError
from utilities.vector_store import get_vector_store_client

from .lambda_functions import ChunkStrategyType, _get_embeddings_pipeline, IngestionType, RagDocument
from .lambda_functions import _get_embeddings_pipeline, ChunkStrategyType, IngestionType, RagDocument

logger = logging.getLogger(__name__)
session = boto3.Session()
Expand Down Expand Up @@ -159,7 +159,11 @@ def handle_pipeline_ingest_documents(event: Dict[str, Any], context: Any) -> Dic
document_name=key,
source=docs[0][0].metadata.get("source"),
subdocs=all_ids,
chunk_strategy={"type": ChunkStrategyType.FIXED.value, "size": str(chunk_size), "overlap": str(chunk_overlap)},
chunk_strategy={
"type": ChunkStrategyType.FIXED.value,
"size": str(chunk_size),
"overlap": str(chunk_overlap),
},
username=username,
ingestion_type=IngestionType.AUTO,
)
Expand Down
6 changes: 5 additions & 1 deletion lambda/repository/state_machine/pipeline_ingest_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ def handle_pipeline_ingest_documents(event: Dict[str, Any], context: Any) -> Dic
document_name=key,
source=source,
subdocs=ids,
chunk_strategy={"type": ChunkStrategyType.FIXED.value, "size": str(chunk_size), "overlap": str(chunk_overlap)},
chunk_strategy={
"type": ChunkStrategyType.FIXED.value,
"size": str(chunk_size),
"overlap": str(chunk_overlap),
},
username=username,
ingestion_type=IngestionType.AUTO,
)
Expand Down

0 comments on commit cd259b9

Please sign in to comment.