From 4ed99435ba3e22b5fbdc7ddf7fe9e6596c32ab06 Mon Sep 17 00:00:00 2001 From: Yaroslav Poltoran Date: Mon, 9 Oct 2023 20:51:30 +0300 Subject: [PATCH] add tiktoken --- langchain_helper.py | 2 +- requirements.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/langchain_helper.py b/langchain_helper.py index d5cec37..bfa0f8f 100644 --- a/langchain_helper.py +++ b/langchain_helper.py @@ -16,7 +16,7 @@ def create_db_from_youtube_video_url(video_url: str) -> FAISS: loader = YoutubeLoader.from_youtube_url(video_url) transcript = loader.load() - text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) + text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1000, chunk_overlap=100) docs = text_splitter.split_documents(transcript) db = FAISS.from_documents(docs, embeddings) diff --git a/requirements.txt b/requirements.txt index cf86459..ed6d2f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ langchain openai youtube-transcript-api faiss-cpu -streamlit \ No newline at end of file +streamlit +tiktoken \ No newline at end of file