diff --git a/.fernignore b/.fernignore index b780c33..8e3352b 100644 --- a/.fernignore +++ b/.fernignore @@ -34,3 +34,4 @@ CONFIGURATION.md resources/ src/vectara/types/search_corpora_parameters.py # do not remove +src/vectara/corpora/types/search_corpus_parameters.py # do not remove diff --git a/.gitignore b/.gitignore index a3083ce..00195e1 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,6 @@ poetry.toml .ruff_cache/ .idea +.env **/.ipynb_checkpoints \ No newline at end of file diff --git a/int_tests/conftest.py b/int_tests/conftest.py new file mode 100644 index 0000000..3779c62 --- /dev/null +++ b/int_tests/conftest.py @@ -0,0 +1,6 @@ +from pathlib import Path +from dotenv import load_dotenv + +# Load environment variables from .env file +env_path = Path(__file__).parent / '.env' +load_dotenv(dotenv_path=env_path) \ No newline at end of file diff --git a/int_tests/vectara_int_tests/__init__.py b/int_tests/managers/__init__.py similarity index 100% rename from int_tests/vectara_int_tests/__init__.py rename to int_tests/managers/__init__.py diff --git a/int_tests/managers/test_api_keys.py b/int_tests/managers/test_api_keys.py new file mode 100644 index 0000000..91ae79d --- /dev/null +++ b/int_tests/managers/test_api_keys.py @@ -0,0 +1,88 @@ +import unittest +import os + +from vectara import Vectara + + +class TestApiKeys(unittest.TestCase): + client = None + corpus_name = None + corpus_key = None + created_api_keys = None + + @classmethod + def setUpClass(cls): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + + cls.client = Vectara(api_key=api_key) + cls.corpus_name = "test-api-keys" + cls.corpus_key = cls.corpus_name + cls.created_api_keys = set() + + # Create corpus + response = cls.client.corpora.create(name=cls.corpus_name, key=cls.corpus_key) + cls.key = response.key + + def _create_api_key(self, name="test-key", api_key_role="serving"): + """Helper method to create an API key with given parameters.""" + response = self.client.api_keys.create( + name=name, + api_key_role=api_key_role, + corpus_keys=[self.key] + ) + self.created_api_keys.add(response.id) + return response + + def test_create_api_key(self): + response = self._create_api_key() + self.assertEqual(response.name, "test-key") + self.assertEqual(response.api_key_role, "serving") + + def test_delete_api_key(self): + create_response = self._create_api_key() + delete_response = self.client.api_keys.delete(create_response.id) + self.assertIsNone(delete_response) + self.created_api_keys.remove(create_response.id) + + def test_get_api_key(self): + create_response = self._create_api_key() + get_response = self.client.api_keys.get(create_response.id) + self.assertEqual(get_response.name, create_response.name) + + def test_update_api_key(self): + create_response = self._create_api_key() + update_response = self.client.api_keys.update(create_response.id, enabled=False) + self.assertEqual(update_response.enabled, False) + + def test_list_api_keys(self): + # Create two test keys + created_keys = [] + for index in range(2): + create_response = self._create_api_key(name=f"test-key-{index}") + created_keys.append(create_response.name) + + # Get all keys and verify our created keys are in the list + all_keys = list(self.client.api_keys.list()) + + # Verify our created keys are in the list + for key in all_keys: + if key.name in created_keys: + self.assertIn(key.name, [name for name in created_keys]) + + @classmethod + def tearDownClass(cls): + """Clean up all test resources.""" + # Clean up created API keys + for api_key_id in cls.created_api_keys: + try: + cls.client.api_keys.delete(api_key_id) + except Exception: + pass + + # Clean up corpus + try: + cls.client.corpora.delete(cls.corpus_key) + except Exception: + pass diff --git a/int_tests/managers/test_app_client.py b/int_tests/managers/test_app_client.py new file mode 100644 index 0000000..8212b92 --- /dev/null +++ b/int_tests/managers/test_app_client.py @@ -0,0 +1,76 @@ +import os +import unittest + +from vectara import Vectara + + +class TestAppClient(unittest.TestCase): + client = None + created_clients = None + + @classmethod + def setUpClass(cls): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + cls.client = Vectara(api_key=api_key) + cls.created_clients = set() + + def _create_app_client(self, name="test-client", api_roles=["owner"]): + """Helper method to create an app client with given parameters.""" + response = self.client.app_clients.create(name=name, api_roles=api_roles) + self.created_clients.add(response.id) + return response + + def test_create_app_client(self): + response = self._create_app_client() + self.assertEqual(response.name, "test-client") + self.assertIsNotNone(response.client_id) + self.assertIsNotNone(response.client_secret) + + def test_get_app_client(self): + create_response = self._create_app_client() + get_response = self.client.app_clients.get(create_response.id) + + self.assertEqual(get_response.client_id, create_response.client_id) + self.assertEqual(get_response.client_secret, create_response.client_secret) + + def test_delete_app_client(self): + create_response = self._create_app_client() + del_response = self.client.app_clients.delete(create_response.id) + self.assertIsNone(del_response) + self.created_clients.remove(create_response.id) + + def test_update_app_client(self): + create_response = self._create_app_client() + update_response = self.client.app_clients.update( + create_response.id, + api_roles=["owner", "administrator"], + description="test client" + ) + + self.assertEqual(update_response.api_roles, ["owner", "administrator"]) + self.assertEqual(update_response.description, "test client") + + def test_list_app_clients(self): + # Create two test clients + created_clients = [] + for index in range(2): + create_response = self._create_app_client(name=f"test-client-{index}") + created_clients.append(create_response) + + created_client_ids = {client.id for client in created_clients} + + # Verify our created clients are in the list + for client in self.client.app_clients.list().items: + if client.id in created_client_ids: + self.assertIn(client.name, [c.name for c in created_clients]) + + @classmethod + def tearDownClass(cls): + """Clean up all test resources.""" + for client_id in cls.created_clients: + try: + cls.client.app_clients.delete(client_id) + except Exception: + pass diff --git a/int_tests/managers/test_auth.py b/int_tests/managers/test_auth.py new file mode 100644 index 0000000..065d409 --- /dev/null +++ b/int_tests/managers/test_auth.py @@ -0,0 +1,46 @@ +import unittest +import os + +from vectara import Vectara + + +class TestAuthManager(unittest.TestCase): + client = None + client_id = None + client_secret = None + created_clients = None + + @classmethod + def setUpClass(cls): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + + cls.client = Vectara(api_key=api_key) + cls.created_clients = set() + + # Create test client + response = cls.client.app_clients.create(name="test-client", api_roles=["owner"]) + cls.client_id = response.client_id + cls.client_secret = response.client_secret + cls.created_clients.add(response.id) + + def test_get_access_token(self): + response = self.client.auth.get_token( + client_id=self.client_id, + client_secret=self.client_secret, + grant_type="client_credentials" + ) + + self.assertIsNotNone(response.access_token) + self.assertIsNotNone(response.token_type) + self.assertIsNotNone(response.expires_in) + + @classmethod + def tearDownClass(cls): + """Clean up all test resources.""" + for client_id in cls.created_clients: + try: + cls.client.app_clients.delete(client_id) + except Exception: + pass diff --git a/int_tests/vectara_int_tests/managers/test_chat.py b/int_tests/managers/test_chat.py similarity index 51% rename from int_tests/vectara_int_tests/managers/test_chat.py rename to int_tests/managers/test_chat.py index 5c245eb..192282b 100644 --- a/int_tests/vectara_int_tests/managers/test_chat.py +++ b/int_tests/managers/test_chat.py @@ -1,20 +1,42 @@ import unittest +import os +from vectara import Vectara from vectara import SearchCorporaParameters, KeyedSearchCorpus, ContextConfiguration, GenerationParameters, \ CitationParameters, ChatParameters, CoreDocument, CoreDocumentPart -from vectara.factory import Factory class TestChatManager(unittest.TestCase): - def setUp(self): - self.addCleanup(self.cleanup) - self.client = Factory().build() - response = self.client.corpora.create(name="test-chat-manager", key="test-chat-manager") - self.key = response.key - self.search_params = SearchCorporaParameters( + client = None + key = None + chat_id = None + turn_id = None + search_params = None + generation_params = None + chat_params = None + created_corpora = None + created_chats = None + + @classmethod + def setUpClass(cls): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + + cls.client = Vectara(api_key=api_key) + cls.created_corpora = set() + cls.created_chats = set() + + # Create test corpus + response = cls.client.corpora.create(name="test-chat-manager", key="test-chat-manager") + cls.key = response.key + cls.created_corpora.add(cls.key) + + # Setup search parameters + cls.search_params = SearchCorporaParameters( corpora=[ KeyedSearchCorpus( - corpus_key=self.key, + corpus_key=cls.key, lexical_interpolation=0, ) ], @@ -25,14 +47,19 @@ def setUp(self): sentences_after=1, ), ) - self.generation_params = GenerationParameters( + + # Setup generation parameters + cls.generation_params = GenerationParameters( citations=CitationParameters( style="none", ), - enable_factual_consistency_score=True, + enable_factual_consistency_score=False, ) - self.chat_params = ChatParameters(store=True) + # Setup chat parameters + cls.chat_params = ChatParameters(store=True) + + # Add test document document = CoreDocument( id="my-doc-id", document_parts=[ @@ -42,40 +69,66 @@ def setUp(self): ) ], ) - self.client.documents.create("test-chat-manager", request=document) + cls.client.documents.create(cls.key, request=document) + + # Create initial chat + response = cls.client.chat( + query="Robot Utility Models", + search=cls.search_params, + generation=cls.generation_params, + chat=cls.chat_params + ) + + cls.chat_id = response.chat_id + cls.turn_id = response.turn_id + cls.created_chats.add(cls.chat_id) + def _create_chat(self): + """Helper method to create a chat with default parameters.""" response = self.client.chat( query="Robot Utility Models", search=self.search_params, generation=self.generation_params, chat=self.chat_params ) - - self.chat_id = response.chat_id - self.turn_id = response.turn_id + self.created_chats.add(response.chat_id) + return response def test_get_chat(self): response = self.client.chats.get(chat_id=self.chat_id) self.assertEqual(response.id, self.chat_id) - def test_list_chats(self): - chat_ids = [self.chat_id] - for _ in range(2): - response = self.client.chat( - query="Robot Utility Models", - search=self.search_params, - generation=self.generation_params, - chat=self.chat_params - ) - chat_ids.append(response.chat_id) - - response = self.client.chats.list() - for chat in response: - self.assertIn(chat.id, chat_ids) + # def test_list_chats(self): + # # Create additional chats + # created_chat_ids = {self.chat_id} + # for _ in range(2): + # response = self._create_chat() + # created_chat_ids.add(response.chat_id) + + # # Get all chats and verify our created chats are in the list + # found_chats = set() + + # # Use iter_pages to handle pagination automatically + # for page in self.client.chats.list().iter_pages(): + # # Check each chat in the current page + # for chat in page: + # if chat.id in created_chat_ids: + # found_chats.add(chat.id) + + # # If we've found all our chats, we can stop + # if found_chats == created_chat_ids: + # break + + # # Verify all our created chats were found + # self.assertEqual(found_chats, created_chat_ids) def test_delete_chat(self): - response = self.client.chats.delete(chat_id=self.chat_id) + chat = self._create_chat() + + # Delete the chat + response = self.client.chats.delete(chat_id=chat.chat_id) self.assertIsNone(response) + self.created_chats.remove(chat.chat_id) def test_create_turn(self): response = self.client.chats.create_turns( @@ -128,7 +181,8 @@ def test_update_turn(self): self.assertEqual(turn.enabled, False) def test_list_turns(self): - turn_ids = [self.turn_id] + # Create additional turns + created_turn_ids = {self.turn_id} for _ in range(2): response = self.client.chats.create_turns( chat_id=self.chat_id, @@ -137,18 +191,27 @@ def test_list_turns(self): generation=self.generation_params, chat=self.chat_params ) - turn_ids.append(response.turn_id) + created_turn_ids.add(response.turn_id) + # Get all turns and verify our created turns are in the list response = self.client.chats.list_turns(chat_id=self.chat_id) for turn in response.turns: - self.assertIn(turn.id, turn_ids) - - def cleanup(self): - self.client.chats.delete(chat_id=self.chat_id) - for corpora in self.client.corpora.list(): - self.client.corpora.delete(corpora.key) - - def tearDown(self): - self.client.chats.delete(chat_id=self.chat_id) - for corpora in self.client.corpora.list(): - self.client.corpora.delete(corpora.key) + if turn.id in created_turn_ids: + self.assertIn(turn.id, created_turn_ids) + + @classmethod + def tearDownClass(cls): + """Clean up all test resources.""" + # Clean up chats + for chat_id in cls.created_chats: + try: + cls.client.chats.delete(chat_id=chat_id) + except Exception: + pass + + # Clean up corpora + for corpus_key in cls.created_corpora: + try: + cls.client.corpora.delete(corpus_key) + except Exception: + pass diff --git a/int_tests/managers/test_config.py b/int_tests/managers/test_config.py new file mode 100644 index 0000000..fcc518b --- /dev/null +++ b/int_tests/managers/test_config.py @@ -0,0 +1,129 @@ +import os +from pathlib import Path +import yaml +import tempfile +import shutil + +from vectara.factory import Factory +from vectara.config import HomeConfigLoader, ApiKeyAuthConfig, OAuth2AuthConfig, PathConfigLoader, EnvConfigLoader + +import unittest +import logging + + +class FactoryConfigTest(unittest.TestCase): + """ + This test depends on our YAML default config being defined. + + We use this to test various methods of injection. + """ + test_config_path = None + temp_home = None + test_env_vars = { + EnvConfigLoader.ENV_CUSTOMER_ID: "test_customer", + EnvConfigLoader.ENV_API_KEY: "test_api_key", + EnvConfigLoader.ENV_OAUTH2_CLIENT_ID: "test_client_id", + EnvConfigLoader.ENV_OAUTH2_CLIENT_SECRET: "test_client_secret" + } + + VECTARA_API_KEY = os.environ["VECTARA_API_KEY"] + + @classmethod + def setUpClass(cls): + """Set up test resources.""" + logging.basicConfig(format='%(asctime)s:%(name)-35s %(levelname)s:%(message)s', level=logging.INFO, + datefmt='%H:%M:%S %z') + cls.logger = logging.getLogger(cls.__name__) + + # Create a temporary home directory for testing + cls.temp_home = tempfile.mkdtemp() + + # Create .vec_auth.yaml in the temporary home directory + home_config_path = Path(cls.temp_home) / ".vec_auth.yaml" + test_config = { + "default": { + "customer_id": "test_customer", + "auth": { + "api_key": "test_api_key", + "type": "api_key" + } + } + } + with open(home_config_path, 'w') as f: + yaml.dump(test_config, f) + + # Set up test config path for explicit path tests + cls.test_config_path = Path(__file__).parent.parent / "resources" / ".vec_auth.yaml" + + @classmethod + def tearDownClass(cls): + """Clean up test resources.""" + + os.environ["VECTARA_API_KEY"] = cls.VECTARA_API_KEY + + try: + if cls.temp_home and os.path.exists(cls.temp_home): + # Remove the .vec_auth.yaml file first + home_config_path = Path(cls.temp_home) / ".vec_auth.yaml" + if home_config_path.exists(): + home_config_path.unlink() + + # Then remove the temporary directory + shutil.rmtree(cls.temp_home) + cls.logger.info(f"Cleaned up temporary home directory: {cls.temp_home}") + except Exception as e: + cls.logger.error(f"Error cleaning up temporary home directory: {e}") + + def setUp(self): + """Clean up test environment variables before each test.""" + # Remove test environment variables + for key in self.test_env_vars: + if key in os.environ: + del os.environ[key] + + # Set HOME to our temporary directory + self.original_home = os.environ.get('HOME') + os.environ['HOME'] = self.temp_home + + def tearDown(self): + """Restore original HOME environment variable.""" + if hasattr(self, 'original_home'): + if self.original_home: + os.environ['HOME'] = self.original_home + else: + del os.environ['HOME'] + + def _test_factory_auth(self, target: Factory, expected_method: str): + client = target.build() + self.assertEqual(expected_method, target.load_method) + + def test_default_load(self): + # With no config_path and no environment variables, it should use "path_home" + factory = Factory() + self._test_factory_auth(factory, "path_home") + + def test_env_load(self): + # With environment variables set, it should use "env" + os.environ[EnvConfigLoader.ENV_CUSTOMER_ID] = self.test_env_vars[EnvConfigLoader.ENV_CUSTOMER_ID] + os.environ[EnvConfigLoader.ENV_API_KEY] = self.test_env_vars[EnvConfigLoader.ENV_API_KEY] + + factory = Factory() + self._test_factory_auth(factory, "env") + + def test_explicit_path(self): + # With config_path specified, it should use "path_explicit" + factory = Factory(config_path=str(self.test_config_path)) + self._test_factory_auth(factory, "path_explicit") + + def test_explicit_typed(self): + # With explicit typed config, it should use "explicit_typed" + client_config = PathConfigLoader(config_path=str(self.test_config_path)).load() + factory = Factory(config=client_config) + self._test_factory_auth(factory, "explicit_typed") + + def test_explicit_dict(self): + # With explicit dict config, it should use "explicit_dict" + client_config = PathConfigLoader(config_path=str(self.test_config_path)).load().model_dump() + factory = Factory(config=client_config) + self._test_factory_auth(factory, "explicit_dict") + diff --git a/int_tests/managers/test_corpora.py b/int_tests/managers/test_corpora.py new file mode 100644 index 0000000..01ab2bf --- /dev/null +++ b/int_tests/managers/test_corpora.py @@ -0,0 +1,248 @@ +import unittest +import os +import time +from typing import Optional + +from vectara import Vectara, FilterAttribute, CorpusCustomDimension, CoreDocument, CoreDocumentPart, SearchCorporaParameters, \ + ContextConfiguration, CustomerSpecificReranker, GenerationParameters, ModelParameters, \ + CitationParameters, SearchCorpusParameters +from vectara.core import RequestOptions +from vectara.factory import Factory + + +class TestCorporaManager(unittest.TestCase): + client = None + created_corpora = None + + @classmethod + def setUpClass(cls): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + + cls.client = Vectara(api_key=api_key) + cls.created_corpora = set() + + def _create_corpus(self, key: str, name: Optional[str] = None, description: Optional[str] = None, + filter_attributes: Optional[list[FilterAttribute]] = None) -> str: + """Helper method to create a corpus with given parameters.""" + if name is None: + name = key + response = self.client.corpora.create( + key=key, + name=name, + description=description, + filter_attributes=filter_attributes + ) + self.created_corpora.add(key) + return key + + def _wait_for_corpus(self, corpus_key: str, timeout: int = 60): + """Helper method to wait for corpus operations to complete.""" + import time + start_time = time.time() + while time.time() - start_time < timeout: + try: + corpus = self.client.corpora.get(corpus_key) + if corpus: + return + except Exception: + pass + time.sleep(5) + raise TimeoutError(f"Corpus {corpus_key} not ready after {timeout} seconds") + + def test_create_corpora(self): + filter_attributes = FilterAttribute( + name="Title", + level="document", + description="The title of the document.", + indexed=True, + type="text" + ) + custom_dimensions = CorpusCustomDimension( + name="importance", + description="Product importance.", + indexing_default=0, + querying_default=0 + ) + + corpus_key = self._create_corpus( + key="test-create-corpus", + description="test description", + filter_attributes=[filter_attributes] + ) + + corpus = self.client.corpora.get(corpus_key) + self.assertEqual(corpus.key, "test-create-corpus") + self.assertEqual(corpus.name, "test-create-corpus") + self.assertEqual(corpus.description, "test description") + self.assertEqual(corpus.filter_attributes, [filter_attributes]) + + def test_list_corpora(self): + # Create test corpora + corpus_keys = [] + for i in range(2): + corpus_key = self._create_corpus(key=f"corpus-{i}") + corpus_keys.append(corpus_key) + + # Verify corpora are in the list + found_keys = set() + for page in self.client.corpora.list().iter_pages(): + for corpus in page: + if corpus.key in corpus_keys: + found_keys.add(corpus.key) + if found_keys == set(corpus_keys): + break + + self.assertEqual(found_keys, set(corpus_keys)) + + def test_delete_corpora(self): + corpus_key = self._create_corpus(key="test-delete-corpus") + + self.client.corpora.delete(corpus_key=corpus_key) + self.created_corpora.remove(corpus_key) + + # Verify corpus is deleted + found = False + for page in self.client.corpora.list().iter_pages(): + for corpus in page: + if corpus.key == corpus_key: + found = True + break + if found: + break + self.assertFalse(found) + + def test_update_corpora(self): + corpus_key = self._create_corpus(key="test-update-corpus") + + response = self.client.corpora.update( + corpus_key, + name="updated-name", + description="updated-description" + ) + + self.assertEqual(response.description, "updated-description") + self.assertEqual(response.name, "updated-name") + + def test_get_metadata_of_corpora(self): + corpus_key = self._create_corpus( + key="test-get-metadata", + description="test-description", + name="Test" + ) + + corpus = self.client.corpora.get(corpus_key) + self.assertEqual(corpus.key, "test-get-metadata") + self.assertEqual(corpus.name, "Test") + self.assertEqual(corpus.description, "test-description") + + def test_corpus_reset(self): + corpus_key = self._create_corpus(key="test-reset-corpus") + + # Add document + document = CoreDocument( + id="my-doc-id", + document_parts=[ + CoreDocumentPart( + text="I'm a nice document part.", + ) + ], + ) + self.client.documents.create(corpus_key, request=document) + time.sleep(30) + # Verify document was added + documents = list(self.client.documents.list(corpus_key)) + self.assertEqual(len(documents), 1) + + # Reset corpus + self.client.corpora.reset(corpus_key) + + # Verify document was removed + documents = list(self.client.documents.list(corpus_key)) + self.assertEqual(len(documents), 0) + + def test_replace_filter_attributes(self): + corpus_key = self._create_corpus(key="test-reset-filters") + + filter_attributes = FilterAttribute( + name="Title", + level="document", + description="The title of the document.", + indexed=True, + type="text" + ) + res = self.client.corpora.replace_filter_attributes( + corpus_key, + filter_attributes=[filter_attributes] + ) + self.assertIsNotNone(res.job_id) + + def test_search(self): + corpus_key = self._create_corpus(name="test-search", key="test-search") + + document = CoreDocument( + id="my-doc-id", + document_parts=[ + CoreDocumentPart( + text="Robot Utility Models are trained on a diverse set of environments and objects, and then can " + "be deployed in novel environments with novel objects without any further data or training.", + ) + ], + ) + self.client.documents.create(corpus_key, request=document) + + response = self.client.corpora.search(corpus_key=corpus_key, query="Robot Utility Models") + self.assertIsNone(response.summary) + self.assertGreater(len(response.search_results), 0) + + def test_query(self): + corpus_key = self._create_corpus(name="test-query", key="test-query") + + document = CoreDocument( + id="my-doc-id", + document_parts=[ + CoreDocumentPart( + text="Robot Utility Models are trained on a diverse set of environments and objects, and then can " + "be deployed in novel environments with novel objects without any further data or training.", + ) + ], + ) + self.client.documents.create(corpus_key, request=document) + + search_params = SearchCorpusParameters( + context_configuration=ContextConfiguration( + sentences_before=2, + sentences_after=2, + ), + reranker=CustomerSpecificReranker( + reranker_id="rnk_272725719" + ), + ) + generation_params = GenerationParameters( + response_language="eng", + citations=CitationParameters( + style="none", + ), + enable_factual_consistency_score=True, + ) + request_options = RequestOptions(timeout_in_seconds=100) + + response = self.client.corpora.query( + corpus_key=corpus_key, + search=search_params, + query="Robot Utility Models", + generation=generation_params, + request_options=request_options + ) + self.assertIsNotNone(response.summary) + self.assertGreater(len(response.search_results), 0) + + @classmethod + def tearDownClass(cls): + """Clean up all test resources.""" + for corpus_key in cls.created_corpora: + try: + cls.client.corpora.delete(corpus_key) + except Exception: + pass diff --git a/int_tests/managers/test_corpus_manager.py b/int_tests/managers/test_corpus_manager.py new file mode 100644 index 0000000..8fcb46f --- /dev/null +++ b/int_tests/managers/test_corpus_manager.py @@ -0,0 +1,59 @@ +from vectara.corpora.client import CorporaClient +from vectara.managers.corpus import CreateCorpusRequest +from vectara.client import Vectara +from unittest.mock import MagicMock +from vectara.factory import Factory + +import unittest +import logging + +class CorpusManagerTest(unittest.TestCase): + client = None + created_corpora = None + + @classmethod + def setUpClass(cls): + """Set up test resources.""" + logging.basicConfig(format='%(asctime)s:%(name)-35s %(levelname)s:%(message)s', level=logging.INFO, datefmt='%H:%M:%S %z') + cls.logger = logging.getLogger(cls.__name__) + cls.client = Factory().build() + cls.created_corpora = set() + + def test_find_corpora_by_name_no_match(self): + found = self.client.corpus_manager.find_corpora_by_name("company_names") + self.assertEqual(len(found), 0) + + def test_find_corpora_by_name_match(self): + request = CreateCorpusRequest.model_validate( + { + "key": "company_names_document", + "name": "company_names_document", + "description": "Test corpus for finding by name" + }) + response = self.client.corpus_manager.create_corpus(request, delete_existing=True) + self.created_corpora.add(response.key) + + # Search for the corpus + found = self.client.corpus_manager.find_corpora_by_name("company_names_document") + self.assertEqual(len(found), 1) + self.assertEqual(found[0].name, "company_names_document") + + def test_create_corpus(self): + request = CreateCorpusRequest.model_validate( + { + "key": "test-sdk-corpus", + "name": "test-sdk-corpus", + "description": "Our first test corpus from the SDK" + }) + response = self.client.corpus_manager.create_corpus(request, delete_existing=True) + self.created_corpora.add(response.key) + + @classmethod + def tearDownClass(cls): + """Clean up test resources.""" + for corpus_key in cls.created_corpora: + try: + cls.client.corpora.delete(corpus_key) + except Exception as e: + cls.logger.error(f"Failed to delete corpus {corpus_key}: {e}") + diff --git a/int_tests/managers/test_document.py b/int_tests/managers/test_document.py new file mode 100644 index 0000000..e8ee5f1 --- /dev/null +++ b/int_tests/managers/test_document.py @@ -0,0 +1,119 @@ +import unittest +import os + +from vectara import Vectara, CoreDocument, CoreDocumentPart + + +class TestDocument(unittest.TestCase): + client = None + corpus_key = None + created_corpora = None + created_documents = None + + @classmethod + def setUpClass(cls): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + + cls.client = Vectara(api_key=api_key) + cls.created_corpora = set() + cls.created_documents = set() + + # Create test corpus + response = cls.client.corpora.create(name="test-document-manager", key="test-document-manager") + cls.corpus_key = response.key + cls.created_corpora.add(cls.corpus_key) + + def _create_document(self, doc_id: str, text: str = "test-doc-part-1") -> str: + """Helper method to create a document with given parameters.""" + document = CoreDocument( + id=doc_id, + document_parts=[ + CoreDocumentPart( + text=text, + ) + ], + ) + response = self.client.documents.create(self.corpus_key, request=document) + self.created_documents.add((self.corpus_key, doc_id)) + return response.id + + def test_add_document(self): + doc_id = self._create_document("my-doc-id") + self.assertEqual(doc_id, "my-doc-id") + + def test_delete_document(self): + doc_id = self._create_document("test-delete-my-doc") + response = self.client.documents.delete(self.corpus_key, doc_id) + self.assertIsNone(response) + self.created_documents.remove((self.corpus_key, doc_id)) + + def test_get_document(self): + doc_id = self._create_document("test-get-my-doc") + response = self.client.documents.get(self.corpus_key, doc_id) + self.assertEqual(response.id, doc_id) + + def test_list_documents(self): + # Create test documents + doc_ids = [] + for index in range(2): + doc_id = self._create_document(f"my-doc-id-{index}") + doc_ids.append(doc_id) + + # Get all documents and verify our created documents are in the list + found_ids = set() + for page in self.client.documents.list(self.corpus_key).iter_pages(): + for doc in page: + if doc.id in doc_ids: + found_ids.add(doc.id) + if found_ids == set(doc_ids): + break + + self.assertEqual(found_ids, set(doc_ids)) + + def test_update_metadata(self): + doc_id = self._create_document("test-update-metadata") + metadata = { + "title": "Test Document", + "author": "Test Author", + "category": "Test Category" + } + response = self.client.documents.update_metadata( + corpus_key=self.corpus_key, + document_id=doc_id, + metadata=metadata + ) + self.assertEqual(response.id, doc_id) + self.assertEqual(response.metadata, metadata) + + def test_summarize(self): + doc_id = self._create_document( + "test-summarize", + text="""Robot Utility Models are trained on a diverse set of environments and objects, and then can + be deployed in novel environments with novel objects without any further data or training.""" + ) + response = self.client.documents.summarize( + corpus_key=self.corpus_key, + document_id=doc_id, + llm_name="gpt-3.5-turbo" + ) + self.assertIsNotNone(response.summary) + self.assertGreater(len(response.summary), 0) + + @classmethod + def tearDownClass(cls): + """Clean up all test resources.""" + # Clean up documents + for corpus_key, doc_id in cls.created_documents: + try: + cls.client.documents.delete(corpus_key, doc_id) + except Exception: + pass + + # Clean up corpora + for corpus_key in cls.created_corpora: + try: + cls.client.corpora.delete(corpus_key) + except Exception: + pass diff --git a/int_tests/vectara_int_tests/managers/test_document.py b/int_tests/managers/test_document_manager.py similarity index 51% rename from int_tests/vectara_int_tests/managers/test_document.py rename to int_tests/managers/test_document_manager.py index 2bb6f70..34e0010 100644 --- a/int_tests/vectara_int_tests/managers/test_document.py +++ b/int_tests/managers/test_document_manager.py @@ -1,35 +1,31 @@ -from vectara.corpora.client import CorporaClient +import unittest +import logging +import os + +from vectara.factory import Factory from vectara.managers.corpus import CreateCorpusRequest from vectara.managers.document import DocOpEnum from vectara.types import StructuredDocument from vectara.client import Vectara -from unittest.mock import MagicMock -from vectara.factory import Factory -#from vectara.utils.httpx_logging import dump_all_requests -from pathlib import Path -import time -import unittest -import logging -import json -import urllib + class DocumentManagerTest(unittest.TestCase): + corpus_key = None + client = None - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + @classmethod + def setUpClass(cls): + """Set up test resources.""" logging.basicConfig(format='%(asctime)s:%(name)-35s %(levelname)s:%(message)s', level=logging.DEBUG, datefmt='%H:%M:%S %z') - self.logger = logging.getLogger(self.__class__.__name__) + cls.logger = logging.getLogger(cls.__name__) + cls.client = Factory().build() + + request = CreateCorpusRequest(name="int-test-document-manager", key="int-test-document-manager") + create_response = cls.client.lab_helper.create_lab_corpus(request, user_prefix=False) + cls.corpus_key = create_response.key def test_upsert(self): self.logger.info("Testing Doc Upload with Upsert") - client = Factory().build() - - request = CreateCorpusRequest(name="int-test-doc-upsert1", key="int-test-doc-upsert1") - create_response = client.lab_helper.create_lab_corpus(request) - key = create_response.key - - # Sleep for 30 seconds, let caches expire otherwise Documents service can give 404 error (Corpus not found). - #time.sleep(30) doc = StructuredDocument.model_validate({ "id": "abc", @@ -38,10 +34,10 @@ def test_upsert(self): ] }) - response = client.document_manager.index_doc(key, doc) + response = self.client.document_manager.index_doc(self.corpus_key, doc) self.assertEqual(DocOpEnum.CREATED, response) - response = client.document_manager.index_doc(key, doc) + response = self.client.document_manager.index_doc(self.corpus_key, doc) self.assertEqual(DocOpEnum.IGNORED, response) doc = StructuredDocument.model_validate({ @@ -51,7 +47,16 @@ def test_upsert(self): ] }) - response = client.document_manager.index_doc(key, doc) + response = self.client.document_manager.index_doc(self.corpus_key, doc) self.assertEqual(DocOpEnum.UPDATED, response) + @classmethod + def tearDownClass(cls): + """Clean up test resources.""" + if cls.corpus_key: + try: + cls.client.corpora.delete(cls.corpus_key) + except Exception as e: + cls.logger.error(f"Failed to delete corpus {cls.corpus_key}: {e}") + diff --git a/int_tests/managers/test_encoders.py b/int_tests/managers/test_encoders.py new file mode 100644 index 0000000..bcdeccd --- /dev/null +++ b/int_tests/managers/test_encoders.py @@ -0,0 +1,18 @@ +import unittest +import os + +from vectara.client import Vectara + + +class TestEncodersManager(unittest.TestCase): + + def setUp(self): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + self.client = Vectara(api_key=api_key) + + def test_list_encoders(self): + response = self.client.encoders.list() + for encoder in response: + self.assertIsNotNone(encoder.name) diff --git a/int_tests/managers/test_generation_presets.py b/int_tests/managers/test_generation_presets.py new file mode 100644 index 0000000..aef9d74 --- /dev/null +++ b/int_tests/managers/test_generation_presets.py @@ -0,0 +1,29 @@ +import unittest +import os + +from vectara import Vectara + + +class TestGenerationPresetsManager(unittest.TestCase): + def setUp(self): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + self.client = Vectara(api_key=api_key) + + def test_list_generation_presets(self): + # Test with default parameters + found_presets = set() + pager = self.client.generation_presets.list() + for preset in pager.items: + self.assertIsNotNone(preset.name) + found_presets.add(preset.name) + + # Test with limit parameter + limited_pager = self.client.generation_presets.list(limit=2) + self.assertLessEqual(len(limited_pager.items), 2) + + # Test with llm_name filter + llm_pager = self.client.generation_presets.list(llm_name="gpt-3.5-turbo") + for preset in llm_pager.items: + self.assertEqual(preset.llm_name, "gpt-3.5-turbo") diff --git a/int_tests/managers/test_jobs.py b/int_tests/managers/test_jobs.py new file mode 100644 index 0000000..0074eca --- /dev/null +++ b/int_tests/managers/test_jobs.py @@ -0,0 +1,64 @@ +import unittest +import os + +from vectara import Vectara, FilterAttribute + + +class TestJobsManager(unittest.TestCase): + client = None + corpus_key = None + job_id = None + created_corpora = None + + @classmethod + def setUpClass(cls): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + + cls.client = Vectara(api_key=api_key) + cls.created_corpora = set() + + # Create test corpus + cls.corpus_key = "test-jobs" + cls.client.corpora.create(key=cls.corpus_key) + cls.created_corpora.add(cls.corpus_key) + + # Setup filter attributes + filter_attributes = FilterAttribute( + name="Title", + level="document", + description="The title of the document.", + indexed=True, + type="text" + ) + res = cls.client.corpora.replace_filter_attributes( + cls.corpus_key, + filter_attributes=[filter_attributes] + ) + cls.job_id = res.job_id + + def test_get_job(self): + res = self.client.jobs.get(job_id=self.job_id) + self.assertEqual(res.id, self.job_id) + self.assertEqual(res.corpus_keys, [self.corpus_key]) + + def test_list_jobs(self): + found_job = False + jobs_list = self.client.jobs.list(corpus_key=self.corpus_key) + + for job in jobs_list.items: + if job.id == self.job_id: + found_job = True + break + + self.assertTrue(found_job) + + @classmethod + def tearDownClass(cls): + """Clean up all test resources.""" + for corpus_key in cls.created_corpora: + try: + cls.client.corpora.delete(corpus_key) + except Exception: + pass diff --git a/int_tests/managers/test_llms.py b/int_tests/managers/test_llms.py new file mode 100644 index 0000000..708efa3 --- /dev/null +++ b/int_tests/managers/test_llms.py @@ -0,0 +1,68 @@ +import unittest +import os + +from vectara import Vectara, RemoteAuth + + +class TestLlmsManager(unittest.TestCase): + client = None + created_llms = None + + @classmethod + def setUpClass(cls): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + + cls.client = Vectara(api_key=api_key) + cls.created_llms = set() + + def test_list_llms(self): + # Test with default parameters + found_llms = set() + llms_list = self.client.llms.list() + for llm in llms_list.items: + self.assertIsNotNone(llm.name) + found_llms.add(llm.name) + + # Test with filter parameter + filtered_llms = self.client.llms.list(filter="gpt") + for llm in filtered_llms.items: + self.assertIn("gpt", llm.name.lower()) + + # Test with limit parameter + limited_llms = self.client.llms.list(limit=2) + self.assertLessEqual(len(limited_llms.items), 2) + + # def test_create_and_delete_llm(self): + # # Create a test LLM + # llm_name = f"test-custom-llm" + # llm = self.client.llms.create( + # name=llm_name, + # description="Test LLM for integration tests", + # model="gpt-3.5-turbo", + # uri="https://api.openai.com/v1/chat/completions", + # auth={ + # "type": "bearer", + # "token": os.getenv("OPENAI_API_KEY") + # } + # ) + # self.created_llms.add(llm_name) + + # # Verify the LLM was created + # self.assertEqual(llm.name, llm_name) + # self.assertEqual(llm.uri, "https://api.openai.com/v1/chat/completions") + # self.assertEqual(llm.description, "Test LLM for integration tests") + + # # Get the LLM + # retrieved_llm = self.client.llms.get(llm_id=llm_name) + # self.assertEqual(retrieved_llm.name, llm_name) + # self.assertEqual(retrieved_llm.uri, "https://api.openai.com/v1/chat/completions") + + # # Delete the LLM + # self.client.llms.delete(llm_id=llm_name) + # self.created_llms.remove(llm_name) + + # # Verify the LLM was deleted + # with self.assertRaises(Exception): + # self.client.llms.get(llm_id=llm_name) \ No newline at end of file diff --git a/int_tests/managers/test_rerankers.py b/int_tests/managers/test_rerankers.py new file mode 100644 index 0000000..25dbec1 --- /dev/null +++ b/int_tests/managers/test_rerankers.py @@ -0,0 +1,18 @@ +import unittest +import os + +from vectara import Vectara + + +class TestRerankersManager(unittest.TestCase): + def setUp(self): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + + self.client = Vectara(api_key=api_key) + + def test_list_rerankers(self): + response = self.client.rerankers.list() + for reranker in response: + self.assertIsNotNone(reranker.name) diff --git a/int_tests/managers/test_upload.py b/int_tests/managers/test_upload.py new file mode 100644 index 0000000..c30266d --- /dev/null +++ b/int_tests/managers/test_upload.py @@ -0,0 +1,151 @@ +import unittest +import os +from pathlib import Path + +from vectara import Vectara +from vectara.core import File +from vectara.types import MaxCharsChunkingStrategy, TableExtractionConfig +from vectara.core.api_error import ApiError + + +class UploadManagerTest(unittest.TestCase): + client = None + corpus = None + + @classmethod + def setUpClass(cls): + # Setup client + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + + cls.client = Vectara(api_key=api_key) + + response = cls.client.corpora.create(key="test-upload", name="test-upload") + cls.corpus = response + + def _get_test_file(self): + """Helper method to get the test file path.""" + test_file = Path("examples/01_getting_started/resources/arxiv/2409.05866v1.pdf") + if not test_file.exists(): + raise FileNotFoundError(f"Test file not found: {test_file}") + return test_file + + def test_upload_with_metadata(self): + """Test file upload with metadata.""" + test_file = self._get_test_file() + file = (test_file.name, open(test_file, "rb"), "application/pdf") + + document = self.client.upload.file( + corpus_key=self.corpus.key, + file=file, + metadata={"key": "value", "test": True}, + filename="test_document_with_metadata.pdf", + request_timeout=600 # 10 minutes timeout + ) + + # Verify upload + self.assertIsNotNone(document) + self.assertGreater(document.storage_usage.bytes_used, 0) + self.assertGreater(document.storage_usage.metadata_bytes_used, 0) + + def test_upload_with_chunking(self): + """Test file upload with custom chunking strategy.""" + test_file = self._get_test_file() + file = (test_file.name, open(test_file, "rb"), "application/pdf") + + chunking_strategy = MaxCharsChunkingStrategy( + type="max_chars_chunking_strategy", + max_chars_per_chunk=200 + ) + + document = self.client.upload.file( + corpus_key=self.corpus.key, + file=file, + chunking_strategy=chunking_strategy, + filename="test_document_with_chunking.pdf", + request_timeout=600 # 10 minutes timeout + ) + + # Verify upload + self.assertIsNotNone(document) + self.assertGreater(document.storage_usage.bytes_used, 0) + + def test_upload_with_larger_chunking(self): + """Test file upload with larger chunk size.""" + test_file = self._get_test_file() + file = (test_file.name, open(test_file, "rb"), "application/pdf") + + chunking_strategy = MaxCharsChunkingStrategy( + type="max_chars_chunking_strategy", + max_chars_per_chunk=1024 + ) + + document = self.client.upload.file( + corpus_key=self.corpus.key, + file=file, + chunking_strategy=chunking_strategy, + filename="test_document_with_larger_chunks.pdf", + request_timeout=600 + ) + + # Verify upload + self.assertIsNotNone(document) + self.assertGreater(document.storage_usage.bytes_used, 0) + + def test_upload_with_table_extraction(self): + """Test file upload with table extraction.""" + test_file = self._get_test_file() + file = (test_file.name, open(test_file, "rb"), "application/pdf") + + table_config = TableExtractionConfig(extract_tables=True) + + document = self.client.upload.file( + corpus_key=self.corpus.key, + file=file, + table_extraction_config=table_config, + filename="test_document_with_table_extraction.pdf", + request_timeout=600 # 10 minutes timeout + ) + + # Verify upload + self.assertIsNotNone(document) + self.assertGreater(document.storage_usage.bytes_used, 0) + + def test_upload_with_all_options(self): + """Test file upload with all options.""" + test_file = self._get_test_file() + file = (test_file.name, open(test_file, "rb"), "application/pdf") + + chunking_strategy = MaxCharsChunkingStrategy( + type="max_chars_chunking_strategy", + max_chars_per_chunk=200 + ) + + table_config = TableExtractionConfig(extract_tables=True) + + document = self.client.upload.file( + corpus_key=self.corpus.key, + file=file, + metadata={"key": "value", "test": True}, + chunking_strategy=chunking_strategy, + table_extraction_config=table_config, + filename="test_document_with_all_options.pdf", + request_timeout=600 # 10 minutes timeout + ) + + # Verify upload + self.assertIsNotNone(document) + self.assertGreater(document.storage_usage.bytes_used, 0) + self.assertGreater(document.storage_usage.metadata_bytes_used, 0) + + @classmethod + def tearDownClass(cls): + """Clean up test resources.""" + # Delete the test corpus + try: + cls.client.corpora.delete(cls.corpus.key) + except Exception: + pass + + diff --git a/int_tests/managers/test_users.py b/int_tests/managers/test_users.py new file mode 100644 index 0000000..5eb8e95 --- /dev/null +++ b/int_tests/managers/test_users.py @@ -0,0 +1,136 @@ +''' +import unittest +import os +import json + +from vectara import Vectara +from vectara.types import ApiRole + + +class TestUsersManager(unittest.TestCase): + client = None + created_users = None + app_client = None + + @classmethod + def setUpClass(cls): + # Create app client + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + cls.client = Vectara(api_key=api_key) + cls.app_client = cls.client.app_clients.create(name="test-users", api_roles=["owner"]) + + cls.created_users = set() + cls.client_wit_oauth = Vectara(client_id=cls.app_client.client_id, client_secret=cls.app_client.client_secret) + + + def _create_test_user(self, suffix=""): + """Helper method to create a test user.""" + username = f"test-user-{suffix}" if suffix else "test-user" + email = f"{username}@example.com" + + response = self.client_wit_oauth.users.create( + email=email, + username=username, + description=f"Test user {suffix}", + api_roles=["owner"] + ) + self.created_users.add(username) + print("\nCreated user response:") + print(json.dumps(response.model_dump(mode="json"), indent=2)) + print(response.email) + print(response.username) + print(response.description) + print(response.api_roles) + + return response + + def test_create_user(self): + """Test user creation.""" + user = self._create_test_user("create") + self.assertEqual(user.username, "test-user-create") + self.assertEqual(user.email, "test-user-create@example.com") + self.assertEqual(user.description, "Test user create") + self.assertIn(ApiRole, user.api_roles) + + def test_get_user(self): + """Test getting a user.""" + # Create a user first + created_user = self._create_test_user("get") + + # Get the user + user = self.client.users.get(username=created_user.username) + print("\nGet user response:") + print(json.dumps(user.to_dict(), indent=2)) + + self.assertEqual(user.username, created_user.username) + self.assertEqual(user.email, created_user.email) + self.assertEqual(user.description, created_user.description) + self.assertEqual(user.api_roles, created_user.api_roles) + + def test_update_user(self): + """Test updating a user.""" + # Create a user first + user = self._create_test_user("update") + + # Update the user + updated_user = self.client.users.update( + username=user.username, + description="Updated description", + api_roles=[ApiRole.ADMIN, ApiRole.USER] + ) + print("\nUpdate user response:") + print(json.dumps(updated_user.to_dict(), indent=2)) + + self.assertEqual(updated_user.username, user.username) + self.assertEqual(updated_user.description, "Updated description") + self.assertEqual(len(updated_user.api_roles), 2) + self.assertIn(ApiRole.ADMIN, updated_user.api_roles) + self.assertIn(ApiRole.USER, updated_user.api_roles) + + def test_list_users(self): + """Test listing users.""" + # Create some test users + self._create_test_user("list1") + self._create_test_user("list2") + + # List users + users = self.client.users.list() + print("\nList users response:") + print(json.dumps([user.to_dict() for user in users.items], indent=2)) + + # Verify our test users are in the list + found_users = {user.username for user in users.items} + self.assertTrue(all(f"test-user-list{i}" in found_users for i in range(1, 3))) + + def test_delete_user(self): + """Test deleting a user.""" + # Create a user first + user = self._create_test_user("delete") + + # Delete the user + self.client.users.delete(username=user.username) + self.created_users.remove(user.username) + + # Verify the user was deleted + with self.assertRaises(Exception): + self.client.users.get(username=user.username) + + @classmethod + def tearDownClass(cls): + """Clean up test resources.""" + # Delete all created users + for username in cls.created_users: + try: + cls.client_wit_oauth.users.delete(username=username) + except Exception: + pass + + # Delete the app client + if cls.app_client: + try: + cls.app_client.delete() + except Exception: + pass +''' \ No newline at end of file diff --git a/int_tests/resources/.vec_auth.yaml b/int_tests/resources/.vec_auth.yaml new file mode 100644 index 0000000..9da035f --- /dev/null +++ b/int_tests/resources/.vec_auth.yaml @@ -0,0 +1,5 @@ +default: + auth: + api_key: test_api_key + type: api_key + customer_id: test_customer diff --git a/int_tests/test_chat.py b/int_tests/test_chat.py index b562aad..d797f88 100644 --- a/int_tests/test_chat.py +++ b/int_tests/test_chat.py @@ -1,38 +1,53 @@ -import time import unittest +import os +from vectara import Vectara from vectara.core import RequestOptions, ApiError -from vectara.factory import Factory - from vectara import CoreDocument, CoreDocumentPart, SearchCorporaParameters, KeyedSearchCorpus, \ ContextConfiguration, CustomerSpecificReranker, GenerationParameters, CitationParameters, \ ChatParameters class TestChat(unittest.TestCase): - - def setUp(self): - self.addCleanup(self.cleanup) - self.client = Factory().build() - self.client.corpora.create(name="test-chat", key="test-chat") - time.sleep(30) - test_search_1_document = CoreDocument( - id="my-doc-id", - document_parts=[ - CoreDocumentPart( - text="Robot Utility Models are trained on a diverse set of environments and objects, and then can " - "be deployed in novel environments with novel objects without any further data or training.", - ) - ], + corpus_name = None + client = None + TEST_DOCUMENT = CoreDocument( + id="my-doc-id", + document_parts=[ + CoreDocumentPart( + text="Robot Utility Models are trained on a diverse set of environments and objects, and then can " + "be deployed in novel environments with novel objects without any further data or training.", + ) + ], + ) + + @classmethod + def setUpClass(cls): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + + cls.client = Vectara(api_key=api_key) + cls.request_options = RequestOptions(timeout_in_seconds=100) + cls.generation_params = GenerationParameters( + response_language="eng", + citations=CitationParameters(style="none"), + enable_factual_consistency_score=False, ) - self.client.documents.create("test-chat", request=test_search_1_document) + cls.chat_params = ChatParameters(store=True) - self.search_params = SearchCorporaParameters( + # Create corpus and add document + cls.corpus_name = "test-chat-corpus" + cls.client.corpora.create(name=cls.corpus_name, key=cls.corpus_name) + cls.client.documents.create(cls.corpus_name, request=cls.TEST_DOCUMENT) + + # Create default search parameters + cls.search_params = SearchCorporaParameters( corpora=[ KeyedSearchCorpus( - corpus_key="test-chat", + corpus_key=cls.corpus_name, metadata_filter="", - lexical_interpolation=1, + lexical_interpolation=0.05, ) ], context_configuration=ContextConfiguration( @@ -43,15 +58,6 @@ def setUp(self): reranker_id="rnk_272725719" ), ) - self.generation_params = GenerationParameters( - response_language="eng", - citations=CitationParameters( - style="none", - ), - enable_factual_consistency_score=True, - ) - self.chat_params = ChatParameters(store=True) - self.request_options = RequestOptions(timeout_in_seconds=100) def test_chat(self): session = self.client.create_chat_session( @@ -61,23 +67,30 @@ def test_chat(self): request_options=self.request_options ) - response = session.chat(query="Robot Utility Models") - self.assertIsNotNone(response.chat_id) - self.assertIsNotNone(response.answer) + first_response = session.chat(query="What are Robot Utility Models?") + self.assertIsNotNone(first_response.chat_id) + self.assertIsNotNone(first_response.answer) + first_chat_id = first_response.chat_id - response = session.chat(query="Utility Models") - self.assertIsNotNone(response.chat_id) - self.assertIsNotNone(response.answer) + second_response = session.chat(query="How do they handle novel environments?") + self.assertIsNotNone(second_response.chat_id) + self.assertIsNotNone(second_response.answer) + + # Verify chat continuity + self.assertEqual(first_chat_id, second_response.chat_id, "Chat ID should remain the same across turns") + def test_chat_with_default_params(self): session = self.client.create_chat_session( - search=SearchCorporaParameters(corpora=[ - KeyedSearchCorpus( - corpus_key="test-chat", - metadata_filter="", - lexical_interpolation=1, - ) - ]) + search=SearchCorporaParameters( + corpora=[ + KeyedSearchCorpus( + corpus_key=self.corpus_name, + metadata_filter="", + lexical_interpolation=1, + ) + ] + ) ) response = session.chat(query="Robot Utility Models") @@ -100,7 +113,7 @@ def test_exception_in_chat(self): def test_chat_stream(self): session = self.client.create_chat_session( - search=self.search_params, + search=self.search_params, generation=self.generation_params, chat_config=self.chat_params, request_options=self.request_options @@ -111,10 +124,10 @@ def test_chat_stream(self): self.assertGreater(len(response_chunks), 0) - def cleanup(self): - for corpora in self.client.corpora.list(): - self.client.corpora.delete(corpora.key) - - def tearDown(self): - for corpora in self.client.corpora.list(): - self.client.corpora.delete(corpora.key) + @classmethod + def tearDownClass(cls): + """Clean up all test resources.""" + try: + cls.client.corpora.delete(cls.corpus_name) + except Exception: + pass diff --git a/int_tests/test_query.py b/int_tests/test_query.py index c1954fe..d02b1e6 100644 --- a/int_tests/test_query.py +++ b/int_tests/test_query.py @@ -1,8 +1,8 @@ -import time import unittest +import os +from vectara import Vectara from vectara.core import RequestOptions -from vectara.factory import Factory from vectara import CoreDocument, CoreDocumentPart, SearchCorporaParameters, KeyedSearchCorpus, \ ContextConfiguration, CustomerSpecificReranker, GenerationParameters, CitationParameters, \ QueryStreamedResponse, QueryFullResponse, MmrReranker, NoneReranker, UserFunctionReranker, \ @@ -10,253 +10,180 @@ class TestMultipleCorporaQuery(unittest.TestCase): - def setUp(self): - self.addCleanup(self.cleanup) - self.client = Factory().build() - self.client.corpora.create(name="test-search-1", key="test-search-1") - self.client.corpora.create(name="test-search-2", key="test-search-2") - time.sleep(30) - test_search_1_document = CoreDocument( - id="my-doc-id", - document_parts=[ - CoreDocumentPart( - text="Robot Utility Models are trained on a diverse set of environments and objects, and then can " - "be deployed in novel environments with novel objects without any further data or training.", - ) - ], - ) - - test_search_2_document = CoreDocument( - id="my-doc-id", - document_parts=[ - CoreDocumentPart( - text="We show that it is possible to create general Robot Utility Models with a moderate amount " - "of data in the order of 1,000 demonstrations (Section 2). These RUMs achieve a 90% average " - "success rate on zero-shot deployment in 25 novel environments (Section 3.1).", - ) - ], - ) - self.client.documents.create("test-search-1", request=test_search_1_document) - self.client.documents.create("test-search-2", request=test_search_2_document) - - self.search_params = SearchCorporaParameters( - corpora=[ - KeyedSearchCorpus( - corpus_key="test-search-1", - metadata_filter="", - lexical_interpolation=1, - ), - KeyedSearchCorpus( - corpus_key="test-search-2", - metadata_filter="", - lexical_interpolation=1, - ) - ], - context_configuration=ContextConfiguration( - sentences_before=2, - sentences_after=2, - ), - reranker=CustomerSpecificReranker( - reranker_id="rnk_272725719" - ), - ) - self.generation_params = GenerationParameters( + client = None + corpus_names = None + test_documents = None + + TEST_DOCUMENT_1 = CoreDocument( + id="my-doc-id", + document_parts=[ + CoreDocumentPart( + text="Robot Utility Models are trained on a diverse set of environments and objects, and then can " + "be deployed in novel environments with novel objects without any further data or training.", + ) + ], + ) + + TEST_DOCUMENT_2 = CoreDocument( + id="my-doc-id", + document_parts=[ + CoreDocumentPart( + text="We show that it is possible to create general Robot Utility Models with a moderate amount " + "of data in the order of 1,000 demonstrations (Section 2). These RUMs achieve a 90% average " + "success rate on zero-shot deployment in 25 novel environments (Section 3.1).", + ) + ], + ) + + @classmethod + def setUpClass(cls): + api_key = os.getenv("VECTARA_API_KEY") + if not api_key: + raise ValueError("VECTARA_API_KEY not found in environment variables or .env file") + cls.client = Vectara(api_key=api_key) + cls.request_options = RequestOptions(timeout_in_seconds=100) + cls.generation_params = GenerationParameters( response_language="eng", - citations=CitationParameters( - style="none", - ), + citations=CitationParameters(style="none"), enable_factual_consistency_score=False, ) - self.request_options = RequestOptions(timeout_in_seconds=100) + # Create corpora and add documents + cls.corpus_names = ["test-query-corpus-1", "test-query-corpus-2"] + cls.test_documents = [cls.TEST_DOCUMENT_1, cls.TEST_DOCUMENT_2] - def test_query(self): - response = self.client.query(query="Robot Utility Models", search=self.search_params, - generation=self.generation_params, - request_options=self.request_options) - self.assertIsInstance(response, QueryFullResponse) - self.assertIsNotNone(response.summary) - self.assertGreater(len(response.search_results), 0) + for corpus_name, document in zip(cls.corpus_names, cls.test_documents): + cls.client.corpora.create(name=corpus_name, key=corpus_name) + cls.client.documents.create(corpus_name, request=document) - def test_query_with_different_lambda(self): - search = SearchCorporaParameters( + def setUp(self): + # Create default search parameters + self.search_params = self._create_search_params( + lexical_interpolation=1, + reranker=CustomerSpecificReranker(reranker_id="rnk_272725719") + ) + + def _create_search_params(self, lexical_interpolation=0, reranker=None): + """Helper method to create search parameters with given interpolation and reranker.""" + return SearchCorporaParameters( corpora=[ KeyedSearchCorpus( - corpus_key="test-search-1", + corpus_key=corpus_name, metadata_filter="", - lexical_interpolation=0, - ), - KeyedSearchCorpus( - corpus_key="test-search-2", - metadata_filter="", - lexical_interpolation=0, + lexical_interpolation=lexical_interpolation, ) + for corpus_name in self.corpus_names ], context_configuration=ContextConfiguration( sentences_before=2, sentences_after=2, ), - reranker=CustomerSpecificReranker( - reranker_id="rnk_272725719" - ), + reranker=reranker, ) - response = self.client.query(query="Robot Utility Models", search=search, - generation=self.generation_params, - request_options=self.request_options) + def _assert_query_response(self, response): + """Helper method to assert common response properties.""" self.assertIsInstance(response, QueryFullResponse) self.assertIsNotNone(response.summary) self.assertGreater(len(response.search_results), 0) - search = SearchCorporaParameters( - corpora=[ - KeyedSearchCorpus( - corpus_key="test-search-1", - metadata_filter="", - lexical_interpolation=0.1, - ), - KeyedSearchCorpus( - corpus_key="test-search-2", - metadata_filter="", - lexical_interpolation=0.1, - ) - ], - context_configuration=ContextConfiguration( - sentences_before=2, - sentences_after=2, - ), - reranker=CustomerSpecificReranker( - reranker_id="rnk_272725719" - ), + def test_query(self): + response = self.client.query( + query="Robot Utility Models", + search=self.search_params, + generation=self.generation_params, + request_options=self.request_options ) + self._assert_query_response(response) - response = self.client.query(query="Robot Utility Models", search=search, - generation=self.generation_params, - request_options=self.request_options) - self.assertIsInstance(response, QueryFullResponse) - self.assertIsNotNone(response.summary) - self.assertGreater(len(response.search_results), 0) + def test_query_with_different_lambda(self): + # Test with lexical_interpolation=0 + search = self._create_search_params( + lexical_interpolation=0, + reranker=CustomerSpecificReranker(reranker_id="rnk_272725719") + ) + response = self.client.query( + query="Robot Utility Models", + search=search, + generation=self.generation_params, + request_options=self.request_options + ) + self._assert_query_response(response) - def test_query_with_mmr_reranker(self): - search = SearchCorporaParameters( - corpora=[ - KeyedSearchCorpus( - corpus_key="test-search-1", - metadata_filter="", - lexical_interpolation=0, - ), - KeyedSearchCorpus( - corpus_key="test-search-2", - metadata_filter="", - lexical_interpolation=0, - ) - ], - context_configuration=ContextConfiguration( - sentences_before=2, - sentences_after=2, - ), - reranker=MmrReranker( - diversity_bias=0.3 - ), + # Test with lexical_interpolation=0.1 + search = self._create_search_params( + lexical_interpolation=0.1, + reranker=CustomerSpecificReranker(reranker_id="rnk_272725719") ) + response = self.client.query( + query="Robot Utility Models", + search=search, + generation=self.generation_params, + request_options=self.request_options + ) + self._assert_query_response(response) - response = self.client.query(query="Robot Utility Models", search=search, - generation=self.generation_params, - request_options=self.request_options) - self.assertIsInstance(response, QueryFullResponse) - self.assertIsNotNone(response.summary) - self.assertGreater(len(response.search_results), 0) + def test_query_with_mmr_reranker(self): + search = self._create_search_params( + lexical_interpolation=0, + reranker=MmrReranker(diversity_bias=0.3) + ) + response = self.client.query( + query="Robot Utility Models", + search=search, + generation=self.generation_params, + request_options=self.request_options + ) + self._assert_query_response(response) def test_query_with_none_reranker(self): - search = SearchCorporaParameters( - corpora=[ - KeyedSearchCorpus( - corpus_key="test-search-1", - metadata_filter="", - lexical_interpolation=0, - ), - KeyedSearchCorpus( - corpus_key="test-search-2", - metadata_filter="", - lexical_interpolation=0, - ) - ], - context_configuration=ContextConfiguration( - sentences_before=2, - sentences_after=2, - ), - reranker=NoneReranker(), + search = self._create_search_params( + lexical_interpolation=0, + reranker=NoneReranker() ) - - response = self.client.query(query="Robot Utility Models", search=search, - generation=self.generation_params, - request_options=self.request_options) - self.assertIsInstance(response, QueryFullResponse) - self.assertIsNotNone(response.summary) - self.assertGreater(len(response.search_results), 0) + response = self.client.query( + query="Robot Utility Models", + search=search, + generation=self.generation_params, + request_options=self.request_options + ) + self._assert_query_response(response) def test_query_with_udf_reranker(self): - search = SearchCorporaParameters( - corpora=[ - KeyedSearchCorpus( - corpus_key="test-search-1", - metadata_filter="", - lexical_interpolation=0, - ), - KeyedSearchCorpus( - corpus_key="test-search-2", - metadata_filter="", - lexical_interpolation=0, - ) - ], - context_configuration=ContextConfiguration( - sentences_before=2, - sentences_after=2, - ), + search = self._create_search_params( + lexical_interpolation=0, reranker=UserFunctionReranker( user_function="if (get('$.score') < 0.7) null else get('$.score') + 1" - ), + ) + ) + response = self.client.query( + query="Robot Utility Models", + search=search, + generation=self.generation_params, + request_options=self.request_options ) - - response = self.client.query(query="Robot Utility Models", search=search, - generation=self.generation_params, - request_options=self.request_options) self.assertIsInstance(response, QueryFullResponse) for result in response.search_results: self.assertGreater(result.score, 1) def test_query_with_chain_reranker(self): - search = SearchCorporaParameters( - corpora=[ - KeyedSearchCorpus( - corpus_key="test-search-1", - metadata_filter="", - lexical_interpolation=0, - ), - KeyedSearchCorpus( - corpus_key="test-search-2", - metadata_filter="", - lexical_interpolation=0, - ) - ], - context_configuration=ContextConfiguration( - sentences_before=2, - sentences_after=2, - ), + search = self._create_search_params( + lexical_interpolation=0, reranker=ChainReranker( rerankers=[ - CustomerSpecificReranker( - reranker_id="rnk_272725719" - ), + CustomerSpecificReranker(reranker_id="rnk_272725719"), UserFunctionReranker( - user_function="if (get('$.score') < 0.7) null else get('$.score') + 1"), + user_function="if (get('$.score') < 0.7) null else get('$.score') + 1" + ), ] ) ) - - response = self.client.query(query="Robot Utility Models", search=search, - generation=self.generation_params, - request_options=self.request_options) + response = self.client.query( + query="Robot Utility Models", + search=search, + generation=self.generation_params, + request_options=self.request_options + ) self.assertIsInstance(response, QueryFullResponse) for result in response.search_results: self.assertGreater(result.score, 1) @@ -264,33 +191,34 @@ def test_query_with_chain_reranker(self): def test_query_with_fcs_enabled(self): generation_params = GenerationParameters( response_language="eng", - citations=CitationParameters( - style="none", - ), + citations=CitationParameters(style="none"), enable_factual_consistency_score=True, ) - response = self.client.query(query="Robot Utility Models", search=self.search_params, - generation=generation_params, - request_options=self.request_options) - self.assertIsInstance(response, QueryFullResponse) - self.assertIsNotNone(response.summary) - self.assertGreater(len(response.search_results), 0) + response = self.client.query( + query="Robot Utility Models", + search=self.search_params, + generation=generation_params, + request_options=self.request_options + ) + self._assert_query_response(response) def test_query_stream(self): - response = self.client.query_stream(query="Robot Utility Models", search=self.search_params, - generation=self.generation_params, - request_options=self.request_options) - + response = self.client.query_stream( + query="Robot Utility Models", + search=self.search_params, + generation=self.generation_params, + request_options=self.request_options + ) response = list(response) - self.assertGreater(len(response), 0) for item in response: self.assertIsInstance(item, QueryStreamedResponse) - def cleanup(self): - for corpora in self.client.corpora.list(): - self.client.corpora.delete(corpora.key) - - def tearDown(self): - for corpora in self.client.corpora.list(): - self.client.corpora.delete(corpora.key) + @classmethod + def tearDownClass(cls): + """Clean up all test resources.""" + for corpus_name in cls.corpus_names: + try: + cls.client.corpora.delete(corpus_name) + except Exception: + pass diff --git a/int_tests/vectara_int_tests/managers/__init__.py b/int_tests/vectara_int_tests/managers/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/int_tests/vectara_int_tests/managers/test_api_keys.py b/int_tests/vectara_int_tests/managers/test_api_keys.py deleted file mode 100644 index b97c7ff..0000000 --- a/int_tests/vectara_int_tests/managers/test_api_keys.py +++ /dev/null @@ -1,58 +0,0 @@ -import time -import unittest - -from vectara.factory import Factory - - -class TestApiKeys(unittest.TestCase): - def setUp(self): - self.client = Factory().build() - self.addCleanup(self.cleanup) - response = self.client.corpora.create(name="test-api-key-manager", key="test-document-manager") - self.key = response.key - time.sleep(60) - - def test_create_api_key(self): - response = self.client.api_keys.create(name="test-key", api_key_role="serving", corpus_keys=[self.key]) - self.assertEqual(response.name, "test-key") - self.assertEqual(response.enabled, True) - self.assertEqual(response.api_key_role, "serving") - - def test_delete_api_key(self): - create_response = self.client.api_keys.create(name="test-key", api_key_role="serving", corpus_keys=[self.key]) - delete_response = self.client.api_keys.delete(create_response.id) - - self.assertIsNone(delete_response) - - def test_get_api_key(self): - create_response = self.client.api_keys.create(name="test-key", api_key_role="serving", corpus_keys=[self.key]) - get_response = self.client.api_keys.get(create_response.id) - - self.assertEqual(get_response.name, create_response.name) - - def test_update_api_key(self): - create_response = self.client.api_keys.create(name="test-key", api_key_role="serving", corpus_keys=[self.key]) - update_response = self.client.api_keys.update(create_response.id, enabled=False) - - self.assertEqual(update_response.enabled, False) - - def test_list_api_keys(self): - api_keys_names = [] - for index in range(2): - create_response = self.client.api_keys.create(name=f"test-key-{index}", api_key_role="serving", - corpus_keys=[self.key]) - api_keys_names.append(create_response.name) - - for key in self.client.api_keys.list(): - self.assertIn(key.name, api_keys_names) - - def cleanup(self): - response = self.client.corpora.list() - for corpora in response: - self.client.corpora.delete(corpora.key) - - def tearDown(self): - for corpora in self.client.corpora.list(): - self.client.corpora.delete(corpora.key) - for key in self.client.api_keys.list(): - self.client.api_keys.delete(key.id) diff --git a/int_tests/vectara_int_tests/managers/test_app_client.py b/int_tests/vectara_int_tests/managers/test_app_client.py deleted file mode 100644 index 24659b3..0000000 --- a/int_tests/vectara_int_tests/managers/test_app_client.py +++ /dev/null @@ -1,48 +0,0 @@ -import unittest - -from vectara.factory import Factory - - -class TestAppClient(unittest.TestCase): - def setUp(self): - self.client = Factory().build() - - def test_create_app_client(self): - response = self.client.app_clients.create(name="test-client", api_roles=["owner"]) - self.assertEqual(response.name, "test-client") - self.assertIsNotNone(response.client_id) - self.assertIsNotNone(response.client_secret) - - def test_get_app_client(self): - create_response = self.client.app_clients.create(name="test-client", api_roles=["owner"]) - get_response = self.client.app_clients.get(create_response.id) - - self.assertEqual(get_response.client_id, create_response.client_id) - self.assertEqual(get_response.client_secret, create_response.client_secret) - - def test_delete_app_client(self): - create_response = self.client.app_clients.create(name="test-client", api_roles=["owner"]) - del_response = self.client.app_clients.delete(create_response.id) - - self.assertIsNone(del_response) - - def test_update_app_client(self): - create_response = self.client.app_clients.create(name="test-client", api_roles=["owner"]) - update_response = self.client.app_clients.update( - create_response.id, api_roles=["owner", "administrator"], description="test client") - - self.assertEqual(update_response.api_roles, ["administrator"]) - self.assertEqual(update_response.description, "test client") - - def test_list_app_clients(self): - client_ids = [] - for index in range(2): - create_response = self.client.app_clients.create(name=f"test-client-{index}", api_roles=["owner"]) - client_ids.append(create_response.client_id) - - for client in self.client.app_clients.list(): - self.assertIn(client.client_id, client_ids) - - def tearDown(self): - for client in self.client.app_clients.list(): - self.client.app_clients.delete(client.id) diff --git a/int_tests/vectara_int_tests/managers/test_auth.py b/int_tests/vectara_int_tests/managers/test_auth.py deleted file mode 100644 index 985aca2..0000000 --- a/int_tests/vectara_int_tests/managers/test_auth.py +++ /dev/null @@ -1,32 +0,0 @@ -import unittest - -from vectara.factory import Factory - - -class TestAuthManager(unittest.TestCase): - - def setUp(self): - self.addCleanup(self.cleanup) - self.client = Factory().build() - response = self.client.app_clients.create(name="test-client", api_roles=["owner"]) - self.client_id = response.client_id - self.client_secret = response.client_secret - - def test_get_access_token(self): - response = self.client.auth.get_token( - client_id=self.client_id, - client_secret=self.client_secret, - grant_type="client_credentials" - ) - - self.assertIsNotNone(response.access_token) - self.assertIsNotNone(response.token_type) - self.assertIsNotNone(response.expires_in) - - def cleanup(self): - for client in self.client.app_clients.list(): - self.client.app_clients.delete(client.id) - - def tearDown(self): - for client in self.client.app_clients.list(): - self.client.app_clients.delete(client.id) diff --git a/int_tests/vectara_int_tests/managers/test_config.py b/int_tests/vectara_int_tests/managers/test_config.py deleted file mode 100644 index 985a03b..0000000 --- a/int_tests/vectara_int_tests/managers/test_config.py +++ /dev/null @@ -1,74 +0,0 @@ -import os - -from vectara.factory import Factory -from vectara.config import HomeConfigLoader, EnvConfigLoader, ApiKeyAuthConfig, OAuth2AuthConfig -from pathlib import Path - -import unittest -import logging - -class FactoryConfigTest(unittest.TestCase): - """ - This test depends on our YAML default config being defined. - - We use this to test various methods of injection. - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - logging.basicConfig(format='%(asctime)s:%(name)-35s %(levelname)s:%(message)s', level=logging.INFO, - datefmt='%H:%M:%S %z') - self.logger = logging.getLogger(self.__class__.__name__) - - def _test_factory_auth(self, target: Factory, expected_method: str): - client = target.build() - self.assertEqual(expected_method, target.load_method) - - if not client.corpus_manager: - raise Exception("Corpus manager should be defined") - - results = client.corpus_manager.find_corpora_with_filter("", 1) - if results and len(results) > 0: - self.logger.info(f"Found corpus [{results[0].key}]") - - - def test_default_load(self): - factory = Factory() - self._test_factory_auth(factory, "path_home") - - def test_explicit_path(self): - factory = Factory(config_path=str(Path.home().resolve())) - self._test_factory_auth(factory, "path_explicit") - - def test_env(self): - client_config = HomeConfigLoader().load() - os.environ[EnvConfigLoader.ENV_CUSTOMER_ID] = client_config.customer_id - if isinstance(client_config.auth, ApiKeyAuthConfig): - os.environ[EnvConfigLoader.ENV_API_KEY] = client_config.auth.api_key - elif isinstance(client_config.auth, OAuth2AuthConfig): - os.environ[EnvConfigLoader.ENV_OAUTH2_CLIENT_ID] = client_config.auth.app_client_id - os.environ[EnvConfigLoader.ENV_OAUTH2_CLIENT_SECRET] = client_config.auth.app_client_secret - - try: - factory = Factory() - self._test_factory_auth(factory, "env") - finally: - if isinstance(client_config.auth, ApiKeyAuthConfig): - del os.environ[EnvConfigLoader.ENV_API_KEY] - elif isinstance(client_config.auth, OAuth2AuthConfig): - del os.environ[EnvConfigLoader.ENV_OAUTH2_CLIENT_ID] - del os.environ[EnvConfigLoader.ENV_OAUTH2_CLIENT_SECRET] - - def test_explicit_typed(self): - client_config = HomeConfigLoader().load() - factory = Factory(config=client_config) - self._test_factory_auth(factory, "explicit_typed") - - def test_explicit_dict(self): - client_config = HomeConfigLoader().load().model_dump() - factory = Factory(config=client_config) - self._test_factory_auth(factory, "explicit_dict") - - -if __name__ == '__main__': - unittest.main() diff --git a/int_tests/vectara_int_tests/managers/test_corpora.py b/int_tests/vectara_int_tests/managers/test_corpora.py deleted file mode 100644 index 06a6e89..0000000 --- a/int_tests/vectara_int_tests/managers/test_corpora.py +++ /dev/null @@ -1,188 +0,0 @@ -import time -import unittest - -from vectara import FilterAttribute, CorpusCustomDimension, CoreDocument, CoreDocumentPart, SearchCorporaParameters, \ - ContextConfiguration, CustomerSpecificReranker, GenerationParameters, ModelParameters, \ - CitationParameters, SearchCorpusParameters -from vectara.core import RequestOptions -from vectara.factory import Factory - - -class TestCorporaManager(unittest.TestCase): - def setUp(self): - self.client = Factory().build() - self.addCleanup(self.cleanup) - - def test_create_corpora(self): - filter_attributes = FilterAttribute( - name="Title", - level="document", - description="The title of the document.", - indexed=True, - type="text" - ) - custom_dimensions = CorpusCustomDimension( - name="importance", - description="Product importance.", - indexing_default=0, - querying_default=0 - ) - response = self.client.corpora.create( - key="test-create-corpus", - name="test-create-corpus", - description="test description", - queries_are_answers=True, - documents_are_questions=True, - encoder_name="boomerang-2023-q3", - filter_attributes=[filter_attributes], - # custom_dimensions=[custom_dimensions] - ) - time.sleep(30) - self.assertEqual(response.key, "test-create-corpus") - self.assertEqual(response.name, "test-create-corpus") - self.assertEqual(response.description, "test description") - self.assertEqual(response.queries_are_answers, True) - self.assertEqual(response.documents_are_questions, True) - self.assertEqual(response.encoder_name, "boomerang-2023-q3") - self.assertEqual(response.filter_attributes, [filter_attributes]) - # self.assertEqual(response.custom_dimensions, [custom_dimensions]) - - def test_list_corpora(self): - self.client.corpora.create(key="corpus-1") - self.client.corpora.create(key="corpus-2") - time.sleep(30) - - response = self.client.corpora.list() - self.assertEqual(len(list(response)), 2) - - for corpora in response: - self.assertIn(corpora.key, ["corpus-1", "corpus-2"]) - - def test_delete_corpora(self): - self.client.corpora.create(key="test-delete-corpus") - self.client.corpora.delete(corpus_key="test-delete-corpus") - time.sleep(30) - corpora = self.client.corpora.list() - - self.assertEqual(len(list(corpora)), 0) - - def test_update_corpora(self): - response = self.client.corpora.create(key="test-update-corpus") - time.sleep(30) - self.assertEqual(response.key, "test-update-corpus") - self.assertEqual(response.name, "test-update-corpus") - - response = self.client.corpora.update("test-update-corpus", - name="updated-name", description="updated-description") - - time.sleep(30) - - self.assertEqual(response.description, "updated-description") - self.assertEqual(response.name, "updated-name") - - def test_get_metadata_of_corpora(self): - self.client.corpora.create(key="test-get-metadata", - description="test-description", name="Test") - time.sleep(30) - corpora = self.client.corpora.get("test-get-metadata") - - self.assertEqual(corpora.key, "test-get-metadata") - self.assertEqual(corpora.name, "Test") - self.assertEqual(corpora.description, "test-description") - - def test_corpus_reset(self): - self.client.corpora.create(key="test-reset-corpus") - time.sleep(30) - document = CoreDocument( - id="my-doc-id", - document_parts=[ - CoreDocumentPart( - text="I'm a nice document part.", - ) - ], - ) - self.client.documents.create("test-reset-corpus", request=document) - documents = self.client.documents.list("test-reset-corpus") - self.assertEqual(len(list(documents)), 1) - - self.client.corpora.reset("test-reset-corpus") - - documents = self.client.documents.list("test-reset-corpus") - self.assertEqual(len(list(documents)), 0) - - def test_replace_filter_attributes(self): - self.client.corpora.create(key="test-reset-filters") - time.sleep(30) - filter_attributes = FilterAttribute( - name="Title", - level="document", - description="The title of the document.", - indexed=True, - type="text" - ) - res = self.client.corpora.replace_filter_attributes("test-reset-filters", filter_attributes=[filter_attributes]) - - self.assertIsNotNone(res.job_id) - - def test_search(self): - self.client.corpora.create(name="test-search", key="test-search") - time.sleep(30) - document = CoreDocument( - id="my-doc-id", - document_parts=[ - CoreDocumentPart( - text="Robot Utility Models are trained on a diverse set of environments and objects, and then can " - "be deployed in novel environments with novel objects without any further data or training.", - ) - ], - ) - self.client.documents.create("test-search", request=document) - - response = self.client.corpora.search(corpus_key="test-search", query="Robot Utility Models") - self.assertIsNone(response.summary) - self.assertGreater(len(response.search_results), 0) - - def test_query(self): - self.client.corpora.create(name="test-search", key="test-search") - time.sleep(60) - document = CoreDocument( - id="my-doc-id", - document_parts=[ - CoreDocumentPart( - text="Robot Utility Models are trained on a diverse set of environments and objects, and then can " - "be deployed in novel environments with novel objects without any further data or training.", - ) - ], - ) - self.client.documents.create("test-search", request=document) - search_params = SearchCorpusParameters( - context_configuration=ContextConfiguration( - sentences_before=2, - sentences_after=2, - ), - reranker=CustomerSpecificReranker( - reranker_id="rnk_272725719" - ), - ) - generation_params = GenerationParameters( - response_language="eng", - citations=CitationParameters( - style="none", - ), - enable_factual_consistency_score=True, - ) - request_options = RequestOptions(timeout_in_seconds=100) - - response = self.client.corpora.query(corpus_key="test-search", search=search_params, - query="Robot Utility Models", generation=generation_params, - request_options=request_options) - self.assertIsNotNone(response.summary) - self.assertGreater(len(response.search_results), 0) - - def cleanup(self): - for corpora in self.client.corpora.list(): - self.client.corpora.delete(corpora.key) - - def tearDown(self): - for corpora in self.client.corpora.list(): - self.client.corpora.delete(corpora.key) diff --git a/int_tests/vectara_int_tests/managers/test_corpus.py b/int_tests/vectara_int_tests/managers/test_corpus.py deleted file mode 100644 index ff746dc..0000000 --- a/int_tests/vectara_int_tests/managers/test_corpus.py +++ /dev/null @@ -1,41 +0,0 @@ -from vectara.corpora.client import CorporaClient -from vectara.managers.corpus import CreateCorpusRequest -from vectara.client import Vectara -from unittest.mock import MagicMock -from vectara.factory import Factory - -import time -import unittest -import logging - -class CorpusManagerTest(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - logging.basicConfig(format='%(asctime)s:%(name)-35s %(levelname)s:%(message)s', level=logging.INFO, datefmt='%H:%M:%S %z') - self.logger = logging.getLogger(self.__class__.__name__) - - - def test_find_corpora_by_name_no_match(self): - client = Factory().build() - - found = client.corpus_manager.find_corpora_by_name("company_names") - self.assertEqual(len(found), 0) - - def test_find_corpora_by_name_match(self): - client = Factory().build() - - found = client.corpus_manager.find_corpora_by_name("company_names_document") - self.assertEqual(len(found), 1) - - def test_create_corpus(self): - client = Factory().build() - - request = CreateCorpusRequest.model_validate( - { - "key": "test-sdk-corpus", - "name": "test-sdk-corpus", - "description": "Our first test corpus from the SDK" - }) - client.corpus_manager.create_corpus(request, delete_existing=True) - diff --git a/int_tests/vectara_int_tests/managers/test_documnet.py b/int_tests/vectara_int_tests/managers/test_documnet.py deleted file mode 100644 index be15705..0000000 --- a/int_tests/vectara_int_tests/managers/test_documnet.py +++ /dev/null @@ -1,84 +0,0 @@ -from unittest import TestCase - -from vectara import CoreDocument, CoreDocumentPart -from vectara.factory import Factory - - -class TestDocument(TestCase): - - def setUp(self): - self.addCleanup(self.cleanup) - self.client = Factory().build() - response = self.client.corpora.create(name="test-document-manager", key="test-document-manager") - self.key = response.key - - def test_add_document(self): - document = CoreDocument( - id="my-doc-id", - document_parts=[ - CoreDocumentPart( - text="test-doc-part-1", - ) - ], - ) - response = self.client.documents.create(self.key, request=document) - - self.assertEqual(response.id, "my-doc-id") - - def test_delete_document(self): - document = CoreDocument( - id="my-doc-id", - document_parts=[ - CoreDocumentPart( - text="test-doc-part-1", - ) - ], - ) - self.client.documents.create(self.key, request=document) - - response = self.client.documents.delete(self.key, "my-doc-id") - - self.assertIsNone(response) - - def test_get_document(self): - document = CoreDocument( - id="my-doc-id", - document_parts=[ - CoreDocumentPart( - text="test-doc-part-1", - ) - ], - ) - self.client.documents.create(self.key, request=document) - - response = self.client.documents.get_corpus_document(self.key, "my-doc-id") - - self.assertEqual(response.id, "my-doc-id") - - def test_list_documents(self): - doc_ids = [] - for index in range(2): - document = CoreDocument( - id=f"my-doc-id-{index}", - document_parts=[ - CoreDocumentPart( - text="test-doc-part-1", - ) - ], - ) - response = self.client.documents.create(self.key, request=document) - doc_ids.append(response.id) - - response = self.client.documents.list(self.key) - for doc in response: - self.assertIn(doc.id, doc_ids) - - def cleanup(self): - response = self.client.corpora.list() - for corpora in response: - self.client.corpora.delete(corpora.key) - - def tearDown(self): - response = self.client.corpora.list() - for corpora in response: - self.client.corpora.delete(corpora.key) diff --git a/int_tests/vectara_int_tests/managers/test_encoders.py b/int_tests/vectara_int_tests/managers/test_encoders.py deleted file mode 100644 index 27e4087..0000000 --- a/int_tests/vectara_int_tests/managers/test_encoders.py +++ /dev/null @@ -1,14 +0,0 @@ -import unittest - -from vectara.factory import Factory - - -class TestEncodersManager(unittest.TestCase): - - def setUp(self): - self.client = Factory().build() - - def test_list_encoders(self): - response = self.client.encoders.list() - for encoder in response: - self.assertIsNotNone(encoder.name) diff --git a/int_tests/vectara_int_tests/managers/test_generation_presets.py b/int_tests/vectara_int_tests/managers/test_generation_presets.py deleted file mode 100644 index d4f5037..0000000 --- a/int_tests/vectara_int_tests/managers/test_generation_presets.py +++ /dev/null @@ -1,13 +0,0 @@ -import unittest - -from vectara.factory import Factory - - -class TestGenerationPresetsManager(unittest.TestCase): - def setUp(self): - self.client = Factory().build() - - def test_list_generation_presets(self): - response = self.client.generation_presets.list_generation_presets() - for gp in response.generation_presets: - self.assertIsNotNone(gp.name) diff --git a/int_tests/vectara_int_tests/managers/test_jobs.py b/int_tests/vectara_int_tests/managers/test_jobs.py deleted file mode 100644 index 346fb67..0000000 --- a/int_tests/vectara_int_tests/managers/test_jobs.py +++ /dev/null @@ -1,38 +0,0 @@ -import unittest - -from vectara import FilterAttribute -from vectara.factory import Factory - - -class TestJobsManager(unittest.TestCase): - def setUp(self): - self.client = Factory().build() - self.client.corpora.create(key="test-reset-filters") - filter_attributes = FilterAttribute( - name="Title", - level="document", - description="The title of the document.", - indexed=True, - type="text" - ) - res = self.client.corpora.replace_filter_attributes("test-reset-filters", filter_attributes=[filter_attributes]) - - self.job_id = res.job_id - - def test_get_job(self): - res = self.client.jobs.get(job_id=self.job_id) - - self.assertEqual(res.id, self.job_id) - self.assertEqual(res.corpus_keys, ["test-reset-filters"]) - - def test_list_jobs(self): - for job in self.client.jobs.list(): - self.assertIsNotNone(job.id) - - def cleanup(self): - for corpora in self.client.corpora.list(): - self.client.corpora.delete(corpora.key) - - def tearDown(self): - for corpora in self.client.corpora.list(): - self.client.corpora.delete(corpora.key) diff --git a/int_tests/vectara_int_tests/managers/test_llms.py b/int_tests/vectara_int_tests/managers/test_llms.py deleted file mode 100644 index 15e8a14..0000000 --- a/int_tests/vectara_int_tests/managers/test_llms.py +++ /dev/null @@ -1,13 +0,0 @@ -import unittest - -from vectara.factory import Factory - - -class TestLlmsManager(unittest.TestCase): - def setUp(self): - self.client = Factory().build() - - def test_list_llms(self): - response = self.client.llms.list() - for reranker in response: - self.assertIsNotNone(reranker.name) \ No newline at end of file diff --git a/int_tests/vectara_int_tests/managers/test_rerankers.py b/int_tests/vectara_int_tests/managers/test_rerankers.py deleted file mode 100644 index 11f584a..0000000 --- a/int_tests/vectara_int_tests/managers/test_rerankers.py +++ /dev/null @@ -1,13 +0,0 @@ -import unittest - -from vectara.factory import Factory - - -class TestRerankersManager(unittest.TestCase): - def setUp(self): - self.client = Factory().build() - - def test_list_rerankers(self): - response = self.client.rerankers.list() - for llm in response: - self.assertIsNotNone(llm.name) diff --git a/int_tests/vectara_int_tests/managers/test_upload.py b/int_tests/vectara_int_tests/managers/test_upload.py deleted file mode 100644 index 07b8348..0000000 --- a/int_tests/vectara_int_tests/managers/test_upload.py +++ /dev/null @@ -1,50 +0,0 @@ -from vectara.corpora.client import CorporaClient -from vectara.managers.corpus import CreateCorpusRequest -from vectara.client import Vectara -from vectara.types import Document -from unittest.mock import MagicMock -from vectara.factory import Factory -#from vectara.utils.httpx_logging import dump_all_requests -from pathlib import Path -import time -import unittest -import logging -import json -import urllib - -class UploadManagerTest(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - logging.basicConfig(format='%(asctime)s:%(name)-35s %(levelname)s:%(message)s', level=logging.DEBUG, datefmt='%H:%M:%S %z') - self.logger = logging.getLogger(self.__class__.__name__) - - def test_upload(self): - """ - The following test was written to bypass Fern to debug the HTTP upload without any additional abstraction. - - This is useful when we are trying to isolate an issue in Fern or the API as multipart form is hard to - write test harnesses for. - - :return: - """ - self.logger.info("Testing upload via manager (bypass Fern)") - client = Factory().build() - - request = CreateCorpusRequest(name="int-test-upload", key="int-test-upload") - create_response = client.lab_helper.create_lab_corpus(request) - key = create_response.key - - time.sleep(30) - path = Path("examples/01_getting_started/resources/arxiv/2409.05866v1.pdf") - document: Document = client.upload_manager.upload(key, path, metadata={"key": "value"}, doc_id="basic_metadata") - self.assertTrue(document.storage_usage.bytes_used > 0) - self.assertTrue(document.storage_usage.metadata_bytes_used > 0) - - def test_delete_all_tests(self): - self.logger.info("Deleting all tests") - client = Factory().build() - # TODO Refactor tests so they use username prefix like labs. - client.lab_helper.delete_labs("int-test-", user_prefix=False) - - diff --git a/int_tests/vectara_int_tests/managers/test_users.py b/int_tests/vectara_int_tests/managers/test_users.py deleted file mode 100644 index 3f3dca6..0000000 --- a/int_tests/vectara_int_tests/managers/test_users.py +++ /dev/null @@ -1,64 +0,0 @@ -import unittest - -from vectara.factory import Factory - - -class TestUserManager(unittest.TestCase): - - def setUp(self): - self.client = Factory().build() - - def test_create_user(self): - response = self.client.users.create(email="test-email@example.com", - username="test-user", api_roles=["administrator"]) - - self.assertEqual(response.username, "test-user") - self.assertEqual(response.email, "test-email@example.com") - self.assertEqual(response.api_roles, ["administrator"]) - self.assertEqual(response.enabled, False) - - def test_update_user(self): - create_eresponse = self.client.users.create(email="test-email@example.com", - username="test-user", api_roles=["administrator"]) - - self.assertEqual(create_eresponse.api_roles, ["administrator"]) - self.assertEqual(create_eresponse.enabled, False) - - update_response = self.client.users.update(username="test-user", enabled=True, - api_roles=["corpus_administrator"]) - - self.assertEqual(update_response.api_roles, ["corpus_administrator"]) - self.assertEqual(update_response.enabled, True) - - def test_delete_user(self): - create_eresponse = self.client.users.create(email="test-email@example.com", - username="test-user", api_roles=["administrator"]) - - del_response = self.client.users.delete(username=create_eresponse.username) - - self.assertIsNone(del_response) - - def test_get_user(self): - create_eresponse = self.client.users.create(email="test-email@example.com", - username="test-user", api_roles=["administrator"]) - - get_response = self.client.users.get(username=create_eresponse.username) - - self.assertEqual(get_response.username, "test-user") - self.assertEqual(get_response.email, "test-email@example.com") - self.assertEqual(get_response.api_roles, ["administrator"]) - self.assertEqual(get_response.enabled, False) - - def test_list_users(self): - usernames = [] - for index in range(2): - create_eresponse = self.client.users.create(email="test-email@example.com", - username=f"test-user-{index}", api_roles=["administrator"]) - usernames.append(create_eresponse.username) - - for user in self.client.users.list(): - self.assertIn(user.username, usernames) - - def tearDown(self): - for user in self.client.users.list(): - self.client.users.delete(username=user.username) diff --git a/int_tests/vectara_int_tests/temp/.gitignore b/int_tests/vectara_int_tests/temp/.gitignore deleted file mode 100644 index 22e8364..0000000 --- a/int_tests/vectara_int_tests/temp/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/* \ No newline at end of file diff --git a/int_tests/vectara_int_tests/util.py b/int_tests/vectara_int_tests/util.py deleted file mode 100644 index eaa55ac..0000000 --- a/int_tests/vectara_int_tests/util.py +++ /dev/null @@ -1,22 +0,0 @@ - - -import logging -import os - -class TestUtil: - - logger = logging.getLogger(__name__ + ".TestUtil") - - def create_unique_key(cls, test_name: str, username_prefix=True): - if not test_name: - raise TypeError("You must supply a test name") - elif len(test_name) < 10: - raise TypeError("Please use a descriptive name of at least 10 characters") - - username = os.getlogin() - # Use maximum 10 characters from username - user_part = username.split("@")[0][:10] - cls.logger.info(f"User prefix for test: {user_part}") - - full_test_name = f"{user_part}-{test_name}" - return full_test_name \ No newline at end of file diff --git a/int_tests/vectara_int_tests/utils/__init__.py b/int_tests/vectara_int_tests/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/int_tests/vectara_int_tests/utils/test_lab_helper.py b/int_tests/vectara_int_tests/utils/test_lab_helper.py deleted file mode 100644 index 21fbf38..0000000 --- a/int_tests/vectara_int_tests/utils/test_lab_helper.py +++ /dev/null @@ -1,93 +0,0 @@ - -from vectara.factory import Factory -from vectara.corpora.client import CorporaClient -from vectara.managers.corpus import CreateCorpusRequest -from vectara.client import Vectara -from vectara.utils import LabHelper -from unittest.mock import MagicMock -from vectara.config import ClientConfig, HomeConfigLoader, ApiKeyAuthConfig - -import unittest -import logging -import json - -import time - -class LabHelperTest(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - logging.basicConfig(format='%(asctime)s:%(name)-35s %(levelname)s:%(message)s', level=logging.INFO, datefmt='%H:%M:%S %z') - self.logger = logging.getLogger(self.__class__.__name__) - - - def test_create_lab_corpus_name(self): - - # Creates a "foo" config useful for testing without performing OAuth in a unittest environment - foo_config = { - "customer_id": "asdf", - "auth": { - "api_key": "foo" - } - } - - client = Factory(config_json=json.dumps(foo_config)).build() - target = client.lab_helper - - def create_request(): - return CreateCorpusRequest(name="filter-attributes", key="101-filter-attr") - - modified_request = CreateCorpusRequest(name="david - filter-attributes", key="david_101-filter-attr") - - # First test using our username discovery from operating system environment - create_corpus_mock = MagicMock(return_value={"stub": True}) - target.corpus_manager.create_corpus = create_corpus_mock - target.discover_user = MagicMock(return_value="david@vectara.com") - target.create_lab_corpus(create_request()) - target.discover_user.assert_called_once_with() - create_corpus_mock.assert_called_once_with(modified_request, delete_existing=True) - - # Second test using injected username prefix - create_corpus_mock = MagicMock(return_value={"stub": True}) - target.corpus_manager.create_corpus = create_corpus_mock - target.create_lab_corpus(create_request(), username="howward.smith@vectara.com") - - modified_request = CreateCorpusRequest(name="howwardSmith - filter-attributes", key="howwardSmith_101-filter-attr") - create_corpus_mock.assert_called_once_with(modified_request, delete_existing=True) - - # Third test with very long username!! - create_corpus_mock = MagicMock(return_value={"stub": True}) - target.corpus_manager.create_corpus = create_corpus_mock - target.create_lab_corpus(create_request(), username="what.a.very.long.username.too.long.some.would.say@vectara.com") - - modified_request = CreateCorpusRequest(name="whatAVeryLongUsernam - filter-attributes", key="whatAVeryLongUsernam_101-filter-attr") - create_corpus_mock.assert_called_once_with(modified_request, delete_existing=True) - - # Fourth test with prefix disabled - create_corpus_mock = MagicMock(return_value={"stub": True}) - target.corpus_manager.create_corpus = create_corpus_mock - target.create_lab_corpus(create_request(), user_prefix=False) - create_corpus_mock.assert_called_once_with(create_request(), delete_existing=True) - - - #target.create_lab_corpus(create_request(), username="howward.smith@vectara.com", dry_run=True) - - def test_create_lab_corpus(self): - client = Factory().build() - target = client.lab_helper - - def create_request(): - return CreateCorpusRequest(name="filter-attributes", key="101-filter-attr") - - target.create_lab_corpus(create_request()) - - - def test_save_profile(self): - target = HomeConfigLoader(profile="test_write") - - target.delete() - - self.assertFalse(target.has_profile()) - config = ClientConfig(customer_id="asdf", auth=ApiKeyAuthConfig(api_key="foo")) - target.save(config) - self.assertTrue(target.has_profile()) \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index eb054c0..1b47e22 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "annotated-types" @@ -426,6 +426,21 @@ pytest = ">=7.0.0,<9" docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] +[[package]] +name = "pytest-dotenv" +version = "0.5.2" +description = "A py.test plugin that parses environment files before running tests" +optional = false +python-versions = "*" +files = [ + {file = "pytest-dotenv-0.5.2.tar.gz", hash = "sha256:2dc6c3ac6d8764c71c6d2804e902d0ff810fa19692e95fe138aefc9b1aa73732"}, + {file = "pytest_dotenv-0.5.2-py3-none-any.whl", hash = "sha256:40a2cece120a213898afaa5407673f6bd924b1fa7eafce6bda0e8abffe2f710f"}, +] + +[package.dependencies] +pytest = ">=5.0.0" +python-dotenv = ">=0.9.1" + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -440,6 +455,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pyyaml" version = "6.0.2" @@ -628,4 +657,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "3fb930549b245f82e3e3f75882b1bab750c718fb75d2b5f7ce2b3a29ff135583" +content-hash = "7ed0a3b9ccbd3a60d0f264672a8110f9ff5f4ca55ac979ed2668fde0463bb7c7" diff --git a/pyproject.toml b/pyproject.toml index 5adc8c9..96170ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ typing_extensions = ">= 4.0.0" mypy = "==1.13.0" pytest = "^7.4.0" pytest-asyncio = "^0.23.5" +pytest-dotenv = "^0.5.2" python-dateutil = "^2.9.0" types-python-dateutil = "^2.9.0.20240316" ruff = "==0.11.5" diff --git a/pytest.ini b/pytest.ini index c0ccdc2..9ead20b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,8 @@ [pytest] -log_cli=true +log_cli=false log_level=DEBUG log_format = %(asctime)s %(levelname)s %(message)s -log_date_format = %Y-%m-%d %H:%M:%S \ No newline at end of file +log_date_format = %Y-%m-%d %H:%M:%S +env_files = + .env +testpaths = ["int_tests", "tests"] \ No newline at end of file diff --git a/src/vectara/base_client.py b/src/vectara/base_client.py index fde193c..eb0c0eb 100644 --- a/src/vectara/base_client.py +++ b/src/vectara/base_client.py @@ -55,6 +55,7 @@ from .app_clients.client import AsyncAppClientsClient from .query_history.client import AsyncQueryHistoryClient from .auth.client import AsyncAuthClient +from .utils.auth import get_oauth2_credentials_from_env, create_oauth2_client_wrapper, create_api_key_client_wrapper # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -103,54 +104,50 @@ def __init__( self, *, environment: VectaraEnvironment = VectaraEnvironment.PRODUCTION, - api_key: typing.Optional[str] = os.getenv("VECTARA_API_KEY"), - client_id: typing.Optional[str] = os.getenv("VECTARA_CLIENT_ID"), - client_secret: typing.Optional[str] = os.getenv("VECTARA_CLIENT_SECRET"), + api_key: typing.Optional[str] = None, + client_id: typing.Optional[str] = None, + client_secret: typing.Optional[str] = None, _token_getter_override: typing.Optional[typing.Callable[[], str]] = None, timeout: typing.Optional[float] = None, follow_redirects: typing.Optional[bool] = True, httpx_client: typing.Optional[httpx.Client] = None, ): - _defaulted_timeout = timeout if timeout is not None else 60 if httpx_client is None else None - if api_key is not None: - self._client_wrapper = SyncClientWrapper( + if client_id is not None and client_secret is not None: + self._client_wrapper = create_oauth2_client_wrapper( environment=environment, - api_key=api_key, - httpx_client=httpx_client - if httpx_client is not None - else httpx.Client(timeout=_defaulted_timeout, follow_redirects=follow_redirects) - if follow_redirects is not None - else httpx.Client(timeout=_defaulted_timeout), - timeout=_defaulted_timeout, - ) - elif client_id is not None and client_secret is not None: - oauth_token_provider = OAuthTokenProvider( client_id=client_id, client_secret=client_secret, - client_wrapper=SyncClientWrapper( - environment=environment, - api_key=api_key, - httpx_client=httpx.Client(timeout=_defaulted_timeout, follow_redirects=follow_redirects) - if follow_redirects is not None - else httpx.Client(timeout=_defaulted_timeout), - timeout=_defaulted_timeout, - ), + timeout=timeout, + follow_redirects=follow_redirects, + httpx_client=httpx_client, + token_getter_override=_token_getter_override ) - self._client_wrapper = SyncClientWrapper( + + elif api_key is not None or os.getenv("VECTARA_API_KEY") is not None: + self._client_wrapper = create_api_key_client_wrapper( environment=environment, - api_key=api_key, - token=_token_getter_override if _token_getter_override is not None else oauth_token_provider.get_token, + api_key=api_key or os.getenv("VECTARA_API_KEY"), + timeout=timeout, + follow_redirects=follow_redirects, httpx_client=httpx_client - if httpx_client is not None - else httpx.Client(timeout=_defaulted_timeout, follow_redirects=follow_redirects) - if follow_redirects is not None - else httpx.Client(timeout=_defaulted_timeout), - timeout=_defaulted_timeout, ) - else: + + elif get_oauth2_credentials_from_env() != (None, None): + env_client_id, env_client_secret = get_oauth2_credentials_from_env() + self._client_wrapper = create_oauth2_client_wrapper( + environment=environment, + client_id=env_client_id, + client_secret=env_client_secret, + timeout=timeout, + follow_redirects=follow_redirects, + httpx_client=httpx_client, + token_getter_override=_token_getter_override + ) + + else: raise ApiError( - body="The client must be instantiated be either passing in api_key, client_id or client_secret" - ) + body="The client must be instantiated with either api_key or both client_id and client_secret" + ) self.corpora = CorporaClient(client_wrapper=self._client_wrapper) self.upload = UploadClient(client_wrapper=self._client_wrapper) self.documents = DocumentsClient(client_wrapper=self._client_wrapper) @@ -768,54 +765,55 @@ def __init__( self, *, environment: VectaraEnvironment = VectaraEnvironment.PRODUCTION, - api_key: typing.Optional[str] = os.getenv("VECTARA_API_KEY"), - client_id: typing.Optional[str] = os.getenv("VECTARA_CLIENT_ID"), - client_secret: typing.Optional[str] = os.getenv("VECTARA_CLIENT_SECRET"), + api_key: typing.Optional[str] = None, + client_id: typing.Optional[str] = None, + client_secret: typing.Optional[str] = None, _token_getter_override: typing.Optional[typing.Callable[[], str]] = None, timeout: typing.Optional[float] = None, follow_redirects: typing.Optional[bool] = True, httpx_client: typing.Optional[httpx.AsyncClient] = None, ): _defaulted_timeout = timeout if timeout is not None else 60 if httpx_client is None else None - if api_key is not None: - self._client_wrapper = AsyncClientWrapper( + + # If OAuth2 credentials are explicitly provided, use them + if client_id is not None and client_secret is not None: + self._client_wrapper = create_oauth2_client_wrapper( environment=environment, - api_key=api_key, - httpx_client=httpx_client - if httpx_client is not None - else httpx.AsyncClient(timeout=_defaulted_timeout, follow_redirects=follow_redirects) - if follow_redirects is not None - else httpx.AsyncClient(timeout=_defaulted_timeout), - timeout=_defaulted_timeout, - ) - elif client_id is not None and client_secret is not None: - oauth_token_provider = OAuthTokenProvider( client_id=client_id, client_secret=client_secret, - client_wrapper=SyncClientWrapper( - environment=environment, - api_key=api_key, - httpx_client=httpx.Client(timeout=_defaulted_timeout, follow_redirects=follow_redirects) - if follow_redirects is not None - else httpx.Client(timeout=_defaulted_timeout), - timeout=_defaulted_timeout, - ), + timeout=timeout, + follow_redirects=follow_redirects, + httpx_client=httpx_client, + token_getter_override=_token_getter_override, + is_async=True ) - self._client_wrapper = AsyncClientWrapper( + # Check for OAuth2 credentials in environment variables + elif get_oauth2_credentials_from_env() != (None, None): + env_client_id, env_client_secret = get_oauth2_credentials_from_env() + self._client_wrapper = create_oauth2_client_wrapper( environment=environment, - api_key=api_key, - token=_token_getter_override if _token_getter_override is not None else oauth_token_provider.get_token, - httpx_client=httpx_client - if httpx_client is not None - else httpx.AsyncClient(timeout=_defaulted_timeout, follow_redirects=follow_redirects) - if follow_redirects is not None - else httpx.AsyncClient(timeout=_defaulted_timeout), - timeout=_defaulted_timeout, + client_id=env_client_id, + client_secret=env_client_secret, + timeout=timeout, + follow_redirects=follow_redirects, + httpx_client=httpx_client, + token_getter_override=_token_getter_override, + is_async=True ) - else: + # Otherwise, use API key if provided (either explicitly or from environment) + elif api_key is not None or os.getenv("VECTARA_API_KEY") is not None: + self._client_wrapper = create_api_key_client_wrapper( + environment=environment, + api_key=api_key or os.getenv("VECTARA_API_KEY"), + timeout=timeout, + follow_redirects=follow_redirects, + httpx_client=httpx_client, + is_async=True + ) + else: raise ApiError( - body="The client must be instantiated be either passing in api_key, client_id or client_secret" - ) + body="The client must be instantiated with either api_key or both client_id and client_secret" + ) self.corpora = AsyncCorporaClient(client_wrapper=self._client_wrapper) self.upload = AsyncUploadClient(client_wrapper=self._client_wrapper) self.documents = AsyncDocumentsClient(client_wrapper=self._client_wrapper) diff --git a/src/vectara/corpora/types/search_corpus_parameters.py b/src/vectara/corpora/types/search_corpus_parameters.py index 5aa5b63..1563353 100644 --- a/src/vectara/corpora/types/search_corpus_parameters.py +++ b/src/vectara/corpora/types/search_corpus_parameters.py @@ -3,7 +3,7 @@ import typing import pydantic -from ...core.pydantic_utilities import IS_PYDANTIC_V2 +from ...core.pydantic_utilities import IS_PYDANTIC_V2, update_forward_refs from ...types.search_corpus import SearchCorpus from ...types.search_parameters import SearchParameters @@ -21,3 +21,10 @@ class Config: frozen = True smart_union = True extra = pydantic.Extra.allow + +from ...types.chain_reranker import ChainReranker # noqa: E402 + +update_forward_refs(ChainReranker) + +if IS_PYDANTIC_V2: + SearchCorpusParameters.model_rebuild() \ No newline at end of file diff --git a/src/vectara/managers/document.py b/src/vectara/managers/document.py index df3dd76..fab05f3 100644 --- a/src/vectara/managers/document.py +++ b/src/vectara/managers/document.py @@ -106,9 +106,9 @@ def set_metadata(): if doc.metadata: metadata_copy = dict(doc.metadata) metadata_copy[HASH_FIELD] = sha256_hash - result = doc.copy(update={"metadata": metadata_copy}) + result = doc.model_copy(update={"metadata": metadata_copy}) else: - result = doc.copy(update={"metadata": {HASH_FIELD: sha256_hash}}) + result = doc.model_copy(update={"metadata": {HASH_FIELD: sha256_hash}}) return result diff --git a/src/vectara/utils/auth.py b/src/vectara/utils/auth.py new file mode 100644 index 0000000..2e6d396 --- /dev/null +++ b/src/vectara/utils/auth.py @@ -0,0 +1,110 @@ +import os +from typing import Tuple, Optional +import httpx +from ..core.oauth_token_provider import OAuthTokenProvider +from ..core.client_wrapper import SyncClientWrapper +from ..environment import VectaraEnvironment + +def get_oauth2_credentials_from_env() -> Tuple[Optional[str], Optional[str]]: + """ + Get OAuth2 credentials from environment variables. + + Returns: + Tuple[Optional[str], Optional[str]]: A tuple containing (client_id, client_secret) if both are present, + otherwise (None, None) + """ + client_id = os.getenv("VECTARA_CLIENT_ID") + client_secret = os.getenv("VECTARA_CLIENT_SECRET") + + if client_id is not None and client_secret is not None: + return client_id, client_secret + return None, None + +def create_oauth2_client_wrapper( + environment: VectaraEnvironment, + client_id: str, + client_secret: str, + timeout: Optional[float], + follow_redirects: Optional[bool], + httpx_client: Optional[httpx.Client], + token_getter_override: Optional[callable] = None +) -> SyncClientWrapper: + """ + Create a client wrapper with OAuth2 authentication. + + Args: + environment: The Vectara environment to use + client_id: OAuth2 client ID + client_secret: OAuth2 client secret + timeout: Request timeout in seconds + follow_redirects: Whether to follow redirects + httpx_client: Custom httpx client to use + token_getter_override: Optional token getter override + + Returns: + A configured client wrapper + """ + defaulted_timeout = timeout if timeout is not None else 60 if httpx_client is None else None + + # Create base client for OAuth token provider + base_client = SyncClientWrapper( + environment=environment, + api_key=None, + httpx_client=httpx.Client(timeout=defaulted_timeout, follow_redirects=follow_redirects) + if follow_redirects is not None + else httpx.Client(timeout=defaulted_timeout), + timeout=defaulted_timeout, + ) + + # Create OAuth token provider + oauth_token_provider = OAuthTokenProvider( + client_id=client_id, + client_secret=client_secret, + client_wrapper=base_client, + ) + + # Create client wrapper + return SyncClientWrapper( + environment=environment, + api_key=None, + token=token_getter_override if token_getter_override is not None else oauth_token_provider.get_token, + httpx_client=httpx_client + if httpx_client is not None + else httpx.Client(timeout=defaulted_timeout, follow_redirects=follow_redirects) + if follow_redirects is not None + else httpx.Client(timeout=defaulted_timeout), + timeout=defaulted_timeout, + ) + +def create_api_key_client_wrapper( + environment: VectaraEnvironment, + api_key: str, + timeout: Optional[float], + follow_redirects: Optional[bool], + httpx_client: Optional[httpx.Client] +) -> SyncClientWrapper: + """ + Create a client wrapper with API key authentication. + + Args: + environment: The Vectara environment to use + api_key: API key to use + timeout: Request timeout in seconds + follow_redirects: Whether to follow redirects + httpx_client: Custom httpx client to use + + Returns: + A configured client wrapper + """ + defaulted_timeout = timeout if timeout is not None else 60 if httpx_client is None else None + + return SyncClientWrapper( + environment=environment, + api_key=api_key, + httpx_client=httpx_client + if httpx_client is not None + else httpx.Client(timeout=defaulted_timeout, follow_redirects=follow_redirects) + if follow_redirects is not None + else httpx.Client(timeout=defaulted_timeout), + timeout=defaulted_timeout, + ) \ No newline at end of file diff --git a/src/vectara/utils/lab_helper.py b/src/vectara/utils/lab_helper.py index cd51173..52707d3 100644 --- a/src/vectara/utils/lab_helper.py +++ b/src/vectara/utils/lab_helper.py @@ -89,9 +89,9 @@ def create_lab_corpus(self, corpus: CreateCorpusRequest, user_prefix=True, usern :return: """ - corpus_clone = corpus.copy() + corpus_clone = corpus.model_copy() - name, key = self._build_lab_name_and_key(corpus.name, corpus.key) + name, key = self._build_lab_name_and_key(corpus.name, corpus.key, user_prefix=user_prefix, username=username) corpus_clone.name = name corpus_clone.key = key