import pytest

from scrapegraphai.helpers.models_tokens import models_tokens


class TestModelsTokens:
    """Test suite for verifying the models_tokens dictionary content and structure."""

    def test_openai_tokens(self):
        """Test that the 'openai' provider exists and its tokens are valid positive integers."""
        openai_models = models_tokens.get("openai")
        assert openai_models is not None, "'openai' key should be present in models_tokens"
        for model, token in openai_models.items():
            assert isinstance(model, str), "Model name should be a string"
            assert isinstance(token, int), "Token limit should be an integer"
            assert token > 0, "Token limit should be positive"

    def test_azure_openai_tokens(self):
        """Test that the 'azure_openai' provider exists and its tokens are valid."""
        azure_models = models_tokens.get("azure_openai")
        assert azure_models is not None, "'azure_openai' key should be present"
        for model, token in azure_models.items():
            assert isinstance(model, str), "Model name should be a string"
            assert isinstance(token, int), "Token limit should be an integer"

    def test_google_providers(self):
        """Test that the Google provider dictionaries ('google_genai' and 'google_vertexai') contain expected entries."""
        google_genai = models_tokens.get("google_genai")
        google_vertexai = models_tokens.get("google_vertexai")
        assert google_genai is not None, "'google_genai' key should be present"
        assert google_vertexai is not None, "'google_vertexai' key should be present"
        # Check a specific key from google_genai
        assert "gemini-pro" in google_genai, "'gemini-pro' should be in google_genai models"
        # Validate token value types
        for provider in [google_genai, google_vertexai]:
            for token in provider.values():
                assert isinstance(token, int), "Token limit must be an integer"

    def test_non_existent_provider(self):
        """Test that a non-existent provider returns None."""
        assert models_tokens.get("non_existent") is None, "Non-existent provider should return None"

    def test_total_model_keys(self):
        """Test that the total number of models across all providers is above an expected count."""
        total_keys = sum(len(details) for details in models_tokens.values())
        assert total_keys > 20, "Expected more than 20 total model tokens defined"

    def test_specific_token_value(self):
        """Test a specific expected token value for a known model."""
        openai = models_tokens.get("openai")
        # Verify that the token limit for "gpt-4" is 8192 as defined
        assert openai.get("gpt-4") == 8192, "Expected token limit for gpt-4 to be 8192"

    def test_non_empty_model_keys(self):
        """Ensure that model names are non-empty strings."""
        for provider, model_dict in models_tokens.items():
            for model in model_dict.keys():
                assert model != "", f"Model name in provider '{provider}' should not be empty."

    def test_token_limits_range(self):
        """Test that token limits for all models fall within a plausible range (1 to 1,100,000)."""
        for provider, model_dict in models_tokens.items():
            for model, token in model_dict.items():
                assert 1 <= token <= 1100000, f"Token limit for {model} in provider {provider} is out of plausible range."

    def test_provider_structure(self):
        """Test that every provider in models_tokens has a dictionary as its value."""
        for provider, models in models_tokens.items():
            assert isinstance(models, dict), f"Provider {provider} should map to a dictionary, got {type(models).__name__}"

    def test_non_empty_provider(self):
        """Test that each provider dictionary is not empty."""
        for provider, models in models_tokens.items():
            assert len(models) > 0, f"Provider {provider} should contain at least one model."

    def test_specific_model_token_values(self):
        """Test specific expected token values for selected models from various providers."""
        # Verify a token for a selected model from the 'openai' provider
        openai = models_tokens.get("openai")
        assert openai.get("gpt-3.5-turbo-0125") == 16385, "Expected token limit for gpt-3.5-turbo-0125 in openai to be 16385"

        # Verify a token for a selected model from the 'azure_openai' provider
        azure = models_tokens.get("azure_openai")
        assert azure.get("gpt-3.5") == 4096, "Expected token limit for gpt-3.5 in azure_openai to be 4096"

        # Verify a token for a selected model from the 'anthropic' provider
        anthropic = models_tokens.get("anthropic")
        assert anthropic.get("claude_instant") == 100000, "Expected token limit for claude_instant in anthropic to be 100000"

    def test_providers_count(self):
        """Test that the total number of providers is as expected (at least 15)."""
        assert len(models_tokens) >= 15, "Expected at least 15 providers in models_tokens"

    def test_non_existent_model(self):
        """Test that a non-existent model within a valid provider returns None."""
        openai = models_tokens.get("openai")
        assert openai.get("non_existent_model") is None, "Non-existent model should return None from a valid provider."

    def test_no_whitespace_in_model_names(self):
        """Test that model names do not contain leading or trailing whitespace."""
        for provider, model_dict in models_tokens.items():
            for model in model_dict.keys():
                # Assert that stripping whitespace does not change the model name
                assert model == model.strip(), f"Model name '{model}' in provider '{provider}' contains leading or trailing whitespace."

    def test_specific_models_additional(self):
        """Test specific token values for additional models across various providers."""
        # Check some models in the 'ollama' provider
        ollama = models_tokens.get("ollama")
        assert ollama.get("llama2") == 4096, "Expected token limit for 'llama2' in ollama to be 4096"
        assert ollama.get("llama2:70b") == 4096, "Expected token limit for 'llama2:70b' in ollama to be 4096"

        # Check a specific model from the 'mistralai' provider
        mistralai = models_tokens.get("mistralai")
        assert mistralai.get("open-codestral-mamba") == 256000, "Expected token limit for 'open-codestral-mamba' in mistralai to be 256000"

        # Check a specific model from the 'deepseek' provider
        deepseek = models_tokens.get("deepseek")
        assert deepseek.get("deepseek-chat") == 28672, "Expected token limit for 'deepseek-chat' in deepseek to be 28672"

        # Check a model from the 'ernie' provider
        ernie = models_tokens.get("ernie")
        assert ernie.get("ernie-bot") == 4096, "Expected token limit for 'ernie-bot' in ernie to be 4096"

    def test_nvidia_specific(self):
        """Test the specific token value for 'meta/codellama-70b' in the nvidia provider."""
        nvidia = models_tokens.get("nvidia")
        assert nvidia is not None, "'nvidia' provider should exist"
        # Verify that the token limit for 'meta/codellama-70b' equals 16384 as defined in the nvidia dictionary
        assert nvidia.get("meta/codellama-70b") == 16384, "Expected token limit for 'meta/codellama-70b' in nvidia to be 16384"

    def test_groq_specific(self):
        """Test the token value for the "claude-3-haiku-20240307'" entry in the groq provider."""
        groq = models_tokens.get("groq")
        assert groq is not None, "'groq' provider should exist"
        # Note: this model name ends with a stray apostrophe, as defined in the groq dictionary.
        assert groq.get("claude-3-haiku-20240307'") == 8192, \
            "Expected token limit for \"claude-3-haiku-20240307'\" in groq to be 8192"

    def test_togetherai_specific(self):
        """Test the specific token value for 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo' in the 'toghetherai' provider."""
        togetherai = models_tokens.get("toghetherai")
        assert togetherai is not None, "'toghetherai' provider should exist"
        expected = 128000
        model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
        assert togetherai.get(model_name) == expected, f"Expected token limit for '{model_name}' in toghetherai to be {expected}"

    def test_ernie_all_values(self):
        """Test that all models in the 'ernie' provider have token values of exactly 4096."""
        ernie = models_tokens.get("ernie")
        assert ernie is not None, "'ernie' provider should exist"
        for model, token in ernie.items():
            assert token == 4096, f"Expected token limit for '{model}' in ernie to be 4096, got {token}"
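

# A quick way to run only this suite (the file path shown is illustrative):
#   pytest tests/test_models_tokens.py -v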