import pytest
from scrapegraphai.helpers.models_tokens import models_tokens


class TestModelsTokens:
    """Test suite for verifying the models_tokens dictionary content and structure."""

    def test_openai_tokens(self):
        """Test that the 'openai' provider exists and its tokens are valid positive integers."""
        openai_models = models_tokens.get("openai")
        assert openai_models is not None, "'openai' key should be present in models_tokens"
        for model, token in openai_models.items():
            assert isinstance(model, str), "Model name should be a string"
            assert isinstance(token, int), "Token limit should be an integer"
            assert token > 0, "Token limit should be positive"

    def test_azure_openai_tokens(self):
        """Test that the 'azure_openai' provider exists and its tokens are valid."""
        azure_models = models_tokens.get("azure_openai")
        assert azure_models is not None, "'azure_openai' key should be present"
        for model, token in azure_models.items():
            assert isinstance(model, str), "Model name should be a string"
            assert isinstance(token, int), "Token limit should be an integer"

    def test_google_providers(self):
        """Test that the Google provider dictionaries ('google_genai' and 'google_vertexai') contain expected entries."""
        google_genai = models_tokens.get("google_genai")
        google_vertexai = models_tokens.get("google_vertexai")
        assert google_genai is not None, "'google_genai' key should be present"
        assert google_vertexai is not None, "'google_vertexai' key should be present"
        # Check a specific key from google_genai
        assert "gemini-pro" in google_genai, "'gemini-pro' should be in google_genai models"
        # Validate token value types
        for provider in [google_genai, google_vertexai]:
            for token in provider.values():
                assert isinstance(token, int), "Token limit must be an integer"

    def test_non_existent_provider(self):
        """Test that a non-existent provider returns None."""
        assert models_tokens.get("non_existent") is None, "Non-existent provider should return None"

    def test_total_model_keys(self):
        """Test that the total number of models across all providers is above an expected count."""
        total_keys = sum(len(details) for details in models_tokens.values())
        assert total_keys > 20, "Expected more than 20 total model tokens defined"

    def test_specific_token_value(self):
        """Test specific expected token value for a known model."""
        openai = models_tokens.get("openai")
        # Verify that the token limit for "gpt-4" is 8192 as defined
        assert openai.get("gpt-4") == 8192, "Expected token limit for gpt-4 to be 8192"

    def test_non_empty_model_keys(self):
        """Ensure that model names are non-empty strings."""
        for provider, model_dict in models_tokens.items():
            for model in model_dict.keys():
                assert model != "", f"Model name in provider '{provider}' should not be empty."

    def test_token_limits_range(self):
        """Test that token limits for all models fall within a plausible range (1 to 1,100,000)."""
        for provider, model_dict in models_tokens.items():
            for model, token in model_dict.items():
                assert 1 <= token <= 1100000, f"Token limit for {model} in provider {provider} is out of plausible range."

    def test_provider_structure(self):
        """Test that every provider in models_tokens has a dictionary as its value."""
        for provider, models in models_tokens.items():
            assert isinstance(models, dict), f"Provider {provider} should map to a dictionary, got {type(models).__name__}"

    def test_non_empty_provider(self):
        """Test that each provider dictionary is not empty."""
        for provider, models in models_tokens.items():
            assert len(models) > 0, f"Provider {provider} should contain at least one model."

    def test_specific_model_token_values(self):
        """Test specific expected token values for selected models from various providers."""
        # Verify a token for a selected model from the 'openai' provider
        openai = models_tokens.get("openai")
        assert openai.get("gpt-3.5-turbo-0125") == 16385, "Expected token limit for gpt-3.5-turbo-0125 in openai to be 16385"

        # Verify a token for a selected model from the 'azure_openai' provider
        azure = models_tokens.get("azure_openai")
        assert azure.get("gpt-3.5") == 4096, "Expected token limit for gpt-3.5 in azure_openai to be 4096"

        # Verify a token for a selected model from the 'anthropic' provider
        anthropic = models_tokens.get("anthropic")
        assert anthropic.get("claude_instant") == 100000, "Expected token limit for claude_instant in anthropic to be 100000"

    def test_providers_count(self):
        """Test that the total number of providers is as expected (at least 15)."""
        assert len(models_tokens) >= 15, "Expected at least 15 providers in models_tokens"

    def test_non_existent_model(self):
        """Test that a non-existent model within a valid provider returns None."""
        openai = models_tokens.get("openai")
        assert openai.get("non_existent_model") is None, "Non-existent model should return None from a valid provider."

    def test_no_whitespace_in_model_names(self):
        """Test that model names do not contain leading or trailing whitespace."""
        for provider, model_dict in models_tokens.items():
            for model in model_dict.keys():
                # Assert that stripping whitespace does not change the model name
                assert model == model.strip(), f"Model name '{model}' in provider '{provider}' contains leading or trailing whitespace."

    def test_specific_models_additional(self):
        """Test specific token values for additional models across various providers."""
        # Check some models in the 'ollama' provider
        ollama = models_tokens.get("ollama")
        assert ollama.get("llama2") == 4096, "Expected token limit for 'llama2' in ollama to be 4096"
        assert ollama.get("llama2:70b") == 4096, "Expected token limit for 'llama2:70b' in ollama to be 4096"

        # Check a specific model from the 'mistralai' provider
        mistralai = models_tokens.get("mistralai")
        assert mistralai.get("open-codestral-mamba") == 256000, "Expected token limit for 'open-codestral-mamba' in mistralai to be 256000"

        # Check a specific model from the 'deepseek' provider
        deepseek = models_tokens.get("deepseek")
        assert deepseek.get("deepseek-chat") == 28672, "Expected token limit for 'deepseek-chat' in deepseek to be 28672"

        # Check a model from the 'ernie' provider
        ernie = models_tokens.get("ernie")
        assert ernie.get("ernie-bot") == 4096, "Expected token limit for 'ernie-bot' in ernie to be 4096"

    def test_nvidia_specific(self):
        """Test specific token value for 'meta/codellama-70b' in the nvidia provider."""
        nvidia = models_tokens.get("nvidia")
        assert nvidia is not None, "'nvidia' provider should exist"
        # Verify the token limit for 'meta/codellama-70b' equals 16384 as defined in the nvidia dictionary
        assert nvidia.get("meta/codellama-70b") == 16384, "Expected token limit for 'meta/codellama-70b' in nvidia to be 16384"

    def test_groq_specific(self):
        """Test the specific token value for the "claude-3-haiku-20240307'" entry in the groq provider."""
        groq = models_tokens.get("groq")
        assert groq is not None, "'groq' provider should exist"
        # Note: this model name ends with an embedded apostrophe in the groq dictionary.
        assert groq.get("claude-3-haiku-20240307'") == 8192, "Expected token limit for \"claude-3-haiku-20240307'\" in groq to be 8192"

    def test_togetherai_specific(self):
        """Test the specific token value for 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo' in the 'toghetherai' provider."""
        togetherai = models_tokens.get("toghetherai")
        assert togetherai is not None, "'toghetherai' provider should exist"
        expected = 128000
        model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
        assert togetherai.get(model_name) == expected, f"Expected token limit for '{model_name}' in toghetherai to be {expected}"

    def test_ernie_all_values(self):
        """Test that all models in the 'ernie' provider have token values of exactly 4096."""
        ernie = models_tokens.get("ernie")
        assert ernie is not None, "'ernie' provider should exist"
        for model, token in ernie.items():
            assert token == 4096, f"Expected token limit for '{model}' in ernie to be 4096, got {token}"
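
    # A generic cross-provider sketch, not part of the original suite: it assumes only
    # that models_tokens maps provider names to {model_name: token_limit} dictionaries,
    # which the structural tests above already verify. The test name is new here.
    @pytest.mark.parametrize("provider", sorted(models_tokens.keys()))
    def test_all_provider_tokens_are_positive_ints(self, provider):
        """Every token limit for every provider should be a positive integer."""
        for model, token in models_tokens[provider].items():
            assert isinstance(token, int), f"Token limit for '{model}' in '{provider}' should be an integer"
            assert token > 0, f"Token limit for '{model}' in '{provider}' should be positive"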