11
11
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
12
12
""" .strip ()
13
13
14
- @pytest .fixture (scope = "module" , autouse = True )
14
+ @pytest .fixture (autouse = True )
15
15
def create_server ():
16
16
global server
17
17
server = ServerPreset .tinyllama2 ()
@@ -25,6 +25,7 @@ def test_ctx_shift_enabled():
25
25
# the prompt is truncated to keep the last 109 tokens
26
26
# 64 tokens are generated thanks to shifting the context when it gets full
27
27
global server
28
+ server .enable_ctx_shift = True
28
29
server .start ()
29
30
res = server .make_request ("POST" , "/completion" , data = {
30
31
"n_predict" : 64 ,
@@ -42,7 +43,6 @@ def test_ctx_shift_enabled():
42
43
])
43
44
def test_ctx_shift_disabled_short_prompt (n_predict : int , n_token_output : int , truncated : bool ):
44
45
global server
45
- server .disable_ctx_shift = True
46
46
server .n_predict = - 1
47
47
server .start ()
48
48
res = server .make_request ("POST" , "/completion" , data = {
@@ -56,7 +56,6 @@ def test_ctx_shift_disabled_short_prompt(n_predict: int, n_token_output: int, tr
56
56
57
57
def test_ctx_shift_disabled_long_prompt ():
58
58
global server
59
- server .disable_ctx_shift = True
60
59
server .start ()
61
60
res = server .make_request ("POST" , "/completion" , data = {
62
61
"n_predict" : 64 ,
@@ -68,7 +67,6 @@ def test_ctx_shift_disabled_long_prompt():
68
67
69
68
def test_ctx_shift_disabled_stream ():
70
69
global server
71
- server .disable_ctx_shift = True
72
70
server .start ()
73
71
res = server .make_stream_request ("POST" , "/v1/completions" , data = {
74
72
"n_predict" : 256 ,
0 commit comments