vectorvein 0.1.58__tar.gz → 0.1.59__tar.gz

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in their public registry.
Files changed (35)
  1. {vectorvein-0.1.58 → vectorvein-0.1.59}/PKG-INFO +1 -1
  2. {vectorvein-0.1.58 → vectorvein-0.1.59}/pyproject.toml +1 -1
  3. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/anthropic_client.py +16 -3
  4. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/base_client.py +8 -0
  5. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/gemini_client.py +10 -2
  6. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/minimax_client.py +10 -2
  7. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/openai_compatible_client.py +10 -2
  8. {vectorvein-0.1.58 → vectorvein-0.1.59}/README.md +0 -0
  9. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/__init__.py +0 -0
  10. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/__init__.py +0 -0
  11. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
  12. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  13. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/groq_client.py +0 -0
  14. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/local_client.py +0 -0
  15. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  16. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  17. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/openai_client.py +0 -0
  18. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/py.typed +0 -0
  19. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  20. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
  21. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/utils.py +0 -0
  22. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/xai_client.py +0 -0
  23. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/yi_client.py +0 -0
  24. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  25. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/py.typed +0 -0
  26. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/server/token_server.py +0 -0
  27. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/settings/__init__.py +0 -0
  28. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/settings/py.typed +0 -0
  29. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/types/defaults.py +0 -0
  30. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/types/enums.py +0 -0
  31. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/types/exception.py +0 -0
  32. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/types/llm_parameters.py +0 -0
  33. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/types/py.typed +0 -0
  34. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/utilities/media_processing.py +0 -0
  35. {vectorvein-0.1.58 → vectorvein-0.1.59}/src/vectorvein/utilities/retry.py +0 -0
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.58
+Version: 0.1.59
 Summary: VectorVein python SDK
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ description = "VectorVein python SDK"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.58"
+version = "0.1.59"
 
 [project.license]
 text = "MIT"
--- a/src/vectorvein/chat_clients/anthropic_client.py
+++ b/src/vectorvein/chat_clients/anthropic_client.py
@@ -250,6 +250,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -267,6 +268,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -284,6 +286,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -300,6 +303,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -349,6 +353,7 @@ class AnthropicChatClient(BaseChatClient):
                 response_format=response_format,
                 stream_options=stream_options,
                 top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
             for chunk in response:
@@ -374,8 +379,8 @@ class AnthropicChatClient(BaseChatClient):
                 tools=_tools,
                 tool_choice=_tool_choice,
                 response_format=response_format,
-                stream_options=stream_options,
                 top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
 
@@ -399,7 +404,7 @@ class AnthropicChatClient(BaseChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -626,6 +631,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -643,6 +649,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -660,6 +667,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -676,6 +684,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -725,6 +734,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 tool_choice=_tool_choice,
                 response_format=response_format,
                 stream_options=stream_options,
+                top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
             async for chunk in response:
@@ -751,6 +762,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 tools=_tools,
                 tool_choice=_tool_choice,
                 response_format=response_format,
+                top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
 
@@ -774,7 +787,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
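Every file in this release applies the same change: a new skip_cutoff keyword (defaulting to False) is threaded through each overload into the implementation, where it gates the existing cutoff_messages truncation. A minimal sketch of that gating pattern follows; cutoff_messages and ContextLengthControlType are stand-ins modeled on the names in the diff, not the package's real implementations.

# Illustrative sketch of the gating pattern added in 0.1.59. The names
# cutoff_messages and ContextLengthControlType come from the diff; their
# bodies here are simplified stand-ins.
from enum import Enum


class ContextLengthControlType(Enum):
    Latest = "latest"


def cutoff_messages(messages: list[dict], max_count: int) -> list[dict]:
    # Stand-in: the real helper trims the history to fit the model's
    # context window; here we simply keep the most recent entries.
    return messages[-max_count:]


def prepare_messages(
    messages: list[dict],
    context_length_control: ContextLengthControlType,
    context_length: int,
    skip_cutoff: bool = False,
) -> list[dict]:
    # New in 0.1.59: with skip_cutoff=True the history passes through
    # untouched, even when Latest-style truncation is configured.
    if not skip_cutoff and context_length_control == ContextLengthControlType.Latest:
        messages = cutoff_messages(messages, max_count=context_length)
    return messages

With skip_cutoff=False the behavior is unchanged from 0.1.58; with skip_cutoff=True the caller takes responsibility for keeping the history within the model's context window.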
--- a/src/vectorvein/chat_clients/base_client.py
+++ b/src/vectorvein/chat_clients/base_client.py
@@ -74,6 +74,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -92,6 +93,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -110,6 +112,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -127,6 +130,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -212,6 +216,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -230,6 +235,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -248,6 +254,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -265,6 +272,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
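Because the abstract BaseChatClient and BaseAsyncChatClient signatures gain the same parameter, provider-agnostic code can forward skip_cutoff uniformly. A hedged sketch follows; the hunks above show only parameter lists, so the method name create_completion is an assumption, not taken from this diff.

# Hypothetical helper built on the shared abstract interface.
from vectorvein.chat_clients.base_client import BaseChatClient


def ask_verbatim(client: BaseChatClient, messages: list[dict]):
    # skip_cutoff is part of the shared signatures as of 0.1.59, so the
    # same call works for any concrete client (Anthropic, Gemini,
    # MiniMax, OpenAI-compatible). The method name is an assumption.
    return client.create_completion(messages=messages, skip_cutoff=True)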
--- a/src/vectorvein/chat_clients/gemini_client.py
+++ b/src/vectorvein/chat_clients/gemini_client.py
@@ -66,6 +66,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -83,6 +84,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -100,6 +102,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -116,6 +119,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -133,7 +137,7 @@ class GeminiChatClient(BaseChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -313,6 +317,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -330,6 +335,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -347,6 +353,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -363,6 +370,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -380,7 +388,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
--- a/src/vectorvein/chat_clients/minimax_client.py
+++ b/src/vectorvein/chat_clients/minimax_client.py
@@ -92,6 +92,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -109,6 +110,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -126,6 +128,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -142,6 +145,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -166,7 +170,7 @@ class MiniMaxChatClient(BaseChatClient):
             self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -337,6 +341,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -354,6 +359,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -371,6 +377,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -387,6 +394,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -411,7 +419,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
--- a/src/vectorvein/chat_clients/openai_compatible_client.py
+++ b/src/vectorvein/chat_clients/openai_compatible_client.py
@@ -99,6 +99,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -116,6 +117,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -133,6 +135,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -149,6 +152,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -165,7 +169,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         if self.model_id is None:
             self.model_id = self.model_setting.id
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -361,6 +365,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -378,6 +383,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -395,6 +401,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -411,6 +418,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -427,7 +435,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         if self.model_id is None:
             self.model_id = self.model_setting.id
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
The remaining files (items 8 through 35 in the list above) are unchanged between 0.1.58 and 0.1.59.
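For callers the change is opt-in: omitting skip_cutoff preserves the 0.1.58 truncation behavior. A minimal usage sketch follows, with client construction and the create_completion method name assumed rather than taken from this diff.

# Minimal usage sketch for 0.1.59. Constructor arguments and the
# create_completion name are assumptions; only skip_cutoff itself is
# taken from the diff.
from vectorvein.chat_clients.anthropic_client import AnthropicChatClient

client = AnthropicChatClient()  # assumed default construction

long_history = [{"role": "user", "content": "..."}]  # placeholder history

# skip_cutoff=True bypasses the context-length cutoff; the default of
# False keeps the previous truncation behavior.
message = client.create_completion(messages=long_history, skip_cutoff=True)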