unique_toolkit 0.7.7__py3-none-any.whl → 1.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unique_toolkit might be problematic. Click here for more details.

Files changed (166) hide show
  1. unique_toolkit/__init__.py +28 -1
  2. unique_toolkit/_common/api_calling/human_verification_manager.py +343 -0
  3. unique_toolkit/_common/base_model_type_attribute.py +303 -0
  4. unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
  5. unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
  6. unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
  7. unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
  8. unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
  9. unique_toolkit/_common/default_language_model.py +12 -0
  10. unique_toolkit/_common/docx_generator/__init__.py +7 -0
  11. unique_toolkit/_common/docx_generator/config.py +12 -0
  12. unique_toolkit/_common/docx_generator/schemas.py +80 -0
  13. unique_toolkit/_common/docx_generator/service.py +252 -0
  14. unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
  15. unique_toolkit/_common/endpoint_builder.py +305 -0
  16. unique_toolkit/_common/endpoint_requestor.py +430 -0
  17. unique_toolkit/_common/exception.py +24 -0
  18. unique_toolkit/_common/feature_flags/schema.py +9 -0
  19. unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
  20. unique_toolkit/_common/pydantic_helpers.py +154 -0
  21. unique_toolkit/_common/referencing.py +53 -0
  22. unique_toolkit/_common/string_utilities.py +140 -0
  23. unique_toolkit/_common/tests/test_referencing.py +521 -0
  24. unique_toolkit/_common/tests/test_string_utilities.py +506 -0
  25. unique_toolkit/_common/token/image_token_counting.py +67 -0
  26. unique_toolkit/_common/token/token_counting.py +204 -0
  27. unique_toolkit/_common/utils/__init__.py +1 -0
  28. unique_toolkit/_common/utils/files.py +43 -0
  29. unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
  30. unique_toolkit/_common/utils/structured_output/schema.py +5 -0
  31. unique_toolkit/_common/utils/write_configuration.py +51 -0
  32. unique_toolkit/_common/validators.py +101 -4
  33. unique_toolkit/agentic/__init__.py +1 -0
  34. unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
  35. unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
  36. unique_toolkit/agentic/evaluation/config.py +36 -0
  37. unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
  38. unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
  39. unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
  40. unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
  41. unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
  42. unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +111 -0
  43. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
  44. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +16 -15
  45. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +30 -20
  46. unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
  47. unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
  48. unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
  49. unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
  50. unique_toolkit/agentic/history_manager/history_construction_with_contents.py +297 -0
  51. unique_toolkit/agentic/history_manager/history_manager.py +242 -0
  52. unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
  53. unique_toolkit/agentic/history_manager/utils.py +96 -0
  54. unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
  55. unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
  56. unique_toolkit/agentic/responses_api/__init__.py +19 -0
  57. unique_toolkit/agentic/responses_api/postprocessors/code_display.py +63 -0
  58. unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +145 -0
  59. unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
  60. unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
  61. unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
  62. unique_toolkit/agentic/tools/__init__.py +1 -0
  63. unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
  64. unique_toolkit/agentic/tools/a2a/config.py +17 -0
  65. unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
  66. unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
  67. unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
  68. unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
  69. unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
  70. unique_toolkit/agentic/tools/a2a/manager.py +55 -0
  71. unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
  72. unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +185 -0
  73. unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +73 -0
  74. unique_toolkit/agentic/tools/a2a/postprocessing/config.py +45 -0
  75. unique_toolkit/agentic/tools/a2a/postprocessing/display.py +180 -0
  76. unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
  77. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +1335 -0
  78. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
  79. unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
  80. unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
  81. unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
  82. unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
  83. unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
  84. unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
  85. unique_toolkit/agentic/tools/a2a/tool/config.py +73 -0
  86. unique_toolkit/agentic/tools/a2a/tool/service.py +306 -0
  87. unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
  88. unique_toolkit/agentic/tools/config.py +167 -0
  89. unique_toolkit/agentic/tools/factory.py +44 -0
  90. unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
  91. unique_toolkit/agentic/tools/mcp/manager.py +71 -0
  92. unique_toolkit/agentic/tools/mcp/models.py +28 -0
  93. unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
  94. unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
  95. unique_toolkit/agentic/tools/openai_builtin/base.py +30 -0
  96. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
  97. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +57 -0
  98. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +230 -0
  99. unique_toolkit/agentic/tools/openai_builtin/manager.py +62 -0
  100. unique_toolkit/agentic/tools/schemas.py +141 -0
  101. unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
  102. unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
  103. unique_toolkit/agentic/tools/tool.py +183 -0
  104. unique_toolkit/agentic/tools/tool_manager.py +523 -0
  105. unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
  106. unique_toolkit/agentic/tools/utils/__init__.py +19 -0
  107. unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
  108. unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
  109. unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
  110. unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
  111. unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
  112. unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
  113. unique_toolkit/app/__init__.py +6 -0
  114. unique_toolkit/app/dev_util.py +180 -0
  115. unique_toolkit/app/init_sdk.py +32 -1
  116. unique_toolkit/app/schemas.py +198 -31
  117. unique_toolkit/app/unique_settings.py +367 -0
  118. unique_toolkit/chat/__init__.py +8 -1
  119. unique_toolkit/chat/deprecated/service.py +232 -0
  120. unique_toolkit/chat/functions.py +642 -77
  121. unique_toolkit/chat/rendering.py +34 -0
  122. unique_toolkit/chat/responses_api.py +461 -0
  123. unique_toolkit/chat/schemas.py +133 -2
  124. unique_toolkit/chat/service.py +115 -767
  125. unique_toolkit/content/functions.py +153 -4
  126. unique_toolkit/content/schemas.py +122 -15
  127. unique_toolkit/content/service.py +278 -44
  128. unique_toolkit/content/smart_rules.py +301 -0
  129. unique_toolkit/content/utils.py +8 -3
  130. unique_toolkit/embedding/service.py +102 -11
  131. unique_toolkit/framework_utilities/__init__.py +1 -0
  132. unique_toolkit/framework_utilities/langchain/client.py +71 -0
  133. unique_toolkit/framework_utilities/langchain/history.py +19 -0
  134. unique_toolkit/framework_utilities/openai/__init__.py +6 -0
  135. unique_toolkit/framework_utilities/openai/client.py +83 -0
  136. unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
  137. unique_toolkit/framework_utilities/utils.py +23 -0
  138. unique_toolkit/language_model/__init__.py +3 -0
  139. unique_toolkit/language_model/builder.py +27 -11
  140. unique_toolkit/language_model/default_language_model.py +3 -0
  141. unique_toolkit/language_model/functions.py +327 -43
  142. unique_toolkit/language_model/infos.py +992 -50
  143. unique_toolkit/language_model/reference.py +242 -0
  144. unique_toolkit/language_model/schemas.py +475 -48
  145. unique_toolkit/language_model/service.py +228 -27
  146. unique_toolkit/protocols/support.py +145 -0
  147. unique_toolkit/services/__init__.py +7 -0
  148. unique_toolkit/services/chat_service.py +1630 -0
  149. unique_toolkit/services/knowledge_base.py +861 -0
  150. unique_toolkit/short_term_memory/service.py +178 -41
  151. unique_toolkit/smart_rules/__init__.py +0 -0
  152. unique_toolkit/smart_rules/compile.py +56 -0
  153. unique_toolkit/test_utilities/events.py +197 -0
  154. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/METADATA +606 -7
  155. unique_toolkit-1.23.0.dist-info/RECORD +182 -0
  156. unique_toolkit/evaluators/__init__.py +0 -1
  157. unique_toolkit/evaluators/config.py +0 -35
  158. unique_toolkit/evaluators/constants.py +0 -1
  159. unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
  160. unique_toolkit/evaluators/context_relevancy/service.py +0 -53
  161. unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
  162. unique_toolkit/evaluators/hallucination/constants.py +0 -41
  163. unique_toolkit-0.7.7.dist-info/RECORD +0 -64
  164. /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
  165. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/LICENSE +0 -0
  166. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/WHEEL +0 -0
@@ -1,11 +1,12 @@
1
1
  from datetime import date
2
2
  from enum import StrEnum
3
- from typing import ClassVar, Optional, Self
3
+ from typing import Annotated, Any, ClassVar, Optional, Self
4
4
 
5
- from pydantic import BaseModel
5
+ from pydantic import BaseModel, Field
6
6
  from pydantic.json_schema import SkipJsonSchema
7
7
  from typing_extensions import deprecated
8
8
 
9
+ from unique_toolkit._common.pydantic_helpers import get_configuration_dict
9
10
  from unique_toolkit.language_model.schemas import LanguageModelTokenLimits
10
11
 
11
12
 
@@ -14,13 +15,57 @@ class LanguageModelName(StrEnum):
14
15
  AZURE_GPT_4_0613 = "AZURE_GPT_4_0613"
15
16
  AZURE_GPT_4_32K_0613 = "AZURE_GPT_4_32K_0613"
16
17
  AZURE_GPT_4_TURBO_2024_0409 = "AZURE_GPT_4_TURBO_2024_0409"
18
+ AZURE_GPT_5_2025_0807 = "AZURE_GPT_5_2025_0807"
19
+ AZURE_GPT_5_MINI_2025_0807 = "AZURE_GPT_5_MINI_2025_0807"
20
+ AZURE_GPT_5_NANO_2025_0807 = "AZURE_GPT_5_NANO_2025_0807"
21
+ AZURE_GPT_5_CHAT_2025_0807 = "AZURE_GPT_5_CHAT_2025_0807"
22
+ AZURE_GPT_5_PRO_2025_1006 = "AZURE_GPT_5_PRO_2025_1006"
17
23
  AZURE_GPT_4o_2024_0513 = "AZURE_GPT_4o_2024_0513"
18
24
  AZURE_GPT_4o_2024_0806 = "AZURE_GPT_4o_2024_0806"
25
+ AZURE_GPT_4o_2024_1120 = "AZURE_GPT_4o_2024_1120"
19
26
  AZURE_GPT_4o_MINI_2024_0718 = "AZURE_GPT_4o_MINI_2024_0718"
20
27
  AZURE_o1_MINI_2024_0912 = "AZURE_o1_MINI_2024_0912"
21
28
  AZURE_o1_2024_1217 = "AZURE_o1_2024_1217"
22
29
  AZURE_o3_MINI_2025_0131 = "AZURE_o3_MINI_2025_0131"
23
30
  AZURE_GPT_45_PREVIEW_2025_0227 = "AZURE_GPT_45_PREVIEW_2025_0227"
31
+ AZURE_GPT_41_2025_0414 = "AZURE_GPT_41_2025_0414"
32
+ AZURE_GPT_41_MINI_2025_0414 = "AZURE_GPT_41_MINI_2025_0414"
33
+ AZURE_GPT_41_NANO_2025_0414 = "AZURE_GPT_41_NANO_2025_0414"
34
+ AZURE_o3_2025_0416 = "AZURE_o3_2025_0416"
35
+ AZURE_o4_MINI_2025_0416 = "AZURE_o4_MINI_2025_0416"
36
+ ANTHROPIC_CLAUDE_3_7_SONNET = "litellm:anthropic-claude-3-7-sonnet"
37
+ ANTHROPIC_CLAUDE_3_7_SONNET_THINKING = (
38
+ "litellm:anthropic-claude-3-7-sonnet-thinking"
39
+ )
40
+ ANTHROPIC_CLAUDE_HAIKU_4_5 = "litellm:anthropic-claude-haiku-4-5"
41
+ ANTHROPIC_CLAUDE_SONNET_4 = "litellm:anthropic-claude-sonnet-4"
42
+ ANTHROPIC_CLAUDE_SONNET_4_5 = "litellm:anthropic-claude-sonnet-4-5"
43
+ ANTHROPIC_CLAUDE_OPUS_4 = "litellm:anthropic-claude-opus-4"
44
+ ANTHROPIC_CLAUDE_OPUS_4_1 = "litellm:anthropic-claude-opus-4-1"
45
+ GEMINI_2_0_FLASH = "litellm:gemini-2-0-flash"
46
+ GEMINI_2_5_FLASH = "litellm:gemini-2-5-flash"
47
+ GEMINI_2_5_FLASH_LITE_PREVIEW_0617 = "litellm:gemini-2-5-flash-lite-preview-06-17"
48
+ GEMINI_2_5_FLASH_PREVIEW_0520 = "litellm:gemini-2-5-flash-preview-05-20"
49
+ GEMINI_2_5_PRO = "litellm:gemini-2-5-pro"
50
+ GEMINI_2_5_PRO_EXP_0325 = "litellm:gemini-2-5-pro-exp-03-25"
51
+ GEMINI_2_5_PRO_PREVIEW_0605 = "litellm:gemini-2-5-pro-preview-06-05"
52
+ LITELLM_OPENAI_GPT_5 = "litellm:openai-gpt-5"
53
+ LITELLM_OPENAI_GPT_5_MINI = "litellm:openai-gpt-5-mini"
54
+ LITELLM_OPENAI_GPT_5_NANO = "litellm:openai-gpt-5-nano"
55
+ LITELLM_OPENAI_GPT_5_CHAT = "litellm:openai-gpt-5-chat"
56
+ LITELLM_OPENAI_GPT_5_PRO = "litellm:openai-gpt-5-pro"
57
+ LITELLM_OPENAI_O1 = "litellm:openai-o1"
58
+ LITELLM_OPENAI_O3 = "litellm:openai-o3"
59
+ LITELLM_OPENAI_O3_DEEP_RESEARCH = "litellm:openai-o3-deep-research"
60
+ LITELLM_OPENAI_O3_PRO = "litellm:openai-o3-pro"
61
+ LITELLM_OPENAI_O4_MINI = "litellm:openai-o4-mini"
62
+ LITELLM_OPENAI_O4_MINI_DEEP_RESEARCH = "litellm:openai-o4-mini-deep-research"
63
+ LITELLM_OPENAI_GPT_4_1_MINI = "litellm:openai-gpt-4-1-mini"
64
+ LITELLM_OPENAI_GPT_4_1_NANO = "litellm:openai-gpt-4-1-nano"
65
+ LITELLM_DEEPSEEK_R1 = "litellm:deepseek-r1"
66
+ LITELLM_DEEPSEEK_V3 = "litellm:deepseek-v3-1"
67
+ LITELLM_QWEN_3 = "litellm:qwen-3-235B-A22B"
68
+ LITELLM_QWEN_3_THINKING = "litellm:qwen-3-235B-A22B-thinking"
24
69
 
25
70
 
26
71
  class EncoderName(StrEnum):
@@ -43,6 +88,25 @@ def get_encoder_name(model_name: LanguageModelName) -> EncoderName:
43
88
  LMN.AZURE_GPT_4o_2024_0513
44
89
  | LMN.AZURE_GPT_4o_2024_0806
45
90
  | LMN.AZURE_GPT_4o_MINI_2024_0718
91
+ | LMN.AZURE_GPT_4o_2024_1120
92
+ | LMN.AZURE_GPT_5_2025_0807
93
+ | LMN.AZURE_GPT_5_MINI_2025_0807
94
+ | LMN.AZURE_GPT_5_NANO_2025_0807
95
+ | LMN.AZURE_GPT_5_CHAT_2025_0807
96
+ | LMN.AZURE_GPT_5_PRO_2025_1006
97
+ | LMN.LITELLM_OPENAI_GPT_5
98
+ | LMN.LITELLM_OPENAI_GPT_5_MINI
99
+ | LMN.LITELLM_OPENAI_GPT_5_NANO
100
+ | LMN.LITELLM_OPENAI_GPT_5_CHAT
101
+ | LMN.LITELLM_OPENAI_GPT_5_PRO
102
+ | LMN.LITELLM_OPENAI_O1
103
+ | LMN.LITELLM_OPENAI_O3
104
+ | LMN.LITELLM_OPENAI_O3_DEEP_RESEARCH
105
+ | LMN.LITELLM_OPENAI_O4_MINI
106
+ | LMN.LITELLM_OPENAI_O4_MINI_DEEP_RESEARCH
107
+ | LMN.LITELLM_OPENAI_GPT_4_1_MINI
108
+ | LMN.LITELLM_OPENAI_GPT_4_1_NANO
109
+ | LMN.LITELLM_OPENAI_O3_PRO
46
110
  ):
47
111
  return EncoderName.O200K_BASE
48
112
  case _:
@@ -55,6 +119,7 @@ def get_encoder_name(model_name: LanguageModelName) -> EncoderName:
55
119
  class LanguageModelProvider(StrEnum):
56
120
  AZURE = "AZURE"
57
121
  CUSTOM = "CUSTOM"
122
+ LITELLM = "LITELLM"
58
123
 
59
124
 
60
125
  class ModelCapabilities(StrEnum):
@@ -67,10 +132,19 @@ class ModelCapabilities(StrEnum):
67
132
  REASONING = "reasoning"
68
133
 
69
134
 
135
+ class TemperatureBounds(BaseModel):
136
+ min_temperature: float
137
+ max_temperature: float
138
+
139
+
70
140
  class LanguageModelInfo(BaseModel):
71
- name: LanguageModelName | str
72
- version: str
73
- provider: LanguageModelProvider
141
+ model_config = get_configuration_dict()
142
+ name: (
143
+ Annotated[str, Field(title="Custom Model Name")]
144
+ | SkipJsonSchema[LanguageModelName]
145
+ ) = Field(title="Model Name", default=LanguageModelName.AZURE_GPT_4o_2024_1120)
146
+ provider: LanguageModelProvider = LanguageModelProvider.AZURE
147
+ version: str = Field(title="Model Version", default="")
74
148
 
75
149
  encoder_name: EncoderName = EncoderName.CL100K_BASE
76
150
 
@@ -78,14 +152,36 @@ class LanguageModelInfo(BaseModel):
78
152
  token_limits: LanguageModelTokenLimits = LanguageModelTokenLimits(
79
153
  token_limit_input=7_000, token_limit_output=1_000
80
154
  )
155
+
81
156
  capabilities: list[ModelCapabilities] = [ModelCapabilities.STREAMING]
82
157
 
83
- info_cutoff_at: date | SkipJsonSchema[None] = None
84
- published_at: date | SkipJsonSchema[None] = None
85
- retirement_at: date | SkipJsonSchema[None] = None
158
+ info_cutoff_at: (
159
+ Annotated[date, Field(title="Info Cutoff")]
160
+ | Annotated[None, Field(title="Info Cutoff Unknown")]
161
+ ) = None
162
+
163
+ published_at: (
164
+ Annotated[date, Field(title="Publishing Date")]
165
+ | Annotated[None, Field(title="Publishing Date Unknown")]
166
+ ) = None
167
+
168
+ retirement_at: (
169
+ Annotated[date, Field(title="Retirement Date")]
170
+ | Annotated[None, Field(title="Retirement Date Unknown")]
171
+ ) = date(2225, 12, 31)
172
+
173
+ deprecated_at: (
174
+ Annotated[date, Field(title="Deprecated Date")]
175
+ | Annotated[None, Field(title="Deprecated Date Unknown")]
176
+ ) = date(2225, 12, 31)
177
+
178
+ retirement_text: str = "This model is no longer supported."
86
179
 
87
- deprecated_at: date | SkipJsonSchema[None] = None
88
- retirement_text: str | SkipJsonSchema[None] = None
180
+ temperature_bounds: (
181
+ TemperatureBounds | Annotated[None, Field(title="Temperature Bounds Unknown")]
182
+ ) = None
183
+
184
+ default_options: dict[str, Any] = {}
89
185
 
90
186
  @classmethod
91
187
  def from_name(cls, model_name: LanguageModelName) -> Self:
@@ -95,7 +191,6 @@ class LanguageModelInfo(BaseModel):
95
191
  name=model_name,
96
192
  provider=LanguageModelProvider.AZURE,
97
193
  capabilities=[
98
- ModelCapabilities.STRUCTURED_OUTPUT,
99
194
  ModelCapabilities.FUNCTION_CALLING,
100
195
  ModelCapabilities.PARALLEL_FUNCTION_CALLING,
101
196
  ModelCapabilities.REPRODUCIBLE_OUTPUT,
@@ -141,6 +236,129 @@ class LanguageModelInfo(BaseModel):
141
236
  deprecated_at=date(2024, 10, 1),
142
237
  retirement_at=date(2025, 6, 6),
143
238
  )
239
+ case LanguageModelName.AZURE_GPT_5_2025_0807:
240
+ return cls(
241
+ name=model_name,
242
+ provider=LanguageModelProvider.AZURE,
243
+ version="2025-08-07",
244
+ encoder_name=EncoderName.O200K_BASE,
245
+ capabilities=[
246
+ ModelCapabilities.FUNCTION_CALLING,
247
+ ModelCapabilities.STREAMING,
248
+ ModelCapabilities.REASONING,
249
+ ModelCapabilities.VISION,
250
+ ModelCapabilities.STRUCTURED_OUTPUT,
251
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
252
+ ],
253
+ token_limits=LanguageModelTokenLimits(
254
+ token_limit_input=272000, token_limit_output=128000
255
+ ),
256
+ info_cutoff_at=date(2024, 10, 24),
257
+ published_at=date(2025, 8, 7),
258
+ deprecated_at=date(2026, 8, 7),
259
+ retirement_at=date(2026, 8, 7),
260
+ temperature_bounds=TemperatureBounds(
261
+ min_temperature=1.0, max_temperature=1.0
262
+ ),
263
+ default_options={
264
+ "reasoning_effort": "minimal",
265
+ },
266
+ )
267
+ case LanguageModelName.AZURE_GPT_5_MINI_2025_0807:
268
+ return cls(
269
+ name=model_name,
270
+ provider=LanguageModelProvider.AZURE,
271
+ version="2025-08-07",
272
+ encoder_name=EncoderName.O200K_BASE,
273
+ capabilities=[
274
+ ModelCapabilities.FUNCTION_CALLING,
275
+ ModelCapabilities.STREAMING,
276
+ ModelCapabilities.REASONING,
277
+ ModelCapabilities.VISION,
278
+ ModelCapabilities.STRUCTURED_OUTPUT,
279
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
280
+ ],
281
+ token_limits=LanguageModelTokenLimits(
282
+ token_limit_input=272000, token_limit_output=128000
283
+ ),
284
+ info_cutoff_at=date(2024, 6, 24),
285
+ published_at=date(2025, 8, 7),
286
+ deprecated_at=date(2026, 8, 7),
287
+ retirement_at=date(2026, 8, 7),
288
+ temperature_bounds=TemperatureBounds(
289
+ min_temperature=1.0, max_temperature=1.0
290
+ ),
291
+ default_options={
292
+ "reasoning_effort": "minimal",
293
+ },
294
+ )
295
+ case LanguageModelName.AZURE_GPT_5_NANO_2025_0807:
296
+ return cls(
297
+ name=model_name,
298
+ provider=LanguageModelProvider.AZURE,
299
+ version="2025-08-07",
300
+ encoder_name=EncoderName.O200K_BASE,
301
+ capabilities=[
302
+ ModelCapabilities.FUNCTION_CALLING,
303
+ ModelCapabilities.STREAMING,
304
+ ModelCapabilities.REASONING,
305
+ ModelCapabilities.VISION,
306
+ ModelCapabilities.STRUCTURED_OUTPUT,
307
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
308
+ ],
309
+ token_limits=LanguageModelTokenLimits(
310
+ token_limit_input=272000, token_limit_output=128000
311
+ ),
312
+ info_cutoff_at=date(2024, 5, 31),
313
+ published_at=date(2025, 8, 7),
314
+ deprecated_at=date(2026, 8, 7),
315
+ retirement_at=date(2026, 8, 7),
316
+ temperature_bounds=TemperatureBounds(
317
+ min_temperature=1.0, max_temperature=1.0
318
+ ),
319
+ default_options={
320
+ "reasoning_effort": "minimal",
321
+ },
322
+ )
323
+ case LanguageModelName.AZURE_GPT_5_CHAT_2025_0807:
324
+ return cls(
325
+ name=model_name,
326
+ provider=LanguageModelProvider.AZURE,
327
+ version="2025-08-07",
328
+ encoder_name=EncoderName.O200K_BASE,
329
+ capabilities=[
330
+ ModelCapabilities.STREAMING,
331
+ ModelCapabilities.VISION,
332
+ ],
333
+ token_limits=LanguageModelTokenLimits(
334
+ token_limit_input=128000, token_limit_output=16384
335
+ ),
336
+ info_cutoff_at=date(2024, 10, 24),
337
+ published_at=date(2025, 8, 7),
338
+ deprecated_at=date(2026, 8, 7),
339
+ retirement_at=date(2026, 8, 7),
340
+ )
341
+ case LanguageModelName.AZURE_GPT_5_PRO_2025_1006:
342
+ return cls(
343
+ name=model_name,
344
+ provider=LanguageModelProvider.AZURE,
345
+ version="2025-10-06",
346
+ encoder_name=EncoderName.O200K_BASE,
347
+ capabilities=[
348
+ ModelCapabilities.FUNCTION_CALLING,
349
+ ModelCapabilities.REASONING,
350
+ ModelCapabilities.VISION,
351
+ ModelCapabilities.STRUCTURED_OUTPUT,
352
+ ],
353
+ token_limits=LanguageModelTokenLimits(
354
+ token_limit_input=272000, token_limit_output=128000
355
+ ),
356
+ info_cutoff_at=date(2024, 10, 30),
357
+ published_at=date(2025, 10, 6),
358
+ temperature_bounds=TemperatureBounds(
359
+ min_temperature=1.0, max_temperature=1.0
360
+ ),
361
+ )
144
362
  case LanguageModelName.AZURE_GPT_4_TURBO_2024_0409:
145
363
  return cls(
146
364
  name=model_name,
@@ -148,7 +366,6 @@ class LanguageModelInfo(BaseModel):
148
366
  capabilities=[
149
367
  ModelCapabilities.FUNCTION_CALLING,
150
368
  ModelCapabilities.PARALLEL_FUNCTION_CALLING,
151
- ModelCapabilities.STRUCTURED_OUTPUT,
152
369
  ModelCapabilities.VISION,
153
370
  ModelCapabilities.STREAMING,
154
371
  ],
@@ -165,7 +382,6 @@ class LanguageModelInfo(BaseModel):
165
382
  name=model_name,
166
383
  encoder_name=EncoderName.O200K_BASE,
167
384
  capabilities=[
168
- ModelCapabilities.STRUCTURED_OUTPUT,
169
385
  ModelCapabilities.FUNCTION_CALLING,
170
386
  ModelCapabilities.PARALLEL_FUNCTION_CALLING,
171
387
  ModelCapabilities.STREAMING,
@@ -198,9 +414,10 @@ class LanguageModelInfo(BaseModel):
198
414
  info_cutoff_at=date(2023, 10, 1),
199
415
  published_at=date(2024, 8, 6),
200
416
  )
201
- case LanguageModelName.AZURE_GPT_4o_MINI_2024_0718:
417
+ case LanguageModelName.AZURE_GPT_4o_2024_1120:
202
418
  return cls(
203
419
  name=model_name,
420
+ encoder_name=EncoderName.O200K_BASE,
204
421
  capabilities=[
205
422
  ModelCapabilities.STRUCTURED_OUTPUT,
206
423
  ModelCapabilities.FUNCTION_CALLING,
@@ -209,6 +426,23 @@ class LanguageModelInfo(BaseModel):
209
426
  ModelCapabilities.VISION,
210
427
  ],
211
428
  provider=LanguageModelProvider.AZURE,
429
+ version="2024-11-20",
430
+ token_limits=LanguageModelTokenLimits(
431
+ token_limit_input=128_000, token_limit_output=16_384
432
+ ),
433
+ info_cutoff_at=date(2023, 10, 1),
434
+ published_at=date(2024, 11, 20),
435
+ )
436
+ case LanguageModelName.AZURE_GPT_4o_MINI_2024_0718:
437
+ return cls(
438
+ name=model_name,
439
+ capabilities=[
440
+ ModelCapabilities.FUNCTION_CALLING,
441
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
442
+ ModelCapabilities.STREAMING,
443
+ ModelCapabilities.VISION,
444
+ ],
445
+ provider=LanguageModelProvider.AZURE,
212
446
  version="2024-07-18",
213
447
  encoder_name=EncoderName.O200K_BASE,
214
448
  token_limits=LanguageModelTokenLimits(
@@ -235,6 +469,9 @@ class LanguageModelInfo(BaseModel):
235
469
  ),
236
470
  info_cutoff_at=date(2023, 10, 1),
237
471
  published_at=date(2024, 9, 12),
472
+ temperature_bounds=TemperatureBounds(
473
+ min_temperature=1.0, max_temperature=1.0
474
+ ),
238
475
  )
239
476
  case LanguageModelName.AZURE_o1_2024_1217:
240
477
  return cls(
@@ -254,6 +491,9 @@ class LanguageModelInfo(BaseModel):
254
491
  ),
255
492
  info_cutoff_at=date(2023, 10, 1),
256
493
  published_at=date(2024, 12, 17),
494
+ temperature_bounds=TemperatureBounds(
495
+ min_temperature=1.0, max_temperature=1.0
496
+ ),
257
497
  )
258
498
  case LanguageModelName.AZURE_o3_MINI_2025_0131:
259
499
  return cls(
@@ -272,6 +512,53 @@ class LanguageModelInfo(BaseModel):
272
512
  ),
273
513
  info_cutoff_at=date(2023, 10, 1),
274
514
  published_at=date(2025, 1, 31),
515
+ temperature_bounds=TemperatureBounds(
516
+ min_temperature=1.0, max_temperature=1.0
517
+ ),
518
+ )
519
+ case LanguageModelName.AZURE_o3_2025_0416:
520
+ return cls(
521
+ name=model_name,
522
+ capabilities=[
523
+ ModelCapabilities.STRUCTURED_OUTPUT,
524
+ ModelCapabilities.FUNCTION_CALLING,
525
+ ModelCapabilities.STREAMING,
526
+ ModelCapabilities.REASONING,
527
+ ModelCapabilities.VISION,
528
+ ],
529
+ provider=LanguageModelProvider.AZURE,
530
+ version="2025-04-16",
531
+ encoder_name=EncoderName.O200K_BASE,
532
+ token_limits=LanguageModelTokenLimits(
533
+ token_limit_input=200_000, token_limit_output=100_000
534
+ ),
535
+ info_cutoff_at=date(2024, 5, 31),
536
+ published_at=date(2025, 4, 16),
537
+ temperature_bounds=TemperatureBounds(
538
+ min_temperature=1.0, max_temperature=1.0
539
+ ),
540
+ )
541
+ case LanguageModelName.AZURE_o4_MINI_2025_0416:
542
+ return cls(
543
+ name=model_name,
544
+ capabilities=[
545
+ ModelCapabilities.STRUCTURED_OUTPUT,
546
+ ModelCapabilities.FUNCTION_CALLING,
547
+ ModelCapabilities.STREAMING,
548
+ ModelCapabilities.REASONING,
549
+ ModelCapabilities.VISION,
550
+ ],
551
+ provider=LanguageModelProvider.AZURE,
552
+ version="2025-04-16",
553
+ encoder_name=EncoderName.O200K_BASE,
554
+ token_limits=LanguageModelTokenLimits(
555
+ token_limit_input=200_000, token_limit_output=100_000
556
+ ),
557
+ info_cutoff_at=date(2024, 5, 31),
558
+ published_at=date(2025, 4, 16),
559
+ temperature_bounds=TemperatureBounds(
560
+ min_temperature=1.0, max_temperature=1.0
561
+ ),
275
562
  )
276
563
  case LanguageModelName.AZURE_GPT_45_PREVIEW_2025_0227:
277
564
  return cls(
@@ -291,45 +578,700 @@ class LanguageModelInfo(BaseModel):
291
578
  info_cutoff_at=date(2023, 10, 1),
292
579
  published_at=date(2025, 2, 27),
293
580
  )
294
- case _:
295
- if isinstance(model_name, LanguageModelName):
296
- raise ValueError(
297
- f"{model_name} is not supported. Please add model information in toolkit."
298
- )
299
-
581
+ case LanguageModelName.AZURE_GPT_41_2025_0414:
300
582
  return cls(
301
583
  name=model_name,
302
- version="custom",
303
- provider=LanguageModelProvider.CUSTOM,
584
+ capabilities=[
585
+ ModelCapabilities.STRUCTURED_OUTPUT,
586
+ ModelCapabilities.FUNCTION_CALLING,
587
+ ModelCapabilities.STREAMING,
588
+ ModelCapabilities.VISION,
589
+ ],
590
+ provider=LanguageModelProvider.AZURE,
591
+ version="2025-04-14",
592
+ encoder_name=EncoderName.O200K_BASE,
593
+ token_limits=LanguageModelTokenLimits(
594
+ token_limit_input=1_047_576, token_limit_output=32_768
595
+ ),
596
+ info_cutoff_at=date(2024, 5, 31),
597
+ published_at=date(2025, 4, 14),
304
598
  )
305
-
306
- @property
307
- def display_name(self) -> str:
308
- """
309
- Returns the name of the model as a string.
310
- """
311
-
312
- if isinstance(self.name, LanguageModelName):
313
- return self.name.value
314
- else:
315
- return self.name
316
-
317
-
318
- @deprecated(
319
- """
320
- Use `LanguageModelInfo` instead of `LanguageModel`
321
- """
322
- )
323
- class LanguageModel:
324
- _info: ClassVar[LanguageModelInfo]
325
-
326
- def __init__(self, model_name: LanguageModelName | str):
327
- self._model_info = self.get_model_info(model_name)
328
-
329
- @property
330
- def info(self) -> LanguageModelInfo:
331
- """
332
- Returns all infos about the model:
599
+ case LanguageModelName.AZURE_GPT_41_MINI_2025_0414:
600
+ return cls(
601
+ name=model_name,
602
+ capabilities=[
603
+ ModelCapabilities.STRUCTURED_OUTPUT,
604
+ ModelCapabilities.FUNCTION_CALLING,
605
+ ModelCapabilities.STREAMING,
606
+ ModelCapabilities.VISION,
607
+ ],
608
+ provider=LanguageModelProvider.AZURE,
609
+ version="2025-04-14",
610
+ encoder_name=EncoderName.O200K_BASE,
611
+ token_limits=LanguageModelTokenLimits(
612
+ token_limit_input=1_047_576, token_limit_output=32_768
613
+ ),
614
+ info_cutoff_at=date(2024, 5, 31),
615
+ published_at=date(2025, 4, 14),
616
+ )
617
+ case LanguageModelName.AZURE_GPT_41_NANO_2025_0414:
618
+ return cls(
619
+ name=model_name,
620
+ capabilities=[
621
+ ModelCapabilities.STRUCTURED_OUTPUT,
622
+ ModelCapabilities.FUNCTION_CALLING,
623
+ ModelCapabilities.STREAMING,
624
+ ModelCapabilities.VISION,
625
+ ],
626
+ provider=LanguageModelProvider.AZURE,
627
+ version="2025-04-14",
628
+ encoder_name=EncoderName.O200K_BASE,
629
+ token_limits=LanguageModelTokenLimits(
630
+ token_limit_input=1_047_576, token_limit_output=32_768
631
+ ),
632
+ info_cutoff_at=date(2024, 5, 31),
633
+ published_at=date(2025, 4, 14),
634
+ )
635
+ case LanguageModelName.ANTHROPIC_CLAUDE_3_7_SONNET:
636
+ return cls(
637
+ name=model_name,
638
+ capabilities=[
639
+ ModelCapabilities.FUNCTION_CALLING,
640
+ ModelCapabilities.STREAMING,
641
+ ModelCapabilities.VISION,
642
+ ],
643
+ provider=LanguageModelProvider.LITELLM,
644
+ version="claude-3-7-sonnet",
645
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
646
+ token_limits=LanguageModelTokenLimits(
647
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
648
+ token_limit_input=180_000,
649
+ token_limit_output=64_000,
650
+ ),
651
+ info_cutoff_at=date(2024, 10, 31),
652
+ published_at=date(2025, 2, 24),
653
+ )
654
+ case LanguageModelName.ANTHROPIC_CLAUDE_3_7_SONNET_THINKING:
655
+ return cls(
656
+ name=model_name,
657
+ capabilities=[
658
+ ModelCapabilities.FUNCTION_CALLING,
659
+ ModelCapabilities.STREAMING,
660
+ ModelCapabilities.VISION,
661
+ ModelCapabilities.REASONING,
662
+ ],
663
+ provider=LanguageModelProvider.LITELLM,
664
+ version="claude-3-7-sonnet-thinking",
665
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
666
+ token_limits=LanguageModelTokenLimits(
667
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
668
+ token_limit_input=180_000,
669
+ token_limit_output=64_000,
670
+ ),
671
+ info_cutoff_at=date(2024, 10, 31),
672
+ published_at=date(2025, 2, 24),
673
+ )
674
+ case LanguageModelName.ANTHROPIC_CLAUDE_HAIKU_4_5:
675
+ return cls(
676
+ name=model_name,
677
+ capabilities=[
678
+ ModelCapabilities.FUNCTION_CALLING,
679
+ ModelCapabilities.STREAMING,
680
+ ModelCapabilities.VISION,
681
+ ModelCapabilities.REASONING,
682
+ ],
683
+ provider=LanguageModelProvider.LITELLM,
684
+ version="claude-haiku-4-5",
685
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
686
+ token_limits=LanguageModelTokenLimits(
687
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
688
+ token_limit_input=180_000,
689
+ token_limit_output=64_000,
690
+ ),
691
+ info_cutoff_at=date(2025, 2, 1),
692
+ published_at=date(2025, 10, 1),
693
+ )
694
+ case LanguageModelName.ANTHROPIC_CLAUDE_SONNET_4:
695
+ return cls(
696
+ name=model_name,
697
+ capabilities=[
698
+ ModelCapabilities.FUNCTION_CALLING,
699
+ ModelCapabilities.STREAMING,
700
+ ModelCapabilities.VISION,
701
+ ModelCapabilities.REASONING,
702
+ ],
703
+ provider=LanguageModelProvider.LITELLM,
704
+ version="claude-sonnet-4",
705
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
706
+ token_limits=LanguageModelTokenLimits(
707
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
708
+ token_limit_input=180_000,
709
+ token_limit_output=64_000,
710
+ ),
711
+ info_cutoff_at=date(2025, 3, 1),
712
+ published_at=date(2025, 5, 1),
713
+ )
714
+ case LanguageModelName.ANTHROPIC_CLAUDE_SONNET_4_5:
715
+ return cls(
716
+ name=model_name,
717
+ capabilities=[
718
+ ModelCapabilities.FUNCTION_CALLING,
719
+ ModelCapabilities.STREAMING,
720
+ ModelCapabilities.VISION,
721
+ ModelCapabilities.REASONING,
722
+ ],
723
+ provider=LanguageModelProvider.LITELLM,
724
+ version="claude-sonnet-4-5",
725
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
726
+ token_limits=LanguageModelTokenLimits(
727
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
728
+ token_limit_input=180_000,
729
+ token_limit_output=64_000,
730
+ ),
731
+ info_cutoff_at=date(2025, 7, 1),
732
+ published_at=date(2025, 9, 29),
733
+ )
734
+ case LanguageModelName.ANTHROPIC_CLAUDE_OPUS_4:
735
+ return cls(
736
+ name=model_name,
737
+ capabilities=[
738
+ ModelCapabilities.FUNCTION_CALLING,
739
+ ModelCapabilities.STREAMING,
740
+ ModelCapabilities.VISION,
741
+ ModelCapabilities.REASONING,
742
+ ],
743
+ provider=LanguageModelProvider.LITELLM,
744
+ version="claude-opus-4",
745
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
746
+ token_limits=LanguageModelTokenLimits(
747
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
748
+ token_limit_input=180_000,
749
+ token_limit_output=32_000,
750
+ ),
751
+ info_cutoff_at=date(2025, 3, 1),
752
+ published_at=date(2025, 5, 1),
753
+ )
754
+ case LanguageModelName.ANTHROPIC_CLAUDE_OPUS_4_1:
755
+ return cls(
756
+ name=model_name,
757
+ capabilities=[
758
+ ModelCapabilities.FUNCTION_CALLING,
759
+ ModelCapabilities.STREAMING,
760
+ ModelCapabilities.VISION,
761
+ ModelCapabilities.REASONING,
762
+ ],
763
+ provider=LanguageModelProvider.LITELLM,
764
+ version="claude-opus-4",
765
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
766
+ token_limits=LanguageModelTokenLimits(
767
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
768
+ token_limit_input=180_000,
769
+ token_limit_output=32_000,
770
+ ),
771
+ info_cutoff_at=date(2025, 3, 1),
772
+ published_at=date(2025, 5, 1),
773
+ )
774
+ case LanguageModelName.GEMINI_2_0_FLASH:
775
+ return cls(
776
+ name=model_name,
777
+ capabilities=[
778
+ ModelCapabilities.FUNCTION_CALLING,
779
+ ModelCapabilities.STREAMING,
780
+ ModelCapabilities.VISION,
781
+ ModelCapabilities.STRUCTURED_OUTPUT,
782
+ ModelCapabilities.REASONING,
783
+ ],
784
+ provider=LanguageModelProvider.LITELLM,
785
+ version="gemini-2-0-flash",
786
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
787
+ token_limits=LanguageModelTokenLimits(
788
+ token_limit_input=1_048_576, token_limit_output=8_192
789
+ ),
790
+ info_cutoff_at=date(2024, 8, 1),
791
+ published_at=date(2025, 2, 1),
792
+ )
793
+ case LanguageModelName.GEMINI_2_5_FLASH:
794
+ return cls(
795
+ name=model_name,
796
+ capabilities=[
797
+ ModelCapabilities.FUNCTION_CALLING,
798
+ ModelCapabilities.STREAMING,
799
+ ModelCapabilities.VISION,
800
+ ModelCapabilities.STRUCTURED_OUTPUT,
801
+ ModelCapabilities.REASONING,
802
+ ],
803
+ provider=LanguageModelProvider.LITELLM,
804
+ version="gemini-2-5-flash",
805
+ encoder_name=EncoderName.O200K_BASE, # TODO:Replace with LLM tokenizer
806
+ token_limits=LanguageModelTokenLimits(
807
+ token_limit_input=1_048_576, token_limit_output=65_536
808
+ ),
809
+ info_cutoff_at=date(2025, 1, day=1),
810
+ published_at=date(2025, 4, 1),
811
+ )
812
+ case LanguageModelName.GEMINI_2_5_FLASH_LITE_PREVIEW_0617:
813
+ return cls(
814
+ name=model_name,
815
+ capabilities=[
816
+ ModelCapabilities.FUNCTION_CALLING,
817
+ ModelCapabilities.STREAMING,
818
+ ModelCapabilities.VISION,
819
+ ModelCapabilities.STRUCTURED_OUTPUT,
820
+ ModelCapabilities.REASONING,
821
+ ],
822
+ provider=LanguageModelProvider.LITELLM,
823
+ version="gemini-2-5-flash-lite-preview-06-17",
824
+ encoder_name=EncoderName.O200K_BASE, # TODO:Replace with LLM tokenizer
825
+ token_limits=LanguageModelTokenLimits(
826
+ token_limit_input=1_000_000, token_limit_output=64_000
827
+ ),
828
+ info_cutoff_at=date(2025, 1, day=1),
829
+ published_at=date(2025, 6, 17),
830
+ )
831
+ case LanguageModelName.GEMINI_2_5_FLASH_PREVIEW_0520:
832
+ return cls(
833
+ name=model_name,
834
+ capabilities=[
835
+ ModelCapabilities.FUNCTION_CALLING,
836
+ ModelCapabilities.STREAMING,
837
+ ModelCapabilities.VISION,
838
+ ModelCapabilities.STRUCTURED_OUTPUT,
839
+ ModelCapabilities.REASONING,
840
+ ],
841
+ provider=LanguageModelProvider.LITELLM,
842
+ version="gemini-2-5-flash-preview-05-20",
843
+ encoder_name=EncoderName.O200K_BASE, # TODO:Replace with LLM tokenizer
844
+ token_limits=LanguageModelTokenLimits(
845
+ token_limit_input=1_048_576, token_limit_output=65_536
846
+ ),
847
+ info_cutoff_at=date(2025, 1, day=1),
848
+ published_at=date(2025, 4, 1),
849
+ )
850
+ case LanguageModelName.GEMINI_2_5_PRO:
851
+ return cls(
852
+ name=model_name,
853
+ capabilities=[
854
+ ModelCapabilities.FUNCTION_CALLING,
855
+ ModelCapabilities.STREAMING,
856
+ ModelCapabilities.VISION,
857
+ ModelCapabilities.STRUCTURED_OUTPUT,
858
+ ModelCapabilities.REASONING,
859
+ ],
860
+ provider=LanguageModelProvider.LITELLM,
861
+ version="gemini-2-5-pro",
862
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
863
+ token_limits=LanguageModelTokenLimits(
864
+ token_limit_input=1_048_576, token_limit_output=65_536
865
+ ),
866
+ info_cutoff_at=date(2025, 1, day=1),
867
+ published_at=date(2025, 6, 17),
868
+ )
869
+ case LanguageModelName.GEMINI_2_5_PRO_EXP_0325:
870
+ return cls(
871
+ name=model_name,
872
+ capabilities=[
873
+ ModelCapabilities.FUNCTION_CALLING,
874
+ ModelCapabilities.STREAMING,
875
+ ModelCapabilities.VISION,
876
+ ModelCapabilities.STRUCTURED_OUTPUT,
877
+ ModelCapabilities.REASONING,
878
+ ],
879
+ provider=LanguageModelProvider.LITELLM,
880
+ version="gemini-2-5-pro-exp-0325",
881
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
882
+ token_limits=LanguageModelTokenLimits(
883
+ token_limit_input=1_048_576, token_limit_output=65_536
884
+ ),
885
+ info_cutoff_at=date(2025, 1, day=1),
886
+ published_at=date(2025, 3, 1),
887
+ )
888
+ case LanguageModelName.GEMINI_2_5_PRO_PREVIEW_0605:
889
+ return cls(
890
+ name=model_name,
891
+ capabilities=[
892
+ ModelCapabilities.FUNCTION_CALLING,
893
+ ModelCapabilities.STREAMING,
894
+ ModelCapabilities.VISION,
895
+ ModelCapabilities.STRUCTURED_OUTPUT,
896
+ ModelCapabilities.REASONING,
897
+ ],
898
+ provider=LanguageModelProvider.LITELLM,
899
+ version="gemini-2-5-pro-preview-06-05",
900
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
901
+ token_limits=LanguageModelTokenLimits(
902
+ token_limit_input=1_048_576, token_limit_output=65_536
903
+ ),
904
+ info_cutoff_at=date(2025, 1, day=1),
905
+ published_at=date(2025, 6, 5),
906
+ )
907
+ case LanguageModelName.LITELLM_OPENAI_GPT_5:
908
+ return cls(
909
+ name=model_name,
910
+ provider=LanguageModelProvider.LITELLM,
911
+ version="gpt-5",
912
+ encoder_name=EncoderName.O200K_BASE,
913
+ capabilities=[
914
+ ModelCapabilities.FUNCTION_CALLING,
915
+ ModelCapabilities.STREAMING,
916
+ ModelCapabilities.REASONING,
917
+ ModelCapabilities.VISION,
918
+ ModelCapabilities.STRUCTURED_OUTPUT,
919
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
920
+ ],
921
+ token_limits=LanguageModelTokenLimits(
922
+ token_limit_input=272000, token_limit_output=128000
923
+ ),
924
+ info_cutoff_at=date(2024, 10, 24),
925
+ published_at=date(2025, 8, 7),
926
+ deprecated_at=date(2026, 8, 7),
927
+ retirement_at=date(2026, 8, 7),
928
+ temperature_bounds=TemperatureBounds(
929
+ min_temperature=1.0, max_temperature=1.0
930
+ ),
931
+ default_options={
932
+ "reasoning_effort": "minimal",
933
+ },
934
+ )
935
+ case LanguageModelName.LITELLM_OPENAI_GPT_5_MINI:
936
+ return cls(
937
+ name=model_name,
938
+ provider=LanguageModelProvider.LITELLM,
939
+ version="gpt-5-mini",
940
+ encoder_name=EncoderName.O200K_BASE,
941
+ capabilities=[
942
+ ModelCapabilities.FUNCTION_CALLING,
943
+ ModelCapabilities.STREAMING,
944
+ ModelCapabilities.REASONING,
945
+ ModelCapabilities.VISION,
946
+ ModelCapabilities.STRUCTURED_OUTPUT,
947
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
948
+ ],
949
+ token_limits=LanguageModelTokenLimits(
950
+ token_limit_input=272000, token_limit_output=128000
951
+ ),
952
+ info_cutoff_at=date(2024, 6, 24),
953
+ published_at=date(2025, 8, 7),
954
+ deprecated_at=date(2026, 8, 7),
955
+ retirement_at=date(2026, 8, 7),
956
+ temperature_bounds=TemperatureBounds(
957
+ min_temperature=1.0, max_temperature=1.0
958
+ ),
959
+ default_options={
960
+ "reasoning_effort": "minimal",
961
+ },
962
+ )
963
+ case LanguageModelName.LITELLM_OPENAI_GPT_5_NANO:
964
+ return cls(
965
+ name=model_name,
966
+ provider=LanguageModelProvider.LITELLM,
967
+ version="gpt-5-nano",
968
+ encoder_name=EncoderName.O200K_BASE,
969
+ capabilities=[
970
+ ModelCapabilities.FUNCTION_CALLING,
971
+ ModelCapabilities.STREAMING,
972
+ ModelCapabilities.REASONING,
973
+ ModelCapabilities.VISION,
974
+ ModelCapabilities.STRUCTURED_OUTPUT,
975
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
976
+ ],
977
+ token_limits=LanguageModelTokenLimits(
978
+ token_limit_input=272000, token_limit_output=128000
979
+ ),
980
+ info_cutoff_at=date(2024, 5, 31),
981
+ published_at=date(2025, 8, 7),
982
+ deprecated_at=date(2026, 8, 7),
983
+ retirement_at=date(2026, 8, 7),
984
+ temperature_bounds=TemperatureBounds(
985
+ min_temperature=1.0, max_temperature=1.0
986
+ ),
987
+ default_options={
988
+ "reasoning_effort": "minimal",
989
+ },
990
+ )
991
+ case LanguageModelName.LITELLM_OPENAI_GPT_5_CHAT:
992
+ return cls(
993
+ name=model_name,
994
+ provider=LanguageModelProvider.LITELLM,
995
+ version="gpt-5-chat",
996
+ encoder_name=EncoderName.O200K_BASE,
997
+ capabilities=[
998
+ ModelCapabilities.STREAMING,
999
+ ModelCapabilities.VISION,
1000
+ ],
1001
+ token_limits=LanguageModelTokenLimits(
1002
+ token_limit_input=128000, token_limit_output=16384
1003
+ ),
1004
+ info_cutoff_at=date(2024, 10, 24),
1005
+ published_at=date(2025, 8, 7),
1006
+ deprecated_at=date(2026, 8, 7),
1007
+ retirement_at=date(2026, 8, 7),
1008
+ )
1009
+ case LanguageModelName.LITELLM_OPENAI_GPT_5_PRO:
1010
+ return cls(
1011
+ name=model_name,
1012
+ provider=LanguageModelProvider.LITELLM,
1013
+ version="2025-10-06",
1014
+ encoder_name=EncoderName.O200K_BASE,
1015
+ capabilities=[
1016
+ ModelCapabilities.FUNCTION_CALLING,
1017
+ ModelCapabilities.REASONING,
1018
+ ModelCapabilities.VISION,
1019
+ ModelCapabilities.STRUCTURED_OUTPUT,
1020
+ ],
1021
+ token_limits=LanguageModelTokenLimits(
1022
+ token_limit_input=272000, token_limit_output=128000
1023
+ ),
1024
+ info_cutoff_at=date(2024, 10, 30),
1025
+ published_at=date(2025, 10, 6),
1026
+ temperature_bounds=TemperatureBounds(
1027
+ min_temperature=1.0, max_temperature=1.0
1028
+ ),
1029
+ )
1030
+ case LanguageModelName.LITELLM_OPENAI_O1:
1031
+ return cls(
1032
+ name=model_name,
1033
+ provider=LanguageModelProvider.LITELLM,
1034
+ version="2024-12-17",
1035
+ encoder_name=EncoderName.O200K_BASE,
1036
+ capabilities=[
1037
+ ModelCapabilities.STRUCTURED_OUTPUT,
1038
+ ModelCapabilities.FUNCTION_CALLING,
1039
+ ModelCapabilities.STREAMING,
1040
+ ModelCapabilities.VISION,
1041
+ ModelCapabilities.REASONING,
1042
+ ],
1043
+ token_limits=LanguageModelTokenLimits(
1044
+ token_limit_input=200_000, token_limit_output=100_000
1045
+ ),
1046
+ info_cutoff_at=date(2023, 10, 1),
1047
+ published_at=date(2024, 12, 17),
1048
+ temperature_bounds=TemperatureBounds(
1049
+ min_temperature=1.0, max_temperature=1.0
1050
+ ),
1051
+ )
1052
+ case LanguageModelName.LITELLM_OPENAI_O3:
1053
+ return cls(
1054
+ name=model_name,
1055
+ provider=LanguageModelProvider.LITELLM,
1056
+ version="2025-04-16",
1057
+ encoder_name=EncoderName.O200K_BASE,
1058
+ capabilities=[
1059
+ ModelCapabilities.FUNCTION_CALLING,
1060
+ ModelCapabilities.STRUCTURED_OUTPUT,
1061
+ ModelCapabilities.STREAMING,
1062
+ ModelCapabilities.REASONING,
1063
+ ],
1064
+ token_limits=LanguageModelTokenLimits(
1065
+ token_limit_input=200_000, token_limit_output=100_000
1066
+ ),
1067
+ temperature_bounds=TemperatureBounds(
1068
+ min_temperature=1.0, max_temperature=1.0
1069
+ ),
1070
+ published_at=date(2025, 4, 16),
1071
+ info_cutoff_at=date(2024, 6, 1),
1072
+ )
1073
+ case LanguageModelName.LITELLM_OPENAI_O3_DEEP_RESEARCH:
1074
+ return cls(
1075
+ name=model_name,
1076
+ provider=LanguageModelProvider.LITELLM,
1077
+ version="2025-06-26",
1078
+ encoder_name=EncoderName.O200K_BASE,
1079
+ token_limits=LanguageModelTokenLimits(
1080
+ token_limit_input=200_000, token_limit_output=100_000
1081
+ ),
1082
+ published_at=date(2025, 4, 16),
1083
+ capabilities=[ModelCapabilities.STREAMING],
1084
+ info_cutoff_at=date(2024, 6, 1),
1085
+ )
1086
+ case LanguageModelName.LITELLM_OPENAI_O3_PRO:
1087
+ return cls(
1088
+ name=model_name,
1089
+ provider=LanguageModelProvider.LITELLM,
1090
+ version="2025-06-10",
1091
+ encoder_name=EncoderName.O200K_BASE,
1092
+ capabilities=[
1093
+ ModelCapabilities.FUNCTION_CALLING,
1094
+ ModelCapabilities.REASONING,
1095
+ ModelCapabilities.STRUCTURED_OUTPUT,
1096
+ ],
1097
+ token_limits=LanguageModelTokenLimits(
1098
+ token_limit_input=200_000, token_limit_output=100_000
1099
+ ),
1100
+ published_at=date(2025, 6, 10),
1101
+ info_cutoff_at=date(2024, 6, 1),
1102
+ )
1103
+ case LanguageModelName.LITELLM_OPENAI_O4_MINI:
1104
+ return cls(
1105
+ name=model_name,
1106
+ provider=LanguageModelProvider.LITELLM,
1107
+ version="2025-04-16",
1108
+ encoder_name=EncoderName.O200K_BASE,
1109
+ capabilities=[
1110
+ ModelCapabilities.FUNCTION_CALLING,
1111
+ ModelCapabilities.STREAMING,
1112
+ ModelCapabilities.STRUCTURED_OUTPUT,
1113
+ ],
1114
+ token_limits=LanguageModelTokenLimits(
1115
+ token_limit_input=200_000, token_limit_output=100_000
1116
+ ),
1117
+ published_at=date(2025, 4, 16),
1118
+ info_cutoff_at=date(2024, 6, 1),
1119
+ temperature_bounds=TemperatureBounds(
1120
+ min_temperature=1.0, max_temperature=1.0
1121
+ ),
1122
+ )
1123
+ case LanguageModelName.LITELLM_OPENAI_O4_MINI_DEEP_RESEARCH:
1124
+ return cls(
1125
+ name=model_name,
1126
+ provider=LanguageModelProvider.LITELLM,
1127
+ version="2025-06-26",
1128
+ encoder_name=EncoderName.O200K_BASE,
1129
+ token_limits=LanguageModelTokenLimits(
1130
+ token_limit_input=200_000, token_limit_output=100_000
1131
+ ),
1132
+ published_at=date(2025, 4, 16),
1133
+ capabilities=[ModelCapabilities.STREAMING],
1134
+ info_cutoff_at=date(2024, 6, 1),
1135
+ )
1136
+ case LanguageModelName.LITELLM_OPENAI_GPT_4_1_MINI:
1137
+ return cls(
1138
+ name=model_name,
1139
+ provider=LanguageModelProvider.LITELLM,
1140
+ version="2025-04-14",
1141
+ encoder_name=EncoderName.O200K_BASE,
1142
+ published_at=date(2025, 4, 14),
1143
+ info_cutoff_at=date(2024, 6, 1),
1144
+ token_limits=LanguageModelTokenLimits(
1145
+ token_limit_input=1_047_576, token_limit_output=32_768
1146
+ ),
1147
+ capabilities=[
1148
+ ModelCapabilities.STREAMING,
1149
+ ModelCapabilities.FUNCTION_CALLING,
1150
+ ModelCapabilities.STRUCTURED_OUTPUT,
1151
+ ],
1152
+ )
1153
+ case LanguageModelName.LITELLM_OPENAI_GPT_4_1_NANO:
1154
+ return cls(
1155
+ name=model_name,
1156
+ provider=LanguageModelProvider.LITELLM,
1157
+ version="2025-04-14",
1158
+ encoder_name=EncoderName.O200K_BASE,
1159
+ published_at=date(2025, 4, 14),
1160
+ info_cutoff_at=date(2024, 6, 1),
1161
+ token_limits=LanguageModelTokenLimits(
1162
+ token_limit_input=1_047_576, token_limit_output=32_768
1163
+ ),
1164
+ capabilities=[
1165
+ ModelCapabilities.STREAMING,
1166
+ ModelCapabilities.FUNCTION_CALLING,
1167
+ ModelCapabilities.STRUCTURED_OUTPUT,
1168
+ ],
1169
+ )
1170
+ case LanguageModelName.LITELLM_DEEPSEEK_R1:
1171
+ return cls(
1172
+ name=model_name,
1173
+ provider=LanguageModelProvider.LITELLM,
1174
+ version="deepseek-r1",
1175
+ capabilities=[
1176
+ ModelCapabilities.FUNCTION_CALLING,
1177
+ ModelCapabilities.STRUCTURED_OUTPUT,
1178
+ ModelCapabilities.STREAMING,
1179
+ ModelCapabilities.REASONING,
1180
+ ],
1181
+ token_limits=LanguageModelTokenLimits(
1182
+ token_limit_input=64_000, token_limit_output=4_000
1183
+ ),
1184
+ published_at=date(2025, 1, 25),
1185
+ )
1186
+ case LanguageModelName.LITELLM_DEEPSEEK_V3:
1187
+ return cls(
1188
+ name=model_name,
1189
+ provider=LanguageModelProvider.LITELLM,
1190
+ version="deepseek-v3-1",
1191
+ capabilities=[
1192
+ ModelCapabilities.FUNCTION_CALLING,
1193
+ ModelCapabilities.STRUCTURED_OUTPUT,
1194
+ ModelCapabilities.REASONING,
1195
+ ],
1196
+ token_limits=LanguageModelTokenLimits(
1197
+ token_limit_input=128_000, token_limit_output=4_000
1198
+ ),
1199
+ published_at=date(2025, 8, 1),
1200
+ )
1201
+ case LanguageModelName.LITELLM_QWEN_3:
1202
+ return cls(
1203
+ name=model_name,
1204
+ provider=LanguageModelProvider.LITELLM,
1205
+ version="qwen-3",
1206
+ capabilities=[
1207
+ ModelCapabilities.FUNCTION_CALLING,
1208
+ ModelCapabilities.STRUCTURED_OUTPUT,
1209
+ ModelCapabilities.STREAMING,
1210
+ ModelCapabilities.REASONING,
1211
+ ],
1212
+ published_at=date(2025, 4, 29),
1213
+ token_limits=LanguageModelTokenLimits(
1214
+ token_limit_input=256_000, token_limit_output=32_768
1215
+ ),
1216
+ )
1217
+ case LanguageModelName.LITELLM_QWEN_3_THINKING:
1218
+ return cls(
1219
+ name=model_name,
1220
+ provider=LanguageModelProvider.LITELLM,
1221
+ version="qwen-3-thinking",
1222
+ capabilities=[
1223
+ ModelCapabilities.FUNCTION_CALLING,
1224
+ ModelCapabilities.STRUCTURED_OUTPUT,
1225
+ ModelCapabilities.STREAMING,
1226
+ ModelCapabilities.REASONING,
1227
+ ],
1228
+ token_limits=LanguageModelTokenLimits(
1229
+ token_limit_input=256_000, token_limit_output=32_768
1230
+ ),
1231
+ published_at=date(2025, 7, 25),
1232
+ )
1233
+
1234
+ case _:
1235
+ if isinstance(model_name, LanguageModelName):
1236
+ raise ValueError(
1237
+ f"{model_name} is not supported. Please add model information in toolkit."
1238
+ )
1239
+
1240
+ return cls(
1241
+ name=model_name,
1242
+ version="custom",
1243
+ provider=LanguageModelProvider.CUSTOM,
1244
+ )
1245
+
1246
@property
def display_name(self) -> str:
    """Return the model name as a plain string.

    Enum-typed names are unwrapped to their underlying string value;
    custom model names (already plain strings) pass through unchanged.
    """
    name = self.name
    return name.value if isinstance(name, LanguageModelName) else name
1256
+
1257
+
1258
+ @deprecated(
1259
+ """
1260
+ Use `LanguageModelInfo` instead of `LanguageModel`.
1261
+
1262
+ `LanguageModel` will be deprecated on 31.12.2025
1263
+ """,
1264
+ )
1265
+ class LanguageModel:
1266
+ _info: ClassVar[LanguageModelInfo]
1267
+
1268
+ def __init__(self, model_name: LanguageModelName | str):
1269
+ self._model_info = self.get_model_info(model_name)
1270
+
1271
+ @property
1272
+ def info(self) -> LanguageModelInfo:
1273
+ """Return all infos about the model.
1274
+
333
1275
  - name
334
1276
  - version
335
1277
  - provider