unique_toolkit 0.7.9__py3-none-any.whl → 1.33.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. unique_toolkit/__init__.py +36 -3
  2. unique_toolkit/_common/api_calling/human_verification_manager.py +357 -0
  3. unique_toolkit/_common/base_model_type_attribute.py +303 -0
  4. unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
  5. unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
  6. unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
  7. unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
  8. unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
  9. unique_toolkit/_common/default_language_model.py +12 -0
  10. unique_toolkit/_common/docx_generator/__init__.py +7 -0
  11. unique_toolkit/_common/docx_generator/config.py +12 -0
  12. unique_toolkit/_common/docx_generator/schemas.py +80 -0
  13. unique_toolkit/_common/docx_generator/service.py +225 -0
  14. unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
  15. unique_toolkit/_common/endpoint_builder.py +368 -0
  16. unique_toolkit/_common/endpoint_requestor.py +480 -0
  17. unique_toolkit/_common/exception.py +24 -0
  18. unique_toolkit/_common/experimental/endpoint_builder.py +368 -0
  19. unique_toolkit/_common/experimental/endpoint_requestor.py +488 -0
  20. unique_toolkit/_common/feature_flags/schema.py +9 -0
  21. unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
  22. unique_toolkit/_common/pydantic_helpers.py +174 -0
  23. unique_toolkit/_common/referencing.py +53 -0
  24. unique_toolkit/_common/string_utilities.py +140 -0
  25. unique_toolkit/_common/tests/test_referencing.py +521 -0
  26. unique_toolkit/_common/tests/test_string_utilities.py +506 -0
  27. unique_toolkit/_common/token/image_token_counting.py +67 -0
  28. unique_toolkit/_common/token/token_counting.py +204 -0
  29. unique_toolkit/_common/utils/__init__.py +1 -0
  30. unique_toolkit/_common/utils/files.py +43 -0
  31. unique_toolkit/_common/utils/image/encode.py +25 -0
  32. unique_toolkit/_common/utils/jinja/helpers.py +10 -0
  33. unique_toolkit/_common/utils/jinja/render.py +18 -0
  34. unique_toolkit/_common/utils/jinja/schema.py +65 -0
  35. unique_toolkit/_common/utils/jinja/utils.py +80 -0
  36. unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
  37. unique_toolkit/_common/utils/structured_output/schema.py +5 -0
  38. unique_toolkit/_common/utils/write_configuration.py +51 -0
  39. unique_toolkit/_common/validators.py +101 -4
  40. unique_toolkit/agentic/__init__.py +1 -0
  41. unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
  42. unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
  43. unique_toolkit/agentic/evaluation/config.py +36 -0
  44. unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
  45. unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
  46. unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
  47. unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
  48. unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
  49. unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +112 -0
  50. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
  51. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +20 -16
  52. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +32 -21
  53. unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
  54. unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
  55. unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
  56. unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
  57. unique_toolkit/agentic/history_manager/history_construction_with_contents.py +298 -0
  58. unique_toolkit/agentic/history_manager/history_manager.py +241 -0
  59. unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
  60. unique_toolkit/agentic/history_manager/utils.py +96 -0
  61. unique_toolkit/agentic/message_log_manager/__init__.py +5 -0
  62. unique_toolkit/agentic/message_log_manager/service.py +93 -0
  63. unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
  64. unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
  65. unique_toolkit/agentic/responses_api/__init__.py +19 -0
  66. unique_toolkit/agentic/responses_api/postprocessors/code_display.py +71 -0
  67. unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +297 -0
  68. unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
  69. unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
  70. unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
  71. unique_toolkit/agentic/tools/__init__.py +1 -0
  72. unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
  73. unique_toolkit/agentic/tools/a2a/config.py +17 -0
  74. unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
  75. unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
  76. unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
  77. unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
  78. unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
  79. unique_toolkit/agentic/tools/a2a/manager.py +55 -0
  80. unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
  81. unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +240 -0
  82. unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +84 -0
  83. unique_toolkit/agentic/tools/a2a/postprocessing/config.py +78 -0
  84. unique_toolkit/agentic/tools/a2a/postprocessing/display.py +264 -0
  85. unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
  86. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display.py +421 -0
  87. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +2103 -0
  88. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
  89. unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
  90. unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
  91. unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
  92. unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
  93. unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
  94. unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
  95. unique_toolkit/agentic/tools/a2a/tool/config.py +158 -0
  96. unique_toolkit/agentic/tools/a2a/tool/service.py +393 -0
  97. unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
  98. unique_toolkit/agentic/tools/config.py +128 -0
  99. unique_toolkit/agentic/tools/factory.py +44 -0
  100. unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
  101. unique_toolkit/agentic/tools/mcp/manager.py +71 -0
  102. unique_toolkit/agentic/tools/mcp/models.py +28 -0
  103. unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
  104. unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
  105. unique_toolkit/agentic/tools/openai_builtin/base.py +46 -0
  106. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
  107. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +88 -0
  108. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +250 -0
  109. unique_toolkit/agentic/tools/openai_builtin/manager.py +79 -0
  110. unique_toolkit/agentic/tools/schemas.py +145 -0
  111. unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
  112. unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
  113. unique_toolkit/agentic/tools/tool.py +187 -0
  114. unique_toolkit/agentic/tools/tool_manager.py +492 -0
  115. unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
  116. unique_toolkit/agentic/tools/utils/__init__.py +19 -0
  117. unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
  118. unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
  119. unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
  120. unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
  121. unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
  122. unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
  123. unique_toolkit/app/__init__.py +9 -0
  124. unique_toolkit/app/dev_util.py +180 -0
  125. unique_toolkit/app/fast_api_factory.py +131 -0
  126. unique_toolkit/app/init_sdk.py +32 -1
  127. unique_toolkit/app/schemas.py +206 -31
  128. unique_toolkit/app/unique_settings.py +367 -0
  129. unique_toolkit/app/webhook.py +77 -0
  130. unique_toolkit/chat/__init__.py +8 -1
  131. unique_toolkit/chat/deprecated/service.py +232 -0
  132. unique_toolkit/chat/functions.py +648 -78
  133. unique_toolkit/chat/rendering.py +34 -0
  134. unique_toolkit/chat/responses_api.py +461 -0
  135. unique_toolkit/chat/schemas.py +134 -2
  136. unique_toolkit/chat/service.py +115 -767
  137. unique_toolkit/content/functions.py +353 -8
  138. unique_toolkit/content/schemas.py +128 -15
  139. unique_toolkit/content/service.py +321 -45
  140. unique_toolkit/content/smart_rules.py +301 -0
  141. unique_toolkit/content/utils.py +10 -3
  142. unique_toolkit/data_extraction/README.md +96 -0
  143. unique_toolkit/data_extraction/__init__.py +11 -0
  144. unique_toolkit/data_extraction/augmented/__init__.py +5 -0
  145. unique_toolkit/data_extraction/augmented/service.py +93 -0
  146. unique_toolkit/data_extraction/base.py +25 -0
  147. unique_toolkit/data_extraction/basic/__init__.py +11 -0
  148. unique_toolkit/data_extraction/basic/config.py +18 -0
  149. unique_toolkit/data_extraction/basic/prompt.py +13 -0
  150. unique_toolkit/data_extraction/basic/service.py +55 -0
  151. unique_toolkit/embedding/service.py +103 -12
  152. unique_toolkit/framework_utilities/__init__.py +1 -0
  153. unique_toolkit/framework_utilities/langchain/__init__.py +10 -0
  154. unique_toolkit/framework_utilities/langchain/client.py +71 -0
  155. unique_toolkit/framework_utilities/langchain/history.py +19 -0
  156. unique_toolkit/framework_utilities/openai/__init__.py +6 -0
  157. unique_toolkit/framework_utilities/openai/client.py +84 -0
  158. unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
  159. unique_toolkit/framework_utilities/utils.py +23 -0
  160. unique_toolkit/language_model/__init__.py +3 -0
  161. unique_toolkit/language_model/_responses_api_utils.py +93 -0
  162. unique_toolkit/language_model/builder.py +27 -11
  163. unique_toolkit/language_model/default_language_model.py +3 -0
  164. unique_toolkit/language_model/functions.py +345 -43
  165. unique_toolkit/language_model/infos.py +1288 -46
  166. unique_toolkit/language_model/reference.py +242 -0
  167. unique_toolkit/language_model/schemas.py +481 -49
  168. unique_toolkit/language_model/service.py +229 -28
  169. unique_toolkit/protocols/support.py +145 -0
  170. unique_toolkit/services/__init__.py +7 -0
  171. unique_toolkit/services/chat_service.py +1631 -0
  172. unique_toolkit/services/knowledge_base.py +1094 -0
  173. unique_toolkit/short_term_memory/service.py +178 -41
  174. unique_toolkit/smart_rules/__init__.py +0 -0
  175. unique_toolkit/smart_rules/compile.py +56 -0
  176. unique_toolkit/test_utilities/events.py +197 -0
  177. unique_toolkit-1.33.3.dist-info/METADATA +1145 -0
  178. unique_toolkit-1.33.3.dist-info/RECORD +205 -0
  179. unique_toolkit/evaluators/__init__.py +0 -1
  180. unique_toolkit/evaluators/config.py +0 -35
  181. unique_toolkit/evaluators/constants.py +0 -1
  182. unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
  183. unique_toolkit/evaluators/context_relevancy/service.py +0 -53
  184. unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
  185. unique_toolkit/evaluators/hallucination/constants.py +0 -41
  186. unique_toolkit-0.7.9.dist-info/METADATA +0 -413
  187. unique_toolkit-0.7.9.dist-info/RECORD +0 -64
  188. /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
  189. {unique_toolkit-0.7.9.dist-info → unique_toolkit-1.33.3.dist-info}/LICENSE +0 -0
  190. {unique_toolkit-0.7.9.dist-info → unique_toolkit-1.33.3.dist-info}/WHEEL +0 -0
@@ -1,11 +1,12 @@
1
1
  from datetime import date
2
2
  from enum import StrEnum
3
- from typing import ClassVar, Optional, Self
3
+ from typing import Annotated, Any, ClassVar, Optional, Self
4
4
 
5
- from pydantic import BaseModel
5
+ from pydantic import BaseModel, Field
6
6
  from pydantic.json_schema import SkipJsonSchema
7
7
  from typing_extensions import deprecated
8
8
 
9
+ from unique_toolkit._common.pydantic_helpers import get_configuration_dict
9
10
  from unique_toolkit.language_model.schemas import LanguageModelTokenLimits
10
11
 
11
12
 
@@ -14,6 +15,16 @@ class LanguageModelName(StrEnum):
14
15
  AZURE_GPT_4_0613 = "AZURE_GPT_4_0613"
15
16
  AZURE_GPT_4_32K_0613 = "AZURE_GPT_4_32K_0613"
16
17
  AZURE_GPT_4_TURBO_2024_0409 = "AZURE_GPT_4_TURBO_2024_0409"
18
+ AZURE_GPT_5_2025_0807 = "AZURE_GPT_5_2025_0807"
19
+ AZURE_GPT_5_MINI_2025_0807 = "AZURE_GPT_5_MINI_2025_0807"
20
+ AZURE_GPT_5_NANO_2025_0807 = "AZURE_GPT_5_NANO_2025_0807"
21
+ AZURE_GPT_5_CHAT_2025_0807 = "AZURE_GPT_5_CHAT_2025_0807"
22
+ AZURE_GPT_5_PRO_2025_1006 = "AZURE_GPT_5_PRO_2025_1006"
23
+ AZURE_GPT_51_2025_1113 = "AZURE_GPT_51_2025_1113"
24
+ AZURE_GPT_51_THINKING_2025_1113 = "AZURE_GPT_51_THINKING_2025_1113"
25
+ AZURE_GPT_51_CHAT_2025_1113 = "AZURE_GPT_51_CHAT_2025_1113"
26
+ AZURE_GPT_51_CODEX_2025_1113 = "AZURE_GPT_51_CODEX_2025_1113"
27
+ AZURE_GPT_51_CODEX_MINI_2025_1113 = "AZURE_GPT_51_CODEX_MINI_2025_1113"
17
28
  AZURE_GPT_4o_2024_0513 = "AZURE_GPT_4o_2024_0513"
18
29
  AZURE_GPT_4o_2024_0806 = "AZURE_GPT_4o_2024_0806"
19
30
  AZURE_GPT_4o_2024_1120 = "AZURE_GPT_4o_2024_1120"
@@ -23,6 +34,48 @@ class LanguageModelName(StrEnum):
23
34
  AZURE_o3_MINI_2025_0131 = "AZURE_o3_MINI_2025_0131"
24
35
  AZURE_GPT_45_PREVIEW_2025_0227 = "AZURE_GPT_45_PREVIEW_2025_0227"
25
36
  AZURE_GPT_41_2025_0414 = "AZURE_GPT_41_2025_0414"
37
+ AZURE_GPT_41_MINI_2025_0414 = "AZURE_GPT_41_MINI_2025_0414"
38
+ AZURE_GPT_41_NANO_2025_0414 = "AZURE_GPT_41_NANO_2025_0414"
39
+ AZURE_o3_2025_0416 = "AZURE_o3_2025_0416"
40
+ AZURE_o4_MINI_2025_0416 = "AZURE_o4_MINI_2025_0416"
41
+ ANTHROPIC_CLAUDE_3_7_SONNET = "litellm:anthropic-claude-3-7-sonnet"
42
+ ANTHROPIC_CLAUDE_3_7_SONNET_THINKING = (
43
+ "litellm:anthropic-claude-3-7-sonnet-thinking"
44
+ )
45
+ ANTHROPIC_CLAUDE_HAIKU_4_5 = "litellm:anthropic-claude-haiku-4-5"
46
+ ANTHROPIC_CLAUDE_SONNET_4 = "litellm:anthropic-claude-sonnet-4"
47
+ ANTHROPIC_CLAUDE_SONNET_4_5 = "litellm:anthropic-claude-sonnet-4-5"
48
+ ANTHROPIC_CLAUDE_OPUS_4 = "litellm:anthropic-claude-opus-4"
49
+ ANTHROPIC_CLAUDE_OPUS_4_1 = "litellm:anthropic-claude-opus-4-1"
50
+ ANTHROPIC_CLAUDE_OPUS_4_5 = "litellm:anthropic-claude-opus-4-5"
51
+ GEMINI_2_0_FLASH = "litellm:gemini-2-0-flash"
52
+ GEMINI_2_5_FLASH = "litellm:gemini-2-5-flash"
53
+ GEMINI_2_5_FLASH_LITE = "litellm:gemini-2-5-flash-lite"
54
+ GEMINI_2_5_FLASH_LITE_PREVIEW_0617 = "litellm:gemini-2-5-flash-lite-preview-06-17"
55
+ GEMINI_2_5_FLASH_PREVIEW_0520 = "litellm:gemini-2-5-flash-preview-05-20"
56
+ GEMINI_2_5_PRO = "litellm:gemini-2-5-pro"
57
+ GEMINI_2_5_PRO_EXP_0325 = "litellm:gemini-2-5-pro-exp-03-25"
58
+ GEMINI_2_5_PRO_PREVIEW_0605 = "litellm:gemini-2-5-pro-preview-06-05"
59
+ GEMINI_3_PRO_PREVIEW = "litellm:gemini-3-pro-preview"
60
+ LITELLM_OPENAI_GPT_5 = "litellm:openai-gpt-5"
61
+ LITELLM_OPENAI_GPT_5_MINI = "litellm:openai-gpt-5-mini"
62
+ LITELLM_OPENAI_GPT_5_NANO = "litellm:openai-gpt-5-nano"
63
+ LITELLM_OPENAI_GPT_5_CHAT = "litellm:openai-gpt-5-chat"
64
+ LITELLM_OPENAI_GPT_5_PRO = "litellm:openai-gpt-5-pro"
65
+ LITELLM_OPENAI_GPT_51 = "litellm:openai-gpt-5-1"
66
+ LITELLM_OPENAI_GPT_51_THINKING = "litellm:openai-gpt-5-1-thinking"
67
+ LITELLM_OPENAI_O1 = "litellm:openai-o1"
68
+ LITELLM_OPENAI_O3 = "litellm:openai-o3"
69
+ LITELLM_OPENAI_O3_DEEP_RESEARCH = "litellm:openai-o3-deep-research"
70
+ LITELLM_OPENAI_O3_PRO = "litellm:openai-o3-pro"
71
+ LITELLM_OPENAI_O4_MINI = "litellm:openai-o4-mini"
72
+ LITELLM_OPENAI_O4_MINI_DEEP_RESEARCH = "litellm:openai-o4-mini-deep-research"
73
+ LITELLM_OPENAI_GPT_4_1_MINI = "litellm:openai-gpt-4-1-mini"
74
+ LITELLM_OPENAI_GPT_4_1_NANO = "litellm:openai-gpt-4-1-nano"
75
+ LITELLM_DEEPSEEK_R1 = "litellm:deepseek-r1"
76
+ LITELLM_DEEPSEEK_V3 = "litellm:deepseek-v3-1"
77
+ LITELLM_QWEN_3 = "litellm:qwen-3-235B-A22B"
78
+ LITELLM_QWEN_3_THINKING = "litellm:qwen-3-235B-A22B-thinking"
26
79
 
27
80
 
28
81
  class EncoderName(StrEnum):
@@ -45,6 +98,32 @@ def get_encoder_name(model_name: LanguageModelName) -> EncoderName:
45
98
  LMN.AZURE_GPT_4o_2024_0513
46
99
  | LMN.AZURE_GPT_4o_2024_0806
47
100
  | LMN.AZURE_GPT_4o_MINI_2024_0718
101
+ | LMN.AZURE_GPT_4o_2024_1120
102
+ | LMN.AZURE_GPT_5_2025_0807
103
+ | LMN.AZURE_GPT_5_MINI_2025_0807
104
+ | LMN.AZURE_GPT_5_NANO_2025_0807
105
+ | LMN.AZURE_GPT_5_CHAT_2025_0807
106
+ | LMN.AZURE_GPT_5_PRO_2025_1006
107
+ | LMN.AZURE_GPT_51_2025_1113
108
+ | LMN.AZURE_GPT_51_THINKING_2025_1113
109
+ | LMN.AZURE_GPT_51_CHAT_2025_1113
110
+ | LMN.AZURE_GPT_51_CODEX_2025_1113
111
+ | LMN.AZURE_GPT_51_CODEX_MINI_2025_1113
112
+ | LMN.LITELLM_OPENAI_GPT_5
113
+ | LMN.LITELLM_OPENAI_GPT_5_MINI
114
+ | LMN.LITELLM_OPENAI_GPT_5_NANO
115
+ | LMN.LITELLM_OPENAI_GPT_5_CHAT
116
+ | LMN.LITELLM_OPENAI_GPT_5_PRO
117
+ | LMN.LITELLM_OPENAI_GPT_51
118
+ | LMN.LITELLM_OPENAI_GPT_51_THINKING
119
+ | LMN.LITELLM_OPENAI_O1
120
+ | LMN.LITELLM_OPENAI_O3
121
+ | LMN.LITELLM_OPENAI_O3_DEEP_RESEARCH
122
+ | LMN.LITELLM_OPENAI_O4_MINI
123
+ | LMN.LITELLM_OPENAI_O4_MINI_DEEP_RESEARCH
124
+ | LMN.LITELLM_OPENAI_GPT_4_1_MINI
125
+ | LMN.LITELLM_OPENAI_GPT_4_1_NANO
126
+ | LMN.LITELLM_OPENAI_O3_PRO
48
127
  ):
49
128
  return EncoderName.O200K_BASE
50
129
  case _:
@@ -57,22 +136,34 @@ def get_encoder_name(model_name: LanguageModelName) -> EncoderName:
57
136
  class LanguageModelProvider(StrEnum):
58
137
  AZURE = "AZURE"
59
138
  CUSTOM = "CUSTOM"
139
+ LITELLM = "LITELLM"
60
140
 
61
141
 
62
142
  class ModelCapabilities(StrEnum):
143
+ CHAT_COMPLETIONS_API = "chat_completions_api"
63
144
  FUNCTION_CALLING = "function_calling"
64
145
  PARALLEL_FUNCTION_CALLING = "parallel_function_calling"
146
+ REASONING = "reasoning"
65
147
  REPRODUCIBLE_OUTPUT = "reproducible_output"
148
+ RESPONSES_API = "responses_api"
66
149
  STRUCTURED_OUTPUT = "structured_output"
67
- VISION = "vision"
68
150
  STREAMING = "streaming"
69
- REASONING = "reasoning"
151
+ VISION = "vision"
152
+
153
+
154
+ class TemperatureBounds(BaseModel):
155
+ min_temperature: float
156
+ max_temperature: float
70
157
 
71
158
 
72
159
  class LanguageModelInfo(BaseModel):
73
- name: LanguageModelName | str
74
- version: str
75
- provider: LanguageModelProvider
160
+ model_config = get_configuration_dict()
161
+ name: (
162
+ Annotated[str, Field(title="Custom Model Name")]
163
+ | SkipJsonSchema[LanguageModelName]
164
+ ) = Field(title="Model Name", default=LanguageModelName.AZURE_GPT_4o_2024_1120)
165
+ provider: LanguageModelProvider = LanguageModelProvider.AZURE
166
+ version: str = Field(title="Model Version", default="")
76
167
 
77
168
  encoder_name: EncoderName = EncoderName.CL100K_BASE
78
169
 
@@ -80,14 +171,36 @@ class LanguageModelInfo(BaseModel):
80
171
  token_limits: LanguageModelTokenLimits = LanguageModelTokenLimits(
81
172
  token_limit_input=7_000, token_limit_output=1_000
82
173
  )
174
+
83
175
  capabilities: list[ModelCapabilities] = [ModelCapabilities.STREAMING]
84
176
 
85
- info_cutoff_at: date | SkipJsonSchema[None] = None
86
- published_at: date | SkipJsonSchema[None] = None
87
- retirement_at: date | SkipJsonSchema[None] = None
177
+ info_cutoff_at: (
178
+ Annotated[date, Field(title="Info Cutoff")]
179
+ | Annotated[None, Field(title="Info Cutoff Unknown")]
180
+ ) = None
88
181
 
89
- deprecated_at: date | SkipJsonSchema[None] = None
90
- retirement_text: str | SkipJsonSchema[None] = None
182
+ published_at: (
183
+ Annotated[date, Field(title="Publishing Date")]
184
+ | Annotated[None, Field(title="Publishing Date Unknown")]
185
+ ) = None
186
+
187
+ retirement_at: (
188
+ Annotated[date, Field(title="Retirement Date")]
189
+ | Annotated[None, Field(title="Retirement Date Unknown")]
190
+ ) = date(2225, 12, 31)
191
+
192
+ deprecated_at: (
193
+ Annotated[date, Field(title="Deprecated Date")]
194
+ | Annotated[None, Field(title="Deprecated Date Unknown")]
195
+ ) = date(2225, 12, 31)
196
+
197
+ retirement_text: str = "This model is no longer supported."
198
+
199
+ temperature_bounds: (
200
+ TemperatureBounds | Annotated[None, Field(title="Temperature Bounds Unknown")]
201
+ ) = None
202
+
203
+ default_options: dict[str, Any] = {}
91
204
 
92
205
  @classmethod
93
206
  def from_name(cls, model_name: LanguageModelName) -> Self:
@@ -97,7 +210,6 @@ class LanguageModelInfo(BaseModel):
97
210
  name=model_name,
98
211
  provider=LanguageModelProvider.AZURE,
99
212
  capabilities=[
100
- ModelCapabilities.STRUCTURED_OUTPUT,
101
213
  ModelCapabilities.FUNCTION_CALLING,
102
214
  ModelCapabilities.PARALLEL_FUNCTION_CALLING,
103
215
  ModelCapabilities.REPRODUCIBLE_OUTPUT,
@@ -143,16 +255,285 @@ class LanguageModelInfo(BaseModel):
143
255
  deprecated_at=date(2024, 10, 1),
144
256
  retirement_at=date(2025, 6, 6),
145
257
  )
146
- case LanguageModelName.AZURE_GPT_4_TURBO_2024_0409:
258
+ case LanguageModelName.AZURE_GPT_5_2025_0807:
147
259
  return cls(
148
260
  name=model_name,
149
- encoder_name=EncoderName.CL100K_BASE,
261
+ provider=LanguageModelProvider.AZURE,
262
+ version="2025-08-07",
263
+ encoder_name=EncoderName.O200K_BASE,
264
+ capabilities=[
265
+ ModelCapabilities.CHAT_COMPLETIONS_API,
266
+ ModelCapabilities.FUNCTION_CALLING,
267
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
268
+ ModelCapabilities.REASONING,
269
+ ModelCapabilities.RESPONSES_API,
270
+ ModelCapabilities.STREAMING,
271
+ ModelCapabilities.STRUCTURED_OUTPUT,
272
+ ModelCapabilities.VISION,
273
+ ],
274
+ token_limits=LanguageModelTokenLimits(
275
+ token_limit_input=272000, token_limit_output=128000
276
+ ),
277
+ info_cutoff_at=date(2024, 10, 24),
278
+ published_at=date(2025, 8, 7),
279
+ deprecated_at=date(2026, 8, 7),
280
+ retirement_at=date(2026, 8, 7),
281
+ temperature_bounds=TemperatureBounds(
282
+ min_temperature=1.0, max_temperature=1.0
283
+ ),
284
+ default_options={
285
+ "reasoning_effort": "minimal",
286
+ },
287
+ )
288
+ case LanguageModelName.AZURE_GPT_5_MINI_2025_0807:
289
+ return cls(
290
+ name=model_name,
291
+ provider=LanguageModelProvider.AZURE,
292
+ version="2025-08-07",
293
+ encoder_name=EncoderName.O200K_BASE,
294
+ capabilities=[
295
+ ModelCapabilities.CHAT_COMPLETIONS_API,
296
+ ModelCapabilities.FUNCTION_CALLING,
297
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
298
+ ModelCapabilities.REASONING,
299
+ ModelCapabilities.RESPONSES_API,
300
+ ModelCapabilities.STREAMING,
301
+ ModelCapabilities.STRUCTURED_OUTPUT,
302
+ ModelCapabilities.VISION,
303
+ ],
304
+ token_limits=LanguageModelTokenLimits(
305
+ token_limit_input=272000, token_limit_output=128000
306
+ ),
307
+ info_cutoff_at=date(2024, 6, 24),
308
+ published_at=date(2025, 8, 7),
309
+ deprecated_at=date(2026, 8, 7),
310
+ retirement_at=date(2026, 8, 7),
311
+ temperature_bounds=TemperatureBounds(
312
+ min_temperature=1.0, max_temperature=1.0
313
+ ),
314
+ default_options={
315
+ "reasoning_effort": "minimal",
316
+ },
317
+ )
318
+ case LanguageModelName.AZURE_GPT_5_NANO_2025_0807:
319
+ return cls(
320
+ name=model_name,
321
+ provider=LanguageModelProvider.AZURE,
322
+ version="2025-08-07",
323
+ encoder_name=EncoderName.O200K_BASE,
324
+ capabilities=[
325
+ ModelCapabilities.CHAT_COMPLETIONS_API,
326
+ ModelCapabilities.FUNCTION_CALLING,
327
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
328
+ ModelCapabilities.REASONING,
329
+ ModelCapabilities.RESPONSES_API,
330
+ ModelCapabilities.STREAMING,
331
+ ModelCapabilities.STRUCTURED_OUTPUT,
332
+ ModelCapabilities.VISION,
333
+ ],
334
+ token_limits=LanguageModelTokenLimits(
335
+ token_limit_input=272000, token_limit_output=128000
336
+ ),
337
+ info_cutoff_at=date(2024, 5, 31),
338
+ published_at=date(2025, 8, 7),
339
+ deprecated_at=date(2026, 8, 7),
340
+ retirement_at=date(2026, 8, 7),
341
+ temperature_bounds=TemperatureBounds(
342
+ min_temperature=1.0, max_temperature=1.0
343
+ ),
344
+ default_options={
345
+ "reasoning_effort": "minimal",
346
+ },
347
+ )
348
+ case LanguageModelName.AZURE_GPT_5_CHAT_2025_0807:
349
+ return cls(
350
+ name=model_name,
351
+ provider=LanguageModelProvider.AZURE,
352
+ version="2025-08-07",
353
+ encoder_name=EncoderName.O200K_BASE,
354
+ capabilities=[
355
+ ModelCapabilities.RESPONSES_API,
356
+ ModelCapabilities.STREAMING,
357
+ ModelCapabilities.VISION,
358
+ ],
359
+ token_limits=LanguageModelTokenLimits(
360
+ token_limit_input=128000, token_limit_output=16384
361
+ ),
362
+ info_cutoff_at=date(2024, 10, 24),
363
+ published_at=date(2025, 8, 7),
364
+ deprecated_at=date(2026, 8, 7),
365
+ retirement_at=date(2026, 8, 7),
366
+ )
367
+ case LanguageModelName.AZURE_GPT_5_PRO_2025_1006:
368
+ return cls(
369
+ name=model_name,
370
+ provider=LanguageModelProvider.AZURE,
371
+ version="2025-10-06",
372
+ encoder_name=EncoderName.O200K_BASE,
373
+ capabilities=[
374
+ ModelCapabilities.FUNCTION_CALLING,
375
+ ModelCapabilities.REASONING,
376
+ ModelCapabilities.RESPONSES_API,
377
+ ModelCapabilities.STRUCTURED_OUTPUT,
378
+ ModelCapabilities.VISION,
379
+ ],
380
+ token_limits=LanguageModelTokenLimits(
381
+ token_limit_input=272000, token_limit_output=128000
382
+ ),
383
+ info_cutoff_at=date(2024, 10, 30),
384
+ published_at=date(2025, 10, 6),
385
+ temperature_bounds=TemperatureBounds(
386
+ min_temperature=1.0, max_temperature=1.0
387
+ ),
388
+ )
389
+ case LanguageModelName.AZURE_GPT_51_2025_1113:
390
+ return cls(
391
+ name=model_name,
392
+ provider=LanguageModelProvider.AZURE,
393
+ version="2025-11-13",
394
+ encoder_name=EncoderName.O200K_BASE,
395
+ capabilities=[
396
+ ModelCapabilities.CHAT_COMPLETIONS_API,
397
+ ModelCapabilities.FUNCTION_CALLING,
398
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
399
+ ModelCapabilities.REASONING,
400
+ ModelCapabilities.RESPONSES_API,
401
+ ModelCapabilities.STREAMING,
402
+ ModelCapabilities.STRUCTURED_OUTPUT,
403
+ ModelCapabilities.VISION,
404
+ ],
405
+ token_limits=LanguageModelTokenLimits(
406
+ token_limit_input=272000, token_limit_output=128000
407
+ ),
408
+ info_cutoff_at=date(2024, 9, 30),
409
+ published_at=date(2025, 11, 13),
410
+ temperature_bounds=TemperatureBounds(
411
+ min_temperature=1.0, max_temperature=1.0
412
+ ),
413
+ default_options={
414
+ "reasoning_effort": None,
415
+ },
416
+ )
417
+ case LanguageModelName.AZURE_GPT_51_THINKING_2025_1113:
418
+ return cls(
419
+ name=model_name,
420
+ provider=LanguageModelProvider.AZURE,
421
+ version="2025-11-13",
422
+ encoder_name=EncoderName.O200K_BASE,
423
+ capabilities=[
424
+ ModelCapabilities.CHAT_COMPLETIONS_API,
425
+ ModelCapabilities.FUNCTION_CALLING,
426
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
427
+ ModelCapabilities.REASONING,
428
+ ModelCapabilities.RESPONSES_API,
429
+ ModelCapabilities.STREAMING,
430
+ ModelCapabilities.STRUCTURED_OUTPUT,
431
+ ModelCapabilities.VISION,
432
+ ],
433
+ token_limits=LanguageModelTokenLimits(
434
+ token_limit_input=272000, token_limit_output=128000
435
+ ),
436
+ info_cutoff_at=date(2024, 9, 30),
437
+ published_at=date(2025, 11, 13),
438
+ temperature_bounds=TemperatureBounds(
439
+ min_temperature=1.0, max_temperature=1.0
440
+ ),
441
+ default_options={
442
+ "reasoning_effort": "medium",
443
+ },
444
+ )
445
+ case LanguageModelName.AZURE_GPT_51_CHAT_2025_1113:
446
+ return cls(
447
+ name=model_name,
448
+ provider=LanguageModelProvider.AZURE,
449
+ version="2025-11-13",
450
+ encoder_name=EncoderName.O200K_BASE,
451
+ capabilities=[
452
+ ModelCapabilities.CHAT_COMPLETIONS_API,
453
+ ModelCapabilities.FUNCTION_CALLING,
454
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
455
+ ModelCapabilities.REASONING,
456
+ ModelCapabilities.RESPONSES_API,
457
+ ModelCapabilities.STREAMING,
458
+ ModelCapabilities.STRUCTURED_OUTPUT,
459
+ ModelCapabilities.VISION,
460
+ ],
461
+ token_limits=LanguageModelTokenLimits(
462
+ token_limit_input=128_000, token_limit_output=16_384
463
+ ),
464
+ info_cutoff_at=date(2024, 9, 30),
465
+ published_at=date(2025, 11, 13),
466
+ temperature_bounds=TemperatureBounds(
467
+ min_temperature=1.0, max_temperature=1.0
468
+ ),
469
+ default_options={
470
+ "reasoning_effort": "medium",
471
+ },
472
+ )
473
+ case LanguageModelName.AZURE_GPT_51_CODEX_2025_1113:
474
+ return cls(
475
+ name=model_name,
476
+ provider=LanguageModelProvider.AZURE,
477
+ version="2025-11-13",
478
+ encoder_name=EncoderName.O200K_BASE,
479
+ capabilities=[
480
+ ModelCapabilities.FUNCTION_CALLING,
481
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
482
+ ModelCapabilities.REASONING,
483
+ ModelCapabilities.RESPONSES_API,
484
+ ModelCapabilities.STREAMING,
485
+ ModelCapabilities.STRUCTURED_OUTPUT,
486
+ ModelCapabilities.VISION,
487
+ ],
488
+ token_limits=LanguageModelTokenLimits(
489
+ token_limit_input=272_000, token_limit_output=128_000
490
+ ),
491
+ info_cutoff_at=date(2024, 9, 30),
492
+ published_at=date(2025, 11, 13),
493
+ temperature_bounds=TemperatureBounds(
494
+ min_temperature=1.0, max_temperature=1.0
495
+ ),
496
+ default_options={
497
+ "reasoning_effort": "medium",
498
+ },
499
+ )
500
+ case LanguageModelName.AZURE_GPT_51_CODEX_MINI_2025_1113:
501
+ return cls(
502
+ name=model_name,
503
+ provider=LanguageModelProvider.AZURE,
504
+ version="2025-11-13",
505
+ encoder_name=EncoderName.O200K_BASE,
150
506
  capabilities=[
151
507
  ModelCapabilities.FUNCTION_CALLING,
152
508
  ModelCapabilities.PARALLEL_FUNCTION_CALLING,
509
+ ModelCapabilities.REASONING,
510
+ ModelCapabilities.RESPONSES_API,
511
+ ModelCapabilities.STREAMING,
153
512
  ModelCapabilities.STRUCTURED_OUTPUT,
154
513
  ModelCapabilities.VISION,
514
+ ],
515
+ token_limits=LanguageModelTokenLimits(
516
+ token_limit_input=272_000, token_limit_output=128_000
517
+ ),
518
+ info_cutoff_at=date(2024, 9, 30),
519
+ published_at=date(2025, 11, 13),
520
+ temperature_bounds=TemperatureBounds(
521
+ min_temperature=1.0, max_temperature=1.0
522
+ ),
523
+ default_options={
524
+ "reasoning_effort": "medium",
525
+ },
526
+ )
527
+ case LanguageModelName.AZURE_GPT_4_TURBO_2024_0409:
528
+ return cls(
529
+ name=model_name,
530
+ encoder_name=EncoderName.CL100K_BASE,
531
+ capabilities=[
532
+ ModelCapabilities.CHAT_COMPLETIONS_API,
533
+ ModelCapabilities.FUNCTION_CALLING,
534
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
155
535
  ModelCapabilities.STREAMING,
536
+ ModelCapabilities.VISION,
156
537
  ],
157
538
  provider=LanguageModelProvider.AZURE,
158
539
  version="turbo-2024-04-09",
@@ -167,7 +548,7 @@ class LanguageModelInfo(BaseModel):
167
548
  name=model_name,
168
549
  encoder_name=EncoderName.O200K_BASE,
169
550
  capabilities=[
170
- ModelCapabilities.STRUCTURED_OUTPUT,
551
+ ModelCapabilities.CHAT_COMPLETIONS_API,
171
552
  ModelCapabilities.FUNCTION_CALLING,
172
553
  ModelCapabilities.PARALLEL_FUNCTION_CALLING,
173
554
  ModelCapabilities.STREAMING,
@@ -186,6 +567,7 @@ class LanguageModelInfo(BaseModel):
186
567
  name=model_name,
187
568
  encoder_name=EncoderName.O200K_BASE,
188
569
  capabilities=[
570
+ ModelCapabilities.CHAT_COMPLETIONS_API,
189
571
  ModelCapabilities.STRUCTURED_OUTPUT,
190
572
  ModelCapabilities.FUNCTION_CALLING,
191
573
  ModelCapabilities.PARALLEL_FUNCTION_CALLING,
@@ -205,6 +587,7 @@ class LanguageModelInfo(BaseModel):
205
587
  name=model_name,
206
588
  encoder_name=EncoderName.O200K_BASE,
207
589
  capabilities=[
590
+ ModelCapabilities.CHAT_COMPLETIONS_API,
208
591
  ModelCapabilities.STRUCTURED_OUTPUT,
209
592
  ModelCapabilities.FUNCTION_CALLING,
210
593
  ModelCapabilities.PARALLEL_FUNCTION_CALLING,
@@ -223,7 +606,7 @@ class LanguageModelInfo(BaseModel):
223
606
  return cls(
224
607
  name=model_name,
225
608
  capabilities=[
226
- ModelCapabilities.STRUCTURED_OUTPUT,
609
+ ModelCapabilities.CHAT_COMPLETIONS_API,
227
610
  ModelCapabilities.FUNCTION_CALLING,
228
611
  ModelCapabilities.PARALLEL_FUNCTION_CALLING,
229
612
  ModelCapabilities.STREAMING,
@@ -242,6 +625,7 @@ class LanguageModelInfo(BaseModel):
242
625
  return cls(
243
626
  name=model_name,
244
627
  capabilities=[
628
+ ModelCapabilities.CHAT_COMPLETIONS_API,
245
629
  ModelCapabilities.STRUCTURED_OUTPUT,
246
630
  ModelCapabilities.FUNCTION_CALLING,
247
631
  ModelCapabilities.STREAMING,
@@ -256,16 +640,21 @@ class LanguageModelInfo(BaseModel):
256
640
  ),
257
641
  info_cutoff_at=date(2023, 10, 1),
258
642
  published_at=date(2024, 9, 12),
643
+ temperature_bounds=TemperatureBounds(
644
+ min_temperature=1.0, max_temperature=1.0
645
+ ),
259
646
  )
260
647
  case LanguageModelName.AZURE_o1_2024_1217:
261
648
  return cls(
262
649
  name=model_name,
263
650
  capabilities=[
264
- ModelCapabilities.STRUCTURED_OUTPUT,
651
+ ModelCapabilities.CHAT_COMPLETIONS_API,
265
652
  ModelCapabilities.FUNCTION_CALLING,
653
+ ModelCapabilities.REASONING,
654
+ ModelCapabilities.RESPONSES_API,
266
655
  ModelCapabilities.STREAMING,
656
+ ModelCapabilities.STRUCTURED_OUTPUT,
267
657
  ModelCapabilities.VISION,
268
- ModelCapabilities.REASONING,
269
658
  ],
270
659
  provider=LanguageModelProvider.AZURE,
271
660
  version="2024-12-17",
@@ -275,15 +664,20 @@ class LanguageModelInfo(BaseModel):
275
664
  ),
276
665
  info_cutoff_at=date(2023, 10, 1),
277
666
  published_at=date(2024, 12, 17),
667
+ temperature_bounds=TemperatureBounds(
668
+ min_temperature=1.0, max_temperature=1.0
669
+ ),
278
670
  )
279
671
  case LanguageModelName.AZURE_o3_MINI_2025_0131:
280
672
  return cls(
281
673
  name=model_name,
282
674
  capabilities=[
283
- ModelCapabilities.STRUCTURED_OUTPUT,
675
+ ModelCapabilities.CHAT_COMPLETIONS_API,
284
676
  ModelCapabilities.FUNCTION_CALLING,
285
- ModelCapabilities.STREAMING,
286
677
  ModelCapabilities.REASONING,
678
+ ModelCapabilities.RESPONSES_API,
679
+ ModelCapabilities.STREAMING,
680
+ ModelCapabilities.STRUCTURED_OUTPUT,
287
681
  ],
288
682
  provider=LanguageModelProvider.AZURE,
289
683
  version="2025-01-31",
@@ -293,13 +687,65 @@ class LanguageModelInfo(BaseModel):
293
687
  ),
294
688
  info_cutoff_at=date(2023, 10, 1),
295
689
  published_at=date(2025, 1, 31),
690
+ temperature_bounds=TemperatureBounds(
691
+ min_temperature=1.0, max_temperature=1.0
692
+ ),
296
693
  )
297
- case LanguageModelName.AZURE_GPT_45_PREVIEW_2025_0227:
694
+ case LanguageModelName.AZURE_o3_2025_0416:
695
+ return cls(
696
+ name=model_name,
697
+ capabilities=[
698
+ ModelCapabilities.CHAT_COMPLETIONS_API,
699
+ ModelCapabilities.FUNCTION_CALLING,
700
+ ModelCapabilities.REASONING,
701
+ ModelCapabilities.RESPONSES_API,
702
+ ModelCapabilities.STREAMING,
703
+ ModelCapabilities.STRUCTURED_OUTPUT,
704
+ ModelCapabilities.VISION,
705
+ ],
706
+ provider=LanguageModelProvider.AZURE,
707
+ version="2025-04-16",
708
+ encoder_name=EncoderName.O200K_BASE,
709
+ token_limits=LanguageModelTokenLimits(
710
+ token_limit_input=200_000, token_limit_output=100_000
711
+ ),
712
+ info_cutoff_at=date(2024, 5, 31),
713
+ published_at=date(2025, 4, 16),
714
+ temperature_bounds=TemperatureBounds(
715
+ min_temperature=1.0, max_temperature=1.0
716
+ ),
717
+ )
718
+ case LanguageModelName.AZURE_o4_MINI_2025_0416:
298
719
  return cls(
299
720
  name=model_name,
300
721
  capabilities=[
722
+ ModelCapabilities.CHAT_COMPLETIONS_API,
723
+ ModelCapabilities.FUNCTION_CALLING,
724
+ ModelCapabilities.REASONING,
725
+ ModelCapabilities.RESPONSES_API,
726
+ ModelCapabilities.STREAMING,
301
727
  ModelCapabilities.STRUCTURED_OUTPUT,
728
+ ModelCapabilities.VISION,
729
+ ],
730
+ provider=LanguageModelProvider.AZURE,
731
+ version="2025-04-16",
732
+ encoder_name=EncoderName.O200K_BASE,
733
+ token_limits=LanguageModelTokenLimits(
734
+ token_limit_input=200_000, token_limit_output=100_000
735
+ ),
736
+ info_cutoff_at=date(2024, 5, 31),
737
+ published_at=date(2025, 4, 16),
738
+ temperature_bounds=TemperatureBounds(
739
+ min_temperature=1.0, max_temperature=1.0
740
+ ),
741
+ )
742
+ case LanguageModelName.AZURE_GPT_45_PREVIEW_2025_0227:
743
+ return cls(
744
+ name=model_name,
745
+ capabilities=[
746
+ ModelCapabilities.CHAT_COMPLETIONS_API,
302
747
  ModelCapabilities.FUNCTION_CALLING,
748
+ ModelCapabilities.STRUCTURED_OUTPUT,
303
749
  ModelCapabilities.STREAMING,
304
750
  ModelCapabilities.VISION,
305
751
  ],
@@ -316,9 +762,11 @@ class LanguageModelInfo(BaseModel):
316
762
  return cls(
317
763
  name=model_name,
318
764
  capabilities=[
319
- ModelCapabilities.STRUCTURED_OUTPUT,
765
+ ModelCapabilities.CHAT_COMPLETIONS_API,
320
766
  ModelCapabilities.FUNCTION_CALLING,
767
+ ModelCapabilities.RESPONSES_API,
321
768
  ModelCapabilities.STREAMING,
769
+ ModelCapabilities.STRUCTURED_OUTPUT,
322
770
  ModelCapabilities.VISION,
323
771
  ],
324
772
  provider=LanguageModelProvider.AZURE,
@@ -330,34 +778,828 @@ class LanguageModelInfo(BaseModel):
330
778
  info_cutoff_at=date(2024, 5, 31),
331
779
  published_at=date(2025, 4, 14),
332
780
  )
333
- case _:
334
- if isinstance(model_name, LanguageModelName):
335
- raise ValueError(
336
- f"{model_name} is not supported. Please add model information in toolkit."
337
- )
338
-
781
+ case LanguageModelName.AZURE_GPT_41_MINI_2025_0414:
339
782
  return cls(
340
783
  name=model_name,
341
- version="custom",
342
- provider=LanguageModelProvider.CUSTOM,
784
+ capabilities=[
785
+ ModelCapabilities.CHAT_COMPLETIONS_API,
786
+ ModelCapabilities.FUNCTION_CALLING,
787
+ ModelCapabilities.RESPONSES_API,
788
+ ModelCapabilities.STREAMING,
789
+ ModelCapabilities.STRUCTURED_OUTPUT,
790
+ ModelCapabilities.VISION,
791
+ ],
792
+ provider=LanguageModelProvider.AZURE,
793
+ version="2025-04-14",
794
+ encoder_name=EncoderName.O200K_BASE,
795
+ token_limits=LanguageModelTokenLimits(
796
+ token_limit_input=1_047_576, token_limit_output=32_768
797
+ ),
798
+ info_cutoff_at=date(2024, 5, 31),
799
+ published_at=date(2025, 4, 14),
343
800
  )
801
+ case LanguageModelName.AZURE_GPT_41_NANO_2025_0414:
802
+ return cls(
803
+ name=model_name,
804
+ capabilities=[
805
+ ModelCapabilities.CHAT_COMPLETIONS_API,
806
+ ModelCapabilities.FUNCTION_CALLING,
807
+ ModelCapabilities.RESPONSES_API,
808
+ ModelCapabilities.STREAMING,
809
+ ModelCapabilities.STRUCTURED_OUTPUT,
810
+ ModelCapabilities.VISION,
811
+ ],
812
+ provider=LanguageModelProvider.AZURE,
813
+ version="2025-04-14",
814
+ encoder_name=EncoderName.O200K_BASE,
815
+ token_limits=LanguageModelTokenLimits(
816
+ token_limit_input=1_047_576, token_limit_output=32_768
817
+ ),
818
+ info_cutoff_at=date(2024, 5, 31),
819
+ published_at=date(2025, 4, 14),
820
+ )
821
+ case LanguageModelName.ANTHROPIC_CLAUDE_3_7_SONNET:
822
+ return cls(
823
+ name=model_name,
824
+ capabilities=[
825
+ ModelCapabilities.FUNCTION_CALLING,
826
+ ModelCapabilities.STREAMING,
827
+ ModelCapabilities.VISION,
828
+ ],
829
+ provider=LanguageModelProvider.LITELLM,
830
+ version="claude-3-7-sonnet",
831
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
832
+ token_limits=LanguageModelTokenLimits(
833
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
834
+ token_limit_input=180_000,
835
+ token_limit_output=64_000,
836
+ ),
837
+ info_cutoff_at=date(2024, 10, 31),
838
+ published_at=date(2025, 2, 24),
839
+ )
840
+ case LanguageModelName.ANTHROPIC_CLAUDE_3_7_SONNET_THINKING:
841
+ return cls(
842
+ name=model_name,
843
+ capabilities=[
844
+ ModelCapabilities.FUNCTION_CALLING,
845
+ ModelCapabilities.STREAMING,
846
+ ModelCapabilities.VISION,
847
+ ModelCapabilities.REASONING,
848
+ ],
849
+ provider=LanguageModelProvider.LITELLM,
850
+ version="claude-3-7-sonnet-thinking",
851
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
852
+ token_limits=LanguageModelTokenLimits(
853
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
854
+ token_limit_input=180_000,
855
+ token_limit_output=64_000,
856
+ ),
857
+ info_cutoff_at=date(2024, 10, 31),
858
+ published_at=date(2025, 2, 24),
859
+ )
860
+ case LanguageModelName.ANTHROPIC_CLAUDE_HAIKU_4_5:
861
+ return cls(
862
+ name=model_name,
863
+ capabilities=[
864
+ ModelCapabilities.FUNCTION_CALLING,
865
+ ModelCapabilities.STREAMING,
866
+ ModelCapabilities.VISION,
867
+ ModelCapabilities.REASONING,
868
+ ],
869
+ provider=LanguageModelProvider.LITELLM,
870
+ version="claude-haiku-4-5",
871
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
872
+ token_limits=LanguageModelTokenLimits(
873
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
874
+ token_limit_input=180_000,
875
+ token_limit_output=64_000,
876
+ ),
877
+ info_cutoff_at=date(2025, 2, 1),
878
+ published_at=date(2025, 10, 1),
879
+ )
880
+ case LanguageModelName.ANTHROPIC_CLAUDE_SONNET_4:
881
+ return cls(
882
+ name=model_name,
883
+ capabilities=[
884
+ ModelCapabilities.FUNCTION_CALLING,
885
+ ModelCapabilities.STREAMING,
886
+ ModelCapabilities.VISION,
887
+ ModelCapabilities.REASONING,
888
+ ],
889
+ provider=LanguageModelProvider.LITELLM,
890
+ version="claude-sonnet-4",
891
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
892
+ token_limits=LanguageModelTokenLimits(
893
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
894
+ token_limit_input=180_000,
895
+ token_limit_output=64_000,
896
+ ),
897
+ info_cutoff_at=date(2025, 3, 1),
898
+ published_at=date(2025, 5, 1),
899
+ )
900
+ case LanguageModelName.ANTHROPIC_CLAUDE_SONNET_4_5:
901
+ return cls(
902
+ name=model_name,
903
+ capabilities=[
904
+ ModelCapabilities.FUNCTION_CALLING,
905
+ ModelCapabilities.STREAMING,
906
+ ModelCapabilities.VISION,
907
+ ModelCapabilities.REASONING,
908
+ ],
909
+ provider=LanguageModelProvider.LITELLM,
910
+ version="claude-sonnet-4-5",
911
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
912
+ token_limits=LanguageModelTokenLimits(
913
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
914
+ token_limit_input=180_000,
915
+ token_limit_output=64_000,
916
+ ),
917
+ info_cutoff_at=date(2025, 7, 1),
918
+ published_at=date(2025, 9, 29),
919
+ )
920
+ case LanguageModelName.ANTHROPIC_CLAUDE_OPUS_4:
921
+ return cls(
922
+ name=model_name,
923
+ capabilities=[
924
+ ModelCapabilities.FUNCTION_CALLING,
925
+ ModelCapabilities.STREAMING,
926
+ ModelCapabilities.VISION,
927
+ ModelCapabilities.REASONING,
928
+ ],
929
+ provider=LanguageModelProvider.LITELLM,
930
+ version="claude-opus-4",
931
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
932
+ token_limits=LanguageModelTokenLimits(
933
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
934
+ token_limit_input=180_000,
935
+ token_limit_output=32_000,
936
+ ),
937
+ info_cutoff_at=date(2025, 3, 1),
938
+ published_at=date(2025, 5, 1),
939
+ )
940
+ case LanguageModelName.ANTHROPIC_CLAUDE_OPUS_4_1:
941
+ return cls(
942
+ name=model_name,
943
+ capabilities=[
944
+ ModelCapabilities.FUNCTION_CALLING,
945
+ ModelCapabilities.STREAMING,
946
+ ModelCapabilities.VISION,
947
+ ModelCapabilities.REASONING,
948
+ ],
949
+ provider=LanguageModelProvider.LITELLM,
950
+ version="claude-opus-4-1",
951
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
952
+ token_limits=LanguageModelTokenLimits(
953
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
954
+ token_limit_input=180_000,
955
+ token_limit_output=32_000,
956
+ ),
957
+ info_cutoff_at=date(2025, 3, 1),
958
+ published_at=date(2025, 5, 1),
959
+ )
960
+ case LanguageModelName.ANTHROPIC_CLAUDE_OPUS_4_5:
961
+ return cls(
962
+ name=model_name,
963
+ capabilities=[
964
+ ModelCapabilities.FUNCTION_CALLING,
965
+ ModelCapabilities.STREAMING,
966
+ ModelCapabilities.VISION,
967
+ ModelCapabilities.REASONING,
968
+ ],
969
+ provider=LanguageModelProvider.LITELLM,
970
+ version="claude-opus-4-5",
971
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
972
+ token_limits=LanguageModelTokenLimits(
973
+ # Input limit is 200_000, we leave 20_000 tokens as buffer due to tokenizer mismatch
974
+ token_limit_input=180_000,
975
+ token_limit_output=64_000,
976
+ ),
977
+ info_cutoff_at=date(2025, 8, 1),
978
+ published_at=date(2025, 11, 13),
979
+ )
980
+ case LanguageModelName.GEMINI_2_0_FLASH:
981
+ return cls(
982
+ name=model_name,
983
+ capabilities=[
984
+ ModelCapabilities.FUNCTION_CALLING,
985
+ ModelCapabilities.STREAMING,
986
+ ModelCapabilities.VISION,
987
+ ModelCapabilities.STRUCTURED_OUTPUT,
988
+ ModelCapabilities.REASONING,
989
+ ],
990
+ provider=LanguageModelProvider.LITELLM,
991
+ version="gemini-2-0-flash",
992
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
993
+ token_limits=LanguageModelTokenLimits(
994
+ token_limit_input=1_048_576, token_limit_output=8_192
995
+ ),
996
+ info_cutoff_at=date(2024, 8, 1),
997
+ published_at=date(2025, 2, 1),
998
+ )
999
+ case LanguageModelName.GEMINI_2_5_FLASH:
1000
+ return cls(
1001
+ name=model_name,
1002
+ capabilities=[
1003
+ ModelCapabilities.FUNCTION_CALLING,
1004
+ ModelCapabilities.STREAMING,
1005
+ ModelCapabilities.VISION,
1006
+ ModelCapabilities.STRUCTURED_OUTPUT,
1007
+ ModelCapabilities.REASONING,
1008
+ ],
1009
+ provider=LanguageModelProvider.LITELLM,
1010
+ version="gemini-2-5-flash",
1011
+ encoder_name=EncoderName.O200K_BASE, # TODO:Replace with LLM tokenizer
1012
+ token_limits=LanguageModelTokenLimits(
1013
+ token_limit_input=1_048_576, token_limit_output=65_536
1014
+ ),
1015
+ info_cutoff_at=date(2025, 1, day=1),
1016
+ published_at=date(2025, 4, 1),
1017
+ )
1018
+ case LanguageModelName.GEMINI_2_5_FLASH_LITE:
1019
+ return cls(
1020
+ name=model_name,
1021
+ capabilities=[
1022
+ ModelCapabilities.FUNCTION_CALLING,
1023
+ ModelCapabilities.STREAMING,
1024
+ ModelCapabilities.VISION,
1025
+ ModelCapabilities.STRUCTURED_OUTPUT,
1026
+ ModelCapabilities.REASONING,
1027
+ ],
1028
+ provider=LanguageModelProvider.LITELLM,
1029
+ version="gemini-2-5-flash-lite",
1030
+ encoder_name=EncoderName.O200K_BASE, # TODO:Replace with LLM tokenizer
1031
+ token_limits=LanguageModelTokenLimits(
1032
+ token_limit_input=1_048_576, token_limit_output=65_536
1033
+ ),
1034
+ info_cutoff_at=date(2025, 1, day=1),
1035
+ published_at=date(2025, 7, 1),
1036
+ )
1037
+ case LanguageModelName.GEMINI_2_5_FLASH_LITE_PREVIEW_0617:
1038
+ return cls(
1039
+ name=model_name,
1040
+ capabilities=[
1041
+ ModelCapabilities.FUNCTION_CALLING,
1042
+ ModelCapabilities.STREAMING,
1043
+ ModelCapabilities.VISION,
1044
+ ModelCapabilities.STRUCTURED_OUTPUT,
1045
+ ModelCapabilities.REASONING,
1046
+ ],
1047
+ provider=LanguageModelProvider.LITELLM,
1048
+ version="gemini-2-5-flash-lite-preview-06-17",
1049
+ encoder_name=EncoderName.O200K_BASE, # TODO:Replace with LLM tokenizer
1050
+ token_limits=LanguageModelTokenLimits(
1051
+ token_limit_input=1_000_000, token_limit_output=64_000
1052
+ ),
1053
+ info_cutoff_at=date(2025, 1, day=1),
1054
+ published_at=date(2025, 6, 17),
1055
+ )
1056
+ case LanguageModelName.GEMINI_2_5_FLASH_PREVIEW_0520:
1057
+ return cls(
1058
+ name=model_name,
1059
+ capabilities=[
1060
+ ModelCapabilities.FUNCTION_CALLING,
1061
+ ModelCapabilities.STREAMING,
1062
+ ModelCapabilities.VISION,
1063
+ ModelCapabilities.STRUCTURED_OUTPUT,
1064
+ ModelCapabilities.REASONING,
1065
+ ],
1066
+ provider=LanguageModelProvider.LITELLM,
1067
+ version="gemini-2-5-flash-preview-05-20",
1068
+ encoder_name=EncoderName.O200K_BASE, # TODO:Replace with LLM tokenizer
1069
+ token_limits=LanguageModelTokenLimits(
1070
+ token_limit_input=1_048_576, token_limit_output=65_536
1071
+ ),
1072
+ info_cutoff_at=date(2025, 1, day=1),
1073
+ published_at=date(2025, 4, 1),
1074
+ )
1075
+ case LanguageModelName.GEMINI_2_5_PRO:
1076
+ return cls(
1077
+ name=model_name,
1078
+ capabilities=[
1079
+ ModelCapabilities.FUNCTION_CALLING,
1080
+ ModelCapabilities.STREAMING,
1081
+ ModelCapabilities.VISION,
1082
+ ModelCapabilities.STRUCTURED_OUTPUT,
1083
+ ModelCapabilities.REASONING,
1084
+ ],
1085
+ provider=LanguageModelProvider.LITELLM,
1086
+ version="gemini-2-5-pro",
1087
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
1088
+ token_limits=LanguageModelTokenLimits(
1089
+ token_limit_input=1_048_576, token_limit_output=65_536
1090
+ ),
1091
+ info_cutoff_at=date(2025, 1, day=1),
1092
+ published_at=date(2025, 6, 17),
1093
+ )
1094
+ case LanguageModelName.GEMINI_2_5_PRO_EXP_0325:
1095
+ return cls(
1096
+ name=model_name,
1097
+ capabilities=[
1098
+ ModelCapabilities.FUNCTION_CALLING,
1099
+ ModelCapabilities.STREAMING,
1100
+ ModelCapabilities.VISION,
1101
+ ModelCapabilities.STRUCTURED_OUTPUT,
1102
+ ModelCapabilities.REASONING,
1103
+ ],
1104
+ provider=LanguageModelProvider.LITELLM,
1105
+ version="gemini-2-5-pro-exp-0325",
1106
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
1107
+ token_limits=LanguageModelTokenLimits(
1108
+ token_limit_input=1_048_576, token_limit_output=65_536
1109
+ ),
1110
+ info_cutoff_at=date(2025, 1, day=1),
1111
+ published_at=date(2025, 3, 1),
1112
+ )
1113
+ case LanguageModelName.GEMINI_2_5_PRO_PREVIEW_0605:
1114
+ return cls(
1115
+ name=model_name,
1116
+ capabilities=[
1117
+ ModelCapabilities.FUNCTION_CALLING,
1118
+ ModelCapabilities.STREAMING,
1119
+ ModelCapabilities.VISION,
1120
+ ModelCapabilities.STRUCTURED_OUTPUT,
1121
+ ModelCapabilities.REASONING,
1122
+ ],
1123
+ provider=LanguageModelProvider.LITELLM,
1124
+ version="gemini-2-5-pro-preview-06-05",
1125
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
1126
+ token_limits=LanguageModelTokenLimits(
1127
+ token_limit_input=1_048_576, token_limit_output=65_536
1128
+ ),
1129
+ info_cutoff_at=date(2025, 1, day=1),
1130
+ published_at=date(2025, 6, 5),
1131
+ )
1132
+ case LanguageModelName.GEMINI_3_PRO_PREVIEW:
1133
+ return cls(
1134
+ name=model_name,
1135
+ capabilities=[
1136
+ ModelCapabilities.FUNCTION_CALLING,
1137
+ ModelCapabilities.STREAMING,
1138
+ ModelCapabilities.VISION,
1139
+ ModelCapabilities.STRUCTURED_OUTPUT,
1140
+ ModelCapabilities.REASONING,
1141
+ ],
1142
+ provider=LanguageModelProvider.LITELLM,
1143
+ version="gemini-3-pro-preview",
1144
+ encoder_name=EncoderName.O200K_BASE, # TODO: Update encoder with litellm
1145
+ token_limits=LanguageModelTokenLimits(
1146
+ token_limit_input=1_048_576, token_limit_output=65_536
1147
+ ),
1148
+ info_cutoff_at=date(2025, 1, day=1),
1149
+ published_at=date(2025, 11, 13),
1150
+ )
1151
+ case LanguageModelName.LITELLM_OPENAI_GPT_5:
1152
+ return cls(
1153
+ name=model_name,
1154
+ provider=LanguageModelProvider.LITELLM,
1155
+ version="gpt-5",
1156
+ encoder_name=EncoderName.O200K_BASE,
1157
+ capabilities=[
1158
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1159
+ ModelCapabilities.FUNCTION_CALLING,
1160
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
1161
+ ModelCapabilities.REASONING,
1162
+ ModelCapabilities.RESPONSES_API,
1163
+ ModelCapabilities.STREAMING,
1164
+ ModelCapabilities.STRUCTURED_OUTPUT,
1165
+ ModelCapabilities.VISION,
1166
+ ],
1167
+ token_limits=LanguageModelTokenLimits(
1168
+ token_limit_input=272000, token_limit_output=128000
1169
+ ),
1170
+ info_cutoff_at=date(2024, 10, 24),
1171
+ published_at=date(2025, 8, 7),
1172
+ deprecated_at=date(2026, 8, 7),
1173
+ retirement_at=date(2026, 8, 7),
1174
+ temperature_bounds=TemperatureBounds(
1175
+ min_temperature=1.0, max_temperature=1.0
1176
+ ),
1177
+ default_options={
1178
+ "reasoning_effort": "minimal",
1179
+ },
1180
+ )
1181
+ case LanguageModelName.LITELLM_OPENAI_GPT_5_MINI:
1182
+ return cls(
1183
+ name=model_name,
1184
+ provider=LanguageModelProvider.LITELLM,
1185
+ version="gpt-5-mini",
1186
+ encoder_name=EncoderName.O200K_BASE,
1187
+ capabilities=[
1188
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1189
+ ModelCapabilities.FUNCTION_CALLING,
1190
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
1191
+ ModelCapabilities.REASONING,
1192
+ ModelCapabilities.RESPONSES_API,
1193
+ ModelCapabilities.STREAMING,
1194
+ ModelCapabilities.STRUCTURED_OUTPUT,
1195
+ ModelCapabilities.VISION,
1196
+ ],
1197
+ token_limits=LanguageModelTokenLimits(
1198
+ token_limit_input=272000, token_limit_output=128000
1199
+ ),
1200
+ info_cutoff_at=date(2024, 6, 24),
1201
+ published_at=date(2025, 8, 7),
1202
+ deprecated_at=date(2026, 8, 7),
1203
+ retirement_at=date(2026, 8, 7),
1204
+ temperature_bounds=TemperatureBounds(
1205
+ min_temperature=1.0, max_temperature=1.0
1206
+ ),
1207
+ default_options={
1208
+ "reasoning_effort": "minimal",
1209
+ },
1210
+ )
1211
+ case LanguageModelName.LITELLM_OPENAI_GPT_5_NANO:
1212
+ return cls(
1213
+ name=model_name,
1214
+ provider=LanguageModelProvider.LITELLM,
1215
+ version="gpt-5-nano",
1216
+ encoder_name=EncoderName.O200K_BASE,
1217
+ capabilities=[
1218
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1219
+ ModelCapabilities.FUNCTION_CALLING,
1220
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
1221
+ ModelCapabilities.REASONING,
1222
+ ModelCapabilities.RESPONSES_API,
1223
+ ModelCapabilities.STREAMING,
1224
+ ModelCapabilities.STRUCTURED_OUTPUT,
1225
+ ModelCapabilities.VISION,
1226
+ ],
1227
+ token_limits=LanguageModelTokenLimits(
1228
+ token_limit_input=272000, token_limit_output=128000
1229
+ ),
1230
+ info_cutoff_at=date(2024, 5, 31),
1231
+ published_at=date(2025, 8, 7),
1232
+ deprecated_at=date(2026, 8, 7),
1233
+ retirement_at=date(2026, 8, 7),
1234
+ temperature_bounds=TemperatureBounds(
1235
+ min_temperature=1.0, max_temperature=1.0
1236
+ ),
1237
+ default_options={
1238
+ "reasoning_effort": "minimal",
1239
+ },
1240
+ )
1241
+ case LanguageModelName.LITELLM_OPENAI_GPT_5_CHAT:
1242
+ return cls(
1243
+ name=model_name,
1244
+ provider=LanguageModelProvider.LITELLM,
1245
+ version="gpt-5-chat",
1246
+ encoder_name=EncoderName.O200K_BASE,
1247
+ capabilities=[
1248
+ ModelCapabilities.RESPONSES_API,
1249
+ ModelCapabilities.STREAMING,
1250
+ ModelCapabilities.VISION,
1251
+ ],
1252
+ token_limits=LanguageModelTokenLimits(
1253
+ token_limit_input=128000, token_limit_output=16384
1254
+ ),
1255
+ info_cutoff_at=date(2024, 10, 24),
1256
+ published_at=date(2025, 8, 7),
1257
+ deprecated_at=date(2026, 8, 7),
1258
+ retirement_at=date(2026, 8, 7),
1259
+ )
1260
+ case LanguageModelName.LITELLM_OPENAI_GPT_5_PRO:
1261
+ return cls(
1262
+ name=model_name,
1263
+ provider=LanguageModelProvider.LITELLM,
1264
+ version="2025-10-06",
1265
+ encoder_name=EncoderName.O200K_BASE,
1266
+ capabilities=[
1267
+ ModelCapabilities.FUNCTION_CALLING,
1268
+ ModelCapabilities.REASONING,
1269
+ ModelCapabilities.RESPONSES_API,
1270
+ ModelCapabilities.STRUCTURED_OUTPUT,
1271
+ ModelCapabilities.VISION,
1272
+ ],
1273
+ token_limits=LanguageModelTokenLimits(
1274
+ token_limit_input=272000, token_limit_output=128000
1275
+ ),
1276
+ info_cutoff_at=date(2024, 10, 30),
1277
+ published_at=date(2025, 10, 6),
1278
+ temperature_bounds=TemperatureBounds(
1279
+ min_temperature=1.0, max_temperature=1.0
1280
+ ),
1281
+ )
1282
+ case LanguageModelName.LITELLM_OPENAI_GPT_51:
1283
+ return cls(
1284
+ name=model_name,
1285
+ provider=LanguageModelProvider.LITELLM,
1286
+ version="2025-11-13",
1287
+ encoder_name=EncoderName.O200K_BASE,
1288
+ capabilities=[
1289
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1290
+ ModelCapabilities.FUNCTION_CALLING,
1291
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
1292
+ ModelCapabilities.REASONING,
1293
+ ModelCapabilities.RESPONSES_API,
1294
+ ModelCapabilities.STREAMING,
1295
+ ModelCapabilities.STRUCTURED_OUTPUT,
1296
+ ModelCapabilities.VISION,
1297
+ ],
1298
+ token_limits=LanguageModelTokenLimits(
1299
+ token_limit_input=272000, token_limit_output=128000
1300
+ ),
1301
+ info_cutoff_at=date(2024, 9, 30),
1302
+ published_at=date(2025, 11, 13),
1303
+ temperature_bounds=TemperatureBounds(
1304
+ min_temperature=1.0, max_temperature=1.0
1305
+ ),
1306
+ default_options={
1307
+ "reasoning_effort": None,
1308
+ },
1309
+ )
1310
+ case LanguageModelName.LITELLM_OPENAI_GPT_51_THINKING:
1311
+ return cls(
1312
+ name=model_name,
1313
+ provider=LanguageModelProvider.LITELLM,
1314
+ version="2025-11-13",
1315
+ encoder_name=EncoderName.O200K_BASE,
1316
+ capabilities=[
1317
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1318
+ ModelCapabilities.FUNCTION_CALLING,
1319
+ ModelCapabilities.PARALLEL_FUNCTION_CALLING,
1320
+ ModelCapabilities.REASONING,
1321
+ ModelCapabilities.RESPONSES_API,
1322
+ ModelCapabilities.STREAMING,
1323
+ ModelCapabilities.STRUCTURED_OUTPUT,
1324
+ ModelCapabilities.VISION,
1325
+ ],
1326
+ token_limits=LanguageModelTokenLimits(
1327
+ token_limit_input=272000, token_limit_output=128000
1328
+ ),
1329
+ info_cutoff_at=date(2024, 9, 30),
1330
+ published_at=date(2025, 11, 13),
1331
+ temperature_bounds=TemperatureBounds(
1332
+ min_temperature=1.0, max_temperature=1.0
1333
+ ),
1334
+ default_options={
1335
+ "reasoning_effort": "medium",
1336
+ },
1337
+ )
1338
+ case LanguageModelName.LITELLM_OPENAI_O1:
1339
+ return cls(
1340
+ name=model_name,
1341
+ provider=LanguageModelProvider.LITELLM,
1342
+ version="2024-12-17",
1343
+ encoder_name=EncoderName.O200K_BASE,
1344
+ capabilities=[
1345
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1346
+ ModelCapabilities.FUNCTION_CALLING,
1347
+ ModelCapabilities.REASONING,
1348
+ ModelCapabilities.RESPONSES_API,
1349
+ ModelCapabilities.STREAMING,
1350
+ ModelCapabilities.STRUCTURED_OUTPUT,
1351
+ ModelCapabilities.VISION,
1352
+ ],
1353
+ token_limits=LanguageModelTokenLimits(
1354
+ token_limit_input=200_000, token_limit_output=100_000
1355
+ ),
1356
+ info_cutoff_at=date(2023, 10, 1),
1357
+ published_at=date(2024, 12, 17),
1358
+ temperature_bounds=TemperatureBounds(
1359
+ min_temperature=1.0, max_temperature=1.0
1360
+ ),
1361
+ )
1362
+ case LanguageModelName.LITELLM_OPENAI_O3:
1363
+ return cls(
1364
+ name=model_name,
1365
+ provider=LanguageModelProvider.LITELLM,
1366
+ version="2025-04-16",
1367
+ encoder_name=EncoderName.O200K_BASE,
1368
+ capabilities=[
1369
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1370
+ ModelCapabilities.FUNCTION_CALLING,
1371
+ ModelCapabilities.REASONING,
1372
+ ModelCapabilities.RESPONSES_API,
1373
+ ModelCapabilities.STREAMING,
1374
+ ModelCapabilities.STRUCTURED_OUTPUT,
1375
+ ModelCapabilities.VISION,
1376
+ ],
1377
+ token_limits=LanguageModelTokenLimits(
1378
+ token_limit_input=200_000, token_limit_output=100_000
1379
+ ),
1380
+ temperature_bounds=TemperatureBounds(
1381
+ min_temperature=1.0, max_temperature=1.0
1382
+ ),
1383
+ published_at=date(2025, 4, 16),
1384
+ info_cutoff_at=date(2024, 6, 1),
1385
+ )
1386
+ case LanguageModelName.LITELLM_OPENAI_O3_DEEP_RESEARCH:
1387
+ return cls(
1388
+ name=model_name,
1389
+ provider=LanguageModelProvider.LITELLM,
1390
+ version="2025-06-26",
1391
+ encoder_name=EncoderName.O200K_BASE,
1392
+ capabilities=[
1393
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1394
+ ModelCapabilities.FUNCTION_CALLING,
1395
+ ModelCapabilities.REASONING,
1396
+ ModelCapabilities.RESPONSES_API,
1397
+ ModelCapabilities.STREAMING,
1398
+ ModelCapabilities.STRUCTURED_OUTPUT,
1399
+ ModelCapabilities.VISION,
1400
+ ],
1401
+ token_limits=LanguageModelTokenLimits(
1402
+ token_limit_input=200_000, token_limit_output=100_000
1403
+ ),
1404
+ published_at=date(2025, 4, 16),
1405
+ info_cutoff_at=date(2024, 6, 1),
1406
+ )
1407
+ case LanguageModelName.LITELLM_OPENAI_O3_PRO:
1408
+ return cls(
1409
+ name=model_name,
1410
+ provider=LanguageModelProvider.LITELLM,
1411
+ version="2025-06-10",
1412
+ encoder_name=EncoderName.O200K_BASE,
1413
+ capabilities=[
1414
+ ModelCapabilities.FUNCTION_CALLING,
1415
+ ModelCapabilities.REASONING,
1416
+ ModelCapabilities.STRUCTURED_OUTPUT,
1417
+ ],
1418
+ token_limits=LanguageModelTokenLimits(
1419
+ token_limit_input=200_000, token_limit_output=100_000
1420
+ ),
1421
+ published_at=date(2025, 6, 10),
1422
+ info_cutoff_at=date(2024, 6, 1),
1423
+ )
1424
+ case LanguageModelName.LITELLM_OPENAI_O4_MINI:
1425
+ return cls(
1426
+ name=model_name,
1427
+ provider=LanguageModelProvider.LITELLM,
1428
+ version="2025-04-16",
1429
+ encoder_name=EncoderName.O200K_BASE,
1430
+ capabilities=[
1431
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1432
+ ModelCapabilities.FUNCTION_CALLING,
1433
+ ModelCapabilities.REASONING,
1434
+ ModelCapabilities.RESPONSES_API,
1435
+ ModelCapabilities.STREAMING,
1436
+ ModelCapabilities.STRUCTURED_OUTPUT,
1437
+ ModelCapabilities.VISION,
1438
+ ],
1439
+ token_limits=LanguageModelTokenLimits(
1440
+ token_limit_input=200_000, token_limit_output=100_000
1441
+ ),
1442
+ published_at=date(2025, 4, 16),
1443
+ info_cutoff_at=date(2024, 6, 1),
1444
+ temperature_bounds=TemperatureBounds(
1445
+ min_temperature=1.0, max_temperature=1.0
1446
+ ),
1447
+ )
1448
+ case LanguageModelName.LITELLM_OPENAI_O4_MINI_DEEP_RESEARCH:
1449
+ return cls(
1450
+ name=model_name,
1451
+ provider=LanguageModelProvider.LITELLM,
1452
+ version="2025-06-26",
1453
+ encoder_name=EncoderName.O200K_BASE,
1454
+ capabilities=[
1455
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1456
+ ModelCapabilities.FUNCTION_CALLING,
1457
+ ModelCapabilities.REASONING,
1458
+ ModelCapabilities.RESPONSES_API,
1459
+ ModelCapabilities.STREAMING,
1460
+ ModelCapabilities.STRUCTURED_OUTPUT,
1461
+ ModelCapabilities.VISION,
1462
+ ],
1463
+ token_limits=LanguageModelTokenLimits(
1464
+ token_limit_input=200_000, token_limit_output=100_000
1465
+ ),
1466
+ published_at=date(2025, 4, 16),
1467
+ info_cutoff_at=date(2024, 6, 1),
1468
+ )
1469
+ case LanguageModelName.LITELLM_OPENAI_GPT_4_1_MINI:
1470
+ return cls(
1471
+ name=model_name,
1472
+ provider=LanguageModelProvider.LITELLM,
1473
+ version="2025-04-14",
1474
+ encoder_name=EncoderName.O200K_BASE,
1475
+ published_at=date(2025, 4, 14),
1476
+ info_cutoff_at=date(2024, 6, 1),
1477
+ token_limits=LanguageModelTokenLimits(
1478
+ token_limit_input=1_047_576, token_limit_output=32_768
1479
+ ),
1480
+ capabilities=[
1481
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1482
+ ModelCapabilities.FUNCTION_CALLING,
1483
+ ModelCapabilities.RESPONSES_API,
1484
+ ModelCapabilities.STREAMING,
1485
+ ModelCapabilities.STRUCTURED_OUTPUT,
1486
+ ModelCapabilities.VISION,
1487
+ ],
1488
+ )
1489
+ case LanguageModelName.LITELLM_OPENAI_GPT_4_1_NANO:
1490
+ return cls(
1491
+ name=model_name,
1492
+ provider=LanguageModelProvider.LITELLM,
1493
+ version="2025-04-14",
1494
+ encoder_name=EncoderName.O200K_BASE,
1495
+ published_at=date(2025, 4, 14),
1496
+ info_cutoff_at=date(2024, 6, 1),
1497
+ token_limits=LanguageModelTokenLimits(
1498
+ token_limit_input=1_047_576, token_limit_output=32_768
1499
+ ),
1500
+ capabilities=[
1501
+ ModelCapabilities.CHAT_COMPLETIONS_API,
1502
+ ModelCapabilities.FUNCTION_CALLING,
1503
+ ModelCapabilities.RESPONSES_API,
1504
+ ModelCapabilities.STREAMING,
1505
+ ModelCapabilities.STRUCTURED_OUTPUT,
1506
+ ModelCapabilities.VISION,
1507
+ ],
1508
+ )
1509
+ case LanguageModelName.LITELLM_DEEPSEEK_R1:
1510
+ return cls(
1511
+ name=model_name,
1512
+ provider=LanguageModelProvider.LITELLM,
1513
+ version="deepseek-r1",
1514
+ capabilities=[
1515
+ ModelCapabilities.FUNCTION_CALLING,
1516
+ ModelCapabilities.STRUCTURED_OUTPUT,
1517
+ ModelCapabilities.STREAMING,
1518
+ ModelCapabilities.REASONING,
1519
+ ],
1520
+ token_limits=LanguageModelTokenLimits(
1521
+ token_limit_input=64_000, token_limit_output=4_000
1522
+ ),
1523
+ published_at=date(2025, 1, 25),
1524
+ )
1525
+ case LanguageModelName.LITELLM_DEEPSEEK_V3:
1526
+ return cls(
1527
+ name=model_name,
1528
+ provider=LanguageModelProvider.LITELLM,
1529
+ version="deepseek-v3-1",
1530
+ capabilities=[
1531
+ ModelCapabilities.FUNCTION_CALLING,
1532
+ ModelCapabilities.STRUCTURED_OUTPUT,
1533
+ ModelCapabilities.REASONING,
1534
+ ],
1535
+ token_limits=LanguageModelTokenLimits(
1536
+ token_limit_input=128_000, token_limit_output=4_000
1537
+ ),
1538
+ published_at=date(2025, 8, 1),
1539
+ )
1540
+ case LanguageModelName.LITELLM_QWEN_3:
1541
+ return cls(
1542
+ name=model_name,
1543
+ provider=LanguageModelProvider.LITELLM,
1544
+ version="qwen-3",
1545
+ capabilities=[
1546
+ ModelCapabilities.FUNCTION_CALLING,
1547
+ ModelCapabilities.STRUCTURED_OUTPUT,
1548
+ ModelCapabilities.STREAMING,
1549
+ ModelCapabilities.REASONING,
1550
+ ],
1551
+ published_at=date(2025, 4, 29),
1552
+ token_limits=LanguageModelTokenLimits(
1553
+ token_limit_input=256_000, token_limit_output=32_768
1554
+ ),
1555
+ )
1556
+ case LanguageModelName.LITELLM_QWEN_3_THINKING:
1557
+ return cls(
1558
+ name=model_name,
1559
+ provider=LanguageModelProvider.LITELLM,
1560
+ version="qwen-3-thinking",
1561
+ capabilities=[
1562
+ ModelCapabilities.FUNCTION_CALLING,
1563
+ ModelCapabilities.STRUCTURED_OUTPUT,
1564
+ ModelCapabilities.STREAMING,
1565
+ ModelCapabilities.REASONING,
1566
+ ],
1567
+ token_limits=LanguageModelTokenLimits(
1568
+ token_limit_input=256_000, token_limit_output=32_768
1569
+ ),
1570
+ published_at=date(2025, 7, 25),
1571
+ )
1572
+
1573
+ case _:
1574
+ if isinstance(model_name, LanguageModelName):
1575
+ raise ValueError(
1576
+ f"{model_name} is not supported. Please add model information in toolkit."
1577
+ )
1578
+
1579
+ return cls(
1580
+ name=model_name,
1581
+ version="custom",
1582
+ provider=LanguageModelProvider.CUSTOM,
1583
+ )
1584
+
1585
+ @property
1586
+ def display_name(self) -> str:
1587
+ """
1588
+ Returns the name of the model as a string.
1589
+ """
1590
+
1591
+ if isinstance(self.name, LanguageModelName):
1592
+ return self.name.value
1593
+ else:
1594
+ return self.name
344
1595
 
345
- @property
346
- def display_name(self) -> str:
347
- """
348
- Returns the name of the model as a string.
349
- """
350
-
351
- if isinstance(self.name, LanguageModelName):
352
- return self.name.value
353
- else:
354
- return self.name
355
-
356
1596
 
357
1597
  @deprecated(
358
1598
  """
359
- Use `LanguageModelInfo` instead of `LanguageModel`
360
- """
1599
+ Use `LanguageModelInfo` instead of `LanguageModel`.
1600
+
1601
+ `LanguageModel` will be deprecated on 31.12.2025
1602
+ """,
361
1603
  )
362
1604
  class LanguageModel:
363
1605
  _info: ClassVar[LanguageModelInfo]
@@ -367,8 +1609,8 @@ class LanguageModel:
367
1609
 
368
1610
  @property
369
1611
  def info(self) -> LanguageModelInfo:
370
- """
371
- Returns all infos about the model:
1612
+ """Return all infos about the model.
1613
+
372
1614
  - name
373
1615
  - version
374
1616
  - provider