together 1.2.11__py3-none-any.whl → 2.0.0a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. together/__init__.py +101 -63
  2. together/_base_client.py +1995 -0
  3. together/_client.py +1033 -0
  4. together/_compat.py +219 -0
  5. together/_constants.py +14 -0
  6. together/_exceptions.py +108 -0
  7. together/_files.py +123 -0
  8. together/_models.py +857 -0
  9. together/_qs.py +150 -0
  10. together/_resource.py +43 -0
  11. together/_response.py +830 -0
  12. together/_streaming.py +370 -0
  13. together/_types.py +260 -0
  14. together/_utils/__init__.py +64 -0
  15. together/_utils/_compat.py +45 -0
  16. together/_utils/_datetime_parse.py +136 -0
  17. together/_utils/_logs.py +25 -0
  18. together/_utils/_proxy.py +65 -0
  19. together/_utils/_reflection.py +42 -0
  20. together/_utils/_resources_proxy.py +24 -0
  21. together/_utils/_streams.py +12 -0
  22. together/_utils/_sync.py +58 -0
  23. together/_utils/_transform.py +457 -0
  24. together/_utils/_typing.py +156 -0
  25. together/_utils/_utils.py +421 -0
  26. together/_version.py +4 -0
  27. together/lib/.keep +4 -0
  28. together/lib/__init__.py +23 -0
  29. together/lib/cli/api/endpoints.py +467 -0
  30. together/lib/cli/api/evals.py +588 -0
  31. together/{cli → lib/cli}/api/files.py +20 -17
  32. together/lib/cli/api/fine_tuning.py +566 -0
  33. together/lib/cli/api/models.py +140 -0
  34. together/lib/cli/api/utils.py +50 -0
  35. together/{cli → lib/cli}/cli.py +17 -23
  36. together/lib/constants.py +61 -0
  37. together/lib/resources/__init__.py +11 -0
  38. together/lib/resources/files.py +999 -0
  39. together/lib/resources/fine_tuning.py +280 -0
  40. together/lib/resources/models.py +35 -0
  41. together/lib/types/__init__.py +13 -0
  42. together/lib/types/error.py +9 -0
  43. together/lib/types/fine_tuning.py +455 -0
  44. together/{utils → lib/utils}/__init__.py +7 -10
  45. together/{utils → lib/utils}/_log.py +18 -13
  46. together/lib/utils/files.py +628 -0
  47. together/lib/utils/serializer.py +10 -0
  48. together/{utils → lib/utils}/tools.py +17 -2
  49. together/resources/__init__.py +225 -24
  50. together/resources/audio/__init__.py +75 -0
  51. together/resources/audio/audio.py +198 -0
  52. together/resources/audio/speech.py +605 -0
  53. together/resources/audio/transcriptions.py +282 -0
  54. together/resources/audio/translations.py +256 -0
  55. together/resources/audio/voices.py +135 -0
  56. together/resources/batches.py +417 -0
  57. together/resources/chat/__init__.py +30 -21
  58. together/resources/chat/chat.py +102 -0
  59. together/resources/chat/completions.py +1063 -257
  60. together/resources/code_interpreter/__init__.py +33 -0
  61. together/resources/code_interpreter/code_interpreter.py +258 -0
  62. together/resources/code_interpreter/sessions.py +135 -0
  63. together/resources/completions.py +890 -225
  64. together/resources/embeddings.py +172 -68
  65. together/resources/endpoints.py +711 -0
  66. together/resources/evals.py +452 -0
  67. together/resources/files.py +397 -120
  68. together/resources/fine_tuning.py +1033 -0
  69. together/resources/hardware.py +181 -0
  70. together/resources/images.py +256 -108
  71. together/resources/jobs.py +214 -0
  72. together/resources/models.py +251 -44
  73. together/resources/rerank.py +190 -92
  74. together/resources/videos.py +374 -0
  75. together/types/__init__.py +66 -73
  76. together/types/audio/__init__.py +10 -0
  77. together/types/audio/speech_create_params.py +75 -0
  78. together/types/audio/transcription_create_params.py +54 -0
  79. together/types/audio/transcription_create_response.py +111 -0
  80. together/types/audio/translation_create_params.py +40 -0
  81. together/types/audio/translation_create_response.py +70 -0
  82. together/types/audio/voice_list_response.py +23 -0
  83. together/types/audio_speech_stream_chunk.py +16 -0
  84. together/types/autoscaling.py +13 -0
  85. together/types/autoscaling_param.py +15 -0
  86. together/types/batch_create_params.py +24 -0
  87. together/types/batch_create_response.py +14 -0
  88. together/types/batch_job.py +45 -0
  89. together/types/batch_list_response.py +10 -0
  90. together/types/chat/__init__.py +18 -0
  91. together/types/chat/chat_completion.py +60 -0
  92. together/types/chat/chat_completion_chunk.py +61 -0
  93. together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
  94. together/types/chat/chat_completion_structured_message_text_param.py +13 -0
  95. together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
  96. together/types/chat/chat_completion_usage.py +13 -0
  97. together/types/chat/chat_completion_warning.py +9 -0
  98. together/types/chat/completion_create_params.py +329 -0
  99. together/types/code_interpreter/__init__.py +5 -0
  100. together/types/code_interpreter/session_list_response.py +31 -0
  101. together/types/code_interpreter_execute_params.py +45 -0
  102. together/types/completion.py +42 -0
  103. together/types/completion_chunk.py +66 -0
  104. together/types/completion_create_params.py +138 -0
  105. together/types/dedicated_endpoint.py +44 -0
  106. together/types/embedding.py +24 -0
  107. together/types/embedding_create_params.py +31 -0
  108. together/types/endpoint_create_params.py +43 -0
  109. together/types/endpoint_list_avzones_response.py +11 -0
  110. together/types/endpoint_list_params.py +18 -0
  111. together/types/endpoint_list_response.py +41 -0
  112. together/types/endpoint_update_params.py +27 -0
  113. together/types/eval_create_params.py +263 -0
  114. together/types/eval_create_response.py +16 -0
  115. together/types/eval_list_params.py +21 -0
  116. together/types/eval_list_response.py +10 -0
  117. together/types/eval_status_response.py +100 -0
  118. together/types/evaluation_job.py +139 -0
  119. together/types/execute_response.py +108 -0
  120. together/types/file_delete_response.py +13 -0
  121. together/types/file_list.py +12 -0
  122. together/types/file_purpose.py +9 -0
  123. together/types/file_response.py +31 -0
  124. together/types/file_type.py +7 -0
  125. together/types/fine_tuning_cancel_response.py +194 -0
  126. together/types/fine_tuning_content_params.py +24 -0
  127. together/types/fine_tuning_delete_params.py +11 -0
  128. together/types/fine_tuning_delete_response.py +12 -0
  129. together/types/fine_tuning_list_checkpoints_response.py +21 -0
  130. together/types/fine_tuning_list_events_response.py +12 -0
  131. together/types/fine_tuning_list_response.py +199 -0
  132. together/types/finetune_event.py +41 -0
  133. together/types/finetune_event_type.py +33 -0
  134. together/types/finetune_response.py +177 -0
  135. together/types/hardware_list_params.py +16 -0
  136. together/types/hardware_list_response.py +58 -0
  137. together/types/image_data_b64.py +15 -0
  138. together/types/image_data_url.py +15 -0
  139. together/types/image_file.py +23 -0
  140. together/types/image_generate_params.py +85 -0
  141. together/types/job_list_response.py +47 -0
  142. together/types/job_retrieve_response.py +43 -0
  143. together/types/log_probs.py +18 -0
  144. together/types/model_list_response.py +10 -0
  145. together/types/model_object.py +42 -0
  146. together/types/model_upload_params.py +36 -0
  147. together/types/model_upload_response.py +23 -0
  148. together/types/rerank_create_params.py +36 -0
  149. together/types/rerank_create_response.py +36 -0
  150. together/types/tool_choice.py +23 -0
  151. together/types/tool_choice_param.py +23 -0
  152. together/types/tools_param.py +23 -0
  153. together/types/training_method_dpo.py +22 -0
  154. together/types/training_method_sft.py +18 -0
  155. together/types/video_create_params.py +86 -0
  156. together/types/video_job.py +57 -0
  157. together-2.0.0a8.dist-info/METADATA +680 -0
  158. together-2.0.0a8.dist-info/RECORD +164 -0
  159. {together-1.2.11.dist-info → together-2.0.0a8.dist-info}/WHEEL +1 -1
  160. together-2.0.0a8.dist-info/entry_points.txt +2 -0
  161. {together-1.2.11.dist-info → together-2.0.0a8.dist-info/licenses}/LICENSE +1 -1
  162. together/abstract/api_requestor.py +0 -723
  163. together/cli/api/chat.py +0 -276
  164. together/cli/api/completions.py +0 -119
  165. together/cli/api/finetune.py +0 -272
  166. together/cli/api/images.py +0 -82
  167. together/cli/api/models.py +0 -42
  168. together/client.py +0 -157
  169. together/constants.py +0 -31
  170. together/error.py +0 -191
  171. together/filemanager.py +0 -388
  172. together/legacy/__init__.py +0 -0
  173. together/legacy/base.py +0 -27
  174. together/legacy/complete.py +0 -93
  175. together/legacy/embeddings.py +0 -27
  176. together/legacy/files.py +0 -146
  177. together/legacy/finetune.py +0 -177
  178. together/legacy/images.py +0 -27
  179. together/legacy/models.py +0 -44
  180. together/resources/finetune.py +0 -489
  181. together/together_response.py +0 -50
  182. together/types/abstract.py +0 -26
  183. together/types/chat_completions.py +0 -171
  184. together/types/common.py +0 -65
  185. together/types/completions.py +0 -104
  186. together/types/embeddings.py +0 -35
  187. together/types/error.py +0 -16
  188. together/types/files.py +0 -89
  189. together/types/finetune.py +0 -265
  190. together/types/images.py +0 -42
  191. together/types/models.py +0 -44
  192. together/types/rerank.py +0 -43
  193. together/utils/api_helpers.py +0 -84
  194. together/utils/files.py +0 -204
  195. together/version.py +0 -6
  196. together-1.2.11.dist-info/METADATA +0 -408
  197. together-1.2.11.dist-info/RECORD +0 -58
  198. together-1.2.11.dist-info/entry_points.txt +0 -3
  199. /together/{abstract → lib/cli}/__init__.py +0 -0
  200. /together/{cli → lib/cli/api}/__init__.py +0 -0
  201. /together/{cli/api/__init__.py → py.typed} +0 -0
together/types/finetune.py DELETED
@@ -1,265 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from enum import Enum
4
- from typing import List, Literal
5
-
6
- from pydantic import Field, validator, field_validator
7
-
8
- from together.types.abstract import BaseModel
9
- from together.types.common import (
10
- ObjectType,
11
- )
12
-
13
-
14
- class FinetuneJobStatus(str, Enum):
15
- """
16
- Possible fine-tune job status
17
- """
18
-
19
- STATUS_PENDING = "pending"
20
- STATUS_QUEUED = "queued"
21
- STATUS_RUNNING = "running"
22
- STATUS_COMPRESSING = "compressing"
23
- STATUS_UPLOADING = "uploading"
24
- STATUS_CANCEL_REQUESTED = "cancel_requested"
25
- STATUS_CANCELLED = "cancelled"
26
- STATUS_ERROR = "error"
27
- STATUS_USER_ERROR = "user_error"
28
- STATUS_COMPLETED = "completed"
29
-
30
-
31
- class FinetuneEventLevels(str, Enum):
32
- """
33
- Fine-tune job event status levels
34
- """
35
-
36
- NULL = ""
37
- INFO = "Info"
38
- WARNING = "Warning"
39
- ERROR = "Error"
40
- LEGACY_INFO = "info"
41
- LEGACY_IWARNING = "warning"
42
- LEGACY_IERROR = "error"
43
-
44
-
45
- class FinetuneEventType(str, Enum):
46
- """
47
- Fine-tune job event types
48
- """
49
-
50
- JOB_PENDING = "JOB_PENDING"
51
- JOB_START = "JOB_START"
52
- JOB_STOPPED = "JOB_STOPPED"
53
- MODEL_DOWNLOADING = "MODEL_DOWNLOADING"
54
- MODEL_DOWNLOAD_COMPLETE = "MODEL_DOWNLOAD_COMPLETE"
55
- TRAINING_DATA_DOWNLOADING = "TRAINING_DATA_DOWNLOADING"
56
- TRAINING_DATA_DOWNLOAD_COMPLETE = "TRAINING_DATA_DOWNLOAD_COMPLETE"
57
- VALIDATION_DATA_DOWNLOADING = "VALIDATION_DATA_DOWNLOADING"
58
- VALIDATION_DATA_DOWNLOAD_COMPLETE = "VALIDATION_DATA_DOWNLOAD_COMPLETE"
59
- WANDB_INIT = "WANDB_INIT"
60
- TRAINING_START = "TRAINING_START"
61
- CHECKPOINT_SAVE = "CHECKPOINT_SAVE"
62
- BILLING_LIMIT = "BILLING_LIMIT"
63
- EPOCH_COMPLETE = "EPOCH_COMPLETE"
64
- EVAL_COMPLETE = "EVAL_COMPLETE"
65
- TRAINING_COMPLETE = "TRAINING_COMPLETE"
66
- MODEL_COMPRESSING = "COMPRESSING_MODEL"
67
- MODEL_COMPRESSION_COMPLETE = "MODEL_COMPRESSION_COMPLETE"
68
- MODEL_UPLOADING = "MODEL_UPLOADING"
69
- MODEL_UPLOAD_COMPLETE = "MODEL_UPLOAD_COMPLETE"
70
- JOB_COMPLETE = "JOB_COMPLETE"
71
- JOB_ERROR = "JOB_ERROR"
72
- JOB_USER_ERROR = "JOB_USER_ERROR"
73
- CANCEL_REQUESTED = "CANCEL_REQUESTED"
74
- JOB_RESTARTED = "JOB_RESTARTED"
75
- REFUND = "REFUND"
76
- WARNING = "WARNING"
77
-
78
-
79
- class DownloadCheckpointType(Enum):
80
- DEFAULT = "default"
81
- MERGED = "merged"
82
- ADAPTER = "adapter"
83
-
84
-
85
- class FinetuneEvent(BaseModel):
86
- """
87
- Fine-tune event type
88
- """
89
-
90
- # object type
91
- object: Literal[ObjectType.FinetuneEvent]
92
- # created at datetime stamp
93
- created_at: str | None = None
94
- # event log level
95
- level: FinetuneEventLevels | None = None
96
- # event message string
97
- message: str | None = None
98
- # event type
99
- type: FinetuneEventType | None = None
100
- # optional: model parameter count
101
- param_count: int | None = None
102
- # optional: dataset token count
103
- token_count: int | None = None
104
- # optional: weights & biases url
105
- wandb_url: str | None = None
106
- # event hash
107
- hash: str | None = None
108
-
109
-
110
- class TrainingType(BaseModel):
111
- """
112
- Abstract training type
113
- """
114
-
115
- type: str
116
-
117
-
118
- class FullTrainingType(TrainingType):
119
- """
120
- Training type for full fine-tuning
121
- """
122
-
123
- type: str = "Full"
124
-
125
-
126
- class LoRATrainingType(TrainingType):
127
- """
128
- Training type for LoRA adapters training
129
- """
130
-
131
- lora_r: int
132
- lora_alpha: int
133
- lora_dropout: float = 0.0
134
- lora_trainable_modules: str = "all-linear"
135
- type: str = "Lora"
136
-
137
-
138
- class FinetuneRequest(BaseModel):
139
- """
140
- Fine-tune request type
141
- """
142
-
143
- # training file ID
144
- training_file: str
145
- # validation file id
146
- validation_file: str | None = None
147
- # base model string
148
- model: str
149
- # number of epochs to train for
150
- n_epochs: int
151
- # training learning rate
152
- learning_rate: float
153
- # number of checkpoints to save
154
- n_checkpoints: int | None = None
155
- # number of evaluation loops to run
156
- n_evals: int | None = None
157
- # training batch size
158
- batch_size: int | None = None
159
- # up to 40 character suffix for output model name
160
- suffix: str | None = None
161
- # weights & biases api key
162
- wandb_key: str | None = None
163
- training_type: FullTrainingType | LoRATrainingType | None = None
164
-
165
-
166
- class FinetuneResponse(BaseModel):
167
- """
168
- Fine-tune API response type
169
- """
170
-
171
- # job ID
172
- id: str | None = None
173
- # training file id
174
- training_file: str | None = None
175
- # validation file id
176
- validation_file: str | None = None
177
- # base model name
178
- model: str | None = None
179
- # output model name
180
- output_name: str | None = Field(None, alias="model_output_name")
181
- # adapter output name
182
- adapter_output_name: str | None = None
183
- # number of epochs
184
- n_epochs: int | None = None
185
- # number of checkpoints to save
186
- n_checkpoints: int | None = None
187
- # number of evaluation loops
188
- n_evals: int | None = None
189
- # training batch size
190
- batch_size: int | None = None
191
- # training learning rate
192
- learning_rate: float | None = None
193
- # number of steps between evals
194
- eval_steps: int | None = None
195
- # training type
196
- training_type: TrainingType | None = None
197
- # created/updated datetime stamps
198
- created_at: str | None = None
199
- updated_at: str | None = None
200
- # job status
201
- status: FinetuneJobStatus | None = None
202
- # job id
203
- job_id: str | None = None
204
- # list of fine-tune events
205
- events: List[FinetuneEvent] | None = None
206
- # dataset token count
207
- token_count: int | None = None
208
- # model parameter count
209
- param_count: int | None = None
210
- # fine-tune job price
211
- total_price: int | None = None
212
- # total number of training steps
213
- total_steps: int | None = None
214
- # number of steps completed (incrementing counter)
215
- steps_completed: int | None = None
216
- # number of epochs completed (incrementing counter)
217
- epochs_completed: int | None = None
218
- # number of evaluation loops completed (incrementing counter)
219
- evals_completed: int | None = None
220
- # place in job queue (decrementing counter)
221
- queue_depth: int | None = None
222
- # weights & biases project name
223
- wandb_project_name: str | None = None
224
- # weights & biases job url
225
- wandb_url: str | None = None
226
- # training file metadata
227
- training_file_num_lines: int | None = Field(None, alias="TrainingFileNumLines")
228
- training_file_size: int | None = Field(None, alias="TrainingFileSize")
229
-
230
- @field_validator("training_type")
231
- @classmethod
232
- def validate_training_type(cls, v: TrainingType) -> TrainingType:
233
- if v.type == "Full" or v.type == "":
234
- return FullTrainingType(**v.model_dump())
235
- elif v.type == "Lora":
236
- return LoRATrainingType(**v.model_dump())
237
- else:
238
- raise ValueError("Unknown training type")
239
-
240
-
241
- class FinetuneList(BaseModel):
242
- # object type
243
- object: Literal["list"] | None = None
244
- # list of fine-tune job objects
245
- data: List[FinetuneResponse] | None = None
246
-
247
-
248
- class FinetuneListEvents(BaseModel):
249
- # object type
250
- object: Literal["list"] | None = None
251
- # list of fine-tune events
252
- data: List[FinetuneEvent] | None = None
253
-
254
-
255
- class FinetuneDownloadResult(BaseModel):
256
- # object type
257
- object: Literal["local"] | None = None
258
- # fine-tune job id
259
- id: str | None = None
260
- # checkpoint step number
261
- checkpoint_step: int | None = None
262
- # local path filename
263
- filename: str | None = None
264
- # size in bytes
265
- size: int | None = None
together/types/images.py DELETED
@@ -1,42 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import List, Literal
4
-
5
- from together.types.abstract import BaseModel
6
-
7
-
8
- class ImageRequest(BaseModel):
9
- # input or list of inputs
10
- prompt: str
11
- # model to query
12
- model: str
13
- # num generation steps
14
- steps: int | None = 20
15
- # seed
16
- seed: int | None = None
17
- # number of results to return
18
- n: int | None = 1
19
- # pixel height
20
- height: int | None = 1024
21
- # pixel width
22
- width: int | None = 1024
23
- # negative prompt
24
- negative_prompt: str | None = None
25
-
26
-
27
- class ImageChoicesData(BaseModel):
28
- # response index
29
- index: int
30
- # base64 image response
31
- b64_json: str
32
-
33
-
34
- class ImageResponse(BaseModel):
35
- # job id
36
- id: str | None = None
37
- # query model
38
- model: str | None = None
39
- # object type
40
- object: Literal["list"] | None = None
41
- # list of embedding choices
42
- data: List[ImageChoicesData] | None = None
together/types/models.py DELETED
@@ -1,44 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from enum import Enum
4
- from typing import Literal
5
-
6
- from together.types.abstract import BaseModel
7
- from together.types.common import ObjectType
8
-
9
-
10
- class ModelType(str, Enum):
11
- CHAT = "chat"
12
- LANGUAGE = "language"
13
- CODE = "code"
14
- IMAGE = "image"
15
- EMBEDDING = "embedding"
16
- MODERATION = "moderation"
17
- RERANK = "rerank"
18
-
19
-
20
- class PricingObject(BaseModel):
21
- input: float | None = None
22
- output: float | None = None
23
- hourly: float | None = None
24
- base: float | None = None
25
- finetune: float | None = None
26
-
27
-
28
- class ModelObject(BaseModel):
29
- # model id
30
- id: str
31
- # object type
32
- object: Literal[ObjectType.Model]
33
- created: int | None = None
34
- # model type
35
- type: ModelType | None = None
36
- # pretty name
37
- display_name: str | None = None
38
- # model creator organization
39
- organization: str | None = None
40
- # link to model resource
41
- link: str | None = None
42
- license: str | None = None
43
- context_length: int | None = None
44
- pricing: PricingObject
together/types/rerank.py DELETED
@@ -1,43 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import List, Literal, Dict, Any
4
-
5
- from together.types.abstract import BaseModel
6
- from together.types.common import UsageData
7
-
8
-
9
- class RerankRequest(BaseModel):
10
- # model to query
11
- model: str
12
- # input or list of inputs
13
- query: str
14
- # list of documents
15
- documents: List[str] | List[Dict[str, Any]]
16
- # return top_n results
17
- top_n: int | None = None
18
- # boolean to return documents
19
- return_documents: bool = False
20
- # field selector for documents
21
- rank_fields: List[str] | None = None
22
-
23
-
24
- class RerankChoicesData(BaseModel):
25
- # response index
26
- index: int
27
- # object type
28
- relevance_score: float
29
- # rerank response
30
- document: Dict[str, Any] | None = None
31
-
32
-
33
- class RerankResponse(BaseModel):
34
- # job id
35
- id: str | None = None
36
- # object type
37
- object: Literal["rerank"] | None = None
38
- # query model
39
- model: str | None = None
40
- # list of reranked results
41
- results: List[RerankChoicesData] | None = None
42
- # usage stats
43
- usage: UsageData | None = None
together/utils/api_helpers.py DELETED
@@ -1,84 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- import os
5
- import platform
6
- from typing import TYPE_CHECKING, Any, Dict
7
-
8
-
9
- if TYPE_CHECKING:
10
- from _typeshed import SupportsKeysAndGetItem
11
-
12
- import together
13
- from together import error
14
- from together.utils._log import _console_log_level
15
-
16
-
17
- def get_headers(
18
- method: str | None = None,
19
- api_key: str | None = None,
20
- extra: "SupportsKeysAndGetItem[str, Any] | None" = None,
21
- ) -> Dict[str, str]:
22
- """
23
- Generates request headers with API key, metadata, and supplied headers
24
-
25
- Args:
26
- method (str, optional): HTTP request type (POST, GET, etc.)
27
- Defaults to None.
28
- api_key (str, optional): API key to add as an Authorization header.
29
- Defaults to None.
30
- extra (SupportsKeysAndGetItem[str, Any], optional): Additional headers to add to request.
31
- Defaults to None.
32
-
33
- Returns:
34
- headers (Dict[str, str]): Compiled headers from data
35
- """
36
-
37
- user_agent = "Together/v1 PythonBindings/%s" % (together.version,)
38
-
39
- uname_without_node = " ".join(
40
- v for k, v in platform.uname()._asdict().items() if k != "node"
41
- )
42
- ua = {
43
- "bindings_version": together.version,
44
- "httplib": "requests",
45
- "lang": "python",
46
- "lang_version": platform.python_version(),
47
- "platform": platform.platform(),
48
- "publisher": "together",
49
- "uname": uname_without_node,
50
- }
51
-
52
- headers: Dict[str, Any] = {
53
- "X-Together-Client-User-Agent": json.dumps(ua),
54
- "Authorization": f"Bearer {default_api_key(api_key)}",
55
- "User-Agent": user_agent,
56
- }
57
-
58
- if _console_log_level():
59
- headers["Together-Debug"] = _console_log_level()
60
- if extra:
61
- headers.update(extra)
62
-
63
- return headers
64
-
65
-
66
- def default_api_key(api_key: str | None = None) -> str | None:
67
- """
68
- API key fallback logic from input argument and environment variable
69
-
70
- Args:
71
- api_key (str, optional): Supplied API key. This argument takes priority over env var
72
-
73
- Returns:
74
- together_api_key (str): Returns API key from supplied input or env var
75
-
76
- Raises:
77
- together.error.AuthenticationError: if API key not found
78
- """
79
- if api_key:
80
- return api_key
81
- if os.environ.get("TOGETHER_API_KEY"):
82
- return os.environ.get("TOGETHER_API_KEY")
83
-
84
- raise error.AuthenticationError(together.constants.MISSING_API_KEY_MESSAGE)
together/utils/files.py DELETED
@@ -1,204 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- import os
5
- from pathlib import Path
6
- from traceback import format_exc
7
- from typing import Any, Dict
8
-
9
- from pyarrow import ArrowInvalid, parquet
10
-
11
- from together.constants import (
12
- MAX_FILE_SIZE_GB,
13
- MIN_SAMPLES,
14
- NUM_BYTES_IN_GB,
15
- PARQUET_EXPECTED_COLUMNS,
16
- )
17
-
18
-
19
- def check_file(
20
- file: Path | str,
21
- ) -> Dict[str, Any]:
22
- if not isinstance(file, Path):
23
- file = Path(file)
24
-
25
- report_dict = {
26
- "is_check_passed": True,
27
- "message": "Checks passed",
28
- "found": None,
29
- "file_size": None,
30
- "utf8": None,
31
- "line_type": None,
32
- "text_field": None,
33
- "key_value": None,
34
- "min_samples": None,
35
- "num_samples": None,
36
- "load_json": None,
37
- }
38
-
39
- if not file.is_file():
40
- report_dict["found"] = False
41
- report_dict["is_check_passed"] = False
42
- return report_dict
43
- else:
44
- report_dict["found"] = True
45
-
46
- file_size = os.stat(file.as_posix()).st_size
47
-
48
- if file_size > MAX_FILE_SIZE_GB * NUM_BYTES_IN_GB:
49
- report_dict["message"] = (
50
- f"Maximum supported file size is {MAX_FILE_SIZE_GB} GB. Found file with size of {round(file_size / NUM_BYTES_IN_GB ,3)} GB."
51
- )
52
- report_dict["is_check_passed"] = False
53
- elif file_size == 0:
54
- report_dict["message"] = "File is empty"
55
- report_dict["file_size"] = 0
56
- report_dict["is_check_passed"] = False
57
- return report_dict
58
- else:
59
- report_dict["file_size"] = file_size
60
-
61
- if file.suffix == ".jsonl":
62
- report_dict["filetype"] = "jsonl"
63
- data_report_dict = _check_jsonl(file)
64
- elif file.suffix == ".parquet":
65
- report_dict["filetype"] = "parquet"
66
- data_report_dict = _check_parquet(file)
67
- else:
68
- report_dict["filetype"] = (
69
- f"Unknown extension of file {file}. "
70
- "Only files with extensions .jsonl and .parquet are supported."
71
- )
72
- report_dict["is_check_passed"] = False
73
-
74
- report_dict.update(data_report_dict)
75
- return report_dict
76
-
77
-
78
- def _check_jsonl(file: Path) -> Dict[str, Any]:
79
- report_dict: Dict[str, Any] = {}
80
- # Check that the file is UTF-8 encoded. If not report where the error occurs.
81
- try:
82
- with file.open(encoding="utf-8") as f:
83
- f.read()
84
- report_dict["utf8"] = True
85
- except UnicodeDecodeError as e:
86
- report_dict["utf8"] = False
87
- report_dict["message"] = f"File is not UTF-8 encoded. Error raised: {e}."
88
- report_dict["is_check_passed"] = False
89
- return report_dict
90
-
91
- with file.open() as f:
92
- # idx must be instantiated so decode errors (e.g. file is a tar) or empty files are caught
93
- idx = -1
94
- try:
95
- for idx, line in enumerate(f):
96
- json_line = json.loads(line) # each line in jsonlines should be a json
97
-
98
- if not isinstance(json_line, dict):
99
- report_dict["line_type"] = False
100
- report_dict["message"] = (
101
- f"Error parsing file. Invalid format on line {idx + 1} of the input file. "
102
- 'Example of valid json: {"text": "my sample string"}. '
103
- )
104
-
105
- report_dict["is_check_passed"] = False
106
-
107
- if "text" not in json_line.keys():
108
- report_dict["text_field"] = False
109
- report_dict["message"] = (
110
- f"Missing 'text' field was found on line {idx + 1} of the the input file. "
111
- "Expected format: {'text': 'my sample string'}. "
112
- )
113
- report_dict["is_check_passed"] = False
114
- else:
115
- # check to make sure the value of the "text" key is a string
116
- if not isinstance(json_line["text"], str):
117
- report_dict["key_value"] = False
118
- report_dict["message"] = (
119
- f'Invalid value type for "text" key on line {idx + 1}. '
120
- f'Expected string. Found {type(json_line["text"])}.'
121
- )
122
-
123
- report_dict["is_check_passed"] = False
124
-
125
- # make sure this is outside the for idx, line in enumerate(f): for loop
126
- if idx + 1 < MIN_SAMPLES:
127
- report_dict["min_samples"] = False
128
- report_dict["message"] = (
129
- f"Processing {file} resulted in only {idx + 1} samples. "
130
- f"Our minimum is {MIN_SAMPLES} samples. "
131
- )
132
- report_dict["is_check_passed"] = False
133
- else:
134
- report_dict["num_samples"] = idx + 1
135
- report_dict["min_samples"] = True
136
-
137
- report_dict["load_json"] = True
138
-
139
- except ValueError:
140
- report_dict["load_json"] = False
141
- if idx < 0:
142
- report_dict["message"] = (
143
- "Unable to decode file. "
144
- "File may be empty or in an unsupported format. "
145
- )
146
- else:
147
- report_dict["message"] = (
148
- f"Error parsing json payload. Unexpected format on line {idx + 1}."
149
- )
150
- report_dict["is_check_passed"] = False
151
-
152
- if "text_field" not in report_dict:
153
- report_dict["text_field"] = True
154
- if "line_type" not in report_dict:
155
- report_dict["line_type"] = True
156
- if "key_value" not in report_dict:
157
- report_dict["key_value"] = True
158
- return report_dict
159
-
160
-
161
- def _check_parquet(file: Path) -> Dict[str, Any]:
162
- report_dict: Dict[str, Any] = {}
163
-
164
- try:
165
- table = parquet.read_table(str(file), memory_map=True)
166
- except ArrowInvalid:
167
- report_dict["load_parquet"] = (
168
- f"An exception has occurred when loading the Parquet file {file}. Please check the file for corruption. "
169
- f"Exception trace:\n{format_exc()}"
170
- )
171
- report_dict["is_check_passed"] = False
172
- return report_dict
173
-
174
- column_names = table.schema.names
175
- if "input_ids" not in column_names:
176
- report_dict["load_parquet"] = (
177
- f"Parquet file {file} does not contain the `input_ids` column."
178
- )
179
- report_dict["is_check_passed"] = False
180
- return report_dict
181
-
182
- for column_name in column_names:
183
- if column_name not in PARQUET_EXPECTED_COLUMNS:
184
- report_dict["load_parquet"] = (
185
- f"Parquet file {file} contains an unexpected column {column_name}. "
186
- f"Only columns {PARQUET_EXPECTED_COLUMNS} are supported."
187
- )
188
- report_dict["is_check_passed"] = False
189
- return report_dict
190
-
191
- num_samples = len(table)
192
- if num_samples < MIN_SAMPLES:
193
- report_dict["min_samples"] = (
194
- f"Processing {file} resulted in only {num_samples} samples. "
195
- f"Our minimum is {MIN_SAMPLES} samples. "
196
- )
197
- report_dict["is_check_passed"] = False
198
- return report_dict
199
- else:
200
- report_dict["num_samples"] = num_samples
201
-
202
- report_dict["is_check_passed"] = True
203
-
204
- return report_dict
together/version.py DELETED
@@ -1,6 +0,0 @@
1
- import importlib.metadata
2
-
3
-
4
- VERSION = importlib.metadata.version(
5
- "together"
6
- ) # gets version number from pyproject.toml