together 1.5.21__py3-none-any.whl → 1.5.24__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
@@ -0,0 +1,724 @@
+ from __future__ import annotations
+
+ from typing import Any, Dict, List, Optional, Union
+
+ from together.abstract import api_requestor
+ from together.together_response import TogetherResponse
+ from together.types import (
+     TogetherClient,
+     TogetherRequest,
+ )
+ from together.types.evaluation import (
+     ClassifyParameters,
+     CompareParameters,
+     EvaluationCreateResponse,
+     EvaluationJob,
+     EvaluationStatusResponse,
+     JudgeModelConfig,
+     ModelRequest,
+     ScoreParameters,
+ )
+
+
+ class Evaluation:
+     def __init__(self, client: TogetherClient) -> None:
+         self._client = client
+
+     def create(
+         self,
+         type: str,
+         judge_model_name: str,
+         judge_system_template: str,
+         input_data_file_path: str,
+         # Classify-specific parameters
+         labels: Optional[List[str]] = None,
+         pass_labels: Optional[List[str]] = None,
+         # Score-specific parameters
+         min_score: Optional[float] = None,
+         max_score: Optional[float] = None,
+         pass_threshold: Optional[float] = None,
+         # Compare-specific parameters (model_a and model_b handled below)
+         # Common optional parameters
+         model_a: Optional[Union[str, Dict[str, Any]]] = None,
+         model_b: Optional[Union[str, Dict[str, Any]]] = None,
+         model_to_evaluate: Optional[Union[str, Dict[str, Any]]] = None,
+     ) -> EvaluationCreateResponse:
+         """
+         Create a new evaluation job.
+
+         Args:
+             type: The type of evaluation ("classify", "score", or "compare")
+             judge_model_name: Name of the judge model
+             judge_system_template: System template for the judge
+             input_data_file_path: Path to input data file
+             labels: List of classification labels (required for classify)
+             pass_labels: List of labels considered as passing (required for classify)
+             min_score: Minimum score value (required for score)
+             max_score: Maximum score value (required for score)
+             pass_threshold: Threshold score for passing (required for score)
+             model_to_evaluate: Model to evaluate for classify/score types
+             model_a: Model A for compare type
+             model_b: Model B for compare type
+
+         Returns:
+             EvaluationCreateResponse with workflow_id and status
+         """
+         requestor = api_requestor.APIRequestor(
+             client=self._client,
+         )
+
+         # Build judge config
+         judge_config = JudgeModelConfig(
+             model_name=judge_model_name,
+             system_template=judge_system_template,
+         )
+         parameters: Union[ClassifyParameters, ScoreParameters, CompareParameters]
+         # Build parameters based on type
+         if type == "classify":
+             if labels is None or pass_labels is None:
+                 raise ValueError(
+                     "labels and pass_labels are required for classify evaluation"
+                 )
+
+             # Validate that no score-specific parameters are provided
+             if any(
+                 [
+                     min_score is not None,
+                     max_score is not None,
+                     pass_threshold is not None,
+                 ]
+             ):
+                 raise ValueError(
+                     "min_score, max_score, and pass_threshold parameters are exclusive to the score mode"
+                 )
+
+             # Validate that no compare-specific parameters are provided
+             if any([model_a is not None, model_b is not None]):
+                 raise ValueError(
+                     "model_a and model_b parameters are exclusive to the compare mode"
+                 )
+
+             parameters = ClassifyParameters(
+                 judge=judge_config,
+                 labels=labels,
+                 pass_labels=pass_labels,
+                 input_data_file_path=input_data_file_path,
+             )
+
+             # Handle model_to_evaluate
+             if model_to_evaluate is not None:
+                 if isinstance(model_to_evaluate, str):
+                     parameters.model_to_evaluate = model_to_evaluate
+                 elif isinstance(model_to_evaluate, dict):
+                     # Validate that all required fields are present for model config
+                     required_fields = [
+                         "model_name",
+                         "max_tokens",
+                         "temperature",
+                         "system_template",
+                         "input_template",
+                     ]
+                     missing_fields = [
+                         field
+                         for field in required_fields
+                         if field not in model_to_evaluate
+                     ]
+                     if missing_fields:
+                         raise ValueError(
+                             f"All model config parameters are required when using detailed configuration. "
+                             f"Missing: {', '.join(missing_fields)}"
+                         )
+                     parameters.model_to_evaluate = ModelRequest(**model_to_evaluate)
+
+         elif type == "score":
+             if min_score is None or max_score is None or pass_threshold is None:
+                 raise ValueError(
+                     "min_score, max_score, and pass_threshold are required for score evaluation"
+                 )
+
+             # Validate that no classify-specific parameters are provided
+             if any([labels is not None, pass_labels is not None]):
+                 raise ValueError(
+                     "labels and pass_labels parameters are exclusive to the classify mode"
+                 )
+
+             # Validate that no compare-specific parameters are provided
+             if any([model_a is not None, model_b is not None]):
+                 raise ValueError(
+                     "model_a and model_b parameters are exclusive to the compare mode"
+                 )
+
+             parameters = ScoreParameters(
+                 judge=judge_config,
+                 min_score=min_score,
+                 max_score=max_score,
+                 pass_threshold=pass_threshold,
+                 input_data_file_path=input_data_file_path,
+             )
+
+             # Handle model_to_evaluate
+             if model_to_evaluate is not None:
+                 if isinstance(model_to_evaluate, str):
+                     parameters.model_to_evaluate = model_to_evaluate
+                 elif isinstance(model_to_evaluate, dict):
+                     # Validate that all required fields are present for model config
+                     required_fields = [
+                         "model_name",
+                         "max_tokens",
+                         "temperature",
+                         "system_template",
+                         "input_template",
+                     ]
+                     missing_fields = [
+                         field
+                         for field in required_fields
+                         if field not in model_to_evaluate
+                     ]
+                     if missing_fields:
+                         raise ValueError(
+                             f"All model config parameters are required when using detailed configuration. "
+                             f"Missing: {', '.join(missing_fields)}"
+                         )
+                     parameters.model_to_evaluate = ModelRequest(**model_to_evaluate)
+
+         elif type == "compare":
+             # Validate that model_a and model_b are provided
+             if model_a is None or model_b is None:
+                 raise ValueError(
+                     "model_a and model_b parameters are required for compare evaluation"
+                 )
+
+             # Validate that no classify-specific parameters are provided
+             if any([labels is not None, pass_labels is not None]):
+                 raise ValueError(
+                     "labels and pass_labels parameters are exclusive to the classify mode"
+                 )
+
+             # Validate that no score-specific parameters are provided
+             if any(
+                 [
+                     min_score is not None,
+                     max_score is not None,
+                     pass_threshold is not None,
+                 ]
+             ):
+                 raise ValueError(
+                     "min_score, max_score, and pass_threshold parameters are exclusive to the score mode"
+                 )
+
+             # Validate that model_to_evaluate is not provided
+             if model_to_evaluate is not None:
+                 raise ValueError(
+                     "model_to_evaluate parameter is exclusive to classify and score modes"
+                 )
+
+             parameters = CompareParameters(
+                 judge=judge_config,
+                 input_data_file_path=input_data_file_path,
+             )
+
+             # Handle model_a
+             if isinstance(model_a, str):
+                 parameters.model_a = model_a
+             elif isinstance(model_a, dict):
+                 # Validate that all required fields are present for model config
+                 required_fields = [
+                     "model_name",
+                     "max_tokens",
+                     "temperature",
+                     "system_template",
+                     "input_template",
+                 ]
+                 missing_fields = [
+                     field for field in required_fields if field not in model_a
+                 ]
+                 if missing_fields:
+                     raise ValueError(
+                         f"All model config parameters are required for model_a when using detailed configuration. "
+                         f"Missing: {', '.join(missing_fields)}"
+                     )
+                 parameters.model_a = ModelRequest(**model_a)
+
+             # Handle model_b
+             if isinstance(model_b, str):
+                 parameters.model_b = model_b
+             elif isinstance(model_b, dict):
+                 # Validate that all required fields are present for model config
+                 required_fields = [
+                     "model_name",
+                     "max_tokens",
+                     "temperature",
+                     "system_template",
+                     "input_template",
+                 ]
+                 missing_fields = [
+                     field for field in required_fields if field not in model_b
+                 ]
+                 if missing_fields:
+                     raise ValueError(
+                         f"All model config parameters are required for model_b when using detailed configuration. "
+                         f"Missing: {', '.join(missing_fields)}"
+                     )
+                 parameters.model_b = ModelRequest(**model_b)
+
+         else:
+             raise ValueError(
+                 f"Invalid evaluation type: {type}. Must be 'classify', 'score', or 'compare'"
+             )
+
+         payload = {
+             "type": type,
+             "parameters": parameters.model_dump(),
+         }
+
+         response, _, _ = requestor.request(
+             options=TogetherRequest(
+                 method="POST",
+                 url="evaluation",
+                 params=payload,
+             ),
+             stream=False,
+         )
+
+         assert isinstance(response, TogetherResponse)
+         return EvaluationCreateResponse(**response.data)
+
+     def list(
+         self,
+         status: Optional[str] = None,
+         limit: Optional[int] = None,
+     ) -> List[EvaluationJob]:
+         """
+         List evaluation jobs.
+
+         Args:
+             status: Optional filter by job status
+             limit: Optional limit on number of results (max 100)
+
+         Returns:
+             List of EvaluationJob objects
+         """
+         requestor = api_requestor.APIRequestor(
+             client=self._client,
+         )
+
+         params: Dict[str, Any] = {}
+         if status is not None:
+             params["status"] = status
+         if limit is not None:
+             params["limit"] = limit
+
+         response, _, _ = requestor.request(
+             options=TogetherRequest(
+                 method="GET",
+                 url="evaluations",
+                 params=params if params else None,
+             ),
+             stream=False,
+         )
+
+         assert isinstance(response, TogetherResponse)
+         jobs = response.data or []
+         return [EvaluationJob(**job) for job in jobs]
+
+     def retrieve(self, evaluation_id: str) -> EvaluationJob:
+         """
+         Get details of a specific evaluation job.
+
+         Args:
+             evaluation_id: The workflow ID of the evaluation job
+
+         Returns:
+             EvaluationJob object with full details
+         """
+         requestor = api_requestor.APIRequestor(
+             client=self._client,
+         )
+
+         response, _, _ = requestor.request(
+             options=TogetherRequest(
+                 method="GET",
+                 url=f"evaluation/{evaluation_id}",
+             ),
+             stream=False,
+         )
+
+         assert isinstance(response, TogetherResponse)
+         return EvaluationJob(**response.data)
+
+     def status(self, evaluation_id: str) -> EvaluationStatusResponse:
+         """
+         Get the status and results of a specific evaluation job.
+
+         Args:
+             evaluation_id: The workflow ID of the evaluation job
+
+         Returns:
+             EvaluationStatusResponse with status and results
+         """
+         requestor = api_requestor.APIRequestor(
+             client=self._client,
+         )
+
+         response, _, _ = requestor.request(
+             options=TogetherRequest(
+                 method="GET",
+                 url=f"evaluation/{evaluation_id}/status",
+             ),
+             stream=False,
+         )
+
+         assert isinstance(response, TogetherResponse)
+         return EvaluationStatusResponse(**response.data)
+
+
+ class AsyncEvaluation:
+     def __init__(self, client: TogetherClient) -> None:
+         self._client = client
+
+     async def create(
+         self,
+         type: str,
+         judge_model_name: str,
+         judge_system_template: str,
+         input_data_file_path: str,
+         # Classify-specific parameters
+         labels: Optional[List[str]] = None,
+         pass_labels: Optional[List[str]] = None,
+         # Score-specific parameters
+         min_score: Optional[float] = None,
+         max_score: Optional[float] = None,
+         pass_threshold: Optional[float] = None,
+         # Compare-specific parameters (model_a and model_b handled below)
+         # Common optional parameters
+         model_to_evaluate: Optional[Union[str, Dict[str, Any]]] = None,
+         model_a: Optional[Union[str, Dict[str, Any]]] = None,
+         model_b: Optional[Union[str, Dict[str, Any]]] = None,
+     ) -> EvaluationCreateResponse:
+         """
+         Create a new evaluation job.
+
+         Args:
+             type: The type of evaluation ("classify", "score", or "compare")
+             judge_model_name: Name of the judge model
+             judge_system_template: System template for the judge
+             input_data_file_path: Path to input data file
+             labels: List of classification labels (required for classify)
+             pass_labels: List of labels considered as passing (required for classify)
+             min_score: Minimum score value (required for score)
+             max_score: Maximum score value (required for score)
+             pass_threshold: Threshold score for passing (required for score)
+             model_to_evaluate: Model to evaluate for classify/score types
+             model_a: Model A for compare type
+             model_b: Model B for compare type
+
+         Returns:
+             EvaluationCreateResponse with workflow_id and status
+         """
+         requestor = api_requestor.APIRequestor(
+             client=self._client,
+         )
+
+         # Build judge config
+         judge_config = JudgeModelConfig(
+             model_name=judge_model_name,
+             system_template=judge_system_template,
+         )
+         parameters: Union[ClassifyParameters, ScoreParameters, CompareParameters]
+         # Build parameters based on type
+         if type == "classify":
+             if labels is None or pass_labels is None:
+                 raise ValueError(
+                     "labels and pass_labels are required for classify evaluation"
+                 )
+
+             # Validate that no score-specific parameters are provided
+             if any(
+                 [
+                     min_score is not None,
+                     max_score is not None,
+                     pass_threshold is not None,
+                 ]
+             ):
+                 raise ValueError(
+                     "min_score, max_score, and pass_threshold parameters are exclusive to the score mode"
+                 )
+
+             # Validate that no compare-specific parameters are provided
+             if any([model_a is not None, model_b is not None]):
+                 raise ValueError(
+                     "model_a and model_b parameters are exclusive to the compare mode"
+                 )
+
+             parameters = ClassifyParameters(
+                 judge=judge_config,
+                 labels=labels,
+                 pass_labels=pass_labels,
+                 input_data_file_path=input_data_file_path,
+             )
+
+             # Handle model_to_evaluate
+             if model_to_evaluate is not None:
+                 if isinstance(model_to_evaluate, str):
+                     parameters.model_to_evaluate = model_to_evaluate
+                 elif isinstance(model_to_evaluate, dict):
+                     # Validate that all required fields are present for model config
+                     required_fields = [
+                         "model_name",
+                         "max_tokens",
+                         "temperature",
+                         "system_template",
+                         "input_template",
+                     ]
+                     missing_fields = [
+                         field
+                         for field in required_fields
+                         if field not in model_to_evaluate
+                     ]
+                     if missing_fields:
+                         raise ValueError(
+                             f"All model config parameters are required when using detailed configuration. "
+                             f"Missing: {', '.join(missing_fields)}"
+                         )
+                     parameters.model_to_evaluate = ModelRequest(**model_to_evaluate)
+
+         elif type == "score":
+             if min_score is None or max_score is None or pass_threshold is None:
+                 raise ValueError(
+                     "min_score, max_score, and pass_threshold are required for score evaluation"
+                 )
+
+             # Validate that no classify-specific parameters are provided
+             if any([labels is not None, pass_labels is not None]):
+                 raise ValueError(
+                     "labels and pass_labels parameters are exclusive to the classify mode"
+                 )
+
+             # Validate that no compare-specific parameters are provided
+             if any([model_a is not None, model_b is not None]):
+                 raise ValueError(
+                     "model_a and model_b parameters are exclusive to the compare mode"
+                 )
+
+             parameters = ScoreParameters(
+                 judge=judge_config,
+                 min_score=min_score,
+                 max_score=max_score,
+                 pass_threshold=pass_threshold,
+                 input_data_file_path=input_data_file_path,
+             )
+
+             # Handle model_to_evaluate
+             if model_to_evaluate is not None:
+                 if isinstance(model_to_evaluate, str):
+                     parameters.model_to_evaluate = model_to_evaluate
+                 elif isinstance(model_to_evaluate, dict):
+                     # Validate that all required fields are present for model config
+                     required_fields = [
+                         "model_name",
+                         "max_tokens",
+                         "temperature",
+                         "system_template",
+                         "input_template",
+                     ]
+                     missing_fields = [
+                         field
+                         for field in required_fields
+                         if field not in model_to_evaluate
+                     ]
+                     if missing_fields:
+                         raise ValueError(
+                             f"All model config parameters are required when using detailed configuration. "
+                             f"Missing: {', '.join(missing_fields)}"
+                         )
+                     parameters.model_to_evaluate = ModelRequest(**model_to_evaluate)
+
+         elif type == "compare":
+             parameters = CompareParameters(
+                 judge=judge_config,
+                 input_data_file_path=input_data_file_path,
+             )
+
+             # Validate that model_a and model_b are provided
+             if model_a is None or model_b is None:
+                 raise ValueError(
+                     "model_a and model_b parameters are required for compare evaluation"
+                 )
+
+             # Validate that no classify-specific parameters are provided
+             if any([labels is not None, pass_labels is not None]):
+                 raise ValueError(
+                     "labels and pass_labels parameters are exclusive to the classify mode"
+                 )
+
+             # Validate that no score-specific parameters are provided
+             if any(
+                 [
+                     min_score is not None,
+                     max_score is not None,
+                     pass_threshold is not None,
+                 ]
+             ):
+                 raise ValueError(
+                     "min_score, max_score, and pass_threshold parameters are exclusive to the score mode"
+                 )
+
+             # Validate that model_to_evaluate is not provided
+             if model_to_evaluate is not None:
+                 raise ValueError(
+                     "model_to_evaluate parameter is exclusive to classify and score modes"
+                 )
+
+             # Handle model_a
+             if isinstance(model_a, str):
+                 parameters.model_a = model_a
+             elif isinstance(model_a, dict):
+                 # Validate that all required fields are present for model config
+                 required_fields = [
+                     "model_name",
+                     "max_tokens",
+                     "temperature",
+                     "system_template",
+                     "input_template",
+                 ]
+                 missing_fields = [
+                     field for field in required_fields if field not in model_a
+                 ]
+                 if missing_fields:
+                     raise ValueError(
+                         f"All model config parameters are required for model_a when using detailed configuration. "
+                         f"Missing: {', '.join(missing_fields)}"
+                     )
+                 parameters.model_a = ModelRequest(**model_a)
+
+             # Handle model_b
+             if isinstance(model_b, str):
+                 parameters.model_b = model_b
+             elif isinstance(model_b, dict):
+                 # Validate that all required fields are present for model config
+                 required_fields = [
+                     "model_name",
+                     "max_tokens",
+                     "temperature",
+                     "system_template",
+                     "input_template",
+                 ]
+                 missing_fields = [
+                     field for field in required_fields if field not in model_b
+                 ]
+                 if missing_fields:
+                     raise ValueError(
+                         f"All model config parameters are required for model_b when using detailed configuration. "
+                         f"Missing: {', '.join(missing_fields)}"
+                     )
+                 parameters.model_b = ModelRequest(**model_b)
+
+         else:
+             raise ValueError(
+                 f"Invalid evaluation type: {type}. Must be 'classify', 'score', or 'compare'"
+             )
+
+         payload = {
+             "type": type,
+             "parameters": parameters.model_dump(),
+         }
+
+         response, _, _ = await requestor.arequest(
+             options=TogetherRequest(
+                 method="POST",
+                 url="evaluation",
+                 params=payload,
+             ),
+             stream=False,
+         )
+
+         assert isinstance(response, TogetherResponse)
+         return EvaluationCreateResponse(**response.data)
+
+     async def list(
+         self,
+         status: Optional[str] = None,
+         limit: Optional[int] = None,
+     ) -> List[EvaluationJob]:
+         """
+         List evaluation jobs.
+
+         Args:
+             status: Optional filter by job status
+             limit: Optional limit on number of results (max 100)
+
+         Returns:
+             List of EvaluationJob objects
+         """
+         requestor = api_requestor.APIRequestor(
+             client=self._client,
+         )
+
+         params: Dict[str, Any] = {}
+         if status is not None:
+             params["status"] = status
+         if limit is not None:
+             params["limit"] = limit
+
+         response, _, _ = await requestor.arequest(
+             options=TogetherRequest(
+                 method="GET",
+                 url="evaluations",
+                 params=params if params else None,
+             ),
+             stream=False,
+         )
+
+         assert isinstance(response, TogetherResponse)
+         jobs = response.data or []
+         return [EvaluationJob(**job) for job in jobs]
+
+     async def retrieve(self, evaluation_id: str) -> EvaluationJob:
+         """
+         Get details of a specific evaluation job.
+
+         Args:
+             evaluation_id: The workflow ID of the evaluation job
+
+         Returns:
+             EvaluationJob object with full details
+         """
+         requestor = api_requestor.APIRequestor(
+             client=self._client,
+         )
+
+         response, _, _ = await requestor.arequest(
+             options=TogetherRequest(
+                 method="GET",
+                 url=f"evaluation/{evaluation_id}",
+             ),
+             stream=False,
+         )
+
+         assert isinstance(response, TogetherResponse)
+         return EvaluationJob(**response.data)
+
+     async def status(self, evaluation_id: str) -> EvaluationStatusResponse:
+         """
+         Get the status and results of a specific evaluation job.
+
+         Args:
+             evaluation_id: The workflow ID of the evaluation job
+
+         Returns:
+             EvaluationStatusResponse with status and results
+         """
+         requestor = api_requestor.APIRequestor(
+             client=self._client,
+         )
+
+         response, _, _ = await requestor.arequest(
+             options=TogetherRequest(
+                 method="GET",
+                 url=f"evaluation/{evaluation_id}/status",
+             ),
+             stream=False,
+         )
+
+         assert isinstance(response, TogetherResponse)
+         return EvaluationStatusResponse(**response.data)
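Usage sketch (not part of the package diff above): the snippet below shows how the new Evaluation resource added in this release might be called. It is a minimal sketch, assuming the Together client exposes the resource as client.evaluation (that attribute name is an assumption, not confirmed by the diff), and the judge model, evaluated model, and input file reference are placeholder values. The method names, parameters, and the workflow_id field come from the code above; AsyncEvaluation mirrors the same interface with awaitable methods.

from together import Together

client = Together()  # reads TOGETHER_API_KEY from the environment

# Create a classify-type evaluation job; arguments mirror Evaluation.create() above.
created = client.evaluation.create(  # "evaluation" attribute name is assumed
    type="classify",
    judge_model_name="meta-llama/Llama-3.3-70B-Instruct-Turbo",  # placeholder judge model
    judge_system_template="Label the response as helpful or unhelpful.",
    input_data_file_path="file-abc123",  # placeholder input data reference
    labels=["helpful", "unhelpful"],
    pass_labels=["helpful"],
    model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo",  # placeholder model
)

# The create response carries a workflow_id (per the docstring); use it with the
# other methods shown in the diff to inspect the job.
current = client.evaluation.status(created.workflow_id)
job = client.evaluation.retrieve(created.workflow_id)
recent_jobs = client.evaluation.list(limit=10)

For a score-type job, min_score, max_score, and pass_threshold would replace labels and pass_labels; for a compare-type job, model_a and model_b would be passed instead of model_to_evaluate, matching the validation rules enforced in create().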