speedy-utils 1.0.15__py3-none-any.whl → 1.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_utils/__init__.py CHANGED
@@ -9,8 +9,8 @@ from .chat_format import (
      transform_messages,
      transform_messages_to_chatml,
  )
- from .lm.lm import LM, LLMTask
- from .lm.alm import AsyncLM
+ from .lm.async_lm import AsyncLLMTask, AsyncLM
+ from .lm.sync_lm import LM, LLMTask
  
  __all__ = [
      "transform_messages",
@@ -26,4 +26,5 @@ __all__ = [
      "AsyncLM",
      "display_chat_messages_as_html",
      "LLMTask",
+     "AsyncLLMTask",
  ]
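The `llm_utils` facade keeps the same public names while the backing modules move from `lm.lm`/`lm.alm` to `lm.sync_lm`/`lm.async_lm`, and `AsyncLLMTask` joins `__all__`. A minimal import sketch of the resulting surface (the names come straight from `__all__` above; the constructor keywords are assumptions borrowed from the `AsyncLM(port=..., cache=..., model=...)` call in the `AsyncLLMTask` docstring later in this diff):

```python
# Hypothetical usage sketch; not part of the packaged diff itself.
from llm_utils import LM, LLMTask, AsyncLM, AsyncLLMTask

sync_client = LM(model="gpt-3.5-turbo")                   # re-exported from llm_utils.lm.sync_lm
async_client = AsyncLM(port=8130, model="gpt-3.5-turbo")  # re-exported from llm_utils.lm.async_lm
```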
llm_utils/chat_format/display.py CHANGED
@@ -1,5 +1,5 @@
  from __future__ import annotations
- from typing import List, Tuple, Sequence, Any, Dict, Optional
+ from typing import Any, Optional
  from IPython.display import HTML, display
  from difflib import SequenceMatcher
  
llm_utils/chat_format/transform.py CHANGED
@@ -1,6 +1,5 @@
  from __future__ import annotations
  from copy import deepcopy
- from typing import Callable, Dict, List, Sequence
  
  
  def identify_format(item):
@@ -114,7 +113,7 @@ def transform_messages(
  
  def transform_messages_to_chatml(input_data, input_format="auto"):
      if input_format == "auto":
-         input_data = raw_data = deepcopy(input_data)
+         input_data = deepcopy(input_data)
          if isinstance(input_data, list):
              input_format = "chatlm"
      assert (
llm_utils/group_messages.py CHANGED
@@ -76,7 +76,7 @@ def group_messages_by_len(
      """
      if messages is None:
          raise ValueError("messages parameter cannot be None")
-     from transformers.models.auto.tokenization_auto import AutoTokenizer
+     from transformers.models.auto.tokenization_auto import AutoTokenizer  # type: ignore
  
      tokenizer = AutoTokenizer.from_pretrained(model_name)
  
llm_utils/lm/__init__.py CHANGED
@@ -1,9 +1,12 @@
- from .lm import LM
- from .alm import AsyncLM
- OAI_LM = LM
+ # from .async_lm import AsyncLLMTask, AsyncLM
+ # from .lm import LM, LLMTask
  
- __all__ = [
-     "LM",
-     "OAI_LM",
-     "AsyncLM",
- ]
+ # OAI_LM = LM
+
+ # __all__ = [
+ #     "LM",
+ #     "OAI_LM",
+ #     "AsyncLM",
+ #     "LLMTask",
+ #     "AsyncLLMTask",
+ # ]
llm_utils/lm/alm.py → llm_utils/lm/async_lm.py RENAMED
@@ -1,5 +1,3 @@
- from __future__ import annotations
-
  """An **asynchronous** drop‑in replacement for the original `LM` class.
  
  Usage example (Python ≥3.8):
@@ -15,26 +13,30 @@ Usage example (Python ≥3.8):
      asyncio.run(main())
  """
  
- import asyncio
  import base64
  import hashlib
  import json
  import os
+ from abc import ABC
+ from functools import lru_cache
  from typing import (
      Any,
      Dict,
      List,
+     Literal,
      Optional,
      Sequence,
      Type,
      TypeVar,
      Union,
-     overload,
      cast,
+     overload,
  )
  
  from httpx import URL
+ from loguru import logger
  from openai import AsyncOpenAI, AuthenticationError, BadRequestError, RateLimitError
+ from openai.pagination import AsyncPage as AsyncSyncPage
  
  # from openai.pagination import AsyncSyncPage
  from openai.types.chat import (
@@ -44,11 +46,10 @@ from openai.types.chat import (
      ChatCompletionToolMessageParam,
      ChatCompletionUserMessageParam,
  )
- from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
  from openai.types.model import Model
  from pydantic import BaseModel
- from loguru import logger
- from openai.pagination import AsyncPage as AsyncSyncPage
+
+ from llm_utils.chat_format.display import get_conversation_one_turn
  
  # --------------------------------------------------------------------------- #
  # type helpers
@@ -67,10 +68,20 @@ def _color(code: int, text: str) -> str:
      return f"\x1b[{code}m{text}\x1b[0m"
  
  
- _red = lambda t: _color(31, t)
- _green = lambda t: _color(32, t)
- _blue = lambda t: _color(34, t)
- _yellow = lambda t: _color(33, t)
+ def _red(t):
+     return _color(31, t)
+
+
+ def _green(t):
+     return _color(32, t)
+
+
+ def _blue(t):
+     return _color(34, t)
+
+
+ def _yellow(t):
+     return _color(33, t)
  
  
  class AsyncLM:
@@ -100,6 +111,7 @@ class AsyncLM:
          self.openai_kwargs = openai_kwargs
          self.do_cache = cache
          self.ports = ports
+         self._init_port = port  # <-- store the port provided at init
  
          # Async client
  
@@ -375,10 +387,182 @@ class AsyncLM:
          except Exception:
              return None
  
+     # ------------------------------------------------------------------ #
+     # Missing methods from LM class
+     # ------------------------------------------------------------------ #
+     async def parse(
+         self,
+         response_model: Type[BaseModel],
+         instruction: Optional[str] = None,
+         prompt: Optional[str] = None,
+         messages: Optional[RawMsgs] = None,
+         think: Literal[True, False, None] = None,
+         add_json_schema_to_instruction: bool = False,
+         temperature: Optional[float] = None,
+         max_tokens: Optional[int] = None,
+         return_openai_response: bool = False,
+         cache: Optional[bool] = True,
+         **kwargs,
+     ):
+         """Parse response using guided JSON generation."""
+         if messages is None:
+             assert instruction is not None, "Instruction must be provided."
+             assert prompt is not None, "Prompt must be provided."
+             messages = [
+                 {
+                     "role": "system",
+                     "content": instruction,
+                 },
+                 {
+                     "role": "user",
+                     "content": prompt,
+                 },
+             ]  # type: ignore
+
+         post_fix = ""
+         json_schema = response_model.model_json_schema()
+         if add_json_schema_to_instruction and response_model:
+             _schema = f"\n\n<output_json_schema>\n{json.dumps(json_schema, indent=2)}\n</output_json_schema>"
+             post_fix += _schema
+
+         if think:
+             post_fix += "\n\n/think"
+         elif not think:
+             post_fix += "\n\n/no_think"
+
+         assert isinstance(messages, list), "Messages must be a list."
+         assert len(messages) > 0, "Messages cannot be empty."
+         assert messages[0]["role"] == "system", (
+             "First message must be a system message with instruction."
+         )
+         messages[0]["content"] += post_fix  # type: ignore
+
+         model_kwargs = {}
+         if temperature is not None:
+             model_kwargs["temperature"] = temperature
+         if max_tokens is not None:
+             model_kwargs["max_tokens"] = max_tokens
+         model_kwargs.update(kwargs)
+
+         use_cache = self.do_cache if cache is None else cache
+         cache_key = None
+         if use_cache:
+             cache_data = {
+                 "messages": messages,
+                 "model_kwargs": model_kwargs,
+                 "guided_json": json_schema,
+                 "response_format": response_model.__name__,
+             }
+             cache_key = self._cache_key(cache_data, {}, response_model)
+             cached_response = self._load_cache(cache_key)
+             self.last_log = [prompt, messages, cached_response]
+             if cached_response is not None:
+                 if return_openai_response:
+                     return cached_response
+                 return self._parse_complete_output(cached_response, response_model)
+
+         completion = await self.client.chat.completions.create(
+             model=self.model,  # type: ignore
+             messages=messages,  # type: ignore
+             extra_body={"guided_json": json_schema},
+             **model_kwargs,
+         )
+
+         if cache_key:
+             self._dump_cache(cache_key, completion)
+
+         self.last_log = [prompt, messages, completion]
+         if return_openai_response:
+             return completion
+         return self._parse_complete_output(completion, response_model)
+
+     def _parse_complete_output(
+         self, completion: Any, response_model: Type[BaseModel]
+     ) -> BaseModel:
+         """Parse completion output to response model."""
+         if hasattr(completion, "model_dump"):
+             completion = completion.model_dump()
+
+         if "choices" not in completion or not completion["choices"]:
+             raise ValueError("No choices in OpenAI response")
+
+         content = completion["choices"][0]["message"]["content"]
+         if not content:
+             raise ValueError("Empty content in response")
+
+         try:
+             data = json.loads(content)
+             return response_model.model_validate(data)
+         except Exception as exc:
+             raise ValueError(
+                 f"Failed to parse response as {response_model.__name__}: {content}"
+             ) from exc
+
+     async def inspect_word_probs(
+         self,
+         messages: Optional[List[Dict[str, Any]]] = None,
+         tokenizer: Optional[Any] = None,
+         do_print=True,
+         add_think: bool = True,
+     ) -> tuple[List[Dict[str, Any]], Any, str]:
+         """
+         Inspect word probabilities in a language model response.
+
+         Args:
+             tokenizer: Tokenizer instance to encode words.
+             messages: List of messages to analyze.
+
+         Returns:
+             A tuple containing:
+             - List of word probabilities with their log probabilities.
+             - Token log probability dictionaries.
+             - Rendered string with colored word probabilities.
+         """
+         if messages is None:
+             messages = await self.last_messages(add_think=add_think)
+             if messages is None:
+                 raise ValueError("No messages provided and no last messages available.")
+
+         if tokenizer is None:
+             tokenizer = get_tokenizer(self.model)
+
+         ret = await inspect_word_probs_async(self, tokenizer, messages)
+         if do_print:
+             print(ret[-1])
+         return ret
+
+     async def last_messages(
+         self, add_think: bool = True
+     ) -> Optional[List[Dict[str, str]]]:
+         """Get the last conversation messages including assistant response."""
+         if not hasattr(self, "last_log"):
+             return None
+
+         last_conv = self.last_log
+         messages = last_conv[1] if len(last_conv) > 1 else None
+         last_msg = last_conv[2]
+         if not isinstance(last_msg, dict):
+             last_conv[2] = last_conv[2].model_dump()  # type: ignore
+         msg = last_conv[2]
+         # Ensure msg is a dict
+         if hasattr(msg, "model_dump"):
+             msg = msg.model_dump()
+         message = msg["choices"][0]["message"]
+         reasoning = message.get("reasoning_content")
+         answer = message.get("content")
+         if reasoning and add_think:
+             final_answer = f"<think>{reasoning}</think>\n{answer}"
+         else:
+             final_answer = f"<think>\n\n</think>\n{answer}"
+         assistant = {"role": "assistant", "content": final_answer}
+         messages = messages + [assistant]  # type: ignore
+         return messages if messages else None
+
      # ------------------------------------------------------------------ #
      # Utility helpers
      # ------------------------------------------------------------------ #
      async def inspect_history(self) -> None:
+         """Inspect the conversation history with proper formatting."""
          if not hasattr(self, "last_log"):
              raise ValueError("No history available. Please call the model first.")
  
@@ -466,3 +650,210 @@ class AsyncLM:
          except Exception as exc:
              logger.error(f"Failed to list models: {exc}")
              return []
+
+
+ # --------------------------------------------------------------------------- #
+ # Module-level utility functions (async versions)
+ # --------------------------------------------------------------------------- #
+
+
+ @lru_cache(maxsize=10)
+ def get_tokenizer(model_name: str) -> Any:
+     """Get tokenizer for the given model."""
+     from transformers import AutoTokenizer  # type: ignore
+
+     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+     return tokenizer
+
+
+ async def inspect_word_probs_async(lm, tokenizer, messages):
+     """Async version of inspect_word_probs."""
+
+     import numpy as np
+
+     async def compute_word_log_probs(
+         tokenizer: Any,
+         lm_client: Any,
+     ) -> tuple[List[Dict[str, Any]], Any]:
+         # Build a prompt that preserves literal newlines
+         prompt = tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,  # Don't tokenize yet, we need raw text
+             add_generation_prompt=False,  # No generation prompt needed
+         )
+
+         # Request token logprobs
+         response = await lm_client.client.completions.create(
+             model=lm_client.model,  # type: ignore
+             prompt=prompt,
+             max_tokens=1,
+             logprobs=1,
+             extra_body={"prompt_logprobs": 0},
+         )
+         token_logprob_dicts = response.choices[0].prompt_logprobs  # type: ignore
+
+         # Override first token to known start marker
+         start_id = tokenizer.encode("<|im_start|>")[0]
+         token_logprob_dicts[0] = {
+             str(start_id): {
+                 "logprob": -1,
+                 "rank": 1,
+                 "decoded_token": "<|im_start|>",
+             }
+         }
+
+         # Flatten tokens
+         tokens: List[Dict[str, Any]] = [
+             {"id": int(tid), **tdata}
+             for td in token_logprob_dicts
+             for tid, tdata in td.items()
+         ]
+
+         # Validate tokenization
+         tokenized = tokenizer.tokenize(prompt)
+         if len(tokenized) != len(tokens):
+             raise ValueError(f"Token count mismatch: {len(tokenized)} vs {len(tokens)}")
+         for idx, tok in enumerate(tokens):
+             if tokenized[idx] != tok["decoded_token"]:
+                 raise AssertionError(
+                     f"Token mismatch at {idx}: "
+                     f"{tokenized[idx]} != {tok['decoded_token']}"
+                 )
+
+         # Split on newline sentinel
+         split_prompt = prompt.replace("\n", " <NL> ")
+         words = split_prompt.split()
+
+         word_log_probs: List[Dict[str, Any]] = []
+         token_idx = 0
+
+         for word in words:
+             # Map sentinel back to actual newline for encoding
+             target = "\n" if word == "<NL>" else word
+             sub_ids = tokenizer.encode(target, add_special_tokens=False)
+             count = len(sub_ids)
+             if count == 0:
+                 continue
+
+             subs = tokens[token_idx : token_idx + count]
+             avg_logprob = sum(s["logprob"] for s in subs) / count
+             prob = float(np.exp(avg_logprob))
+             word_log_probs.append({"word": target, "probability": prob})
+             token_idx += count
+
+         return word_log_probs, token_logprob_dicts  # type: ignore
+
+     def render_by_logprob(word_log_probs: List[Dict[str, Any]]) -> str:
+         """
+         Return an ANSI-colored string for word probabilities (red → green).
+         """
+         if not word_log_probs:
+             return ""
+
+         probs = [entry["probability"] for entry in word_log_probs]
+         min_p, max_p = min(probs), max(probs)
+         parts: List[str] = []
+
+         for entry in word_log_probs:
+             word = entry["word"]
+             # Preserve actual line breaks
+             if word == "\n":
+                 parts.append("\n")
+                 continue
+
+             p = entry["probability"]
+             norm = (p - min_p) / (max_p - min_p or 1.0)
+             r = int(255 * (1 - norm))  # red component (high when prob is low)
+             g = int(255 * norm)  # green component (high when prob is high)
+             b = 0  # no blue for red-green gradient
+             colored = f"\x1b[38;2;{r};{g};{b}m{word}\x1b[0m"
+             parts.append(colored + " ")
+
+         return "".join(parts).rstrip()
+
+     word_probs, token_logprob_dicts = await compute_word_log_probs(tokenizer, lm)
+     return word_probs, token_logprob_dicts, render_by_logprob(word_probs)
+
+
+ # --------------------------------------------------------------------------- #
+ # Async LLMTask class
+ # --------------------------------------------------------------------------- #
+
+
+ class AsyncLLMTask(ABC):
+     """
+     Async callable wrapper around an AsyncLM endpoint.
+
+     Sub-classes must set:
+         • lm          – the async language-model instance
+         • InputModel  – a Pydantic input class
+         • OutputModel – a Pydantic output class
+
+     Optional flags:
+         • temperature     – float (default 0.6)
+         • think           – bool (if the backend supports "chain-of-thought")
+         • add_json_schema – bool (include schema in the instruction)
+
+     The **docstring** of each sub-class is sent as the LM instruction.
+     Example
+     ```python
+     class DemoTask(AsyncLLMTask):
+         "TODO: SYSTEM_PROMPT_INSTURCTION HERE"
+
+         lm = AsyncLM(port=8130, cache=False, model="gpt-3.5-turbo")
+
+         class InputModel(BaseModel):
+             text_to_translate: str
+
+         class OutputModel(BaseModel):
+             translation: str
+             glossary_use: str
+
+         temperature = 0.6
+         think = False
+
+     demo_task = DemoTask()
+     result = await demo_task({'text_to_translate': 'Translate from english to vietnamese: Hello how are you'})
+     ```
+     """
+
+     lm: "AsyncLM"
+     InputModel: Type[BaseModel]
+     OutputModel: Type[BaseModel]
+
+     temperature: float = 0.6
+     think: bool = False
+     add_json_schema: bool = False
+
+     async def __call__(self, data: BaseModel | dict) -> BaseModel:
+         if (
+             not hasattr(self, "InputModel")
+             or not hasattr(self, "OutputModel")
+             or not hasattr(self, "lm")
+         ):
+             raise NotImplementedError(
+                 f"{self.__class__.__name__} must define lm, InputModel, and OutputModel as class attributes."
+             )
+
+         item = data if isinstance(data, BaseModel) else self.InputModel(**data)
+
+         return await self.lm.parse(
+             prompt=item.model_dump_json(),
+             instruction=self.__doc__ or "",
+             response_model=self.OutputModel,
+             temperature=self.temperature,
+             think=self.think,
+             add_json_schema_to_instruction=self.add_json_schema,
+         )
+
+     def generate_training_data(
+         self, input_dict: Dict[str, Any], output: Dict[str, Any]
+     ):
+         """Return share gpt like format"""
+         system_prompt = self.__doc__ or ""
+         user_msg = self.InputModel(**input_dict).model_dump_json()  # type: ignore[attr-defined]
+         assistant_msg = self.OutputModel(**output).model_dump_json()  # type: ignore[attr-defined]
+         messages = get_conversation_one_turn(
+             system_msg=system_prompt, user_msg=user_msg, assistant_msg=assistant_msg
+         )
+         return {"messages": messages}
llm_utils/lm/chat_html.py CHANGED
@@ -1,4 +1,5 @@
- from .lm import *
+ from typing import Any, Optional, cast
+ from .sync_lm import LM, Messages, LegacyMsgs, RawMsgs
  import sys
  
  # Configuration
@@ -231,7 +232,8 @@ class LMChatHtml(LM):
                  padding=display_padding,
                  inner_padding=display_inner_padding,
              )
-             display_handle.update(HTML(html_content))
+             if display_handle is not None:
+                 display_handle.update(HTML(html_content))
          else:
              # Console streaming mode (original behavior)
              for chunk in stream:
llm_utils/lm/lm_json.py CHANGED
@@ -1,11 +1,7 @@
- import json
- import re
- from functools import cache
- from typing import *
+ from typing import Any, Optional
  
- from pydantic import BaseModel
  
- from llm_utils.lm.lm import LM, RawMsgs
+ from llm_utils.lm.sync_lm import LM, RawMsgs
  
  
  class LMJson(LM):
llm_utils/lm/lm.py → llm_utils/lm/sync_lm.py RENAMED
@@ -4,8 +4,8 @@ import base64
  import hashlib
  import json
  import os
- import warnings
  from abc import ABC
+ from functools import lru_cache
  from typing import (
      Any,
      Dict,
@@ -20,10 +20,7 @@ from typing import (
      overload,
  )
  
- from httpx import URL
- from huggingface_hub import repo_info
  from loguru import logger
- from numpy import isin
  from openai import AuthenticationError, OpenAI, RateLimitError
  from openai.pagination import SyncPage
  from openai.types.chat import (
@@ -33,10 +30,10 @@ from openai.types.chat import (
      ChatCompletionToolMessageParam,
      ChatCompletionUserMessageParam,
  )
- from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
  from openai.types.model import Model
  from pydantic import BaseModel
  
+ from llm_utils.chat_format.display import get_conversation_one_turn
  from speedy_utils.common.utils_io import jdumps
  
  # --------------------------------------------------------------------------- #
@@ -549,7 +546,7 @@ class LM:
  
          if think:
              post_fix += "\n\n/think"
-         elif think == False:
+         elif not think:
              post_fix += "\n\n/no_think"
  
          assert isinstance(messages, list), "Messages must be a list."
@@ -675,20 +672,15 @@ class LM:
          return messages if messages else None
  
  
- from functools import lru_cache
-
-
  @lru_cache(maxsize=10)
  def get_tokenizer(model_name: str) -> Any:
-     from transformers import AutoTokenizer
+     from transformers import AutoTokenizer  # type: ignore
  
      tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
      return tokenizer
  
  
  def inspect_word_probs(lm, tokenizer, messages):
-     import re
-     from typing import Any, Dict, List
  
      import numpy as np
  
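One side effect of the cleanup above: `from functools import lru_cache` now lives at the top of the module, and the memoized `get_tokenizer` returns the same tokenizer object for repeated calls with the same model name instead of reloading it from disk or the Hub. A small sketch of that caching behavior (the model id is a placeholder):

```python
from functools import lru_cache
from typing import Any


@lru_cache(maxsize=10)
def get_tokenizer(model_name: str) -> Any:
    # Deferred import keeps transformers optional until a tokenizer is needed.
    from transformers import AutoTokenizer  # type: ignore

    return AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)


tok_a = get_tokenizer("Qwen/Qwen2.5-0.5B")  # placeholder model id; loads once
tok_b = get_tokenizer("Qwen/Qwen2.5-0.5B")  # cache hit
assert tok_a is tok_b  # lru_cache returns the identical object
```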
llm_utils/lm/utils.py CHANGED
@@ -82,7 +82,7 @@ def retry_on_exception(max_retries=10, exceptions=(Exception,), sleep_time=3):
              try:
                  return func(self, *args, **kwargs)
              except exceptions as e:
-                 import litellm
+                 import litellm  # type: ignore
  
                  if isinstance(
                      e, (litellm.exceptions.APIError, litellm.exceptions.Timeout)
llm_utils/scripts/vllm_load_balancer.py CHANGED
@@ -1,7 +1,6 @@
  import asyncio
  import random
  from collections import defaultdict
- import time
  from tabulate import tabulate
  import contextlib
  import aiohttp  # <-- Import aiohttp
speedy_utils/common/function_decorator.py CHANGED
@@ -2,7 +2,7 @@ import functools
  import time
  import traceback
  from collections.abc import Callable
- from typing import Any, Tuple, Type, Union
+ from typing import Any
  
  from loguru import logger
  
@@ -26,8 +26,6 @@ def retry_runtime(
      def decorator(func: Callable) -> Callable:
          @functools.wraps(func)
          def wrapper(*args: Any, **kwargs: Any) -> Any:
-             last_exception = None
-
              for attempt in range(1, max_retry + 1):
                  try:
                      return func(*args, **kwargs)
@@ -40,7 +38,6 @@
                      raise
  
                  except exceptions as e:
-                     last_exception = e
                      if attempt == max_retry:
                          logger.opt(depth=1).error(
                              f"Function {func.__name__} failed after {max_retry} retries: {str(e)}"
speedy_utils/common/logger.py CHANGED
@@ -5,7 +5,7 @@ import re
  import sys
  import time
  from collections import OrderedDict
- from typing import Annotated, Literal, Optional
+ from typing import Annotated, Literal
  
  from loguru import logger
  
speedy_utils/common/report_manager.py CHANGED
@@ -2,7 +2,6 @@ import os
  from collections import defaultdict
  from datetime import datetime
  
- from fastcore.all import threaded
  
  
  class ReportManager:
@@ -40,7 +39,7 @@ class ReportManager:
  
          md_content.extend(
              [
-                 f"\n### Results Overview",
+                 "\n### Results Overview",
                  f"- Total items processed: {len(results)}",
                  f"- Success rate: {(len(results) - len(errors))/len(results)*100:.1f}%",
                  f"- Total errors: {len(errors)}",
@@ -61,7 +60,7 @@ class ReportManager:
          for error_type, errs in error_groups.items():
              md_content.extend(
                  [
-                     f"\n<details>",
+                     "\n<details>",
                      f"<summary><b>{error_type}</b> ({len(errs)} occurrences)</summary>\n",
                      "| Index | Input | Error Message |",
                      "|-------|-------|---------------|",
speedy_utils/common/utils_cache.py CHANGED
@@ -5,7 +5,8 @@ import os
  import os.path as osp
  import pickle
  import uuid
- from typing import Any, List, Literal
+ from threading import Lock
+ from typing import Any, Literal
  
  import cachetools
  import pandas as pd
@@ -13,12 +14,11 @@ import xxhash
  from loguru import logger
  from pydantic import BaseModel
  
- from .utils_io import dump_json_or_pickle, load_json_or_pickle
- from .utils_misc import mkdir_or_exist
+ from src.speedy_utils.common.utils_io import dump_json_or_pickle, load_json_or_pickle
+ from src.speedy_utils.common.utils_misc import mkdir_or_exist
  
  SPEED_CACHE_DIR = osp.join(osp.expanduser("~"), ".cache/speedy_cache")
  LRU_MEM_CACHE = cachetools.LRUCache(maxsize=128_000)
- from threading import Lock
  
  thread_locker = Lock()
  
speedy_utils/common/utils_misc.py CHANGED
@@ -2,9 +2,8 @@
  
  import inspect
  import os
- import sys
  from collections.abc import Callable
- from typing import Any, List
+ from typing import Any
  
  from pydantic import BaseModel
  
speedy_utils/multi_worker/process.py CHANGED
@@ -1,12 +1,10 @@
- import inspect
  import multiprocessing
  import os
- import time
  import traceback
- from collections.abc import Callable, Iterable, Iterator, Sequence
+ from collections.abc import Callable, Iterable, Iterator
  from concurrent.futures import ProcessPoolExecutor, as_completed
  from itertools import islice
- from typing import Any, List, TypeVar, cast
+ from typing import Any, TypeVar, cast
  
  T = TypeVar("T")
  
speedy_utils/scripts/mpython.py CHANGED
@@ -5,7 +5,6 @@ import multiprocessing  # Import multiprocessing module
  import os
  import shlex  # To properly escape command line arguments
  import shutil
- import subprocess
  
  taskset_path = shutil.which("taskset")
  
@@ -80,7 +79,7 @@ def main():
      cmd_str = shlex.join(args.cmd)
  
      gpus = args.gpus.split(",")
-     gpus = [gpu for gpu in gpus if not gpu in args.ignore_gpus.split(",")]
+     gpus = [gpu for gpu in gpus if gpu not in args.ignore_gpus.split(",")]
      num_gpus = len(gpus)
  
      cpu_per_process = max(args.total_cpu // args.total_fold, 1)
speedy_utils/scripts/openapi_client_codegen.py CHANGED
@@ -10,12 +10,8 @@ Usage:
  
  import argparse
  import json
- import os
- import signal
- import subprocess
  import sys
- import time
- from typing import Any, Dict, List, Optional
+ from typing import Any, Dict, List
  
  
  def pascal_case(s: str) -> str:
speedy_utils-1.0.15.dist-info/METADATA → speedy_utils-1.0.20.dist-info/METADATA RENAMED
@@ -1,11 +1,12 @@
  Metadata-Version: 2.3
  Name: speedy-utils
- Version: 1.0.15
+ Version: 1.0.20
  Summary: Fast and easy-to-use package for data science
  Author: AnhVTH
  Author-email: anhvth.226@gmail.com
- Requires-Python: >=3.9
+ Requires-Python: >=3.8
  Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
@@ -19,7 +20,7 @@ Requires-Dist: fastprogress
  Requires-Dist: freezegun (>=1.5.1,<2.0.0)
  Requires-Dist: ipdb
  Requires-Dist: ipywidgets
- Requires-Dist: json-repair (>=0.40.0,<0.41.0)
+ Requires-Dist: json-repair (>=0.25.0,<0.31.0)
  Requires-Dist: jupyterlab
  Requires-Dist: loguru
  Requires-Dist: matplotlib
speedy_utils-1.0.20.dist-info/RECORD ADDED
@@ -0,0 +1,37 @@
+ llm_utils/__init__.py,sha256=SlaCMArn_uKVw4r0psz0q0IOQ1VFGdgCFOAKxQ81WTI,694
+ llm_utils/chat_format/__init__.py,sha256=8dBIUqFJvkgQYedxBtcyxt-4tt8JxAKVap2JlTXmgaM,737
+ llm_utils/chat_format/display.py,sha256=qaEGADGP8iQFzWOuzEO7_HyrqAFdEnUfkHAH28b0ym0,9772
+ llm_utils/chat_format/transform.py,sha256=8TZhvUS5DrjUeMNtDIuWY54B_QZ7jjpXEL9c8F5z79w,5400
+ llm_utils/chat_format/utils.py,sha256=xTxN4HrLHcRO2PfCTR43nH1M5zCa7v0kTTdzAcGkZg0,1229
+ llm_utils/group_messages.py,sha256=8CU9nKOja3xeuhdrX5CvYVveSqSKb2zQ0eeNzA88aTQ,3621
+ llm_utils/lm/__init__.py,sha256=rX36_MsnekM5GHwWS56XELbm4W5x2TDwnPERDTfo0eU,194
+ llm_utils/lm/async_lm.py,sha256=06QVT7iWAa8Rz48oRmnzkS-dJYOiJ0zSfKqEhZY11S8,30825
+ llm_utils/lm/chat_html.py,sha256=FkGo0Dv_nAHYBMZzXfMu_bGQKaCx302goh3XaT-_ETc,8674
+ llm_utils/lm/lm_json.py,sha256=fMt42phzFV2f6ulrtWcDXsWHi8WcG7gGkCzpIq8VSSM,1975
+ llm_utils/lm/sync_lm.py,sha256=FMiAhltqUNZdzGejMBPqGfQ3PE-fUoIGW8hfjTO_fNo,31035
+ llm_utils/lm/utils.py,sha256=GMvs64DRzVnXAki4SZ-A6mx2Fi9IgeF11BA-5FB-CYg,4777
+ llm_utils/scripts/README.md,sha256=yuOLnLa2od2jp4wVy3rV0rESeiV3o8zol5MNMsZx0DY,999
+ llm_utils/scripts/vllm_load_balancer.py,sha256=qsVhQ3ubJ3JnOAu4psHB1PyKy9muX-mZZkFqepH8WcU,17507
+ llm_utils/scripts/vllm_serve.py,sha256=CbW_3Y9Vt7eQYoGGPT3yj1nhbLYOc3b1LdJBy1sVX-Y,11976
+ speedy_utils/__init__.py,sha256=kxQk4PGS3Xkxnerm0YqjF6GKTpgoaTc1vudKid-2c_A,3388
+ speedy_utils/all.py,sha256=A9jiKGjo950eg1pscS9x38OWAjKGyusoAN5mrfweY4E,3090
+ speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ speedy_utils/common/clock.py,sha256=3n4FkCW0dz46O8By09V5Pve1DSMgpLDRbWEVRryryeQ,7423
+ speedy_utils/common/function_decorator.py,sha256=BspJ0YuGL6elS7lWBAgELZ-sCfED_1N2P5fgH-fCRUQ,2132
+ speedy_utils/common/logger.py,sha256=JqW9gG4ujfq4RldNeYP2p52BYgCwjkYeGGYyzLn6mfY,6422
+ speedy_utils/common/notebook_utils.py,sha256=7tFXMaE365ubrD9VCwDoZe7oBqZ2qG5t_Vla4Qvg1M8,2062
+ speedy_utils/common/report_manager.py,sha256=sEuwLcyYpYlj9uxIsC8yiAu1eirydkQIldKRtFStswI,3894
+ speedy_utils/common/utils_cache.py,sha256=Fp1vvi19q4xIcXi7ZuVjk49gOqdl3i7bv2Z6UnERMjM,8424
+ speedy_utils/common/utils_io.py,sha256=d7PKz5tOPrwHDr7GXuYmILvjXJOFEwfzAEIuUcYaI60,4790
+ speedy_utils/common/utils_misc.py,sha256=cdEuBBpiB1xpuzj0UBDHDuTIerqsMIw37ENq6EXliOw,1795
+ speedy_utils/common/utils_print.py,sha256=iQqnOYw2EFC8TqeSDbrcnIQAUKT7FbB8Mec8b2aGAzw,4833
+ speedy_utils/multi_worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ speedy_utils/multi_worker/process.py,sha256=BI-sgzzQ0_N8kOfaS_3ZAGZ3d6panYzJ3-BGZthY4dQ,6824
+ speedy_utils/multi_worker/thread.py,sha256=9pXjvgjD0s0Hp0cZ6I3M0ndp1OlYZ1yvqbs_bcun_Kw,12775
+ speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ speedy_utils/scripts/mpython.py,sha256=73PHm1jqbCt2APN4xuNjD0VDKwzOj4EZsViEMQiZU2g,3853
+ speedy_utils/scripts/openapi_client_codegen.py,sha256=f2125S_q0PILgH5dyzoKRz7pIvNEjCkzpi4Q4pPFRZE,9683
+ speedy_utils-1.0.20.dist-info/METADATA,sha256=ujYqU8i_7O40c4av_5M16eoPIDsA20df2uOSFUVOY-Q,7442
+ speedy_utils-1.0.20.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+ speedy_utils-1.0.20.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
+ speedy_utils-1.0.20.dist-info/RECORD,,
llm_utils/scripts/example_vllm_client.py DELETED
@@ -1,269 +0,0 @@
- """
- Beautiful example script for interacting with VLLM server.
-
- This script demonstrates various ways to use the VLLM API server
- for text generation tasks.
- """
-
- import asyncio
- import json
- from typing import Dict, List, Optional, Any
-
- import aiohttp
- from loguru import logger
- from pydantic import BaseModel, Field
-
-
- class VLLMRequest(BaseModel):
-     """Request model for VLLM API."""
-     prompt: str
-     max_tokens: int = Field(default=512, ge=1, le=8192)
-     temperature: float = Field(default=0.7, ge=0.0, le=2.0)
-     top_p: float = Field(default=0.9, ge=0.0, le=1.0)
-     stream: bool = False
-     stop: Optional[List[str]] = None
-
-
- class VLLMResponse(BaseModel):
-     """Response model from VLLM API."""
-     text: str
-     finish_reason: str
-     prompt_tokens: int
-     completion_tokens: int
-     total_tokens: int
-
-
- class VLLMClient:
-     """Client for interacting with VLLM server."""
-
-     def __init__(self, base_url: str = 'http://localhost:8140'):
-         self.base_url = base_url
-         self.model_name = 'selfeval_8b'
-
-     async def generate_text(
-         self,
-         request: VLLMRequest
-     ) -> VLLMResponse:
-         """Generate text using VLLM API."""
-         url = f'{self.base_url}/v1/completions'
-
-         payload = {
-             'model': self.model_name,
-             'prompt': request.prompt,
-             'max_tokens': request.max_tokens,
-             'temperature': request.temperature,
-             'top_p': request.top_p,
-             'stream': request.stream,
-         }
-
-         if request.stop:
-             payload['stop'] = request.stop
-
-         async with aiohttp.ClientSession() as session:
-             try:
-                 async with session.post(
-                     url,
-                     json=payload,
-                     timeout=aiohttp.ClientTimeout(total=60)
-                 ) as response:
-                     response.raise_for_status()
-                     data = await response.json()
-
-                     choice = data['choices'][0]
-                     usage = data['usage']
-
-                     return VLLMResponse(
-                         text=choice['text'],
-                         finish_reason=choice['finish_reason'],
-                         prompt_tokens=usage['prompt_tokens'],
-                         completion_tokens=usage['completion_tokens'],
-                         total_tokens=usage['total_tokens']
-                     )
-
-             except aiohttp.ClientError as e:
-                 logger.error(f'HTTP error: {e}')
-                 raise
-             except Exception as e:
-                 logger.error(f'Unexpected error: {e}')
-                 raise
-
-     async def generate_batch(
-         self,
-         requests: List[VLLMRequest]
-     ) -> List[VLLMResponse]:
-         """Generate text for multiple requests concurrently."""
-         tasks = [self.generate_text(req) for req in requests]
-         return await asyncio.gather(*tasks, return_exceptions=True)
-
-     async def health_check(self) -> bool:
-         """Check if the VLLM server is healthy."""
-         url = f'{self.base_url}/health'
-
-         try:
-             async with aiohttp.ClientSession() as session:
-                 async with session.get(
-                     url,
-                     timeout=aiohttp.ClientTimeout(total=10)
-                 ) as response:
-                     return response.status == 200
-         except Exception as e:
-             logger.warning(f'Health check failed: {e}')
-             return False
-
-
- async def example_basic_generation():
-     """Example: Basic text generation."""
-     logger.info('🚀 Running basic generation example')
-
-     client = VLLMClient()
-
-     # Check server health
-     if not await client.health_check():
-         logger.error('❌ Server is not healthy')
-         return
-
-     request = VLLMRequest(
-         prompt='Explain the concept of machine learning in simple terms:',
-         max_tokens=256,
-         temperature=0.7,
-         stop=['\n\n']
-     )
-
-     try:
-         response = await client.generate_text(request)
-
-         logger.success('✅ Generation completed')
-         logger.info(f'📝 Generated text:\n{response.text}')
-         logger.info(f'📊 Tokens: {response.total_tokens} total '
-                     f'({response.prompt_tokens} prompt + '
-                     f'{response.completion_tokens} completion)')
-
-     except Exception as e:
-         logger.error(f'❌ Generation failed: {e}')
-
-
- async def example_batch_generation():
-     """Example: Batch text generation."""
-     logger.info('🚀 Running batch generation example')
-
-     client = VLLMClient()
-
-     prompts = [
-         'What is artificial intelligence?',
-         'Explain quantum computing briefly:',
-         'What are the benefits of renewable energy?'
-     ]
-
-     requests = [
-         VLLMRequest(
-             prompt=prompt,
-             max_tokens=128,
-             temperature=0.8
-         ) for prompt in prompts
-     ]
-
-     try:
-         responses = await client.generate_batch(requests)
-
-         for i, response in enumerate(responses):
-             if isinstance(response, Exception):
-                 logger.error(f'❌ Request {i+1} failed: {response}')
-             else:
-                 logger.success(f'✅ Request {i+1} completed')
-                 logger.info(f'📝 Response {i+1}:\n{response.text}\n')
-
-     except Exception as e:
-         logger.error(f'❌ Batch generation failed: {e}')
-
-
- async def example_creative_writing():
-     """Example: Creative writing with specific parameters."""
-     logger.info('🚀 Running creative writing example')
-
-     client = VLLMClient()
-
-     request = VLLMRequest(
-         prompt=(
-             'Write a short story about a robot discovering emotions. '
-             'The story should be exactly 3 paragraphs:\n\n'
-         ),
-         max_tokens=400,
-         temperature=1.2,  # Higher temperature for creativity
-         top_p=0.95,
-         stop=['THE END', '\n\n\n']
-     )
-
-     try:
-         response = await client.generate_text(request)
-
-         logger.success('✅ Creative writing completed')
-         logger.info(f'📚 Story:\n{response.text}')
-         logger.info(f'🎯 Finish reason: {response.finish_reason}')
-
-     except Exception as e:
-         logger.error(f'❌ Creative writing failed: {e}')
-
-
- async def example_code_generation():
-     """Example: Code generation."""
-     logger.info('🚀 Running code generation example')
-
-     client = VLLMClient()
-
-     request = VLLMRequest(
-         prompt=(
-             'Write a Python function that calculates the fibonacci '
-             'sequence up to n terms:\n\n```python\n'
-         ),
-         max_tokens=300,
-         temperature=0.2,  # Lower temperature for code
-         stop=['```', '\n\n\n']
-     )
-
-     try:
-         response = await client.generate_text(request)
-
-         logger.success('✅ Code generation completed')
-         logger.info(f'💻 Generated code:\n```python\n{response.text}\n```')
-
-     except Exception as e:
-         logger.error(f'❌ Code generation failed: {e}')
-
-
- async def main():
-     """Run all examples."""
-     logger.info('🎯 Starting VLLM Client Examples')
-     logger.info('=' * 50)
-
-     examples = [
-         example_basic_generation,
-         example_batch_generation,
-         example_creative_writing,
-         example_code_generation
-     ]
-
-     for example in examples:
-         await example()
-         logger.info('-' * 50)
-         await asyncio.sleep(1)  # Brief pause between examples
-
-     logger.info('🎉 All examples completed!')
-
-
- if __name__ == '__main__':
-     # Configure logger
-     logger.remove()
-     logger.add(
-         lambda msg: print(msg, end=''),
-         format='<green>{time:HH:mm:ss}</green> | '
-                '<level>{level: <8}</level> | '
-                '<cyan>{message}</cyan>',
-         level='INFO'
-     )
-
-     try:
-         asyncio.run(main())
-     except KeyboardInterrupt:
-         logger.info('\n👋 Goodbye!')
-     except Exception as e:
-         logger.error(f'❌ Script failed: {e}')
llm_utils/scripts/requirements_example.txt DELETED
@@ -1,3 +0,0 @@
- aiohttp>=3.8.0
- loguru>=0.6.0
- pydantic>=2.0.0
llm_utils/scripts/serve_script.sh DELETED
@@ -1,2 +0,0 @@
- HF_HOME=/home/anhvth5/.cache/huggingface CUDA_VISIBLE_DEVICES=0 /home/anhvth5/miniconda3/envs/unsloth_env/bin/vllm serve ./outputs/8B_selfeval_retranslate/Qwen3-8B_2025_05_30/ls_response_only_r8_a8_sq8192_lr5e_06_bz64_ep1_4/ --port 8140 --tensor-parallel 1 --gpu-memory-utilization 0.9 --dtype auto --max-model-len 8192 --enable-prefix-caching --disable-log-requests --served-model-name selfeval_8b
- Logging to /tmp/vllm_8140.txt
speedy_utils-1.0.15.dist-info/RECORD DELETED
@@ -1,40 +0,0 @@
- llm_utils/__init__.py,sha256=0y7y1ia3jyvY3qgOnVp1xCqwMT5UYgw_cSAU_9AjHUo,650
- llm_utils/chat_format/__init__.py,sha256=8dBIUqFJvkgQYedxBtcyxt-4tt8JxAKVap2JlTXmgaM,737
- llm_utils/chat_format/display.py,sha256=a3zWzo47SUf4i-uic-dwf-vxtu6gZWLbnJrszjjZjQ8,9801
- llm_utils/chat_format/transform.py,sha256=328V18FOgRQzljAl9Mh8NF4Tl-N3cZZIPmAwHQspXCY,5461
- llm_utils/chat_format/utils.py,sha256=xTxN4HrLHcRO2PfCTR43nH1M5zCa7v0kTTdzAcGkZg0,1229
- llm_utils/group_messages.py,sha256=wyiZzs7O8yK2lyIakV2x-1CrrWVT12sjnP1vVnmPet4,3606
- llm_utils/lm/__init__.py,sha256=e8eCWlLo39GZjq9CokludZGHYVZ7BnbWZ6GOJoiWGzE,110
- llm_utils/lm/alm.py,sha256=_q8dtvUy_JkYrCV83WwSLD5o5t8e2h9eTRW3DJfWbOM,16850
- llm_utils/lm/chat_html.py,sha256=cbrKjHWIblCat5TRe0X-N7kT7bBtutNYQaIEK-myLq4,8543
- llm_utils/lm/lm.py,sha256=BqACb05k5yV7fU4PLfTuA8e21wXnpseh6OPLKjOlhV0,31185
- llm_utils/lm/lm_json.py,sha256=At_sHZJRU_7_FtvpZ9HbkKLfftO6DkDNhRc9XVoUfx4,2039
- llm_utils/lm/utils.py,sha256=-fDNueiXKQI6RDoNHJYNyORomf2XlCf2doJZ3GEV2Io,4762
- llm_utils/scripts/README.md,sha256=yuOLnLa2od2jp4wVy3rV0rESeiV3o8zol5MNMsZx0DY,999
- llm_utils/scripts/example_vllm_client.py,sha256=tmSRlqHOzBEQ7nTq5I_pY4SH1QKhZF_OMjgD_PV3zp8,8165
- llm_utils/scripts/requirements_example.txt,sha256=mN0Qckft6kUCyuqnvGp1Q3N_RHrbg64AlMvZ-2ougOc,45
- llm_utils/scripts/serve_script.sh,sha256=W9V-wfdWbAWl2riYcXvtYtP5TtmAUss4z8uwZf8wZXs,428
- llm_utils/scripts/vllm_load_balancer.py,sha256=17zaq8RJseikHVoAibGOz0p_MCLcNlnhZDkk7g4cuLc,17519
- llm_utils/scripts/vllm_serve.py,sha256=CbW_3Y9Vt7eQYoGGPT3yj1nhbLYOc3b1LdJBy1sVX-Y,11976
- speedy_utils/__init__.py,sha256=kxQk4PGS3Xkxnerm0YqjF6GKTpgoaTc1vudKid-2c_A,3388
- speedy_utils/all.py,sha256=A9jiKGjo950eg1pscS9x38OWAjKGyusoAN5mrfweY4E,3090
- speedy_utils/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- speedy_utils/common/clock.py,sha256=3n4FkCW0dz46O8By09V5Pve1DSMgpLDRbWEVRryryeQ,7423
- speedy_utils/common/function_decorator.py,sha256=r_r42qCWuNcu0_aH7musf2BWvcJfgZrD81G28mDcolw,2226
- speedy_utils/common/logger.py,sha256=NIOlhhACpcc0BUTSJ8oDYrLp23J2gW_KJFyRVdLN2tY,6432
- speedy_utils/common/notebook_utils.py,sha256=7tFXMaE365ubrD9VCwDoZe7oBqZ2qG5t_Vla4Qvg1M8,2062
- speedy_utils/common/report_manager.py,sha256=dgGfS_fHbZiQMsLzkgnj0OfB758t1x6B4MhjsetSl9A,3930
- speedy_utils/common/utils_cache.py,sha256=yHtoe29Zpzgkp9sGknkIVctm6qcRa6BMR5x6PhOCzvk,8384
- speedy_utils/common/utils_io.py,sha256=d7PKz5tOPrwHDr7GXuYmILvjXJOFEwfzAEIuUcYaI60,4790
- speedy_utils/common/utils_misc.py,sha256=nsQOu2jcplcFHVQ1CnOjEpNcdxIINveGxB493Cqo63U,1812
- speedy_utils/common/utils_print.py,sha256=iQqnOYw2EFC8TqeSDbrcnIQAUKT7FbB8Mec8b2aGAzw,4833
- speedy_utils/multi_worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- speedy_utils/multi_worker/process.py,sha256=f5DKWysQqxSHwEZ8RobxqxfyMPMb712_raynYb2cPmc,6867
- speedy_utils/multi_worker/thread.py,sha256=9pXjvgjD0s0Hp0cZ6I3M0ndp1OlYZ1yvqbs_bcun_Kw,12775
- speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- speedy_utils/scripts/mpython.py,sha256=_J3Y38FsMcCNf-ZWSZ9iOt1Kqed9gr0WrmMHRleaeVY,3871
- speedy_utils/scripts/openapi_client_codegen.py,sha256=4Y1HO8Uht-hCfHEVxY6TlSeTOcZZRVwJ-SBu-bFsAOk,9747
- speedy_utils-1.0.15.dist-info/METADATA,sha256=AeZXiLOUj76H0kAiduMoozmo30SjlFSgus6cg23bG7k,7392
- speedy_utils-1.0.15.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
- speedy_utils-1.0.15.dist-info/entry_points.txt,sha256=T1t85jwx8fK6m5msdkBGIXH5R5Kd0zSL0S6erXERPzg,237
- speedy_utils-1.0.15.dist-info/RECORD,,