PyPI - speedy-utils - Versions diffs - 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl - Mend

speedy-utils 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

llm_utils/__init__.py +10 -15
llm_utils/lm/alm.py +24 -3
llm_utils/lm/chat_html.py +244 -0
llm_utils/lm/lm.py +390 -74
llm_utils/lm/lm_json.py +72 -0
llm_utils/scripts/README.md +48 -0
llm_utils/scripts/example_vllm_client.py +269 -0
llm_utils/scripts/requirements_example.txt +3 -0
llm_utils/scripts/serve_script.sh +2 -0
speedy_utils/__init__.py +96 -5
speedy_utils/common/notebook_utils.py +63 -0
speedy_utils/common/utils_cache.py +3 -3
speedy_utils/common/utils_print.py +2 -65
speedy_utils/multi_worker/process.py +7 -0
speedy_utils/scripts/__init__.py +0 -0
speedy_utils/scripts/mpython.py +3 -2
speedy_utils/scripts/openapi_client_codegen.py +258 -0
{speedy_utils-1.0.13.dist-info → speedy_utils-1.0.15.dist-info}/METADATA +1 -1
{speedy_utils-1.0.13.dist-info → speedy_utils-1.0.15.dist-info}/RECORD +21 -12
{speedy_utils-1.0.13.dist-info → speedy_utils-1.0.15.dist-info}/entry_points.txt +1 -0
{speedy_utils-1.0.13.dist-info → speedy_utils-1.0.15.dist-info}/WHEEL +0 -0

llm_utils/scripts/example_vllm_client.py ADDED Viewed

@@ -0,0 +1,269 @@
+"""
+Beautiful example script for interacting with VLLM server.
+This script demonstrates various ways to use the VLLM API server
+for text generation tasks.
+"""
+import asyncio
+import json
+from typing import Any, Dict, List, Optional, Union
+import aiohttp
+from loguru import logger
+from pydantic import BaseModel, Field
+class VLLMRequest(BaseModel):
+    """Request model for VLLM API.
+    Holds the parameters of one completion call; the numeric fields are
+    range-checked by pydantic through the bounds given to ``Field``.
+    """
+    # Prompt text (required; no default).
+    prompt: str
+    # Token limit for the call; validated to lie in 1..8192.
+    max_tokens: int = Field(default=512, ge=1, le=8192)
+    # Sampling temperature; validated to lie in 0.0..2.0.
+    temperature: float = Field(default=0.7, ge=0.0, le=2.0)
+    # top_p sampling parameter; validated to lie in 0.0..1.0.
+    top_p: float = Field(default=0.9, ge=0.0, le=1.0)
+    # Forwarded as the 'stream' flag of the request payload.
+    stream: bool = False
+    # Optional stop sequences; None means no stop list is supplied.
+    stop: Optional[List[str]] = None
+class VLLMResponse(BaseModel):
+    """Response model from VLLM API.
+    A flattened view of one completion result: the text and finish reason
+    of a single choice together with the token-usage counters.
+    """
+    # Generated text of the choice.
+    text: str
+    # Finish reason string reported for the choice.
+    finish_reason: str
+    # Token counters copied from the response's 'usage' object.
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+class VLLMClient:
+    """Client for interacting with VLLM server.
+    Each call opens its own short-lived ``aiohttp.ClientSession``, so the
+    client holds no network resources between calls.
+    Args:
+        base_url: Root URL of the server; trailing slashes are ignored.
+        model_name: Value sent as the ``model`` field of every completion
+            request.
+    """
+    def __init__(
+        self,
+        base_url: str = 'http://localhost:8140',
+        model_name: str = 'selfeval_8b',
+    ):
+        # Strip trailing slashes so the URL joins below never produce a
+        # double slash such as 'http://host//v1/completions'.
+        self.base_url = base_url.rstrip('/')
+        self.model_name = model_name
+    async def generate_text(
+        self,
+        request: VLLMRequest
+    ) -> VLLMResponse:
+        """Generate text using VLLM API.
+        Posts ``request`` to ``{base_url}/v1/completions`` with a 60 second
+        total timeout and returns the first choice plus the token usage.
+        Raises:
+            aiohttp.ClientError: On transport failures or a non-success
+                status (via ``raise_for_status``); logged, then re-raised.
+            Exception: Anything else (for example a body without
+                ``choices`` or ``usage``) is logged and re-raised unchanged.
+        """
+        url = f'{self.base_url}/v1/completions'
+        payload: Dict[str, Any] = {
+            'model': self.model_name,
+            'prompt': request.prompt,
+            'max_tokens': request.max_tokens,
+            'temperature': request.temperature,
+            'top_p': request.top_p,
+            # NOTE(review): the body is parsed below as one JSON document;
+            # a streamed reply is presumably not handled - confirm before
+            # sending stream=True.
+            'stream': request.stream,
+        }
+        # Only send 'stop' when the caller supplied a non-empty list.
+        if request.stop:
+            payload['stop'] = request.stop
+        async with aiohttp.ClientSession() as session:
+            try:
+                async with session.post(
+                    url,
+                    json=payload,
+                    timeout=aiohttp.ClientTimeout(total=60)
+                ) as response:
+                    response.raise_for_status()
+                    data = await response.json()
+                    choice = data['choices'][0]
+                    usage = data['usage']
+                    return VLLMResponse(
+                        text=choice['text'],
+                        finish_reason=choice['finish_reason'],
+                        prompt_tokens=usage['prompt_tokens'],
+                        completion_tokens=usage['completion_tokens'],
+                        total_tokens=usage['total_tokens']
+                    )
+            except aiohttp.ClientError as e:
+                logger.error(f'HTTP error: {e}')
+                raise
+            except Exception as e:
+                logger.error(f'Unexpected error: {e}')
+                raise
+    async def generate_batch(
+        self,
+        requests: List[VLLMRequest]
+    ) -> List[Union[VLLMResponse, BaseException]]:
+        """Generate text for multiple requests concurrently.
+        Results come back in the order of ``requests``. Because the calls
+        are gathered with ``return_exceptions=True``, a failed request
+        appears as the exception object in its slot instead of aborting
+        the whole batch, so callers must check each item's type.
+        """
+        tasks = [self.generate_text(req) for req in requests]
+        return await asyncio.gather(*tasks, return_exceptions=True)
+    async def health_check(self) -> bool:
+        """Check if the VLLM server is healthy.
+        Returns True only when ``GET {base_url}/health`` answers with
+        status 200 within 10 seconds; any exception is logged as a
+        warning and reported as False.
+        """
+        url = f'{self.base_url}/health'
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(
+                    url,
+                    timeout=aiohttp.ClientTimeout(total=10)
+                ) as response:
+                    return response.status == 200
+        except Exception as e:
+            logger.warning(f'Health check failed: {e}')
+            return False
+async def example_basic_generation():
+    """Example: Basic text generation.
+    Checks server health first, then requests a single completion and
+    logs the generated text and its token counts.
+    """
+    logger.info('🚀 Running basic generation example')
+    vllm = VLLMClient()
+    # Bail out early when the server does not answer the health probe.
+    server_ok = await vllm.health_check()
+    if not server_ok:
+        logger.error('❌ Server is not healthy')
+        return
+    params = {
+        'prompt': 'Explain the concept of machine learning in simple terms:',
+        'max_tokens': 256,
+        'temperature': 0.7,
+        'stop': ['\n\n'],
+    }
+    req = VLLMRequest(**params)
+    try:
+        result = await vllm.generate_text(req)
+        logger.success('✅ Generation completed')
+        logger.info(f'📝 Generated text:\n{result.text}')
+        token_summary = (
+            f'📊 Tokens: {result.total_tokens} total '
+            f'({result.prompt_tokens} prompt + '
+            f'{result.completion_tokens} completion)'
+        )
+        logger.info(token_summary)
+    except Exception as err:
+        logger.error(f'❌ Generation failed: {err}')
+async def example_batch_generation():
+    """Example: Batch text generation.
+    Sends three prompts concurrently through ``generate_batch`` and logs
+    each outcome, reporting failed requests individually.
+    """
+    logger.info('🚀 Running batch generation example')
+    vllm = VLLMClient()
+    questions = (
+        'What is artificial intelligence?',
+        'Explain quantum computing briefly:',
+        'What are the benefits of renewable energy?',
+    )
+    batch = [
+        VLLMRequest(prompt=question, max_tokens=128, temperature=0.8)
+        for question in questions
+    ]
+    try:
+        outcomes = await vllm.generate_batch(batch)
+        # Number requests from 1 for the log messages.
+        for number, outcome in enumerate(outcomes, start=1):
+            if isinstance(outcome, Exception):
+                logger.error(f'❌ Request {number} failed: {outcome}')
+                continue
+            logger.success(f'✅ Request {number} completed')
+            logger.info(f'📝 Response {number}:\n{outcome.text}\n')
+    except Exception as err:
+        logger.error(f'❌ Batch generation failed: {err}')
+async def example_creative_writing():
+    """Example: Creative writing with specific parameters.
+    Requests a short story with a higher temperature and top_p, then logs
+    the story and the reported finish reason.
+    """
+    logger.info('🚀 Running creative writing example')
+    vllm = VLLMClient()
+    story_prompt = (
+        'Write a short story about a robot discovering emotions. '
+        'The story should be exactly 3 paragraphs:\n\n'
+    )
+    stop_markers = ['THE END', '\n\n\n']
+    # temperature 1.2: higher than the default, for more creative output.
+    req = VLLMRequest(
+        prompt=story_prompt,
+        max_tokens=400,
+        temperature=1.2,
+        top_p=0.95,
+        stop=stop_markers,
+    )
+    try:
+        story = await vllm.generate_text(req)
+        logger.success('✅ Creative writing completed')
+        logger.info(f'📚 Story:\n{story.text}')
+        logger.info(f'🎯 Finish reason: {story.finish_reason}')
+    except Exception as err:
+        logger.error(f'❌ Creative writing failed: {err}')
+async def example_code_generation():
+    """Example: Code generation.
+    Prompts with an opened python code fence and stops at the closing
+    fence, then logs the generated snippet wrapped in a fence again.
+    """
+    logger.info('🚀 Running code generation example')
+    vllm = VLLMClient()
+    code_prompt = (
+        'Write a Python function that calculates the fibonacci '
+        'sequence up to n terms:\n\n```python\n'
+    )
+    stop_markers = ['```', '\n\n\n']
+    # temperature 0.2: lower than the default, for more stable code output.
+    req = VLLMRequest(
+        prompt=code_prompt,
+        max_tokens=300,
+        temperature=0.2,
+        stop=stop_markers,
+    )
+    try:
+        snippet = await vllm.generate_text(req)
+        logger.success('✅ Code generation completed')
+        logger.info(f'💻 Generated code:\n```python\n{snippet.text}\n```')
+    except Exception as err:
+        logger.error(f'❌ Code generation failed: {err}')
+async def main():
+    """Run all examples.
+    Executes the four example coroutines one after another, logging a
+    separator line and pausing one second after each.
+    """
+    logger.info('🎯 Starting VLLM Client Examples')
+    logger.info('=' * 50)
+    scheduled = (
+        example_basic_generation,
+        example_batch_generation,
+        example_creative_writing,
+        example_code_generation,
+    )
+    for run_example in scheduled:
+        await run_example()
+        logger.info('-' * 50)
+        # Brief pause between examples.
+        await asyncio.sleep(1)
+    logger.info('🎉 All examples completed!')
+if __name__ == '__main__':
+    # Swap the default logger sink for one that prints each formatted
+    # record to stdout as-is (the record already ends with a newline).
+    def _emit(msg):
+        print(msg, end='')
+    log_format = (
+        '<green>{time:HH:mm:ss}</green> | '
+        '<level>{level: <8}</level> | '
+        '<cyan>{message}</cyan>'
+    )
+    logger.remove()
+    logger.add(_emit, format=log_format, level='INFO')
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        logger.info('\n👋 Goodbye!')
+    except Exception as err:
+        logger.error(f'❌ Script failed: {err}')

llm_utils/scripts/requirements_example.txt ADDED Viewed

@@ -0,0 +1,3 @@
+aiohttp>=3.8.0
+loguru>=0.6.0
+pydantic>=2.0.0

llm_utils/scripts/serve_script.sh ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ HF_HOME=/home/anhvth5/.cache/huggingface CUDA_VISIBLE_DEVICES=0 /home/anhvth5/miniconda3/envs/unsloth_env/bin/vllm serve ./outputs/8B_selfeval_retranslate/Qwen3-8B_2025_05_30/ls_response_only_r8_a8_sq8192_lr5e_06_bz64_ep1_4/ --port 8140 --tensor-parallel 1 --gpu-memory-utilization 0.9 --dtype auto --max-model-len 8192 --enable-prefix-caching --disable-log-requests --served-model-name selfeval_8b
2	+ # Logging to /tmp/vllm_8140.txt (informational note only; the command above does not redirect its output)

speedy_utils/__init__.py CHANGED Viewed

@@ -33,9 +33,11 @@ from .common.utils_misc import (
 # Print utilities
 from .common.utils_print import (
-    display_pretty_table_html,
     flatten_dict,
     fprint,
+)
+from .common.notebook_utils import (
+    display_pretty_table_html,
     print_table,
 )
@@ -43,8 +45,98 @@ from .common.utils_print import (
 from .multi_worker.process import multi_process
 from .multi_worker.thread import multi_thread
+# notebook
+from .common.notebook_utils import change_dir
+# Standard library imports
+import copy
+import functools
+import gc
+import inspect
+import json
+import multiprocessing
+import os
+import os.path as osp
+import pickle
+import pprint
+import random
+import re
+import sys
+import textwrap
+import threading
+import time
+import traceback
+import uuid
+from collections import Counter, defaultdict
+from collections.abc import Callable
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from glob import glob
+from multiprocessing import Pool
+from pathlib import Path
+from threading import Lock
+from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
+# Third-party imports
+import numpy as np
+import pandas as pd
+import xxhash
+from IPython.core.getipython import get_ipython
+from IPython.display import HTML, display
+from loguru import logger
+from pydantic import BaseModel
+from tabulate import tabulate
+from tqdm import tqdm
 # Define __all__ explicitly
 __all__ = [
+    # Standard library
+    "random",
+    "copy",
+    "functools",
+    "gc",
+    "inspect",
+    "json",
+    "multiprocessing",
+    "os",
+    "osp",
+    "pickle",
+    "pprint",
+    "re",
+    "sys",
+    "textwrap",
+    "threading",
+    "time",
+    "traceback",
+    "uuid",
+    "Counter",
+    "ThreadPoolExecutor",
+    "as_completed",
+    "glob",
+    "Pool",
+    "Path",
+    "Lock",
+    "defaultdict",
+    # Typing
+    "Any",
+    "Callable",
+    "Dict",
+    "Generic",
+    "List",
+    "Literal",
+    "Optional",
+    "TypeVar",
+    "Union",
+    # Third-party
+    "pd",
+    "xxhash",
+    "get_ipython",
+    "HTML",
+    "display",
+    "logger",
+    "BaseModel",
+    "tabulate",
+    "tqdm",
+    "np",
     # Clock module
     "Clock",
     "speedy_timer",
@@ -79,7 +171,6 @@ __all__ = [
     # Multi-worker processing
     "multi_process",
     "multi_thread",
-]
-# Setup default logger
-# setup_logger('D')
+    # Notebook utilities
+    "change_dir",
+]

speedy_utils/common/notebook_utils.py ADDED Viewed

@@ -0,0 +1,63 @@
+# jupyter notebook utilities
+import json
+import os
+import pathlib
+from typing import Any
+from IPython.display import HTML, display
+from tabulate import tabulate
+def change_dir(target_directory: str = 'POLY') -> None:
+    """Change directory to the first occurrence of target_directory in the current path.
+    The current working directory path is cut right after the first place
+    where ``target_directory`` appears as a substring, and the process
+    changes into that truncated path.
+    Args:
+        target_directory: Text to look for in the absolute path of the
+            current working directory.
+    Raises:
+        ValueError: If ``target_directory`` is an empty string.
+        FileNotFoundError: If ``target_directory`` does not occur in the
+            current path (or the truncated path does not exist).
+    """
+    cwd = str(pathlib.Path('./').absolute())
+    head, marker, _ = cwd.partition(target_directory)
+    # Without this guard a missing marker would be appended to the full
+    # cwd, yielding a path that was never part of the current location.
+    if not marker:
+        raise FileNotFoundError(
+            f'{target_directory!r} is not part of the current path {cwd!r}'
+        )
+    target_dir = head + marker
+    os.chdir(target_dir)
+    print(f'Current dir: {target_dir}')
+def display_pretty_table_html(data: dict) -> None:
+    """Display a pretty HTML table in Jupyter notebooks.
+    Each key/value pair of ``data`` becomes one two-cell table row; keys
+    and values are interpolated into the markup as-is.
+    """
+    body = "".join(
+        f"<tr><td>{key}</td><td>{value}</td></tr>"
+        for key, value in data.items()
+    )
+    display(HTML(f"<table>{body}</table>"))
+def print_table(data: Any, use_html: bool = True) -> None:
+    """Print data as a table. If use_html is True, display using IPython HTML.
+    Args:
+        data: A dict (rendered as Key/Value rows), a list of dicts (one
+            row per dict), or a JSON string that decodes to either.
+        use_html: When True the table is built with tabulate's "html"
+            format and shown via ``display(HTML(...))``; otherwise it is
+            built with the "grid" format and printed.
+    Raises:
+        ValueError: If a string is not valid JSON, or a list holds a
+            non-dict item.
+        TypeError: If the (decoded) data is neither a list nor a dict.
+    """
+    table_format = "html" if use_html else "grid"
+    def _build_table(payload: Any) -> str:
+        # Strings are treated as JSON and decoded before rendering.
+        if isinstance(payload, str):
+            try:
+                payload = json.loads(payload)
+            except json.JSONDecodeError as exc:
+                raise ValueError("String input could not be decoded as JSON") from exc
+        if isinstance(payload, list):
+            if not all(isinstance(item, dict) for item in payload):
+                raise ValueError("List must contain dictionaries")
+            # Collect every key in first-seen order. This keeps an empty
+            # list from failing on payload[0] and keeps columns aligned
+            # when the dicts do not all share the same keys.
+            headers = list(dict.fromkeys(key for item in payload for key in item))
+            rows = [[item.get(header) for header in headers] for item in payload]
+            return tabulate(rows, headers=headers, tablefmt=table_format)
+        if isinstance(payload, dict):
+            return tabulate(
+                list(payload.items()),
+                headers=["Key", "Value"],
+                tablefmt=table_format,
+            )
+        raise TypeError(
+            "Input data must be a list of dictionaries, a dictionary, or a JSON string"
+        )
+    table = _build_table(data)
+    if use_html:
+        display(HTML(table))
+    else:
+        print(table)

speedy_utils/common/utils_cache.py CHANGED Viewed

@@ -80,9 +80,9 @@ def identify(obj: Any, depth=0, max_depth=2) -> str:
     elif obj is None:
         return identify("None", depth + 1, max_depth)
     else:
-        primitive_types = [int, float, str, bool]
-        if not type(obj) in primitive_types:
-            logger.warning(f"Unknown type: {type(obj)}")
+        # primitive_types = [int, float, str, bool]
+        # if not type(obj) in primitive_types:
+        #     logger.warning(f"Unknown type: {type(obj)}")
         return xxhash.xxh64_hexdigest(fast_serialize(obj), seed=0)

speedy_utils/common/utils_print.py CHANGED Viewed

@@ -1,32 +1,13 @@
 # utils/utils_print.py
 import copy
-import inspect
-import json
 import pprint
-import re
-import sys
 import textwrap
-import time
-from collections import OrderedDict
-from typing import Annotated, Any, Dict, List, Literal, Optional
+from typing import Any
-from IPython.display import HTML, display
-from loguru import logger
 from tabulate import tabulate
-from .utils_misc import is_notebook
-def display_pretty_table_html(data: dict) -> None:
-    """
-    Display a pretty HTML table in Jupyter notebooks.
-    """
-    table = "<table>"
-    for key, value in data.items():
-        table += f"<tr><td>{key}</td><td>{value}</td></tr>"
-    table += "</table>"
-    display(HTML(table))
+from .notebook_utils import display_pretty_table_html
 # Flattening the dictionary using "." notation for keys
@@ -166,51 +147,7 @@ def fprint(
         printer.pprint(processed_data)
-def print_table(data: Any, use_html: bool = True) -> None:
-    """
-    Print data as a table. If use_html is True, display using IPython HTML.
-    """
-    def __get_table(data: Any) -> str:
-        if isinstance(data, str):
-            try:
-                data = json.loads(data)
-            except json.JSONDecodeError as exc:
-                raise ValueError("String input could not be decoded as JSON") from exc
-        if isinstance(data, list):
-            if all(isinstance(item, dict) for item in data):
-                headers = list(data[0].keys())
-                rows = [list(item.values()) for item in data]
-                return tabulate(
-                    rows, headers=headers, tablefmt="html" if use_html else "grid"
-                )
-            else:
-                raise ValueError("List must contain dictionaries")
-        if isinstance(data, dict):
-            headers = ["Key", "Value"]
-            rows = list(data.items())
-            return tabulate(
-                rows, headers=headers, tablefmt="html" if use_html else "grid"
-            )
-        raise TypeError(
-            "Input data must be a list of dictionaries, a dictionary, or a JSON string"
-        )
-    table = __get_table(data)
-    if use_html:
-        display(HTML(table))
-    else:
-        print(table)
 __all__ = [
-    "display_pretty_table_html",
     "flatten_dict",
     "fprint",
-    "print_table",
-    # "setup_logger",
-    # "log",
 ]

speedy_utils/multi_worker/process.py CHANGED Viewed

@@ -75,6 +75,7 @@ def multi_process(
     timeout: float | None = None,
     stop_on_error: bool = True,
     process_update_interval=10,
+    for_loop: bool = False,
     **fixed_kwargs,
 ) -> list[Any]:
     """
@@ -95,6 +96,12 @@ def multi_process(
                     substitute failing result with ``None``.
     **fixed_kwargs – static keyword args forwarded to every ``func()`` call.
     """
+    if for_loop:
+        ret = []
+        for arg in inputs:
+            ret.append(func(arg, **fixed_kwargs))
+        return ret
     if workers is None:
         workers = os.cpu_count() or 1
     if inflight is None:

speedy_utils/scripts/__init__.py ADDED Viewed

File without changes

speedy_utils/scripts/mpython.py CHANGED Viewed

@@ -85,6 +85,7 @@ def main():
     cpu_per_process = max(args.total_cpu // args.total_fold, 1)
     cmds = []
+    path_python = shutil.which("python")
     for i in range(args.total_fold):
         gpu = gpus[i % num_gpus]
         cpu_start = (i * cpu_per_process) % args.total_cpu
@@ -92,10 +93,10 @@ def main():
         ENV = f"CUDA_VISIBLE_DEVICES={gpu} MP_ID={i} MP_TOTAL={args.total_fold}"
         if taskset_path:
             fold_cmd = (
-                f"{ENV} {taskset_path} -c {cpu_start}-{cpu_end}  python {cmd_str}"
+                f"{ENV} {taskset_path} -c {cpu_start}-{cpu_end}  {path_python} {cmd_str}"
             )
         else:
-            fold_cmd = f"{ENV} python {cmd_str}"
+            fold_cmd = f"{ENV} {path_python} {cmd_str}"
         cmds.append(fold_cmd)

speedy-utils 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl

speedy-utils 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl