tuft 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tuft/__main__.py ADDED
@@ -0,0 +1,7 @@
1
+ """Entry point for running tuft as a module: python -m tuft."""
2
+
3
+ from tuft.cli import main
4
+
5
+
6
+ if __name__ == "__main__":
7
+ main()
tuft/cli.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import logging
6
+ import os
6
7
  from pathlib import Path
7
8
 
8
9
  import typer
@@ -14,34 +15,66 @@ from .telemetry import init_telemetry
14
15
  from .telemetry.metrics import ResourceMetricsCollector
15
16
 
16
17
 
17
- app = typer.Typer(help="TuFT - Tenant-unified Fine-Tuning Server.")
18
+ app = typer.Typer(help="TuFT - Tenant-unified Fine-Tuning Server.", no_args_is_help=True)
19
+
20
+
21
+ # Required for Typer to recognize subcommands when using no_args_is_help=True
22
+ @app.callback()
23
+ def callback() -> None:
24
+ """TuFT - Tenant-unified Fine-Tuning Server."""
25
+
26
+
27
+ # Default paths based on TUFT_HOME
28
+ _TUFT_HOME = Path(os.environ.get("TUFT_HOME", Path.home() / ".tuft"))
29
+ _DEFAULT_CONFIG_PATH = _TUFT_HOME / "configs" / "tuft_config.yaml"
30
+ _DEFAULT_CHECKPOINT_DIR = _TUFT_HOME / "checkpoints"
18
31
 
19
32
  _HOST_OPTION = typer.Option("127.0.0.1", "--host", help="Interface to bind", envvar="TUFT_HOST")
20
33
  _PORT_OPTION = typer.Option(10610, "--port", "-p", help="Port to bind", envvar="TUFT_PORT")
21
- _LOG_LEVEL_OPTION = typer.Option("info", "--log-level", help="Uvicorn log level")
34
+ _LOG_LEVEL_OPTION = typer.Option(
35
+ "info", "--log-level", help="Uvicorn log level", envvar="TUFT_LOG_LEVEL"
36
+ )
22
37
  _RELOAD_OPTION = typer.Option(False, "--reload", help="Enable auto-reload (development only)")
23
38
  _CONFIG_OPTION = typer.Option(
24
39
  None,
25
40
  "--config",
26
41
  "-c",
27
- help="Path to a TuFT configuration file (YAML)",
42
+ help=f"Path to a TuFT configuration file (YAML). Defaults to {_DEFAULT_CONFIG_PATH}",
43
+ envvar="TUFT_CONFIG",
28
44
  )
29
45
  _CHECKPOINT_DIR_OPTION = typer.Option(
30
46
  None,
31
47
  "--checkpoint-dir",
32
- help="Override checkpoint_dir from config file. Defaults to ~/.cache/tuft/checkpoints.",
48
+ help=f"Override checkpoint_dir from config file. Defaults to {_DEFAULT_CHECKPOINT_DIR}",
49
+ envvar="TUFT_CHECKPOINT_DIR",
33
50
  )
34
51
 
35
52
 
53
+ def _resolve_config_path(config_path: Path | None) -> Path:
54
+ """Resolve the config path, falling back to default if not provided."""
55
+ if config_path is not None:
56
+ return config_path
57
+ if _DEFAULT_CONFIG_PATH.exists():
58
+ return _DEFAULT_CONFIG_PATH
59
+ raise typer.BadParameter(
60
+ f"Configuration file must be provided via --config or TUFT_CONFIG, "
61
+ f"or create a default config at {_DEFAULT_CONFIG_PATH}"
62
+ )
63
+
64
+
36
65
  def _build_config(
37
66
  config_path: Path | None,
38
67
  checkpoint_dir: Path | None,
39
68
  ) -> AppConfig:
40
- if config_path is None:
41
- raise typer.BadParameter("Configuration file must be provided via --config")
42
- config = load_yaml_config(config_path)
69
+ resolved_config_path = _resolve_config_path(config_path)
70
+ config = load_yaml_config(resolved_config_path)
71
+ # Apply checkpoint_dir override, or use default if not in config
43
72
  if checkpoint_dir is not None:
44
73
  config.checkpoint_dir = checkpoint_dir.expanduser()
74
+ elif config.checkpoint_dir is None:
75
+ config.checkpoint_dir = _DEFAULT_CHECKPOINT_DIR
76
+ # Guarantee checkpoint_dir is set after resolution
77
+ assert config.checkpoint_dir is not None, "checkpoint_dir must be set after config resolution"
45
78
  config.ensure_directories()
46
79
  return config
47
80
 
@@ -84,7 +117,7 @@ def launch(
84
117
 
85
118
 
86
119
  def main() -> None:
87
- app()
120
+ app(prog_name="tuft")
88
121
 
89
122
 
90
123
  if __name__ == "__main__":
tuft/config.py CHANGED
@@ -9,8 +9,9 @@ from typing import Dict, Iterable, List
9
9
  from .persistence import PersistenceConfig
10
10
 
11
11
 
12
- def _default_checkpoint_dir() -> Path:
13
- return Path.home() / ".cache" / "tuft" / "checkpoints"
12
+ def _default_checkpoint_dir() -> Path | None:
13
+ """Return None to let CLI set the default based on TUFT_HOME."""
14
+ return None
14
15
 
15
16
 
16
17
  def _default_persistence_config() -> PersistenceConfig:
@@ -42,7 +43,7 @@ def _default_telemetry_config() -> TelemetryConfig:
42
43
  class AppConfig:
43
44
  """Runtime configuration for the TuFT server."""
44
45
 
45
- checkpoint_dir: Path = field(default_factory=_default_checkpoint_dir)
46
+ checkpoint_dir: Path | None = field(default_factory=_default_checkpoint_dir)
46
47
  supported_models: List[ModelConfig] = field(default_factory=list)
47
48
  model_owner: str = "local-user"
48
49
  toy_backend_seed: int = 0
@@ -53,7 +54,8 @@ class AppConfig:
53
54
  telemetry: TelemetryConfig = field(default_factory=_default_telemetry_config)
54
55
 
55
56
  def ensure_directories(self) -> None:
56
- self.checkpoint_dir.mkdir(parents=True, exist_ok=True)
57
+ if self.checkpoint_dir is not None:
58
+ self.checkpoint_dir.mkdir(parents=True, exist_ok=True)
57
59
 
58
60
  def check_validity(self) -> None:
59
61
  if not self.supported_models:
@@ -181,8 +181,10 @@ class SamplingController:
181
181
  if model_path:
182
182
  # model_path should have higher priority than base_model
183
183
  try:
184
+ assert self.config.checkpoint_dir is not None
184
185
  parsed_checkpoint = CheckpointRecord.from_tinker_path(
185
- model_path, self.config.checkpoint_dir
186
+ model_path,
187
+ self.config.checkpoint_dir,
186
188
  )
187
189
  except FileNotFoundError as exc:
188
190
  raise CheckpointNotFoundException(checkpoint_id=model_path) from exc
@@ -508,6 +508,7 @@ class TrainingController:
508
508
  logger.info("Checkpoint save begin: %s", checkpoint_id)
509
509
 
510
510
  setattr(training_run, counter_attr, counter + 1)
511
+ assert self.config.checkpoint_dir is not None
511
512
  checkpoint = CheckpointRecord.from_training_run(
512
513
  training_run_id=training_run.training_run_id,
513
514
  checkpoint_name=checkpoint_name,
@@ -567,7 +568,11 @@ class TrainingController:
567
568
  ) -> None:
568
569
  """Load a checkpoint."""
569
570
  try:
570
- parsed_checkpoint = CheckpointRecord.from_tinker_path(path, self.config.checkpoint_dir)
571
+ assert self.config.checkpoint_dir is not None
572
+ parsed_checkpoint = CheckpointRecord.from_tinker_path(
573
+ path,
574
+ self.config.checkpoint_dir,
575
+ )
571
576
  except FileNotFoundError as exc:
572
577
  raise CheckpointNotFoundException(checkpoint_id=model_id) from exc
573
578
  source_model_id = parsed_checkpoint.training_run_id or model_id
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tuft
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: A multi-tenant fine-tuning platform for LLMs with Tinker-compatible API
5
5
  Author-email: TuFT Developers <tuft@list.alibaba-inc.com>
6
6
  License: MIT License
@@ -29,6 +29,7 @@ Requires-Python: >=3.11
29
29
  Requires-Dist: fastapi>=0.125.0
30
30
  Requires-Dist: httpx>=0.28.1
31
31
  Requires-Dist: numpy<2.0.0
32
+ Requires-Dist: nvidia-ml-py>=13.0.0
32
33
  Requires-Dist: omegaconf>=2.3.0
33
34
  Requires-Dist: opentelemetry-api>=1.20.0
34
35
  Requires-Dist: opentelemetry-exporter-otlp>=1.20.0
@@ -36,7 +37,6 @@ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.41b0
36
37
  Requires-Dist: opentelemetry-instrumentation-logging>=0.41b0
37
38
  Requires-Dist: opentelemetry-sdk>=1.20.0
38
39
  Requires-Dist: psutil>=5.9.0
39
- Requires-Dist: pynvml>=11.5.0
40
40
  Requires-Dist: ray>=2.50.0
41
41
  Requires-Dist: tinker>=0.7.0
42
42
  Requires-Dist: transformers<5.0.0,>=4.57.3
@@ -1,15 +1,16 @@
1
1
  tuft/__init__.py,sha256=BJu6iJ_QGwcJXRXDgR1LjV25KgM6sVd7_WqIXVTEuVM,97
2
+ tuft/__main__.py,sha256=MPhC9msQXf9py5xkLPQ4JoqrvCpL_qXVwksasNUj7ig,131
2
3
  tuft/auth.py,sha256=2Wk9ATXlAiGm1Irpj66CfIyORuHzciSNAOzVwM8PeO0,1071
3
4
  tuft/backend.py,sha256=ftiaaNds2MXroszZW8l6DEq515qiw1KmrODI3x6AHE4,10254
4
5
  tuft/checkpoints.py,sha256=bObo2NzDrfzp5BiS6I_FIA3frLFic_sT4o4c-PEzfpk,6917
5
- tuft/cli.py,sha256=PJ89JfrJ7wB5Zd-pe9rkWLcfavmWRtWLmQ_r1Y_Qdwk,2725
6
- tuft/config.py,sha256=EGkDmnYNNHgtkEuffHoBE9R-hc2epAJe4sUEVBP_6Ug,4292
6
+ tuft/cli.py,sha256=-WhmHGIHmWtL46LvXRlhTPVPhBUjZHVLJi0nYR_pqoE,4024
7
+ tuft/config.py,sha256=bX6NuSora0Wqhk5Q5lsnc0lojeevxnLHfiijJHMdtVg,4380
7
8
  tuft/exceptions.py,sha256=_xdsL8bx3Y6jvC5VYHVCa73uAEWXxcl2YwVc09lJXFk,4088
8
9
  tuft/futures.py,sha256=0gRLgDJJQRGGmULYsKdUs3VDsrLN8QfuFfXV00kxHO4,16375
9
- tuft/sampling_controller.py,sha256=WIQ29aVD9JWuxZ8JV4a71nYZXh8Es2wtA4QhaaGRSXQ,15151
10
+ tuft/sampling_controller.py,sha256=c02VQ6Qww9IQC9VJYzQO9Z9v45kK2QeaOKlknYWjSI4,15250
10
11
  tuft/server.py,sha256=NUapRGdQbQH6PbuCfMZeMVi_7vM6nM7xmxepCPkgyko,24996
11
12
  tuft/state.py,sha256=J9R5Wd9JlMtpYcaY_6t5RvgJbY3EX5ZJTZfoQhwZ9hU,12853
12
- tuft/training_controller.py,sha256=fQI6sxtG3v2JYgbd1y501MLzuTUAp0NIvzv6cBOm-T8,29841
13
+ tuft/training_controller.py,sha256=V4JMgyEnf4wYGrk72AR5rHH1iYl488vt7d0c-ubTrO0,30008
13
14
  tuft/backends/__init__.py,sha256=7A6Pu-vEMbcMWapAh-zkI1O5WtBHO0OxwED8qAy9kAQ,262
14
15
  tuft/backends/base_backend.py,sha256=bdlx3hRyEj00GKFlh2fAczn7h4zANz7bdKgXb_F18y4,3462
15
16
  tuft/backends/hf_training_model.py,sha256=XQa598SpY7DnYYU0rTaHjlh-5dRCPueFtcdxrcjXWIc,16993
@@ -28,8 +29,8 @@ tuft/telemetry/__init__.py,sha256=dlSGiJ_pMElhwEe31olGg88ZrjoBeGUBn2P17qFNymM,33
28
29
  tuft/telemetry/metrics.py,sha256=Yz6s2AQ5CptFXvEm-PbO-Ib17-aF0rnoG8vZxH-Pawo,11538
29
30
  tuft/telemetry/provider.py,sha256=jGKqTMsP-WekKGCMN9QHwt-g_1Lk1xUOy1BO-__xG5I,6700
30
31
  tuft/telemetry/tracing.py,sha256=GL-wEEQtzM1ycgfI4sMsHUeIC7qj5MyOH-sBwHihbsE,957
31
- tuft-0.1.1.dist-info/METADATA,sha256=zH2lHrE8kZh2O61cvI7_uSyGCJ9obARa0FZGLnj0HQY,20375
32
- tuft-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
33
- tuft-0.1.1.dist-info/entry_points.txt,sha256=T48zU7Vdi2ZsARDeOZ9jK6XGuYNaCbSaUTd5POouLms,39
34
- tuft-0.1.1.dist-info/licenses/LICENSE,sha256=fJHdoqbikZ-GATzLNmixfKDot1w_cJuHKY3mH4qSmYs,1069
35
- tuft-0.1.1.dist-info/RECORD,,
32
+ tuft-0.1.2.dist-info/METADATA,sha256=UlTE_gR3cPFLzV69GyIHD6TOm-dHSmSM5NcEHt8L0Pg,20381
33
+ tuft-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
34
+ tuft-0.1.2.dist-info/entry_points.txt,sha256=T48zU7Vdi2ZsARDeOZ9jK6XGuYNaCbSaUTd5POouLms,39
35
+ tuft-0.1.2.dist-info/licenses/LICENSE,sha256=fJHdoqbikZ-GATzLNmixfKDot1w_cJuHKY3mH4qSmYs,1069
36
+ tuft-0.1.2.dist-info/RECORD,,
File without changes