twinkle-kit 0.1__tar.gz → 0.2.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/PKG-INFO +23 -21
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/README.md +22 -20
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/pyproject.toml +1 -1
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/constant.py +1 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/gpt_bridge.py +6 -2
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/gpts/qwen3_next.py +6 -2
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/mm_gpts/qwen3_5.py +17 -9
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/__main__.py +1 -1
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/twinkle/sampler.py +1 -1
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/version.py +1 -1
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_kit.egg-info/PKG-INFO +23 -21
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/LICENSE +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/setup.cfg +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/advantage/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/advantage/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/advantage/grpo.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/advantage/rloo.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/checkpoint_engine/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/checkpoint_engine/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/checkpoint_engine/hccl_checkpoint_engine.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/checkpoint_engine/manager.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/checkpoint_engine/mixin.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/checkpoint_engine/nccl_checkpoint_engine.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/data_format/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/data_format/input_feature.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/data_format/message.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/data_format/output.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/data_format/sampling.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/data_format/trajectory.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/dataloader/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/dataloader/dataloader.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/dataloader/device_mesh_fetcher.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/dataloader/device_mesh_sampler.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/dataloader/retry_sampler.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/dataset/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/dataset/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/dataset/iterable_dataset.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/dataset/iterable_packing_dataset.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/dataset/lazy_dataset.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/dataset/packing_dataset.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/gym/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/gym/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/hub/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/hub/hub.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/infra/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/infra/_ray/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/infra/_ray/ray_helper.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/infra/_ray/resource_manager.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/kernel/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/kernel/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/kernel/function.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/kernel/layer.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/kernel/registry.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/loss/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/loss/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/loss/chunked_cross_entropy.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/loss/cross_entropy.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/loss/grpo.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/loss/mse.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/loss/vocab_parallel_cross_entropy.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/loss_scale/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/loss_scale/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/metric/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/metric/accuracy.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/metric/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/metric/completion_and_reward.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/metric/loss.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/metric/train_metric.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/args.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/megatron.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/gpt_model.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/gpts/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/mm_gpt_model.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/mm_gpts/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/mm_gpts/qwen.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/mm_gpts/qwen3_vl.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/mm_gpts/utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/register.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/rope.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/multi_lora_megatron.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/strategy/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/strategy/megatron.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/tuners/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/tuners/lora.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/tuners/utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/utils/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/utils/config.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/utils/utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/multi_lora.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/transformers/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/transformers/moe/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/transformers/moe/expert_parallel.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/transformers/multi_lora_transformers.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/transformers/strategy/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/transformers/strategy/accelerate.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/transformers/strategy/native_fsdp.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/transformers/strategy/sequence_parallel.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/transformers/transformers.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/module/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/module/scheduler/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/module/scheduler/cosine_warmup.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/module/scheduler/linear_warmup.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/patch/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/patch/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/patch/megatron_peft.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/patch/vllm_lora_weights.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/patch/vllm_moe_loader.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/preprocessor/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/preprocessor/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/preprocessor/llm.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/processor/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/processor/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/processor/grpo.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/reward/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/reward/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/reward/format_reward.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/reward/gsm8k.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/reward/math_reward.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/sampler/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/sampler/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/sampler/base_engine.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/sampler/torch_sampler/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/sampler/torch_sampler/torch_sampler.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/sampler/torch_sampler/transformers_engine.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/sampler/vllm_sampler/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/sampler/vllm_sampler/vllm_engine.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/sampler/vllm_sampler/vllm_sampler.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/launcher.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/common/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/common/compat_base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/common/datum.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/common/io_utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/common/megatron_model.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/common/router.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/common/transformers_model.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/model.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/proxy.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/sampler.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/tinker/server.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/twinkle/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/twinkle/common/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/twinkle/common/io_utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/twinkle/common/serialize.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/twinkle/model.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/twinkle/processor.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/twinkle/server.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/adapter_manager.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/device_utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/io_utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/rate_limiter.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/state/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/state/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/state/config_manager.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/state/future_manager.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/state/model_manager.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/state/models.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/state/sampling_manager.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/state/server_state.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/state/session_manager.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/task_queue.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/server/utils/validation.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/template/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/template/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/template/qwen3_vl.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/template/utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/dequantizer.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/device_mesh.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/framework.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/grad_clip.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/import_utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/loader.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/logger.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/network.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/parallel.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/platforms/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/platforms/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/platforms/gpu.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/platforms/mps.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/platforms/npu.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/safetensors.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/torch_utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/transformers_utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/unsafe.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/utils/utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/dataloader/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/dataloader/dataloader.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/dataset/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/dataset/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/dataset/iterable_dataset.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/dataset/iterable_packing_dataset.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/dataset/lazy_dataset.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/dataset/packing_dataset.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/http/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/http/heartbeat.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/http/http_utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/http/utils.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/manager.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/model/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/model/multi_lora_transformers.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/processor/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/processor/base.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/processor/grpo.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/reward/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/reward/math_reward.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/sampler/__init__.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/sampler/vllm_sampler.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_client/utils/patch_tinker.py +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_kit.egg-info/SOURCES.txt +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_kit.egg-info/dependency_links.txt +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_kit.egg-info/requires.txt +0 -0
- {twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle_kit.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: twinkle-kit
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.2.dev0
|
|
4
4
|
Summary: Training API for large language models with efficient data handling and advanced optimization techniques.
|
|
5
5
|
Author-email: ModelScope <contact@modelscope.cn>
|
|
6
6
|
Requires-Python: <3.13,>=3.11
|
|
@@ -155,25 +155,27 @@ supported on Twinkle✨ framework.
|
|
|
155
155
|
> both Tinker APIs, as well as the full-fledged Twinkle✨ native APIs. The serverless endpoint is backed
|
|
156
156
|
> by one training base at a time, and currently it is [Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507).
|
|
157
157
|
|
|
158
|
-
| Model Type | Model ID on [ModelScope](https://modelscope.cn)
|
|
159
|
-
|
|
160
|
-
| qwen3 series | [Qwen/Qwen3-14B-Base](https://modelscope.cn/models/Qwen/Qwen3-14B-Base)
|
|
161
|
-
| | [Qwen/Qwen3-32B](https://modelscope.cn/models/Qwen/Qwen3-32B)
|
|
162
|
-
| qwen3_moe series | [Qwen/Qwen3-30B-A3B-Base](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Base)
|
|
163
|
-
|
|
|
164
|
-
|
|
|
165
|
-
|
|
|
166
|
-
| | [Qwen/Qwen2
|
|
167
|
-
|
|
|
168
|
-
| | [Qwen/
|
|
169
|
-
|
|
|
170
|
-
|
|
|
171
|
-
|
|
|
172
|
-
|
|
|
173
|
-
|
|
|
174
|
-
|
|
|
175
|
-
|
|
|
176
|
-
|
|
|
158
|
+
| Model Type | Model ID on [ModelScope](https://modelscope.cn) | Model Size | Requires | Support Megatron | HF Model ID |
|
|
159
|
+
|---------------------|-----------------------------------------------------------------------------------------------------------------|:---------------------------------------:|----------------------|:----------------:|:---------------------------------------------------------------------------------------------------------:|
|
|
160
|
+
| qwen3 series | [Qwen/Qwen3-14B-Base](https://modelscope.cn/models/Qwen/Qwen3-14B-Base) | 0.6B/1.7B/4B/8B/14B | transformers>=4.51 | ✔ | [Qwen/Qwen3-14B-Base](https://huggingface.co/Qwen/Qwen3-14B-Base) |
|
|
161
|
+
| | [Qwen/Qwen3-32B](https://modelscope.cn/models/Qwen/Qwen3-32B) | 0.6B/1.7B/4B/8B/14B/32B | transformers>=4.51 | ✔ | [Qwen/Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B) |
|
|
162
|
+
| qwen3_moe series | [Qwen/Qwen3-30B-A3B-Base](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Base) | 30B-A3B/A3B-Base,235B-A22B | transformers>=4.51 | ✔ | [Qwen/Qwen3-30B-A3B-Base](https://huggingface.co/Qwen/Qwen3-30B-A3B-Base) |
|
|
163
|
+
| qwen3.5 moe series | [Qwen/Qwen3.5-35B-A3B](https://www.modelscope.cn/models/Qwen/Qwen3.5-35B-A3B) | 35B-A3B,122B-A10B, etc. | transformers>=5.20 | ✔ | [Qwen/Qwen3.5-35B-A3B](https://huggingface.co/Qwen/Qwen3.5-35B-A3B) |
|
|
164
|
+
| qwen3.5 series | [Qwen/Qwen3.5-9B](https://www.modelscope.cn/models/Qwen/Qwen3.5-9B) | 2B ~ 27B | transformers>=5.20 | ✔ | [Qwen/Qwen3.5-9B](https://huggingface.co/Qwen/Qwen3.5-9B) |
|
|
165
|
+
| qwen2 series | [Qwen/Qwen2-0.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-0.5B-Instruct) | 0.5B/1.5B/7B/72B | transformers>=4.37 | ✔ | [Qwen/Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) |
|
|
166
|
+
| | [Qwen/Qwen2-1.5B](https://modelscope.cn/models/Qwen/Qwen2-1.5B) | 0.5B/1.5B/7B/72B | transformers>=4.37 | ✔ | [Qwen/Qwen2-1.5B](https://huggingface.co/Qwen/Qwen2-1.5B) |
|
|
167
|
+
| | [Qwen/Qwen2.5-1.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-1.5B-Instruct) | 0.5B/1.5B/3B/7B/14B/32B/72B | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) |
|
|
168
|
+
| | [Qwen/Qwen2.5-0.5B](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B) | 0.5B/1.5B/3B/7B/14B/32B | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-0.5B](https://huggingface.co/Qwen/Qwen2.5-0.5B) |
|
|
169
|
+
| qwen2_moe series | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://modelscope.cn/models/Qwen/Qwen1.5-MoE-A2.7B-Chat) | - | transformers>=4.40 | ✔ | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat) |
|
|
170
|
+
| | [Qwen/Qwen1.5-MoE-A2.7B](https://modelscope.cn/models/Qwen/Qwen1.5-MoE-A2.7B) | - | transformers>=4.40 | ✔ | [Qwen/Qwen1.5-MoE-A2.7B](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B) |
|
|
171
|
+
| chatglm3 series | [ZhipuAI/chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b) | 6b/6b-base/6b-32k/6b-128k | transformers<4.42 | ✘ | [zai-org/chatglm3-6b](https://huggingface.co/zai-org/chatglm3-6b) |
|
|
172
|
+
| chatglm4 series | [ZhipuAI/glm-4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) | glm-4-9b/glm-4-9b-chat/glm-4-9b-chat-1m | transformers>=4.42 | ✘ | [zai-org/glm-4-9b-chat](https://huggingface.co/zai-org/glm-4-9b-chat) |
|
|
173
|
+
| | [ZhipuAI/LongWriter-glm4-9b](https://modelscope.cn/models/ZhipuAI/LongWriter-glm4-9b) | - | transformers>=4.42 | ✘ | [zai-org/LongWriter-glm4-9b](https://huggingface.co/zai-org/LongWriter-glm4-9b) |
|
|
174
|
+
| glm_edge series | [ZhipuAI/glm-edge-1.5b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-1.5b-chat) | 1.5b-chat/4b-chat | transformers>=4.46 | ✘ | [zai-org/glm-edge-1.5b-chat](https://huggingface.co/zai-org/glm-edge-1.5b-chat) |
|
|
175
|
+
| internlm2 series | [Shanghai_AI_Laboratory/internlm2-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b) | 1_8b/chat-1_8b-sft/base-7b/7b/chat-7b/ | transformers>=4.38 | ✘ | [internlm/internlm2-1_8b](https://huggingface.co/internlm/internlm2-1_8b) |
|
|
176
|
+
| deepseek_v1 | [deepseek-ai/DeepSeek-V2-Lite](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Lite) | V2/V2-Lite/V2-Chat/2-Lite-Chat/V2.5 | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-V2-Lite](https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite) |
|
|
177
|
+
| | [deepseek-ai/DeepSeek-Prover-V2-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-7B) | - | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-Prover-V2-7B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-7B) |
|
|
178
|
+
| | [deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1) | - | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1) |
|
|
177
179
|
| deepSeek-r1-distill | [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | 1.5B/7B/14B/32B | transformers>=4.37 | ✔ | [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) |
|
|
178
180
|
|
|
179
181
|
For more detailed model support list 👉 [Quick Start](docs/source_en/Usage%20Guide/Quick-Start.md)
|
|
@@ -202,7 +204,7 @@ twinkle.initialize(mode='ray', groups=device_group, global_device_mesh=device_me
|
|
|
202
204
|
|
|
203
205
|
def train():
|
|
204
206
|
# to load model from Hugging Face, use 'hf://...'
|
|
205
|
-
base_model = 'ms://Qwen/Qwen3-4B'
|
|
207
|
+
base_model = 'ms://Qwen/Qwen3.5-4B'
|
|
206
208
|
# 1000 samples
|
|
207
209
|
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000)))
|
|
208
210
|
# Set template to prepare encoding
|
|
@@ -112,25 +112,27 @@ supported on Twinkle✨ framework.
|
|
|
112
112
|
> both Tinker APIs, as well as the full-fledged Twinkle✨ native APIs. The serverless endpoint is backed
|
|
113
113
|
> by one training base at a time, and currently it is [Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507).
|
|
114
114
|
|
|
115
|
-
| Model Type | Model ID on [ModelScope](https://modelscope.cn)
|
|
116
|
-
|
|
117
|
-
| qwen3 series | [Qwen/Qwen3-14B-Base](https://modelscope.cn/models/Qwen/Qwen3-14B-Base)
|
|
118
|
-
| | [Qwen/Qwen3-32B](https://modelscope.cn/models/Qwen/Qwen3-32B)
|
|
119
|
-
| qwen3_moe series | [Qwen/Qwen3-30B-A3B-Base](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Base)
|
|
120
|
-
|
|
|
121
|
-
|
|
|
122
|
-
|
|
|
123
|
-
| | [Qwen/Qwen2
|
|
124
|
-
|
|
|
125
|
-
| | [Qwen/
|
|
126
|
-
|
|
|
127
|
-
|
|
|
128
|
-
|
|
|
129
|
-
|
|
|
130
|
-
|
|
|
131
|
-
|
|
|
132
|
-
|
|
|
133
|
-
|
|
|
115
|
+
| Model Type | Model ID on [ModelScope](https://modelscope.cn) | Model Size | Requires | Support Megatron | HF Model ID |
|
|
116
|
+
|---------------------|-----------------------------------------------------------------------------------------------------------------|:---------------------------------------:|----------------------|:----------------:|:---------------------------------------------------------------------------------------------------------:|
|
|
117
|
+
| qwen3 series | [Qwen/Qwen3-14B-Base](https://modelscope.cn/models/Qwen/Qwen3-14B-Base) | 0.6B/1.7B/4B/8B/14B | transformers>=4.51 | ✔ | [Qwen/Qwen3-14B-Base](https://huggingface.co/Qwen/Qwen3-14B-Base) |
|
|
118
|
+
| | [Qwen/Qwen3-32B](https://modelscope.cn/models/Qwen/Qwen3-32B) | 0.6B/1.7B/4B/8B/14B/32B | transformers>=4.51 | ✔ | [Qwen/Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B) |
|
|
119
|
+
| qwen3_moe series | [Qwen/Qwen3-30B-A3B-Base](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Base) | 30B-A3B/A3B-Base,235B-A22B | transformers>=4.51 | ✔ | [Qwen/Qwen3-30B-A3B-Base](https://huggingface.co/Qwen/Qwen3-30B-A3B-Base) |
|
|
120
|
+
| qwen3.5 moe series | [Qwen/Qwen3.5-35B-A3B](https://www.modelscope.cn/models/Qwen/Qwen3.5-35B-A3B) | 35B-A3B,122B-A10B, etc. | transformers>=5.20 | ✔ | [Qwen/Qwen3.5-35B-A3B](https://huggingface.co/Qwen/Qwen3.5-35B-A3B) |
|
|
121
|
+
| qwen3.5 series | [Qwen/Qwen3.5-9B](https://www.modelscope.cn/models/Qwen/Qwen3.5-9B) | 2B ~ 27B | transformers>=5.20 | ✔ | [Qwen/Qwen3.5-9B](https://huggingface.co/Qwen/Qwen3.5-9B) |
|
|
122
|
+
| qwen2 series | [Qwen/Qwen2-0.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-0.5B-Instruct) | 0.5B/1.5B/7B/72B | transformers>=4.37 | ✔ | [Qwen/Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) |
|
|
123
|
+
| | [Qwen/Qwen2-1.5B](https://modelscope.cn/models/Qwen/Qwen2-1.5B) | 0.5B/1.5B/7B/72B | transformers>=4.37 | ✔ | [Qwen/Qwen2-1.5B](https://huggingface.co/Qwen/Qwen2-1.5B) |
|
|
124
|
+
| | [Qwen/Qwen2.5-1.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-1.5B-Instruct) | 0.5B/1.5B/3B/7B/14B/32B/72B | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) |
|
|
125
|
+
| | [Qwen/Qwen2.5-0.5B](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B) | 0.5B/1.5B/3B/7B/14B/32B | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-0.5B](https://huggingface.co/Qwen/Qwen2.5-0.5B) |
|
|
126
|
+
| qwen2_moe series | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://modelscope.cn/models/Qwen/Qwen1.5-MoE-A2.7B-Chat) | - | transformers>=4.40 | ✔ | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat) |
|
|
127
|
+
| | [Qwen/Qwen1.5-MoE-A2.7B](https://modelscope.cn/models/Qwen/Qwen1.5-MoE-A2.7B) | - | transformers>=4.40 | ✔ | [Qwen/Qwen1.5-MoE-A2.7B](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B) |
|
|
128
|
+
| chatglm3 series | [ZhipuAI/chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b) | 6b/6b-base/6b-32k/6b-128k | transformers<4.42 | ✘ | [zai-org/chatglm3-6b](https://huggingface.co/zai-org/chatglm3-6b) |
|
|
129
|
+
| chatglm4 series | [ZhipuAI/glm-4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) | glm-4-9b/glm-4-9b-chat/glm-4-9b-chat-1m | transformers>=4.42 | ✘ | [zai-org/glm-4-9b-chat](https://huggingface.co/zai-org/glm-4-9b-chat) |
|
|
130
|
+
| | [ZhipuAI/LongWriter-glm4-9b](https://modelscope.cn/models/ZhipuAI/LongWriter-glm4-9b) | - | transformers>=4.42 | ✘ | [zai-org/LongWriter-glm4-9b](https://huggingface.co/zai-org/LongWriter-glm4-9b) |
|
|
131
|
+
| glm_edge series | [ZhipuAI/glm-edge-1.5b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-1.5b-chat) | 1.5b-chat/4b-chat | transformers>=4.46 | ✘ | [zai-org/glm-edge-1.5b-chat](https://huggingface.co/zai-org/glm-edge-1.5b-chat) |
|
|
132
|
+
| internlm2 series | [Shanghai_AI_Laboratory/internlm2-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b) | 1_8b/chat-1_8b-sft/base-7b/7b/chat-7b/ | transformers>=4.38 | ✘ | [internlm/internlm2-1_8b](https://huggingface.co/internlm/internlm2-1_8b) |
|
|
133
|
+
| deepseek_v1 | [deepseek-ai/DeepSeek-V2-Lite](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Lite) | V2/V2-Lite/V2-Chat/2-Lite-Chat/V2.5 | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-V2-Lite](https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite) |
|
|
134
|
+
| | [deepseek-ai/DeepSeek-Prover-V2-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-7B) | - | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-Prover-V2-7B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-7B) |
|
|
135
|
+
| | [deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1) | - | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1) |
|
|
134
136
|
| deepSeek-r1-distill | [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | 1.5B/7B/14B/32B | transformers>=4.37 | ✔ | [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) |
|
|
135
137
|
|
|
136
138
|
For more detailed model support list 👉 [Quick Start](docs/source_en/Usage%20Guide/Quick-Start.md)
|
|
@@ -159,7 +161,7 @@ twinkle.initialize(mode='ray', groups=device_group, global_device_mesh=device_me
|
|
|
159
161
|
|
|
160
162
|
def train():
|
|
161
163
|
# to load model from Hugging Face, use 'hf://...'
|
|
162
|
-
base_model = 'ms://Qwen/Qwen3-4B'
|
|
164
|
+
base_model = 'ms://Qwen/Qwen3.5-4B'
|
|
163
165
|
# 1000 samples
|
|
164
166
|
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000)))
|
|
165
167
|
# Set template to prepare encoding
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "twinkle-kit"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.2.dev0"
|
|
4
4
|
description = "Training API for large language models with efficient data handling and advanced optimization techniques."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [{ name = "ModelScope", email = "contact@modelscope.cn" }]
|
|
@@ -1317,8 +1317,12 @@ class GPTBridge:
|
|
|
1317
1317
|
to_mcore)
|
|
1318
1318
|
else:
|
|
1319
1319
|
hf_state_dict.update(self._set_mlp_state(mg_mlp, hf_state_dict, f'{hf_mlp_prefix}.', layer_idx, to_mcore))
|
|
1320
|
-
self.
|
|
1321
|
-
|
|
1320
|
+
if self.args.hf_model_type == 'qwen3_5':
|
|
1321
|
+
self._set_state_dict(mg_layer, 'pre_mlp_layernorm.weight', hf_state_dict,
|
|
1322
|
+
'post_attention_layernorm.weight', to_mcore)
|
|
1323
|
+
else:
|
|
1324
|
+
self._set_state_dict(mg_layer, 'mlp.linear_fc1.layer_norm_weight', hf_state_dict,
|
|
1325
|
+
'post_attention_layernorm.weight', to_mcore)
|
|
1322
1326
|
return hf_state_dict
|
|
1323
1327
|
|
|
1324
1328
|
def _set_layer_state(self, mg_layer, hf_state_dict, hf_prefix: str, layer_idx: int, to_mcore: bool):
|
{twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/gpts/qwen3_next.py
RENAMED
|
@@ -458,10 +458,14 @@ def get_qwen3_next_layer_spec(config, args, gated_delta_net_cls):
|
|
|
458
458
|
elif layer_type == 'full_attention':
|
|
459
459
|
layer_spec.submodules.self_attention.submodules.linear_qkv = TEColumnParallelLinear
|
|
460
460
|
layer_spec.submodules.self_attention.module = Qwen3NextSelfAttention
|
|
461
|
+
# Replace ALL layernorms with Qwen3NextRMSNorm (Zero-Centered)
|
|
461
462
|
layer_spec.submodules.input_layernorm = layer_norm_impl
|
|
462
|
-
if hasattr(layer_spec.submodules,
|
|
463
|
-
'pre_mlp_layernorm') and layer_spec.submodules.pre_mlp_layernorm is not IdentityOp:
|
|
463
|
+
if hasattr(layer_spec.submodules, 'pre_mlp_layernorm'):
|
|
464
464
|
layer_spec.submodules.pre_mlp_layernorm = layer_norm_impl
|
|
465
|
+
# qwen3.5 dense
|
|
466
|
+
if args.hf_model_type == 'qwen3_5':
|
|
467
|
+
layer_spec.submodules.mlp.submodules.linear_fc1 = TEColumnParallelLinear
|
|
468
|
+
# Replace qk_layernorm if present
|
|
465
469
|
if hasattr(layer_spec.submodules.self_attention.submodules, 'q_layernorm'):
|
|
466
470
|
layer_spec.submodules.self_attention.submodules.q_layernorm = layer_norm_impl
|
|
467
471
|
if hasattr(layer_spec.submodules.self_attention.submodules, 'k_layernorm'):
|
{twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/model/megatron/model/mm_gpts/qwen3_5.py
RENAMED
|
@@ -139,28 +139,36 @@ try:
|
|
|
139
139
|
except ImportError:
|
|
140
140
|
Qwen3_5MoeForConditionalGeneration = None
|
|
141
141
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
_auto_model_cls = AutoModel
|
|
147
|
-
except ImportError:
|
|
148
|
-
_auto_model_cls = None
|
|
142
|
+
try:
|
|
143
|
+
from transformers.models.qwen3_5.modeling_qwen3_5 import Qwen3_5ForConditionalGeneration
|
|
144
|
+
except ImportError:
|
|
145
|
+
Qwen3_5ForConditionalGeneration = None
|
|
149
146
|
|
|
150
147
|
|
|
151
148
|
class Qwen3_5MoeLoader(Qwen3NextLoader):
|
|
152
149
|
gated_delta_net = Qwen3_5MoeGatedDeltaNet
|
|
153
150
|
|
|
154
151
|
|
|
152
|
+
register_megatron_model(
|
|
153
|
+
MegatronModelMeta(
|
|
154
|
+
MegatronModelType.qwen3_5_moe,
|
|
155
|
+
[
|
|
156
|
+
ModelType.qwen3_5_moe,
|
|
157
|
+
],
|
|
158
|
+
bridge_cls=Qwen3_5Bridge,
|
|
159
|
+
visual_cls=Qwen3_5Vit,
|
|
160
|
+
auto_model_cls=Qwen3_5MoeForConditionalGeneration,
|
|
161
|
+
loader=Qwen3_5MoeLoader,
|
|
162
|
+
))
|
|
163
|
+
|
|
155
164
|
register_megatron_model(
|
|
156
165
|
MegatronModelMeta(
|
|
157
166
|
MegatronModelType.qwen3_5,
|
|
158
167
|
[
|
|
159
168
|
ModelType.qwen3_5,
|
|
160
|
-
ModelType.qwen3_5_moe,
|
|
161
169
|
],
|
|
162
170
|
bridge_cls=Qwen3_5Bridge,
|
|
163
171
|
visual_cls=Qwen3_5Vit,
|
|
164
|
-
auto_model_cls=
|
|
172
|
+
auto_model_cls=Qwen3_5ForConditionalGeneration,
|
|
165
173
|
loader=Qwen3_5MoeLoader,
|
|
166
174
|
))
|
|
@@ -10,7 +10,7 @@ Usage:
|
|
|
10
10
|
python -m twinkle.server --config server_config.yaml --server-type tinker
|
|
11
11
|
|
|
12
12
|
# Quick start with minimal args
|
|
13
|
-
python -m twinkle.server --server-type tinker --port 8000 --model-id "Qwen/Qwen3-4B"
|
|
13
|
+
python -m twinkle.server --server-type tinker --port 8000 --model-id "Qwen/Qwen3.5-4B"
|
|
14
14
|
"""
|
|
15
15
|
from __future__ import annotations
|
|
16
16
|
|
|
@@ -104,7 +104,7 @@ def build_sampler_app(model_id: str,
|
|
|
104
104
|
"""Build a sampler application for text generation inference.
|
|
105
105
|
|
|
106
106
|
Args:
|
|
107
|
-
model_id: Model identifier (e.g., "Qwen/Qwen3-4B")
|
|
107
|
+
model_id: Model identifier (e.g., "Qwen/Qwen3.5-4B")
|
|
108
108
|
nproc_per_node: Number of GPU processes per node
|
|
109
109
|
device_group: Device group configuration dict
|
|
110
110
|
device_mesh: Device mesh configuration dict for parallelism
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# Make sure to modify __release_datetime__ to release time when making official release.
|
|
2
|
-
__version__ = '0.
|
|
2
|
+
__version__ = '0.2.dev0'
|
|
3
3
|
# default release datetime for branches under active development is set
|
|
4
4
|
# to be a time far-far-away-into-the-future
|
|
5
5
|
__release_datetime__ = '2099-10-13 08:56:12'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: twinkle-kit
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.dev0
|
|
4
4
|
Summary: Training API for large language models with efficient data handling and advanced optimization techniques.
|
|
5
5
|
Author-email: ModelScope <contact@modelscope.cn>
|
|
6
6
|
Requires-Python: <3.13,>=3.11
|
|
@@ -155,25 +155,27 @@ supported on Twinkle✨ framework.
|
|
|
155
155
|
> both Tinker APIs, as well as the full-fledged Twinkle✨ native APIs. The serverless endpoint is backed
|
|
156
156
|
> by one training base at a time, and currently it is [Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507).
|
|
157
157
|
|
|
158
|
-
| Model Type | Model ID on [ModelScope](https://modelscope.cn)
|
|
159
|
-
|
|
160
|
-
| qwen3 series | [Qwen/Qwen3-14B-Base](https://modelscope.cn/models/Qwen/Qwen3-14B-Base)
|
|
161
|
-
| | [Qwen/Qwen3-32B](https://modelscope.cn/models/Qwen/Qwen3-32B)
|
|
162
|
-
| qwen3_moe series | [Qwen/Qwen3-30B-A3B-Base](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Base)
|
|
163
|
-
|
|
|
164
|
-
|
|
|
165
|
-
|
|
|
166
|
-
| | [Qwen/Qwen2
|
|
167
|
-
|
|
|
168
|
-
| | [Qwen/
|
|
169
|
-
|
|
|
170
|
-
|
|
|
171
|
-
|
|
|
172
|
-
|
|
|
173
|
-
|
|
|
174
|
-
|
|
|
175
|
-
|
|
|
176
|
-
|
|
|
158
|
+
| Model Type | Model ID on [ModelScope](https://modelscope.cn) | Model Size | Requires | Support Megatron | HF Model ID |
|
|
159
|
+
|---------------------|-----------------------------------------------------------------------------------------------------------------|:---------------------------------------:|----------------------|:----------------:|:---------------------------------------------------------------------------------------------------------:|
|
|
160
|
+
| qwen3 series | [Qwen/Qwen3-14B-Base](https://modelscope.cn/models/Qwen/Qwen3-14B-Base) | 0.6B/1.7B/4B/8B/14B | transformers>=4.51 | ✔ | [Qwen/Qwen3-14B-Base](https://huggingface.co/Qwen/Qwen3-14B-Base) |
|
|
161
|
+
| | [Qwen/Qwen3-32B](https://modelscope.cn/models/Qwen/Qwen3-32B) | 0.6B/1.7B/4B/8B/14B/32B | transformers>=4.51 | ✔ | [Qwen/Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B) |
|
|
162
|
+
| qwen3_moe series | [Qwen/Qwen3-30B-A3B-Base](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Base) | 30B-A3B/A3B-Base,235B-A22B | transformers>=4.51 | ✔ | [Qwen/Qwen3-30B-A3B-Base](https://huggingface.co/Qwen/Qwen3-30B-A3B-Base) |
|
|
163
|
+
| qwen3.5 moe series | [Qwen/Qwen3.5-35B-A3B](https://www.modelscope.cn/models/Qwen/Qwen3.5-35B-A3B) | 35B-A3B,122B-A10B, etc. | transformers>=5.20 | ✔ | [Qwen/Qwen3.5-35B-A3B](https://huggingface.co/Qwen/Qwen3.5-35B-A3B) |
|
|
164
|
+
| qwen3.5 series | [Qwen/Qwen3.5-9B](https://www.modelscope.cn/models/Qwen/Qwen3.5-9B) | 2B ~ 27B | transformers>=5.20 | ✔ | [Qwen/Qwen3.5-9B](https://huggingface.co/Qwen/Qwen3.5-9B) |
|
|
165
|
+
| qwen2 series | [Qwen/Qwen2-0.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2-0.5B-Instruct) | 0.5B/1.5B/7B/72B | transformers>=4.37 | ✔ | [Qwen/Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) |
|
|
166
|
+
| | [Qwen/Qwen2-1.5B](https://modelscope.cn/models/Qwen/Qwen2-1.5B) | 0.5B/1.5B/7B/72B | transformers>=4.37 | ✔ | [Qwen/Qwen2-1.5B](https://huggingface.co/Qwen/Qwen2-1.5B) |
|
|
167
|
+
| | [Qwen/Qwen2.5-1.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-1.5B-Instruct) | 0.5B/1.5B/3B/7B/14B/32B/72B | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) |
|
|
168
|
+
| | [Qwen/Qwen2.5-0.5B](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B) | 0.5B/1.5B/3B/7B/14B/32B | transformers>=4.37 | ✔ | [Qwen/Qwen2.5-0.5B](https://huggingface.co/Qwen/Qwen2.5-0.5B) |
|
|
169
|
+
| qwen2_moe series | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://modelscope.cn/models/Qwen/Qwen1.5-MoE-A2.7B-Chat) | - | transformers>=4.40 | ✔ | [Qwen/Qwen1.5-MoE-A2.7B-Chat](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat) |
|
|
170
|
+
| | [Qwen/Qwen1.5-MoE-A2.7B](https://modelscope.cn/models/Qwen/Qwen1.5-MoE-A2.7B) | - | transformers>=4.40 | ✔ | [Qwen/Qwen1.5-MoE-A2.7B](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B) |
|
|
171
|
+
| chatglm3 series | [ZhipuAI/chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b) | 6b/6b-base/6b-32k/6b-128k | transformers<4.42 | ✘ | [zai-org/chatglm3-6b](https://huggingface.co/zai-org/chatglm3-6b) |
|
|
172
|
+
| chatglm4 series | [ZhipuAI/glm-4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat) | glm-4-9b/glm-4-9b-chat/glm-4-9b-chat-1m | transformers>=4.42 | ✘ | [zai-org/glm-4-9b-chat](https://huggingface.co/zai-org/glm-4-9b-chat) |
|
|
173
|
+
| | [ZhipuAI/LongWriter-glm4-9b](https://modelscope.cn/models/ZhipuAI/LongWriter-glm4-9b) | - | transformers>=4.42 | ✘ | [zai-org/LongWriter-glm4-9b](https://huggingface.co/zai-org/LongWriter-glm4-9b) |
|
|
174
|
+
| glm_edge series | [ZhipuAI/glm-edge-1.5b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-1.5b-chat) | 1.5b-chat/4b-chat | transformers>=4.46 | ✘ | [zai-org/glm-edge-1.5b-chat](https://huggingface.co/zai-org/glm-edge-1.5b-chat) |
|
|
175
|
+
| internlm2 series | [Shanghai_AI_Laboratory/internlm2-1_8b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b) | 1_8b/chat-1_8b-sft/base-7b/7b/chat-7b/ | transformers>=4.38 | ✘ | [internlm/internlm2-1_8b](https://huggingface.co/internlm/internlm2-1_8b) |
|
|
176
|
+
| deepseek_v1         | [deepseek-ai/DeepSeek-V2-Lite](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Lite)                       | V2/V2-Lite/V2-Chat/V2-Lite-Chat/V2.5    | transformers>=4.39.3 | ✔                | [deepseek-ai/DeepSeek-V2-Lite](https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite)                       |
|
|
177
|
+
| | [deepseek-ai/DeepSeek-Prover-V2-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-7B) | - | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-Prover-V2-7B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-7B) |
|
|
178
|
+
| | [deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1) | - | transformers>=4.39.3 | ✔ | [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1) |
|
|
177
179
|
| deepSeek-r1-distill | [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | 1.5B/7B/14B/32B | transformers>=4.37 | ✔ | [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) |
|
|
178
180
|
|
|
179
181
|
For a more detailed list of supported models 👉 [Quick Start](docs/source_en/Usage%20Guide/Quick-Start.md)
|
|
@@ -202,7 +204,7 @@ twinkle.initialize(mode='ray', groups=device_group, global_device_mesh=device_me
|
|
|
202
204
|
|
|
203
205
|
def train():
|
|
204
206
|
# to load model from Hugging Face, use 'hf://...'
|
|
205
|
-
base_model = 'ms://Qwen/Qwen3-4B'
|
|
207
|
+
base_model = 'ms://Qwen/Qwen3.5-4B'
|
|
206
208
|
# 1000 samples
|
|
207
209
|
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000)))
|
|
208
210
|
# Set template to prepare encoding
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/checkpoint_engine/hccl_checkpoint_engine.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{twinkle_kit-0.1 → twinkle_kit-0.2.dev0}/src/twinkle/checkpoint_engine/nccl_checkpoint_engine.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|