xinference 1.8.1rc1__py3-none-any.whl → 1.9.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of xinference has been flagged as potentially problematic; see the registry page for details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +2 -1
- xinference/core/model.py +8 -4
- xinference/core/supervisor.py +2 -3
- xinference/core/worker.py +7 -5
- xinference/deploy/cmdline.py +2 -0
- xinference/deploy/local.py +5 -0
- xinference/deploy/test/test_cmdline.py +1 -1
- xinference/deploy/worker.py +6 -0
- xinference/model/audio/cosyvoice.py +0 -1
- xinference/model/audio/model_spec.json +44 -20
- xinference/model/core.py +3 -0
- xinference/model/embedding/flag/core.py +5 -0
- xinference/model/embedding/llama_cpp/core.py +22 -19
- xinference/model/embedding/sentence_transformers/core.py +18 -4
- xinference/model/embedding/vllm/core.py +36 -9
- xinference/model/image/cache_manager.py +56 -0
- xinference/model/image/core.py +9 -0
- xinference/model/image/model_spec.json +178 -1
- xinference/model/image/stable_diffusion/core.py +155 -23
- xinference/model/llm/cache_manager.py +17 -3
- xinference/model/llm/harmony.py +245 -0
- xinference/model/llm/llama_cpp/core.py +41 -40
- xinference/model/llm/llm_family.json +688 -11
- xinference/model/llm/llm_family.py +1 -1
- xinference/model/llm/sglang/core.py +108 -5
- xinference/model/llm/transformers/core.py +20 -18
- xinference/model/llm/transformers/gemma3.py +1 -1
- xinference/model/llm/transformers/gpt_oss.py +91 -0
- xinference/model/llm/transformers/multimodal/core.py +1 -1
- xinference/model/llm/transformers/multimodal/gemma3.py +1 -1
- xinference/model/llm/transformers/multimodal/glm4_1v.py +2 -2
- xinference/model/llm/transformers/multimodal/ovis2.py +1 -1
- xinference/model/llm/transformers/multimodal/qwen-omni.py +7 -8
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +9 -6
- xinference/model/llm/transformers/utils.py +1 -33
- xinference/model/llm/utils.py +61 -7
- xinference/model/llm/vllm/core.py +44 -8
- xinference/model/rerank/__init__.py +66 -23
- xinference/model/rerank/cache_manager.py +35 -0
- xinference/model/rerank/core.py +87 -339
- xinference/model/rerank/custom.py +33 -8
- xinference/model/rerank/model_spec.json +251 -212
- xinference/model/rerank/rerank_family.py +137 -0
- xinference/model/rerank/sentence_transformers/__init__.py +13 -0
- xinference/model/rerank/sentence_transformers/core.py +337 -0
- xinference/model/rerank/vllm/__init__.py +13 -0
- xinference/model/rerank/vllm/core.py +156 -0
- xinference/model/utils.py +108 -0
- xinference/model/video/model_spec.json +95 -1
- xinference/thirdparty/cosyvoice/bin/export_jit.py +3 -4
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +49 -126
- xinference/thirdparty/cosyvoice/bin/{inference.py → inference_deprecated.py} +1 -0
- xinference/thirdparty/cosyvoice/bin/train.py +23 -3
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +8 -4
- xinference/thirdparty/cosyvoice/cli/frontend.py +4 -4
- xinference/thirdparty/cosyvoice/cli/model.py +53 -75
- xinference/thirdparty/cosyvoice/dataset/dataset.py +5 -18
- xinference/thirdparty/cosyvoice/dataset/processor.py +24 -25
- xinference/thirdparty/cosyvoice/flow/decoder.py +24 -433
- xinference/thirdparty/cosyvoice/flow/flow.py +6 -14
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +33 -145
- xinference/thirdparty/cosyvoice/hifigan/generator.py +169 -1
- xinference/thirdparty/cosyvoice/llm/llm.py +108 -17
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +14 -115
- xinference/thirdparty/cosyvoice/utils/common.py +20 -0
- xinference/thirdparty/cosyvoice/utils/executor.py +8 -4
- xinference/thirdparty/cosyvoice/utils/file_utils.py +45 -1
- xinference/thirdparty/cosyvoice/utils/losses.py +37 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +35 -1
- xinference/thirdparty/cosyvoice/utils/train_utils.py +24 -6
- xinference/thirdparty/cosyvoice/vllm/cosyvoice2.py +103 -0
- xinference/types.py +2 -0
- xinference/ui/gradio/chat_interface.py +2 -0
- xinference/ui/gradio/media_interface.py +353 -7
- xinference/ui/web/ui/build/asset-manifest.json +3 -3
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/js/main.1086c759.js +3 -0
- xinference/ui/web/ui/build/static/js/main.1086c759.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/28012da921a51f1082549956d3ae82acd769a754b22afda9acddd98a4daf9ea4.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/3c5758bd12fa334294b1de0ff6b1a4bac8d963c45472eab9dc3e530d82aa6b3f.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/475936ebe725eca62a6f52ce182c06a19b2cef4df9545a05ed0591ee0c539d43.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/aee5aaba26f2b1e816a3ea9efa68bad8b95695a3d80adcfd8dd57a7bb17ac71a.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +1 -0
- xinference/ui/web/ui/src/locales/en.json +2 -0
- xinference/ui/web/ui/src/locales/ja.json +2 -0
- xinference/ui/web/ui/src/locales/ko.json +2 -0
- xinference/ui/web/ui/src/locales/zh.json +2 -0
- {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/METADATA +15 -10
- {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/RECORD +98 -89
- xinference/ui/web/ui/build/static/js/main.b969199a.js +0 -3
- xinference/ui/web/ui/build/static/js/main.b969199a.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/1409a96b9f9f9f5de99a89ab0f738f6da62b449521b0a8d3e4efcf7f5c23534d.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/3d2a89f0eccc1f90fc5036c9a1d587c2120e6a6b128aae31d1db7d6bad52722b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/43b889c3a8e2634092ade463d52481c7c5581c72ded8f23bc5f012ea0ef8cea5.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/5d47532fb42128280d87f57c8a0b02bc1930f7ef764aa7e90579247df18bba83.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/830882bb275468a969614824a9ab8983f874b4581f2eb625e9c66426cdc65e5b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/8e5cb82c2ff3299c6a44563fe6b1c5515c9750613c51bb63abee0b1d70fc5019.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/9df08abcb5a7c1e48a4eb25c5d5f5d7253ea6854a4397e6d74d1fd75a14acda1.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/b99034986a06445701accc7a4914bb9320947435e8d4e15793392ca4f679316c.json +0 -1
- /xinference/ui/web/ui/build/static/js/{main.b969199a.js.LICENSE.txt → main.1086c759.js.LICENSE.txt} +0 -0
- {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/WHEEL +0 -0
- {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.8.1rc1.dist-info → xinference-1.9.1.dist-info}/top_level.txt +0 -0
xinference/model/video/model_spec.json

@@ -224,7 +224,7 @@
     },
     "virtualenv": {
       "packages": [
-        "
+        "diffusers==0.35.1",
         "ftfy",
         "imageio-ffmpeg",
         "imageio",
@@ -241,5 +241,99 @@
         "model_revision": "master"
       }
     }
+  },
+  {
+    "version": 2,
+    "model_name": "Wan2.2-A14B",
+    "model_family": "Wan",
+    "model_ability": [
+      "text2video"
+    ],
+    "default_model_config": {
+      "torch_dtype": "bfloat16"
+    },
+    "default_generate_config": {},
+    "virtualenv": {
+      "packages": [
+        "diffusers==0.35.1",
+        "ftfy",
+        "imageio-ffmpeg",
+        "imageio",
+        "#system_numpy#"
+      ]
+    },
+    "model_src": {
+      "huggingface": {
+        "model_id": "Wan-AI/Wan2.2-T2V-A14B-Diffusers",
+        "model_revision": "5be7df9619b54f4e2667b2755bc6a756675b5cd7"
+      },
+      "modelscope": {
+        "model_id": "Wan-AI/Wan2.2-T2V-A14B-Diffusers",
+        "model_revision": "master"
+      }
+    }
+  },
+  {
+    "version": 2,
+    "model_name": "Wan2.2-i2v-A14B",
+    "model_family": "Wan",
+    "model_ability": [
+      "image2video"
+    ],
+    "default_model_config": {
+      "torch_dtype": "bfloat16"
+    },
+    "default_generate_config": {},
+    "virtualenv": {
+      "packages": [
+        "diffusers==0.35.1",
+        "ftfy",
+        "imageio-ffmpeg",
+        "imageio",
+        "#system_numpy#"
+      ]
+    },
+    "model_src": {
+      "huggingface": {
+        "model_id": "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
+        "model_revision": "596658fd9ca6b7b71d5057529bbf319ecbc61d74"
+      },
+      "modelscope": {
+        "model_id": "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
+        "model_revision": "master"
+      }
+    }
+  },
+  {
+    "version": 2,
+    "model_name": "Wan2.2-ti2v-5B",
+    "model_family": "Wan",
+    "model_ability": [
+      "text2video",
+      "image2video"
+    ],
+    "default_model_config": {
+      "torch_dtype": "bfloat16"
+    },
+    "default_generate_config": {},
+    "virtualenv": {
+      "packages": [
+        "diffusers==0.35.1",
+        "ftfy",
+        "imageio-ffmpeg",
+        "imageio",
+        "#system_numpy#"
+      ]
+    },
+    "model_src": {
+      "huggingface": {
+        "model_id": "Wan-AI/Wan2.2-TI2V-5B-Diffusers",
+        "model_revision": "b8fff7315c768468a5333511427288870b2e9635"
+      },
+      "modelscope": {
+        "model_id": "Wan-AI/Wan2.2-TI2V-5B-Diffusers",
+        "model_revision": "master"
+      }
+    }
   }
 ]
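The three new Wan2.2 entries make the Wan text-to-video and image-to-video checkpoints launchable by their spec names. As a rough illustration only (not taken from this release's documentation), launching one of them through the standard xinference Python client could look like the sketch below; the endpoint URL is a placeholder and the text_to_video call on the returned handle is an assumption based on the existing video-model API.

from xinference.client import Client

# Placeholder endpoint; point this at a running xinference server.
client = Client("http://localhost:9997")

# Launch one of the Wan2.2 entries added in this release by its spec name.
model_uid = client.launch_model(
    model_name="Wan2.2-ti2v-5B",
    model_type="video",
)

# Assumed: video model handles expose text_to_video(), as for earlier video models.
model = client.get_model(model_uid)
result = model.text_to_video(prompt="a red panda riding a bicycle through a bamboo forest")
print(result)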
xinference/thirdparty/cosyvoice/bin/export_jit.py

@@ -61,8 +61,7 @@ def main():
         model = CosyVoice(args.model_dir)
     except Exception:
         try:
-
-            model = CosyVoice2(args.model_dir, use_flow_cache=True)
+            model = CosyVoice2(args.model_dir)
         except Exception:
             raise TypeError('no valid model_type!')
 
@@ -93,9 +92,9 @@ def main():
     else:
         # 3. export flow encoder
         flow_encoder = model.model.flow.encoder
-        script = get_optimized_script(flow_encoder
+        script = get_optimized_script(flow_encoder)
         script.save('{}/flow.encoder.fp32.zip'.format(args.model_dir))
-        script = get_optimized_script(flow_encoder.half()
+        script = get_optimized_script(flow_encoder.half())
         script.save('{}/flow.encoder.fp16.zip'.format(args.model_dir))
         logging.info('successfully export flow_encoder')
 
xinference/thirdparty/cosyvoice/bin/export_onnx.py

@@ -62,135 +62,58 @@ def main():
         model = CosyVoice(args.model_dir)
     except Exception:
         try:
-
-            model = CosyVoice2(args.model_dir, use_flow_cache=True)
+            model = CosyVoice2(args.model_dir)
         except Exception:
             raise TypeError('no valid model_type!')
 
 [old lines 70-117: 48 removed lines whose content was not captured in this diff view]
-            logging.info('successfully export estimator')
-    else:
-        # 1. export flow decoder estimator
-        estimator = model.model.flow.decoder.estimator
-        estimator.forward = estimator.forward_chunk
-        estimator.eval()
-
-        device = model.model.device
-        batch_size, seq_len = 2, 256
-        out_channels = model.model.flow.decoder.estimator.out_channels
-        x, mask, mu, t, spks, cond = get_dummy_input(batch_size, seq_len, out_channels, device)
-        cache = model.model.init_flow_cache()['decoder_cache']
-        cache.pop('offset')
-        cache = {k: v[0] for k, v in cache.items()}
-        torch.onnx.export(
-            estimator,
-            (x, mask, mu, t, spks, cond,
-             cache['down_blocks_conv_cache'],
-             cache['down_blocks_kv_cache'],
-             cache['mid_blocks_conv_cache'],
-             cache['mid_blocks_kv_cache'],
-             cache['up_blocks_conv_cache'],
-             cache['up_blocks_kv_cache'],
-             cache['final_blocks_conv_cache']),
-            '{}/flow.decoder.estimator.fp32.onnx'.format(args.model_dir),
-            export_params=True,
-            opset_version=18,
-            do_constant_folding=True,
-            input_names=['x', 'mask', 'mu', 't', 'spks', 'cond', 'down_blocks_conv_cache', 'down_blocks_kv_cache', 'mid_blocks_conv_cache', 'mid_blocks_kv_cache',
-                         'up_blocks_conv_cache', 'up_blocks_kv_cache', 'final_blocks_conv_cache'],
-            output_names=['estimator_out', 'down_blocks_conv_cache_out', 'down_blocks_kv_cache_out', 'mid_blocks_conv_cache_out', 'mid_blocks_kv_cache_out',
-                          'up_blocks_conv_cache_out', 'up_blocks_kv_cache_out', 'final_blocks_conv_cache_out'],
-            dynamic_axes={
-                'x': {2: 'seq_len'},
-                'mask': {2: 'seq_len'},
-                'mu': {2: 'seq_len'},
-                'cond': {2: 'seq_len'},
-                'down_blocks_kv_cache': {3: 'cache_in_len'},
-                'mid_blocks_kv_cache': {3: 'cache_in_len'},
-                'up_blocks_kv_cache': {3: 'cache_in_len'},
-                'estimator_out': {2: 'seq_len'},
-                'down_blocks_kv_cache_out': {3: 'cache_out_len'},
-                'mid_blocks_kv_cache_out': {3: 'cache_out_len'},
-                'up_blocks_kv_cache_out': {3: 'cache_out_len'},
-            }
-        )
-
-        # 2. test computation consistency
-        option = onnxruntime.SessionOptions()
-        option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
-        option.intra_op_num_threads = 1
-        providers = ['CUDAExecutionProvider' if torch.cuda.is_available() else 'CPUExecutionProvider']
-        estimator_onnx = onnxruntime.InferenceSession('{}/flow.decoder.estimator.fp32.onnx'.format(args.model_dir),
-                                                      sess_options=option, providers=providers)
-
-        for iter in tqdm(range(10)):
-            x, mask, mu, t, spks, cond = get_dummy_input(batch_size, random.randint(16, 512), out_channels, device)
-            cache = model.model.init_flow_cache()['decoder_cache']
-            cache.pop('offset')
-            cache = {k: v[0] for k, v in cache.items()}
-            output_pytorch = estimator(x, mask, mu, t, spks, cond, **{k: v.clone() for k, v in cache.items()})
-            ort_inputs = {
-                'x': x.cpu().numpy(),
-                'mask': mask.cpu().numpy(),
-                'mu': mu.cpu().numpy(),
-                't': t.cpu().numpy(),
-                'spks': spks.cpu().numpy(),
-                'cond': cond.cpu().numpy(),
-            }
-            output_onnx = estimator_onnx.run(None, {**ort_inputs, **{k: v.clone().cpu().numpy() for k, v in cache.items()}})
-            if iter == 0:
-                # NOTE why can not pass first iteration check?
-                continue
-            for i, j in zip(output_pytorch, output_onnx):
-                torch.testing.assert_allclose(i, torch.from_numpy(j).to(device), rtol=1e-2, atol=1e-4)
-        logging.info('successfully export estimator')
+    # 1. export flow decoder estimator
+    estimator = model.model.flow.decoder.estimator
+    estimator.eval()
+
+    device = model.model.device
+    batch_size, seq_len = 2, 256
+    out_channels = model.model.flow.decoder.estimator.out_channels
+    x, mask, mu, t, spks, cond = get_dummy_input(batch_size, seq_len, out_channels, device)
+    torch.onnx.export(
+        estimator,
+        (x, mask, mu, t, spks, cond),
+        '{}/flow.decoder.estimator.fp32.onnx'.format(args.model_dir),
+        export_params=True,
+        opset_version=18,
+        do_constant_folding=True,
+        input_names=['x', 'mask', 'mu', 't', 'spks', 'cond'],
+        output_names=['estimator_out'],
+        dynamic_axes={
+            'x': {2: 'seq_len'},
+            'mask': {2: 'seq_len'},
+            'mu': {2: 'seq_len'},
+            'cond': {2: 'seq_len'},
+            'estimator_out': {2: 'seq_len'},
+        }
+    )
+
+    # 2. test computation consistency
+    option = onnxruntime.SessionOptions()
+    option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
+    option.intra_op_num_threads = 1
+    providers = ['CUDAExecutionProvider' if torch.cuda.is_available() else 'CPUExecutionProvider']
+    estimator_onnx = onnxruntime.InferenceSession('{}/flow.decoder.estimator.fp32.onnx'.format(args.model_dir),
+                                                  sess_options=option, providers=providers)
+
+    for _ in tqdm(range(10)):
+        x, mask, mu, t, spks, cond = get_dummy_input(batch_size, random.randint(16, 512), out_channels, device)
+        output_pytorch = estimator(x, mask, mu, t, spks, cond)
+        ort_inputs = {
+            'x': x.cpu().numpy(),
+            'mask': mask.cpu().numpy(),
+            'mu': mu.cpu().numpy(),
+            't': t.cpu().numpy(),
+            'spks': spks.cpu().numpy(),
+            'cond': cond.cpu().numpy()
+        }
+        output_onnx = estimator_onnx.run(None, ort_inputs)[0]
+        torch.testing.assert_allclose(output_pytorch, torch.from_numpy(output_onnx).to(device), rtol=1e-2, atol=1e-4)
+    logging.info('successfully export estimator')
 
 
 if __name__ == "__main__":
xinference/thirdparty/cosyvoice/bin/train.py

@@ -27,6 +27,7 @@ from hyperpyyaml import load_hyperpyyaml
 
 from torch.distributed.elastic.multiprocessing.errors import record
 
+from cosyvoice.utils.losses import DPOLoss
 from cosyvoice.utils.executor import Executor
 from cosyvoice.utils.train_utils import (
     init_distributed,
@@ -43,6 +44,7 @@ def get_args():
                         choices=['torch_ddp', 'deepspeed'],
                         help='Engine for paralleled training')
     parser.add_argument('--model', required=True, help='model which will be trained')
+    parser.add_argument('--ref_model', required=False, help='ref model used in dpo')
     parser.add_argument('--config', required=True, help='config file')
     parser.add_argument('--train_data', required=True, help='train data file')
     parser.add_argument('--cv_data', required=True, help='cv data file')
@@ -73,6 +75,10 @@ def get_args():
                         action='store_true',
                         default=False,
                         help='Use automatic mixed precision training')
+    parser.add_argument('--dpo',
+                        action='store_true',
+                        default=False,
+                        help='Use Direct Preference Optimization')
     parser.add_argument('--deepspeed.save_states',
                         dest='save_states',
                         default='model_only',
@@ -113,7 +119,7 @@ def main():
 
     # Get dataset & dataloader
     train_dataset, cv_dataset, train_data_loader, cv_data_loader = \
-        init_dataset_and_dataloader(args, configs, gan)
+        init_dataset_and_dataloader(args, configs, gan, args.dpo)
 
     # Do some sanity checks and save config to arsg.model_dir
     configs = check_modify_and_save_config(args, configs)
@@ -122,6 +128,8 @@ def main():
     writer = init_summarywriter(args)
 
     # load checkpoint
+    if args.dpo is True:
+        configs[args.model].forward = configs[args.model].forward_dpo
     model = configs[args.model]
     start_step, start_epoch = 0, -1
     if args.checkpoint is not None:
@@ -150,13 +158,25 @@ def main():
     info_dict['epoch'] = start_epoch
     save_model(model, 'init', info_dict)
 
+    # DPO related
+    if args.dpo is True:
+        ref_model = deepcopy(configs[args.model])
+        state_dict = torch.load(args.ref_model, map_location='cpu')
+        ref_model.load_state_dict(state_dict, strict=False)
+        dpo_loss = DPOLoss(beta=0.01, label_smoothing=0.0, ipo=False)
+        # NOTE maybe it is not needed to wrap ref_model as ddp because its parameter is not updated
+        ref_model = wrap_cuda_model(args, ref_model)
+    else:
+        ref_model, dpo_loss = None, None
+
     # Get executor
-    executor = Executor(gan=gan)
+    executor = Executor(gan=gan, ref_model=ref_model, dpo_loss=dpo_loss)
     executor.step = start_step
 
     # Init scaler, used for pytorch amp mixed precision training
     scaler = torch.cuda.amp.GradScaler() if args.use_amp else None
     print('start step {} start epoch {}'.format(start_step, start_epoch))
+
     # Start training loop
     for epoch in range(start_epoch + 1, info_dict['max_epoch']):
         executor.epoch = epoch
@@ -167,7 +187,7 @@ def main():
             executor.train_one_epoc_gan(model, optimizer, scheduler, optimizer_d, scheduler_d, train_data_loader, cv_data_loader,
                                         writer, info_dict, scaler, group_join)
         else:
-            executor.train_one_epoc(model, optimizer, scheduler, train_data_loader, cv_data_loader, writer, info_dict, scaler, group_join)
+            executor.train_one_epoc(model, optimizer, scheduler, train_data_loader, cv_data_loader, writer, info_dict, scaler, group_join, ref_model=ref_model)
         dist.destroy_process_group(group_join)
 
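The DPO wiring above depends on the new cosyvoice/utils/losses.py (+37 lines), which is not shown in this diff. For orientation only, a textbook DPO loss taking the same constructor-style arguments (beta, label_smoothing, ipo) typically looks like the sketch below; this is the standard formulation, not the actual contents of losses.py.

import torch
import torch.nn.functional as F

def dpo_loss(policy_chosen_logps, policy_rejected_logps,
             reference_chosen_logps, reference_rejected_logps,
             beta=0.01, label_smoothing=0.0, ipo=False):
    # Log-ratio of chosen vs. rejected sequences under the policy and the frozen reference model.
    pi_logratios = policy_chosen_logps - policy_rejected_logps
    ref_logratios = reference_chosen_logps - reference_rejected_logps
    logits = pi_logratios - ref_logratios

    if ipo:
        # IPO variant: squared distance from the 1/(2*beta) margin.
        losses = (logits - 1.0 / (2.0 * beta)) ** 2
    else:
        # Conservative DPO with optional label smoothing.
        losses = (
            -F.logsigmoid(beta * logits) * (1.0 - label_smoothing)
            - F.logsigmoid(-beta * logits) * label_smoothing
        )

    # Implicit rewards are handy for logging; detached so they carry no gradient.
    chosen_rewards = beta * (policy_chosen_logps - reference_chosen_logps).detach()
    rejected_rewards = beta * (policy_rejected_logps - reference_rejected_logps).detach()
    return losses.mean(), chosen_rewards, rejected_rewards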
xinference/thirdparty/cosyvoice/cli/cosyvoice.py

@@ -26,7 +26,7 @@ from cosyvoice.utils.class_utils import get_model_type
 
 class CosyVoice:
 
-    def __init__(self, model_dir, load_jit=False, load_trt=False, fp16=False):
+    def __init__(self, model_dir, load_jit=False, load_trt=False, fp16=False, trt_concurrent=1):
         self.instruct = True if '-Instruct' in model_dir else False
         self.model_dir = model_dir
         self.fp16 = fp16
@@ -59,6 +59,7 @@ class CosyVoice:
         if load_trt:
             self.model.load_trt('{}/flow.decoder.estimator.{}.mygpu.plan'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32'),
                                 '{}/flow.decoder.estimator.fp32.onnx'.format(model_dir),
+                                trt_concurrent,
                                 self.fp16)
         del configs
 
@@ -140,7 +141,7 @@ class CosyVoice:
 
 class CosyVoice2(CosyVoice):
 
-    def __init__(self, model_dir, load_jit=False, load_trt=False, fp16=False,
+    def __init__(self, model_dir, load_jit=False, load_trt=False, load_vllm=False, fp16=False, trt_concurrent=1):
         self.instruct = True if '-Instruct' in model_dir else False
         self.model_dir = model_dir
         self.fp16 = fp16
@@ -162,15 +163,18 @@ class CosyVoice2(CosyVoice):
         if torch.cuda.is_available() is False and (load_jit is True or load_trt is True or fp16 is True):
             load_jit, load_trt, fp16 = False, False, False
             logging.warning('no cuda device, set load_jit/load_trt/fp16 to False')
-        self.model = CosyVoice2Model(configs['llm'], configs['flow'], configs['hift'], fp16
+        self.model = CosyVoice2Model(configs['llm'], configs['flow'], configs['hift'], fp16)
         self.model.load('{}/llm.pt'.format(model_dir),
-                        '{}/flow.pt'.format(model_dir)
+                        '{}/flow.pt'.format(model_dir),
                         '{}/hift.pt'.format(model_dir))
+        if load_vllm:
+            self.model.load_vllm('{}/vllm'.format(model_dir))
         if load_jit:
             self.model.load_jit('{}/flow.encoder.{}.zip'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32'))
         if load_trt:
             self.model.load_trt('{}/flow.decoder.estimator.{}.mygpu.plan'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32'),
                                 '{}/flow.decoder.estimator.fp32.onnx'.format(model_dir),
+                                trt_concurrent,
                                 self.fp16)
         del configs
 
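For the updated CosyVoice2 constructor, a hypothetical call exercising the new load_vllm and trt_concurrent parameters is sketched below; the model directory is a placeholder, and the comments on what each flag triggers are inferred from the load_trt/load_vllm calls visible in this hunk rather than from project documentation.

from cosyvoice.cli.cosyvoice import CosyVoice2

# Placeholder path; substitute a real CosyVoice2 model directory.
model = CosyVoice2(
    'pretrained_models/CosyVoice2-0.5B',
    load_jit=False,
    load_trt=True,       # loads {model_dir}/flow.decoder.estimator.*.mygpu.plan
    load_vllm=True,      # new: loads the engine from {model_dir}/vllm via load_vllm()
    fp16=True,
    trt_concurrent=2,    # new: forwarded to load_trt (presumably concurrent TRT contexts)
)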
xinference/thirdparty/cosyvoice/cli/frontend.py

@@ -28,9 +28,9 @@ try:
     import ttsfrd
     use_ttsfrd = True
 except ImportError:
-    print("failed to import ttsfrd, use
-    from
-    from
+    print("failed to import ttsfrd, use wetext instead")
+    from wetext import Normalizer as ZhNormalizer
+    from wetext import Normalizer as EnNormalizer
     use_ttsfrd = False
 from cosyvoice.utils.file_utils import logging
 from cosyvoice.utils.frontend_utils import contains_chinese, replace_blank, replace_corner_mark, remove_bracket, spell_out_number, split_paragraph, is_only_punctuation
@@ -68,7 +68,7 @@ class CosyVoiceFrontEnd:
                 'failed to initialize ttsfrd resource'
             self.frd.set_lang_type('pinyinvg')
         else:
-            self.zh_tn_model = ZhNormalizer(remove_erhua=False
+            self.zh_tn_model = ZhNormalizer(remove_erhua=False)
             self.en_tn_model = EnNormalizer()
             self.inflect_parser = inflect.engine()
 