PyPI - tico - Versions diffs - 0.2.0.dev260520__tar.gz → 0.2.0.dev260522__tar.gz - Mend

tico 0.2.0.dev260520.tar.gz → 0.2.0.dev260522.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (390) hide show

{tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tico
-Version: 0.2.0.dev260520
+Version: 0.2.0.dev260522
 Summary: Convert Exported Torch Module To Circle
 License: This file provides full text of licenses used in this project

tico-0.2.0.dev260522/tico/_version.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.2.0.dev260522"

{tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/mmmu_eval_utils.py RENAMED Viewed

@@ -19,49 +19,64 @@ from typing import Any, Iterable
 import torch
 from datasets import load_dataset
-from tico.quantization.evaluation.vlm_eval_utils import generate_answer
-MMMU_DATASET = "MMMU/MMMU"
-MMMU_SUBJECTS: list[str] = [
-    "Accounting",
-    "Agriculture",
-    "Architecture_and_Engineering",
-    "Art",
-    "Art_Theory",
-    "Basic_Medical_Science",
-    "Biology",
-    "Chemistry",
-    "Clinical_Medicine",
-    "Computer_Science",
-    "Design",
-    "Diagnostics_and_Laboratory_Medicine",
-    "Economics",
-    "Electronics",
-    "Energy_and_Power",
-    "Finance",
-    "Geography",
-    "History",
-    "Literature",
-    "Manage",
-    "Marketing",
-    "Materials",
-    "Math",
-    "Mechanical_Engineering",
-    "Music",
-    "Pharmacy",
-    "Physics",
-    "Psychology",
-    "Public_Health",
-    "Sociology",
-]
-MMMU_SPLITS: list[str] = [
-    "dev",
-    "validation",
-    "test",
-]
+from tico.quantization.evaluation.vlm_eval_utils import (
+    generate_answer,
+    generate_image_only_answer,
+)
+MMMU_DATASETS = ["MMMU/MMMU", "MMMU/MMMU_Pro"]
+MMMU_SUBJECTS: dict[str, list[str]] = {
+    "MMMU/MMMU": [
+        "Accounting",
+        "Agriculture",
+        "Architecture_and_Engineering",
+        "Art",
+        "Art_Theory",
+        "Basic_Medical_Science",
+        "Biology",
+        "Chemistry",
+        "Clinical_Medicine",
+        "Computer_Science",
+        "Design",
+        "Diagnostics_and_Laboratory_Medicine",
+        "Economics",
+        "Electronics",
+        "Energy_and_Power",
+        "Finance",
+        "Geography",
+        "History",
+        "Literature",
+        "Manage",
+        "Marketing",
+        "Materials",
+        "Math",
+        "Mechanical_Engineering",
+        "Music",
+        "Pharmacy",
+        "Physics",
+        "Psychology",
+        "Public_Health",
+        "Sociology",
+    ],
+    "MMMU/MMMU_Pro": [
+        "standard (10 options)",
+        "standard (4 options)",
+        "vision",
+    ],
+}
+MMMU_SPLITS: dict[str, list[str]] = {
+    "MMMU/MMMU": [
+        "dev",
+        "validation",
+        "test",
+    ],
+    "MMMU/MMMU_Pro": [
+        "test",
+    ],
+}
 def take_from_dataset(ds, start: int, n: int) -> Iterable[dict[str, Any]]:
@@ -76,20 +91,25 @@ def take_from_dataset(ds, start: int, n: int) -> Iterable[dict[str, Any]]:
 def load_data(
+    dataset: str,
     subject: str,
-    split: str = "validation",
+    split: str,
     start: int = 0,
     n_samples: int = -1,
     streaming: bool = True,
 ) -> Iterable[dict[str, Any]]:
-    if subject not in MMMU_SUBJECTS:
+    if dataset not in MMMU_DATASETS:
+        raise ValueError(f"Invalid dataset '{dataset}'")
+    if subject not in MMMU_SUBJECTS[dataset]:
         raise ValueError(f"Invalid subject '{subject}'")
-    if split not in MMMU_SPLITS:
+    if split not in MMMU_SPLITS[dataset]:
         raise ValueError(f"Invalid split '{split}'")
     ds: Iterable[dict[str, Any]] = load_dataset(
-        path=MMMU_DATASET,
+        path=dataset,
         name=subject,
         split=split,
         streaming=streaming,
@@ -109,8 +129,8 @@ def get_item_mmmu(ex: dict[str, Any]) -> dict[str, Any]:
     return {
         "id": ex["id"],
-        "image": ex["image_1"],
-        "question": ex["question"],
+        "image": ex["image_1"] if "image_1" in ex else ex["image"],
+        "question": ex["question"] if "question" in ex else "",
         "choices": choices,
         "answer": ex["answer"],
     }
@@ -203,15 +223,30 @@ def extract_answer(generated_text: str) -> str | None:
     """
     text = generated_text.strip()
-    # Look for standalone letter [A-H] at the beginning, e.g. "A", "a", "A.", "a.", "A. Answer", "A Answer"
-    first_char_match = re.match(r"^([A-H])([.\s]+[^\s]+)?\.?$", text, re.IGNORECASE)
+    # Look for a letter at the beginning, e.g. "A", "A.", "(A)", "A Answer".
+    first_char_match = re.match(
+        r"^\s*\(?([A-J])\)?(?:[.)\s]|$)",
+        text,
+        re.IGNORECASE,
+    )
     if first_char_match:
         return first_char_match.group(1).upper()
+    # Common verbose outputs, e.g. "The answer is C", "Answer: C", "Option C".
+    answer_match = re.search(
+        r"\b(?:answer|option|choice)\s*(?:is|:)?\s*\(?([A-J])\)?\b",
+        text,
+        re.IGNORECASE,
+    )
+    if answer_match:
+        return answer_match.group(1).upper()
     return text
 def load_few_shot_examples(
+    dataset: str,
+    split: str,
     subject: str,
     n_shots: int = 5,
 ) -> list[dict[str, Any]]:
@@ -219,6 +254,8 @@ def load_few_shot_examples(
     Load few-shot examples for a given MMMU subject from the 'dev' split.
     Args:
+        dataset: Dataset name.
+        split: Split name (e.g. 'train', 'test', 'validation').
         subject: The subject name.
         n_shots: Number of few-shot examples to load.
@@ -229,8 +266,10 @@ def load_few_shot_examples(
         return []
     ds = load_data(
+        dataset=dataset,
         subject=subject,
-        split="dev",
+        split=split,
+        start=0,
         n_samples=n_shots,
         streaming=True,
     )
@@ -238,9 +277,16 @@ def load_few_shot_examples(
     return [get_item_mmmu(ex) for ex in ds]
+def is_mmmu_pro_vision(dataset: str, subject: str) -> bool:
+    return dataset == "MMMU/MMMU_Pro" and subject == "vision"
 def evaluate_subject(
     model,
     processor,
+    dataset: str,
+    eval_split: str,
+    few_shot_split: str,
     subject: str,
     device: str | torch.device,
     max_new_tokens: int,
@@ -255,7 +301,10 @@ def evaluate_subject(
     Args:
         model: Language model with generation capability.
-        tokenizer: Matching tokenizer for the model.
+        processor: Matching processor for the model.
+        dataset: Dataset name.
+        eval_split: Split name for evaluation (e.g. 'train', 'test', 'validation').
+        few_shot_split: Split name for few-shot examples (e.g. 'train', 'test', 'validation').
         subject: The MMMU subject to evaluate.
         device: Device for inference.
         n_shots: Number of few-shot examples.
@@ -267,11 +316,31 @@ def evaluate_subject(
     Returns:
         A tuple of (correct_count, total_count, skipped_count).
     """
-    few_shot_examples = load_few_shot_examples(subject=subject, n_shots=n_shots)
+    vision_only = is_mmmu_pro_vision(dataset, subject)
+    if vision_only:
+        if n_shots > 0 and verbose:
+            print(
+                "\n[WARNING] MMMU-Pro vision subset is evaluated image-only; "
+                f"ignoring n_shots={n_shots}."
+            )
+        few_shot_examples: list[dict[str, Any]] = []
+    else:
+        few_shot_examples = load_few_shot_examples(
+            dataset=dataset, split=few_shot_split, subject=subject, n_shots=n_shots
+        )
+    # If we take few-shot examples from the same split as evaluation examples,
+    # then exclude few-shot examples from the evaluation set by adjusting start argument to load_data.
+    if few_shot_examples and eval_split == few_shot_split:
+        start = n_shots
+    else:
+        start = 0
     test_data = load_data(
+        dataset=dataset,
         subject=subject,
-        split="validation",
+        split=eval_split,
+        start=start,
         n_samples=n_samples,
         streaming=True,
     )
@@ -283,7 +352,7 @@ def evaluate_subject(
     ex: dict[str, Any]
     for ex in test_data:
         # Skip questions with multiple images
-        if ex["image_2"] is not None:
+        if "image_2" in ex and ex["image_2"] is not None:
             skipped += 1
             if verbose:
                 question: str = ex["question"]
@@ -292,23 +361,59 @@ def evaluate_subject(
         item = get_item_mmmu(ex)
-        prompt = build_few_shot_prompt(
-            question=item["question"],
-            choices=item["choices"],
-            subject=subject,
-            few_shot_examples=few_shot_examples,
-        )
+        if vision_only:
+            prompt = "<image-only>"
+        else:
+            prompt = build_few_shot_prompt(
+                question=item["question"],
+                choices=item["choices"],
+                subject=subject,
+                few_shot_examples=few_shot_examples,
+            )
-        generated = generate_answer(
-            model=model,
-            processor=processor,
-            question=prompt,
-            image=item["image"],
-            device=device,
-            max_new_tokens=max_new_tokens,
-            max_seq_len=max_seq_len,
-            temperature=temperature,
-        )
+        try:
+            if vision_only:
+                generated = generate_image_only_answer(
+                    model=model,
+                    processor=processor,
+                    image=item["image"],
+                    question="Answer the multiple-choice question shown in the image. Return only one letter from A to J.",
+                    device=device,
+                    max_new_tokens=max_new_tokens,
+                    max_seq_len=max_seq_len,
+                    temperature=temperature,
+                )
+            else:
+                generated = generate_answer(
+                    model=model,
+                    processor=processor,
+                    question=prompt,
+                    image=item["image"],
+                    device=device,
+                    max_new_tokens=max_new_tokens,
+                    max_seq_len=max_seq_len,
+                    temperature=temperature,
+                )
+        except ValueError as error:
+            if "Mismatch in `image` token count between text and `input_ids`." in str(
+                error
+            ):
+                if verbose:
+                    print(
+                        f"\n[WARNING] prompt too long for the specified max_seq_len={max_seq_len}. Skipping."
+                    )
+                    print(f"Error: {error}")
+                    print(f"Prompt: {prompt}")
+                skipped += 1
+                continue
+            else:
+                raise error
+        except RuntimeError as error:
+            if verbose:
+                print(f"[ERROR]: {error}")
+                print(f"Prompt: {prompt}")
+            skipped += 1
+            continue
         predicted = extract_answer(generated)
         gold = item["answer"].upper()
@@ -319,7 +424,10 @@ def evaluate_subject(
         if verbose:
             print(f"\n[Sample {total}] Subject: {subject}")
-            print(f"Q: {item['question'][:100]}...")
+            if vision_only:
+                print("Q: <embedded in image>")
+            else:
+                print(f"Q: {item['question'][:100]}...")
             print(f"Choices: {item['choices']}")
             print(
                 f"Generated: {generated}, Predicted: {predicted}, Gold: {gold}, Correct: {is_correct}"
@@ -331,6 +439,7 @@ def evaluate_subject(
 def evaluate_mmmu(
     model,
     processor,
+    dataset: str,
     subjects: list[str] | None = None,
     device: str | torch.device = "cuda",
     n_shots: int = 5,
@@ -345,7 +454,8 @@ def evaluate_mmmu(
     Args:
         model: Language model with generation capability.
-        tokenizer: Matching tokenizer for the model.
+        processor: Matching processor for the model.
+        dataset: Dataset name.
         subjects: List of subjects to evaluate. Use None for all subjects.
         device: Device for inference.
         n_shots: Number of few-shot examples per subject.
@@ -357,8 +467,14 @@ def evaluate_mmmu(
     Returns:
         Aggregated results dictionary in '{ subject: (correct, total, skipped) }' format.
     """
+    if dataset not in MMMU_DATASETS:
+        raise ValueError(f"Invalid dataset '{dataset}'")
     if subjects is None:
-        subjects = MMMU_SUBJECTS
+        subjects = MMMU_SUBJECTS[dataset]
+    eval_split = "validation" if dataset == "MMMU/MMMU" else "test"
+    few_shot_split = "test"
     # { subject: (correct, total) }
     results: dict[str, tuple[int, int, int]] = {}
@@ -370,6 +486,9 @@ def evaluate_mmmu(
         correct, total, skipped = evaluate_subject(
             model=model,
             processor=processor,
+            dataset=dataset,
+            eval_split=eval_split,
+            few_shot_split=few_shot_split,
             subject=subject,
             device=device,
             n_shots=n_shots,

{tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/vlm_eval_utils.py RENAMED Viewed

@@ -423,6 +423,85 @@ def generate_answer(
     return processor.tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
+@torch.no_grad()
+def generate_image_only_answer(
+    model,
+    processor,
+    image,
+    device: str | torch.device,
+    question: str | None = None,
+    max_new_tokens: int = 16,
+    temperature: float = 0.0,
+    max_seq_len: int | None = None,
+) -> str:
+    """
+    Generate an answer from the image only.
+    Args:
+        model: Vision-language generation model.
+        processor: Matching processor for the model.
+        image: Input image.
+        question: Optional text question.
+        device: Device on which generation should run.
+        max_new_tokens: Maximum number of generated tokens.
+        temperature: Sampling temperature. Greedy decoding is used when this
+                     value is less than or equal to zero.
+        max_seq_len: Optional maximum text sequence length for processor
+                     tokenization.
+    Returns:
+        The decoded model answer string.
+    """
+    content: list = [{"type": "image"}]
+    if question is not None:
+        content.append(
+            {
+                "type": "text",
+                "text": question,
+            }
+        )
+    messages = [
+        {
+            "role": "user",
+            "content": content,
+        }
+    ]
+    prompt = processor.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+    )
+    processor_kwargs: dict[str, Any] = {
+        "text": prompt,
+        "images": image,
+        "return_tensors": "pt",
+    }
+    if max_seq_len is not None and max_seq_len > 0:
+        processor_kwargs["truncation"] = True
+        processor_kwargs["max_length"] = max_seq_len
+    inputs = processor(**processor_kwargs)
+    inputs = move_inputs_to_device(inputs, device)
+    do_sample = temperature > 0.0
+    gen_kwargs: dict[str, Any] = {
+        "max_new_tokens": max_new_tokens,
+        "do_sample": do_sample,
+    }
+    if do_sample:
+        gen_kwargs["temperature"] = temperature
+    out_ids = model.generate(**inputs, **gen_kwargs)
+    input_len = inputs["input_ids"].shape[1]
+    gen_ids = out_ids[0, input_len:]
+    return processor.tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
 class CocoResult(TypedDict):
     image_id: str
     caption: str

tico-0.2.0.dev260522/tico/quantization/examples/evaluate.py ADDED Viewed

@@ -0,0 +1,74 @@
+# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import torch
+from tico.quantization.recipes.adapters import get_adapter
+from tico.quantization.recipes.config import load_recipe_config
+from tico.quantization.recipes.context import RecipeContext
+from tico.quantization.recipes.utils import set_seed
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Evaluate an FP or fake-quant checkpoint."
+    )
+    parser.add_argument("--config", required=True, help="Base recipe config.")
+    parser.add_argument(
+        "--checkpoint", default=None, help="Optional torch checkpoint to evaluate."
+    )
+    parser.add_argument("--model", default=None, help="Override model.name_or_path.")
+    parser.add_argument("--device", default=None, help="Override runtime.device.")
+    parser.add_argument(
+        "--tasks",
+        default=None,
+        help="Override eval tasks. LLM: lm_eval_tasks, VLM: vlm_tasks.",
+    )
+    parser.add_argument("--set", action="append", default=[], metavar="KEY=VALUE")
+    return parser.parse_args()
+def main() -> None:
+    args = parse_args()
+    overrides = list(args.set)
+    overrides.append("evaluation.enabled=true")
+    if args.model:
+        overrides.append(f"model.name_or_path={args.model}")
+    if args.device:
+        overrides.append(f"runtime.device={args.device}")
+    cfg = load_recipe_config(args.config, overrides=overrides)
+    set_seed(cfg.get("runtime", {}).get("seed", 42))
+    adapter = get_adapter(cfg["model"]["family"])
+    ctx = RecipeContext(cfg=cfg, adapter=adapter)
+    ctx = adapter.load_model(ctx)
+    if args.checkpoint:
+        ctx.model = torch.load(args.checkpoint, weights_only=False).eval()
+    if args.tasks:
+        if adapter.family == "llama":
+            cfg.setdefault("evaluation", {})["lm_eval_tasks"] = args.tasks
+        else:
+            cfg.setdefault("evaluation", {})["vlm_tasks"] = [
+                t.strip() for t in args.tasks.split(",") if t.strip()
+            ]
+    adapter.evaluate(ctx)
+if __name__ == "__main__":
+    main()

tico-0.2.0.dev260522/tico/quantization/examples/inspect.py ADDED Viewed

@@ -0,0 +1,87 @@
+# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+from tico.quantization.recipes.adapters import get_adapter
+from tico.quantization.recipes.config import load_recipe_config
+from tico.quantization.recipes.context import RecipeContext
+from tico.quantization.recipes.debug.static_llama_runtime import (
+    run_static_llama_runtime,
+    StaticLlamaRuntimeConfig,
+)
+from tico.quantization.recipes.debug.tied_embedding import (
+    run_tied_embedding_smoke,
+    TiedEmbeddingSmokeConfig,
+)
+from tico.quantization.recipes.debug.trace import trace_ptq_parity
+from tico.quantization.recipes.utils import set_seed
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Inspect/debug quantization recipes.")
+    parser.add_argument("--config", required=True, help="Recipe config.")
+    parser.add_argument(
+        "--mode",
+        choices=["trace", "static-llama-runtime", "tied-embedding-smoke"],
+        default="trace",
+    )
+    parser.add_argument("--model", default=None, help="Override model.name_or_path.")
+    parser.add_argument("--device", default=None, help="Override runtime.device.")
+    parser.add_argument("--enable-quantization", action="store_true")
+    parser.add_argument("--interesting-modules", nargs="*", default=[])
+    parser.add_argument("--set", action="append", default=[], metavar="KEY=VALUE")
+    return parser.parse_args()
+def main() -> None:
+    args = parse_args()
+    overrides = list(args.set)
+    if args.model:
+        overrides.append(f"model.name_or_path={args.model}")
+    if args.device:
+        overrides.append(f"runtime.device={args.device}")
+    cfg = load_recipe_config(args.config, overrides=overrides)
+    set_seed(cfg.get("runtime", {}).get("seed", 42))
+    if args.mode == "tied-embedding-smoke":
+        smoke_cfg = TiedEmbeddingSmokeConfig(
+            **cfg.get("debug", {}).get("tied_embedding", {})
+        )
+        run_tied_embedding_smoke(smoke_cfg)
+        return
+    if args.mode == "static-llama-runtime":
+        runtime_cfg = StaticLlamaRuntimeConfig(
+            **cfg.get("debug", {}).get("static_llama_runtime", {})
+        )
+        run_static_llama_runtime(runtime_cfg)
+        return
+    adapter = get_adapter(cfg["model"]["family"])
+    ctx = RecipeContext(cfg=cfg, adapter=adapter)
+    ctx = adapter.load_model(ctx)
+    ctx.calibration_inputs = adapter.build_calibration_inputs(ctx)
+    if args.mode == "trace":
+        trace_ptq_parity(
+            ctx,
+            enable_quantization=args.enable_quantization,
+            interesting_modules=args.interesting_modules,
+        )
+if __name__ == "__main__":
+    main()

tico 0.2.0.dev260520__tar.gz → 0.2.0.dev260522__tar.gz

tico 0.2.0.dev260520.tar.gz → 0.2.0.dev260522.tar.gz