PyPI - wisent - Versions diffs - 0.7.701__py3-none-any.whl → 0.7.1045__py3-none-any.whl - Mend

wisent 0.7.701__py3-none-any.whl → 0.7.1045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (391) hide show

wisent/parameters/lm_eval/working_benchmarks.json ADDED Viewed

@@ -0,0 +1,206 @@
+[
+  "AraDiCE",
+  "ArabCulture",
+  "Tag",
+  "aclue",
+  "acp_bench",
+  "aexams",
+  "afrimgsm_direct_amh",
+  "afrimmlu_direct_amh",
+  "agentbench",
+  "aider_polyglot",
+  "aime",
+  "anli",
+  "apps",
+  "arabic_leaderboard_complete",
+  "arabic_leaderboard_light",
+  "arabicmmlu",
+  "arena_hard",
+  "arithmetic",
+  "asdiv",
+  "babi",
+  "babilong",
+  "bangla_mmlu",
+  "basqueglue",
+  "bbh",
+  "bbq",
+  "belebele",
+  "bertaqa",
+  "blimp",
+  "browsecomp",
+  "c4",
+  "careqa",
+  "catalan_bench",
+  "ceval",
+  "chartqa",
+  "chinese_simpleqa",
+  "cmmlu",
+  "cnmo",
+  "cnmo_2024",
+  "commonsense_qa",
+  "conala",
+  "concode",
+  "copal_id",
+  "coqa",
+  "crows_pairs",
+  "csatqa",
+  "curate",
+  "darija_bench",
+  "darijahellaswag",
+  "darijammlu",
+  "donotanswer",
+  "drop",
+  "ds1000",
+  "eq_bench",
+  "eus_exams",
+  "eus_proficiency",
+  "eus_reading",
+  "eus_trivia",
+  "evalita_LLM",
+  "facts_grounding",
+  "faithbench",
+  "fda",
+  "finsearchcomp",
+  "flames",
+  "fld",
+  "frames",
+  "french_bench",
+  "galician_bench",
+  "global_mmlu",
+  "gpqa",
+  "groundcocoa",
+  "gsm8k",
+  "haerae",
+  "hallucinations_leaderboard",
+  "halueval",
+  "halulens",
+  "headqa",
+  "healthbench",
+  "hellaswag",
+  "hendrycks_ethics",
+  "hendrycks_math",
+  "histoires_morales",
+  "hmmt",
+  "hmmt_feb_2025",
+  "hrm8k",
+  "humaneval",
+  "humaneval_plus",
+  "humanevalpack",
+  "inverse_scaling",
+  "kbl",
+  "kobest",
+  "kormedmcqa",
+  "lambada",
+  "lambada_cloze",
+  "lambada_multilingual",
+  "lambada_multilingual_stablelm",
+  "lingoly",
+  "livecodebench",
+  "livecodebench_lite",
+  "livecodebench_v5",
+  "livecodebench_v6",
+  "livemathbench_cnmo_en",
+  "logiqa",
+  "logiqa2",
+  "longform",
+  "longform_writing",
+  "mastermind",
+  "math",
+  "math500",
+  "mathqa",
+  "mc_taco",
+  "med_concepts_qa",
+  "meddialog",
+  "medmcqa",
+  "medqa",
+  "mercury",
+  "metabench",
+  "mgsm",
+  "mlqa",
+  "mmlu",
+  "mmlu-pro-plus",
+  "mmlu_pro",
+  "mmlu_prox",
+  "mmlusr",
+  "mmmu",
+  "model_written_evals",
+  "moral_stories",
+  "multipl_e",
+  "multiple_cpp",
+  "multiple_go",
+  "multiple_java",
+  "multiple_js",
+  "multiple_py",
+  "multiple_rs",
+  "mutual",
+  "nq_open",
+  "oj_bench",
+  "okapi/hellaswag_multilingual",
+  "okapi/mmlu_multilingual",
+  "okapi/truthfulqa_multilingual",
+  "openbookqa",
+  "paloma",
+  "paws-x",
+  "piqa",
+  "planbench",
+  "polemo2",
+  "politicalbias_qa",
+  "polyglottoxicityprompts",
+  "polymath_en_high",
+  "polymath_en_medium",
+  "polymath_zh_high",
+  "polymath_zh_medium",
+  "prost",
+  "pubmedqa",
+  "qa4mre",
+  "qasper",
+  "race",
+  "realtoxicityprompts",
+  "recode",
+  "refusalbench",
+  "scicode",
+  "sciq",
+  "score",
+  "seal",
+  "seal_0",
+  "simple_cooccurrence_bias",
+  "simpleqa",
+  "siqa",
+  "spanish_bench",
+  "squad_completion",
+  "squadv2",
+  "storycloze",
+  "swag",
+  "swde",
+  "swe_bench_verified",
+  "swe_verified",
+  "tau_bench",
+  "terminal_bench",
+  "tmmluplus",
+  "toolbench",
+  "toolemu",
+  "toolllm",
+  "toxigen",
+  "translation",
+  "travelplanner",
+  "triviaqa",
+  "truthfulqa",
+  "truthfulqa_generation",
+  "webqs",
+  "wikitext",
+  "winogender",
+  "winogrande",
+  "wmdp",
+  "wmt14_en_fr",
+  "wmt14_fr_en",
+  "wmt16_de_en",
+  "wmt16_en_de",
+  "wmt2016",
+  "wsc273",
+  "xcopa",
+  "xnli",
+  "xnli_eu",
+  "xquad",
+  "xstorycloze",
+  "xwinograd"
+]

wisent/parameters/lm_eval/working_benchmarks_categorized.json ADDED Viewed

@@ -0,0 +1,236 @@
+{
+  "coding": [
+    "aider_polyglot",
+    "apps",
+    "conala",
+    "concode",
+    "donotanswer",
+    "ds1000",
+    "humaneval",
+    "humaneval_plus",
+    "humanevalpack",
+    "livecodebench",
+    "livecodebench_lite",
+    "livecodebench_v5",
+    "livecodebench_v6",
+    "mercury",
+    "multipl_e",
+    "multiple_cpp",
+    "multiple_go",
+    "multiple_java",
+    "multiple_js",
+    "multiple_py",
+    "multiple_rs",
+    "oj_bench",
+    "recode",
+    "scicode",
+    "swe_bench_verified",
+    "swe_verified",
+    "terminal_bench"
+  ],
+  "commonsense": [
+    "anli",
+    "hellaswag",
+    "piqa",
+    "prost",
+    "siqa",
+    "storycloze",
+    "swag",
+    "winogender",
+    "winogrande",
+    "wsc273"
+  ],
+  "ethics_values": [
+    "hendrycks_ethics",
+    "histoires_morales",
+    "model_written_evals",
+    "moral_stories"
+  ],
+  "hallucination_factuality": [
+    "browsecomp",
+    "chinese_simpleqa",
+    "facts_grounding",
+    "faithbench",
+    "hallucinations_leaderboard",
+    "halueval",
+    "halulens",
+    "okapi/truthfulqa_multilingual",
+    "simpleqa",
+    "truthfulqa",
+    "truthfulqa_generation"
+  ],
+  "instruction_following": [
+    "arena_hard",
+    "chartqa",
+    "eq_bench",
+    "groundcocoa",
+    "longform",
+    "longform_writing",
+    "mmmu",
+    "travelplanner"
+  ],
+  "knowledge_qa": [
+    "aclue",
+    "commonsense_qa",
+    "metabench",
+    "mmlu",
+    "mmlu-pro-plus",
+    "mmlu_pro",
+    "mmlu_prox",
+    "mmlusr",
+    "nq_open",
+    "openbookqa",
+    "triviaqa",
+    "webqs"
+  ],
+  "language_understanding": [
+    "blimp",
+    "lambada",
+    "lambada_cloze",
+    "mc_taco",
+    "mutual",
+    "paloma",
+    "paws-x",
+    "wikitext"
+  ],
+  "math": [
+    "afrimgsm_direct_amh",
+    "aime",
+    "arithmetic",
+    "asdiv",
+    "cnmo",
+    "cnmo_2024",
+    "gsm8k",
+    "hendrycks_math",
+    "hmmt",
+    "hmmt_feb_2025",
+    "hrm8k",
+    "livemathbench_cnmo_en",
+    "math",
+    "math500",
+    "mathqa",
+    "mgsm",
+    "polymath_en_high",
+    "polymath_en_medium",
+    "polymath_zh_high",
+    "polymath_zh_medium"
+  ],
+  "multilingual": [
+    "AraDiCE",
+    "ArabCulture",
+    "Tag",
+    "aexams",
+    "afrimmlu_direct_amh",
+    "arabic_leaderboard_complete",
+    "arabic_leaderboard_light",
+    "arabicmmlu",
+    "bangla_mmlu",
+    "basqueglue",
+    "belebele",
+    "bertaqa",
+    "catalan_bench",
+    "ceval",
+    "cmmlu",
+    "copal_id",
+    "csatqa",
+    "darija_bench",
+    "darijahellaswag",
+    "darijammlu",
+    "eus_exams",
+    "eus_proficiency",
+    "eus_reading",
+    "eus_trivia",
+    "evalita_LLM",
+    "french_bench",
+    "galician_bench",
+    "global_mmlu",
+    "haerae",
+    "kbl",
+    "kobest",
+    "lambada_multilingual",
+    "lambada_multilingual_stablelm",
+    "mlqa",
+    "okapi/hellaswag_multilingual",
+    "okapi/mmlu_multilingual",
+    "polemo2",
+    "spanish_bench",
+    "tmmluplus",
+    "xcopa",
+    "xnli",
+    "xnli_eu",
+    "xquad",
+    "xstorycloze",
+    "xwinograd"
+  ],
+  "reading_comprehension": [
+    "c4",
+    "coqa",
+    "drop",
+    "qa4mre",
+    "qasper",
+    "race",
+    "squad_completion",
+    "squadv2",
+    "swde"
+  ],
+  "reasoning_logic": [
+    "acp_bench",
+    "babi",
+    "babilong",
+    "bbh",
+    "fld",
+    "frames",
+    "inverse_scaling",
+    "lingoly",
+    "logiqa",
+    "logiqa2",
+    "mastermind",
+    "planbench",
+    "score"
+  ],
+  "safety_bias": [
+    "bbq",
+    "crows_pairs",
+    "curate",
+    "flames",
+    "politicalbias_qa",
+    "polyglottoxicityprompts",
+    "realtoxicityprompts",
+    "refusalbench",
+    "simple_cooccurrence_bias",
+    "toxigen",
+    "wmdp"
+  ],
+  "science_medical": [
+    "careqa",
+    "fda",
+    "gpqa",
+    "headqa",
+    "healthbench",
+    "kormedmcqa",
+    "med_concepts_qa",
+    "meddialog",
+    "medmcqa",
+    "medqa",
+    "pubmedqa",
+    "sciq"
+  ],
+  "tool_use_agents": [
+    "agentbench",
+    "finsearchcomp",
+    "seal",
+    "seal_0",
+    "tau_bench",
+    "toolbench",
+    "toolemu",
+    "toolllm"
+  ],
+  "translation": [
+    "translation",
+    "wmt14_en_fr",
+    "wmt14_fr_en",
+    "wmt16_de_en",
+    "wmt16_en_de",
+    "wmt2016"
+  ]
+}

wisent/tests/test_detector_accuracy.py CHANGED Viewed

@@ -37,7 +37,7 @@ def main():
     pairs = extractor.extract_contrastive_pairs(limit=args.num_pairs)
     print(f"Collecting activations from layer {args.layer}...")
-    collector = ActivationCollector(model=wisent_model, store_device="cpu")
+    collector = ActivationCollector(model=wisent_model)
     pos_activations = []
     neg_activations = []

wisent/tests/visualize_geometry.py CHANGED Viewed

@@ -119,7 +119,7 @@ def main():
     extractor = get_extractor(args.task)
     pairs = extractor.extract_contrastive_pairs(limit=args.num_pairs)
-    collector = ActivationCollector(model=wisent_model, store_device="cpu")
+    collector = ActivationCollector(model=wisent_model)
     if args.multi_config:
         # Run for each structure's best config

{wisent-0.7.701.dist-info → wisent-0.7.1045.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wisent
-Version: 0.7.701
+Version: 0.7.1045
 Summary: Monitor and influence AI Brains
 Home-page: https://github.com/wisent-ai/wisent
 Author: Lukasz Bartoszcze and the Wisent Team
@@ -27,10 +27,14 @@ Requires-Dist: faiss-cpu>=1.7.0
 Requires-Dist: uncensorbench>=0.2.0
 Requires-Dist: pebble>=5.0.0
 Requires-Dist: latex2sympy2_extended>=1.0.0
+Requires-Dist: sae_lens>=0.1.0
+Requires-Dist: trl>=0.7.0
 Provides-Extra: harness
 Requires-Dist: lm-eval==0.4.8; extra == "harness"
 Provides-Extra: cuda
 Requires-Dist: flash-attn>=2.5.0; extra == "cuda"
+Provides-Extra: sparsify
+Requires-Dist: sparsify>=0.1.0; extra == "sparsify"
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier

wisent 0.7.701__py3-none-any.whl → 0.7.1045__py3-none-any.whl

wisent 0.7.701__py3-none-any.whl → 0.7.1045__py3-none-any.whl