PyPI - statedict2pytree - Versions diffs - 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl - Mend

statedict2pytree 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

client/public/bundle.js +615 -234
client/public/bundle.js.map +1 -1
client/public/output.css +164 -217
client/src/App.svelte +331 -91
{statedict2pytree-0.5.2.dist-info → statedict2pytree-0.5.3.dist-info}/METADATA +23 -10
{statedict2pytree-0.5.2.dist-info → statedict2pytree-0.5.3.dist-info}/RECORD +7 -7
{statedict2pytree-0.5.2.dist-info → statedict2pytree-0.5.3.dist-info}/WHEEL +0 -0

client/src/App.svelte CHANGED Viewed

@@ -5,6 +5,7 @@
     import Swal from "sweetalert2";
     let model: string = "model.eqx";
+    let anthropicModel: "opus" | "sonnet" | "haiku" = "haiku";
     const Toast = Swal.mixin({
         toast: true,
@@ -26,7 +27,6 @@
     let jaxFields: Field[] = [];
     let torchFields: Field[] = [];
-    let torchSortable: Sortable;
     onMount(async () => {
         let req = await fetch("/startup/getJaxFields");
         jaxFields = (await req.json()) as Field[];
@@ -35,10 +35,14 @@
         setTimeout(() => {
             initSortable();
         }, 100);
+        setTimeout(() => {
+            onEnd();
+        }, 500);
     });
     function initSortable() {
-        torchSortable = new Sortable(document.getElementById("torch-fields"), {
+        new Sortable(document.getElementById("torch-fields"), {
             animation: 150,
             multiDrag: true,
             ghostClass: "bg-blue-400",
@@ -48,13 +52,6 @@
         });
     }
-    function swap(a: any, b: any, array: any[]) {
-        const temp = array[a];
-        array[a] = array[b];
-        array[b] = temp;
-        return array;
-    }
     function fetchJaxAndTorchFields() {
         let allTorchElements =
             document.getElementById("torch-fields")?.children;
@@ -105,60 +102,58 @@
     }
     function onEnd() {
-        const updatedFields = fetchJaxAndTorchFields();
-        if (updatedFields.error) {
-            Toast.fire({
-                icon: "error",
-                title: updatedFields.error,
-            });
-            return;
-        }
-        for (let i = 0; i < updatedFields.jaxFields.length; i++) {
-            let jaxField = updatedFields.jaxFields[i];
-            let torchField = updatedFields.torchFields[i];
-            if (torchField === undefined) continue;
-            if (torchField.skip === true) {
-                document
-                    .getElementById("jax-" + i)
-                    ?.classList.remove("bg-error");
-                document
-                    .getElementById("torch-" + i)
-                    ?.classList.remove("bg-error");
-                continue;
+        setTimeout(() => {
+            const updatedFields = fetchJaxAndTorchFields();
+            if (updatedFields.error) {
+                Toast.fire({
+                    icon: "error",
+                    title: updatedFields.error,
+                });
+                return;
             }
-            let jaxShape = jaxField.shape;
-            let torchShape = torchField.shape;
-            //@ts-ignore
-            let jaxShapeProduct = jaxShape.reduce((a, b) => a * b, 1);
-            //@ts-ignore
-            let torchShapeProduct = torchShape.reduce((a, b) => a * b, 1);
-            if (jaxShapeProduct !== torchShapeProduct) {
-                document.getElementById("jax-" + i)?.classList.add("bg-error");
-                document
-                    .getElementById("torch-" + i)
-                    ?.classList.add("bg-error");
-            } else {
-                document
-                    .getElementById("jax-" + i)
-                    ?.classList.remove("bg-error");
-                document
-                    .getElementById("torch-" + i)
-                    ?.classList.remove("bg-error");
+            for (let i = 0; i < updatedFields.jaxFields.length; i++) {
+                let jaxField = updatedFields.jaxFields[i];
+                let torchField = updatedFields.torchFields[i];
+                if (torchField === undefined) continue;
+                if (torchField.skip === true) {
+                    document
+                        .getElementById("jax-" + i)
+                        ?.classList.remove("bg-error");
+                    continue;
+                }
+                let jaxShape = jaxField.shape;
+                let torchShape = torchField.shape;
+                //@ts-ignore
+                let jaxShapeProduct = jaxShape.reduce((a, b) => a * b, 1);
+                //@ts-ignore
+                let torchShapeProduct = torchShape.reduce((a, b) => a * b, 1);
+                if (jaxShapeProduct !== torchShapeProduct) {
+                    document
+                        .getElementById("jax-" + i)
+                        ?.classList.add("bg-error");
+                } else {
+                    document
+                        .getElementById("jax-" + i)
+                        ?.classList.remove("bg-error");
+                }
             }
-        }
-        if (updatedFields.torchFields.length > updatedFields.jaxFields.length) {
-            for (
-                let i = updatedFields.jaxFields.length;
-                i < updatedFields.torchFields.length;
-                i++
+            if (
+                updatedFields.torchFields.length >
+                updatedFields.jaxFields.length
             ) {
-                document
-                    .getElementById("torch-" + i)
-                    ?.classList.remove("bg-error");
+                for (
+                    let i = updatedFields.jaxFields.length;
+                    i < updatedFields.torchFields.length;
+                    i++
+                ) {
+                    document
+                        .getElementById("torch-" + i)
+                        ?.classList.remove("bg-error");
+                }
             }
-        }
+        }, 100);
     }
     function checkFields(jaxFields: Field[], torchFields: Field[]) {
         if (jaxFields.length > torchFields.length) {
@@ -194,13 +189,21 @@
         }, 100);
     }
     function addSkipLayer(index: number) {
+        let fields = fetchJaxAndTorchFields();
+        if (fields.error) {
+            Toast.fire({
+                icon: "error",
+                text: fields.error,
+            });
+            return;
+        }
         const newField = {
             skip: true,
-            shape: [],
-            path: "",
-            type: "",
+            shape: [0],
+            path: "SKIP",
+            type: "SKIP",
         } as Field;
-        torchFields = torchFields.toSpliced(index, 0, newField);
+        torchFields = fields.torchFields.toSpliced(index, 0, newField);
         setTimeout(() => {
             onEnd();
         }, 100);
@@ -252,12 +255,247 @@
             });
         }
     }
+    function padToMatch() {
+        let fields = fetchJaxAndTorchFields();
+        if (fields.error) {
+            Toast.fire({
+                icon: "error",
+                text: fields.error,
+            });
+            return;
+        }
+        if (fields.torchFields.length < fields.jaxFields.length) {
+            let toAdd = fields.jaxFields.length - fields.torchFields.length;
+            for (let i = 0; i < toAdd; i++) {
+                setTimeout(() => {
+                    console.log("adding skip at ", i);
+                    addSkipLayer(fields.jaxFields.length + i);
+                }, 100);
+            }
+        }
+    }
+    function removeAllSkipLayers() {
+        let fields = fetchJaxAndTorchFields();
+        if (fields.error) {
+            Toast.fire({
+                icon: "error",
+                text: fields.error,
+            });
+            return;
+        }
+        let filteredFields = [];
+        for (let i = 0; i < fields.torchFields.length; i++) {
+            if (fields.torchFields[i].skip === false) {
+                filteredFields.push(fields.torchFields[i]);
+            }
+        }
+        torchFields = filteredFields;
+    }
+    async function matchByName() {
+        let fields = fetchJaxAndTorchFields();
+        if (fields.error) {
+            Toast.fire({
+                icon: "error",
+                text: fields.error,
+            });
+            return;
+        }
+        if (fields.jaxFields.length !== fields.torchFields.length) {
+            Toast.fire({
+                icon: "error",
+                text: "PyTree and State Dict have diffent lengths. Make sure to pad first!",
+            });
+            return;
+        }
+        Toast.fire({
+            icon: "info",
+            title: "Matching by name...",
+            text: "This can take a while! Hold tight.",
+        });
+        let content = `
+You will get two lists of strings. These strings are fields of a JAX and PyTorch model.
+For example:
+--JAX START--
+.layers[0].weight
+.layers[1].weight
+.layers[2].weight
+.layers[3].weight
+.layers[4].weight
+--JAX END--
+--PYTORCH START--
+layers.0.weight
+layers.1.weight
+layers.4.weight
+layers.2.weight
+layers.3.weight
+--PYTORCH END--
+As you can see, the order doesn't match. This means, you should look at the PyTorch fields and
+rearrange them, such that they match the JAX model. In the above example, the expected return value
+is:
+--PYTORCH START--
+layers.0.weight
+layers.1.weight
+layers.2.weight
+layers.3.weight
+layers.4.weight
+--PYTORCH END--
+Here's another example:
+--JAX START--
+.conv1.weight
+.bn1.weight
+.bn1.bias
+.bn1.state_index.init[0]
+.bn1.state_index.init[1]
+--JAX END--
+--PYTORCH START--
+bn1.running_mean
+bn1.running_var
+conv1.weight
+bn1.weight
+bn1.bias
+--PYTORCH END--
+The expected return value in this case is:
+--PYTORCH START--
+conv1.weight
+bn1.weight
+bn1.bias
+bn1.running_mean
+bn1.running_var
+--PYTORCH END--
+Sometimes, there are so-called "skip-layers" in the PyTorch model. Those can be put anywhere, preferably to
+the end, because your priority is to match those fields that can be matched first! Here's an example:
+--JAX START--
+.layers[0].weight
+.layers[1].weight
+.layers[2].weight
+.layers[3].weight
+.layers[4].weight
+--JAX END--
+--PYTORCH START--
+layers.0.weight
+SKIP
+layers.3.weight
+layers.2.weight
+layers.1.weight
+--PYTORCH START--
+This should return
+--PYTORCH START--
+layers.0.weight
+layers.1.weight
+layers.2.weight
+layers.3.weight
+SKIP
+--PYTORCH START--
+It's not always 100% which belongs to which. Use your best judgement. Start your response with
+--PYTORCH START-- and end it with --PYTORCH END--.
+Here's your input:
+--JAX START--
+        `;
+        for (let i = 0; i < fields.jaxFields.length; i++) {
+            content += fields.jaxFields[i].path + "\n";
+        }
+        content += "--JAX END--\n";
+        content += "\n";
+        content += "--PYTORCH START--\n";
+        for (let i = 0; i < fields.torchFields.length; i++) {
+            content += fields.torchFields[i].path + "\n";
+        }
+        content += "--PYTORCH END--";
+        console.log(content);
+        let req = await fetch("/anthropic", {
+            method: "POST",
+            headers: {
+                "Content-Type": "application/json",
+            },
+            body: JSON.stringify({
+                content: content,
+                model: anthropicModel,
+            }),
+        });
+        let res = await req.json();
+        if (res.error) {
+            Toast.fire({
+                icon: "error",
+                text: res.error,
+            });
+            return;
+        }
+        console.log(res);
+        let responseContent = res.content;
+        let lines = responseContent.split("\n");
+        console.log(lines);
+        let rearrangedTorchFields = [];
+        for (let i = 0; i < lines.length; i++) {
+            let matchingTorchField = fields.torchFields.find(
+                (field) => field.path === lines[i],
+            );
+            if (matchingTorchField) {
+                rearrangedTorchFields.push(matchingTorchField);
+            }
+        }
+        if (fields.torchFields.length !== rearrangedTorchFields.length) {
+            Toast.fire({
+                icon: "error",
+                text: "Some fields are missing in the response. Try a different model instead.",
+            });
+            return;
+        }
+        console.log("rearrangedTorchFields", rearrangedTorchFields);
+        setTimeout(() => {
+            torchFields = rearrangedTorchFields;
+            onEnd();
+            Toast.fire({
+                icon: "success",
+                title: "Success",
+            });
+        }, 500);
+    }
 </script>
 <svelte:head><title>Statedict2PyTree</title></svelte:head>
 <h1 class="text-3xl my-12">Welcome to Torch2Jax</h1>
+<div class="my-4 flex justify-evenly">
+    <button on:click={padToMatch} class="btn btn-accent">Pad To Match</button>
+    <button on:click={removeAllSkipLayers} class="btn btn-secondary"
+        >Remove All Skip Layers</button
+    >
+    <div>
+        <button on:click={matchByName} class="btn btn-warning"
+            >Match By Name</button
+        >
+        <select bind:value={anthropicModel}>
+            <option value="opus">opus</option>
+            <option value="sonnet">sonnet</option>
+            <option value="haiku">haiku</option>
+        </select>
+    </div>
+</div>
 <div class="grid grid-cols-2 gap-x-2">
     <div class="">
         <h2 class="text-2xl">JAX</h2>
@@ -286,40 +524,42 @@
     <div class="">
         <h2 class="text-2xl">PyTorch</h2>
         <div id="torch-fields" class="">
-            {#each torchFields as field, i}
-                <div class="flex space-x-2 border h-12 rounded-xl">
-                    <div
-                        id={"torch-" + String(i)}
-                        data-torch="torch"
-                        data-path={field.path}
-                        data-shape={field.shape}
-                        data-skip={field.skip}
-                        data-type={field.type}
-                        class="flex-1 mx-2 my-auto whitespace-nowrap overflow-x-scroll cursor-pointer"
-                    >
+            {#key torchFields}
+                {#each torchFields as field, i}
+                    <div class="flex space-x-2 border h-12 rounded-xl">
+                        <div
+                            id={"torch-" + String(i)}
+                            data-torch="torch"
+                            data-path={field.path}
+                            data-shape={field.shape}
+                            data-skip={field.skip}
+                            data-type={field.type}
+                            class="flex-1 mx-2 my-auto whitespace-nowrap overflow-x-scroll cursor-pointer"
+                        >
+                            {#if field.skip}
+                                SKIP
+                            {:else}
+                                {field.path}
+                                {field.shape}
+                            {/if}
+                        </div>
                         {#if field.skip}
-                            SKIP
-                        {:else}
-                            {field.path}
-                            {field.shape}
+                            <button
+                                class="btn btn-ghost"
+                                on:click={() => {
+                                    removeSkipLayer(i);
+                                }}>-</button
+                            >
                         {/if}
-                    </div>
-                    {#if field.skip}
                         <button
                             class="btn btn-ghost"
                             on:click={() => {
-                                removeSkipLayer(i);
-                            }}>-</button
+                                addSkipLayer(i);
+                            }}>+</button
                         >
-                    {/if}
-                    <button
-                        class="btn btn-ghost"
-                        on:click={() => {
-                            addSkipLayer(i);
-                        }}>+</button
-                    >
-                </div>
-            {/each}
+                    </div>
+                {/each}
+            {/key}
         </div>
     </div>
 </div>

{statedict2pytree-0.5.2.dist-info → statedict2pytree-0.5.3.dist-info}/METADATA RENAMED Viewed

@@ -1,9 +1,10 @@
 Metadata-Version: 2.3
 Name: statedict2pytree
-Version: 0.5.2
+Version: 0.5.3
 Summary: Converts torch models into PyTrees for Equinox
 Author-email: "Artur A. Galstyan" <mail@arturgalstyan.dev>
 Requires-Python: ~=3.10
+Requires-Dist: anthropic
 Requires-Dist: beartype
 Requires-Dist: equinox
 Requires-Dist: flask
@@ -14,6 +15,7 @@ Requires-Dist: jaxtyping
 Requires-Dist: loguru
 Requires-Dist: penzai
 Requires-Dist: pydantic
+Requires-Dist: python-dotenv
 Requires-Dist: torch
 Requires-Dist: torchvision
 Requires-Dist: typing-extensions
@@ -37,6 +39,21 @@ PRs and other contributions are *highly* welcome! :)
 ## Info
+`statedict2pytree` is a powerful tool for converting PyTorch state dictionaries to JAX pytrees. It provides both programmatic and UI-based methods for mapping between PyTorch and JAX model parameters.
+## Features
+- Convert PyTorch statedicts to JAX pytrees
+- Handle large models with chunked file conversion
+- Provide an "intuitive-ish" UI for parameter mapping
+- Support both in-memory and file-based conversions
+## Installation
+```bash
+pip install statedict2pytree
+```
 The goal of this package is to simplify the conversion from PyTorch models into JAX PyTrees (which can be used e.g. in Equinox). The way this works is by putting both models side my side and aligning the weights in the right order. Then, all statedict2pytree is doing, is iterating over both lists and matching the weight matrices.
 Usually, if you _declared the fields in the same order as in the PyTorch model_, you don't have to rearrange anything -- but the option is there if you need it.
@@ -49,16 +66,12 @@ Currently, there is no sophisticated shape matching in place. Two matrices are c
 (8, 1, 1) and (8, ) match, because (8 _ 1 _ 1 = 8)
-## Get Started
-### Installation
-Run
-```bash
-pip install statedict2pytree
-```
 ### Docs
 Documentation will appear as soon as I have all the necessary features implemented. Until then, check out the "main.py" file for a better example.
+### Disclaimer
+Some of the docstrings and the docs have been written with the help of
+Claude.

{statedict2pytree-0.5.2.dist-info → statedict2pytree-0.5.3.dist-info}/RECORD RENAMED Viewed

@@ -4,14 +4,14 @@ client/package.json,sha256=Ad-MDEQeh7BPHWPYLd3u9sXk8YVuO_dXmpkxxU1Pglo,1044
 client/rollup.config.mjs,sha256=RAepJhL2V5Rf-BlJBZJxllVl0mxtr67GSVr9aU0JUnA,1073
 client/tailwind.config.js,sha256=TfN5eOoFOUPGBou6OoK54M14PtokgxWDJUsV4qkurS8,175
 client/tsconfig.json,sha256=cLHEFXx-Q55XqbF9QjQ4XScSEQ15n-vS5tsTcqY4UAY,158
-client/public/bundle.js,sha256=l4Vu_8_7v6k7XFisnI8jCFzyMeztr9jc3Z3lrrPDpk0,347197
-client/public/bundle.js.map,sha256=s9zOkP-34BWrQFunVbkouwTKVAfL8EyIUcfKgSicH8M,682783
+client/public/bundle.js,sha256=EEQ1-tpIHl5Nr2POytTpdKp9vriYvUGvbUb7mRv4-7s,356425
+client/public/bundle.js.map,sha256=l_aFN6UUVGz9YoLVsk2IwsOWGojpbXmZdvVE-pC9UNQ,694164
 client/public/index.html,sha256=jUx-NPKkFN2EF2lj-8Ml49CEHxKJFWK9seszauI4GE0,335
 client/public/input.css,sha256=zBp60NAZ3bHTLQ7LWIugrCbOQdhiXdbDZjSLJfg6KOw,59
-client/public/output.css,sha256=3iiBiTGfqAeVKuRZRqgcixX3ztSnlp0zqHXkjSKtmVs,38664
-client/src/App.svelte,sha256=hHVoQ_C2xGMmd4d86ftZFeTlGmOcOW_wJ6abq0_qvWo,11170
+client/public/output.css,sha256=80svlSgNV_Fw82IYgqeRjnY86GXMBW0gWm2VyA7n1A8,36658
+client/src/App.svelte,sha256=9pAfpz96Bk-A-5uQEBASvxEht-dPctKAZLBs2cOb2kE,17650
 client/src/empty.ts,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 client/src/main.js,sha256=O_8UgVd1vJM8BcHO7U_6jkL76ZSA6oC7GLLcL9F3JLA,118
-statedict2pytree-0.5.2.dist-info/METADATA,sha256=YgE7bgWDMI6urA71bH-zWleR_mw6SJ5QPneUZVvHL2E,2242
-statedict2pytree-0.5.2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
-statedict2pytree-0.5.2.dist-info/RECORD,,
+statedict2pytree-0.5.3.dist-info/METADATA,sha256=J_rlj3ymHkqbf4NxSHnCgh-Nc6LGgK0mhBoG4exnoqo,2788
+statedict2pytree-0.5.3.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
+statedict2pytree-0.5.3.dist-info/RECORD,,

{statedict2pytree-0.5.2.dist-info → statedict2pytree-0.5.3.dist-info}/WHEEL RENAMED Viewed

File without changes

statedict2pytree 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

statedict2pytree 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl