trme 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {trme-0.1.0 → trme-0.2.0}/Cargo.toml +1 -1
  2. {trme-0.1.0 → trme-0.2.0}/Dockerfile +0 -1
  3. {trme-0.1.0 → trme-0.2.0}/PKG-INFO +1 -1
  4. trme-0.2.0/build.rs +40 -0
  5. {trme-0.1.0 → trme-0.2.0}/pyproject.toml +1 -1
  6. {trme-0.1.0 → trme-0.2.0}/src/lib.rs +1 -1
  7. trme-0.2.0/trme/__init__.py +67 -0
  8. trme-0.1.0/build.rs +0 -40
  9. trme-0.1.0/trme_torch.py +0 -95
  10. {trme-0.1.0 → trme-0.2.0}/Cargo.lock +0 -0
  11. {trme-0.1.0 → trme-0.2.0}/Makefile +0 -0
  12. {trme-0.1.0 → trme-0.2.0}/README.md +0 -0
  13. {trme-0.1.0 → trme-0.2.0}/RESEARCH_REPORT.md +0 -0
  14. {trme-0.1.0 → trme-0.2.0}/RESEARCH_REPORT_FINAL.md +0 -0
  15. {trme-0.1.0 → trme-0.2.0}/RESEARCH_REPORT_V4.md +0 -0
  16. {trme-0.1.0 → trme-0.2.0}/TRME_ISA.md +0 -0
  17. {trme-0.1.0 → trme-0.2.0}/benchmark_rsr.py +0 -0
  18. {trme-0.1.0 → trme-0.2.0}/cosim_verilator.cpp +0 -0
  19. {trme-0.1.0 → trme-0.2.0}/energy_estimator.py +0 -0
  20. {trme-0.1.0 → trme-0.2.0}/fmm_core.py +0 -0
  21. {trme-0.1.0 → trme-0.2.0}/fmm_cpp_binding.py +0 -0
  22. {trme-0.1.0 → trme-0.2.0}/fmm_octree.cpp +0 -0
  23. {trme-0.1.0 → trme-0.2.0}/optical_fdtd.py +0 -0
  24. {trme-0.1.0 → trme-0.2.0}/optical_noise.py +0 -0
  25. {trme-0.1.0 → trme-0.2.0}/quantize.cpp +0 -0
  26. {trme-0.1.0 → trme-0.2.0}/rsr_fused.cpp +0 -0
  27. {trme-0.1.0 → trme-0.2.0}/rsr_gemm.cpp +0 -0
  28. {trme-0.1.0 → trme-0.2.0}/rtl/axi_stream_wrapper.v +0 -0
  29. {trme-0.1.0 → trme-0.2.0}/rtl/clifford_alu.v +0 -0
  30. {trme-0.1.0 → trme-0.2.0}/rtl/hbm_axi_stub.v +0 -0
  31. {trme-0.1.0 → trme-0.2.0}/rtl/lns_adder.v +0 -0
  32. {trme-0.1.0 → trme-0.2.0}/rtl/rns_core.v +0 -0
  33. {trme-0.1.0 → trme-0.2.0}/rtl/rns_pipeline.v +0 -0
  34. {trme-0.1.0 → trme-0.2.0}/rtl/rsr_unit.v +0 -0
  35. {trme-0.1.0 → trme-0.2.0}/rtl/systolic_array_4x4.v +0 -0
  36. {trme-0.1.0 → trme-0.2.0}/rtl/systolic_array_NxN.v +0 -0
  37. {trme-0.1.0 → trme-0.2.0}/rtl/tb_lns_adder.v +0 -0
  38. {trme-0.1.0 → trme-0.2.0}/rtl/tb_systolic.v +0 -0
  39. {trme-0.1.0 → trme-0.2.0}/rtl/trme_top.v +0 -0
  40. {trme-0.1.0 → trme-0.2.0}/rtl/tropical_alu.v +0 -0
  41. {trme-0.1.0 → trme-0.2.0}/test_compiler.py +0 -0
  42. {trme-0.1.0 → trme-0.2.0}/test_torch_integration.py +0 -0
  43. {trme-0.1.0 → trme-0.2.0}/triton_rsr.py +0 -0
  44. {trme-0.1.0 → trme-0.2.0}/trme_autotune.py +0 -0
  45. {trme-0.1.0 → trme-0.2.0}/trme_compiler.py +0 -0
  46. {trme-0.1.0 → trme-0.2.0}/trme_cosim.py +0 -0
  47. {trme-0.1.0 → trme-0.2.0}/trme_sim.py +0 -0
  48. {trme-0.1.0 → trme-0.2.0}/verilator/sim_main.cpp +0 -0
@@ -7,7 +7,7 @@ authors = ["TRME Team"]
 readme = "README.md"
 
 [lib]
-name = "trme"
+name = "_trme_backend"
 crate-type = ["cdylib"]
 
 [dependencies]
@@ -10,7 +10,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     curl \
     git \
     libomp-dev \
-    patchelf \
     && rm -rf /var/lib/apt/lists/*
 
 # Install Rust (for Maturin)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: trme
-Version: 0.1.0
+Version: 0.2.0
 Requires-Dist: torch
 Requires-Dist: numpy
 Requires-Dist: scipy
trme-0.2.0/build.rs ADDED
@@ -0,0 +1,40 @@
+fn main() {
+    // 1. Instancia o compilador C++
+    let mut build = cc::Build::new();
+
+    // 2. Adiciona o arquivo fonte C++
+    // CERTIFIQUE-SE que o arquivo "rsr_gemm.cpp" está na mesma pasta que o Cargo.toml
+    build.file("rsr_gemm.cpp");
+
+    // Indica que é C++
+    build.cpp(true);
+
+    // 3. Configura Flags baseadas no Sistema Operacional
+    let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
+
+    if target_os == "windows" {
+        // --- Configuração WINDOWS (MSVC) ---
+        build.flag("/O2"); // Otimização máxima
+        build.flag("/EHsc"); // Tratamento de exceções padrão
+        // build.flag("/openmp"); // Descomente se seu C++ usar OpenMP (#include <omp.h>)
+    } else {
+        // --- Configuração LINUX/MAC ---
+        build.flag("-O3");
+        build.flag("-march=native");
+        build.flag("-fopenmp");
+
+        // Só linka essas libs se NÃO for Windows
+        println!("cargo:rustc-link-lib=gomp");
+        println!("cargo:rustc-link-lib=stdc++");
+    }
+
+    // 4. Compila a biblioteca estática "trme_core"
+    // Isso gera o arquivo .lib que o linker estava reclamando que não existia
+    build.compile("trme_core");
+
+    // 5. Avisa o Cargo para recompilar se o C++ mudar
+    println!("cargo:rerun-if-changed=rsr_gemm.cpp");
+
+    // Avisa onde procurar a lib (na pasta de build atual)
+    println!("cargo:rustc-link-search=native=.");
+}
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "trme"
-version = "0.1.0"
+version = "0.2.0"
 description = "Tensor-RNS-Multipole Engine (TRME) Framework"
 requires-python = ">=3.8"
 dependencies = [
@@ -50,7 +50,7 @@ fn rsr_gemm(
 }
 
 #[pymodule]
-fn trme(_py: Python, m: &PyModule) -> PyResult<()> {
+fn _trme_backend(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(rsr_gemm, m)?)?;
     Ok(())
 }
trme-0.2.0/trme/__init__.py ADDED
@@ -0,0 +1,67 @@
+import torch
+import numpy as np
+import _trme_backend
+
+class TRMEMatmul(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, input, weight, bias=None, block_size=4):
+        """
+        input: (N, K)
+        weight: (M, K)
+        bias: (M,) Optional
+        """
+        if isinstance(bias, int):
+            block_size = bias
+            bias = None
+
+        # Quantization (A -> [0..3], B -> Int8)
+        # Note: We do simple casting here for the prototype.
+        # Ideally calling C++ quantization logic.
+        A_cpu = input.detach().cpu().numpy()
+        A_q = np.clip(np.abs(A_cpu), 0, 3).astype(np.int8)
+
+        B_cpu = weight.detach().cpu().numpy().T
+        B_q = np.ascontiguousarray(np.clip(B_cpu, -127, 127).astype(np.int8))
+
+        N, K = A_q.shape
+        _, M = B_q.shape
+
+        # Flatten for Rust FFI
+        a_bytes = A_q.tobytes()
+        b_bytes = B_q.tobytes()
+
+        # Call Rust Backend
+        # rsr_gemm returns list[int] (flattened C)
+        c_flat = _trme_backend.rsr_gemm(a_bytes, b_bytes, N, K, M, int(block_size))
+
+        # Reshape and Convert to Tensor
+        C_q = np.array(c_flat, dtype=np.int32).reshape(N, M)
+        output = torch.from_numpy(C_q.astype(np.float32))
+
+        if bias is not None:
+            output += bias.cpu()
+
+        ctx.save_for_backward(input, weight, bias)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        input, weight, bias = ctx.saved_tensors
+        grad_input = grad_weight = grad_bias = None
+
+        if ctx.needs_input_grad[0]:
+            grad_input = grad_output.matmul(weight)
+        if ctx.needs_input_grad[1]:
+            grad_weight = grad_output.t().matmul(input)
+        if bias is not None and ctx.needs_input_grad[2]:
+            grad_bias = grad_output.sum(0)
+
+        return grad_input, grad_weight, grad_bias, None
+
+def matmul(input, weight):
+    # Basic matmul interface
+    return TRMEMatmul.apply(input, weight)
+
+def linear(input, weight, bias=None, block_size=4):
+    # nn.Linear interface
+    return TRMEMatmul.apply(input, weight, bias, block_size)
trme-0.1.0/build.rs DELETED
@@ -1,40 +0,0 @@
-fn main() {
-    // 1. Instancia o compilador C++
-    let mut build = cc::Build::new();
-
-    // 2. Adiciona o arquivo fonte C++
-    // IMPORTANTE: Verifique se o nome do arquivo na sua pasta é exatamente este.
-    // Baseado no seu README, deve ser "rsr_gemm.cpp" ou "rsr_fused.cpp".
-    // Se tiver os dois, adicione outra linha: build.file("rsr_fused.cpp");
-    build.file("rsr_gemm.cpp");
-
-    // Indica que é C++ (não C puro)
-    build.cpp(true);
-
-    // 3. Configura Flags específicas por Sistema Operacional
-    let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
-
-    if target_os == "windows" {
-        // --- Configuração WINDOWS (MSVC) ---
-        build.flag("/O2"); // Otimização máxima
-        build.flag("/EHsc"); // Tratamento de exceções C++ padrão
-        build.flag("/openmp"); // OpenMP nativo do Visual Studio
-        // No Windows, não precisamos linkar stdc++ ou gomp manualmente
-    } else {
-        // --- Configuração LINUX/MAC ---
-        build.flag("-O3");
-        build.flag("-march=native");
-        build.flag("-fopenmp");
-
-        // No Linux, precisamos pedir para linkar as libs explicitamente no final
-        println!("cargo:rustc-link-lib=gomp");
-        println!("cargo:rustc-link-lib=stdc++");
-    }
-
-    // 4. Compila a biblioteca estática "trme_core.lib"
-    // Isso vai gerar o arquivo que estava faltando no erro anterior
-    build.compile("trme_core");
-
-    // 5. Instrui o Cargo a recompilar se o C++ mudar
-    println!("cargo:rerun-if-changed=rsr_gemm.cpp");
-}
trme-0.1.0/trme_torch.py DELETED
@@ -1,95 +0,0 @@
-import torch
-import ctypes
-import numpy as np
-import os
-
-# Load Library
-_lib_path = os.path.abspath('./librsr.so')
-if os.path.exists(_lib_path):
-    _lib = ctypes.CDLL(_lib_path)
-
-    _lib.rsr_gemm_dispatch.argtypes = [
-        np.ctypeslib.ndpointer(dtype=np.int8, ndim=2, flags='C_CONTIGUOUS'),
-        np.ctypeslib.ndpointer(dtype=np.int8, ndim=2, flags='C_CONTIGUOUS'),
-        np.ctypeslib.ndpointer(dtype=np.int32, ndim=2, flags='C_CONTIGUOUS'),
-        ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int
-    ]
-else:
-    print("Warning: librsr.so not found. TRME Ops will fail.")
-    _lib = None
-
-class TRMEMatmul(torch.autograd.Function):
-    @staticmethod
-    def forward(ctx, input, weight, bias=None, block_size=4):
-        """
-        input: (N, K)
-        weight: (M, K)
-        bias: (M,) Optional
-        """
-        # Check if block_size is passed as positional arg
-        if isinstance(bias, int):
-            block_size = bias
-            bias = None
-
-        # Quantization
-        A_cpu = input.detach().cpu().numpy()
-        A_q = np.clip(np.abs(A_cpu), 0, 3).astype(np.int8)
-
-        B_cpu = weight.detach().cpu().numpy().T
-        B_q = np.ascontiguousarray(np.clip(B_cpu, -127, 127).astype(np.int8))
-
-        N, K = A_q.shape
-        _, M = B_q.shape
-
-        C_q = np.zeros((N, M), dtype=np.int32)
-
-        # Handle Bias (Quantized int32)
-        bias_ptr = None
-        if bias is not None:
-            bias_q = bias.detach().cpu().numpy().astype(np.int32)
-            # Check contiguity
-            if not bias_q.flags['C_CONTIGUOUS']:
-                bias_q = np.ascontiguousarray(bias_q)
-            bias_ptr = bias_q.ctypes.data_as(ctypes.POINTER(ctypes.c_int32))
-        else:
-            bias_ptr = ctypes.POINTER(ctypes.c_int32)()
-
-        # Call Kernel
-        if _lib:
-            # Update signature dynamically if needed
-            # For V5, we enable the real call
-            if not hasattr(_lib, 'rsr_gemm_dispatch_configured'):
-                _lib.rsr_gemm_dispatch.argtypes = [
-                    np.ctypeslib.ndpointer(dtype=np.int8, ndim=2, flags='C_CONTIGUOUS'),
-                    np.ctypeslib.ndpointer(dtype=np.int8, ndim=2, flags='C_CONTIGUOUS'),
-                    np.ctypeslib.ndpointer(dtype=np.int32, ndim=2, flags='C_CONTIGUOUS'),
-                    ctypes.POINTER(ctypes.c_int32),
-                    ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int
-                ]
-                _lib.rsr_gemm_dispatch_configured = True
-
-            _lib.rsr_gemm_dispatch(A_q, B_q, C_q, bias_ptr, N, K, M, int(block_size))
-
-        output = torch.from_numpy(C_q.astype(np.float32))
-        # Note: Bias is added in C++ now, no need to add in Python unless fallback
-
-        ctx.save_for_backward(input, weight, bias)
-        return output
-
-    @staticmethod
-    def backward(ctx, grad_output):
-        # Backward pass is standard GEMM (simulated here)
-        input, weight, bias = ctx.saved_tensors
-        grad_input = grad_weight = grad_bias = None
-
-        if ctx.needs_input_grad[0]:
-            grad_input = grad_output.matmul(weight)
-        if ctx.needs_input_grad[1]:
-            grad_weight = grad_output.t().matmul(input)
-        if bias is not None and ctx.needs_input_grad[2]:
-            grad_bias = grad_output.sum(0)
-
-        return grad_input, grad_weight, grad_bias, None
-
-def matmul(input, weight, block_size=4):
-    return TRMEMatmul.apply(input, weight, block_size)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes