swifttd-0.1.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
swifttd/__init__.py ADDED
@@ -0,0 +1,5 @@
+ from ._version import __version__
+ # Import C++ bindings module built by pybind11_add_module (swift_td)
+ from swift_td import SwiftTDNonSparse, SwiftTDBinaryFeatures, SwiftTD
+
+ __all__ = ["SwiftTDNonSparse", "SwiftTDBinaryFeatures", "SwiftTD", "__version__"]
swifttd/_version.py ADDED
@@ -0,0 +1 @@
+ __version__ = "0.1.9"
swifttd-0.1.9.dist-info/METADATA ADDED
@@ -0,0 +1,93 @@
+ Metadata-Version: 2.2
+ Name: swifttd
+ Version: 0.1.9
+ Summary: SwiftTD: Fast and Robust TD Learning
+ Author: Khurram Javed
+ License: MIT
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: C++
+ Classifier: Operating System :: OS Independent
+ Project-URL: Homepage, https://github.com/khurramjaved96/SwiftTD
+ Requires-Python: >=3.7
+ Description-Content-Type: text/markdown
+
+ # SwiftTD: A Fast and Robust Algorithm for Temporal Difference Learning
+
+ SwiftTD is an algorithm for learning value functions. It combines step-size adaptation with a bound on the rate of learning. The implementations in this repository use linear function approximation.
+
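For orientation, here is a minimal, illustrative sketch of the plain linear TD(λ) update that the parameters used below (`alpha`, `gamma`, `lambda_`) refer to. The function name `td_lambda_step` and its arguments are hypothetical and not part of the package; SwiftTD's per-weight step-size adaptation and its bound on the rate of learning are not reproduced here.

```python
import numpy as np

# Illustrative sketch only: one online update of plain linear TD(lambda)
# with accumulating eligibility traces. SwiftTD additionally adapts a
# per-weight step size and bounds the overall rate of learning, neither
# of which is shown here.
def td_lambda_step(w, z, phi, phi_next, reward,
                   alpha=1e-2, gamma=0.99, lambda_=0.95):
    delta = reward + gamma * np.dot(w, phi_next) - np.dot(w, phi)  # TD error
    z = gamma * lambda_ * z + phi                                  # trace decay + accumulate
    w = w + alpha * delta * z                                      # weight update
    return w, z
```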
+ ## Installation
+
+ ```bash
+ pip install SwiftTD
+ ```
+
+ ## Usage
+
+ After installation, you can use the three implementations of SwiftTD in Python as follows:
+
+ ```python
+ import swifttd
+
+ # Version of SwiftTD that expects the full feature vector as input. This should only be used if the feature representation is not sparse. Otherwise, the sparse versions are more efficient.
+ td_dense = swifttd.SwiftTDNonSparse(
+     num_of_features=5,    # Number of input features
+     lambda_=0.95,         # Lambda parameter for eligibility traces
+     alpha=1e-2,           # Initial learning rate
+     gamma=0.99,           # Discount factor
+     epsilon=1e-5,         # Small constant for numerical stability
+     eta=0.1,              # Maximum allowed step size (bound on rate of learning)
+     decay=0.999,          # Step size decay rate
+     meta_step_size=1e-3,  # Meta learning rate
+     eta_min=1e-10         # Minimum value of the step-size parameter
+ )
+
+ # Feature vector
+ features = [1.0, 0.0, 0.5, 0.2, 0.0]
+ reward = 1.0
+ prediction = td_dense.step(features, reward)
+ print("Dense prediction:", prediction)
+
+ # Version of SwiftTD that expects the feature indices as input. This version assumes that the features are binary---0 or 1. For learning, the indices of the features that are 1 are provided.
+ td_sparse = swifttd.SwiftTDBinaryFeatures(
+     num_of_features=1000, # Number of input features
+     lambda_=0.95,         # Lambda parameter for eligibility traces
+     alpha=1e-2,           # Initial learning rate
+     gamma=0.99,           # Discount factor
+     epsilon=1e-5,         # Small constant for numerical stability
+     eta=0.1,              # Maximum allowed step size (bound on rate of learning)
+     decay=0.999,          # Step size decay rate
+     meta_step_size=1e-3,  # Meta learning rate
+     eta_min=1e-10         # Minimum value of the step-size parameter
+ )
+
+ # Specify the indices of the features that are 1.
+ active_features = [1, 42, 999]  # Indices of active features
+ reward = 1.0
+ prediction = td_sparse.step(active_features, reward)
+ print("Sparse binary prediction:", prediction)
+
+ # Version of SwiftTD that expects the feature indices and values as input. This version does not assume that the features are binary. For learning, it expects a list of (index, value) pairs. Only the indices of the features that are non-zero need to be provided.
+
+ td_sparse_nonbinary = swifttd.SwiftTD(
+     num_of_features=1000, # Number of input features
+     lambda_=0.95,         # Lambda parameter for eligibility traces
+     alpha=1e-2,           # Initial learning rate
+     gamma=0.99,           # Discount factor
+     epsilon=1e-5,         # Small constant for numerical stability
+     eta=0.1,              # Maximum allowed step size (bound on rate of learning)
+     decay=0.999,          # Step size decay rate
+     meta_step_size=1e-3,  # Meta learning rate
+     eta_min=1e-10         # Minimum value of the step-size parameter
+ )
+
+ # Specify the indices and values of the features that are non-zero.
+ feature_values = [(1, 0.8), (42, 0.3), (999, 1.2)]  # (index, value) pairs
+ reward = 1.0
+ prediction = td_sparse_nonbinary.step(feature_values, reward)
+ print("Sparse non-binary prediction:", prediction)
+ ```
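The snippets above call `step()` once per object. Assuming the constructors and `step(features, reward)` behave as documented above, a continual prediction loop over a stream of observations could look like the sketch below; the `stream` data is synthetic and purely illustrative.

```python
import swifttd

# Sketch of an online prediction loop, assuming the API shown in the README
# above. The (active_features, reward) stream is synthetic.
td = swifttd.SwiftTDBinaryFeatures(
    num_of_features=1000, lambda_=0.95, alpha=1e-2, gamma=0.99,
    epsilon=1e-5, eta=0.1, decay=0.999, meta_step_size=1e-3, eta_min=1e-10,
)

stream = [([1, 42], 0.0), ([42, 999], 1.0), ([1, 999], 0.5)]
for active_features, reward in stream:
    prediction = td.step(active_features, reward)
    print("prediction:", prediction)
```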
+
+ ## Resources
+ - [Paper (PDF)](https://khurramjaved.com/swifttd.pdf)
+ - [Interactive Demo](https://khurramjaved.com/swifttd.html)
+
swifttd-0.1.9.dist-info/RECORD ADDED
@@ -0,0 +1,6 @@
+ swift_td.cpython-310-aarch64-linux-gnu.so,sha256=4rJ0QwyV6btRWOZ8EEYzdaNpATGUIpt6Qnp-ncm1m90,266744
+ swifttd/__init__.py,sha256=IiG3Lrza6VD39Z7mePN1DUl_ND_hsqLzIvHa1WRdSRA,256
+ swifttd/_version.py,sha256=XIaxbMbyiP-L3kguR1GhxirFblTXiHR1lMfDVITvHUI,22
+ swifttd-0.1.9.dist-info/METADATA,sha256=D1NCKI2Z2X5j0MV5p8gyqX-RswEZWYdUlb1Eu4sywAU,3890
+ swifttd-0.1.9.dist-info/WHEEL,sha256=e8r2Onm5NNvlP7oexPErjEYtutEfns1NIR2WhYHCUPg,158
+ swifttd-0.1.9.dist-info/RECORD,,
swifttd-0.1.9.dist-info/WHEEL ADDED
@@ -0,0 +1,6 @@
+ Wheel-Version: 1.0
+ Generator: scikit-build-core 0.11.6
+ Root-Is-Purelib: false
+ Tag: cp310-cp310-manylinux_2_17_aarch64
+ Tag: cp310-cp310-manylinux2014_aarch64
+