swifttd 0.1.9__cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
Binary file
|
swifttd/__init__.py
ADDED
swifttd/_version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.9"
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: swifttd
|
|
3
|
+
Version: 0.1.9
|
|
4
|
+
Summary: SwiftTD: Fast and Robust TD Learning
|
|
5
|
+
Author: Khurram Javed
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: C++
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Project-URL: Homepage, https://github.com/khurramjaved96/SwiftTD
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# SwiftTD: A Fast and Robust Algorithm for Temporal Difference Learning
|
|
15
|
+
|
|
16
|
+
SwiftTD is an algorithm for learning value functions. It combines the ideas of step-size adaptation with the idea of a bound on the rate of learning. The implementations in this repository use linear function approximation.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install SwiftTD
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Usage
|
|
25
|
+
|
|
26
|
+
After installation, you can use the three implementations of SwiftTD in Python as follows:
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import swifttd
|
|
30
|
+
|
|
31
|
+
# Version of SwiftTD that expects the full feature vector as input. This should only be used if the feature representation is not sparse. Otherwise, the sparse versions are more efficient.
|
|
32
|
+
td_dense = swifttd.SwiftTDNonSparse(
|
|
33
|
+
num_of_features=5, # Number of input features
|
|
34
|
+
lambda_=0.95, # Lambda parameter for eligibility traces
|
|
35
|
+
alpha=1e-2, # Initial learning rate
|
|
36
|
+
gamma=0.99, # Discount factor
|
|
37
|
+
epsilon=1e-5, # Small constant for numerical stability
|
|
38
|
+
eta=0.1, # Maximum allowed step size (bound on rate of learning)
|
|
39
|
+
decay=0.999, # Step size decay rate
|
|
40
|
+
meta_step_size=1e-3, # Meta learning rate
|
|
41
|
+
eta_min=1e-10 # Minimum value of the step-size parameter
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# Feature vector
|
|
45
|
+
features = [1.0, 0.0, 0.5, 0.2, 0.0]
|
|
46
|
+
reward = 1.0
|
|
47
|
+
prediction = td_dense.step(features, reward)
|
|
48
|
+
print("Dense prediction:", prediction)
|
|
49
|
+
|
|
50
|
+
# Version of SwiftTD that expects the feature indices as input. This version assumes that the features are binary (0 or 1). For learning, the indices of the features that are 1 are provided.
|
|
51
|
+
td_sparse = swifttd.SwiftTDBinaryFeatures(
|
|
52
|
+
num_of_features=1000, # Number of input features
|
|
53
|
+
lambda_=0.95, # Lambda parameter for eligibility traces
|
|
54
|
+
alpha=1e-2, # Initial learning rate
|
|
55
|
+
gamma=0.99, # Discount factor
|
|
56
|
+
epsilon=1e-5, # Small constant for numerical stability
|
|
57
|
+
eta=0.1, # Maximum allowed step size (bound on rate of learning)
|
|
58
|
+
decay=0.999, # Step size decay rate
|
|
59
|
+
meta_step_size=1e-3, # Meta learning rate
|
|
60
|
+
eta_min=1e-10 # Minimum value of the step-size parameter
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# Specify the indices of the features that are 1.
|
|
64
|
+
active_features = [1, 42, 999] # Indices of active features
|
|
65
|
+
reward = 1.0
|
|
66
|
+
prediction = td_sparse.step(active_features, reward)
|
|
67
|
+
print("Sparse binary prediction:", prediction)
|
|
68
|
+
|
|
69
|
+
# Version of SwiftTD that expects the feature indices and values as input. This version does not assume that the features are binary. For learning, it expects a list of (index, value) pairs. Only the indices of the features that are non-zero need to be provided.
|
|
70
|
+
|
|
71
|
+
td_sparse_nonbinary = swifttd.SwiftTD(
|
|
72
|
+
num_of_features=1000, # Number of input features
|
|
73
|
+
lambda_=0.95, # Lambda parameter for eligibility traces
|
|
74
|
+
alpha=1e-2, # Initial learning rate
|
|
75
|
+
gamma=0.99, # Discount factor
|
|
76
|
+
epsilon=1e-5, # Small constant for numerical stability
|
|
77
|
+
eta=0.1, # Maximum allowed step size (bound on rate of learning)
|
|
78
|
+
decay=0.999, # Step size decay rate
|
|
79
|
+
meta_step_size=1e-3, # Meta learning rate
|
|
80
|
+
eta_min=1e-10 # Minimum value of the step-size parameter
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# Specify the indices and values of the features that are non-zero.
|
|
84
|
+
feature_values = [(1, 0.8), (42, 0.3), (999, 1.2)] # (index, value) pairs
|
|
85
|
+
reward = 1.0
|
|
86
|
+
prediction = td_sparse_nonbinary.step(feature_values, reward)
|
|
87
|
+
print("Sparse non-binary prediction:", prediction)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Resources
|
|
91
|
+
- [Paper (PDF)](https://khurramjaved.com/swifttd.pdf)
|
|
92
|
+
- [Interactive Demo](https://khurramjaved.com/swifttd.html)
|
|
93
|
+
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
swift_td.cpython-310-aarch64-linux-gnu.so,sha256=4rJ0QwyV6btRWOZ8EEYzdaNpATGUIpt6Qnp-ncm1m90,266744
|
|
2
|
+
swifttd/__init__.py,sha256=IiG3Lrza6VD39Z7mePN1DUl_ND_hsqLzIvHa1WRdSRA,256
|
|
3
|
+
swifttd/_version.py,sha256=XIaxbMbyiP-L3kguR1GhxirFblTXiHR1lMfDVITvHUI,22
|
|
4
|
+
swifttd-0.1.9.dist-info/METADATA,sha256=D1NCKI2Z2X5j0MV5p8gyqX-RswEZWYdUlb1Eu4sywAU,3890
|
|
5
|
+
swifttd-0.1.9.dist-info/WHEEL,sha256=e8r2Onm5NNvlP7oexPErjEYtutEfns1NIR2WhYHCUPg,158
|
|
6
|
+
swifttd-0.1.9.dist-info/RECORD,,
|