swifttd 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of swifttd might be problematic. Click here for more details.

@@ -0,0 +1,114 @@
name: Build & Publish Wheels

on:
  push:
    branches: [main, master]
    tags: ["v*"]
  workflow_dispatch: {}

jobs:
  build_wheels:
    name: Build wheels (${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-22.04
          - os: ubuntu-24.04
          - os: windows-latest
          - os: macos-13 # Intel x86_64
          - os: macos-14 # Apple Silicon arm64
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      # QEMU lets cibuildwheel cross-build aarch64 wheels on x86_64 Linux runners.
      - name: Setup QEMU (for Linux aarch64)
        if: runner.os == 'Linux'
        uses: docker/setup-qemu-action@v3
        with:
          platforms: arm64

      - name: Install build tooling
        run: |
          python -m pip install --upgrade pip
          pip install cibuildwheel==2.* twine build

      - name: Build wheels with cibuildwheel
        env:
          # Build only x86_64 and aarch64 on Linux (drops i686).
          CIBW_ARCHS_LINUX: "x86_64 aarch64"
          # Skip PyPy and musllinux builds.
          CIBW_SKIP: "pp* *-musllinux_*"
          CIBW_BUILD_VERBOSITY: "1"
          # Smoke-test each built wheel: the import must succeed.
          CIBW_TEST_COMMAND: "python -c \"import swifttd; print(swifttd.SwiftTD)\""
          CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-* cp313-*"
        run: |
          python -m cibuildwheel --output-dir dist

      - name: Upload wheel artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist-wheels-${{ matrix.os }}
          path: dist/*.whl
          if-no-files-found: error

  build_sdist:
    name: Build sdist
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Build sdist
        run: |
          python -m pip install --upgrade pip
          pip install build
          python -m build --sdist
      - name: Upload sdist artifact
        uses: actions/upload-artifact@v4
        with:
          name: dist-sdist
          path: dist/*.tar.gz
          if-no-files-found: error

  publish:
    name: Publish to (Test)PyPI
    needs: [build_wheels, build_sdist]
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/download-artifact@v4
        with:
          path: ./artifacts
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      # Flatten the per-OS artifact directories into a single dist/ folder.
      - name: Combine dists
        run: |
          mkdir -p dist
          find artifacts -type f -name "*.whl" -exec cp {} dist/ \;
          find artifacts -type f -name "*.tar.gz" -exec cp {} dist/ \;
          ls -la dist
      - name: Install twine
        run: |
          python -m pip install --upgrade pip
          pip install twine
      # Branch pushes go to TestPyPI; tagged releases go to PyPI.
      - name: Upload to TestPyPI on branches
        if: startsWith(github.ref, 'refs/heads/')
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
        run: |
          twine upload --repository-url https://test.pypi.org/legacy/ dist/*
      - name: Upload to PyPI on tags
        if: startsWith(github.ref, 'refs/tags/')
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: |
          twine upload dist/*
@@ -0,0 +1,38 @@
cmake_minimum_required(VERSION 3.21)

project(SwiftTD LANGUAGES CXX)

# Project-wide build settings.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)

# pybind11 + Python (Development.Module ensures proper extension build flags)
find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
find_package(pybind11 CONFIG REQUIRED)

# C++ core library (not installed; linked into the Python extension).
add_library(SwiftTD STATIC
  src/cpp/SwiftTD.cpp
)
target_include_directories(SwiftTD PUBLIC
  ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp
)
# Fix: previously appended "-O3" to CMAKE_CXX_FLAGS globally, which mutates
# global state and passes a GCC/Clang-only flag to MSVC (windows-latest is in
# the CI matrix).  Use a target-scoped, compiler-gated option instead.
target_compile_options(SwiftTD PRIVATE
  $<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-O3>
)

# Python extension module (top-level: import swift_td)
pybind11_add_module(swift_td
  src/cpp/pybind.cpp
)

target_link_libraries(swift_td PRIVATE SwiftTD)

# Let scikit-build-core place the extension correctly inside the wheel
# (use SKBUILD_PLATLIB_DIR instead of Python_SITEARCH).
install(TARGETS swift_td
  LIBRARY DESTINATION "${SKBUILD_PLATLIB_DIR}"
  ARCHIVE DESTINATION "${SKBUILD_PLATLIB_DIR}"
  RUNTIME DESTINATION "${SKBUILD_PLATLIB_DIR}"
)
swifttd-0.1.8/PKG-INFO ADDED
@@ -0,0 +1,93 @@
1
+ Metadata-Version: 2.2
2
+ Name: swifttd
3
+ Version: 0.1.8
4
+ Summary: SwiftTD: Fast and Robust TD Learning
5
+ Author: Khurram Javed
6
+ License: MIT
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: C++
9
+ Classifier: Operating System :: OS Independent
10
+ Project-URL: Homepage, https://github.com/khurramjaved96/SwiftTD
11
+ Requires-Python: >=3.7
12
+ Description-Content-Type: text/markdown
13
+
14
+ # SwiftTD: A Fast and Robust Algorithm for Temporal Difference Learning
15
+
16
+ SwiftTD is an algorithm for learning value functions. It combines step-size adaptation with a bound on the rate of learning. The implementations in this repository use linear function approximation.
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install SwiftTD
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ After installation, you can use the three implementations of SwiftTD in Python as:
27
+
28
+ ```python
29
+ import swifttd
30
+
31
+ # Version of SwiftTD that expects the full feature vector as input. This should only be used if the feature representation is not sparse. Otherwise, the sparse versions are more efficient.
32
+ td_dense = swifttd.SwiftTDNonSparse(
33
+ num_features=5, # Number of input features
34
+ lambda_=0.95, # Lambda parameter for eligibility traces
35
+ initial_alpha=1e-2, # Initial learning rate
36
+ gamma=0.99, # Discount factor
37
+ eps=1e-5, # Small constant for numerical stability
38
+ max_step_size=0.1, # Maximum allowed step size
39
+ step_size_decay=0.999, # Step size decay rate
40
+ meta_step_size=1e-3, # Meta learning rate
41
+ eta_min=1e-10 # Minimum value of the step-size parameter
42
+ )
43
+
44
+ # Feature vector
45
+ features = [1.0, 0.0, 0.5, 0.2, 0.0]
46
+ reward = 1.0
47
+ prediction = td_dense.step(features, reward)
48
+ print("Dense prediction:", prediction)
49
+
50
+ # Version of SwiftTD that expects the feature indices as input. This version assumes that the features are binary---0 or 1. For learning, the indices of the features that are 1 are provided.
51
+ td_sparse = swifttd.SwiftTDBinaryFeatures(
52
+ num_features=1000, # Number of input features
53
+ lambda_=0.95, # Lambda parameter for eligibility traces
54
+ initial_alpha=1e-2, # Initial learning rate
55
+ gamma=0.99, # Discount factor
56
+ eps=1e-5, # Small constant for numerical stability
57
+ max_step_size=0.1, # Maximum allowed step size
58
+ step_size_decay=0.999, # Step size decay rate
59
+ meta_step_size=1e-3, # Meta learning rate
60
+ eta_min=1e-10 # Minimum value of the step-size parameter
61
+ )
62
+
63
+ # Specify the indices of the features that are 1.
64
+ active_features = [1, 42, 999] # Indices of active features
65
+ reward = 1.0
66
+ prediction = td_sparse.step(active_features, reward)
67
+ print("Sparse binary prediction:", prediction)
68
+
69
+ # Version of SwiftTD that expects the feature indices and values as input. This version does not assume that the features are binary. For learning, it expects a list of (index, value) pairs. Only the indices of the features that are non-zero need to be provided.
70
+
71
+ td_sparse_nonbinary = swifttd.SwiftTD(
72
+ num_features=1000, # Number of input features
73
+ lambda_=0.95, # Lambda parameter for eligibility traces
74
+ initial_alpha=1e-2, # Initial learning rate
75
+ gamma=0.99, # Discount factor
76
+ eps=1e-5, # Small constant for numerical stability
77
+ max_step_size=0.1, # Maximum allowed step size
78
+ step_size_decay=0.999, # Step size decay rate
79
+ meta_step_size=1e-3, # Meta learning rate
80
+ eta_min=1e-10 # Minimum value of the step-size parameter
81
+ )
82
+
83
+ # Specify the indices and values of the features that are non-zero.
84
+ feature_values = [(1, 0.8), (42, 0.3), (999, 1.2)] # (index, value) pairs
85
+ reward = 1.0
86
+ prediction = td_sparse_nonbinary.step(feature_values, reward)
87
+ print("Sparse non-binary prediction:", prediction)
88
+ ```
89
+
90
+ ## Resources
91
+ - [Paper (PDF)](https://khurramjaved.com/swifttd.pdf)
92
+ - [Interactive Demo](https://khurramjaved.com/swifttd.html)
93
+
@@ -0,0 +1,80 @@
1
+ # SwiftTD: A Fast and Robust Algorithm for Temporal Difference Learning
2
+
3
+ SwiftTD is an algorithm for learning value functions. It combines step-size adaptation with a bound on the rate of learning. The implementations in this repository use linear function approximation.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install SwiftTD
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ After installation, you can use the three implementations of SwiftTD in Python as:
14
+
15
+ ```python
16
+ import swifttd
17
+
18
+ # Version of SwiftTD that expects the full feature vector as input. This should only be used if the feature representation is not sparse. Otherwise, the sparse versions are more efficient.
19
+ td_dense = swifttd.SwiftTDNonSparse(
20
+ num_features=5, # Number of input features
21
+ lambda_=0.95, # Lambda parameter for eligibility traces
22
+ initial_alpha=1e-2, # Initial learning rate
23
+ gamma=0.99, # Discount factor
24
+ eps=1e-5, # Small constant for numerical stability
25
+ max_step_size=0.1, # Maximum allowed step size
26
+ step_size_decay=0.999, # Step size decay rate
27
+ meta_step_size=1e-3, # Meta learning rate
28
+ eta_min=1e-10 # Minimum value of the step-size parameter
29
+ )
30
+
31
+ # Feature vector
32
+ features = [1.0, 0.0, 0.5, 0.2, 0.0]
33
+ reward = 1.0
34
+ prediction = td_dense.step(features, reward)
35
+ print("Dense prediction:", prediction)
36
+
37
+ # Version of SwiftTD that expects the feature indices as input. This version assumes that the features are binary---0 or 1. For learning, the indices of the features that are 1 are provided.
38
+ td_sparse = swifttd.SwiftTDBinaryFeatures(
39
+ num_features=1000, # Number of input features
40
+ lambda_=0.95, # Lambda parameter for eligibility traces
41
+ initial_alpha=1e-2, # Initial learning rate
42
+ gamma=0.99, # Discount factor
43
+ eps=1e-5, # Small constant for numerical stability
44
+ max_step_size=0.1, # Maximum allowed step size
45
+ step_size_decay=0.999, # Step size decay rate
46
+ meta_step_size=1e-3, # Meta learning rate
47
+ eta_min=1e-10 # Minimum value of the step-size parameter
48
+ )
49
+
50
+ # Specify the indices of the features that are 1.
51
+ active_features = [1, 42, 999] # Indices of active features
52
+ reward = 1.0
53
+ prediction = td_sparse.step(active_features, reward)
54
+ print("Sparse binary prediction:", prediction)
55
+
56
+ # Version of SwiftTD that expects the feature indices and values as input. This version does not assume that the features are binary. For learning, it expects a list of (index, value) pairs. Only the indices of the features that are non-zero need to be provided.
57
+
58
+ td_sparse_nonbinary = swifttd.SwiftTD(
59
+ num_features=1000, # Number of input features
60
+ lambda_=0.95, # Lambda parameter for eligibility traces
61
+ initial_alpha=1e-2, # Initial learning rate
62
+ gamma=0.99, # Discount factor
63
+ eps=1e-5, # Small constant for numerical stability
64
+ max_step_size=0.1, # Maximum allowed step size
65
+ step_size_decay=0.999, # Step size decay rate
66
+ meta_step_size=1e-3, # Meta learning rate
67
+ eta_min=1e-10 # Minimum value of the step-size parameter
68
+ )
69
+
70
+ # Specify the indices and values of the features that are non-zero.
71
+ feature_values = [(1, 0.8), (42, 0.3), (999, 1.2)] # (index, value) pairs
72
+ reward = 1.0
73
+ prediction = td_sparse_nonbinary.step(feature_values, reward)
74
+ print("Sparse non-binary prediction:", prediction)
75
+ ```
76
+
77
+ ## Resources
78
+ - [Paper (PDF)](https://khurramjaved.com/swifttd.pdf)
79
+ - [Interactive Demo](https://khurramjaved.com/swifttd.html)
80
+
@@ -0,0 +1,27 @@
[build-system]
requires = ["scikit-build-core>=0.9", "pybind11>=2.12"]
build-backend = "scikit_build_core.build"

[project]
name = "swifttd"
version = "0.1.8"
description = "SwiftTD: Fast and Robust TD Learning"
readme = "README.md"
requires-python = ">=3.7"
license = { text = "MIT" }
authors = [{ name = "Khurram Javed" }]
classifiers = [
    "Programming Language :: Python :: 3",
    "Programming Language :: C++",
    "Operating System :: OS Independent",
]

[project.urls]
Homepage = "https://github.com/khurramjaved96/SwiftTD"

[tool.scikit-build]
# Location of the pure-Python package shipped alongside the extension.
wheel.packages = ["src/swifttd"]

[tool.scikit-build.cmake]
minimum-version = "3.21"
@@ -0,0 +1,372 @@
1
+ //
2
+ // Created by Khurram Javed on 2024-02-18.
3
+ //
4
+
5
#include "SwiftTD.h"

#include <cmath>
#include <math.h>
#include <vector>
8
+
9
+
10
+ SwiftTDNonSparse::SwiftTDNonSparse(int number_of_features, float lambda_init, float alpha_init, float gamma_init,
11
+ float epsilon_init, float eta_init,
12
+ float decay_init, float meta_step_size_init, float eta_min_init)
13
+ {
14
+ this->gamma = gamma_init;
15
+ this->w = std::vector<float>(number_of_features, 0.0f);
16
+ this->featureVector = std::vector<float>(number_of_features, 0);
17
+ this->z = std::vector<float>(number_of_features, 0);
18
+ this->z_delta = std::vector<float>(number_of_features, 0);
19
+ this->delta_w = std::vector<float>(number_of_features, 0);
20
+
21
+ this->h = std::vector<float>(number_of_features, 0);
22
+ this->h_old = std::vector<float>(number_of_features, 0);
23
+ this->h_temp = std::vector<float>(number_of_features, 0);
24
+ this->beta = std::vector<float>(number_of_features, log(alpha_init));
25
+ this->z_bar = std::vector<float>(number_of_features, 0);
26
+ this->p = std::vector<float>(number_of_features, 0);
27
+
28
+ this->v_old = 0;
29
+ this->lambda = lambda_init;
30
+ this->epsilon = epsilon_init;
31
+ this->v_delta = 0;
32
+ this->eta = eta_init;
33
+ this->eta_min = eta_min_init;
34
+ this->decay = decay_init;
35
+ this->meta_step_size = meta_step_size_init;
36
+ }
37
+
38
+ float Math::DotProduct(const std::vector<float>& a, const std::vector<float>& b)
39
+ {
40
+ float result = 0;
41
+ for (int i = 0; i < a.size(); i++)
42
+ {
43
+ result += a[i] * b[i];
44
+ }
45
+ return result;
46
+ }
47
+
48
+ float SwiftTDNonSparse::Step(const std::vector<float>& features, float reward)
49
+ {
50
+ float v = 0;
51
+ for (int i = 0; i < features.size(); i++)
52
+ {
53
+ v += this->w[i] * features[i];
54
+ }
55
+
56
+ float delta = reward + gamma * v - this->v_old;
57
+ for (int i = 0; i < features.size(); i++)
58
+ {
59
+ this->delta_w[i] = delta * this->z[i] - z_delta[i] * this->v_delta;
60
+ this->w[i] += this->delta_w[i];
61
+ this->beta[i] +=
62
+ this->meta_step_size / (exp(this->beta[i])) * (delta - v_delta) * this->p[i];
63
+ if (exp(this->beta[i]) > this->eta || isinf(exp(this->beta[i])))
64
+ {
65
+ this->beta[i] = log(this->eta);
66
+ }
67
+ if(exp(this->beta[i]) < log(this->eta_min))
68
+ {
69
+ this->beta[i] = log(this->eta_min);
70
+ }
71
+ this->h_old[i] = this->h[i];
72
+ this->h[i] = this->h_temp[i] +
73
+ delta * this->z_bar[i] - this->z_delta[i] * this->v_delta;
74
+ this->h_temp[i] = this->h[i];
75
+ z_delta[i] = 0;
76
+ this->z[i] *= gamma * this->lambda;
77
+ this->p[i] *= gamma * this->lambda;
78
+ this->z_bar[i] *= gamma * this->lambda;
79
+ }
80
+ this->v_delta = 0;
81
+ float tau = 0;
82
+ for (int i = 0; i < features.size(); i++)
83
+ {
84
+ tau += exp(this->beta[i]) * features[i] * features[i];
85
+ }
86
+ float b = 0;
87
+ for (int i = 0; i < features.size(); i++)
88
+ {
89
+ b += this->z[i] * features[i];
90
+ }
91
+
92
+ for (int i = 0; i < features.size(); i++)
93
+ {
94
+ this->v_delta += this->delta_w[i] * features[i];
95
+ float multiplier = 1;
96
+ if (eta / tau < 1)
97
+ {
98
+ multiplier = eta / tau;
99
+ }
100
+ this->z_delta[i] = multiplier * exp(this->beta[i]) * features[i];
101
+ this->z[i] += this->z_delta[i] * (1 - b);
102
+ this->p[i] += this->h_old[i] * features[i];
103
+ this->z_bar[i] += this->z_delta[i] * (1 - b - this->z_bar[i] * features[i]);
104
+ this->h_temp[i] = this->h[i] - this->h_old[i] * features[i] * (this->z[i] - this->z_delta[i]) -
105
+ this->h[i] * this->z_delta[i] * features[i];
106
+ if (tau > eta)
107
+ {
108
+ this->h_temp[i] = 0;
109
+ this->h[i] = 0;
110
+ this->h_old[i] = 0;
111
+ this->z_bar[i] = 0;
112
+ this->beta[i] += log(this->decay) * features[i] * features[i];
113
+ }
114
+ }
115
+ this->v_old = v;
116
+ return v;
117
+ }
118
+
119
+ SwiftTDBinaryFeatures::SwiftTDBinaryFeatures(int number_of_features, float lambda_init, float alpha_init,
120
+ float gamma_init,
121
+ float epsilon_init, float eta_init,
122
+ float decay_init, float meta_step_size_init, float eta_min_init)
123
+ {
124
+ this->gamma = gamma_init;
125
+ this->w = std::vector<float>(number_of_features, 0);
126
+ this->featureVector = std::vector<float>(number_of_features, 0);
127
+ this->z = std::vector<float>(number_of_features, 0);
128
+ this->z_delta = std::vector<float>(number_of_features, 0);
129
+ this->delta_w = std::vector<float>(number_of_features, 0);
130
+
131
+ this->h = std::vector<float>(number_of_features, 0);
132
+ this->h_old = std::vector<float>(number_of_features, 0);
133
+ this->h_temp = std::vector<float>(number_of_features, 0);
134
+ this->beta = std::vector<float>(number_of_features, log(alpha_init));
135
+ this->z_bar = std::vector<float>(number_of_features, 0);
136
+ this->p = std::vector<float>(number_of_features, 0);
137
+
138
+ this->last_alpha = std::vector<float>(number_of_features, 0);
139
+
140
+ this->v_old = 0;
141
+ this->lambda = lambda_init;
142
+ this->epsilon = epsilon_init;
143
+ this->v_delta = 0;
144
+ this->eta = eta_init;
145
+ this->eta_min = eta_min_init;
146
+ this->decay = decay_init;
147
+
148
+ this->meta_step_size = meta_step_size_init;
149
+ }
150
+
151
+ float SwiftTDBinaryFeatures::Step(const std::vector<int>& feature_indices, float reward)
152
+ {
153
+ float v = 0;
154
+
155
+ for (auto& index : feature_indices)
156
+ {
157
+ v += this->w[index];
158
+ }
159
+
160
+ float delta = reward + gamma * v - this->v_old;
161
+ int position = 0;
162
+ while (position < this->setOfEligibleItems.size())
163
+ {
164
+ int index = this->setOfEligibleItems[position];
165
+ this->delta_w[index] = delta * this->z[index] - z_delta[index] * this->v_delta;
166
+ this->w[index] += this->delta_w[index];
167
+ this->beta[index] +=
168
+ this->meta_step_size / (exp(this->beta[index])) * (delta - v_delta) * this->p[index];
169
+ if (exp(this->beta[index]) > this->eta || isinf(exp(this->beta[index])))
170
+ {
171
+ this->beta[index] = log(this->eta);
172
+ }
173
+ if(exp(this->beta[index]) < log(this->eta_min))
174
+ {
175
+ this->beta[index] = log(this->eta_min);
176
+ }
177
+ this->h_old[index] = this->h[index];
178
+ this->h[index] = this->h_temp[index] +
179
+ delta * this->z_bar[index] - this->z_delta[index] * this->v_delta;
180
+ this->h_temp[index] = this->h[index];
181
+ z_delta[index] = 0;
182
+ this->z[index] = gamma * this->lambda * this->z[index];
183
+ this->p[index] = gamma * this->lambda * this->p[index];
184
+ this->z_bar[index] = gamma * this->lambda * this->z_bar[index];
185
+ if (this->z[index] <= this->last_alpha[index] * epsilon)
186
+ {
187
+ this->z[index] = 0;
188
+ this->p[index] = 0;
189
+ this->z_bar[index] = 0;
190
+ this->delta_w[index] = 0;
191
+ this->setOfEligibleItems[position] = this->setOfEligibleItems[this->setOfEligibleItems.size() - 1];
192
+ this->setOfEligibleItems.pop_back();
193
+ }
194
+ else
195
+ {
196
+ position++;
197
+ }
198
+ }
199
+ this->v_delta = 0;
200
+ float rate_of_learning = 0;
201
+
202
+ for (auto& index : feature_indices)
203
+ {
204
+ rate_of_learning += exp(this->beta[index]);
205
+ }
206
+ float E = this->eta;
207
+ if (rate_of_learning > this->eta)
208
+ {
209
+ E = rate_of_learning;
210
+ }
211
+
212
+
213
+ float t = 0;
214
+ for (auto& index : feature_indices)
215
+ {
216
+ t += this->z[index];
217
+ }
218
+
219
+ for (auto& index : feature_indices)
220
+ {
221
+ if (z[index] == 0)
222
+ {
223
+ this->setOfEligibleItems.push_back(index);
224
+ }
225
+ this->v_delta += this->delta_w[index];
226
+ this->z_delta[index] = (this->eta / E) * exp(this->beta[index]);
227
+ this->last_alpha[index] = this->z_delta[index];
228
+ if ((this->eta / E) < 1)
229
+ {
230
+ this->h_temp[index] = 0;
231
+ this->h[index] = 0;
232
+ this->h_old[index] = 0;
233
+ this->z_bar[index] = 0;
234
+ this->beta[index] += log(this->decay);
235
+ }
236
+ this->z[index] += this->z_delta[index] * (1 - t);
237
+ this->p[index] += this->h_old[index];
238
+ this->z_bar[index] += this->z_delta[index] * (1 - t - this->z_bar[index]);
239
+ this->h_temp[index] = this->h[index] - this->h_old[index] * (this->z[index] - this->z_delta[index]) -
240
+ this->h[index] * this->z_delta[index];
241
+ }
242
+ this->v_old = v;
243
+ return v;
244
+ }
245
+
246
+
247
+ SwiftTD::SwiftTD(int number_of_features, float lambda_init, float alpha_init, float gamma_init,
248
+ float epsilon_init, float eta_init,
249
+ float decay_init, float meta_step_size_init, float eta_min_init)
250
+ {
251
+ this->gamma = gamma_init;
252
+ this->w = std::vector<float>(number_of_features, 0);
253
+ this->featureVector = std::vector<float>(number_of_features, 0);
254
+ this->z = std::vector<float>(number_of_features, 0);
255
+ this->z_delta = std::vector<float>(number_of_features, 0);
256
+ this->delta_w = std::vector<float>(number_of_features, 0);
257
+
258
+ this->h = std::vector<float>(number_of_features, 0);
259
+ this->h_old = std::vector<float>(number_of_features, 0);
260
+ this->h_temp = std::vector<float>(number_of_features, 0);
261
+ this->beta = std::vector<float>(number_of_features, log(alpha_init));
262
+ this->z_bar = std::vector<float>(number_of_features, 0);
263
+ this->p = std::vector<float>(number_of_features, 0);
264
+
265
+ this->last_alpha = std::vector<float>(number_of_features, 0);
266
+
267
+ this->v_old = 0;
268
+ this->lambda = lambda_init;
269
+ this->epsilon = epsilon_init;
270
+ this->v_delta = 0;
271
+ this->eta = eta_init;
272
+ this->eta_min = eta_min_init;
273
+ this->decay = decay_init;
274
+
275
+ this->meta_step_size = meta_step_size_init;
276
+ }
277
+
278
+ float SwiftTD::Step(const std::vector<std::pair<int, float>>& feature_indices, float reward)
279
+ {
280
+ float v = 0;
281
+
282
+ for (auto& index : feature_indices)
283
+ {
284
+ v += this->w[index.first] * index.second;
285
+ }
286
+
287
+ float delta = reward + gamma * v - this->v_old;
288
+ int position = 0;
289
+ while (position < this->setOfEligibleItems.size())
290
+ {
291
+ auto index = this->setOfEligibleItems[position];
292
+ this->delta_w[index.first] = delta * this->z[index.first] - z_delta[index.first] * this->v_delta;
293
+ this->w[index.first] += this->delta_w[index.first];
294
+ this->beta[index.first] +=
295
+ this->meta_step_size / (exp(this->beta[index.first])) * (delta - v_delta) * this->p[index.first];
296
+ if (exp(this->beta[index.first]) > this->eta || isinf(exp(this->beta[index.first])))
297
+ {
298
+ this->beta[index.first] = log(this->eta);
299
+ }
300
+ if(exp(this->beta[index.first]) < log(this->eta_min))
301
+ {
302
+ this->beta[index.first] = log(this->eta_min);
303
+ }
304
+ this->h_old[index.first] = this->h[index.first];
305
+ this->h[index.first] = this->h_temp[index.first] +
306
+ delta * this->z_bar[index.first] - this->z_delta[index.first] * this->v_delta;
307
+ this->h_temp[index.first] = this->h[index.first];
308
+ z_delta[index.first] = 0;
309
+ this->z[index.first] = gamma * this->lambda * this->z[index.first];
310
+ this->p[index.first] = gamma * this->lambda * this->p[index.first];
311
+ this->z_bar[index.first] = gamma * this->lambda * this->z_bar[index.first];
312
+ if (this->z[index.first] <= this->last_alpha[index.first] * epsilon)
313
+ {
314
+ this->z[index.first] = 0;
315
+ this->p[index.first] = 0;
316
+ this->z_bar[index.first] = 0;
317
+ this->delta_w[index.first] = 0;
318
+ this->setOfEligibleItems[position] = this->setOfEligibleItems[this->setOfEligibleItems.size() - 1];
319
+ this->setOfEligibleItems.pop_back();
320
+ }
321
+ else
322
+ {
323
+ position++;
324
+ }
325
+ }
326
+ this->v_delta = 0;
327
+ float rate_of_learning = 0;
328
+
329
+ for (auto& index : feature_indices)
330
+ {
331
+ rate_of_learning += exp(this->beta[index.first]) * index.second * index.second;
332
+ }
333
+ float E = this->eta;
334
+ if (rate_of_learning > this->eta)
335
+ {
336
+ E = rate_of_learning;
337
+ }
338
+
339
+ float t = 0;
340
+ for (auto& index : feature_indices)
341
+ {
342
+ t += this->z[index.first] * index.second;
343
+ }
344
+
345
+ for (auto& index : feature_indices)
346
+ {
347
+ if (z[index.first] == 0)
348
+ {
349
+ this->setOfEligibleItems.push_back(index);
350
+ }
351
+ this->v_delta += this->delta_w[index.first] * index.second;
352
+ this->z_delta[index.first] = (this->eta / E) * exp(this->beta[index.first]) * index.second;
353
+ this->last_alpha[index.first] = (this->eta / E) * exp(this->beta[index.first]);
354
+ if ((this->eta / E) < 1)
355
+ {
356
+ this->h_temp[index.first] = 0;
357
+ this->h[index.first] = 0;
358
+ this->h_old[index.first] = 0;
359
+ this->z_bar[index.first] = 0;
360
+ this->beta[index.first] += log(this->decay) * index.second * index.second;
361
+ }
362
+ this->z[index.first] += this->z_delta[index.first] * (1 - t);
363
+ this->p[index.first] += this->h_old[index.first] * index.second;
364
+ this->z_bar[index.first] += this->z_delta[index.first] * (1 - t - this->z_bar[index.first] * index.second);
365
+ this->h_temp[index.first] = this->h[index.first] - this->h_old[index.first] * index.second * (this->z[index.
366
+ first] - this->
367
+ z_delta[index.first]) -
368
+ this->h[index.first] * this->z_delta[index.first] * index.second;
369
+ }
370
+ this->v_old = v;
371
+ return v;
372
+ }
@@ -0,0 +1,132 @@
1
+
2
//
// Created by Khurram Javed on 2024-02-18.
//

#ifndef SWIFTTD_H
#define SWIFTTD_H

#include <vector>

// Small math utilities shared by the implementations.
class Math
{
public:
    // Returns sum(a[i] * b[i]) over the elements of a.
    static float DotProduct(const std::vector<float>& a, const std::vector<float>& b);
};

// SwiftTD with dense (full) feature vectors.  Use only when the feature
// representation is not sparse; the sparse variants below are faster
// for sparse inputs.
class SwiftTDNonSparse
{
private:
    // Weights and eligibility-trace state (one entry per feature).
    std::vector<float> w;
    std::vector<float> z;
    std::vector<float> z_delta;
    std::vector<float> delta_w;

    std::vector<float> featureVector;

    // Step-size adaptation (meta-learning) state.
    std::vector<float> h;
    std::vector<float> h_old;
    std::vector<float> h_temp;
    std::vector<float> beta;   // log of the per-weight step sizes
    std::vector<float> z_bar;
    std::vector<float> p;

    // Scalar hyper-parameters and running state.
    float v_delta;
    float lambda;
    float epsilon;
    float v_old;
    float meta_step_size;

    float eta;      // upper bound on the rate of learning
    float eta_min;  // lower bound on the per-weight step size

    float decay;
    float gamma;

public:
    SwiftTDNonSparse(int number_of_features, float lambda_init, float alpha_init, float gamma_init, float epsilon_init,
                     float eta_init,
                     float decay_init, float meta_step_size_init, float eta_min = 1e-10);
    // One TD update with a dense feature vector; returns the value estimate.
    float Step(const std::vector<float>& features, float reward);
};

// SwiftTD for sparse BINARY features: Step takes the indices of the
// features that are 1.
class SwiftTDBinaryFeatures
{
    std::vector<int> setOfEligibleItems; // indices with non-zero traces
    std::vector<float> w;
    std::vector<float> z;
    std::vector<float> z_delta;
    std::vector<float> delta_w;

    std::vector<float> featureVector;

    // Step-size adaptation (meta-learning) state.
    std::vector<float> h;
    std::vector<float> h_old;
    std::vector<float> h_temp;
    std::vector<float> beta;   // log of the per-weight step sizes
    std::vector<float> z_bar;
    std::vector<float> p;

    std::vector<float> last_alpha; // last effective step size (trace pruning)

    // Scalar hyper-parameters and running state.
    float v_delta;
    float lambda;
    float epsilon;
    float v_old;
    float meta_step_size;

    float eta;
    float eta_min;

    float decay;
    float gamma;

public:
    SwiftTDBinaryFeatures(int number_of_features, float lambda_init, float alpha_init, float gamma_init,
                          float epsilon_init, float eta_init,
                          float decay_init, float meta_step_size_init, float eta_min = 1e-10);
    // One TD update given the indices of active binary features.
    float Step(const std::vector<int>& feature_indices, float reward);
};

// SwiftTD for sparse NON-binary features: Step takes (index, value) pairs
// for the non-zero features.
class SwiftTD
{
    std::vector<std::pair<int, float>> setOfEligibleItems; // entries with non-zero traces
    std::vector<float> w;
    std::vector<float> z;
    std::vector<float> z_delta;
    std::vector<float> delta_w;

    std::vector<float> featureVector;

    // Step-size adaptation (meta-learning) state.
    std::vector<float> h;
    std::vector<float> h_old;
    std::vector<float> h_temp;
    std::vector<float> beta;   // log of the per-weight step sizes
    std::vector<float> z_bar;
    std::vector<float> p;

    std::vector<float> last_alpha; // last effective step size (trace pruning)

    // Scalar hyper-parameters and running state.
    float v_delta;
    float lambda;
    float epsilon;
    float v_old;
    float meta_step_size;

    float eta;
    float eta_min;

    float decay;
    float gamma;

public:
    SwiftTD(int number_of_features, float lambda_init, float alpha_init, float gamma_init,
            float epsilon_init, float eta_init,
            float decay_init, float meta_step_size_init, float eta_min = 1e-10);
    // One TD update given sparse (index, value) feature pairs.
    float Step(const std::vector<std::pair<int, float>>& feature_indices, float reward);
};

#endif // SWIFTTD_H
@@ -0,0 +1,62 @@
1
+ #include <pybind11/pybind11.h>
2
+ #include <pybind11/stl.h>
3
+ #include "SwiftTD.h"
4
+
5
+ namespace py = pybind11;
6
+
7
+ PYBIND11_MODULE(swift_td, m)
8
+ {
9
+ m.doc() = "Python bindings for the SwiftTD reinforcement learning algorithm"; // Module docstring
10
+ py::class_<SwiftTDNonSparse>(m, "SwiftTDNonSparse")
11
+ .def(py::init<int, float, float, float, float, float, float, float, float>(),
12
+ "Initialize the SwiftTDNonSparse algorithm",
13
+ py::arg("num_of_features"),
14
+ py::arg("lambda"),
15
+ py::arg("alpha"),
16
+ py::arg("gamma"),
17
+ py::arg("epsilon"),
18
+ py::arg("eta"),
19
+ py::arg("decay"),
20
+ py::arg("meta_step_size"),
21
+ py::arg("eta_min"))
22
+ .def("step", &SwiftTDNonSparse::Step,
23
+ "Perform one step of learning",
24
+ py::arg("features"),
25
+ py::arg("reward"));
26
+
27
+ // Bind SwiftTDSparse class
28
+ py::class_<SwiftTDBinaryFeatures>(m, "SwiftTDBinaryFeatures")
29
+ .def(py::init<int, float, float, float, float, float, float, float, float>(),
30
+ "Initialize the SwiftTDBinaryFeatures algorithm",
31
+ py::arg("num_of_features"),
32
+ py::arg("lambda"),
33
+ py::arg("alpha"),
34
+ py::arg("gamma"),
35
+ py::arg("epsilon"),
36
+ py::arg("eta"),
37
+ py::arg("decay"),
38
+ py::arg("meta_step_size"),
39
+ py::arg("eta_min"))
40
+ .def("step", &SwiftTDBinaryFeatures::Step,
41
+ "Perform one step of learning with sparse features",
42
+ py::arg("features_indices"),
43
+ py::arg("reward"));
44
+
45
+ // Bind SwiftTDSparseAndNonBinaryFeatures class
46
+ py::class_<SwiftTD>(m, "SwiftTD")
47
+ .def(py::init<int, float, float, float, float, float, float, float, float>(),
48
+ "Initialize the SwiftTD algorithm",
49
+ py::arg("num_of_features"),
50
+ py::arg("lambda"),
51
+ py::arg("alpha"),
52
+ py::arg("gamma"),
53
+ py::arg("epsilon"),
54
+ py::arg("eta"),
55
+ py::arg("decay"),
56
+ py::arg("meta_step_size"),
57
+ py::arg("eta_min"))
58
+ .def("step", &SwiftTD::Step,
59
+ "Perform one step of learning with sparse non-binary features",
60
+ py::arg("feature_indices_values"),
61
+ py::arg("reward"));
62
+ }
@@ -0,0 +1,5 @@
1
"""Public package surface for swifttd.

Re-exports the three SwiftTD implementations from the compiled pybind11
extension (``swift_td``) alongside the package version.
"""
from ._version import __version__
# The extension module is installed at the environment root by
# scikit-build-core, hence the top-level (not relative) import.
from swift_td import SwiftTDNonSparse, SwiftTDBinaryFeatures, SwiftTD

__all__ = ["SwiftTDNonSparse", "SwiftTDBinaryFeatures", "SwiftTD", "__version__"]
@@ -0,0 +1 @@
1
+ __version__ = "0.1.8"