swifttd 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of swifttd might be problematic. Click here for more details.
- swifttd-0.1.8/.github/workflows/wheels.yml +114 -0
- swifttd-0.1.8/CMakeLists.txt +38 -0
- swifttd-0.1.8/PKG-INFO +93 -0
- swifttd-0.1.8/README.md +80 -0
- swifttd-0.1.8/pyproject.toml +27 -0
- swifttd-0.1.8/src/cpp/SwiftTD.cpp +372 -0
- swifttd-0.1.8/src/cpp/SwiftTD.h +132 -0
- swifttd-0.1.8/src/cpp/pybind.cpp +62 -0
- swifttd-0.1.8/src/swifttd/__init__.py +5 -0
- swifttd-0.1.8/src/swifttd/_version.py +1 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
name: Build & Publish Wheels
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main, master ]
|
|
6
|
+
tags: [ "v*" ]
|
|
7
|
+
workflow_dispatch: { }
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build_wheels:
|
|
11
|
+
name: Build wheels (${{ matrix.os }})
|
|
12
|
+
runs-on: ${{ matrix.os }}
|
|
13
|
+
strategy:
|
|
14
|
+
fail-fast: false
|
|
15
|
+
matrix:
|
|
16
|
+
include:
|
|
17
|
+
- os: ubuntu-22.04
|
|
18
|
+
- os: ubuntu-24.04
|
|
19
|
+
- os: windows-latest
|
|
20
|
+
- os: macos-13 # Intel x86_64
|
|
21
|
+
- os: macos-14 # Apple Silicon arm64
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- uses: actions/setup-python@v5
|
|
26
|
+
with:
|
|
27
|
+
python-version: "3.12"
|
|
28
|
+
|
|
29
|
+
# Enable QEMU so cibuildwheel can build aarch64 wheels on x86_64 Linux
|
|
30
|
+
- name: Setup QEMU (for Linux aarch64)
|
|
31
|
+
if: runner.os == 'Linux'
|
|
32
|
+
uses: docker/setup-qemu-action@v3
|
|
33
|
+
with:
|
|
34
|
+
platforms: arm64
|
|
35
|
+
|
|
36
|
+
- name: Install build tooling
|
|
37
|
+
run: |
|
|
38
|
+
python -m pip install --upgrade pip
|
|
39
|
+
pip install cibuildwheel==2.* twine build
|
|
40
|
+
|
|
41
|
+
- name: Build wheels with cibuildwheel
|
|
42
|
+
env:
|
|
43
|
+
# Build only x86_64 and aarch64 on Linux (drops i686)
|
|
44
|
+
CIBW_ARCHS_LINUX: "x86_64 aarch64"
|
|
45
|
+
# Optional skip list: no PyPy, no musllinux
|
|
46
|
+
CIBW_SKIP: "pp* *-musllinux_*"
|
|
47
|
+
CIBW_BUILD_VERBOSITY: "1"
|
|
48
|
+
CIBW_TEST_COMMAND: "python -c \"import swifttd; print(swifttd.SwiftTD)\""
|
|
49
|
+
CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-* cp313-*"
|
|
50
|
+
run: |
|
|
51
|
+
python -m cibuildwheel --output-dir dist
|
|
52
|
+
|
|
53
|
+
- name: Upload wheel artifacts
|
|
54
|
+
uses: actions/upload-artifact@v4
|
|
55
|
+
with:
|
|
56
|
+
name: dist-wheels-${{ matrix.os }}
|
|
57
|
+
path: dist/*.whl
|
|
58
|
+
if-no-files-found: error
|
|
59
|
+
|
|
60
|
+
build_sdist:
|
|
61
|
+
name: Build sdist
|
|
62
|
+
runs-on: ubuntu-22.04
|
|
63
|
+
steps:
|
|
64
|
+
- uses: actions/checkout@v4
|
|
65
|
+
- uses: actions/setup-python@v5
|
|
66
|
+
with:
|
|
67
|
+
python-version: "3.12"
|
|
68
|
+
- name: Build sdist
|
|
69
|
+
run: |
|
|
70
|
+
python -m pip install --upgrade pip
|
|
71
|
+
pip install build
|
|
72
|
+
python -m build --sdist
|
|
73
|
+
- name: Upload sdist artifact
|
|
74
|
+
uses: actions/upload-artifact@v4
|
|
75
|
+
with:
|
|
76
|
+
name: dist-sdist
|
|
77
|
+
path: dist/*.tar.gz
|
|
78
|
+
if-no-files-found: error
|
|
79
|
+
|
|
80
|
+
publish:
|
|
81
|
+
name: Publish to (Test)PyPI
|
|
82
|
+
needs: [ build_wheels, build_sdist ]
|
|
83
|
+
runs-on: ubuntu-22.04
|
|
84
|
+
steps:
|
|
85
|
+
- uses: actions/download-artifact@v4
|
|
86
|
+
with:
|
|
87
|
+
path: ./artifacts
|
|
88
|
+
- uses: actions/setup-python@v5
|
|
89
|
+
with:
|
|
90
|
+
python-version: "3.12"
|
|
91
|
+
- name: Combine dists
|
|
92
|
+
run: |
|
|
93
|
+
mkdir -p dist
|
|
94
|
+
find artifacts -type f -name "*.whl" -exec cp {} dist/ \;
|
|
95
|
+
find artifacts -type f -name "*.tar.gz" -exec cp {} dist/ \;
|
|
96
|
+
ls -la dist
|
|
97
|
+
- name: Install twine
|
|
98
|
+
run: |
|
|
99
|
+
python -m pip install --upgrade pip
|
|
100
|
+
pip install twine
|
|
101
|
+
- name: Upload to TestPyPI on branches
|
|
102
|
+
if: startsWith(github.ref, 'refs/heads/')
|
|
103
|
+
env:
|
|
104
|
+
TWINE_USERNAME: __token__
|
|
105
|
+
TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
|
|
106
|
+
run: |
|
|
107
|
+
twine upload --repository-url https://test.pypi.org/legacy/ dist/*
|
|
108
|
+
- name: Upload to PyPI on tags
|
|
109
|
+
if: startsWith(github.ref, 'refs/tags/')
|
|
110
|
+
env:
|
|
111
|
+
TWINE_USERNAME: __token__
|
|
112
|
+
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
|
113
|
+
run: |
|
|
114
|
+
twine upload dist/*
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.21)
|
|
2
|
+
|
|
3
|
+
project(SwiftTD LANGUAGES CXX)
|
|
4
|
+
|
|
5
|
+
# Build settings
|
|
6
|
+
set(CMAKE_CXX_STANDARD 17)
|
|
7
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
8
|
+
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
|
9
|
+
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
|
|
10
|
+
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
|
|
11
|
+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
|
|
12
|
+
|
|
13
|
+
# pybind11 + Python (Development.Module ensures proper extension build flags)
|
|
14
|
+
find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
|
|
15
|
+
find_package(pybind11 CONFIG REQUIRED)
|
|
16
|
+
|
|
17
|
+
# C++ core library (not installed; linked into the Python extension)
|
|
18
|
+
add_library(SwiftTD STATIC
|
|
19
|
+
src/cpp/SwiftTD.cpp
|
|
20
|
+
)
|
|
21
|
+
target_include_directories(SwiftTD PUBLIC
|
|
22
|
+
${CMAKE_CURRENT_SOURCE_DIR}/src/cpp
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Python extension module (top-level: import swift_td)
|
|
26
|
+
pybind11_add_module(swift_td
|
|
27
|
+
src/cpp/pybind.cpp
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
target_link_libraries(swift_td PRIVATE SwiftTD)
|
|
31
|
+
|
|
32
|
+
# Let scikit-build-core place the extension correctly inside the wheel
|
|
33
|
+
# (use SKBUILD_PLATLIB_DIR instead of Python_SITEARCH)
|
|
34
|
+
install(TARGETS swift_td
|
|
35
|
+
LIBRARY DESTINATION "${SKBUILD_PLATLIB_DIR}"
|
|
36
|
+
ARCHIVE DESTINATION "${SKBUILD_PLATLIB_DIR}"
|
|
37
|
+
RUNTIME DESTINATION "${SKBUILD_PLATLIB_DIR}"
|
|
38
|
+
)
|
swifttd-0.1.8/PKG-INFO
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: swifttd
|
|
3
|
+
Version: 0.1.8
|
|
4
|
+
Summary: SwiftTD: Fast and Robust TD Learning
|
|
5
|
+
Author: Khurram Javed
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: C++
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Project-URL: Homepage, https://github.com/khurramjaved96/SwiftTD
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# SwiftTD: A Fast and Robust Algorithm for Temporal Difference Learning
|
|
15
|
+
|
|
16
|
+
SwiftTD is an algorithm for learning value functions. It combines the ideas of step-size adaptation with the idea of a bound on the rate of learning. The implementations in this repository use linear function approximation.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install SwiftTD
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Usage
|
|
25
|
+
|
|
26
|
+
After installation, you can use the three implementations of SwiftTD in Python as:
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import swifttd
|
|
30
|
+
|
|
31
|
+
# Version of SwiftTD that expects the full feature vector as input. This should only be used if the feature representation is not sparse. Otherwise, the sparse versions are more efficient.
|
|
32
|
+
td_dense = swifttd.SwiftTDNonSparse(
|
|
33
|
+
num_features=5, # Number of input features
|
|
34
|
+
lambda_=0.95, # Lambda parameter for eligibility traces
|
|
35
|
+
initial_alpha=1e-2, # Initial learning rate
|
|
36
|
+
gamma=0.99, # Discount factor
|
|
37
|
+
eps=1e-5, # Small constant for numerical stability
|
|
38
|
+
max_step_size=0.1, # Maximum allowed step size
|
|
39
|
+
step_size_decay=0.999, # Step size decay rate
|
|
40
|
+
meta_step_size=1e-3, # Meta learning rate
|
|
41
|
+
eta_min=1e-10 # Minimum value of the step-size parameter
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# Feature vector
|
|
45
|
+
features = [1.0, 0.0, 0.5, 0.2, 0.0]
|
|
46
|
+
reward = 1.0
|
|
47
|
+
prediction = td_dense.step(features, reward)
|
|
48
|
+
print("Dense prediction:", prediction)
|
|
49
|
+
|
|
50
|
+
# Version of SwiftTD that expects the feature indices as input. This version assumes that the features are binary---0 or 1. For learning, the indices of the features that are 1 are provided.
|
|
51
|
+
td_sparse = swifttd.SwiftTDBinaryFeatures(
|
|
52
|
+
num_features=1000, # Number of input features
|
|
53
|
+
lambda_=0.95, # Lambda parameter for eligibility traces
|
|
54
|
+
initial_alpha=1e-2, # Initial learning rate
|
|
55
|
+
gamma=0.99, # Discount factor
|
|
56
|
+
eps=1e-5, # Small constant for numerical stability
|
|
57
|
+
max_step_size=0.1, # Maximum allowed step size
|
|
58
|
+
step_size_decay=0.999, # Step size decay rate
|
|
59
|
+
meta_step_size=1e-3, # Meta learning rate
|
|
60
|
+
eta_min=1e-10 # Minimum value of the step-size parameter
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# Specify the indices of the features that are 1.
|
|
64
|
+
active_features = [1, 42, 999] # Indices of active features
|
|
65
|
+
reward = 1.0
|
|
66
|
+
prediction = td_sparse.step(active_features, reward)
|
|
67
|
+
print("Sparse binary prediction:", prediction)
|
|
68
|
+
|
|
69
|
+
# Version of SwiftTD that expects the feature indices and values as input. This version does not assume that the features are binary. For learning, it expects a list of (index, value) pairs. Only the indices of the features that are non-zero need to be provided.
|
|
70
|
+
|
|
71
|
+
td_sparse_nonbinary = swifttd.SwiftTD(
|
|
72
|
+
num_features=1000, # Number of input features
|
|
73
|
+
lambda_=0.95, # Lambda parameter for eligibility traces
|
|
74
|
+
initial_alpha=1e-2, # Initial learning rate
|
|
75
|
+
gamma=0.99, # Discount factor
|
|
76
|
+
eps=1e-5, # Small constant for numerical stability
|
|
77
|
+
max_step_size=0.1, # Maximum allowed step size
|
|
78
|
+
step_size_decay=0.999, # Step size decay rate
|
|
79
|
+
meta_step_size=1e-3, # Meta learning rate
|
|
80
|
+
eta_min=1e-10 # Minimum value of the step-size parameter
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# Specify the indices and values of the features that are non-zero.
|
|
84
|
+
feature_values = [(1, 0.8), (42, 0.3), (999, 1.2)] # (index, value) pairs
|
|
85
|
+
reward = 1.0
|
|
86
|
+
prediction = td_sparse_nonbinary.step(feature_values, reward)
|
|
87
|
+
print("Sparse non-binary prediction:", prediction)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Resources
|
|
91
|
+
- [Paper (PDF)](https://khurramjaved.com/swifttd.pdf)
|
|
92
|
+
- [Interactive Demo](https://khurramjaved.com/swifttd.html)
|
|
93
|
+
|
swifttd-0.1.8/README.md
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# SwiftTD: A Fast and Robust Algorithm for Temporal Difference Learning
|
|
2
|
+
|
|
3
|
+
SwiftTD is an algorithm for learning value functions. It combines the ideas of step-size adaptation with the idea of a bound on the rate of learning. The implementations in this repository use linear function approximation.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install SwiftTD
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
After installation, you can use the three implementations of SwiftTD in Python as:
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
import swifttd
|
|
17
|
+
|
|
18
|
+
# Version of SwiftTD that expects the full feature vector as input. This should only be used if the feature representation is not sparse. Otherwise, the sparse versions are more efficient.
|
|
19
|
+
td_dense = swifttd.SwiftTDNonSparse(
|
|
20
|
+
num_features=5, # Number of input features
|
|
21
|
+
lambda_=0.95, # Lambda parameter for eligibility traces
|
|
22
|
+
initial_alpha=1e-2, # Initial learning rate
|
|
23
|
+
gamma=0.99, # Discount factor
|
|
24
|
+
eps=1e-5, # Small constant for numerical stability
|
|
25
|
+
max_step_size=0.1, # Maximum allowed step size
|
|
26
|
+
step_size_decay=0.999, # Step size decay rate
|
|
27
|
+
meta_step_size=1e-3, # Meta learning rate
|
|
28
|
+
eta_min=1e-10 # Minimum value of the step-size parameter
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Feature vector
|
|
32
|
+
features = [1.0, 0.0, 0.5, 0.2, 0.0]
|
|
33
|
+
reward = 1.0
|
|
34
|
+
prediction = td_dense.step(features, reward)
|
|
35
|
+
print("Dense prediction:", prediction)
|
|
36
|
+
|
|
37
|
+
# Version of SwiftTD that expects the feature indices as input. This version assumes that the features are binary---0 or 1. For learning, the indices of the features that are 1 are provided.
|
|
38
|
+
td_sparse = swifttd.SwiftTDBinaryFeatures(
|
|
39
|
+
num_features=1000, # Number of input features
|
|
40
|
+
lambda_=0.95, # Lambda parameter for eligibility traces
|
|
41
|
+
initial_alpha=1e-2, # Initial learning rate
|
|
42
|
+
gamma=0.99, # Discount factor
|
|
43
|
+
eps=1e-5, # Small constant for numerical stability
|
|
44
|
+
max_step_size=0.1, # Maximum allowed step size
|
|
45
|
+
step_size_decay=0.999, # Step size decay rate
|
|
46
|
+
meta_step_size=1e-3, # Meta learning rate
|
|
47
|
+
eta_min=1e-10 # Minimum value of the step-size parameter
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# Specify the indices of the features that are 1.
|
|
51
|
+
active_features = [1, 42, 999] # Indices of active features
|
|
52
|
+
reward = 1.0
|
|
53
|
+
prediction = td_sparse.step(active_features, reward)
|
|
54
|
+
print("Sparse binary prediction:", prediction)
|
|
55
|
+
|
|
56
|
+
# Version of SwiftTD that expects the feature indices and values as input. This version does not assume that the features are binary. For learning, it expects a list of (index, value) pairs. Only the indices of the features that are non-zero need to be provided.
|
|
57
|
+
|
|
58
|
+
td_sparse_nonbinary = swifttd.SwiftTD(
|
|
59
|
+
num_features=1000, # Number of input features
|
|
60
|
+
lambda_=0.95, # Lambda parameter for eligibility traces
|
|
61
|
+
initial_alpha=1e-2, # Initial learning rate
|
|
62
|
+
gamma=0.99, # Discount factor
|
|
63
|
+
eps=1e-5, # Small constant for numerical stability
|
|
64
|
+
max_step_size=0.1, # Maximum allowed step size
|
|
65
|
+
step_size_decay=0.999, # Step size decay rate
|
|
66
|
+
meta_step_size=1e-3, # Meta learning rate
|
|
67
|
+
eta_min=1e-10 # Minimum value of the step-size parameter
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
# Specify the indices and values of the features that are non-zero.
|
|
71
|
+
feature_values = [(1, 0.8), (42, 0.3), (999, 1.2)] # (index, value) pairs
|
|
72
|
+
reward = 1.0
|
|
73
|
+
prediction = td_sparse_nonbinary.step(feature_values, reward)
|
|
74
|
+
print("Sparse non-binary prediction:", prediction)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Resources
|
|
78
|
+
- [Paper (PDF)](https://khurramjaved.com/swifttd.pdf)
|
|
79
|
+
- [Interactive Demo](https://khurramjaved.com/swifttd.html)
|
|
80
|
+
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["scikit-build-core>=0.9", "pybind11>=2.12"]
|
|
3
|
+
build-backend = "scikit_build_core.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "swifttd"
|
|
7
|
+
version = "0.1.8"
|
|
8
|
+
description = "SwiftTD: Fast and Robust TD Learning"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.7"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [{name = "Khurram Javed"}]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Programming Language :: C++",
|
|
16
|
+
"Operating System :: OS Independent",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[project.urls]
|
|
20
|
+
Homepage = "https://github.com/khurramjaved96/SwiftTD"
|
|
21
|
+
|
|
22
|
+
[tool.scikit-build]
|
|
23
|
+
# Where the pure-Python package lives (create these files below)
|
|
24
|
+
wheel.packages = ["src/swifttd"]
|
|
25
|
+
|
|
26
|
+
[tool.scikit-build.cmake]
|
|
27
|
+
minimum-version = "3.21"
|
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Created by Khurram Javed on 2024-02-18.
|
|
3
|
+
//
|
|
4
|
+
|
|
5
|
+
#include "SwiftTD.h"
|
|
6
|
+
#include <vector>
|
|
7
|
+
#include <math.h>
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
SwiftTDNonSparse::SwiftTDNonSparse(int number_of_features, float lambda_init, float alpha_init, float gamma_init,
|
|
11
|
+
float epsilon_init, float eta_init,
|
|
12
|
+
float decay_init, float meta_step_size_init, float eta_min_init)
|
|
13
|
+
{
|
|
14
|
+
this->gamma = gamma_init;
|
|
15
|
+
this->w = std::vector<float>(number_of_features, 0.0f);
|
|
16
|
+
this->featureVector = std::vector<float>(number_of_features, 0);
|
|
17
|
+
this->z = std::vector<float>(number_of_features, 0);
|
|
18
|
+
this->z_delta = std::vector<float>(number_of_features, 0);
|
|
19
|
+
this->delta_w = std::vector<float>(number_of_features, 0);
|
|
20
|
+
|
|
21
|
+
this->h = std::vector<float>(number_of_features, 0);
|
|
22
|
+
this->h_old = std::vector<float>(number_of_features, 0);
|
|
23
|
+
this->h_temp = std::vector<float>(number_of_features, 0);
|
|
24
|
+
this->beta = std::vector<float>(number_of_features, log(alpha_init));
|
|
25
|
+
this->z_bar = std::vector<float>(number_of_features, 0);
|
|
26
|
+
this->p = std::vector<float>(number_of_features, 0);
|
|
27
|
+
|
|
28
|
+
this->v_old = 0;
|
|
29
|
+
this->lambda = lambda_init;
|
|
30
|
+
this->epsilon = epsilon_init;
|
|
31
|
+
this->v_delta = 0;
|
|
32
|
+
this->eta = eta_init;
|
|
33
|
+
this->eta_min = eta_min_init;
|
|
34
|
+
this->decay = decay_init;
|
|
35
|
+
this->meta_step_size = meta_step_size_init;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
float Math::DotProduct(const std::vector<float>& a, const std::vector<float>& b)
|
|
39
|
+
{
|
|
40
|
+
float result = 0;
|
|
41
|
+
for (int i = 0; i < a.size(); i++)
|
|
42
|
+
{
|
|
43
|
+
result += a[i] * b[i];
|
|
44
|
+
}
|
|
45
|
+
return result;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
float SwiftTDNonSparse::Step(const std::vector<float>& features, float reward)
|
|
49
|
+
{
|
|
50
|
+
float v = 0;
|
|
51
|
+
for (int i = 0; i < features.size(); i++)
|
|
52
|
+
{
|
|
53
|
+
v += this->w[i] * features[i];
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
float delta = reward + gamma * v - this->v_old;
|
|
57
|
+
for (int i = 0; i < features.size(); i++)
|
|
58
|
+
{
|
|
59
|
+
this->delta_w[i] = delta * this->z[i] - z_delta[i] * this->v_delta;
|
|
60
|
+
this->w[i] += this->delta_w[i];
|
|
61
|
+
this->beta[i] +=
|
|
62
|
+
this->meta_step_size / (exp(this->beta[i])) * (delta - v_delta) * this->p[i];
|
|
63
|
+
if (exp(this->beta[i]) > this->eta || isinf(exp(this->beta[i])))
|
|
64
|
+
{
|
|
65
|
+
this->beta[i] = log(this->eta);
|
|
66
|
+
}
|
|
67
|
+
if(exp(this->beta[i]) < log(this->eta_min))
|
|
68
|
+
{
|
|
69
|
+
this->beta[i] = log(this->eta_min);
|
|
70
|
+
}
|
|
71
|
+
this->h_old[i] = this->h[i];
|
|
72
|
+
this->h[i] = this->h_temp[i] +
|
|
73
|
+
delta * this->z_bar[i] - this->z_delta[i] * this->v_delta;
|
|
74
|
+
this->h_temp[i] = this->h[i];
|
|
75
|
+
z_delta[i] = 0;
|
|
76
|
+
this->z[i] *= gamma * this->lambda;
|
|
77
|
+
this->p[i] *= gamma * this->lambda;
|
|
78
|
+
this->z_bar[i] *= gamma * this->lambda;
|
|
79
|
+
}
|
|
80
|
+
this->v_delta = 0;
|
|
81
|
+
float tau = 0;
|
|
82
|
+
for (int i = 0; i < features.size(); i++)
|
|
83
|
+
{
|
|
84
|
+
tau += exp(this->beta[i]) * features[i] * features[i];
|
|
85
|
+
}
|
|
86
|
+
float b = 0;
|
|
87
|
+
for (int i = 0; i < features.size(); i++)
|
|
88
|
+
{
|
|
89
|
+
b += this->z[i] * features[i];
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
for (int i = 0; i < features.size(); i++)
|
|
93
|
+
{
|
|
94
|
+
this->v_delta += this->delta_w[i] * features[i];
|
|
95
|
+
float multiplier = 1;
|
|
96
|
+
if (eta / tau < 1)
|
|
97
|
+
{
|
|
98
|
+
multiplier = eta / tau;
|
|
99
|
+
}
|
|
100
|
+
this->z_delta[i] = multiplier * exp(this->beta[i]) * features[i];
|
|
101
|
+
this->z[i] += this->z_delta[i] * (1 - b);
|
|
102
|
+
this->p[i] += this->h_old[i] * features[i];
|
|
103
|
+
this->z_bar[i] += this->z_delta[i] * (1 - b - this->z_bar[i] * features[i]);
|
|
104
|
+
this->h_temp[i] = this->h[i] - this->h_old[i] * features[i] * (this->z[i] - this->z_delta[i]) -
|
|
105
|
+
this->h[i] * this->z_delta[i] * features[i];
|
|
106
|
+
if (tau > eta)
|
|
107
|
+
{
|
|
108
|
+
this->h_temp[i] = 0;
|
|
109
|
+
this->h[i] = 0;
|
|
110
|
+
this->h_old[i] = 0;
|
|
111
|
+
this->z_bar[i] = 0;
|
|
112
|
+
this->beta[i] += log(this->decay) * features[i] * features[i];
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
this->v_old = v;
|
|
116
|
+
return v;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
SwiftTDBinaryFeatures::SwiftTDBinaryFeatures(int number_of_features, float lambda_init, float alpha_init,
|
|
120
|
+
float gamma_init,
|
|
121
|
+
float epsilon_init, float eta_init,
|
|
122
|
+
float decay_init, float meta_step_size_init, float eta_min_init)
|
|
123
|
+
{
|
|
124
|
+
this->gamma = gamma_init;
|
|
125
|
+
this->w = std::vector<float>(number_of_features, 0);
|
|
126
|
+
this->featureVector = std::vector<float>(number_of_features, 0);
|
|
127
|
+
this->z = std::vector<float>(number_of_features, 0);
|
|
128
|
+
this->z_delta = std::vector<float>(number_of_features, 0);
|
|
129
|
+
this->delta_w = std::vector<float>(number_of_features, 0);
|
|
130
|
+
|
|
131
|
+
this->h = std::vector<float>(number_of_features, 0);
|
|
132
|
+
this->h_old = std::vector<float>(number_of_features, 0);
|
|
133
|
+
this->h_temp = std::vector<float>(number_of_features, 0);
|
|
134
|
+
this->beta = std::vector<float>(number_of_features, log(alpha_init));
|
|
135
|
+
this->z_bar = std::vector<float>(number_of_features, 0);
|
|
136
|
+
this->p = std::vector<float>(number_of_features, 0);
|
|
137
|
+
|
|
138
|
+
this->last_alpha = std::vector<float>(number_of_features, 0);
|
|
139
|
+
|
|
140
|
+
this->v_old = 0;
|
|
141
|
+
this->lambda = lambda_init;
|
|
142
|
+
this->epsilon = epsilon_init;
|
|
143
|
+
this->v_delta = 0;
|
|
144
|
+
this->eta = eta_init;
|
|
145
|
+
this->eta_min = eta_min_init;
|
|
146
|
+
this->decay = decay_init;
|
|
147
|
+
|
|
148
|
+
this->meta_step_size = meta_step_size_init;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
float SwiftTDBinaryFeatures::Step(const std::vector<int>& feature_indices, float reward)
|
|
152
|
+
{
|
|
153
|
+
float v = 0;
|
|
154
|
+
|
|
155
|
+
for (auto& index : feature_indices)
|
|
156
|
+
{
|
|
157
|
+
v += this->w[index];
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
float delta = reward + gamma * v - this->v_old;
|
|
161
|
+
int position = 0;
|
|
162
|
+
while (position < this->setOfEligibleItems.size())
|
|
163
|
+
{
|
|
164
|
+
int index = this->setOfEligibleItems[position];
|
|
165
|
+
this->delta_w[index] = delta * this->z[index] - z_delta[index] * this->v_delta;
|
|
166
|
+
this->w[index] += this->delta_w[index];
|
|
167
|
+
this->beta[index] +=
|
|
168
|
+
this->meta_step_size / (exp(this->beta[index])) * (delta - v_delta) * this->p[index];
|
|
169
|
+
if (exp(this->beta[index]) > this->eta || isinf(exp(this->beta[index])))
|
|
170
|
+
{
|
|
171
|
+
this->beta[index] = log(this->eta);
|
|
172
|
+
}
|
|
173
|
+
if(exp(this->beta[index]) < log(this->eta_min))
|
|
174
|
+
{
|
|
175
|
+
this->beta[index] = log(this->eta_min);
|
|
176
|
+
}
|
|
177
|
+
this->h_old[index] = this->h[index];
|
|
178
|
+
this->h[index] = this->h_temp[index] +
|
|
179
|
+
delta * this->z_bar[index] - this->z_delta[index] * this->v_delta;
|
|
180
|
+
this->h_temp[index] = this->h[index];
|
|
181
|
+
z_delta[index] = 0;
|
|
182
|
+
this->z[index] = gamma * this->lambda * this->z[index];
|
|
183
|
+
this->p[index] = gamma * this->lambda * this->p[index];
|
|
184
|
+
this->z_bar[index] = gamma * this->lambda * this->z_bar[index];
|
|
185
|
+
if (this->z[index] <= this->last_alpha[index] * epsilon)
|
|
186
|
+
{
|
|
187
|
+
this->z[index] = 0;
|
|
188
|
+
this->p[index] = 0;
|
|
189
|
+
this->z_bar[index] = 0;
|
|
190
|
+
this->delta_w[index] = 0;
|
|
191
|
+
this->setOfEligibleItems[position] = this->setOfEligibleItems[this->setOfEligibleItems.size() - 1];
|
|
192
|
+
this->setOfEligibleItems.pop_back();
|
|
193
|
+
}
|
|
194
|
+
else
|
|
195
|
+
{
|
|
196
|
+
position++;
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
this->v_delta = 0;
|
|
200
|
+
float rate_of_learning = 0;
|
|
201
|
+
|
|
202
|
+
for (auto& index : feature_indices)
|
|
203
|
+
{
|
|
204
|
+
rate_of_learning += exp(this->beta[index]);
|
|
205
|
+
}
|
|
206
|
+
float E = this->eta;
|
|
207
|
+
if (rate_of_learning > this->eta)
|
|
208
|
+
{
|
|
209
|
+
E = rate_of_learning;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
float t = 0;
|
|
214
|
+
for (auto& index : feature_indices)
|
|
215
|
+
{
|
|
216
|
+
t += this->z[index];
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
for (auto& index : feature_indices)
|
|
220
|
+
{
|
|
221
|
+
if (z[index] == 0)
|
|
222
|
+
{
|
|
223
|
+
this->setOfEligibleItems.push_back(index);
|
|
224
|
+
}
|
|
225
|
+
this->v_delta += this->delta_w[index];
|
|
226
|
+
this->z_delta[index] = (this->eta / E) * exp(this->beta[index]);
|
|
227
|
+
this->last_alpha[index] = this->z_delta[index];
|
|
228
|
+
if ((this->eta / E) < 1)
|
|
229
|
+
{
|
|
230
|
+
this->h_temp[index] = 0;
|
|
231
|
+
this->h[index] = 0;
|
|
232
|
+
this->h_old[index] = 0;
|
|
233
|
+
this->z_bar[index] = 0;
|
|
234
|
+
this->beta[index] += log(this->decay);
|
|
235
|
+
}
|
|
236
|
+
this->z[index] += this->z_delta[index] * (1 - t);
|
|
237
|
+
this->p[index] += this->h_old[index];
|
|
238
|
+
this->z_bar[index] += this->z_delta[index] * (1 - t - this->z_bar[index]);
|
|
239
|
+
this->h_temp[index] = this->h[index] - this->h_old[index] * (this->z[index] - this->z_delta[index]) -
|
|
240
|
+
this->h[index] * this->z_delta[index];
|
|
241
|
+
}
|
|
242
|
+
this->v_old = v;
|
|
243
|
+
return v;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
SwiftTD::SwiftTD(int number_of_features, float lambda_init, float alpha_init, float gamma_init,
|
|
248
|
+
float epsilon_init, float eta_init,
|
|
249
|
+
float decay_init, float meta_step_size_init, float eta_min_init)
|
|
250
|
+
{
|
|
251
|
+
this->gamma = gamma_init;
|
|
252
|
+
this->w = std::vector<float>(number_of_features, 0);
|
|
253
|
+
this->featureVector = std::vector<float>(number_of_features, 0);
|
|
254
|
+
this->z = std::vector<float>(number_of_features, 0);
|
|
255
|
+
this->z_delta = std::vector<float>(number_of_features, 0);
|
|
256
|
+
this->delta_w = std::vector<float>(number_of_features, 0);
|
|
257
|
+
|
|
258
|
+
this->h = std::vector<float>(number_of_features, 0);
|
|
259
|
+
this->h_old = std::vector<float>(number_of_features, 0);
|
|
260
|
+
this->h_temp = std::vector<float>(number_of_features, 0);
|
|
261
|
+
this->beta = std::vector<float>(number_of_features, log(alpha_init));
|
|
262
|
+
this->z_bar = std::vector<float>(number_of_features, 0);
|
|
263
|
+
this->p = std::vector<float>(number_of_features, 0);
|
|
264
|
+
|
|
265
|
+
this->last_alpha = std::vector<float>(number_of_features, 0);
|
|
266
|
+
|
|
267
|
+
this->v_old = 0;
|
|
268
|
+
this->lambda = lambda_init;
|
|
269
|
+
this->epsilon = epsilon_init;
|
|
270
|
+
this->v_delta = 0;
|
|
271
|
+
this->eta = eta_init;
|
|
272
|
+
this->eta_min = eta_min_init;
|
|
273
|
+
this->decay = decay_init;
|
|
274
|
+
|
|
275
|
+
this->meta_step_size = meta_step_size_init;
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
float SwiftTD::Step(const std::vector<std::pair<int, float>>& feature_indices, float reward)
|
|
279
|
+
{
|
|
280
|
+
float v = 0;
|
|
281
|
+
|
|
282
|
+
for (auto& index : feature_indices)
|
|
283
|
+
{
|
|
284
|
+
v += this->w[index.first] * index.second;
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
float delta = reward + gamma * v - this->v_old;
|
|
288
|
+
int position = 0;
|
|
289
|
+
while (position < this->setOfEligibleItems.size())
|
|
290
|
+
{
|
|
291
|
+
auto index = this->setOfEligibleItems[position];
|
|
292
|
+
this->delta_w[index.first] = delta * this->z[index.first] - z_delta[index.first] * this->v_delta;
|
|
293
|
+
this->w[index.first] += this->delta_w[index.first];
|
|
294
|
+
this->beta[index.first] +=
|
|
295
|
+
this->meta_step_size / (exp(this->beta[index.first])) * (delta - v_delta) * this->p[index.first];
|
|
296
|
+
if (exp(this->beta[index.first]) > this->eta || isinf(exp(this->beta[index.first])))
|
|
297
|
+
{
|
|
298
|
+
this->beta[index.first] = log(this->eta);
|
|
299
|
+
}
|
|
300
|
+
if(exp(this->beta[index.first]) < log(this->eta_min))
|
|
301
|
+
{
|
|
302
|
+
this->beta[index.first] = log(this->eta_min);
|
|
303
|
+
}
|
|
304
|
+
this->h_old[index.first] = this->h[index.first];
|
|
305
|
+
this->h[index.first] = this->h_temp[index.first] +
|
|
306
|
+
delta * this->z_bar[index.first] - this->z_delta[index.first] * this->v_delta;
|
|
307
|
+
this->h_temp[index.first] = this->h[index.first];
|
|
308
|
+
z_delta[index.first] = 0;
|
|
309
|
+
this->z[index.first] = gamma * this->lambda * this->z[index.first];
|
|
310
|
+
this->p[index.first] = gamma * this->lambda * this->p[index.first];
|
|
311
|
+
this->z_bar[index.first] = gamma * this->lambda * this->z_bar[index.first];
|
|
312
|
+
if (this->z[index.first] <= this->last_alpha[index.first] * epsilon)
|
|
313
|
+
{
|
|
314
|
+
this->z[index.first] = 0;
|
|
315
|
+
this->p[index.first] = 0;
|
|
316
|
+
this->z_bar[index.first] = 0;
|
|
317
|
+
this->delta_w[index.first] = 0;
|
|
318
|
+
this->setOfEligibleItems[position] = this->setOfEligibleItems[this->setOfEligibleItems.size() - 1];
|
|
319
|
+
this->setOfEligibleItems.pop_back();
|
|
320
|
+
}
|
|
321
|
+
else
|
|
322
|
+
{
|
|
323
|
+
position++;
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
this->v_delta = 0;
|
|
327
|
+
float rate_of_learning = 0;
|
|
328
|
+
|
|
329
|
+
for (auto& index : feature_indices)
|
|
330
|
+
{
|
|
331
|
+
rate_of_learning += exp(this->beta[index.first]) * index.second * index.second;
|
|
332
|
+
}
|
|
333
|
+
float E = this->eta;
|
|
334
|
+
if (rate_of_learning > this->eta)
|
|
335
|
+
{
|
|
336
|
+
E = rate_of_learning;
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
float t = 0;
|
|
340
|
+
for (auto& index : feature_indices)
|
|
341
|
+
{
|
|
342
|
+
t += this->z[index.first] * index.second;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
for (auto& index : feature_indices)
|
|
346
|
+
{
|
|
347
|
+
if (z[index.first] == 0)
|
|
348
|
+
{
|
|
349
|
+
this->setOfEligibleItems.push_back(index);
|
|
350
|
+
}
|
|
351
|
+
this->v_delta += this->delta_w[index.first] * index.second;
|
|
352
|
+
this->z_delta[index.first] = (this->eta / E) * exp(this->beta[index.first]) * index.second;
|
|
353
|
+
this->last_alpha[index.first] = (this->eta / E) * exp(this->beta[index.first]);
|
|
354
|
+
if ((this->eta / E) < 1)
|
|
355
|
+
{
|
|
356
|
+
this->h_temp[index.first] = 0;
|
|
357
|
+
this->h[index.first] = 0;
|
|
358
|
+
this->h_old[index.first] = 0;
|
|
359
|
+
this->z_bar[index.first] = 0;
|
|
360
|
+
this->beta[index.first] += log(this->decay) * index.second * index.second;
|
|
361
|
+
}
|
|
362
|
+
this->z[index.first] += this->z_delta[index.first] * (1 - t);
|
|
363
|
+
this->p[index.first] += this->h_old[index.first] * index.second;
|
|
364
|
+
this->z_bar[index.first] += this->z_delta[index.first] * (1 - t - this->z_bar[index.first] * index.second);
|
|
365
|
+
this->h_temp[index.first] = this->h[index.first] - this->h_old[index.first] * index.second * (this->z[index.
|
|
366
|
+
first] - this->
|
|
367
|
+
z_delta[index.first]) -
|
|
368
|
+
this->h[index.first] * this->z_delta[index.first] * index.second;
|
|
369
|
+
}
|
|
370
|
+
this->v_old = v;
|
|
371
|
+
return v;
|
|
372
|
+
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
|
|
2
|
+
//
|
|
3
|
+
// Created by Khurram Javed on 2024-02-18.
|
|
4
|
+
//
|
|
5
|
+
|
|
6
|
+
#ifndef SWIFTTD_H
|
|
7
|
+
#define SWIFTTD_H
|
|
8
|
+
|
|
9
|
+
#include <vector>
|
|
10
|
+
|
|
11
|
+
// Small numeric helper shared by the SwiftTD implementations.
class Math
{
public:
    // Returns the dot product of a and b.
    // NOTE(review): behaviour for mismatched lengths is determined by the
    // implementation in SwiftTD.cpp — confirm before relying on it.
    static float DotProduct(const std::vector<float>& a, const std::vector<float>& b);
};
|
|
16
|
+
|
|
17
|
+
// SwiftTD learner for dense feature vectors: every call to Step receives
// the full feature vector (one float per feature).
class SwiftTDNonSparse
{
private:
    std::vector<float> w;        // learned weight vector (one weight per feature)
    std::vector<float> z;        // eligibility traces
    std::vector<float> z_delta;  // per-step additions to the traces
    std::vector<float> delta_w;  // pending per-weight updates

    std::vector<float> featureVector;  // buffer holding the current features

    std::vector<float> h;      // step-size adaptation trace (current)
    std::vector<float> h_old;  // step-size adaptation trace from the previous step
    std::vector<float> h_temp; // intermediate value of h computed within a step
    std::vector<float> beta;   // per-weight log step-size parameters (step size = exp(beta))
    std::vector<float> z_bar;  // auxiliary trace used by the step-size update
    std::vector<float> p;      // auxiliary per-weight statistic

    float v_delta;        // accumulated change in the value estimate within a step
    float lambda;         // trace-decay parameter of TD(lambda)
    float epsilon;        // small constant — NOTE(review): exact role defined in SwiftTD.cpp
    float v_old;          // value prediction from the previous step
    float meta_step_size; // learning rate of the step-size adaptation

    float eta;            // cap on the overall rate of learning
    float eta_min;        // lower bound on the effective step size

    float decay;          // multiplicative step-size decay factor (applied via log(decay) on beta)
    float gamma;          // discount factor

public:
    // Constructs a learner for number_of_features dense features; alpha_init
    // seeds the per-weight step sizes, the remaining arguments initialise the
    // correspondingly named members above.
    SwiftTDNonSparse(int number_of_features, float lambda_init, float alpha_init, float gamma_init, float epsilon_init,
                     float eta_init,
                     float decay_init, float meta_step_size_init, float eta_min = 1e-10);
    // Performs one TD step on the given dense feature vector and reward;
    // returns the current value prediction.
    float Step(const std::vector<float>& features, float reward);
};
|
|
52
|
+
|
|
53
|
+
// SwiftTD learner specialised for sparse binary features: Step receives only
// the indices of the active features (each implicitly has value 1).
class SwiftTDBinaryFeatures
{
    std::vector<int> setOfEligibleItems; // set of eligible items (indices with non-zero traces)
    std::vector<float> w;        // learned weight vector
    std::vector<float> z;        // eligibility traces
    std::vector<float> z_delta;  // per-step additions to the traces
    std::vector<float> delta_w;  // pending per-weight updates

    std::vector<float> featureVector; // scratch dense feature buffer

    std::vector<float> h;      // step-size adaptation trace (current)
    std::vector<float> h_old;  // step-size adaptation trace from the previous step
    std::vector<float> h_temp; // intermediate value of h computed within a step
    std::vector<float> beta;   // per-weight log step-size parameters (step size = exp(beta))
    std::vector<float> z_bar;  // auxiliary trace used by the step-size update
    std::vector<float> p;      // auxiliary per-weight statistic

    std::vector<float> last_alpha; // effective step size applied to each weight on the last step


    float v_delta;        // accumulated change in the value estimate within a step
    float lambda;         // trace-decay parameter of TD(lambda)
    float epsilon;        // small constant — NOTE(review): exact role defined in SwiftTD.cpp
    float v_old;          // value prediction from the previous step
    float meta_step_size; // learning rate of the step-size adaptation

    float eta;            // cap on the overall rate of learning
    float eta_min;        // lower bound on the effective step size

    float decay;          // multiplicative step-size decay factor (applied via log(decay) on beta)
    float gamma;          // discount factor

public:
    // Constructs a learner for number_of_features binary features; alpha_init
    // seeds the per-weight step sizes, the remaining arguments initialise the
    // correspondingly named members above.
    SwiftTDBinaryFeatures(int number_of_features, float lambda_init, float alpha_init, float gamma_init,
                          float epsilon_init, float eta_init,
                          float decay_init, float meta_step_size_init, float eta_min = 1e-10);
    // Performs one TD step given the indices of the active features and the
    // reward; returns the current value prediction.
    float Step(const std::vector<int>& feature_indices, float reward);
};
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
// SwiftTD learner for sparse real-valued features: Step receives
// (index, value) pairs for the non-zero features only.
class SwiftTD
{
    std::vector<std::pair<int, float>> setOfEligibleItems; // set of eligible items: (index, value) pairs whose trace just became active
    std::vector<float> w;        // learned weight vector
    std::vector<float> z;        // eligibility traces
    std::vector<float> z_delta;  // per-step additions to the traces
    std::vector<float> delta_w;  // pending per-weight updates

    std::vector<float> featureVector; // scratch dense feature buffer

    std::vector<float> h;      // step-size adaptation trace (current)
    std::vector<float> h_old;  // step-size adaptation trace from the previous step
    std::vector<float> h_temp; // intermediate value of h computed within a step
    std::vector<float> beta;   // per-weight log step-size parameters (step size = exp(beta))
    std::vector<float> z_bar;  // auxiliary trace used by the step-size update
    std::vector<float> p;      // auxiliary per-weight statistic

    std::vector<float> last_alpha; // effective step size applied to each weight on the last step


    float v_delta;        // accumulated change in the value estimate within a step
    float lambda;         // trace-decay parameter of TD(lambda)
    float epsilon;        // small constant — NOTE(review): exact role defined in SwiftTD.cpp
    float v_old;          // value prediction from the previous step
    float meta_step_size; // learning rate of the step-size adaptation

    float eta;            // cap on the overall rate of learning
    float eta_min;        // lower bound on the effective step size

    float decay;          // multiplicative step-size decay factor (applied via log(decay) on beta)
    float gamma;          // discount factor

public:
    // Constructs a learner for number_of_features sparse real-valued
    // features; alpha_init seeds the per-weight step sizes, the remaining
    // arguments initialise the correspondingly named members above.
    SwiftTD(int number_of_features, float lambda_init, float alpha_init, float gamma_init,
            float epsilon_init, float eta_init,
            float decay_init, float meta_step_size_init, float eta_min = 1e-10);
    // Performs one TD step given the (index, value) pairs of the non-zero
    // features and the reward; returns the current value prediction.
    float Step(const std::vector<std::pair<int, float>>& feature_indices, float reward);
};
|
|
131
|
+
|
|
132
|
+
#endif // SWIFTTD_H
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#include <pybind11/pybind11.h>
|
|
2
|
+
#include <pybind11/stl.h>
|
|
3
|
+
#include "SwiftTD.h"
|
|
4
|
+
|
|
5
|
+
namespace py = pybind11;
|
|
6
|
+
|
|
7
|
+
// Python bindings for the three SwiftTD learner variants.
//
// Fixes relative to the previous binding:
//  * the trace-decay parameter is exposed as "lambda_" (trailing underscore,
//    PEP 8 convention) because "lambda" is a reserved keyword in Python and
//    could never be passed as a keyword argument; positional callers are
//    unaffected by the rename.
//  * "eta_min" now defaults to 1e-10, matching the default argument declared
//    on the C++ constructors, so Python callers may omit it.
PYBIND11_MODULE(swift_td, m)
{
    m.doc() = "Python bindings for the SwiftTD reinforcement learning algorithm"; // Module docstring

    // Bind the dense-feature variant.
    py::class_<SwiftTDNonSparse>(m, "SwiftTDNonSparse")
        .def(py::init<int, float, float, float, float, float, float, float, float>(),
             "Initialize the SwiftTDNonSparse algorithm",
             py::arg("num_of_features"),
             py::arg("lambda_"),
             py::arg("alpha"),
             py::arg("gamma"),
             py::arg("epsilon"),
             py::arg("eta"),
             py::arg("decay"),
             py::arg("meta_step_size"),
             py::arg("eta_min") = 1e-10f)
        .def("step", &SwiftTDNonSparse::Step,
             "Perform one step of learning",
             py::arg("features"),
             py::arg("reward"));

    // Bind the sparse binary-feature variant.
    py::class_<SwiftTDBinaryFeatures>(m, "SwiftTDBinaryFeatures")
        .def(py::init<int, float, float, float, float, float, float, float, float>(),
             "Initialize the SwiftTDBinaryFeatures algorithm",
             py::arg("num_of_features"),
             py::arg("lambda_"),
             py::arg("alpha"),
             py::arg("gamma"),
             py::arg("epsilon"),
             py::arg("eta"),
             py::arg("decay"),
             py::arg("meta_step_size"),
             py::arg("eta_min") = 1e-10f)
        .def("step", &SwiftTDBinaryFeatures::Step,
             "Perform one step of learning with sparse features",
             // NOTE(review): kept as "features_indices" for backward
             // compatibility with existing keyword callers, though
             // "feature_indices" was likely intended.
             py::arg("features_indices"),
             py::arg("reward"));

    // Bind the sparse real-valued-feature variant.
    py::class_<SwiftTD>(m, "SwiftTD")
        .def(py::init<int, float, float, float, float, float, float, float, float>(),
             "Initialize the SwiftTD algorithm",
             py::arg("num_of_features"),
             py::arg("lambda_"),
             py::arg("alpha"),
             py::arg("gamma"),
             py::arg("epsilon"),
             py::arg("eta"),
             py::arg("decay"),
             py::arg("meta_step_size"),
             py::arg("eta_min") = 1e-10f)
        .def("step", &SwiftTD::Step,
             "Perform one step of learning with sparse non-binary features",
             py::arg("feature_indices_values"),
             py::arg("reward"));
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package version string exposed as swifttd.__version__.
__version__ = "0.1.8"
|