sop4py 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sop/__init__.py +3 -0
- sop/btree.py +612 -0
- sop/call_go.py +304 -0
- sop/context.py +33 -0
- sop/libjsondb_amd64darwin.dylib +0 -0
- sop/libjsondb_amd64darwin.h +120 -0
- sop/libjsondb_amd64linux.h +120 -0
- sop/libjsondb_amd64linux.so +0 -0
- sop/libjsondb_amd64windows.dll +0 -0
- sop/libjsondb_amd64windows.h +120 -0
- sop/libjsondb_arm64darwin.dylib +0 -0
- sop/libjsondb_arm64darwin.h +120 -0
- sop/libjsondb_arm64linux.h +120 -0
- sop/libjsondb_arm64linux.so +0 -0
- sop/redis.py +40 -0
- sop/test_btree.py +479 -0
- sop/test_btree_idx.py +86 -0
- sop/transaction.py +167 -0
- sop4py-2.0.0.dist-info/METADATA +124 -0
- sop4py-2.0.0.dist-info/RECORD +22 -0
- sop4py-2.0.0.dist-info/WHEEL +5 -0
- sop4py-2.0.0.dist-info/top_level.txt +1 -0
sop/transaction.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import call_go
|
|
3
|
+
import uuid
|
|
4
|
+
import context
|
|
5
|
+
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from dataclasses import dataclass, asdict
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TransactionMode(Enum):
    """Modes in which a SOP transaction can be opened.

    Members:
        NoCheck: No check on commit; most performant but does NOT guarantee
            ACIDity.
        ForWriting: Mode for writing/updates/deletes.
        ForReading: Mode for reading only. The version # of every item read
            is checked on commit to guarantee ACIDity.
    """

    NoCheck = 0
    ForWriting = 1
    ForReading = 2
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Smallest registry hash mod value.
# At 250 a 1MB file segment should be generated. Formula: 250 X 4096 = 1MB
# With a slot size of 50 per node, that should manage 825,000 B-Tree items (key/value pairs).
#
# Formula: 250 * 66 * 50 = 825,000
# With a slot size of 100 per node it gives 1,650,000 items or, assuming about 65%
# b-tree utilization, 1,072,500 usable space.
MIN_HASH_MOD_VALUE = 250
# Largest registry hash mod value.
# At 750k a 3GB file segment should be generated. Formula: 750k X 4096 = 3GB
MAX_HASH_MOD_VALUE = 750_000
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class ErasureCodingConfig:
    """
    Parameter bundle for Reed Solomon based erasure coding (EC) replication.

    This is a special algorithm for replication allowing full operational capability even if you lose a half of your
    storage drives.

    For example, if you have 100% redundancy on four drives, losing two drives SOP will still be able to give you
    Read & Write. Your IT still needs to replace the drives and allow SOP to auto-reconstruct the redundant "shards"
    so your setup can offer tolerance once again.

    Instances compare equal, and hash equal, when all four fields are equal.
    """

    data_shards_count: int
    parity_shards_count: int
    # NOTE(review): annotated as str, but the value is hashed as-is in __hash__, so whatever is
    # passed here must be hashable (e.g. a tuple of paths rather than a list) - confirm intended type.
    base_folder_paths_across_drives: str
    repair_corrupted_shards: bool

    def _key(self):
        """Return the field tuple shared by __eq__ and __hash__ so the two always agree."""
        return (
            self.data_shards_count,
            self.parity_shards_count,
            self.base_folder_paths_across_drives,
            self.repair_corrupted_shards,
        )

    def __eq__(self, other):
        """Compare field by field; defer to Python for objects that are not an ErasureCodingConfig."""
        if not isinstance(other, ErasureCodingConfig):
            # Returning NotImplemented (instead of touching other's attributes) lets
            # comparisons such as `cfg == None` evaluate to False rather than raise.
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        """Hash the same fields that __eq__ compares."""
        return hash(self._key())
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class TransationOptions:
    """Settings handed to SOP when a transaction is created."""

    # Transaction mode, as an integer.
    mode: int
    # Expressed in minutes on the Python side; SOP in Golang converts it to the respective time.duration value.
    max_time: int
    # Hash mod applied to the Registry map on disk. Minimum value is 250, maximum is 750000.
    # A value of 250 generates a 1MB segment file. See the equivalent comment on the Golang side (for now).
    registry_hash_mod: int
    # Base folder path (home folder) of the stores.
    stores_folders: str
    # Erasure coding (EC) configuration.
    erasure_config: dict[str, ErasureCodingConfig]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TransactionError(Exception):
    """Root of the exception hierarchy for transaction-related failures."""
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class InvalidTransactionStateError(TransactionError):
    """Signals that an operation was attempted on a transaction in an invalid state."""
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class Transaction:
    """
    Transaction object is used to manage a transaction (begin, commit, rollback).

    Delegates API calls to the SOP library that does Direct IO to disk drives w/ built-in L1/L2 caching.
    """

    # Action codes passed as the second argument of call_go.manage_transaction.
    _ACTION_NEW = 1
    _ACTION_BEGIN = 2
    _ACTION_COMMIT = 3
    _ACTION_ROLLBACK = 4

    def __init__(self, ctx: context.Context, options: TransationOptions):
        """Ask SOP to create a transaction and remember the ID it returns.

        Args:
            ctx (context.Context): Context whose ``id`` is forwarded to SOP.
            options (TransationOptions): Options, sent to SOP serialized as JSON.

        Raises:
            TransactionError: SOP returned nothing, or returned text that is not a UUID
                (in which case that text is used as the error message).
        """
        self.options = options
        # The nil UUID marks "no transaction created yet"; begin/commit/rollback check for it.
        self.transaction_id = uuid.UUID(int=0)

        res = call_go.manage_transaction(
            ctx.id, self._ACTION_NEW, json.dumps(asdict(options))
        )

        if res is None:
            raise TransactionError("unable to create a Tranasaction object in SOP")
        try:
            self.transaction_id = uuid.UUID(res)
        except (ValueError, TypeError, AttributeError):
            # if res can't be converted to UUID, it is expected to be an error msg from SOP.
            # Only conversion failures are handled here, so KeyboardInterrupt/SystemExit
            # are no longer swallowed and re-labelled as a TransactionError.
            raise TransactionError(res) from None

    def _require_created(self):
        """Raise InvalidTransactionStateError when no transaction ID has been assigned."""
        if self.transaction_id == uuid.UUID(int=0):
            raise InvalidTransactionStateError("transaction_id is missing")

    @staticmethod
    def _raise_on_failure(res, operation):
        """Raise TransactionError when SOP returned anything other than None for ``operation``."""
        if res is not None:
            raise TransactionError(f"Transaction {operation} failed, details {res}")

    def begin(self):
        """Begin the transaction.

        Raises:
            InvalidTransactionStateError: The transaction has no ID.
            TransactionError: SOP reported a failure.
        """
        self._require_created()
        # begin() takes no context, so 0 is passed in the context id position.
        res = call_go.manage_transaction(
            0, self._ACTION_BEGIN, str(self.transaction_id)
        )
        self._raise_on_failure(res, "begin")

    def commit(
        self,
        ctx: context.Context,
    ):
        """Commit will finalize the transaction, all b-tree management operations to the backend storage. The committed
        changes will start to reflect on succeeding transactions b-tree store fetches/operations.

        Args:
            ctx (context.Context): Context whose ``id`` is forwarded to SOP.

        Raises:
            InvalidTransactionStateError: The transaction has no ID.
            TransactionError: SOP reported a failure.
        """
        self._require_created()
        res = call_go.manage_transaction(
            ctx.id, self._ACTION_COMMIT, str(self.transaction_id)
        )
        self._raise_on_failure(res, "commit")

    def rollback(
        self,
        ctx: context.Context,
    ):
        """Undo or rollback the changes done within the transaction.

        Args:
            ctx (context.Context): Context whose ``id`` is forwarded to SOP.

        Raises:
            InvalidTransactionStateError: The transaction has no ID.
            TransactionError: SOP reported a failure.
        """
        self._require_created()
        res = call_go.manage_transaction(
            ctx.id, self._ACTION_ROLLBACK, str(self.transaction_id)
        )
        self._raise_on_failure(res, "rollback")
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sop4py
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: Scalable Objects Persistence for Python.
|
|
5
|
+
Author-email: Gerardo Recinto <gerardorecinto@yahoo.com>
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.7
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: libjsondb_amd64darwin.dylib
|
|
12
|
+
Requires-Dist: libjsondb_amd64darwin.h
|
|
13
|
+
Requires-Dist: libjsondb_arm64darwin.dylib
|
|
14
|
+
Requires-Dist: libjsondb_arm64darwin.h
|
|
15
|
+
Requires-Dist: libjsondb_amd64linux.so
|
|
16
|
+
Requires-Dist: libjsondb_amd64linux.h
|
|
17
|
+
Requires-Dist: libjsondb_arm64linux.so
|
|
18
|
+
Requires-Dist: libjsondb_arm64linux.h
|
|
19
|
+
Requires-Dist: libjsondb_amd64windows.dll
|
|
20
|
+
Requires-Dist: libjsondb_amd64windows.h
|
|
21
|
+
|
|
22
|
+
# What is SOP?
|
|
23
|
+
|
|
24
|
+
Scalable Objects Persistence (SOP) is a raw storage engine that bakes together a set of storage-related features & algorithms to provide the most efficient & reliable (ACID transactions) known technique for storage management and rich search. It brings to the application the raw muscle of "raw storage", i.e. direct IO communication w/ disk drives, in a code library form factor.
|
|
25
|
+
|
|
26
|
+
# SOP supported Hardware/OS
|
|
27
|
+
SOP supports popular architectures & Operating Systems such as Linux, Darwin & Microsoft Windows, in both ARM64 & AMD64 architectures. For Windows, only AMD64 is supported since it is the only architecture Windows is available in.
|
|
28
|
+
|
|
29
|
+
# SOP Dependencies
|
|
30
|
+
* Redis: you will need a recent (or the latest) version of Redis, which SOP uses for caching.
|
|
31
|
+
* More than one disk drive (four or more is recommended, for replication) with plenty of drive space available for storage management. Example:
|
|
32
|
+
```
|
|
33
|
+
/disk1
|
|
34
|
+
/disk2
|
|
35
|
+
/disk3
|
|
36
|
+
/disk4
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
# SOP for Python package
|
|
40
|
+
The following steps outline how to use the Scalable Objects Persistence code library for Python:
|
|
41
|
+
* Install the package using: pip install sop4py
|
|
42
|
+
* Follow standard Python package import and start coding to use the SOP for Python code library for data management. Import the sop package in your python code file.
|
|
43
|
+
* Specify Home base folders where Store info & Registry data files will be stored.
|
|
44
|
+
* Specify Erasure Coding (EC) configuration details which will be used by SOP's EC based replication.
|
|
45
|
+
* Create a transaction
|
|
46
|
+
* Begin a transaction
|
|
47
|
+
* Create a new B-tree, or Open an existing B-tree
|
|
48
|
+
* Manage data, do some CRUD operations
|
|
49
|
+
* Commit the transaction
|
|
50
|
+
|
|
51
|
+
Below is an example code block illustrating the above steps. For other SOP B-tree examples, you can check out the code in the unit test files test_btree.py & test_btree_idx.py that come w/ the SOP package you downloaded from PyPI.
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
import sop.transaction
|
|
55
|
+
import sop.btree
|
|
56
|
+
import sop.context
|
|
57
|
+
|
|
58
|
+
stores_folders = ("/disk1", "/disk2")
|
|
59
|
+
ec = {
|
|
60
|
+
# Erasure Config default entry(key="") will allow different B-tree(tables) to share same EC structure.
|
|
61
|
+
"": transaction.ErasureCodingConfig(
|
|
62
|
+
2, # two data shards
|
|
63
|
+
2, # two parity shards
|
|
64
|
+
(
|
|
65
|
+
# 4 disk drives paths
|
|
66
|
+
"/disk1",
|
|
67
|
+
"/disk2",
|
|
68
|
+
"/disk3",
|
|
69
|
+
"/disk4",
|
|
70
|
+
),
|
|
71
|
+
# False means failed reads from a shard's disk drive will not be auto-repaired.
|
|
72
|
+
False,
|
|
73
|
+
)
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
# Transaction Options (to).
|
|
77
|
+
to = transaction.TransationOptions(
|
|
78
|
+
transaction.TransactionMode.ForWriting.value,
|
|
79
|
+
# commit timeout of 5mins
|
|
80
|
+
5,
|
|
81
|
+
# Min Registry hash mod value is 250, you can specify higher value like 1000. A 250 hashmod
|
|
82
|
+
# will use 1MB sized file segments. Good for demo, but for Prod, perhaps a bigger value is better.
|
|
83
|
+
transaction.MIN_HASH_MOD_VALUE,
|
|
84
|
+
# Store info & Registry home base folders. Array of strings of two elements, one for Active & another, for passive folder.
|
|
85
|
+
stores_folders,
|
|
86
|
+
# Erasure Coding config as shown above.
|
|
87
|
+
ec,
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
# Context object.
|
|
91
|
+
ctx = context.Context()
|
|
92
|
+
|
|
93
|
+
# initialize/open SOP global Redis connection
|
|
94
|
+
ro = RedisOptions()
|
|
95
|
+
Redis.open_connection(ro)
|
|
96
|
+
|
|
97
|
+
t = transaction.Transaction(ctx, to)
|
|
98
|
+
t.begin()
|
|
99
|
+
|
|
100
|
+
cache = btree.CacheConfig()
|
|
101
|
+
|
|
102
|
+
# "barstoreec" is new b-tree name, 2nd parameter set to True specifies B-tree Key field to be native data type
|
|
103
|
+
bo = btree.BtreeOptions("barstoreec", True, cache_config=cache)
|
|
104
|
+
bo.set_value_data_size(btree.ValueDataSize.Small)
|
|
105
|
+
|
|
106
|
+
# create the new "barstoreec" b-tree store.
|
|
107
|
+
b3 = btree.Btree.new(ctx, bo, t)
|
|
108
|
+
|
|
109
|
+
# Since we've specified Native data type = True in BtreeOptions, we can use "integer" values as Key.
|
|
110
|
+
l = [
|
|
111
|
+
btree.Item(1, "foo"),
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
# Add Item to the B-tree,
|
|
115
|
+
b3.add(ctx, l)
|
|
116
|
+
|
|
117
|
+
# Commit the transaction to finalize the new B-tree (store) change.
|
|
118
|
+
t.commit(ctx)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
# SOP in Github
|
|
122
|
+
The SOP open source project (MIT license) is on GitHub. You can check out the "...sop/jsondb/" package, which contains the Go code enabling general purpose JSON data management & the Python wrapper whose coding guideline was described above.
|
|
123
|
+
|
|
124
|
+
Please feel free to join the SOP project if you have the bandwidth, and participate in, co-own, or lead the project engineering!
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
sop/__init__.py,sha256=kCWbTRTReQH3_Or-COEOdCr5H5EEdh0UqYIsEnrpxdQ,22
|
|
2
|
+
sop/btree.py,sha256=mWGi7jJyi-oOY6tZQMRl7xW8kE5UQbcFpqiW3JXeCrQ,20612
|
|
3
|
+
sop/call_go.py,sha256=xO2DOAd1xdsVTnpWiWdq4oKyGz4pyV8WO16LjIvRjR4,8201
|
|
4
|
+
sop/context.py,sha256=jiKZayAu-NlvH47mPjMTaNoczjx-3J_kQGgKGRtEkS8,1117
|
|
5
|
+
sop/libjsondb_amd64darwin.dylib,sha256=J1yd9wxA0C_OC8wufoQnVLJ4WEgXL1iWFDdpZEVo8f4,9132992
|
|
6
|
+
sop/libjsondb_amd64darwin.h,sha256=V6vvqBwxSgbTmexvtwZikAlsQNdAXZq8gr9DvfVG1bo,2860
|
|
7
|
+
sop/libjsondb_amd64linux.h,sha256=V6vvqBwxSgbTmexvtwZikAlsQNdAXZq8gr9DvfVG1bo,2860
|
|
8
|
+
sop/libjsondb_amd64linux.so,sha256=KK8rvyi3sbffH7UK_FYJLgviGAL98g_bZjc3dzsf3lQ,20317952
|
|
9
|
+
sop/libjsondb_amd64windows.dll,sha256=ON3hj591vClFz3Zl8NSc_PmBtTnvHWgeACnSH3KrRd0,19708240
|
|
10
|
+
sop/libjsondb_amd64windows.h,sha256=U5CQxqyjYTG33HYjiUw1Kpf9JvhaJugRdyXUGecq9Vw,3156
|
|
11
|
+
sop/libjsondb_arm64darwin.dylib,sha256=3NiXoK83NFh8hkrun1591MaIpAGV_YRDLuDf3xcdyUc,7700914
|
|
12
|
+
sop/libjsondb_arm64darwin.h,sha256=V6vvqBwxSgbTmexvtwZikAlsQNdAXZq8gr9DvfVG1bo,2860
|
|
13
|
+
sop/libjsondb_arm64linux.h,sha256=V6vvqBwxSgbTmexvtwZikAlsQNdAXZq8gr9DvfVG1bo,2860
|
|
14
|
+
sop/libjsondb_arm64linux.so,sha256=w_T66HroMqGOsNGAX8rr3xiSQiDJmmBGmNy_Lt3BU8E,18502536
|
|
15
|
+
sop/redis.py,sha256=WHc4OUpvKEM2mwVCK_RBDaf6zWUpduKT4hQ9ZDcEWao,1172
|
|
16
|
+
sop/test_btree.py,sha256=4hUvZM1eXU2SrdgjZSXRtDFV9nWnTF-aDfFeJ8XZpbs,13071
|
|
17
|
+
sop/test_btree_idx.py,sha256=FcKosiDEaBkBw-zhHcP3EjdB3_FOcTo7YkGKaQtBnUo,2231
|
|
18
|
+
sop/transaction.py,sha256=63fzhC8LanQJ6OIeZCdg-9dVHsOOyN_qtFpXli5DJu0,5753
|
|
19
|
+
sop4py-2.0.0.dist-info/METADATA,sha256=q5DvN9vMIMjn43u7Kfy0QTAdK-MXh3s3pEdGLT2qnOc,5033
|
|
20
|
+
sop4py-2.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
21
|
+
sop4py-2.0.0.dist-info/top_level.txt,sha256=Xg42qRXwoOKelr7Lc8cpmy9uuBU3LtD3tPLHVnt0aHs,4
|
|
22
|
+
sop4py-2.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
sop
|