syncmodels 0.1.330__py2.py3-none-any.whl → 0.1.333__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syncmodels/__init__.py +1 -1
- syncmodels/crawler.py +16 -32
- syncmodels/helpers/surreal.py +2 -1
- syncmodels/session/sql.py +1 -1
- syncmodels/storage.py +85 -84
- {syncmodels-0.1.330.dist-info → syncmodels-0.1.333.dist-info}/METADATA +2 -2
- {syncmodels-0.1.330.dist-info → syncmodels-0.1.333.dist-info}/RECORD +12 -12
- {syncmodels-0.1.330.dist-info → syncmodels-0.1.333.dist-info}/AUTHORS.rst +0 -0
- {syncmodels-0.1.330.dist-info → syncmodels-0.1.333.dist-info}/LICENSE +0 -0
- {syncmodels-0.1.330.dist-info → syncmodels-0.1.333.dist-info}/WHEEL +0 -0
- {syncmodels-0.1.330.dist-info → syncmodels-0.1.333.dist-info}/entry_points.txt +0 -0
- {syncmodels-0.1.330.dist-info → syncmodels-0.1.333.dist-info}/top_level.txt +0 -0
syncmodels/__init__.py
CHANGED
syncmodels/crawler.py
CHANGED
@@ -851,7 +851,7 @@ class iBot(iAgent):
|
|
851
851
|
self.add_plugin(MetaExtractPlugin())
|
852
852
|
# self.add_plugin(PaginatorPlugin())
|
853
853
|
|
854
|
-
self.add_plugin(PutPlugin())
|
854
|
+
self.add_plugin(PutPlugin()) # comment this for dry-run
|
855
855
|
|
856
856
|
self.add_plugin(SetURIPlugin())
|
857
857
|
self.add_plugin(Cleaner())
|
@@ -1201,7 +1201,6 @@ class iBot(iAgent):
|
|
1201
1201
|
# TODO: I need to clean really old cache entries
|
1202
1202
|
# TODO: in order to control any excesive memory compsumption
|
1203
1203
|
try:
|
1204
|
-
|
1205
1204
|
if not self._is_already(timeout=timeout, type_=type_, **params):
|
1206
1205
|
blueprint = self.blueprint(**params)
|
1207
1206
|
universe = self.ALREADY.setdefault(type_, {})
|
@@ -1610,34 +1609,14 @@ class SortPlugin(iPlugin):
|
|
1610
1609
|
# )
|
1611
1610
|
reverse = list(set(sort_key).intersection(item_fields))
|
1612
1611
|
if not reverse:
|
1613
|
-
log.
|
1614
|
-
|
1612
|
+
log.debug(
|
1613
|
+
"model [%s] attributes:[%s] doesn't match any datetime alike sort keys: [%s]",
|
1614
|
+
model,
|
1615
|
+
list(item_fields),
|
1616
|
+
sort_key,
|
1617
|
+
)
|
1618
|
+
# return
|
1615
1619
|
|
1616
|
-
if False:
|
1617
|
-
sample = stream[-1]
|
1618
|
-
reverse = []
|
1619
|
-
for key in sort_key:
|
1620
|
-
value = sample[key]
|
1621
|
-
if func := TYPES_MAP.get(value.__class__):
|
1622
|
-
candidates = {
|
1623
|
-
k: v
|
1624
|
-
for k, v in sample.items()
|
1625
|
-
if k != key and func(sample[k]) == func(value)
|
1626
|
-
}
|
1627
|
-
if candidates:
|
1628
|
-
_key, _ = candidates.popitem()
|
1629
|
-
reverse.append(_key)
|
1630
|
-
else:
|
1631
|
-
log.debug(
|
1632
|
-
"can't find the reverse key of '%s' in %s",
|
1633
|
-
sort_key,
|
1634
|
-
sample,
|
1635
|
-
)
|
1636
|
-
# # using same sort_key because source and target
|
1637
|
-
# # layouts share the same sorteable key (i.e. 'datetime')??
|
1638
|
-
# reverse.append(key)
|
1639
|
-
else:
|
1640
|
-
pass
|
1641
1620
|
context[REVERSE_SORT_KEY] = reverse
|
1642
1621
|
|
1643
1622
|
if sort_key:
|
@@ -1656,8 +1635,13 @@ class SortPlugin(iPlugin):
|
|
1656
1635
|
item_fields = model.model_fields
|
1657
1636
|
reverse = list(set(sort_key).intersection(item_fields))
|
1658
1637
|
if not reverse:
|
1659
|
-
log.
|
1660
|
-
|
1638
|
+
log.debug(
|
1639
|
+
"model [%s] attributes:[%s] doesn't match any datetime alike sort keys: [%s]",
|
1640
|
+
model,
|
1641
|
+
list(item_fields),
|
1642
|
+
sort_key,
|
1643
|
+
)
|
1644
|
+
# return
|
1661
1645
|
|
1662
1646
|
context[REVERSE_SORT_KEY] = reverse
|
1663
1647
|
|
@@ -1714,7 +1698,7 @@ class HashStreamPlugin(iPlugin):
|
|
1714
1698
|
blueprint = blueprint.hexdigest()
|
1715
1699
|
context["stream_blueprint"] = blueprint
|
1716
1700
|
if not self.bot._set_already(
|
1717
|
-
timeout=3600,
|
1701
|
+
# timeout=3600,
|
1718
1702
|
type_="response",
|
1719
1703
|
blueprint=blueprint,
|
1720
1704
|
):
|
syncmodels/helpers/surreal.py
CHANGED
syncmodels/session/sql.py
CHANGED
@@ -111,7 +111,7 @@ class iSQLSession(iSession):
|
|
111
111
|
|
112
112
|
since_key = params.get(MONOTONIC_SINCE_KEY)
|
113
113
|
table = context[KIND_KEY]
|
114
|
-
limit = params.get(LIMIT_KEY_VALUE, context.get(LIMIT_KEY_VALUE)) or 1024
|
114
|
+
limit = params.get(LIMIT_KEY_VALUE, context.get(LIMIT_KEY_VALUE)) or 1024 * 8
|
115
115
|
|
116
116
|
query = f"SELECT * FROM {table}"
|
117
117
|
if MONOTONIC_SINCE_VALUE in params:
|
syncmodels/storage.py
CHANGED
@@ -1183,8 +1183,13 @@ class WaveStorage(iWaves, iStorage):
|
|
1183
1183
|
if must_check:
|
1184
1184
|
t0 = time.time()
|
1185
1185
|
await prevously_inserted()
|
1186
|
-
|
1187
|
-
|
1186
|
+
if False or random.random() < 0.1:
|
1187
|
+
elapsed = time.time() - t0
|
1188
|
+
log.info("[%s] prevously_inserted took: %s secs", uid, elapsed)
|
1189
|
+
if elapsed > 1.0:
|
1190
|
+
# TODO: debug what's going here
|
1191
|
+
foo = 1
|
1192
|
+
|
1188
1193
|
else:
|
1189
1194
|
# hack for not altering the data
|
1190
1195
|
# push = False
|
@@ -1202,6 +1207,7 @@ class WaveStorage(iWaves, iStorage):
|
|
1202
1207
|
|
1203
1208
|
data[ID_KEY] = "{thing}:{id}".format_map(_uri)
|
1204
1209
|
|
1210
|
+
res0 = res1 = res2 = False
|
1205
1211
|
# must push the data?
|
1206
1212
|
context[PUSHED] = push
|
1207
1213
|
if push:
|
@@ -1212,98 +1218,93 @@ class WaveStorage(iWaves, iStorage):
|
|
1212
1218
|
data_sort_blueprint,
|
1213
1219
|
data,
|
1214
1220
|
)
|
1215
|
-
foo = 1
|
1216
1221
|
query = f"{namespace}://{database}/{thing}:{monotonic}"
|
1217
1222
|
res2 = await self.storage.put(query, data)
|
1218
1223
|
else:
|
1219
1224
|
# TODO: agp: refactor all this function when we've time!
|
1220
1225
|
res2 = True
|
1221
|
-
foo = 1
|
1222
1226
|
|
1223
|
-
#
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
+
# save Snapshot of the object
|
1228
|
+
if push:
|
1229
|
+
# long fquid version
|
1230
|
+
# data[ID_KEY] = data[ORG_KEY]
|
1231
|
+
# short version
|
1232
|
+
# data[ID_KEY] = "{thing}:{id}".format_map(_uri)
|
1233
|
+
if _uri["id"] is None:
|
1234
|
+
__id = data.get(ORG_KEY, "")
|
1235
|
+
_id = parse_duri(__id)
|
1236
|
+
if _id["id"] is None:
|
1237
|
+
_uri["id"] = __id
|
1238
|
+
else:
|
1239
|
+
_uri["id"] = _id["id"]
|
1240
|
+
data[ORG_KEY] = build_uri(**_uri)
|
1241
|
+
|
1242
|
+
data[ID_KEY] = "{thing}:{id}".format_map(_uri)
|
1243
|
+
query = f"{namespace}://{database}/{TUBE_SNAPSHOT}"
|
1244
|
+
# resuming_info = {k: kw[k] for k in kw.get(REVERSE_SORT_KEY, [])}
|
1245
|
+
# resuming_info = {k: kw[k] for k in kw.get(SORT_KEY) or []}
|
1246
|
+
resuming_info = {
|
1247
|
+
k: kw.get(k, data.get(k)) for k in kw.get(SORT_KEY) or []
|
1248
|
+
}
|
1249
|
+
# force to be json compatible
|
1250
|
+
resuming_info = JSONVerter.to_json(resuming_info)
|
1227
1251
|
|
1228
|
-
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1252
|
+
res0 = await self.storage.put(
|
1253
|
+
query,
|
1254
|
+
data,
|
1255
|
+
)
|
1256
|
+
# 3. finally add the wave info into tube
|
1257
|
+
data.pop(MONOTONIC_KEY)
|
1258
|
+
|
1259
|
+
# update the TUBE_WAVE due the insertion of this object
|
1260
|
+
if push:
|
1261
|
+
# 2. save last Wave from this particular tube
|
1262
|
+
# AVOID_KEYS contains all keys that aren't json serializable
|
1263
|
+
# wave = {
|
1264
|
+
# k: v for k, v in kw.items() if k not in self.AVOID_KEYS
|
1265
|
+
# }
|
1266
|
+
# try to recover the 'intact' bootstrap that we've using
|
1267
|
+
for wave0 in kw.get(WAVE_LAST_KEY, []):
|
1268
|
+
wave = wave0.get("wave") # TODO: use a define
|
1269
|
+
if wave:
|
1270
|
+
break
|
1233
1271
|
else:
|
1234
|
-
|
1235
|
-
|
1272
|
+
# otherwise, its the 1st time and we need to create the 1st
|
1273
|
+
# bootstrap-wave info
|
1274
|
+
wave_keys = set(kw.get(WAVE_INFO_KEY, []))
|
1275
|
+
wave_keys.update([KIND_KEY, PREFIX_URL, PARAMS_KEY])
|
1276
|
+
# task = kw[TASK_KEY]
|
1277
|
+
wave = {k: kw[k] for k in wave_keys.intersection(kw)}
|
1236
1278
|
|
1237
|
-
data[ID_KEY] = "{thing}:{id}".format_map(_uri)
|
1238
|
-
query = f"{namespace}://{database}/{TUBE_SNAPSHOT}"
|
1239
|
-
# resuming_info = {k: kw[k] for k in kw.get(REVERSE_SORT_KEY, [])}
|
1240
|
-
# resuming_info = {k: kw[k] for k in kw.get(SORT_KEY) or []}
|
1241
|
-
resuming_info = {
|
1242
|
-
k: kw.get(k, data.get(k)) for k in kw.get(SORT_KEY) or []
|
1243
|
-
}
|
1244
|
-
# force to be json compatible
|
1245
|
-
resuming_info = JSONVerter.to_json(resuming_info)
|
1246
|
-
|
1247
|
-
res0 = await self.storage.put(
|
1248
|
-
query,
|
1249
|
-
data,
|
1250
|
-
)
|
1251
|
-
# 3. finally add the wave info into tube
|
1252
|
-
data.pop(MONOTONIC_KEY)
|
1253
|
-
|
1254
|
-
# 2. save last Wave from this particular tube
|
1255
|
-
# AVOID_KEYS contains all keys that aren't json serializable
|
1256
|
-
# wave = {
|
1257
|
-
# k: v for k, v in kw.items() if k not in self.AVOID_KEYS
|
1258
|
-
# }
|
1259
|
-
|
1260
|
-
# try to recover the 'intact' bootstrap that we've using
|
1261
|
-
for wave0 in kw.get(WAVE_LAST_KEY, []):
|
1262
|
-
wave = wave0.get("wave") # TODO: use a define
|
1263
1279
|
if wave:
|
1264
|
-
|
1265
|
-
|
1266
|
-
|
1267
|
-
|
1268
|
-
|
1269
|
-
|
1270
|
-
|
1271
|
-
|
1272
|
-
|
1273
|
-
|
1274
|
-
|
1275
|
-
|
1276
|
-
|
1277
|
-
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1283
|
-
|
1284
|
-
|
1285
|
-
|
1286
|
-
|
1287
|
-
|
1288
|
-
|
1289
|
-
|
1290
|
-
#
|
1291
|
-
|
1292
|
-
wave = exists[0]
|
1293
|
-
# stamp current wave
|
1294
|
-
_wave = {
|
1295
|
-
**wave,
|
1296
|
-
MONOTONIC_KEY: monotonic,
|
1297
|
-
WAVE_RESUMING_INFO_KEY: resuming_info,
|
1298
|
-
}
|
1299
|
-
res1 = await self.storage.put(query, _wave)
|
1300
|
-
else:
|
1301
|
-
# wave is empty, maybe because is not a resuming crawling task
|
1302
|
-
log.debug(
|
1303
|
-
"wave is empty, maybe because is not a resuming crawling task"
|
1304
|
-
)
|
1305
|
-
# log.info("Saving: %s", data)
|
1306
|
-
res1 = True
|
1280
|
+
# wave must be json compatible and do not use any reserved
|
1281
|
+
# keyword for storage (i.e. 'scope' in Surreal)
|
1282
|
+
query = f"{namespace}://{database}/{TUBE_WAVE}"
|
1283
|
+
|
1284
|
+
# query can't containg MONOTONIC_KEY
|
1285
|
+
wave.pop(MONOTONIC_KEY, None)
|
1286
|
+
wave.pop(WAVE_RESUMING_INFO_KEY, None)
|
1287
|
+
exists = await self.storage.query(query, **wave)
|
1288
|
+
assert len(exists) <= 1
|
1289
|
+
|
1290
|
+
if len(exists):
|
1291
|
+
# use the same record_id
|
1292
|
+
# otherwise a new record will be created
|
1293
|
+
wave = exists[0]
|
1294
|
+
# stamp current wave
|
1295
|
+
_wave = {
|
1296
|
+
**wave,
|
1297
|
+
MONOTONIC_KEY: monotonic,
|
1298
|
+
WAVE_RESUMING_INFO_KEY: resuming_info,
|
1299
|
+
}
|
1300
|
+
res1 = await self.storage.put(query, _wave)
|
1301
|
+
else:
|
1302
|
+
# wave is empty, maybe because is not a resuming crawling task
|
1303
|
+
log.debug(
|
1304
|
+
"wave is empty, maybe because is not a resuming crawling task"
|
1305
|
+
)
|
1306
|
+
# log.info("Saving: %s", data)
|
1307
|
+
res1 = True
|
1307
1308
|
|
1308
1309
|
return all([res0, res1, res2])
|
1309
1310
|
else:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: syncmodels
|
3
|
-
Version: 0.1.330
|
3
|
+
Version: 0.1.333
|
4
4
|
Summary: Synchronizable Models
|
5
5
|
Home-page: https://github.com/asterio.gonzalez/syncmodels
|
6
6
|
Author: Asterio Gonzalez
|
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
18
18
|
Requires-Python: >=3.6
|
19
19
|
License-File: LICENSE
|
20
20
|
License-File: AUTHORS.rst
|
21
|
-
Requires-Dist: agptools>=0.1.
|
21
|
+
Requires-Dist: agptools>=0.1.333
|
22
22
|
Requires-Dist: aiocache
|
23
23
|
Requires-Dist: aiohttp
|
24
24
|
Requires-Dist: Click
|
@@ -1,6 +1,6 @@
|
|
1
|
-
syncmodels/__init__.py,sha256=
|
1
|
+
syncmodels/__init__.py,sha256=heMvuebb5E-VxcjkidHaEr6oN5d2fTQ9wqrWcqxCzBE,142
|
2
2
|
syncmodels/context.py,sha256=k1Gs_ip9BfyRFpyRnzqYvRDKo0sYBqJsh6z9sWln9oE,451
|
3
|
-
syncmodels/crawler.py,sha256=
|
3
|
+
syncmodels/crawler.py,sha256=sjyuLLBT5RGb-3LVTtjW9nd2PTJyIV5Tldoz8Jq3dVs,94511
|
4
4
|
syncmodels/crud.py,sha256=oZIcwEKR2i-lesEF_059Y4yThohd9m7gs6R6xYgLH-I,15351
|
5
5
|
syncmodels/definitions.py,sha256=w-3TrSomp9T8OzLmJhKeZQDzrUIJLKldyh1lzlE7Yj0,5476
|
6
6
|
syncmodels/exceptions.py,sha256=ZLAwu19cs2UN2Sv3jaLnixT_jRI7T42TfyutCkUsuIk,685
|
@@ -11,7 +11,7 @@ syncmodels/registry.py,sha256=YaQtgbSwa0je1MpCcVHALI3_b85vrddyOlhsnrUcKZs,8224
|
|
11
11
|
syncmodels/requests.py,sha256=wWoC5hPDm1iBM_zrlyKRauzhXgdKR3pT5RqyC-5UZhQ,538
|
12
12
|
syncmodels/runner.py,sha256=IHDKuQ3yJ1DN9wktMiIrerPepYX61tc3AzbFfuUqEFw,5454
|
13
13
|
syncmodels/schema.py,sha256=uinUt8Asq_x7xa6MKWVXNyoWO6gKocjGPppjimaXzEU,2492
|
14
|
-
syncmodels/storage.py,sha256=
|
14
|
+
syncmodels/storage.py,sha256=HIoh_KSvalgTzBbb5py_0dUBHHipmHWqxmVu1hcEV4s,74991
|
15
15
|
syncmodels/syncmodels.py,sha256=jcUxVbv1hrx5hI81VCO1onIM6WyORTqJVPwIqlPocOc,10596
|
16
16
|
syncmodels/timequeue.py,sha256=YRd3ULRaIhoszaBsYhfr0epMqAbL6-NwVEtScjUYttM,595
|
17
17
|
syncmodels/wave.py,sha256=Gra22BLiA9z2nF-6diXpjAc4GZv9nebmyvHxdAfXec4,7764
|
@@ -33,7 +33,7 @@ syncmodels/helpers/importers.py,sha256=KImR9pQu4ir6EI6Ipta0q3RWloFT_VTJi67kM0lZs
|
|
33
33
|
syncmodels/helpers/loaders.py,sha256=aus0aRcbU1vVa_zWo42aX6uV3B0fQ0aQpkTWlR9xGLA,4325
|
34
34
|
syncmodels/helpers/models.py,sha256=c_ATzmiw5mVY1IGnwmyhjIuu5d2idHU-XeRigZSMkOQ,719
|
35
35
|
syncmodels/helpers/orion.py,sha256=6lRp1w3yaq_rxOI7nJIjuHdsgBjQu92y0bW0IX_gq44,30719
|
36
|
-
syncmodels/helpers/surreal.py,sha256=
|
36
|
+
syncmodels/helpers/surreal.py,sha256=4bBBj__kQta_5ShWzUKlsuq6xSmICP5UwpeA6QTD-rA,10088
|
37
37
|
syncmodels/helpers/units.py,sha256=g50m5DQrAyP_qpDRa4LCEA5Rz2UZUmlIixfWG_ddw9I,3571
|
38
38
|
syncmodels/logic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
39
39
|
syncmodels/logic/activity_logger.py,sha256=8wjvgRwaNbibYWGgl-trovSS70yNkoCTlb-AIx3aZEE,14053
|
@@ -300,13 +300,13 @@ syncmodels/model/schema_org/xpathtype.py,sha256=D8gKiCrGSSuUVYw7BIWmOIUbKATfv2Ip
|
|
300
300
|
syncmodels/session/__init__.py,sha256=NxFkOiL_oGaYt2qv9yAvWrNcXn_xT9yLzCLd7PGRaWI,15564
|
301
301
|
syncmodels/session/http.py,sha256=tf7z0ccAEYoCOZT4Ukv3NBXz9hUO3vs2s9bm491pCj8,1480
|
302
302
|
syncmodels/session/postgresql.py,sha256=ZMIu1Rv93pKfvFlovFBmWArzlrT2xaQWNYGZT_LW61k,175
|
303
|
-
syncmodels/session/sql.py,sha256=
|
303
|
+
syncmodels/session/sql.py,sha256=17C8EHn_1twHezhMlD5esMvx4m0iIrnD7JK-TuBswgU,6573
|
304
304
|
syncmodels/session/sqlite.py,sha256=nCDjopLiBpX1F10qkKoARM7JrVdIpJ1WdGOduFVxaiA,2080
|
305
305
|
syncmodels/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
306
|
-
syncmodels-0.1.
|
307
|
-
syncmodels-0.1.
|
308
|
-
syncmodels-0.1.
|
309
|
-
syncmodels-0.1.
|
310
|
-
syncmodels-0.1.
|
311
|
-
syncmodels-0.1.
|
312
|
-
syncmodels-0.1.
|
306
|
+
syncmodels-0.1.333.dist-info/AUTHORS.rst,sha256=3ZPoqg8Aav8DSYKd0fwcwn4_5HwSiMLart0E5Un00-U,168
|
307
|
+
syncmodels-0.1.333.dist-info/LICENSE,sha256=uzMOYtIiUsnsD0xHJR7aJWJ4v_bvan0kTnvufy5eNoA,1075
|
308
|
+
syncmodels-0.1.333.dist-info/METADATA,sha256=KChKUTfP3d_Je0m3czd5SC3iTebjP1loPJn9VHXmoKE,2700
|
309
|
+
syncmodels-0.1.333.dist-info/WHEEL,sha256=SrDKpSbFN1G94qcmBqS9nyHcDMp9cUS9OC06hC0G3G0,109
|
310
|
+
syncmodels-0.1.333.dist-info/entry_points.txt,sha256=dMnigjZsHMxTwXiiZyBZdBbMYE0-hY3L5cG15EcDAzw,51
|
311
|
+
syncmodels-0.1.333.dist-info/top_level.txt,sha256=2DfQ9NuAhKMjY3BvQGVBA7GfqTm7EoHNbaehSUiqiHQ,11
|
312
|
+
syncmodels-0.1.333.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|