syncmodels 0.1.331__py2.py3-none-any.whl → 0.1.333__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syncmodels/__init__.py +1 -1
- syncmodels/crawler.py +16 -32
- syncmodels/helpers/surreal.py +2 -1
- syncmodels/storage.py +79 -83
- {syncmodels-0.1.331.dist-info → syncmodels-0.1.333.dist-info}/METADATA +2 -2
- {syncmodels-0.1.331.dist-info → syncmodels-0.1.333.dist-info}/RECORD +11 -11
- {syncmodels-0.1.331.dist-info → syncmodels-0.1.333.dist-info}/AUTHORS.rst +0 -0
- {syncmodels-0.1.331.dist-info → syncmodels-0.1.333.dist-info}/LICENSE +0 -0
- {syncmodels-0.1.331.dist-info → syncmodels-0.1.333.dist-info}/WHEEL +0 -0
- {syncmodels-0.1.331.dist-info → syncmodels-0.1.333.dist-info}/entry_points.txt +0 -0
- {syncmodels-0.1.331.dist-info → syncmodels-0.1.333.dist-info}/top_level.txt +0 -0
syncmodels/__init__.py
CHANGED
syncmodels/crawler.py
CHANGED
@@ -851,7 +851,7 @@ class iBot(iAgent):
|
|
851
851
|
self.add_plugin(MetaExtractPlugin())
|
852
852
|
# self.add_plugin(PaginatorPlugin())
|
853
853
|
|
854
|
-
self.add_plugin(PutPlugin())
|
854
|
+
self.add_plugin(PutPlugin()) # comment this for dry-run
|
855
855
|
|
856
856
|
self.add_plugin(SetURIPlugin())
|
857
857
|
self.add_plugin(Cleaner())
|
@@ -1201,7 +1201,6 @@ class iBot(iAgent):
|
|
1201
1201
|
# TODO: I need to clean really old cache entries
|
1202
1202
|
# TODO: in order to control any excessive memory consumption
|
1203
1203
|
try:
|
1204
|
-
|
1205
1204
|
if not self._is_already(timeout=timeout, type_=type_, **params):
|
1206
1205
|
blueprint = self.blueprint(**params)
|
1207
1206
|
universe = self.ALREADY.setdefault(type_, {})
|
@@ -1610,34 +1609,14 @@ class SortPlugin(iPlugin):
|
|
1610
1609
|
# )
|
1611
1610
|
reverse = list(set(sort_key).intersection(item_fields))
|
1612
1611
|
if not reverse:
|
1613
|
-
log.
|
1614
|
-
|
1612
|
+
log.debug(
|
1613
|
+
"model [%s] attributes:[%s] doesn't match any datetime alike sort keys: [%s]",
|
1614
|
+
model,
|
1615
|
+
list(item_fields),
|
1616
|
+
sort_key,
|
1617
|
+
)
|
1618
|
+
# return
|
1615
1619
|
|
1616
|
-
if False:
|
1617
|
-
sample = stream[-1]
|
1618
|
-
reverse = []
|
1619
|
-
for key in sort_key:
|
1620
|
-
value = sample[key]
|
1621
|
-
if func := TYPES_MAP.get(value.__class__):
|
1622
|
-
candidates = {
|
1623
|
-
k: v
|
1624
|
-
for k, v in sample.items()
|
1625
|
-
if k != key and func(sample[k]) == func(value)
|
1626
|
-
}
|
1627
|
-
if candidates:
|
1628
|
-
_key, _ = candidates.popitem()
|
1629
|
-
reverse.append(_key)
|
1630
|
-
else:
|
1631
|
-
log.debug(
|
1632
|
-
"can't find the reverse key of '%s' in %s",
|
1633
|
-
sort_key,
|
1634
|
-
sample,
|
1635
|
-
)
|
1636
|
-
# # using same sort_key because source and target
|
1637
|
-
# # layouts share the same sorteable key (i.e. 'datetime')??
|
1638
|
-
# reverse.append(key)
|
1639
|
-
else:
|
1640
|
-
pass
|
1641
1620
|
context[REVERSE_SORT_KEY] = reverse
|
1642
1621
|
|
1643
1622
|
if sort_key:
|
@@ -1656,8 +1635,13 @@ class SortPlugin(iPlugin):
|
|
1656
1635
|
item_fields = model.model_fields
|
1657
1636
|
reverse = list(set(sort_key).intersection(item_fields))
|
1658
1637
|
if not reverse:
|
1659
|
-
log.
|
1660
|
-
|
1638
|
+
log.debug(
|
1639
|
+
"model [%s] attributes:[%s] doesn't match any datetime alike sort keys: [%s]",
|
1640
|
+
model,
|
1641
|
+
list(item_fields),
|
1642
|
+
sort_key,
|
1643
|
+
)
|
1644
|
+
# return
|
1661
1645
|
|
1662
1646
|
context[REVERSE_SORT_KEY] = reverse
|
1663
1647
|
|
@@ -1714,7 +1698,7 @@ class HashStreamPlugin(iPlugin):
|
|
1714
1698
|
blueprint = blueprint.hexdigest()
|
1715
1699
|
context["stream_blueprint"] = blueprint
|
1716
1700
|
if not self.bot._set_already(
|
1717
|
-
timeout=3600,
|
1701
|
+
# timeout=3600,
|
1718
1702
|
type_="response",
|
1719
1703
|
blueprint=blueprint,
|
1720
1704
|
):
|
syncmodels/helpers/surreal.py
CHANGED
syncmodels/storage.py
CHANGED
@@ -1183,7 +1183,7 @@ class WaveStorage(iWaves, iStorage):
|
|
1183
1183
|
if must_check:
|
1184
1184
|
t0 = time.time()
|
1185
1185
|
await prevously_inserted()
|
1186
|
-
if random.random() < 0.1:
|
1186
|
+
if False or random.random() < 0.1:
|
1187
1187
|
elapsed = time.time() - t0
|
1188
1188
|
log.info("[%s] prevously_inserted took: %s secs", uid, elapsed)
|
1189
1189
|
if elapsed > 1.0:
|
@@ -1207,6 +1207,7 @@ class WaveStorage(iWaves, iStorage):
|
|
1207
1207
|
|
1208
1208
|
data[ID_KEY] = "{thing}:{id}".format_map(_uri)
|
1209
1209
|
|
1210
|
+
res0 = res1 = res2 = False
|
1210
1211
|
# must push the data?
|
1211
1212
|
context[PUSHED] = push
|
1212
1213
|
if push:
|
@@ -1217,98 +1218,93 @@ class WaveStorage(iWaves, iStorage):
|
|
1217
1218
|
data_sort_blueprint,
|
1218
1219
|
data,
|
1219
1220
|
)
|
1220
|
-
foo = 1
|
1221
1221
|
query = f"{namespace}://{database}/{thing}:{monotonic}"
|
1222
1222
|
res2 = await self.storage.put(query, data)
|
1223
1223
|
else:
|
1224
1224
|
# TODO: agp: refactor all this function when we've time!
|
1225
1225
|
res2 = True
|
1226
|
-
foo = 1
|
1227
1226
|
|
1228
|
-
#
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1227
|
+
# save Snapshot of the object
|
1228
|
+
if push:
|
1229
|
+
# long fquid version
|
1230
|
+
# data[ID_KEY] = data[ORG_KEY]
|
1231
|
+
# short version
|
1232
|
+
# data[ID_KEY] = "{thing}:{id}".format_map(_uri)
|
1233
|
+
if _uri["id"] is None:
|
1234
|
+
__id = data.get(ORG_KEY, "")
|
1235
|
+
_id = parse_duri(__id)
|
1236
|
+
if _id["id"] is None:
|
1237
|
+
_uri["id"] = __id
|
1238
|
+
else:
|
1239
|
+
_uri["id"] = _id["id"]
|
1240
|
+
data[ORG_KEY] = build_uri(**_uri)
|
1241
|
+
|
1242
|
+
data[ID_KEY] = "{thing}:{id}".format_map(_uri)
|
1243
|
+
query = f"{namespace}://{database}/{TUBE_SNAPSHOT}"
|
1244
|
+
# resuming_info = {k: kw[k] for k in kw.get(REVERSE_SORT_KEY, [])}
|
1245
|
+
# resuming_info = {k: kw[k] for k in kw.get(SORT_KEY) or []}
|
1246
|
+
resuming_info = {
|
1247
|
+
k: kw.get(k, data.get(k)) for k in kw.get(SORT_KEY) or []
|
1248
|
+
}
|
1249
|
+
# force to be json compatible
|
1250
|
+
resuming_info = JSONVerter.to_json(resuming_info)
|
1232
1251
|
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1252
|
+
res0 = await self.storage.put(
|
1253
|
+
query,
|
1254
|
+
data,
|
1255
|
+
)
|
1256
|
+
# 3. finally add the wave info into tube
|
1257
|
+
data.pop(MONOTONIC_KEY)
|
1258
|
+
|
1259
|
+
# update the TUBE_WAVE due the insertion of this object
|
1260
|
+
if push:
|
1261
|
+
# 2. save last Wave from this particular tube
|
1262
|
+
# AVOID_KEYS contains all keys that aren't json serializable
|
1263
|
+
# wave = {
|
1264
|
+
# k: v for k, v in kw.items() if k not in self.AVOID_KEYS
|
1265
|
+
# }
|
1266
|
+
# try to recover the 'intact' bootstrap that we've using
|
1267
|
+
for wave0 in kw.get(WAVE_LAST_KEY, []):
|
1268
|
+
wave = wave0.get("wave") # TODO: use a define
|
1269
|
+
if wave:
|
1270
|
+
break
|
1238
1271
|
else:
|
1239
|
-
|
1240
|
-
|
1272
|
+
# otherwise, its the 1st time and we need to create the 1st
|
1273
|
+
# bootstrap-wave info
|
1274
|
+
wave_keys = set(kw.get(WAVE_INFO_KEY, []))
|
1275
|
+
wave_keys.update([KIND_KEY, PREFIX_URL, PARAMS_KEY])
|
1276
|
+
# task = kw[TASK_KEY]
|
1277
|
+
wave = {k: kw[k] for k in wave_keys.intersection(kw)}
|
1241
1278
|
|
1242
|
-
data[ID_KEY] = "{thing}:{id}".format_map(_uri)
|
1243
|
-
query = f"{namespace}://{database}/{TUBE_SNAPSHOT}"
|
1244
|
-
# resuming_info = {k: kw[k] for k in kw.get(REVERSE_SORT_KEY, [])}
|
1245
|
-
# resuming_info = {k: kw[k] for k in kw.get(SORT_KEY) or []}
|
1246
|
-
resuming_info = {
|
1247
|
-
k: kw.get(k, data.get(k)) for k in kw.get(SORT_KEY) or []
|
1248
|
-
}
|
1249
|
-
# force to be json compatible
|
1250
|
-
resuming_info = JSONVerter.to_json(resuming_info)
|
1251
|
-
|
1252
|
-
res0 = await self.storage.put(
|
1253
|
-
query,
|
1254
|
-
data,
|
1255
|
-
)
|
1256
|
-
# 3. finally add the wave info into tube
|
1257
|
-
data.pop(MONOTONIC_KEY)
|
1258
|
-
|
1259
|
-
# 2. save last Wave from this particular tube
|
1260
|
-
# AVOID_KEYS contains all keys that aren't json serializable
|
1261
|
-
# wave = {
|
1262
|
-
# k: v for k, v in kw.items() if k not in self.AVOID_KEYS
|
1263
|
-
# }
|
1264
|
-
|
1265
|
-
# try to recover the 'intact' bootstrap that we've using
|
1266
|
-
for wave0 in kw.get(WAVE_LAST_KEY, []):
|
1267
|
-
wave = wave0.get("wave") # TODO: use a define
|
1268
1279
|
if wave:
|
1269
|
-
|
1270
|
-
|
1271
|
-
|
1272
|
-
|
1273
|
-
|
1274
|
-
|
1275
|
-
|
1276
|
-
|
1277
|
-
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1283
|
-
|
1284
|
-
|
1285
|
-
|
1286
|
-
|
1287
|
-
|
1288
|
-
|
1289
|
-
|
1290
|
-
|
1291
|
-
|
1292
|
-
|
1293
|
-
|
1294
|
-
|
1295
|
-
#
|
1296
|
-
|
1297
|
-
wave = exists[0]
|
1298
|
-
# stamp current wave
|
1299
|
-
_wave = {
|
1300
|
-
**wave,
|
1301
|
-
MONOTONIC_KEY: monotonic,
|
1302
|
-
WAVE_RESUMING_INFO_KEY: resuming_info,
|
1303
|
-
}
|
1304
|
-
res1 = await self.storage.put(query, _wave)
|
1305
|
-
else:
|
1306
|
-
# wave is empty, maybe because is not a resuming crawling task
|
1307
|
-
log.debug(
|
1308
|
-
"wave is empty, maybe because is not a resuming crawling task"
|
1309
|
-
)
|
1310
|
-
# log.info("Saving: %s", data)
|
1311
|
-
res1 = True
|
1280
|
+
# wave must be json compatible and do not use any reserved
|
1281
|
+
# keyword for storage (i.e. 'scope' in Surreal)
|
1282
|
+
query = f"{namespace}://{database}/{TUBE_WAVE}"
|
1283
|
+
|
1284
|
+
# query can't contain MONOTONIC_KEY
|
1285
|
+
wave.pop(MONOTONIC_KEY, None)
|
1286
|
+
wave.pop(WAVE_RESUMING_INFO_KEY, None)
|
1287
|
+
exists = await self.storage.query(query, **wave)
|
1288
|
+
assert len(exists) <= 1
|
1289
|
+
|
1290
|
+
if len(exists):
|
1291
|
+
# use the same record_id
|
1292
|
+
# otherwise a new record will be created
|
1293
|
+
wave = exists[0]
|
1294
|
+
# stamp current wave
|
1295
|
+
_wave = {
|
1296
|
+
**wave,
|
1297
|
+
MONOTONIC_KEY: monotonic,
|
1298
|
+
WAVE_RESUMING_INFO_KEY: resuming_info,
|
1299
|
+
}
|
1300
|
+
res1 = await self.storage.put(query, _wave)
|
1301
|
+
else:
|
1302
|
+
# wave is empty, maybe because is not a resuming crawling task
|
1303
|
+
log.debug(
|
1304
|
+
"wave is empty, maybe because is not a resuming crawling task"
|
1305
|
+
)
|
1306
|
+
# log.info("Saving: %s", data)
|
1307
|
+
res1 = True
|
1312
1308
|
|
1313
1309
|
return all([res0, res1, res2])
|
1314
1310
|
else:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: syncmodels
|
3
|
-
Version: 0.1.331
|
3
|
+
Version: 0.1.333
|
4
4
|
Summary: Synchronizable Models
|
5
5
|
Home-page: https://github.com/asterio.gonzalez/syncmodels
|
6
6
|
Author: Asterio Gonzalez
|
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
18
18
|
Requires-Python: >=3.6
|
19
19
|
License-File: LICENSE
|
20
20
|
License-File: AUTHORS.rst
|
21
|
-
Requires-Dist: agptools>=0.1.
|
21
|
+
Requires-Dist: agptools>=0.1.333
|
22
22
|
Requires-Dist: aiocache
|
23
23
|
Requires-Dist: aiohttp
|
24
24
|
Requires-Dist: Click
|
@@ -1,6 +1,6 @@
|
|
1
|
-
syncmodels/__init__.py,sha256=
|
1
|
+
syncmodels/__init__.py,sha256=heMvuebb5E-VxcjkidHaEr6oN5d2fTQ9wqrWcqxCzBE,142
|
2
2
|
syncmodels/context.py,sha256=k1Gs_ip9BfyRFpyRnzqYvRDKo0sYBqJsh6z9sWln9oE,451
|
3
|
-
syncmodels/crawler.py,sha256=
|
3
|
+
syncmodels/crawler.py,sha256=sjyuLLBT5RGb-3LVTtjW9nd2PTJyIV5Tldoz8Jq3dVs,94511
|
4
4
|
syncmodels/crud.py,sha256=oZIcwEKR2i-lesEF_059Y4yThohd9m7gs6R6xYgLH-I,15351
|
5
5
|
syncmodels/definitions.py,sha256=w-3TrSomp9T8OzLmJhKeZQDzrUIJLKldyh1lzlE7Yj0,5476
|
6
6
|
syncmodels/exceptions.py,sha256=ZLAwu19cs2UN2Sv3jaLnixT_jRI7T42TfyutCkUsuIk,685
|
@@ -11,7 +11,7 @@ syncmodels/registry.py,sha256=YaQtgbSwa0je1MpCcVHALI3_b85vrddyOlhsnrUcKZs,8224
|
|
11
11
|
syncmodels/requests.py,sha256=wWoC5hPDm1iBM_zrlyKRauzhXgdKR3pT5RqyC-5UZhQ,538
|
12
12
|
syncmodels/runner.py,sha256=IHDKuQ3yJ1DN9wktMiIrerPepYX61tc3AzbFfuUqEFw,5454
|
13
13
|
syncmodels/schema.py,sha256=uinUt8Asq_x7xa6MKWVXNyoWO6gKocjGPppjimaXzEU,2492
|
14
|
-
syncmodels/storage.py,sha256=
|
14
|
+
syncmodels/storage.py,sha256=HIoh_KSvalgTzBbb5py_0dUBHHipmHWqxmVu1hcEV4s,74991
|
15
15
|
syncmodels/syncmodels.py,sha256=jcUxVbv1hrx5hI81VCO1onIM6WyORTqJVPwIqlPocOc,10596
|
16
16
|
syncmodels/timequeue.py,sha256=YRd3ULRaIhoszaBsYhfr0epMqAbL6-NwVEtScjUYttM,595
|
17
17
|
syncmodels/wave.py,sha256=Gra22BLiA9z2nF-6diXpjAc4GZv9nebmyvHxdAfXec4,7764
|
@@ -33,7 +33,7 @@ syncmodels/helpers/importers.py,sha256=KImR9pQu4ir6EI6Ipta0q3RWloFT_VTJi67kM0lZs
|
|
33
33
|
syncmodels/helpers/loaders.py,sha256=aus0aRcbU1vVa_zWo42aX6uV3B0fQ0aQpkTWlR9xGLA,4325
|
34
34
|
syncmodels/helpers/models.py,sha256=c_ATzmiw5mVY1IGnwmyhjIuu5d2idHU-XeRigZSMkOQ,719
|
35
35
|
syncmodels/helpers/orion.py,sha256=6lRp1w3yaq_rxOI7nJIjuHdsgBjQu92y0bW0IX_gq44,30719
|
36
|
-
syncmodels/helpers/surreal.py,sha256=
|
36
|
+
syncmodels/helpers/surreal.py,sha256=4bBBj__kQta_5ShWzUKlsuq6xSmICP5UwpeA6QTD-rA,10088
|
37
37
|
syncmodels/helpers/units.py,sha256=g50m5DQrAyP_qpDRa4LCEA5Rz2UZUmlIixfWG_ddw9I,3571
|
38
38
|
syncmodels/logic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
39
39
|
syncmodels/logic/activity_logger.py,sha256=8wjvgRwaNbibYWGgl-trovSS70yNkoCTlb-AIx3aZEE,14053
|
@@ -303,10 +303,10 @@ syncmodels/session/postgresql.py,sha256=ZMIu1Rv93pKfvFlovFBmWArzlrT2xaQWNYGZT_LW
|
|
303
303
|
syncmodels/session/sql.py,sha256=17C8EHn_1twHezhMlD5esMvx4m0iIrnD7JK-TuBswgU,6573
|
304
304
|
syncmodels/session/sqlite.py,sha256=nCDjopLiBpX1F10qkKoARM7JrVdIpJ1WdGOduFVxaiA,2080
|
305
305
|
syncmodels/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
306
|
-
syncmodels-0.1.
|
307
|
-
syncmodels-0.1.
|
308
|
-
syncmodels-0.1.
|
309
|
-
syncmodels-0.1.
|
310
|
-
syncmodels-0.1.
|
311
|
-
syncmodels-0.1.
|
312
|
-
syncmodels-0.1.
|
306
|
+
syncmodels-0.1.333.dist-info/AUTHORS.rst,sha256=3ZPoqg8Aav8DSYKd0fwcwn4_5HwSiMLart0E5Un00-U,168
|
307
|
+
syncmodels-0.1.333.dist-info/LICENSE,sha256=uzMOYtIiUsnsD0xHJR7aJWJ4v_bvan0kTnvufy5eNoA,1075
|
308
|
+
syncmodels-0.1.333.dist-info/METADATA,sha256=KChKUTfP3d_Je0m3czd5SC3iTebjP1loPJn9VHXmoKE,2700
|
309
|
+
syncmodels-0.1.333.dist-info/WHEEL,sha256=SrDKpSbFN1G94qcmBqS9nyHcDMp9cUS9OC06hC0G3G0,109
|
310
|
+
syncmodels-0.1.333.dist-info/entry_points.txt,sha256=dMnigjZsHMxTwXiiZyBZdBbMYE0-hY3L5cG15EcDAzw,51
|
311
|
+
syncmodels-0.1.333.dist-info/top_level.txt,sha256=2DfQ9NuAhKMjY3BvQGVBA7GfqTm7EoHNbaehSUiqiHQ,11
|
312
|
+
syncmodels-0.1.333.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|