syncmodels 0.1.330__py2.py3-none-any.whl → 0.1.333__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
syncmodels/__init__.py CHANGED
@@ -2,4 +2,4 @@
2
2
 
3
3
  __author__ = """Asterio Gonzalez"""
4
4
  __email__ = "asterio.gonzalez@gmail.com"
5
- __version__ = "0.1.330"
5
+ __version__ = "0.1.333"
syncmodels/crawler.py CHANGED
@@ -851,7 +851,7 @@ class iBot(iAgent):
851
851
  self.add_plugin(MetaExtractPlugin())
852
852
  # self.add_plugin(PaginatorPlugin())
853
853
 
854
- self.add_plugin(PutPlugin())
854
+ self.add_plugin(PutPlugin()) # comment this for dry-run
855
855
 
856
856
  self.add_plugin(SetURIPlugin())
857
857
  self.add_plugin(Cleaner())
@@ -1201,7 +1201,6 @@ class iBot(iAgent):
1201
1201
  # TODO: I need to clean really old cache entries
1202
1202
  # TODO: in order to control any excessive memory consumption
1203
1203
  try:
1204
-
1205
1204
  if not self._is_already(timeout=timeout, type_=type_, **params):
1206
1205
  blueprint = self.blueprint(**params)
1207
1206
  universe = self.ALREADY.setdefault(type_, {})
@@ -1610,34 +1609,14 @@ class SortPlugin(iPlugin):
1610
1609
  # )
1611
1610
  reverse = list(set(sort_key).intersection(item_fields))
1612
1611
  if not reverse:
1613
- log.error("model [%s] has not datetime alike key??", model)
1614
- return
1612
+ log.debug(
1613
+ "model [%s] attributes:[%s] doesn't match any datetime alike sort keys: [%s]",
1614
+ model,
1615
+ list(item_fields),
1616
+ sort_key,
1617
+ )
1618
+ # return
1615
1619
 
1616
- if False:
1617
- sample = stream[-1]
1618
- reverse = []
1619
- for key in sort_key:
1620
- value = sample[key]
1621
- if func := TYPES_MAP.get(value.__class__):
1622
- candidates = {
1623
- k: v
1624
- for k, v in sample.items()
1625
- if k != key and func(sample[k]) == func(value)
1626
- }
1627
- if candidates:
1628
- _key, _ = candidates.popitem()
1629
- reverse.append(_key)
1630
- else:
1631
- log.debug(
1632
- "can't find the reverse key of '%s' in %s",
1633
- sort_key,
1634
- sample,
1635
- )
1636
- # # using same sort_key because source and target
1637
- # # layouts share the same sorteable key (i.e. 'datetime')??
1638
- # reverse.append(key)
1639
- else:
1640
- pass
1641
1620
  context[REVERSE_SORT_KEY] = reverse
1642
1621
 
1643
1622
  if sort_key:
@@ -1656,8 +1635,13 @@ class SortPlugin(iPlugin):
1656
1635
  item_fields = model.model_fields
1657
1636
  reverse = list(set(sort_key).intersection(item_fields))
1658
1637
  if not reverse:
1659
- log.error("model [%s] has not datetime alike key??", model)
1660
- return
1638
+ log.debug(
1639
+ "model [%s] attributes:[%s] doesn't match any datetime alike sort keys: [%s]",
1640
+ model,
1641
+ list(item_fields),
1642
+ sort_key,
1643
+ )
1644
+ # return
1661
1645
 
1662
1646
  context[REVERSE_SORT_KEY] = reverse
1663
1647
 
@@ -1714,7 +1698,7 @@ class HashStreamPlugin(iPlugin):
1714
1698
  blueprint = blueprint.hexdigest()
1715
1699
  context["stream_blueprint"] = blueprint
1716
1700
  if not self.bot._set_already(
1717
- timeout=3600,
1701
+ # timeout=3600,
1718
1702
  type_="response",
1719
1703
  blueprint=blueprint,
1720
1704
  ):
@@ -120,7 +120,8 @@ class SurrealServer:
120
120
  "root",
121
121
  "--pass",
122
122
  "root",
123
- f"file://{self.path}",
123
+ # f"file://{self.url}",
124
+ f"rocksdb://{self.url}",
124
125
  ]
125
126
 
126
127
  def start(self):
syncmodels/session/sql.py CHANGED
@@ -111,7 +111,7 @@ class iSQLSession(iSession):
111
111
 
112
112
  since_key = params.get(MONOTONIC_SINCE_KEY)
113
113
  table = context[KIND_KEY]
114
- limit = params.get(LIMIT_KEY_VALUE, context.get(LIMIT_KEY_VALUE)) or 1024
114
+ limit = params.get(LIMIT_KEY_VALUE, context.get(LIMIT_KEY_VALUE)) or 1024 * 8
115
115
 
116
116
  query = f"SELECT * FROM {table}"
117
117
  if MONOTONIC_SINCE_VALUE in params:
syncmodels/storage.py CHANGED
@@ -1183,8 +1183,13 @@ class WaveStorage(iWaves, iStorage):
1183
1183
  if must_check:
1184
1184
  t0 = time.time()
1185
1185
  await prevously_inserted()
1186
- elapsed = time.time() - t0
1187
- log.info("[%s] prevously_inserted took: %s secs", uid, elapsed)
1186
+ if False or random.random() < 0.1:
1187
+ elapsed = time.time() - t0
1188
+ log.info("[%s] prevously_inserted took: %s secs", uid, elapsed)
1189
+ if elapsed > 1.0:
1190
+ # TODO: debug what's going on here
1191
+ foo = 1
1192
+
1188
1193
  else:
1189
1194
  # hack for not altering the data
1190
1195
  # push = False
@@ -1202,6 +1207,7 @@ class WaveStorage(iWaves, iStorage):
1202
1207
 
1203
1208
  data[ID_KEY] = "{thing}:{id}".format_map(_uri)
1204
1209
 
1210
+ res0 = res1 = res2 = False
1205
1211
  # must push the data?
1206
1212
  context[PUSHED] = push
1207
1213
  if push:
@@ -1212,98 +1218,93 @@ class WaveStorage(iWaves, iStorage):
1212
1218
  data_sort_blueprint,
1213
1219
  data,
1214
1220
  )
1215
- foo = 1
1216
1221
  query = f"{namespace}://{database}/{thing}:{monotonic}"
1217
1222
  res2 = await self.storage.put(query, data)
1218
1223
  else:
1219
1224
  # TODO: agp: refactor all this function when we've time!
1220
1225
  res2 = True
1221
- foo = 1
1222
1226
 
1223
- # long fquid version
1224
- # data[ID_KEY] = data[ORG_KEY]
1225
- # short version
1226
- # data[ID_KEY] = "{thing}:{id}".format_map(_uri)
1227
+ # save Snapshot of the object
1228
+ if push:
1229
+ # long fquid version
1230
+ # data[ID_KEY] = data[ORG_KEY]
1231
+ # short version
1232
+ # data[ID_KEY] = "{thing}:{id}".format_map(_uri)
1233
+ if _uri["id"] is None:
1234
+ __id = data.get(ORG_KEY, "")
1235
+ _id = parse_duri(__id)
1236
+ if _id["id"] is None:
1237
+ _uri["id"] = __id
1238
+ else:
1239
+ _uri["id"] = _id["id"]
1240
+ data[ORG_KEY] = build_uri(**_uri)
1241
+
1242
+ data[ID_KEY] = "{thing}:{id}".format_map(_uri)
1243
+ query = f"{namespace}://{database}/{TUBE_SNAPSHOT}"
1244
+ # resuming_info = {k: kw[k] for k in kw.get(REVERSE_SORT_KEY, [])}
1245
+ # resuming_info = {k: kw[k] for k in kw.get(SORT_KEY) or []}
1246
+ resuming_info = {
1247
+ k: kw.get(k, data.get(k)) for k in kw.get(SORT_KEY) or []
1248
+ }
1249
+ # force to be json compatible
1250
+ resuming_info = JSONVerter.to_json(resuming_info)
1227
1251
 
1228
- if _uri["id"] is None:
1229
- __id = data.get(ORG_KEY, "")
1230
- _id = parse_duri(__id)
1231
- if _id["id"] is None:
1232
- _uri["id"] = __id
1252
+ res0 = await self.storage.put(
1253
+ query,
1254
+ data,
1255
+ )
1256
+ # 3. finally add the wave info into tube
1257
+ data.pop(MONOTONIC_KEY)
1258
+
1259
+ # update the TUBE_WAVE due the insertion of this object
1260
+ if push:
1261
+ # 2. save last Wave from this particular tube
1262
+ # AVOID_KEYS contains all keys that aren't json serializable
1263
+ # wave = {
1264
+ # k: v for k, v in kw.items() if k not in self.AVOID_KEYS
1265
+ # }
1266
+ # try to recover the 'intact' bootstrap that we've using
1267
+ for wave0 in kw.get(WAVE_LAST_KEY, []):
1268
+ wave = wave0.get("wave") # TODO: use a define
1269
+ if wave:
1270
+ break
1233
1271
  else:
1234
- _uri["id"] = _id["id"]
1235
- data[ORG_KEY] = build_uri(**_uri)
1272
+ # otherwise, it's the 1st time and we need to create the 1st
1273
+ # bootstrap-wave info
1274
+ wave_keys = set(kw.get(WAVE_INFO_KEY, []))
1275
+ wave_keys.update([KIND_KEY, PREFIX_URL, PARAMS_KEY])
1276
+ # task = kw[TASK_KEY]
1277
+ wave = {k: kw[k] for k in wave_keys.intersection(kw)}
1236
1278
 
1237
- data[ID_KEY] = "{thing}:{id}".format_map(_uri)
1238
- query = f"{namespace}://{database}/{TUBE_SNAPSHOT}"
1239
- # resuming_info = {k: kw[k] for k in kw.get(REVERSE_SORT_KEY, [])}
1240
- # resuming_info = {k: kw[k] for k in kw.get(SORT_KEY) or []}
1241
- resuming_info = {
1242
- k: kw.get(k, data.get(k)) for k in kw.get(SORT_KEY) or []
1243
- }
1244
- # force to be json compatible
1245
- resuming_info = JSONVerter.to_json(resuming_info)
1246
-
1247
- res0 = await self.storage.put(
1248
- query,
1249
- data,
1250
- )
1251
- # 3. finally add the wave info into tube
1252
- data.pop(MONOTONIC_KEY)
1253
-
1254
- # 2. save last Wave from this particular tube
1255
- # AVOID_KEYS contains all keys that aren't json serializable
1256
- # wave = {
1257
- # k: v for k, v in kw.items() if k not in self.AVOID_KEYS
1258
- # }
1259
-
1260
- # try to recover the 'intact' bootstrap that we've using
1261
- for wave0 in kw.get(WAVE_LAST_KEY, []):
1262
- wave = wave0.get("wave") # TODO: use a define
1263
1279
  if wave:
1264
- break
1265
- else:
1266
- # otherwise, its the 1st time and we need to create the 1st
1267
- # bootstrap-wave info
1268
-
1269
- # TODO: review for inclussion
1270
- # TODO: this settings are crawler activity oriented
1271
- # TODO: what about 'Task' an other use cases?
1272
- wave_keys = set(kw.get(WAVE_INFO_KEY, []))
1273
- wave_keys.update([KIND_KEY, PREFIX_URL, PARAMS_KEY])
1274
-
1275
- # task = kw[TASK_KEY]
1276
- wave = {k: kw[k] for k in wave_keys.intersection(kw)}
1277
-
1278
- if wave:
1279
- # wave must be json compatible and do not use any reserved
1280
- # keyword for storage (i.e. 'scope' in Surreal)
1281
- query = f"{namespace}://{database}/{TUBE_WAVE}"
1282
-
1283
- # query can't containg MONOTONIC_KEY
1284
- wave.pop(MONOTONIC_KEY, None)
1285
- wave.pop(WAVE_RESUMING_INFO_KEY, None)
1286
- exists = await self.storage.query(query, **wave)
1287
- assert len(exists) <= 1
1288
-
1289
- if len(exists):
1290
- # use the same record_id
1291
- # otherwise a new record will be created
1292
- wave = exists[0]
1293
- # stamp current wave
1294
- _wave = {
1295
- **wave,
1296
- MONOTONIC_KEY: monotonic,
1297
- WAVE_RESUMING_INFO_KEY: resuming_info,
1298
- }
1299
- res1 = await self.storage.put(query, _wave)
1300
- else:
1301
- # wave is empty, maybe because is not a resuming crawling task
1302
- log.debug(
1303
- "wave is empty, maybe because is not a resuming crawling task"
1304
- )
1305
- # log.info("Saving: %s", data)
1306
- res1 = True
1280
+ # wave must be json compatible and do not use any reserved
1281
+ # keyword for storage (i.e. 'scope' in Surreal)
1282
+ query = f"{namespace}://{database}/{TUBE_WAVE}"
1283
+
1284
+ # query can't contain MONOTONIC_KEY
1285
+ wave.pop(MONOTONIC_KEY, None)
1286
+ wave.pop(WAVE_RESUMING_INFO_KEY, None)
1287
+ exists = await self.storage.query(query, **wave)
1288
+ assert len(exists) <= 1
1289
+
1290
+ if len(exists):
1291
+ # use the same record_id
1292
+ # otherwise a new record will be created
1293
+ wave = exists[0]
1294
+ # stamp current wave
1295
+ _wave = {
1296
+ **wave,
1297
+ MONOTONIC_KEY: monotonic,
1298
+ WAVE_RESUMING_INFO_KEY: resuming_info,
1299
+ }
1300
+ res1 = await self.storage.put(query, _wave)
1301
+ else:
1302
+ # wave is empty, maybe because it is not a resuming crawling task
1303
+ log.debug(
1304
+ "wave is empty, maybe because is not a resuming crawling task"
1305
+ )
1306
+ # log.info("Saving: %s", data)
1307
+ res1 = True
1307
1308
 
1308
1309
  return all([res0, res1, res2])
1309
1310
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: syncmodels
3
- Version: 0.1.330
3
+ Version: 0.1.333
4
4
  Summary: Synchronizable Models
5
5
  Home-page: https://github.com/asterio.gonzalez/syncmodels
6
6
  Author: Asterio Gonzalez
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.11
18
18
  Requires-Python: >=3.6
19
19
  License-File: LICENSE
20
20
  License-File: AUTHORS.rst
21
- Requires-Dist: agptools>=0.1.330
21
+ Requires-Dist: agptools>=0.1.333
22
22
  Requires-Dist: aiocache
23
23
  Requires-Dist: aiohttp
24
24
  Requires-Dist: Click
@@ -1,6 +1,6 @@
1
- syncmodels/__init__.py,sha256=g59bA8lWKxQA9npRZE5WiXLHT-QTCR2ggetQwG4iExQ,142
1
+ syncmodels/__init__.py,sha256=heMvuebb5E-VxcjkidHaEr6oN5d2fTQ9wqrWcqxCzBE,142
2
2
  syncmodels/context.py,sha256=k1Gs_ip9BfyRFpyRnzqYvRDKo0sYBqJsh6z9sWln9oE,451
3
- syncmodels/crawler.py,sha256=_pgelyrIKuVl8vdINJ6NSh5qkSnZf4rAACph4SZ_2H4,95281
3
+ syncmodels/crawler.py,sha256=sjyuLLBT5RGb-3LVTtjW9nd2PTJyIV5Tldoz8Jq3dVs,94511
4
4
  syncmodels/crud.py,sha256=oZIcwEKR2i-lesEF_059Y4yThohd9m7gs6R6xYgLH-I,15351
5
5
  syncmodels/definitions.py,sha256=w-3TrSomp9T8OzLmJhKeZQDzrUIJLKldyh1lzlE7Yj0,5476
6
6
  syncmodels/exceptions.py,sha256=ZLAwu19cs2UN2Sv3jaLnixT_jRI7T42TfyutCkUsuIk,685
@@ -11,7 +11,7 @@ syncmodels/registry.py,sha256=YaQtgbSwa0je1MpCcVHALI3_b85vrddyOlhsnrUcKZs,8224
11
11
  syncmodels/requests.py,sha256=wWoC5hPDm1iBM_zrlyKRauzhXgdKR3pT5RqyC-5UZhQ,538
12
12
  syncmodels/runner.py,sha256=IHDKuQ3yJ1DN9wktMiIrerPepYX61tc3AzbFfuUqEFw,5454
13
13
  syncmodels/schema.py,sha256=uinUt8Asq_x7xa6MKWVXNyoWO6gKocjGPppjimaXzEU,2492
14
- syncmodels/storage.py,sha256=tnF74q94wCidUo4dW77RWLVSKzJRv5ZGKFsNfoxWOkg,74546
14
+ syncmodels/storage.py,sha256=HIoh_KSvalgTzBbb5py_0dUBHHipmHWqxmVu1hcEV4s,74991
15
15
  syncmodels/syncmodels.py,sha256=jcUxVbv1hrx5hI81VCO1onIM6WyORTqJVPwIqlPocOc,10596
16
16
  syncmodels/timequeue.py,sha256=YRd3ULRaIhoszaBsYhfr0epMqAbL6-NwVEtScjUYttM,595
17
17
  syncmodels/wave.py,sha256=Gra22BLiA9z2nF-6diXpjAc4GZv9nebmyvHxdAfXec4,7764
@@ -33,7 +33,7 @@ syncmodels/helpers/importers.py,sha256=KImR9pQu4ir6EI6Ipta0q3RWloFT_VTJi67kM0lZs
33
33
  syncmodels/helpers/loaders.py,sha256=aus0aRcbU1vVa_zWo42aX6uV3B0fQ0aQpkTWlR9xGLA,4325
34
34
  syncmodels/helpers/models.py,sha256=c_ATzmiw5mVY1IGnwmyhjIuu5d2idHU-XeRigZSMkOQ,719
35
35
  syncmodels/helpers/orion.py,sha256=6lRp1w3yaq_rxOI7nJIjuHdsgBjQu92y0bW0IX_gq44,30719
36
- syncmodels/helpers/surreal.py,sha256=zoWtGm5oAxwvgJNq_NTpKOHN3h9FNObhFDLuiBOl1YY,10050
36
+ syncmodels/helpers/surreal.py,sha256=4bBBj__kQta_5ShWzUKlsuq6xSmICP5UwpeA6QTD-rA,10088
37
37
  syncmodels/helpers/units.py,sha256=g50m5DQrAyP_qpDRa4LCEA5Rz2UZUmlIixfWG_ddw9I,3571
38
38
  syncmodels/logic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
39
  syncmodels/logic/activity_logger.py,sha256=8wjvgRwaNbibYWGgl-trovSS70yNkoCTlb-AIx3aZEE,14053
@@ -300,13 +300,13 @@ syncmodels/model/schema_org/xpathtype.py,sha256=D8gKiCrGSSuUVYw7BIWmOIUbKATfv2Ip
300
300
  syncmodels/session/__init__.py,sha256=NxFkOiL_oGaYt2qv9yAvWrNcXn_xT9yLzCLd7PGRaWI,15564
301
301
  syncmodels/session/http.py,sha256=tf7z0ccAEYoCOZT4Ukv3NBXz9hUO3vs2s9bm491pCj8,1480
302
302
  syncmodels/session/postgresql.py,sha256=ZMIu1Rv93pKfvFlovFBmWArzlrT2xaQWNYGZT_LW61k,175
303
- syncmodels/session/sql.py,sha256=bD7zXRrEKKJmqY2UoibWENuWb5zHrrU72F3_dYbS6LY,6569
303
+ syncmodels/session/sql.py,sha256=17C8EHn_1twHezhMlD5esMvx4m0iIrnD7JK-TuBswgU,6573
304
304
  syncmodels/session/sqlite.py,sha256=nCDjopLiBpX1F10qkKoARM7JrVdIpJ1WdGOduFVxaiA,2080
305
305
  syncmodels/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
306
- syncmodels-0.1.330.dist-info/AUTHORS.rst,sha256=3ZPoqg8Aav8DSYKd0fwcwn4_5HwSiMLart0E5Un00-U,168
307
- syncmodels-0.1.330.dist-info/LICENSE,sha256=uzMOYtIiUsnsD0xHJR7aJWJ4v_bvan0kTnvufy5eNoA,1075
308
- syncmodels-0.1.330.dist-info/METADATA,sha256=tNxb2OSD2M4XsGc7gAC8FQRkCOcnZdoHFtpRESCA9tk,2700
309
- syncmodels-0.1.330.dist-info/WHEEL,sha256=SrDKpSbFN1G94qcmBqS9nyHcDMp9cUS9OC06hC0G3G0,109
310
- syncmodels-0.1.330.dist-info/entry_points.txt,sha256=dMnigjZsHMxTwXiiZyBZdBbMYE0-hY3L5cG15EcDAzw,51
311
- syncmodels-0.1.330.dist-info/top_level.txt,sha256=2DfQ9NuAhKMjY3BvQGVBA7GfqTm7EoHNbaehSUiqiHQ,11
312
- syncmodels-0.1.330.dist-info/RECORD,,
306
+ syncmodels-0.1.333.dist-info/AUTHORS.rst,sha256=3ZPoqg8Aav8DSYKd0fwcwn4_5HwSiMLart0E5Un00-U,168
307
+ syncmodels-0.1.333.dist-info/LICENSE,sha256=uzMOYtIiUsnsD0xHJR7aJWJ4v_bvan0kTnvufy5eNoA,1075
308
+ syncmodels-0.1.333.dist-info/METADATA,sha256=KChKUTfP3d_Je0m3czd5SC3iTebjP1loPJn9VHXmoKE,2700
309
+ syncmodels-0.1.333.dist-info/WHEEL,sha256=SrDKpSbFN1G94qcmBqS9nyHcDMp9cUS9OC06hC0G3G0,109
310
+ syncmodels-0.1.333.dist-info/entry_points.txt,sha256=dMnigjZsHMxTwXiiZyBZdBbMYE0-hY3L5cG15EcDAzw,51
311
+ syncmodels-0.1.333.dist-info/top_level.txt,sha256=2DfQ9NuAhKMjY3BvQGVBA7GfqTm7EoHNbaehSUiqiHQ,11
312
+ syncmodels-0.1.333.dist-info/RECORD,,