syncmodels 0.1.331__py2.py3-none-any.whl → 0.1.334__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
syncmodels/__init__.py CHANGED
@@ -2,4 +2,4 @@
2
2
 
3
3
  __author__ = """Asterio Gonzalez"""
4
4
  __email__ = "asterio.gonzalez@gmail.com"
5
- __version__ = "0.1.331"
5
+ __version__ = "0.1.334"
syncmodels/crawler.py CHANGED
@@ -851,7 +851,7 @@ class iBot(iAgent):
851
851
  self.add_plugin(MetaExtractPlugin())
852
852
  # self.add_plugin(PaginatorPlugin())
853
853
 
854
- self.add_plugin(PutPlugin())
854
+ self.add_plugin(PutPlugin()) # comment this for dry-run
855
855
 
856
856
  self.add_plugin(SetURIPlugin())
857
857
  self.add_plugin(Cleaner())
@@ -896,7 +896,7 @@ class iBot(iAgent):
896
896
 
897
897
  async def get_data(self, **task):
898
898
  """
899
- Example a crawling function for recommender crawler.
899
+ Example a crawling function for crawler.
900
900
 
901
901
  Get data related to the given kind and path.
902
902
  May add more tasks to be done by crawler.
@@ -1201,7 +1201,6 @@ class iBot(iAgent):
1201
1201
  # TODO: I need to clean really old cache entries
1202
1202
  # TODO: in order to control any excesive memory compsumption
1203
1203
  try:
1204
-
1205
1204
  if not self._is_already(timeout=timeout, type_=type_, **params):
1206
1205
  blueprint = self.blueprint(**params)
1207
1206
  universe = self.ALREADY.setdefault(type_, {})
@@ -1610,34 +1609,14 @@ class SortPlugin(iPlugin):
1610
1609
  # )
1611
1610
  reverse = list(set(sort_key).intersection(item_fields))
1612
1611
  if not reverse:
1613
- log.error("model [%s] has not datetime alike key??", model)
1614
- return
1612
+ log.debug(
1613
+ "model [%s] attributes:[%s] doesn't match any datetime alike sort keys: [%s]",
1614
+ model,
1615
+ list(item_fields),
1616
+ sort_key,
1617
+ )
1618
+ # return
1615
1619
 
1616
- if False:
1617
- sample = stream[-1]
1618
- reverse = []
1619
- for key in sort_key:
1620
- value = sample[key]
1621
- if func := TYPES_MAP.get(value.__class__):
1622
- candidates = {
1623
- k: v
1624
- for k, v in sample.items()
1625
- if k != key and func(sample[k]) == func(value)
1626
- }
1627
- if candidates:
1628
- _key, _ = candidates.popitem()
1629
- reverse.append(_key)
1630
- else:
1631
- log.debug(
1632
- "can't find the reverse key of '%s' in %s",
1633
- sort_key,
1634
- sample,
1635
- )
1636
- # # using same sort_key because source and target
1637
- # # layouts share the same sorteable key (i.e. 'datetime')??
1638
- # reverse.append(key)
1639
- else:
1640
- pass
1641
1620
  context[REVERSE_SORT_KEY] = reverse
1642
1621
 
1643
1622
  if sort_key:
@@ -1656,8 +1635,13 @@ class SortPlugin(iPlugin):
1656
1635
  item_fields = model.model_fields
1657
1636
  reverse = list(set(sort_key).intersection(item_fields))
1658
1637
  if not reverse:
1659
- log.error("model [%s] has not datetime alike key??", model)
1660
- return
1638
+ log.debug(
1639
+ "model [%s] attributes:[%s] doesn't match any datetime alike sort keys: [%s]",
1640
+ model,
1641
+ list(item_fields),
1642
+ sort_key,
1643
+ )
1644
+ # return
1661
1645
 
1662
1646
  context[REVERSE_SORT_KEY] = reverse
1663
1647
 
@@ -1714,7 +1698,7 @@ class HashStreamPlugin(iPlugin):
1714
1698
  blueprint = blueprint.hexdigest()
1715
1699
  context["stream_blueprint"] = blueprint
1716
1700
  if not self.bot._set_already(
1717
- timeout=3600,
1701
+ # timeout=3600,
1718
1702
  type_="response",
1719
1703
  blueprint=blueprint,
1720
1704
  ):
@@ -53,9 +53,19 @@ class SurrealServer:
53
53
  REG_VERSION = r"(?P<version>(?P<a>\d+)(\.(?P<b>\d+))?(\.(?P<c>\d+))?)"
54
54
  SURREAL_PID = ".surreal.pid"
55
55
 
56
- def __init__(self, path, bind="0.0.0.0:9000", daemon=False, version=""):
56
+ def __init__(
57
+ self,
58
+ path,
59
+ bind="0.0.0.0:9000",
60
+ user="root",
61
+ password="root",
62
+ daemon=False,
63
+ version="",
64
+ ):
57
65
  self.path = path
58
66
  self.bind = bind
67
+ self.user = user
68
+ self.password = password
59
69
  self.daemon = daemon
60
70
  self.proc = None
61
71
  self.pid = None
@@ -99,6 +109,7 @@ class SurrealServer:
99
109
  print(f"exit: {version}")
100
110
  break
101
111
  else:
112
+ self._version = version
102
113
  return path
103
114
 
104
115
  except TimeoutExpired:
@@ -110,18 +121,39 @@ class SurrealServer:
110
121
  executable = self.find_executable()
111
122
  else:
112
123
  executable = "surreal"
113
- return [
114
- executable,
115
- "start",
116
- "--allow-all",
117
- "--bind",
118
- f"{self.bind}",
119
- "--user",
120
- "root",
121
- "--pass",
122
- "root",
123
- f"file://{self.path}",
124
- ]
124
+
125
+ path = os.path.abspath(self.path)
126
+
127
+ if self._version.get("a") in ("2",):
128
+ return [
129
+ executable,
130
+ "start",
131
+ "--allow-all",
132
+ "--bind",
133
+ f"{self.bind}",
134
+ "-u",
135
+ f"{self.user}",
136
+ "-p",
137
+ f"{self.password}",
138
+ "--log",
139
+ "debug",
140
+ "--changefeed-gc-interval",
141
+ "600s",
142
+ f"rocksdb://{path}",
143
+ ]
144
+ else:
145
+ return [
146
+ executable,
147
+ "start",
148
+ "--allow-all",
149
+ "--bind",
150
+ f"{self.bind}",
151
+ "-u",
152
+ f"{self.user}",
153
+ "-p",
154
+ f"{self.password}",
155
+ f"file://{path}",
156
+ ]
125
157
 
126
158
  def start(self):
127
159
  """starts surreal process and register a callback is anything goes wrong"""
syncmodels/storage.py CHANGED
@@ -1183,7 +1183,7 @@ class WaveStorage(iWaves, iStorage):
1183
1183
  if must_check:
1184
1184
  t0 = time.time()
1185
1185
  await prevously_inserted()
1186
- if random.random() < 0.1:
1186
+ if False or random.random() < 0.1:
1187
1187
  elapsed = time.time() - t0
1188
1188
  log.info("[%s] prevously_inserted took: %s secs", uid, elapsed)
1189
1189
  if elapsed > 1.0:
@@ -1207,6 +1207,7 @@ class WaveStorage(iWaves, iStorage):
1207
1207
 
1208
1208
  data[ID_KEY] = "{thing}:{id}".format_map(_uri)
1209
1209
 
1210
+ res0 = res1 = res2 = False
1210
1211
  # must push the data?
1211
1212
  context[PUSHED] = push
1212
1213
  if push:
@@ -1217,98 +1218,93 @@ class WaveStorage(iWaves, iStorage):
1217
1218
  data_sort_blueprint,
1218
1219
  data,
1219
1220
  )
1220
- foo = 1
1221
1221
  query = f"{namespace}://{database}/{thing}:{monotonic}"
1222
1222
  res2 = await self.storage.put(query, data)
1223
1223
  else:
1224
1224
  # TODO: agp: refactor all this function when we've time!
1225
1225
  res2 = True
1226
- foo = 1
1227
1226
 
1228
- # long fquid version
1229
- # data[ID_KEY] = data[ORG_KEY]
1230
- # short version
1231
- # data[ID_KEY] = "{thing}:{id}".format_map(_uri)
1227
+ # save Snapshot of the object
1228
+ if push:
1229
+ # long fquid version
1230
+ # data[ID_KEY] = data[ORG_KEY]
1231
+ # short version
1232
+ # data[ID_KEY] = "{thing}:{id}".format_map(_uri)
1233
+ if _uri["id"] is None:
1234
+ __id = data.get(ORG_KEY, "")
1235
+ _id = parse_duri(__id)
1236
+ if _id["id"] is None:
1237
+ _uri["id"] = __id
1238
+ else:
1239
+ _uri["id"] = _id["id"]
1240
+ data[ORG_KEY] = build_uri(**_uri)
1241
+
1242
+ data[ID_KEY] = "{thing}:{id}".format_map(_uri)
1243
+ query = f"{namespace}://{database}/{TUBE_SNAPSHOT}"
1244
+ # resuming_info = {k: kw[k] for k in kw.get(REVERSE_SORT_KEY, [])}
1245
+ # resuming_info = {k: kw[k] for k in kw.get(SORT_KEY) or []}
1246
+ resuming_info = {
1247
+ k: kw.get(k, data.get(k)) for k in kw.get(SORT_KEY) or []
1248
+ }
1249
+ # force to be json compatible
1250
+ resuming_info = JSONVerter.to_json(resuming_info)
1232
1251
 
1233
- if _uri["id"] is None:
1234
- __id = data.get(ORG_KEY, "")
1235
- _id = parse_duri(__id)
1236
- if _id["id"] is None:
1237
- _uri["id"] = __id
1252
+ res0 = await self.storage.put(
1253
+ query,
1254
+ data,
1255
+ )
1256
+ # 3. finally add the wave info into tube
1257
+ data.pop(MONOTONIC_KEY)
1258
+
1259
+ # update the TUBE_WAVE due the insertion of this object
1260
+ if push:
1261
+ # 2. save last Wave from this particular tube
1262
+ # AVOID_KEYS contains all keys that aren't json serializable
1263
+ # wave = {
1264
+ # k: v for k, v in kw.items() if k not in self.AVOID_KEYS
1265
+ # }
1266
+ # try to recover the 'intact' bootstrap that we've using
1267
+ for wave0 in kw.get(WAVE_LAST_KEY, []):
1268
+ wave = wave0.get("wave") # TODO: use a define
1269
+ if wave:
1270
+ break
1238
1271
  else:
1239
- _uri["id"] = _id["id"]
1240
- data[ORG_KEY] = build_uri(**_uri)
1272
+ # otherwise, its the 1st time and we need to create the 1st
1273
+ # bootstrap-wave info
1274
+ wave_keys = set(kw.get(WAVE_INFO_KEY, []))
1275
+ wave_keys.update([KIND_KEY, PREFIX_URL, PARAMS_KEY])
1276
+ # task = kw[TASK_KEY]
1277
+ wave = {k: kw[k] for k in wave_keys.intersection(kw)}
1241
1278
 
1242
- data[ID_KEY] = "{thing}:{id}".format_map(_uri)
1243
- query = f"{namespace}://{database}/{TUBE_SNAPSHOT}"
1244
- # resuming_info = {k: kw[k] for k in kw.get(REVERSE_SORT_KEY, [])}
1245
- # resuming_info = {k: kw[k] for k in kw.get(SORT_KEY) or []}
1246
- resuming_info = {
1247
- k: kw.get(k, data.get(k)) for k in kw.get(SORT_KEY) or []
1248
- }
1249
- # force to be json compatible
1250
- resuming_info = JSONVerter.to_json(resuming_info)
1251
-
1252
- res0 = await self.storage.put(
1253
- query,
1254
- data,
1255
- )
1256
- # 3. finally add the wave info into tube
1257
- data.pop(MONOTONIC_KEY)
1258
-
1259
- # 2. save last Wave from this particular tube
1260
- # AVOID_KEYS contains all keys that aren't json serializable
1261
- # wave = {
1262
- # k: v for k, v in kw.items() if k not in self.AVOID_KEYS
1263
- # }
1264
-
1265
- # try to recover the 'intact' bootstrap that we've using
1266
- for wave0 in kw.get(WAVE_LAST_KEY, []):
1267
- wave = wave0.get("wave") # TODO: use a define
1268
1279
  if wave:
1269
- break
1270
- else:
1271
- # otherwise, its the 1st time and we need to create the 1st
1272
- # bootstrap-wave info
1273
-
1274
- # TODO: review for inclussion
1275
- # TODO: this settings are crawler activity oriented
1276
- # TODO: what about 'Task' an other use cases?
1277
- wave_keys = set(kw.get(WAVE_INFO_KEY, []))
1278
- wave_keys.update([KIND_KEY, PREFIX_URL, PARAMS_KEY])
1279
-
1280
- # task = kw[TASK_KEY]
1281
- wave = {k: kw[k] for k in wave_keys.intersection(kw)}
1282
-
1283
- if wave:
1284
- # wave must be json compatible and do not use any reserved
1285
- # keyword for storage (i.e. 'scope' in Surreal)
1286
- query = f"{namespace}://{database}/{TUBE_WAVE}"
1287
-
1288
- # query can't containg MONOTONIC_KEY
1289
- wave.pop(MONOTONIC_KEY, None)
1290
- wave.pop(WAVE_RESUMING_INFO_KEY, None)
1291
- exists = await self.storage.query(query, **wave)
1292
- assert len(exists) <= 1
1293
-
1294
- if len(exists):
1295
- # use the same record_id
1296
- # otherwise a new record will be created
1297
- wave = exists[0]
1298
- # stamp current wave
1299
- _wave = {
1300
- **wave,
1301
- MONOTONIC_KEY: monotonic,
1302
- WAVE_RESUMING_INFO_KEY: resuming_info,
1303
- }
1304
- res1 = await self.storage.put(query, _wave)
1305
- else:
1306
- # wave is empty, maybe because is not a resuming crawling task
1307
- log.debug(
1308
- "wave is empty, maybe because is not a resuming crawling task"
1309
- )
1310
- # log.info("Saving: %s", data)
1311
- res1 = True
1280
+ # wave must be json compatible and do not use any reserved
1281
+ # keyword for storage (i.e. 'scope' in Surreal)
1282
+ query = f"{namespace}://{database}/{TUBE_WAVE}"
1283
+
1284
+ # query can't containg MONOTONIC_KEY
1285
+ wave.pop(MONOTONIC_KEY, None)
1286
+ wave.pop(WAVE_RESUMING_INFO_KEY, None)
1287
+ exists = await self.storage.query(query, **wave)
1288
+ assert len(exists) <= 1
1289
+
1290
+ if len(exists):
1291
+ # use the same record_id
1292
+ # otherwise a new record will be created
1293
+ wave = exists[0]
1294
+ # stamp current wave
1295
+ _wave = {
1296
+ **wave,
1297
+ MONOTONIC_KEY: monotonic,
1298
+ WAVE_RESUMING_INFO_KEY: resuming_info,
1299
+ }
1300
+ res1 = await self.storage.put(query, _wave)
1301
+ else:
1302
+ # wave is empty, maybe because is not a resuming crawling task
1303
+ log.debug(
1304
+ "wave is empty, maybe because is not a resuming crawling task"
1305
+ )
1306
+ # log.info("Saving: %s", data)
1307
+ res1 = True
1312
1308
 
1313
1309
  return all([res0, res1, res2])
1314
1310
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: syncmodels
3
- Version: 0.1.331
3
+ Version: 0.1.334
4
4
  Summary: Synchronizable Models
5
5
  Home-page: https://github.com/asterio.gonzalez/syncmodels
6
6
  Author: Asterio Gonzalez
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.11
18
18
  Requires-Python: >=3.6
19
19
  License-File: LICENSE
20
20
  License-File: AUTHORS.rst
21
- Requires-Dist: agptools>=0.1.331
21
+ Requires-Dist: agptools>=0.1.334
22
22
  Requires-Dist: aiocache
23
23
  Requires-Dist: aiohttp
24
24
  Requires-Dist: Click
@@ -1,6 +1,6 @@
1
- syncmodels/__init__.py,sha256=ocQlY6lVgFKLeXKNltLiiEFTWqGUZ7-BL8jUbQMZg7c,142
1
+ syncmodels/__init__.py,sha256=79qKhIGH-rtdnic0Qst_kuzypvEYgtYjXJQicUUrO7I,142
2
2
  syncmodels/context.py,sha256=k1Gs_ip9BfyRFpyRnzqYvRDKo0sYBqJsh6z9sWln9oE,451
3
- syncmodels/crawler.py,sha256=_pgelyrIKuVl8vdINJ6NSh5qkSnZf4rAACph4SZ_2H4,95281
3
+ syncmodels/crawler.py,sha256=DAD2tL93iG-LjNQRCg2h_snusjGhQyhdZyMPOCEyvNU,94499
4
4
  syncmodels/crud.py,sha256=oZIcwEKR2i-lesEF_059Y4yThohd9m7gs6R6xYgLH-I,15351
5
5
  syncmodels/definitions.py,sha256=w-3TrSomp9T8OzLmJhKeZQDzrUIJLKldyh1lzlE7Yj0,5476
6
6
  syncmodels/exceptions.py,sha256=ZLAwu19cs2UN2Sv3jaLnixT_jRI7T42TfyutCkUsuIk,685
@@ -11,7 +11,7 @@ syncmodels/registry.py,sha256=YaQtgbSwa0je1MpCcVHALI3_b85vrddyOlhsnrUcKZs,8224
11
11
  syncmodels/requests.py,sha256=wWoC5hPDm1iBM_zrlyKRauzhXgdKR3pT5RqyC-5UZhQ,538
12
12
  syncmodels/runner.py,sha256=IHDKuQ3yJ1DN9wktMiIrerPepYX61tc3AzbFfuUqEFw,5454
13
13
  syncmodels/schema.py,sha256=uinUt8Asq_x7xa6MKWVXNyoWO6gKocjGPppjimaXzEU,2492
14
- syncmodels/storage.py,sha256=zjKnwgiuG4Vt4nm4n74sngBo6kLkki3sCK0U9tXISHY,74739
14
+ syncmodels/storage.py,sha256=HIoh_KSvalgTzBbb5py_0dUBHHipmHWqxmVu1hcEV4s,74991
15
15
  syncmodels/syncmodels.py,sha256=jcUxVbv1hrx5hI81VCO1onIM6WyORTqJVPwIqlPocOc,10596
16
16
  syncmodels/timequeue.py,sha256=YRd3ULRaIhoszaBsYhfr0epMqAbL6-NwVEtScjUYttM,595
17
17
  syncmodels/wave.py,sha256=Gra22BLiA9z2nF-6diXpjAc4GZv9nebmyvHxdAfXec4,7764
@@ -33,7 +33,7 @@ syncmodels/helpers/importers.py,sha256=KImR9pQu4ir6EI6Ipta0q3RWloFT_VTJi67kM0lZs
33
33
  syncmodels/helpers/loaders.py,sha256=aus0aRcbU1vVa_zWo42aX6uV3B0fQ0aQpkTWlR9xGLA,4325
34
34
  syncmodels/helpers/models.py,sha256=c_ATzmiw5mVY1IGnwmyhjIuu5d2idHU-XeRigZSMkOQ,719
35
35
  syncmodels/helpers/orion.py,sha256=6lRp1w3yaq_rxOI7nJIjuHdsgBjQu92y0bW0IX_gq44,30719
36
- syncmodels/helpers/surreal.py,sha256=zoWtGm5oAxwvgJNq_NTpKOHN3h9FNObhFDLuiBOl1YY,10050
36
+ syncmodels/helpers/surreal.py,sha256=lHXvm5oNvpDBXFnIksTNV4c3PWp35myw3TiQpP2dqN0,10855
37
37
  syncmodels/helpers/units.py,sha256=g50m5DQrAyP_qpDRa4LCEA5Rz2UZUmlIixfWG_ddw9I,3571
38
38
  syncmodels/logic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
39
  syncmodels/logic/activity_logger.py,sha256=8wjvgRwaNbibYWGgl-trovSS70yNkoCTlb-AIx3aZEE,14053
@@ -303,10 +303,10 @@ syncmodels/session/postgresql.py,sha256=ZMIu1Rv93pKfvFlovFBmWArzlrT2xaQWNYGZT_LW
303
303
  syncmodels/session/sql.py,sha256=17C8EHn_1twHezhMlD5esMvx4m0iIrnD7JK-TuBswgU,6573
304
304
  syncmodels/session/sqlite.py,sha256=nCDjopLiBpX1F10qkKoARM7JrVdIpJ1WdGOduFVxaiA,2080
305
305
  syncmodels/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
306
- syncmodels-0.1.331.dist-info/AUTHORS.rst,sha256=3ZPoqg8Aav8DSYKd0fwcwn4_5HwSiMLart0E5Un00-U,168
307
- syncmodels-0.1.331.dist-info/LICENSE,sha256=uzMOYtIiUsnsD0xHJR7aJWJ4v_bvan0kTnvufy5eNoA,1075
308
- syncmodels-0.1.331.dist-info/METADATA,sha256=gcR0Vk04bekJbu7pwkLBWpeUEvvtmqQuUerKgm0JOwg,2700
309
- syncmodels-0.1.331.dist-info/WHEEL,sha256=SrDKpSbFN1G94qcmBqS9nyHcDMp9cUS9OC06hC0G3G0,109
310
- syncmodels-0.1.331.dist-info/entry_points.txt,sha256=dMnigjZsHMxTwXiiZyBZdBbMYE0-hY3L5cG15EcDAzw,51
311
- syncmodels-0.1.331.dist-info/top_level.txt,sha256=2DfQ9NuAhKMjY3BvQGVBA7GfqTm7EoHNbaehSUiqiHQ,11
312
- syncmodels-0.1.331.dist-info/RECORD,,
306
+ syncmodels-0.1.334.dist-info/AUTHORS.rst,sha256=3ZPoqg8Aav8DSYKd0fwcwn4_5HwSiMLart0E5Un00-U,168
307
+ syncmodels-0.1.334.dist-info/LICENSE,sha256=uzMOYtIiUsnsD0xHJR7aJWJ4v_bvan0kTnvufy5eNoA,1075
308
+ syncmodels-0.1.334.dist-info/METADATA,sha256=tMGMIT9CJEpVOlBmUqV7MCE_Z-myAFJo918gpJdV4KQ,2700
309
+ syncmodels-0.1.334.dist-info/WHEEL,sha256=SrDKpSbFN1G94qcmBqS9nyHcDMp9cUS9OC06hC0G3G0,109
310
+ syncmodels-0.1.334.dist-info/entry_points.txt,sha256=dMnigjZsHMxTwXiiZyBZdBbMYE0-hY3L5cG15EcDAzw,51
311
+ syncmodels-0.1.334.dist-info/top_level.txt,sha256=2DfQ9NuAhKMjY3BvQGVBA7GfqTm7EoHNbaehSUiqiHQ,11
312
+ syncmodels-0.1.334.dist-info/RECORD,,