syncmodels 0.1.350__py2.py3-none-any.whl → 0.1.351__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- syncmodels/__init__.py +1 -1
- syncmodels/crawler.py +35 -10
- syncmodels/registry.py +5 -0
- syncmodels/session/__init__.py +6 -0
- {syncmodels-0.1.350.dist-info → syncmodels-0.1.351.dist-info}/METADATA +2 -2
- {syncmodels-0.1.350.dist-info → syncmodels-0.1.351.dist-info}/RECORD +11 -11
- {syncmodels-0.1.350.dist-info → syncmodels-0.1.351.dist-info}/AUTHORS.rst +0 -0
- {syncmodels-0.1.350.dist-info → syncmodels-0.1.351.dist-info}/LICENSE +0 -0
- {syncmodels-0.1.350.dist-info → syncmodels-0.1.351.dist-info}/WHEEL +0 -0
- {syncmodels-0.1.350.dist-info → syncmodels-0.1.351.dist-info}/entry_points.txt +0 -0
- {syncmodels-0.1.350.dist-info → syncmodels-0.1.351.dist-info}/top_level.txt +0 -0
syncmodels/__init__.py
CHANGED
syncmodels/crawler.py
CHANGED
@@ -403,7 +403,12 @@ class iAgent(iRunner):
|
|
403
403
|
|
404
404
|
if self.add_task(task):
|
405
405
|
activity_log = ActivityStatus(
|
406
|
-
|
406
|
+
**task,
|
407
|
+
**{
|
408
|
+
"activity_type": "task",
|
409
|
+
"status": "pending",
|
410
|
+
# "name": task[KIND_KEY],
|
411
|
+
},
|
407
412
|
)
|
408
413
|
task[ACTIVITY_LOG_KEY] = activity_log
|
409
414
|
log.debug("+ Task: [%s]: [%s] %s", i, self.name, task)
|
@@ -418,7 +423,9 @@ class iAgent(iRunner):
|
|
418
423
|
else:
|
419
424
|
log.debug("- Task: SKIP: [%s]: [%s] %s", i, self.name, task)
|
420
425
|
|
421
|
-
if i
|
426
|
+
if i > 0:
|
427
|
+
log.info("[%s] %s/%s tasks added", self.name, i, len(tasks))
|
428
|
+
else:
|
422
429
|
log.warning("[%s] no task provided by bootstrap()", self.name)
|
423
430
|
|
424
431
|
log.debug("<< [%s] exit bootstrap()", self.name)
|
@@ -1140,6 +1147,8 @@ class iBot(iAgent):
|
|
1140
1147
|
msg = "".join(traceback.format_exception(*sys.exc_info()))
|
1141
1148
|
log.error(msg)
|
1142
1149
|
exception_raised = why
|
1150
|
+
await self.stop()
|
1151
|
+
foo = 1
|
1143
1152
|
|
1144
1153
|
log.warning("retry: %s: %s, %s", tries, call_kw, params)
|
1145
1154
|
await asyncio.sleep(self.RETRY_DELAY)
|
@@ -1186,7 +1195,14 @@ class iBot(iAgent):
|
|
1186
1195
|
# context[KIND_KEY] = context.pop("kind")
|
1187
1196
|
|
1188
1197
|
for k, v in context.items():
|
1189
|
-
if match(k) and isinstance(
|
1198
|
+
if match(k) and isinstance(
|
1199
|
+
v,
|
1200
|
+
(
|
1201
|
+
int,
|
1202
|
+
str,
|
1203
|
+
float,
|
1204
|
+
),
|
1205
|
+
):
|
1190
1206
|
params[k] = v
|
1191
1207
|
|
1192
1208
|
return params
|
@@ -1760,6 +1776,9 @@ class PutPlugin(iPlugin):
|
|
1760
1776
|
await activity_logger.update_activity(activity)
|
1761
1777
|
last_logged_percent = progress
|
1762
1778
|
last_logged_time = now
|
1779
|
+
await asyncio.sleep(
|
1780
|
+
0.25
|
1781
|
+
) # be nice and try to unlock other fibers
|
1763
1782
|
|
1764
1783
|
# Original inject logic
|
1765
1784
|
await crawler.inject(data, context)
|
@@ -2623,6 +2642,13 @@ class iAsyncCrawler(iCrawler):
|
|
2623
2642
|
int(restart),
|
2624
2643
|
)
|
2625
2644
|
await asyncio.sleep(restart)
|
2645
|
+
log.info(
|
2646
|
+
"[%s]: remaining cycles: [%s] : restart crawling right now!",
|
2647
|
+
self.bootstrap,
|
2648
|
+
self.cycles,
|
2649
|
+
# int(restart),
|
2650
|
+
)
|
2651
|
+
foo = 1
|
2626
2652
|
|
2627
2653
|
result = await self.save()
|
2628
2654
|
if result:
|
@@ -2726,14 +2752,13 @@ class iAsyncCrawler(iCrawler):
|
|
2726
2752
|
bot.fiber = loop.create_task(bot.run())
|
2727
2753
|
|
2728
2754
|
async def remove_bot(self, bot: iBot):
|
2729
|
-
|
2730
|
-
|
2731
|
-
|
2732
|
-
# bot.input_queue.put_nowait(None)
|
2755
|
+
if bot.name in self.bot:
|
2756
|
+
# request bot to finish
|
2757
|
+
# bot.input_queue.put_nowait(None)
|
2733
2758
|
|
2734
|
-
|
2735
|
-
|
2736
|
-
|
2759
|
+
# remove bot for more assignations
|
2760
|
+
self.round_robin.remove(bot)
|
2761
|
+
self.bot.pop(bot.name)
|
2737
2762
|
|
2738
2763
|
def remain_tasks(self):
|
2739
2764
|
"compute how many pending tasks still remains"
|
syncmodels/registry.py
CHANGED
@@ -121,6 +121,11 @@ class iRegistry:
|
|
121
121
|
for klass in __klass__:
|
122
122
|
for pattern, info in cls.REGISTRY.items():
|
123
123
|
# Note: search(uri pattern) + match(uri when used will provide)
|
124
|
+
# try: # debug
|
125
|
+
# re.search(pattern, uri) or re.match(uri, pattern)
|
126
|
+
# except Exception as why:
|
127
|
+
# print(why)
|
128
|
+
|
124
129
|
if m := re.search(pattern, uri) or re.match(uri, pattern):
|
125
130
|
candidate = {}
|
126
131
|
for factory, options in info.items():
|
syncmodels/session/__init__.py
CHANGED
@@ -78,6 +78,7 @@ class iSession(iContext, iSchema, iRegistry): # , iAuthenticator):
|
|
78
78
|
|
79
79
|
QUERY_BODY_KEY = "json"
|
80
80
|
PARAMS_KEY = "params"
|
81
|
+
EXTRA_KEY = None # don't add extra info (aiohttp may fail)
|
81
82
|
|
82
83
|
HEADERS = {}
|
83
84
|
|
@@ -417,6 +418,11 @@ class iSession(iContext, iSchema, iRegistry): # , iAuthenticator):
|
|
417
418
|
elif self.PARAMS_KEY:
|
418
419
|
call_kw[self.PARAMS_KEY] = params
|
419
420
|
|
421
|
+
if self.EXTRA_KEY:
|
422
|
+
call_kw[self.EXTRA_KEY] = {
|
423
|
+
k: v for k, v in context.items() if k not in params
|
424
|
+
}
|
425
|
+
|
420
426
|
return call_kw
|
421
427
|
|
422
428
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: syncmodels
|
3
|
-
Version: 0.1.350
|
3
|
+
Version: 0.1.351
|
4
4
|
Summary: Synchronizable Models
|
5
5
|
Home-page: https://github.com/asterio.gonzalez/syncmodels
|
6
6
|
Author: Asterio Gonzalez
|
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
18
18
|
Requires-Python: >=3.6
|
19
19
|
License-File: LICENSE
|
20
20
|
License-File: AUTHORS.rst
|
21
|
-
Requires-Dist: agptools>=0.1.
|
21
|
+
Requires-Dist: agptools>=0.1.351
|
22
22
|
Requires-Dist: aiocache
|
23
23
|
Requires-Dist: aiohttp
|
24
24
|
Requires-Dist: Click
|
@@ -1,13 +1,13 @@
|
|
1
|
-
syncmodels/__init__.py,sha256=
|
1
|
+
syncmodels/__init__.py,sha256=yY3_O0kIkf8luh4UrReqSq4x2fcMWfEiGT1p0C1LySE,146
|
2
2
|
syncmodels/context.py,sha256=k1Gs_ip9BfyRFpyRnzqYvRDKo0sYBqJsh6z9sWln9oE,451
|
3
|
-
syncmodels/crawler.py,sha256=
|
3
|
+
syncmodels/crawler.py,sha256=pE6lXz2mWgPsaYWnHWrtyjtq5J5CUjuau4D7mvHM2g4,95374
|
4
4
|
syncmodels/crud.py,sha256=oZIcwEKR2i-lesEF_059Y4yThohd9m7gs6R6xYgLH-I,15351
|
5
5
|
syncmodels/definitions.py,sha256=w-3TrSomp9T8OzLmJhKeZQDzrUIJLKldyh1lzlE7Yj0,5476
|
6
6
|
syncmodels/exceptions.py,sha256=ZLAwu19cs2UN2Sv3jaLnixT_jRI7T42TfyutCkUsuIk,685
|
7
7
|
syncmodels/geofactory.py,sha256=1FkrdEn0QA0O4_lSUAwjqXH2dmlQWi32AkntnG4AEQY,10372
|
8
8
|
syncmodels/http.py,sha256=FFVT3QJJgur2dv1Q_7l9ZsWN8z6_gUjOT9hJff1ZAqk,3335
|
9
9
|
syncmodels/parallel.py,sha256=Ll8HmyFF9v9fIofqqSgfhyTlklvb77mTtNdG5Y9lqdQ,7145
|
10
|
-
syncmodels/registry.py,sha256=
|
10
|
+
syncmodels/registry.py,sha256=3RhSdihKBkoxMUIflTUmPt3M4hA3mAoOGQ5V7joqL3o,8404
|
11
11
|
syncmodels/requests.py,sha256=wWoC5hPDm1iBM_zrlyKRauzhXgdKR3pT5RqyC-5UZhQ,538
|
12
12
|
syncmodels/runner.py,sha256=Tb5KTu_XFxCphrtfcnmIqTItG91el7P-9B5WaDTMp18,5928
|
13
13
|
syncmodels/schema.py,sha256=HMr0LcB40se5sqHF4WYQIdzXUphLa2fhf0KFFUMzpDM,2726
|
@@ -297,16 +297,16 @@ syncmodels/model/schema_org/webpage.py,sha256=-bqKOpEs_2lW7qrey2nHtvOZ9xbmmwcviN
|
|
297
297
|
syncmodels/model/schema_org/webpageelement.py,sha256=brXfhU3l3FBXpy8qnR1Ve-EckjHW8VGoyR2IsnT7t2Y,1104
|
298
298
|
syncmodels/model/schema_org/website.py,sha256=48Rox27BbFIg1u3wDlOtX-lLCPoFgvvrCw5Hrdf6uRU,912
|
299
299
|
syncmodels/model/schema_org/xpathtype.py,sha256=D8gKiCrGSSuUVYw7BIWmOIUbKATfv2IpbkV1B2TmjC0,484
|
300
|
-
syncmodels/session/__init__.py,sha256=
|
300
|
+
syncmodels/session/__init__.py,sha256=HeyQiSg8RjCEBRF3VoZmuPyABognZ6wHNw0ekt7jLwY,15820
|
301
301
|
syncmodels/session/http.py,sha256=tf7z0ccAEYoCOZT4Ukv3NBXz9hUO3vs2s9bm491pCj8,1480
|
302
302
|
syncmodels/session/postgresql.py,sha256=ZMIu1Rv93pKfvFlovFBmWArzlrT2xaQWNYGZT_LW61k,175
|
303
303
|
syncmodels/session/sql.py,sha256=Ia-Yrs_hOhAIZ4IcoVB5i8mQZVCULfnV9-TQz49knZU,7181
|
304
304
|
syncmodels/session/sqlite.py,sha256=nCDjopLiBpX1F10qkKoARM7JrVdIpJ1WdGOduFVxaiA,2080
|
305
305
|
syncmodels/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
306
|
-
syncmodels-0.1.
|
307
|
-
syncmodels-0.1.
|
308
|
-
syncmodels-0.1.
|
309
|
-
syncmodels-0.1.
|
310
|
-
syncmodels-0.1.
|
311
|
-
syncmodels-0.1.
|
312
|
-
syncmodels-0.1.
|
306
|
+
syncmodels-0.1.351.dist-info/AUTHORS.rst,sha256=3ZPoqg8Aav8DSYKd0fwcwn4_5HwSiMLart0E5Un00-U,168
|
307
|
+
syncmodels-0.1.351.dist-info/LICENSE,sha256=uzMOYtIiUsnsD0xHJR7aJWJ4v_bvan0kTnvufy5eNoA,1075
|
308
|
+
syncmodels-0.1.351.dist-info/METADATA,sha256=WAOLdI0tEnOPkqttzU2VqOnuc6DsFsKlLyl65aeBglc,2700
|
309
|
+
syncmodels-0.1.351.dist-info/WHEEL,sha256=SrDKpSbFN1G94qcmBqS9nyHcDMp9cUS9OC06hC0G3G0,109
|
310
|
+
syncmodels-0.1.351.dist-info/entry_points.txt,sha256=dMnigjZsHMxTwXiiZyBZdBbMYE0-hY3L5cG15EcDAzw,51
|
311
|
+
syncmodels-0.1.351.dist-info/top_level.txt,sha256=2DfQ9NuAhKMjY3BvQGVBA7GfqTm7EoHNbaehSUiqiHQ,11
|
312
|
+
syncmodels-0.1.351.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|