softhauzpy 0.0.2__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {softhauzpy-0.0.2 → softhauzpy-0.0.4}/PKG-INFO +3 -1
- {softhauzpy-0.0.2 → softhauzpy-0.0.4}/setup.py +3 -1
- {softhauzpy-0.0.2 → softhauzpy-0.0.4}/softhauzpy/main.py +2 -5
- {softhauzpy-0.0.2 → softhauzpy-0.0.4}/softhauzpy.egg-info/PKG-INFO +3 -1
- {softhauzpy-0.0.2 → softhauzpy-0.0.4}/README.md +0 -0
- {softhauzpy-0.0.2 → softhauzpy-0.0.4}/setup.cfg +0 -0
- {softhauzpy-0.0.2 → softhauzpy-0.0.4}/softhauzpy/__init__.py +0 -0
- {softhauzpy-0.0.2 → softhauzpy-0.0.4}/softhauzpy.egg-info/SOURCES.txt +0 -0
- {softhauzpy-0.0.2 → softhauzpy-0.0.4}/softhauzpy.egg-info/dependency_links.txt +0 -0
- {softhauzpy-0.0.2 → softhauzpy-0.0.4}/softhauzpy.egg-info/requires.txt +0 -0
- {softhauzpy-0.0.2 → softhauzpy-0.0.4}/softhauzpy.egg-info/top_level.txt +0 -0
|
@@ -5,7 +5,9 @@ with open("README.md", "r", encoding="utf-8") as f:
|
|
|
5
5
|
|
|
6
6
|
setup(
|
|
7
7
|
name='softhauzpy',
|
|
8
|
-
version='0.0.2',
|
|
8
|
+
version='0.0.4',
|
|
9
|
+
author='Karen Urate',
|
|
10
|
+
author_email='karen.urate@softhauz.ca',
|
|
9
11
|
packages=find_packages(),
|
|
10
12
|
install_requires=[
|
|
11
13
|
'requests>=2.32.3',
|
|
@@ -193,7 +193,7 @@ def get_search_results_list(page_list=[], keywords='') -> list:
|
|
|
193
193
|
creation_date = page[4] or ''
|
|
194
194
|
modified_date = page[5] or ''
|
|
195
195
|
|
|
196
|
-
if keywords in extract_pure_text(url, title, author, description, creation_date, modified_date)["content"]:
|
|
196
|
+
if keywords in extract_pure_text(url, title=title, author=author, description=description, creation_date=creation_date, modified_date=modified_date)["content"]:
|
|
197
197
|
results.append((url, title, author, description, creation_date, modified_date))
|
|
198
198
|
|
|
199
199
|
return results
|
|
@@ -989,21 +989,18 @@ def incremental_update(
|
|
|
989
989
|
fp = fingerprint_page(text)
|
|
990
990
|
|
|
991
991
|
if fingerprints.get(url) == fp:
|
|
992
|
-
return False
|
|
992
|
+
return False
|
|
993
993
|
|
|
994
994
|
fingerprints[url] = fp
|
|
995
995
|
|
|
996
|
-
# Remove stale entries from index
|
|
997
996
|
for token in list(index.keys()):
|
|
998
997
|
index[token] = [(doc_id, freq) for doc_id, freq in index[token] if doc_id != url]
|
|
999
998
|
if not index[token]:
|
|
1000
999
|
del index[token]
|
|
1001
1000
|
|
|
1002
|
-
# Remove stale tfidf and metadata entries
|
|
1003
1001
|
tfidf.pop(url, None)
|
|
1004
1002
|
metadata[:] = [m for m in metadata if m.get("url") != url]
|
|
1005
1003
|
|
|
1006
|
-
# Build fresh entries for this page
|
|
1007
1004
|
token_freq = Counter(tokenize(text))
|
|
1008
1005
|
total = len(list(token_freq.elements())) or 1
|
|
1009
1006
|
for token, freq in token_freq.items():
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|