web-valueist 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Andreas Galazis
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,115 @@
1
+ Metadata-Version: 2.1
2
+ Name: web-valueist
3
+ Version: 0.1.0
4
+ Summary:
5
+ Author: Andreas Galazis
6
+ Requires-Python: >=3.13,<4.0
7
+ Classifier: Programming Language :: Python :: 3
8
+ Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
9
+ Requires-Dist: lxml (>=5.3.0,<6.0.0)
10
+ Requires-Dist: requests (>=2.32.3,<3.0.0)
11
+ Description-Content-Type: text/markdown
12
+
13
+ # Web Valueist
14
+
15
+ Fetches a value from the web, compares it with a given value, and exits with a zero
16
+ exit code if the condition is satisfied.
17
+
18
+
19
+ ## Setup
20
+
21
+ While in project directory:
22
+ ```
23
+ ./install.sh
24
+ ```
25
+
26
+ ## Usage:
27
+
28
+ `web_valueist [-h] [--debug] [--json] url parser_name [quantifier] selector operator_name value`
29
+
30
+ ```
31
+ positional arguments:
32
+ url
33
+ parser_name
34
+ quantifier Optional: ANY or EVERY (default: ANY)
35
+ selector
36
+ operator_name
37
+ value
38
+
39
+ options:
40
+ -h, --help show this help message and exit
41
+ --debug Show debug logs including found values
42
+ --json Output input and result as JSON
43
+ ```
44
+
45
+ ## Sample Usage
46
+
47
+ By default, `web_valueist` is silent and communicates success or failure via the exit code.
48
+
49
+ Sample success
50
+
51
+ ```
52
+ python -m web_valueist https://www.ikea.com.cy/en/products/fjallhavre-duvet-warm-240x220-cm/70458057/ int span.price__integer ">" 240
53
+ ```
54
+
55
+ (You can also use `gt` instead of `">"`.)
56
+
57
+ Exit Code: `0`
58
+
59
+ Sample failure
60
+
61
+ ```
62
+ python -m web_valueist https://www.ikea.com.cy/en/products/fjallhavre-duvet-warm-240x220-cm/70458057/ int span.price__integer "<" 240
63
+ ```
64
+
65
+ (You can also use `lt` instead of `"<"`.)
66
+
67
+ Exit Code: `1`
68
+
69
+ ### Debugging
70
+
71
+ Use the `--debug` flag to see the values fetched from the web.
72
+
73
+ ```
74
+ python -m web_valueist https://www.ikea.com.cy/en/products/fjallhavre-duvet-warm-240x220-cm/70458057/ int span.price__integer ">" 240 --debug
75
+ ```
76
+
77
+ Output:
78
+
79
+ ```
80
+ DEBUG:web_valueist.lib:Found value ['245']
81
+ ```
82
+
83
+ ### JSON Output
84
+
85
+ Use the `--json` flag to get a structured output.
86
+
87
+ ```
88
+ python -m web_valueist http://example.com str h1 "eq" "Example Domain" --json
89
+ ```
90
+
91
+ Output:
92
+ ```json
93
+ {"args": {"url": "http://example.com", "parser_name": "str", "quantifier": "ANY", "selector": "h1", "operator_name": "eq", "value": "Example Domain"}, "result": {"success": true, "value": "Example Domain"}}
94
+ ```
95
+
96
+ ### Using Quantifiers
97
+
98
+ When a selector matches multiple elements, you can use `ANY` or `EVERY`.
99
+
100
+ - **ANY** (default): At least one selector match needs to satisfy the condition.
101
+ - **EVERY**: All selector matches need to satisfy the condition.
102
+
103
+ Example using `EVERY`:
104
+ ```
105
+ python -m web_valueist https://example.com int EVERY .price ">" 100
106
+ ```
107
+
108
+ If no quantifier is specified, `ANY` is used by default.
109
+
110
+ ### Sample cron job
111
+
112
+ ```
113
+ */30 * * * * web_valueist "https://www.bazaraki.com/car-motorbikes-boats-and-parts/cars-trucks-and-vans/mazda/mazda-mx5/year_min---71/?ordering=cheapest&lat=35.01804869361969&lng=34.04709596563199&radius=5000&price_max=30000" int .advert__content-price._not-title "<" 22500 &&message="Some fancy car matching your criteria was found" &&if command -v notify-send >/dev/null 2>&1 ; then notify-send "$message"; else say "$message"; fi
114
+ ```
115
+
@@ -0,0 +1,102 @@
1
+ # Web Valueist
2
+
3
+ Fetches a value from the web, compares it with a given value, and exits with a zero
4
+ exit code if the condition is satisfied.
5
+
6
+
7
+ ## Setup
8
+
9
+ While in project directory:
10
+ ```
11
+ ./install.sh
12
+ ```
13
+
14
+ ## Usage:
15
+
16
+ `web_valueist [-h] [--debug] [--json] url parser_name [quantifier] selector operator_name value`
17
+
18
+ ```
19
+ positional arguments:
20
+ url
21
+ parser_name
22
+ quantifier Optional: ANY or EVERY (default: ANY)
23
+ selector
24
+ operator_name
25
+ value
26
+
27
+ options:
28
+ -h, --help show this help message and exit
29
+ --debug Show debug logs including found values
30
+ --json Output input and result as JSON
31
+ ```
32
+
33
+ ## Sample Usage
34
+
35
+ By default, `web_valueist` is silent and communicates success or failure via the exit code.
36
+
37
+ Sample success
38
+
39
+ ```
40
+ python -m web_valueist https://www.ikea.com.cy/en/products/fjallhavre-duvet-warm-240x220-cm/70458057/ int span.price__integer ">" 240
41
+ ```
42
+
43
+ (You can also use `gt` instead of `">"`.)
44
+
45
+ Exit Code: `0`
46
+
47
+ Sample failure
48
+
49
+ ```
50
+ python -m web_valueist https://www.ikea.com.cy/en/products/fjallhavre-duvet-warm-240x220-cm/70458057/ int span.price__integer "<" 240
51
+ ```
52
+
53
+ (You can also use `lt` instead of `"<"`.)
54
+
55
+ Exit Code: `1`
56
+
57
+ ### Debugging
58
+
59
+ Use the `--debug` flag to see the values fetched from the web.
60
+
61
+ ```
62
+ python -m web_valueist https://www.ikea.com.cy/en/products/fjallhavre-duvet-warm-240x220-cm/70458057/ int span.price__integer ">" 240 --debug
63
+ ```
64
+
65
+ Output:
66
+
67
+ ```
68
+ DEBUG:web_valueist.lib:Found value ['245']
69
+ ```
70
+
71
+ ### JSON Output
72
+
73
+ Use the `--json` flag to get a structured output.
74
+
75
+ ```
76
+ python -m web_valueist http://example.com str h1 "eq" "Example Domain" --json
77
+ ```
78
+
79
+ Output:
80
+ ```json
81
+ {"args": {"url": "http://example.com", "parser_name": "str", "quantifier": "ANY", "selector": "h1", "operator_name": "eq", "value": "Example Domain"}, "result": {"success": true, "value": "Example Domain"}}
82
+ ```
83
+
84
+ ### Using Quantifiers
85
+
86
+ When a selector matches multiple elements, you can use `ANY` or `EVERY`.
87
+
88
+ - **ANY** (default): At least one selector match needs to satisfy the condition.
89
+ - **EVERY**: All selector matches need to satisfy the condition.
90
+
91
+ Example using `EVERY`:
92
+ ```
93
+ python -m web_valueist https://example.com int EVERY .price ">" 100
94
+ ```
95
+
96
+ If no quantifier is specified, `ANY` is used by default.
97
+
98
+ ### Sample cron job
99
+
100
+ ```
101
+ */30 * * * * web_valueist "https://www.bazaraki.com/car-motorbikes-boats-and-parts/cars-trucks-and-vans/mazda/mazda-mx5/year_min---71/?ordering=cheapest&lat=35.01804869361969&lng=34.04709596563199&radius=5000&price_max=30000" int .advert__content-price._not-title "<" 22500 &&message="Some fancy car matching your criteria was found" &&if command -v notify-send >/dev/null 2>&1 ; then notify-send "$message"; else say "$message"; fi
102
+ ```
@@ -0,0 +1,19 @@
1
+ [tool.poetry]
2
+ name = "web-valueist"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["Andreas Galazis"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.13"
10
+ beautifulsoup4 = "^4.12.3"
11
+ requests = "^2.32.3"
12
+ lxml = "^5.3.0"
13
+
14
+ [tool.poetry.scripts]
15
+ web_valueist = "web_valueist.__main__:__main__"
16
+
17
+ [build-system]
18
+ requires = ["poetry-core"]
19
+ build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,11 @@
1
+ from .lib import *
2
+
3
+ __all__ = [
4
+ "evaluate",
5
+ "Parser",
6
+ "Operator",
7
+ "ParserNotSupportedError",
8
+ "OperatorNotSupportedError",
9
+ "ValueistException",
10
+ "ValueNotFound",
11
+ ]
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env python3
2
+ import logging
3
+ import web_valueist
4
+ from argparse import ArgumentParser
5
+ from typing import TypedDict, Unpack
6
+ from signal import signal, SIGTERM
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class Args(TypedDict):
    """Keyword arguments that ``main`` forwards to ``web_valueist.evaluate``."""

    # Address of the page to fetch.
    url: str
    # CSS selector used to locate the value(s) on the page.
    selector: str
    # Name of the parser applied to the fetched text and the reference value.
    parser_name: web_valueist.Parser
    # Name (or symbol) of the comparison operator.
    operator_name: web_valueist.Operator
    # Reference value to compare against, as given on the command line.
    value: str
    # "ANY" or "EVERY"; ``_parse_args`` fills in "ANY" when it is omitted.
    quantifier: str
17
+
18
+
19
class CliArgs(Args):
    """``Args`` plus the flags that exist only on the command line."""

    # True when ``--debug`` was passed.
    debug: bool
    # True when ``--json`` was passed.
    json: bool
22
+
23
+
24
def _detect_optional_arguments(config: dict[str, dict]) -> dict[str, bool]:
    """Report which optional positional arguments are present in ``sys.argv``.

    argparse cannot express an optional positional in the middle of the
    argument list, so the command line is inspected up front and the parser
    is then built with or without the extra slot.

    Args:
        config: Maps an argument name to a dict with ``"position"`` (index
            among the positional arguments) and ``"possible_values"``
            (upper-case values that identify the argument).

    Returns:
        dict[str, bool]: For every name in ``config``, whether that argument
        was supplied.
    """
    import re
    import sys

    # Negative numbers ("-5", "-.5", "-1.25") are values, not option flags;
    # argparse treats them as positionals, so they must be counted here too.
    negative_number = re.compile(r"^-\d+$|^-\d*\.\d+$")
    positional_args = [
        arg
        for arg in sys.argv[1:]
        if not arg.startswith("-") or negative_number.match(arg)
    ]

    # The optional slot is only in use when all 6 positionals are given;
    # with 5 the quantifier has been left out.
    has_optional_slot = len(positional_args) == 6

    results = {}
    for name, attr in config.items():
        pos = attr["position"]
        possible_values = attr["possible_values"]
        results[name] = (
            has_optional_slot
            and len(positional_args) > pos
            and positional_args[pos].upper() in possible_values
        )
    return results
40
+
41
+
42
def _parse_args() -> CliArgs:
    """Build the argument parser and parse the command line.

    The ``quantifier`` positional is registered only when
    ``_detect_optional_arguments`` finds it on the command line; otherwise
    the result gets ``"ANY"``. A supplied quantifier is upper-cased.

    Returns:
        CliArgs: The parsed arguments as a plain dict.
    """
    quantifier_given = _detect_optional_arguments(
        {"quantifier": {"position": 2, "possible_values": ["ANY", "EVERY"]}}
    ).get("quantifier")

    cli = ArgumentParser(
        prog="web_valueist",
        usage="web_valueist [-h] [--debug] [--json] url parser_name [quantifier] selector operator_name value",
        description="""Fetches the value from the web, compares
        it with a given value and exits with zero exit code
        if the condition is satisfied """,
        epilog="Did somebody say cron jobs? Have fun!",
    )

    # Positionals are registered in command-line order.
    positionals = [
        ("url", "The URL to fetch"),
        (
            "parser_name",
            "The name of the parser to use (e.g., int, str, bool, float)",
        ),
    ]
    if quantifier_given:
        positionals.append(
            ("quantifier", "Quantifier for multiple matches (ANY or EVERY)")
        )
    positionals.extend(
        [
            ("selector", "The CSS selector to find the value"),
            (
                "operator_name",
                "The operator to use for comparison (e.g., gt, lt, eq, ne, !=)",
            ),
            ("value", "The reference value to compare against"),
        ]
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, help=help_text)

    cli.add_argument("--debug", action="store_true", help="Enable debug logging")
    cli.add_argument(
        "--json", action="store_true", help="Output input and result as JSON"
    )

    parsed = cli.parse_args().__dict__
    parsed["quantifier"] = (
        parsed["quantifier"].upper() if quantifier_given else "ANY"
    )
    return parsed
87
+
88
+
89
def _initialize_logger(debug: bool, **otherArgs: Unpack[Args]) -> Args:
    """Configure the root logger and pass the remaining arguments through.

    Args:
        debug: When true the root logger is set to DEBUG, otherwise INFO.
        **otherArgs: Every other parsed argument, returned untouched.

    Returns:
        Args: The keyword arguments that were received besides ``debug``.
    """
    logging.basicConfig()
    level = logging.DEBUG if debug else logging.INFO
    logging.getLogger().setLevel(level)
    return otherArgs
96
+
97
+
98
def _initialize_cli():
    """Parse the command line, set up logging and return the remaining arguments."""
    return _initialize_logger(**_parse_args())
101
+
102
+
103
def sigterm_handler(_, __):
    """Log that termination was requested and exit with status 1.

    Installed for ``SIGTERM`` by ``__main__``. Both parameters are ignored.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    logger.error("termination requested, bye...\n")
    raise SystemExit(1)
106
+
107
+
108
def main():
    """Run one evaluation from the command line and exit with its status.

    Prints the arguments and the result as JSON when ``--json`` was given.

    Raises:
        SystemExit: With code 0 when the condition held, 1 otherwise.
    """
    import json
    import sys

    cli_args = _initialize_cli()
    # "json" is a CLI-only flag; evaluate() does not accept it.
    wants_json = cli_args.pop("json")
    outcome = web_valueist.evaluate(**cli_args)
    if wants_json:
        print(json.dumps({"args": cli_args, "result": outcome}))
    sys.exit(0 if outcome["success"] else 1)
119
+
120
+
121
def __main__():
    """Console entry point: install the SIGTERM handler and run ``main``.

    Keyboard interrupts and ``ValueistException`` errors are logged and
    turned into exit status 1. The ``SystemExit`` raised by ``main`` itself
    is not intercepted.

    Raises:
        SystemExit: With code 1 on interrupt or on a ``ValueistException``.
    """
    # sys.exit is used rather than the bare exit() helper: exit() is injected
    # by the site module for interactive use and is not guaranteed to exist.
    import sys

    signal(SIGTERM, sigterm_handler)

    try:
        main()
    except KeyboardInterrupt:
        logger.error("ok, bye...\n")
        sys.exit(1)
    except web_valueist.ValueistException as e:
        logger.error("Error: %s", e)
        sys.exit(1)
132
+
133
+
134
+ if __name__ == "__main__":
135
+ __main__()
@@ -0,0 +1,75 @@
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ from . import parser, operator
4
+ from .exception import ValueistException, ValueNotFound
5
+ from .parser import Parser, ParserNotSupportedError
6
+ from .operator import Operator, OperatorNotSupportedError
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
def _fetch_values(url: str, selector: str):
    """Download ``url`` and return the text of every element matching ``selector``.

    Args:
        url: Page to request (10 second timeout).
        selector: CSS selector evaluated against the parsed page.

    Returns:
        list[str]: Text content of each matching element, in match order.

    Raises:
        ValueNotFound: If the selector matches no element.
    """
    page = requests.get(url, timeout=10)
    if logger.isEnabledFor(logging.DEBUG):
        # page.text is only touched in debug mode so that large payloads
        # are not decoded needlessly.
        logger.debug("Looking for %s in %s", selector, page.text)
    matches = BeautifulSoup(page.content, "lxml").css.select(selector)
    if not matches:
        raise ValueNotFound
    return [match.text for match in matches]
24
+
25
+
26
def _apply_operator(
    parser_name: Parser,
    current_value: str,
    operator_name: Operator,
    reference_value: str,
):
    """Parse both values with the same parser and compare them.

    Args:
        parser_name: Parser used for both values.
        current_value: Raw text fetched from the page.
        operator_name: Comparison to perform.
        reference_value: Raw reference text supplied by the caller.

    Returns:
        The result of ``current <operator> reference``.
    """
    # The current value is parsed first, then the reference value.
    current, reference = [
        parser.parse(parser_name, raw) for raw in (current_value, reference_value)
    ]
    return operator.apply(operator_name, current, reference)
35
+
36
+
37
def evaluate(
    url: str,
    selector: str,
    parser_name: Parser,
    operator_name: Operator,
    value: str,
    quantifier: str = "ANY",
):
    """Fetch the values matched by ``selector`` and compare them with ``value``.

    Args:
        url: Page to fetch.
        selector: CSS selector whose matches supply the current values.
        parser_name: Parser applied to each fetched value and to ``value``.
        operator_name: Comparison evaluated as ``current <operator> value``.
        value: Reference value, as text.
        quantifier: ``"ANY"`` (at least one match must satisfy the condition)
            or ``"EVERY"`` (all matches must). Matched case-insensitively;
            any other value is treated as ``"ANY"``.

    Returns:
        dict: ``"success"`` holds the overall outcome; ``"value"`` holds the
        fetched text, as a single string for one match or a list otherwise.

    Raises:
        ValueNotFound: If the selector matches nothing.
    """
    current_values = _fetch_values(url, selector)
    logger.debug("Found value %s", current_values)

    # Every value is compared eagerly so that a parse failure in any match
    # surfaces regardless of the quantifier.
    results = [
        _apply_operator(parser_name, val, operator_name, value)
        for val in current_values
    ]

    # Normalise case so that "every" is not silently evaluated as ANY.
    mode = str(quantifier).upper()
    success = all(results) if mode == "EVERY" else any(results)

    return {
        "success": success,
        "value": current_values if len(current_values) > 1 else current_values[0],
    }
65
+
66
+
67
+ __all__ = [
68
+ "evaluate",
69
+ "Parser",
70
+ "Operator",
71
+ "ParserNotSupportedError",
72
+ "OperatorNotSupportedError",
73
+ "ValueistException",
74
+ "ValueNotFound",
75
+ ]
@@ -0,0 +1,7 @@
1
class ValueistException(Exception):
    """Base class for the errors raised by this package."""
3
+
4
+
5
class ValueNotFound(ValueistException):
    """Error carrying the fixed message "Value not found"."""

    def __init__(self, *args: object) -> None:
        # Positional arguments are accepted but not used; the message is fixed.
        message = "Value not found"
        super().__init__(message)
@@ -0,0 +1,43 @@
1
+ from typing import Any, Literal, TypedDict
2
+ import operator
3
+
4
+ from .exception import ValueistException
5
+
6
+ type Operator = Literal[
7
+ "gt", ">", "lt", "<", "ge", ">=", "le", "<=", "eq", "=", "ne", "!="
8
+ ]
9
+
10
+ type ParsedValue = str | int | float | bool
11
+
12
# Each comparison is registered under a word alias and a symbol alias.
# Insertion order matters: it is the order used in the error message of
# OperatorNotSupportedError.
_operators = {
    alias: compare
    for compare, aliases in (
        (operator.gt, ("gt", ">")),
        (operator.lt, ("lt", "<")),
        (operator.ge, ("ge", ">=")),
        (operator.le, ("le", "<=")),
        (operator.eq, ("eq", "=")),
        (operator.ne, ("ne", "!=")),
    )
    for alias in aliases
}
26
+
27
+
28
class OperatorNotSupportedError(ValueistException):
    """Raised for an operator name that has no entry in ``_operators``."""

    def __init__(self, *args: object) -> None:
        # Positional arguments are accepted but not used; the message always
        # lists the supported operator names.
        supported = ",".join(_operators)
        super().__init__(
            f"Operator not supported. Possible operators are: {supported}"
        )
33
+
34
+
35
def _get_operator(operator_name: str):
    """Return the comparison function registered under ``operator_name``.

    Raises:
        OperatorNotSupportedError: If the name is not registered.
    """
    try:
        compare = _operators[operator_name]
    except KeyError as missing:
        raise OperatorNotSupportedError from missing
    return compare
40
+
41
+
42
def apply(operator_name: Operator, a: ParsedValue, b: ParsedValue) -> bool:
    """Evaluate ``a <operator_name> b`` and return the outcome.

    Raises:
        OperatorNotSupportedError: If ``operator_name`` is not registered.
    """
    compare = _get_operator(operator_name)
    return compare(a, b)
@@ -0,0 +1,108 @@
1
+ from decimal import ROUND_HALF_UP, Decimal
2
+ import re
3
+ from typing import Literal
4
+
5
+ from .exception import ValueistException
6
+
7
+ type Parser = Literal["int", "str", "bool", "float"]
8
+
9
+ INT_EXPONENT=Decimal('0')
10
+
11
def _clean_float_string(val: str):
    """Normalise free-form numeric text into a string that ``float`` accepts.

    Steps:
      1) Keep only digits, commas, dots and minus signs.
      2) Keep a minus sign only when it is the first remaining character.
      3) Drop every comma or dot that is not followed by exactly 1 or 2
         digits at the end of the string (such separators are treated as
         thousands separators), then turn a remaining comma into a dot.

    Args:
        val (str): Text expected to contain a number, e.g. ``"€1.234,56"``.

    Returns:
        str: The cleaned text, e.g. ``"1234.56"``. It may be empty when
        ``val`` contains no digits.
    """
    # Inside a character class "|" is a literal, not an alternation, so it
    # must not be listed here or pipes would survive the cleanup.
    cleaned = re.sub(r"[^\d.,-]", "", val)

    # Keep the minus sign only in leading position.
    if cleaned.startswith("-"):
        cleaned = "-" + cleaned[1:].replace("-", "")
    else:
        cleaned = cleaned.replace("-", "")

    # Remove separators that are NOT the decimal separator, i.e. not
    # followed by exactly 1 or 2 digits at the end of the string.
    without_thousands = re.sub(r"(?![.,]\d{1,2}$)[.,]", "", cleaned)
    return without_thousands.replace(",", ".")
36
+
37
def _clean_bool_tiny_int_string(val: str):
    """Reduce boolean-like text to a string of ``0``/``1`` digits.

    Every character that does not start one of the tokens true/false/yes/no/
    1/0/t/f/y/n (case-insensitive) is removed. The remaining letters are then
    mapped: F and N become ``0``, T and Y become ``1``.

    Args:
        val (str): Text expected to hold a boolean or tiny int.

    Returns:
        str: The digit string for the provided text.
    """
    token_starts = re.sub("(?i)(?!true|false|yes|no|1|0|t|f|y|n).", "", val)
    as_digits = str.maketrans({"F": "0", "T": "1", "Y": "1", "N": "0"})
    return token_starts.upper().translate(as_digits)
54
+
55
def _parse_int(val: str):
    """Parse numeric text into an integer, rounding halves up.

    The text is first cleaned as a float string, then rounded with
    ROUND_HALF_UP rather than banker's rounding.

    Args:
        val (str): Text expected to contain a number.

    Returns:
        int: The rounded integer for the provided text.
    """
    as_decimal = Decimal(_clean_float_string(val))
    rounded = as_decimal.quantize(exp=INT_EXPONENT, rounding=ROUND_HALF_UP)
    return int(rounded)
69
+
70
def _parse_float(val: str):
    """Clean numeric text and convert it to a ``float``."""
    cleaned = _clean_float_string(val)
    return float(cleaned)
72
+
73
def _parse_bool(val: str):
    """Parse boolean-like text into a ``bool``.

    Args:
        val (str): Text expected to hold a boolean or tiny int.

    Returns:
        bool: False when the cleaned digit string equals zero, True otherwise.
    """
    return int(_clean_bool_tiny_int_string(val)) != 0
84
+
85
# Lookup table from parser name to the callable that converts raw text.
# Key order is the order used in the error message of ParserNotSupportedError.
_parsers = {
    "int": _parse_int,
    "float": _parse_float,
    "str": str,  # built-in str is used directly
    "bool": _parse_bool
}
91
+
92
+
93
class ParserNotSupportedError(ValueistException):
    """Raised for a parser name that has no entry in ``_parsers``."""

    def __init__(self, *args: object) -> None:
        # Positional arguments are accepted but not used; the message always
        # lists the supported parser names.
        supported = ",".join(_parsers)
        super().__init__(
            f"Parser not supported. Possible parsers are: {supported}"
        )
98
+
99
+
100
def _get_parser(parser_name: str):
    """Return the parsing callable registered under ``parser_name``.

    Raises:
        ParserNotSupportedError: If the name is not registered.
    """
    try:
        selected = _parsers[parser_name]
    except KeyError as missing:
        raise ParserNotSupportedError from missing
    return selected
105
+
106
+
107
def parse(parser_name: Parser, value: str):
    """Convert ``value`` with the parser registered under ``parser_name``.

    Raises:
        ParserNotSupportedError: If ``parser_name`` is not registered.
    """
    convert = _get_parser(parser_name)
    return convert(value)