web-datafilter 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,472 @@
1
+ """
2
+ DataFilter is a library written in Python that checks data for signs of web vulnerability exploitation
3
+
4
+ For full documentation, see the README.MD file in the project's GitHub repository
5
+ Link to repository: https://github.com/qwertyvs/DataFilter
6
+ """
7
+
8
+ import regex as re, html, unicodedata, hashlib
9
+ from time import time_ns
10
+ from urllib.parse import unquote
11
+
12
# Maximum time, in seconds, that a single regex pattern search may take.
# Each check function passes the matching value as the `timeout` argument of
# every pattern search; redefine these variables in your code depending on CPU.
sqli_patern_time=0.1
ssti_patern_time=0.1
xss_patern_time=0.2
16
+
17
# Groups of symbols for quickly assembling a string of allowed symbols.
# "ascii" is lowercase + uppercase letters; "all" is "ascii" + "numbers" + "special".
# Note that "*" appears twice in "special" (and therefore in "all").
symbolsDict = {
    "ascii_lowercase": "abcdefghijklmnopqrstuvwxyz",
    "ascii_uppercase": "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "ascii": "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
    "numbers": "1234567890",
    "special": "!@#$%^&*()_-+=:;<>,.?/*",
    "all": "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_-+=:;<>,.?/*",
}
26
+
27
# Frequently used SQL keywords, operators, function names and system objects.
# The entries are escaped and joined into one alternation to build the
# "sql_keyword_used" pattern in _SQLI_FILTERS. Several entries are repeated;
# a repeated alternative does not change what the combined pattern matches.
_SQL_KEYWORDS = [
    "select", "insert", "update", "delete", "replace", "truncate",
    "create", "alter", "drop", "rename", "grant", "revoke", "use",
    "describe", "desc", "show", "explain",

    "from", "where", "having", "group by", "order by", "limit", "offset",
    "top", "fetch", "into", "values", "returning", "union", "union all",
    "intersect", "except", "distinct", "case", "when", "then", "else", "end",

    "and", "or", "not", "xor", "like", "ilike", "rlike", "regexp", "similar to",
    "in", "exists", "all", "any", "between", "is", "null", "is null",
    "is not null", "=", "==", "!=", "<>", ">", "<", ">=", "<=",

    "--", "#", "/*", "*/", ";", "-- ", "# ",

    "cast", "convert", "concat", "concat_ws", "group_concat", "string_agg",
    "substr", "substring", "left", "right", "mid", "instr", "locate",
    "length", "char_length", "len", "upper", "lower", "trim", "ltrim", "rtrim",
    "replace", "replace(", "ascii", "char", "chr", "hex", "unhex",

    "+", "-", "*", "/", "%", "mod", "power", "floor", "ceil",

    "exec", "execute", "sp_executesql", "execute immediate", "prepare",
    "deallocate", "execute immediate", "declare", "set", "select into",
    "openrowset", "opendatasource", "openquery", "bulk insert", "bcp",

    "version", "@@version", "version()", "user()", "current_user", "session_user",
    "system_user", "@@hostname", "@@datadir", "@@identity", "@@rowcount",
    "database()", "schema_name", "schema()", "database", "schema",

    "sleep", "benchmark", "load_file", "into outfile", "into dumpfile",
    "information_schema", "performance_schema", "mysql.user", "found_rows",
    "updatexml", "extractvalue", "group_concat", "benchmark(", "sleep(",

    "pg_sleep", "pg_read_file", "pg_ls_dir", "pg_read_binary_file",
    "pg_shadow", "pg_roles", "pg_database", "pg_user", "pg_catalog", "current_database",

    "xp_cmdshell", "sp_msforeachdb", "sp_msforeachtable", "xp_dirtree",
    "xp_availablemedia", "xp_regread", "xp_regwrite", "sp_oacreate",
    "sp_oamethod", "sp_oaputfile", "sp_configure", "master..", "sysobjects",
    "sysdatabases", "information_schema.tables", "sys.tables", "sys.schemas",
    "bulkadmin", "dbcc", "OPENROWSET", "OPENDATASOURCE", "xp_subdirs",

    "dbms_lock.sleep", "dbms_pipe.receive_message", "dbms_output", "utl_http.request",
    "utl_file", "utl_file.fopen", "all_users", "dba_users", "user_users",
    "v$version", "v$instance", "xmltype", "extractvalue", "updatexml",
    "to_char", "to_date", "rownum", "connect by", "sys.dba_users",

    "sqlite_master", "pragma", "attach", "detach", "load_extension",

    "into outfile", "into dumpfile", "load_file(", "xp_cmdshell", "shell",
    "cmd.exe", "powershell", "wget", "curl", "ftp", "into incremental", "outfile",

    "xmltype", "extractvalue", "updatexml", "xpath", "json_extract", "jsonb_extract_path",
    "jsonb_each", "json_each", "jsonb_each_text",

    "information_schema.columns", "information_schema.tables",
    "information_schema.routines", "information_schema.schemata",
    "pg_catalog.pg_tables", "pg_catalog.pg_roles", "all_tables", "dba_tables",

    "inet_server_addr", "inet_server_port", "version()", "session_user()",
    "current_user()", "user()", "database()", "schema()", "schema_name()",

    "count(", "sum(", "avg(", "min(", "max(",

    "sp_tables", "sp_columns", "sp_help", "sp_helptext", "sp_who", "sp_who2",
    "sp_password", "sp_addsrvrolemember", "sp_addlinkedserver",

    "grant", "revoke", "create user", "alter user", "drop user", "create role",
    "dba_", "all_", "role_", "privileges", "has_privilege",

    "concat(", "group_concat(", "string_agg(", "regexp_replace", "regexp_like",
    "instr(", "position(", "pg_sleep(", "sleep(", "benchmark(", "waitfor delay",
    "waitfor", "delay", "dbms_lock.sleep(", "utl_http.request(", "utl_inaddr.get_host_address",

    "having", "limit", "offset", "order", "by", "group", "procedure", "function",
    "trigger", "triggered", "cursor", "open", "fetch", "close", "loop", "if", "elsif",
    "elsif", "else", "end", "case", "while", "for", "begin", "declare", "exception",

    "||", "+", "concat", "concat_ws", "0x", "0x", "/*", "*/", "--", "#",

    "unionselect", "union all select", "union allselect", "unionselect", "union--", "union/*",
    "sleep(", "benchmark(", "benchmark(", "intooutfile", "intodumpfile", "intooutfile(",

    "xmlserialize", "xmlagg", "db2.", "teradata", "tdg", "sysibm", "qsys2", "syscat",
    "SYSDUMMY1", "sysobjects", "syscolumns", "syscomments", "sys.sql_modules",

    "passwd", "password", "pwd", "hash", "salt", "credit_card", "ssn", "social_security_number",

    "load_file(", "openrowset(", "xp_cmdshell(", "sp_oacreate(", "sp_oamethod(",

    "mysql.user", "pg_shadow", "pg_user", "dba_users", "all_users", "user_users",
    "information_schema", "performance_schema", "pg_catalog", "v$session", "v$instance",

    "extractvalue(", "updatexml(", "xmlquery(", "xmltable(",

    "hex(", "unhex(", "base64_decode(", "from_base64(", "to_base64(", "decode(",

    "msdb.dbo.backupset", "msdb.dbo.restorefile", "msdb", "master.dbo", "dba_tables", "all_tables",

    "xp_cmdshell", "xp_dirtree", "xp_regread", "xp_regwrite", "xp_subdirs", "openquery", "openrowset",

    # NOTE(review): these look like ordinary words that merely contain SQL
    # keywords. As members of this list they are themselves reported as
    # keywords -- confirm whether they were meant to be exclusions instead.
    "selecting", "selection", "selected", "dropbox", "updateable"
]
132
+
133
# SQL injection signatures: detection name -> compiled pattern.
# _strSQLICheck searches the input with every pattern, in this order, and
# appends the name of each pattern that matches to the report's detections.
_SQLI_FILTERS = {
    # "--" or "#" that is followed by whitespace or by the end of the string
    "sql_comment": re.compile(r"(--|#)(?!\S)", re.IGNORECASE),
    # "/* ... */" block; DOTALL lets it span several lines
    "sql_comment_multi": re.compile(r"/\*.*?\*/", re.IGNORECASE | re.DOTALL),
    # optional quote followed by "or 1=1"
    "tautology_numeric": re.compile(r"(?:'|\")?\s*or\s+1\s*=\s*1\b", re.IGNORECASE),
    # quote followed by "or" and a comparison of two quoted, non-empty strings
    # (the two strings are not required to be equal)
    "tautology_string": re.compile(r"(?:'|\")\s*or\s+['\"][^'\"]+['\"]\s*=\s*['\"][^'\"]+['\"]", re.IGNORECASE),
    # "union select" / "union all select"
    "union_select": re.compile(r"\bunion\b\s*(all\s*)?\bselect\b", re.IGNORECASE),
    # ";" followed by the keyword that starts another statement
    "stacked_query": re.compile(r";\s*(select|insert|update|delete|drop|create|alter|exec|declare)\b", re.IGNORECASE),
    # call of sleep / pg_sleep / benchmark
    "time_based": re.compile(r"\b(sleep|pg_sleep|benchmark)\s*\(", re.IGNORECASE),
    # 0x... hex literal, or a call of char( / chr(
    "hex_or_char": re.compile(r"\b0x[0-9a-f]+\b|\bchar\s*\(|\bchr\s*\(", re.IGNORECASE),
    # quote = quote (e.g. '=') or the literal ' or 'x'='x'
    "always_true_like": re.compile(r"(?:(?:'|\")\s*=\s*(?:'|\"))|(?:'\s*or\s*'x'='x')", re.IGNORECASE),
    # any _SQL_KEYWORDS entry delimited by word boundaries on both sides.
    # NOTE(review): \b needs a word character next to it, so entries that
    # start or end with punctuation only match in limited contexts -- verify.
    "sql_keyword_used": re.compile(r"\b(" + "|".join(map(re.escape, _SQL_KEYWORDS)) + r")\b", re.IGNORECASE),
    # "or"/"and", whitespace, a run of characters other than = < >, then a comparison operator
    "logical_expression": re.compile(r"\b(or|and)\b\s+[^=<>]+\s*(=|>|<)", re.IGNORECASE)
}
147
+
148
# Server-side template injection signatures: detection name -> compiled pattern.
# _strSSTICheck searches the input with every pattern, in this order.
_SSTI_FILTERS = {
    # Python dunder attributes (compiled without IGNORECASE, so case-sensitive)
    "python_magic_attrs": re.compile(r"__class__|__mro__|__subclasses__|__globals__|__init__"),
    # os.popen, subprocess, eval(, exec(, open(
    "python_danger_calls": re.compile(r"os\.popen|subprocess|eval\(|exec\(|open\(", re.IGNORECASE),
    # Runtime.getRuntime, ProcessBuilder, Class.forName
    "java_runtime": re.compile(r"Runtime\.getRuntime|ProcessBuilder|Class\.forName", re.IGNORECASE),
    # shell_exec, passthru, system(, exec(
    "php_exec": re.compile(r"shell_exec|passthru|system\(|exec\(", re.IGNORECASE),
    # constructor.constructor, with optional whitespace around the dot
    "node_constructor_rce": re.compile(r"constructor\s*\.\s*constructor", re.IGNORECASE),
    # instance_eval, class_eval, "Kernel."
    "ruby_eval": re.compile(r"instance_eval|class_eval|Kernel\.", re.IGNORECASE),
    # {{ ... }}
    "double_curly": re.compile(r"\{\{.*?\}\}", re.DOTALL),
    # {% ... %}
    "percent_blocks": re.compile(r"\{\%.*?\%\}", re.DOTALL),
    # <% ... %>
    "angle_percent": re.compile(r"<%.*?%>", re.DOTALL),
    # ${ ... }
    "dollar_brace": re.compile(r"\$\{.*?\}", re.DOTALL),
    # #{ ... }
    "hash_brace": re.compile(r"\#\{.*?\}", re.DOTALL),
    # "|" followed by one of the listed filter names
    "template_filters": re.compile(r"\|\s*(safe|join|attr|map|select|system)", re.IGNORECASE),
    # {{ ... }} containing one of + - * /
    "template_math": re.compile(r"\{\{.*?[\+\-\*/].*?\}\}", re.DOTALL),
    # {{{ ... }}}
    "triple_curly": re.compile(r"\{\{\{.*?\}\}\}", re.DOTALL),
    # <# ... >
    "freemarker_directive": re.compile(r"<#.*?>", re.DOTALL),
    # ${ ... } or $!{ ... }; the "!" is optional, so this also matches what
    # "dollar_brace" matches
    "dollar_excl_brace": re.compile(r"\$!?\{.*?\}", re.DOTALL),
}
167
+
168
# Cross-site scripting signatures: detection name -> compiled pattern.
# _strXSSCheck searches the input with every pattern, in this order.
# "java[\x00-\x20]*script" tolerates control characters and spaces between
# "java" and "script".
_XSS_FILTERS = {
    # opening <script tag
    "script_tag": re.compile(r"<\s*script\b", re.IGNORECASE),
    # javascript: protocol
    "javascript_protocol": re.compile(r"java[\x00-\x20]*script\s*:", re.IGNORECASE),
    # attribute named "on" + at least two letters, followed by "="
    "on_event_attribute": re.compile(r"\bon[a-z]{2,}\s*=", re.IGNORECASE),
    # opening <iframe, <object or <embed tag
    "dangerous_embed_tag": re.compile(r"<\s*(?:iframe|object|embed)\b", re.IGNORECASE),
    # <meta http-equiv=refresh ...> whose url= is a javascript: URI
    "meta_refresh_js": re.compile(r"<\s*meta\b[^>]*http-equiv\s*=\s*['\"]?\s*refresh[^>]*" r"\burl\s*=\s*['\"]?\s*java[\x00-\x20]*script\s*:", re.IGNORECASE),
    # href/src/xlink:href/formaction/action set to javascript or to a
    # data: URI of type text/html or image/svg+xml
    "scriptable_uri": re.compile(r"\b(?:href|src|xlink:href|formaction|action)\s*=\s*['\"]?" r"(?:java[\x00-\x20]*script|data\s*:\s*(?:text/html|image/svg\+xml))", re.IGNORECASE),
    # srcdoc= attribute
    "srcdoc": re.compile(r"\bsrcdoc\s*=", re.IGNORECASE),
    # CSS url( ... ) pointing at a javascript: URI
    "css_js_url": re.compile(r"url\s*\(\s*['\"]?\s*java[\x00-\x20]*script\s*:", re.IGNORECASE),
    # CSS expression( ... )
    "css_expression": re.compile(r"\bexpression\s*\(", re.IGNORECASE),
    # entity-encoded "<" (&lt; / &#60; / &#x3c;) followed by "script"
    "encoded_script_tag": re.compile(r"&(?:lt|#0*60|#x0*3c)\s*;?\s*script\b", re.IGNORECASE),
    # opening <svg or <math tag
    "svg_math_tag": re.compile(r"<\s*(?:svg|math)\b", re.IGNORECASE),
    # background= attribute whose value starts with javascript
    "background": re.compile(r"\sbackground\s*=\s*['\"]?\s*java[\x00-\x20]*script", re.IGNORECASE),
}
183
+
184
+
185
+
186
class DataFilterException(Exception):
    """Exception type raised by the DataFilter library itself.

    The message passed to the constructor is stored as the first (and only)
    positional argument of the exception and is what ``str()`` returns.
    """

    def __init__(self, text: str = "Unknown exception occured"):
        Exception.__init__(self, text)

    def __str__(self) -> str:
        message = self.args[0]
        return str(message)
193
+
194
+
195
+
196
class filterReport:
    """Report structure returned by the filter functions.

    Attributes:
        data - initial data that was passed to the filter function (string)
        type - kind of check that was performed
        status - code phrase set by the filter function:
            OK - no detections
            FOUND - suspicious data found (false positive safeguard)
            DETECTED - dangerous payload found or data is too suspicious (many detections)

        detections - list of short names of the detected techniques:
            if status is FOUND or DETECTED it holds the names of the matched signatures
            if status is OK it is an empty list

        issecure - tells whether the data is considered secure:
            True if data is considered secure
            False if data may be dangerous

        processtime - time taken to process the data, in ns (0 until a filter function sets it)
    """

    # The annotation of `detections` is a string on purpose: the bare
    # expression `list[str] | None` is evaluated when the class body runs and
    # raises TypeError on Python versions older than 3.10.
    def __init__(self, data: str = "", type: str = "", status: str = "None", detections: "list[str] | None" = None, issecure: bool = False) -> None:
        self.data = data
        self.type = type
        self.status = status
        # Always store a private copy so the caller's list is never shared.
        self.detections = [] if detections is None else list(detections)
        self.issecure = issecure
        self.processtime = 0
224
+
225
+
226
+
227
def strSQLICheck(data: str = "", allowedSymbols: str = "") -> filterReport:
    """Check data for signs of SQL injection.

    Thin public wrapper around ``_strSQLICheck``: every exception raised by the
    implementation is re-raised as ``DataFilterException``.

    :param data: data to be checked
    :type data: str
    :param allowedSymbols: string of symbols allowed in data;
    if data contains a symbol which is not in allowedSymbols, the function returns DETECTED status, defaults to ""
    :type allowedSymbols: str, optional
    :raises DataFilterException: on invalid argument types or any internal error
    :return: report, including results of check
    :rtype: filterReport
    """
    try:
        return _strSQLICheck(data = data, allowedSymbols = allowedSymbols)
    except Exception as exp:
        # Chain explicitly so the original exception stays reachable as __cause__.
        raise DataFilterException(f"Exception occured in strSQLICheck, details: {exp}") from exp
243
+
244
+
245
+
246
def _strSQLICheck(data: str = "", allowedSymbols: str = "") -> filterReport:
    """Implementation of strSQLICheck.

    Steps:
    1. validate argument types;
    2. if allowedSymbols is non-empty, reject data containing any other symbol;
    3. record quote usage, then search data with every pattern of _SQLI_FILTERS.

    Resulting status:
    OK - nothing matched and data holds no quotes
    FOUND - exactly one detection (a quote, or one pattern in quote-free data);
    a single pattern match in quote-free data leaves issecure True
    DETECTED - banned symbol, regex timeout, or more than one detection

    :raises DataFilterException: if data or allowedSymbols is not a str
    :return: filled report with type "SQLI"
    :rtype: filterReport
    """
    starttime = time_ns()
    if not isinstance(data, str):
        raise DataFilterException(f"INVALID_INPUT: strSQLICheck expected str as data, instead got {type(data)}")

    if not isinstance(allowedSymbols, str):
        raise DataFilterException(f"INVALID_INPUT: strSQLICheck expected str as allowedSymbols, instead got {type(allowedSymbols)}")

    if allowedSymbols:
        allowed = set(allowedSymbols)
        if any(symbol not in allowed for symbol in data):
            # Fill in type and processtime here too, like on the normal path.
            report = filterReport(data, type = "SQLI", status = "DETECTED", detections = ["banned_symbol_usage"], issecure = False)
            report.processtime = time_ns() - starttime
            return report

    report = filterReport(data = data, type = "SQLI", status = "OK", issecure = True)

    has_quote = "'" in data or '"' in data
    if has_quote:
        report.detections.append("quotes_usage")
        report.status = "FOUND"
        report.issecure = False

    def match_add(name: str) -> None:
        """Search data with one pattern and record a match or a timeout."""
        try:
            matched = _SQLI_FILTERS[name].search(data, timeout = sqli_patern_time)
        except TimeoutError:
            # A pattern that runs out of time is reported as a DoS attempt.
            if "dos_payload" not in report.detections:
                report.detections.append("dos_payload")
            report.issecure = False
            report.status = "DETECTED"
            return
        if not matched:
            return
        report.detections.append(name)
        if has_quote:
            report.issecure = False
            report.status = "DETECTED"
        elif report.status != "DETECTED":
            # Never downgrade a DETECTED that an earlier timeout has set.
            report.status = "FOUND"

    for pattern in _SQLI_FILTERS:
        match_add(pattern)

    # Several independent signals together are treated as an attack.
    if len(report.detections) > 1:
        report.issecure = False
        report.status = "DETECTED"
    report.processtime = time_ns() - starttime
    return report
295
+
296
+
297
+
298
def strSSTICheck(data: str = "", allowedSymbols: str = "") -> filterReport:
    """Check data for signs of server-side template injection.

    Thin public wrapper around ``_strSSTICheck``: every exception raised by the
    implementation is re-raised as ``DataFilterException``.

    :param data: data to be checked
    :type data: str
    :param allowedSymbols: string of symbols allowed in data;
    if data contains a symbol which is not in allowedSymbols, the function returns DETECTED status, defaults to ""
    :type allowedSymbols: str, optional
    :raises DataFilterException: on invalid argument types or any internal error
    :return: report, including results of check
    :rtype: filterReport
    """
    try:
        return _strSSTICheck(data = data, allowedSymbols = allowedSymbols)
    except Exception as exp:
        # Chain explicitly so the original exception stays reachable as __cause__.
        raise DataFilterException(f"Exception occured in strSSTICheck, details: {exp}") from exp
314
+
315
+
316
+
317
def _strSSTICheck(data: str = "", allowedSymbols: str = "") -> filterReport:
    """Implementation of strSSTICheck.

    Validates the argument types, applies the optional allowed-symbols
    restriction and then searches data with every pattern of _SSTI_FILTERS.
    Any match, and any regex timeout, sets status DETECTED and issecure False.

    :raises DataFilterException: if data or allowedSymbols is not a str
    :return: filled report with type "SSTI"
    :rtype: filterReport
    """
    starttime = time_ns()

    if not isinstance(data, str):
        raise DataFilterException(f"INVALID_INPUT: strSSTICheck expected str as data, instead got {type(data)}")

    if not isinstance(allowedSymbols, str):
        raise DataFilterException(f"INVALID_INPUT: strSSTICheck expected str as allowedSymbols, instead got {type(allowedSymbols)}")

    if allowedSymbols:
        allowed = set(allowedSymbols)
        if any(symbol not in allowed for symbol in data):
            # Fill in type and processtime here too, like on the normal path.
            report = filterReport(data, type = "SSTI", status = "DETECTED", detections = ["banned_symbol_usage"], issecure = False)
            report.processtime = time_ns() - starttime
            return report

    report = filterReport(data = data, type = "SSTI", status = "OK", issecure = True)

    def match_add(name: str) -> None:
        """Search data with one pattern and record a match or a timeout."""
        try:
            matched = _SSTI_FILTERS[name].search(data, timeout = ssti_patern_time)
        except TimeoutError:
            # A pattern that runs out of time is reported as a DoS attempt.
            if "dos_payload" not in report.detections:
                report.detections.append("dos_payload")
            report.issecure = False
            report.status = "DETECTED"
            return
        if matched:
            report.detections.append(name)
            report.issecure = False
            report.status = "DETECTED"

    # match_add handles the timeout itself, so no extra handler is needed here.
    for pattern in _SSTI_FILTERS:
        match_add(pattern)

    report.processtime = time_ns() - starttime
    return report
356
+
357
+
358
+
359
def strXSSCheck(data: str = "", allowedSymbols: str = "") -> filterReport:
    """Check data for signs of cross-site scripting.

    Thin public wrapper around ``_strXSSCheck``: every exception raised by the
    implementation is re-raised as ``DataFilterException``.

    :param data: data to be checked
    :type data: str
    :param allowedSymbols: string of symbols allowed in data;
    if data contains a symbol which is not in allowedSymbols, the function returns DETECTED status, defaults to ""
    :type allowedSymbols: str, optional
    :raises DataFilterException: on invalid argument types or any internal error
    :return: report, including results of check
    :rtype: filterReport
    """
    try:
        return _strXSSCheck(data = data, allowedSymbols = allowedSymbols)
    except Exception as exp:
        # Chain explicitly so the original exception stays reachable as __cause__.
        raise DataFilterException(f"Exception occured in strXSSCheck, details: {exp}") from exp
375
+
376
+
377
+
378
def _strXSSCheck(data: str = "", allowedSymbols: str = "") -> filterReport:
    """Implementation of strXSSCheck.

    Validates the argument types, applies the optional allowed-symbols
    restriction, normalizes the input and then searches the normalized text
    with every pattern of _XSS_FILTERS. Any match, and any regex timeout,
    sets status DETECTED and issecure False.

    Normalization repeats, at most 20 times and until the text stops changing:
    URL-decoding, HTML entity decoding, removal of control characters and
    Unicode NFC normalization. The report keeps the original input in ``data``;
    only the pattern search uses the normalized text.

    :raises DataFilterException: if data or allowedSymbols is not a str
    :return: filled report with type "XSS"
    :rtype: filterReport
    """
    starttime = time_ns()

    if not isinstance(data, str):
        raise DataFilterException(f"INVALID_INPUT: strXSSCheck expected str as data, instead got {type(data)}")

    if not isinstance(allowedSymbols, str):
        raise DataFilterException(f"INVALID_INPUT: strXSSCheck expected str as allowedSymbols, instead got {type(allowedSymbols)}")

    if allowedSymbols:
        allowed = set(allowedSymbols)
        if any(symbol not in allowed for symbol in data):
            # Fill in type and processtime here too, like on the normal path.
            report = filterReport(data, type = "XSS", status = "DETECTED", detections = ["banned_symbol_usage"], issecure = False)
            report.processtime = time_ns() - starttime
            return report

    normalized = data
    seen = set()
    for _ in range(20):
        # Digests of the texts already processed; stop if one comes back.
        digest = hashlib.blake2b(normalized.encode('utf-8','ignore'), digest_size=8).digest()
        if digest in seen:
            break
        seen.add(digest)
        decoded = unquote(normalized)
        decoded = html.unescape(decoded)
        decoded = re.sub(r"[\x00-\x08\x0b-\x1f\x7f]+", "", decoded)
        decoded = unicodedata.normalize("NFC", decoded)
        if decoded == normalized:
            break
        normalized = decoded

    # Report the caller's original input, as the filterReport contract states.
    report = filterReport(data = data, type = "XSS", status = "OK", issecure = True)

    def match_add(name: str) -> None:
        """Search the normalized text with one pattern and record the outcome."""
        try:
            matched = _XSS_FILTERS[name].search(normalized, timeout = xss_patern_time)
        except TimeoutError:
            # A pattern that runs out of time is reported as a DoS attempt.
            if "dos_payload" not in report.detections:
                report.detections.append("dos_payload")
            report.issecure = False
            report.status = "DETECTED"
            return
        if matched:
            report.detections.append(name)
            report.issecure = False
            report.status = "DETECTED"

    # match_add handles the timeout itself, so no extra handler is needed here.
    for pattern in _XSS_FILTERS:
        match_add(pattern)

    report.processtime = time_ns() - starttime
    return report
431
+
432
+
433
+
434
def strMultCheck(data: str = "", allowedSymbols: str = "", modes: "list[str]" = ["SQLI", "SSTI", "XSS"]) -> "dict[str, object]":
    """Check data for several kinds of vulnerability exploitation at once.

    Thin public wrapper around ``_strMultCheck``: every exception raised by the
    implementation is re-raised as ``DataFilterException``.

    :param data: data to be checked
    :type data: str
    :param allowedSymbols: string of symbols allowed in data;
    if data contains a symbol which is not in allowedSymbols, the check functions return DETECTED status, defaults to ""
    :type allowedSymbols: str, optional
    :param modes: names of the checks to run, each one of "SQLI", "SSTI", "XSS";
    the default list is only read, never modified
    :type modes: list[str], optional
    :raises DataFilterException: on an unknown mode, invalid argument types or any internal error
    :return: report for each requested check, plus total check time, total issecure and total status
    :rtype: dict
    """
    try:
        return _strMultCheck(data = data, allowedSymbols = allowedSymbols, modes = modes)
    except Exception as exp:
        # Chain explicitly so the original exception stays reachable as __cause__.
        raise DataFilterException(f"Exception occured in strMultCheck, details: {exp}") from exp
450
+
451
+
452
+
453
def _strMultCheck(data: str = "", allowedSymbols: str = "", modes: "list[str]" = ["SQLI", "SSTI", "XSS"]) -> "dict[str, object]":
    """Implementation of strMultCheck.

    Runs the check function of every requested mode and collects the reports.

    Keys of the returned dict:
    one key per requested mode ("SQLI", "SSTI", "XSS") - its filterReport
    "total_status" - most severe status among the reports
    (DETECTED > FOUND > OK; "OK" when nothing was flagged)
    "total_issecure" - False as soon as one report has issecure False
    "total_processtime" - time spent in this function, in ns

    :raises DataFilterException: if modes contains an unknown mode
    :return: dict described above
    :rtype: dict
    """
    starttime = time_ns()

    check_funcs = {"SQLI": strSQLICheck, "SSTI": strSSTICheck, "XSS": strXSSCheck}

    # Validate every mode before running anything.
    for mode in modes:
        if mode not in check_funcs:
            raise DataFilterException(f"INVALID_INPUT: strMultCheck got unexpected mode {mode}, expected SSTI, SQLI or XSS")

    severity = {"OK": 0, "FOUND": 1, "DETECTED": 2}
    reports = {}
    total_status = "OK"
    total_issecure = True

    for mode in modes:
        report = check_funcs[mode](data, allowedSymbols)
        reports[mode] = report
        if not report.issecure:
            total_issecure = False
        # Keep the most severe status so a later, milder report cannot downgrade it.
        if severity.get(report.status, 0) > severity[total_status]:
            total_status = report.status

    reports["total_status"] = total_status
    reports["total_issecure"] = total_issecure
    reports["total_processtime"] = time_ns() - starttime
    return reports
DataFilter/__init__.py ADDED
@@ -0,0 +1,20 @@
1
"""Public interface of the DataFilter package."""

# The check functions are defined in the package's DataFilter module.
from .DataFilter import strSQLICheck
from .DataFilter import strSSTICheck
from .DataFilter import strXSSCheck
from .DataFilter import strMultCheck

# NOTE(review): the check functions read timeout globals defined next to
# them, not these config values -- confirm which set callers should override.
from .config import (
    sqli_pattern_time,
    ssti_pattern_time,
    xss_pattern_time
)

__all__ = [
    "strSQLICheck",
    "strSSTICheck",
    "strXSSCheck",
    "strMultCheck",
    "sqli_pattern_time",
    "ssti_pattern_time",
    "xss_pattern_time"
]
DataFilter/config.py ADDED
@@ -0,0 +1,3 @@
1
# Regex timeout settings re-exported by the package __init__ (presumably in
# seconds, like the similarly named globals of the check module).
# NOTE(review): the check functions use globals spelled `sqli_patern_time`,
# `ssti_patern_time` and `xss_patern_time`, not these names -- confirm which
# set callers are expected to override.
sqli_pattern_time = 0.1
ssti_pattern_time = 0.1
xss_pattern_time = 0.2
@@ -0,0 +1,299 @@
1
+ Metadata-Version: 2.4
2
+ Name: web-datafilter
3
+ Version: 0.1.0
4
+ Summary: Security-oriented Python library for detecting SQLi, SSTI and XSS payloads
5
+ Author-email: qwertyvs <NuhUh@gmail.com>
6
+ License: MIT
7
+ Keywords: security,sqli,xss,ssti,input-validation
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.8
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENCE
13
+ Requires-Dist: regex
14
+ Dynamic: license-file
15
+
16
+ # DataFilter - документация
17
+
18
+ **Автор:** qwertyvs<br>**Репозиторий:** https://github.com/qwertyvs/DataFilter
19
+
20
+ ## Оглавление
21
+
22
+ * [Кратко о проекте](#кратко-о-проекте)
23
+ * [Установка](#установка)
24
+ * [Ожидаемое поведение](#ожидаемое-поведение)
25
+
26
+ * [Обычный пользователь](#обычный-пользователь)
27
+ * [Нарушитель (злоумышленник)](#нарушитель-злоумышленник)
28
+ * [Значения статусов](#значения-статусов)
29
+ * [Глобальные параметры](#глобальные-параметры)
30
+ * [Классы](#классы)
31
+
32
+ * [DataFilterException](#datafilterexception)
33
+ * [filterReport](#filterreport)
34
+ * [Функции](#функции)
35
+
36
+ * [strSQLICheck()](#strsqlicheck)
37
+ * [strSSTICheck()](#strssticheck)
38
+ * [strXSSCheck()](#strxsscheck)
39
+ * [strMultCheck()](#strmultcheck)
40
+ * [Пример использования](#пример-использования)
41
+ * [Обратная связь](#обратная-связь)
42
+
43
+ # Кратко о проекте
44
+
45
+ **DataFilter** - это лёгкая библиотека на Python, предназначенная для
46
+ анализа входных данных и обнаружения возможных попыток эксплуатации
47
+ распространённых веб‑уязвимостей.
48
+
49
+ Библиотека умеет обнаруживать признаки:
50
+
51
+ - SQL Injection (SQLi)
52
+ - Server-Side Template Injection (SSTI)
53
+ - Cross-Site Scripting (XSS)
54
+
55
+ DataFilter выполняет **статический анализ строк** и ищет сигнатуры,
56
+ характерные для вредоносных полезных нагрузок.
57
+
58
+ ------------------------------------------------------------------------
59
+
60
+ # Установка
61
+
62
+ 1. Установите библиотеку:
63
+
64
+ ``` bash
65
+ pip install web-datafilter
66
+ ```
67
+
68
+ 2. Импортируйте нужные функции:
69
+
70
+ ``` python
71
+ from DataFilter import strSQLICheck, strSSTICheck, strXSSCheck, strMultCheck
72
+ ```
73
+
74
+ ------------------------------------------------------------------------
75
+
76
+ # Ожидаемое поведение
77
+
78
+ ## Обычный пользователь
79
+
80
+ Обычный пользователь отправляет обычные значения:
81
+
82
+ - имена пользователей
83
+ - email
84
+ - короткие текстовые поля
85
+
86
+ В этом случае библиотека должна вернуть:
87
+
88
+ status = OK
89
+ issecure = True
90
+
91
+ ## Нарушитель (злоумышленник)
92
+
93
+ Злоумышленник может отправлять вредоносные полезные нагрузки, например:
94
+
95
+ 1' OR 1=1 --
96
+ <script>alert(1)</script>
97
+ {{7*7}}
98
+
99
+ В таких случаях библиотека должна вернуть:
100
+
101
+ status = FOUND
102
+ или
103
+ status = DETECTED
104
+
105
+ ### Значения статусов
106
+
107
+ Статус: Значение
108
+
109
+ OK: Подозрительных паттернов не обнаружено<br>
110
+ FOUND*: Обнаружены подозрительные признаки<br>
111
+ DETECTED: Высокая вероятность вредоносной нагрузки
112
+
113
+ \* **В strSQLICheck статус FOUND означает, что найдено ключевое слово SQL; при проверке осмысленного текста на английском языке этот статус служит защитой от ложных срабатываний**
114
+
115
+ ------------------------------------------------------------------------
116
+
117
+ # Глобальные параметры
118
+
119
+ В библиотеке есть три глобальных параметра, отвечающих за защиту от
120
+ слишком долгого выполнения регулярных выражений.
121
+
122
+ sqli_patern_time = 0.1
123
+ ssti_patern_time = 0.1
124
+ xss_patern_time = 0.2
125
+
126
+ Эти значения задают **максимальное время выполнения одного
127
+ regex‑паттерна в секундах**.
128
+
129
+ Если выполнение превышает тайм‑аут, библиотека помечает это как:
130
+
131
+ dos_payload
132
+
133
+ Это может указывать на попытку **ReDoS‑атаки** (нагрузки через сложные
134
+ регулярные выражения).
135
+
136
+ Если процессор достаточно мощный, эти значения можно уменьшить, переопределив их в своей программе
137
+
138
+ ------------------------------------------------------------------------
139
+
140
+ # Классы
141
+
142
+ ## DataFilterException
143
+
144
+ Внутреннее исключение библиотеки.
145
+
146
+ Возникает если:
147
+
148
+ - переданы неверные типы данных
149
+ - произошла внутренняя ошибка
150
+
151
+ Пример сообщения:
152
+
153
+ INVALID_INPUT: expected str
154
+
155
+ ------------------------------------------------------------------------
156
+
157
+ ## filterReport
158
+
159
+ Структура отчёта, возвращаемая всеми функциями проверки.
160
+
161
+ Поля:
162
+
163
+ Поле: Описание
164
+
165
+ data: исходная проверенная строка<br>
166
+ type: тип проверки (SQLI / SSTI / XSS)<br>
167
+ status: итоговый статус<br>
168
+ detections: список сработавших сигнатур<br>
169
+ issecure: True если строка считается безопасной<br>
170
+ processtime: время выполнения проверки в наносекундах
171
+
172
+ ------------------------------------------------------------------------
173
+
174
+ # Функции
175
+
176
+ ## strSQLICheck()
177
+
178
+ Проверяет строку на признаки SQL Injection.
179
+
180
+ Параметры:
181
+
182
+ data : str
183
+ allowedSymbols : str
184
+
185
+ Если параметр `allowedSymbols` задан и строка содержит символ вне этого
186
+ набора, функция сразу возвращает:
187
+
188
+ status = DETECTED
189
+ detections = ["banned_symbol_usage"]
190
+
191
+ Функция ищет такие признаки SQL‑инъекций как:
192
+
193
+ - UNION SELECT
194
+ - SQL комментарии
195
+ - логические таутологии
196
+ - time‑based атаки
197
+ - ключевые SQL слова
198
+
199
+ Возвращает объект `filterReport`.
200
+
201
+ ------------------------------------------------------------------------
202
+
203
+ ## strSSTICheck()
204
+
205
+ Проверяет строку на Server-Side Template Injection.
206
+
207
+ Обнаруживает конструкции, используемые в шаблонизаторах.
208
+
209
+ Примеры:
210
+
211
+ {{ ... }}
212
+ {% ... %}
213
+ ${ ... }
214
+ #{ ... }
215
+
216
+ Также ищет опасные вызовы:
217
+
218
+ eval()
219
+ exec()
220
+ os.popen()
221
+ Runtime.getRuntime()
222
+
223
+ Возвращает `filterReport`.
224
+
225
+ ------------------------------------------------------------------------
226
+
227
+ ## strXSSCheck()
228
+
229
+ Проверяет строку на признаки Cross-Site Scripting.
230
+
231
+ Перед анализом строка проходит нормализацию:
232
+
233
+ 1. URL‑декодирование
234
+ 2. декодирование HTML‑сущностей
235
+ 3. удаление управляющих символов
236
+ 4. нормализация Unicode
237
+
238
+ После этого выполняется поиск таких паттернов как:
239
+
240
+ - `<script>`
241
+ - протокол `javascript:`
242
+ - обработчики событий `onload=`
243
+ - `<iframe>`
244
+ - теги `svg` или `math`
245
+
246
+ Возвращает `filterReport`.
247
+
248
+ ------------------------------------------------------------------------
249
+
250
+ ## strMultCheck()
251
+
252
+ Позволяет выполнить несколько проверок сразу.
253
+
254
+ Пример:
255
+
256
+ ``` python
257
+ strMultCheck(data, modes=["SQLI","SSTI","XSS"])
258
+ ```
259
+
260
+ Возвращает словарь:
261
+
262
+ {
263
+ "SQLI": filterReport,
264
+ "SSTI": filterReport,
265
+ "XSS": filterReport,
266
+ "total_status": str,
267
+ "total_issecure": bool,
268
+ "total_processtime": int
269
+ }
270
+
271
+ ------------------------------------------------------------------------
272
+
273
+ # Пример использования
274
+
275
+ ``` python
276
+ from DataFilter import strMultCheck
277
+
278
+ payload = "1' OR 1=1 --"
279
+
280
+ result = strMultCheck(payload)
281
+
282
+ print(result["SQLI"].status)
283
+ print(result["total_status"])
284
+ ```
285
+
286
+ ------------------------------------------------------------------------
287
+
288
+ # Обратная связь
289
+
290
+ ## Контакты
291
+
292
+ Пожалуйста помните, что ни одно решение не является идеальным, и
293
+ **DataFilter** - не исключение.
294
+
295
+ Если у вас есть вопросы или предложения, вы можете написать мне.\
296
+ Я постараюсь ответить как только появится возможность (обычно в течение
297
+ одного рабочего дня).
298
+
299
+ **Спасибо за использование DataFilter!**
@@ -0,0 +1,8 @@
1
+ DataFilter/DataFilter.py,sha256=QZA-FzlABhBXVA6bk_9t8YK1nN3s56-pZuutYgdy7YU,20902
2
+ DataFilter/__init__.py,sha256=Lh2ftLHoX-PmJ9MRZV7UcPGARRZYy5ttzzJaaMrh4eY,400
3
+ DataFilter/config.py,sha256=w1RX0vtLWj_2__f6iWYraNR7sPQJIe5ABu9E2yCcXkc,72
4
+ web_datafilter-0.1.0.dist-info/licenses/LICENCE,sha256=P2WoVSSGaGZW8qrvSVsPzI0E6PEe3FxF_7TgCoFMQrI,1084
5
+ web_datafilter-0.1.0.dist-info/METADATA,sha256=rnewrG64ARdTUqO9Aypg97CdbEYgUc0WfdMK_TOXRnU,9894
6
+ web_datafilter-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
7
+ web_datafilter-0.1.0.dist-info/top_level.txt,sha256=urbgmbA2D-iuS3iqnwkNcQdeFH9avoA63np1o3lYk9w,11
8
+ web_datafilter-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 qwertyvs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ DataFilter