sql-blocks 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sql_blocks
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
5
5
  Home-page: https://github.com/julio-cascalles/sql_blocks
6
6
  Author: Júlio Cascalles
@@ -381,3 +381,39 @@ m2 = Select(
381
381
  created_at=[Field, GroupBy, OrderBy]
382
382
  )
383
383
  ```
384
+
385
+ ### 13 - Change parser engine
386
+ ```
387
+ a, c, m = Select.parse(
388
+ """
389
+ Actor(name, id ?age = 40)
390
+ <- Cast(actor_id, movie_id) ->
391
+ Movie(id ^title)
392
+ """,
393
+ Cypher
394
+ # ^^^ recognizes syntax like Neo4J queries
395
+ )
396
+ ```
397
+
398
+ **print(a+c+m)**
399
+ ```
400
+ SELECT
401
+ act.name,
402
+ mov.title
403
+ FROM
404
+ Cast cas
405
+ JOIN Movie mov ON (cas.movie_id = mov.id)
406
+ JOIN Actor act ON (cas.actor_id = act.id)
407
+ WHERE
408
+ act.age = 40
409
+ ORDER BY
410
+ mov.title
411
+ ```
412
+ ---
413
+ > **Separators and meaning:**
414
+ * `( )` Delimits a table and its fields
415
+ * `,` Separates fields
416
+ * `?` For simple conditions (> < = <>)
417
+ * `<-` connects to the table on the left
418
+ * `->` connects to the table on the right
419
+ * `^` Puts the field in the ORDER BY clause
@@ -365,4 +365,40 @@ m2 = Select(
365
365
  user_id=[Field, GroupBy, OrderBy],
366
366
  created_at=[Field, GroupBy, OrderBy]
367
367
  )
368
- ```
368
+ ```
369
+
370
+ ### 13 - Change parser engine
371
+ ```
372
+ a, c, m = Select.parse(
373
+ """
374
+ Actor(name, id ?age = 40)
375
+ <- Cast(actor_id, movie_id) ->
376
+ Movie(id ^title)
377
+ """,
378
+ Cypher
379
+ # ^^^ recognizes syntax like Neo4J queries
380
+ )
381
+ ```
382
+
383
+ **print(a+c+m)**
384
+ ```
385
+ SELECT
386
+ act.name,
387
+ mov.title
388
+ FROM
389
+ Cast cas
390
+ JOIN Movie mov ON (cas.movie_id = mov.id)
391
+ JOIN Actor act ON (cas.actor_id = act.id)
392
+ WHERE
393
+ act.age = 40
394
+ ORDER BY
395
+ mov.title
396
+ ```
397
+ ---
398
+ > **Separators and meaning:**
399
+ * `( )` Delimits a table and its fields
400
+ * `,` Separates fields
401
+ * `?` For simple conditions (> < = <>)
402
+ * `<-` connects to the table on the left
403
+ * `->` connects to the table on the right
404
+ * `^` Puts the field in the ORDER BY clause
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sql_blocks"
3
- version = "0.2.2"
3
+ version = "0.2.4"
4
4
  authors = [
5
5
  { name="Julio Cascalles", email="julio.cascalles@outlook.com" },
6
6
  ]
@@ -3,7 +3,7 @@ from setuptools import setup
3
3
 
4
4
  setup(
5
5
  name = 'sql_blocks',
6
- version = '0.2.2',
6
+ version = '0.2.4',
7
7
  author = 'Júlio Cascalles',
8
8
  author_email = 'julio.cascalles@outlook.com',
9
9
  packages = ['sql_blocks'],
@@ -413,6 +413,276 @@ class Rule:
413
413
  def apply(cls, target: 'Select'):
414
414
  ...
415
415
 
416
class Parser:
    """Base class for the text parsers accepted by ``Select.parse``.

    Instantiating a parser runs the whole parse: ``__init__`` calls
    ``prepare()`` when the pattern cache is still empty, stores
    ``class_type`` and then calls ``eval(txt)``. Subclasses fill
    ``self.queries`` inside ``eval``.

    Attributes:
        REGEX: per-class cache of compiled patterns, filled by ``prepare``.
        queries: objects produced by ``eval`` (instance attribute).
        class_type: class that ``eval`` instantiates for each parsed table.
    """
    REGEX = {}

    def __init_subclass__(cls, **kwargs):
        """Give every subclass its own ``REGEX`` cache.

        Without this, a subclass that does not declare ``REGEX = {}`` would
        share (and pollute) the cache of its parent, so another subclass
        could find the cache non-empty, skip ``prepare()`` and then miss its
        own patterns.
        """
        super().__init_subclass__(**kwargs)
        if 'REGEX' not in cls.__dict__:
            cls.REGEX = {}

    def prepare(self):
        """Hook: compile the patterns this parser needs into ``self.REGEX``.

        Called by ``__init__`` only while ``self.REGEX`` is empty.
        """

    def __init__(self, txt: str, class_type):
        """Parse ``txt`` immediately.

        Args:
            txt: the text to parse.
            class_type: class used by ``eval`` to build each result object.
        """
        self.queries = []
        if not self.REGEX:
            self.prepare()
        self.class_type = class_type
        self.eval(txt)

    def eval(self, txt: str):
        """Hook: parse ``txt`` and populate ``self.queries``."""
431
+
432
+
433
class SQLParser(Parser):
    """Parser that turns plain SQL ``SELECT`` text into query objects.

    Sub-queries of the form ``field [NOT] in (SELECT ...)`` are parsed first
    (recursively) and cut out of the text; the remaining text is then split
    on the SQL keywords and one object is built per table in ``FROM``.
    """
    REGEX = {}

    def prepare(self):
        """Compile the keyword and sub-query patterns into the class cache."""
        keywords = '|'.join(k + r'\b' for k in KEYWORD)
        flags = re.IGNORECASE + re.MULTILINE
        self.REGEX['keywords'] = re.compile(f'({keywords}|[*])', flags)
        self.REGEX['subquery'] = re.compile(r'(\w\.)*\w+ +in +\(SELECT.*?\)', flags)

    @staticmethod
    def _find_last_word(txt: str, pos: int) -> int:
        """Scan backwards from ``pos`` for the whitespace that precedes the
        previous word (used to locate the field in ``field NOT in (...)``).

        Returns the index of that whitespace, or 0 when the scan reaches the
        start of the text without finding one (the original returned ``None``
        there, which crashed the caller).
        """
        SPACE, WORD = 1, 2
        found = set()
        for i in range(pos, 0, -1):
            char = txt[i]
            if char in (' ', '\t', '\n'):
                if found == {SPACE, WORD}:
                    return i
                found.add(SPACE)
            if char.isalpha():
                found.add(WORD)
            elif char == '.':
                # A dot means the alias part still has to be read; `discard`
                # (instead of `remove`) avoids KeyError on input like "..".
                found.discard(WORD)
        return 0

    @staticmethod
    def _find_parenthesis(txt: str, pos: int) -> int:
        """Return the index just after the first ``)`` at or after ``pos``.

        Raises:
            SyntaxError: when there is no closing parenthesis. (The original
                loop skipped the last character of the text and returned
                ``None``, which surfaced as a ``TypeError``.)
        """
        closing = txt.find(')', pos)
        if closing < 0:
            raise SyntaxError('Unbalanced parentheses in subquery')
        return closing + 1

    def eval(self, txt: str):
        """Parse ``txt`` and store the resulting objects in ``self.queries``."""
        result = {}
        found = self.REGEX['subquery'].search(txt)
        while found:
            start, end = found.span()
            inner = txt[start: end]
            if inner.count('(') > inner.count(')'):
                # The non-greedy match stopped at a nested ")": extend it.
                end = self._find_parenthesis(txt, end)
                inner = txt[start: end-1]
            # Case-insensitive and whitespace-tolerant, matching the flags of
            # the `subquery` pattern that found this span.
            fld, *inner = re.split(
                r'\s+in\s+', inner, maxsplit=1, flags=re.IGNORECASE
            )
            if fld.upper() == 'NOT':
                pos = self._find_last_word(txt, start)
                fld = txt[pos: start].strip() # [To-Do] Use the value of `fld`
                start = pos
                target_class = NotSelectIN
            else:
                target_class = SelectIN
            obj = SQLParser(
                ' '.join(re.sub(r'^\(', '', s.strip()) for s in inner),
                class_type=target_class
            ).queries[0]
            result[obj.alias] = obj
            # Cut the sub-query (and one character on each side) out of the
            # text; `max` keeps the slice sane when the match starts at 0.
            txt = txt[:max(start - 1, 0)] + txt[end+1:]
            found = self.REGEX['subquery'].search(txt)
        tokens = [t.strip() for t in self.REGEX['keywords'].split(txt) if t.strip()]
        values = {k.upper(): v for k, v in zip(tokens[::2], tokens[1::2])}
        # Word boundaries stop the split from cutting names such as PERSON
        # or REGION in half at their embedded "ON".
        tables = [
            t.strip()
            for t in re.split(
                r'\b(?:JOIN|LEFT|RIGHT|ON)\b', values[FROM], flags=re.IGNORECASE
            )
            if t.strip()
        ]
        for item in tables:
            if '=' in item:
                # Join condition "(a1.f1 = a2.f2)": link the two tables.
                a1, f1, a2, f2 = [r.strip() for r in re.split('[().=]', item) if r]
                obj1: SQLObject = result[a1]
                obj2: SQLObject = result[a2]
                PrimaryKey.add(f2, obj2)
                ForeignKey(obj2.table_name).add(f1, obj1)
            else:
                obj = self.class_type(item)
                for key in USUAL_KEYS:
                    if key not in values:
                        continue
                    separator = self.class_type.get_separator(key)
                    obj.values[key] = [
                        Field.format(fld, obj)
                        for fld in re.split(separator, values[key])
                        if (fld != '*' and len(tables) == 1) or obj.match(fld)
                    ]
                result[obj.alias] = obj
        self.queries = list(result.values())
505
+
506
+
507
class Cypher(Parser):
    """Parser for the compact, Cypher-like notation, e.g.
    ``Actor(name, id ?age = 40) <- Cast(actor_id, movie_id) -> Movie(id ^title)``.

    The text is split on the separators ``( ) , ? ^ -> <-``; each separator
    selects the method that will consume the *next* token.
    """
    REGEX = {}
    TOKEN_METHODS = {}

    def prepare(self):
        """Compile the patterns into the class cache (runs once per class)."""
        self.REGEX['separator'] = re.compile(r'([(,?)^]|->|<-)')
        # `[<>=]+` keeps multi-character operators such as "<>" or ">="
        # together instead of yielding "< >" / "> =".
        self.REGEX['condition'] = re.compile(r'(^\w+)|([<>=]+)')
        self.TOKEN_METHODS = self._token_methods()

    def _token_methods(self) -> dict:
        """Map each separator to the bound method handling the next token."""
        return {
            '(': self.add_field, '?': self.add_where,
            ',': self.add_field, '^': self.add_order,
            ')': self.new_query, '->': self.left_ftable,
            '<-': self.right_ftable,
        }

    def new_query(self, token: str):
        """Start a new query object when ``token`` is a valid table name."""
        if token.isidentifier():
            self.queries.append( self.class_type(token) )

    def add_where(self, token: str):
        """Add the condition in ``token`` (e.g. ``age=40``) to the last query."""
        field, *condition = [
            t for t in self.REGEX['condition'].split(token) if t
        ]
        Where(' '.join(condition)).add(field, self.queries[-1])

    def add_order(self, token: str):
        """Add ``token`` to the last query as a field that is also ordered by."""
        FieldList(token, [Field, OrderBy]).add('', self.queries[-1])

    def add_field(self, token: str):
        """Add ``token`` to the last query as a plain field."""
        FieldList(token, [Field]).add('', self.queries[-1])

    def left_ftable(self, token: str):
        """Handle the table name that follows ``->``."""
        self.new_query(token)
        self.join_type = JoinType.LEFT

    def right_ftable(self, token: str):
        """Handle the table name that follows ``<-``."""
        self.new_query(token)
        self.join_type = JoinType.RIGHT

    def add_foreign_key(self, token: str):
        """Link the two most recent queries.

        ``token`` is the first field of the newest table; the last selected
        field of the previous table is removed from its SELECT list and used
        as the other side of the relationship. Resets ``join_type`` to INNER.
        """
        curr, last = [self.queries[i] for i in (-1, -2)]
        pk_field = last.values[SELECT][-1].split('.')[-1]
        last.delete(pk_field, [SELECT])
        if self.join_type == JoinType.RIGHT:
            curr, last = last, curr
            pk_field, token = token, pk_field
        last.key_field = pk_field
        k = ForeignKey.get_key(last, curr)
        ForeignKey.references[k] = (pk_field, token)
        self.join_type = JoinType.INNER

    def eval(self, txt: str):
        """Tokenize ``txt`` (whitespace removed) and dispatch every token."""
        if not self.TOKEN_METHODS:
            # `prepare()` only runs for the first instance of the class (the
            # REGEX cache is shared), so later instances would otherwise see
            # the empty class-level dict and silently ignore every separator.
            self.TOKEN_METHODS = self._token_methods()
        self.join_type = JoinType.INNER
        self.method = self.new_query
        for token in self.REGEX['separator'].split( re.sub(r'\s+', '', txt) ):
            if not token:
                continue
            if self.method:
                self.method(token)
            if token == '(' and self.join_type != JoinType.INNER:
                # First field after a joined table's "(" is its key field.
                self.method = self.add_foreign_key
            else:
                self.method = self.TOKEN_METHODS.get(token)
569
+
570
+ # ----------------------------
571
# MongoParser: Parser subclass that reads text shaped like `table.function({...})`
# where function is one of find / aggregate / sort (see PARAM_BY_FUNCTION in eval)
# and translates the parameters into Where / GroupBy / OrderBy / Field parts.
# NOTE(review): indentation is not visible in this rendering; comments below describe
# individual statements only and do not assert how they are nested.
+ class MongoParser(Parser):
572
+ REGEX = {}
573
+
574
# prepare: caches the single pattern used to tokenize the text; it splits on
# the characters ( { [ ] } , ) and keeps them as tokens (capturing group).
+ def prepare(self):
575
+ self.REGEX['separator'] = re.compile(r'([({[\]},)])')
576
+
577
# new_query: `token` is split on '.'; the last piece is the function name and
# whatever precedes it is the table path. Unknown functions raise SyntaxError.
# A new `class_type` object is appended only when a non-empty table part exists.
+ def new_query(self, token: str):
578
+ if not token:
579
+ return
580
+ *table, function = token.split('.')
581
+ self.param_type = self.PARAM_BY_FUNCTION.get(function)
582
+ if not self.param_type:
583
+ raise SyntaxError(f'Unknown function {function}')
584
+ if table and table[0]:
585
+ self.queries.append( self.class_type(table[-1]) )
586
+
587
# param_is_where: True when param_type is the Where class itself or an
# instance of Where (get_param replaces the class by a condition object).
+ def param_is_where(self) -> bool:
588
+ return self.param_type == Where or isinstance(self.param_type, Where)
589
+
590
# next_param: handler for the token that follows a ','. When the current
# parameter type is GroupBy it is switched to Field before delegating.
+ def next_param(self, token: str):
591
+ if self.param_type == GroupBy:
592
+ self.param_type = Field
593
+ self.get_param(token)
594
+
595
# get_param: handles one `field:value` token. Tokens without ':' are ignored.
# NOTE(review): `token.split(':')` is unpacked into exactly two names, so a token
# containing more than one ':' raises ValueError — confirm whether that is intended.
+ def get_param(self, token: str):
596
+ if not ':' in token:
597
+ return
598
+ field, value = token.split(':')
599
# Names starting with '$' are operators/functions rather than fields.
+ is_function = field.startswith('$')
600
+ if not value and not is_function:
601
+ if self.param_is_where():
602
# Remember the field so a following `$op:value` token can refer to it.
+ self.last_field = field
603
+ return
604
+ if self.param_is_where():
605
+ if is_function:
606
+ function = field
607
+ field = self.last_field
608
+ self.last_field = ''
609
+ else:
610
# A plain `field:value` pair is treated as an equality test.
+ function = '$eq'
611
# Quoted values lose their double quotes; values starting with a digit are
# converted to float (exactly one '.') or int.
# NOTE(review): a leading '-' is not numeric here, so negative numbers stay strings.
+ if '"' in value:
612
+ value = value.replace('"', '')
613
+ elif value and value[0].isnumeric():
614
+ numeric_type = float if len(value.split('.')) == 2 else int
615
+ value = numeric_type(value)
616
# NOTE(review): an operator missing from CONDITIONS raises KeyError here.
+ self.param_type = self.CONDITIONS[function](value)
617
+ if function == '$or':
618
+ return
619
+ elif self.param_type == GroupBy:
620
+ if field != '_id':
621
+ return
622
# For `_id`, the grouped field name is the value without '"' and '$'.
+ field = re.sub('"|[$]', '', value)
623
+ elif self.param_type == OrderBy and value == '-1':
624
# NOTE(review): this assigns on the OrderBy class, not on an instance, and
# nothing in this block resets it.
+ OrderBy.sort = SortType.DESC
625
+ elif field.startswith('$'):
626
# `$name:value` becomes the text `name(value)`.
+ field = '{}({})'.format(
627
+ field.replace('$', ''), value
628
+ )
629
# While a where_list is open (started by `$or`), conditions are collected
# there instead of being added to the query directly.
+ if self.where_list is not None and self.param_is_where():
630
+ self.where_list[field] = self.param_type
631
+ return
632
+ self.param_type.add(field, self.queries[-1])
633
+
634
# close_brackets: `token` is the OPENING bracket that pairs with the closing
# one just read (eval maps it through BRACKET_PAIR); its counter is decremented.
# Collected where_list conditions are emitted through Options(...).add('OR', ...).
+ def close_brackets(self, token: str):
635
+ self.brackets[token] -= 1
636
+ if self.param_is_where() and self.brackets[token] == 0:
637
+ if self.where_list is not None:
638
+ Options(**self.where_list).add('OR', self.queries[-1])
639
+ self.where_list = None
640
+ if token == '{':
641
+ self.param_type = Field
642
+
643
# begin_conditions: CONDITIONS entry for `$or`; opens an empty where_list and
# returns the Where class (the `value` argument is not used).
+ def begin_conditions(self, value: str):
644
+ self.where_list = {}
645
+ return Where
646
+
647
# increment_brackets: counts one more open bracket of kind `value`.
+ def increment_brackets(self, value: str):
648
+ self.brackets[value] += 1
649
+
650
# eval: resets the per-parse state, builds the lookup tables, then walks the
# tokens of the whitespace-stripped text. The method chosen for a separator
# token is applied to the token that comes after it.
+ def eval(self, txt: str):
651
+ self.method = self.new_query
652
+ self.last_field = ''
653
+ self.where_list = None
654
# Function name -> kind of parameter that function receives.
+ self.PARAM_BY_FUNCTION = {
655
+ 'find': Where, 'aggregate': GroupBy, 'sort': OrderBy
656
+ }
657
# Closing bracket -> matching opening bracket; counters are keyed by the opener.
+ BRACKET_PAIR = {'}': '{', ']': '['}
658
+ self.brackets = {char: 0 for char in BRACKET_PAIR.values()}
659
# `$` operator -> factory that builds the condition object for a value.
+ self.CONDITIONS = {
660
+ '$in': lambda value: contains(value),
661
+ '$gt': lambda value: gt(value),
662
+ '$gte' : lambda value: gte(value),
663
+ '$lt': lambda value: lt(value),
664
+ '$lte' : lambda value: lte(value),
665
+ '$eq': lambda value: eq(value),
666
+ '$ne': lambda value: Not.eq(value),
667
+ '$or': self.begin_conditions,
668
+ }
669
# Separator -> handler for the next token; other separators map to None.
+ self.TOKEN_METHODS = {
670
+ '{': self.get_param, ',': self.next_param, ')': self.new_query,
671
+ }
672
+ for token in self.REGEX['separator'].split( re.sub(r'\s+', '', txt) ):
673
+ if not token:
674
+ continue
675
+ if self.method:
676
+ self.method(token)
677
+ if token in self.brackets:
678
+ self.increment_brackets(token)
679
+ elif token in BRACKET_PAIR:
680
+ self.close_brackets(
681
+ BRACKET_PAIR[token]
682
+ )
683
+ self.method = self.TOKEN_METHODS.get(token)
684
+ # ----------------------------
685
+
416
686
 
417
687
  class JoinType(Enum):
418
688
  INNER = ''
@@ -504,73 +774,8 @@ class Select(SQLObject):
504
774
  return re.findall(f'\b*{self.alias}[.]', expr) != []
505
775
 
506
776
  @classmethod
507
- def parse(cls, txt: str) -> list[SQLObject]:
508
- def find_last_word(pos: int) -> int:
509
- SPACE, WORD = 1, 2
510
- found = set()
511
- for i in range(pos, 0, -1):
512
- if txt[i] in [' ', '\t', '\n']:
513
- if sum(found) == 3:
514
- return i
515
- found.add(SPACE)
516
- if txt[i].isalpha():
517
- found.add(WORD)
518
- elif txt[i] == '.':
519
- found.remove(WORD)
520
- def find_parenthesis(pos: int) -> int:
521
- for i in range(pos, len(txt)-1):
522
- if txt[i] == ')':
523
- return i+1
524
- if not cls.REGEX:
525
- keywords = '|'.join(k + r'\b' for k in KEYWORD)
526
- flags = re.IGNORECASE + re.MULTILINE
527
- cls.REGEX['keywords'] = re.compile(f'({keywords}|[*])', flags)
528
- cls.REGEX['subquery'] = re.compile(r'(\w\.)*\w+ +in +\(SELECT.*?\)', flags)
529
- result = {}
530
- found = cls.REGEX['subquery'].search(txt)
531
- while found:
532
- start, end = found.span()
533
- inner = txt[start: end]
534
- if inner.count('(') > inner.count(')'):
535
- end = find_parenthesis(end)
536
- inner = txt[start: end-1]
537
- fld, *inner = re.split(r' IN | in', inner, maxsplit=1)
538
- if fld.upper() == 'NOT':
539
- pos = find_last_word(start)
540
- fld = txt[pos: start].strip() # [To-Do] Use the value of `fld`
541
- start = pos
542
- class_type = NotSelectIN
543
- else:
544
- class_type = SelectIN
545
- obj = class_type.parse(
546
- ' '.join(re.sub(r'^\(', '', s.strip()) for s in inner)
547
- )[0]
548
- result[obj.alias] = obj
549
- txt = txt[:start-1] + txt[end+1:]
550
- found = cls.REGEX['subquery'].search(txt)
551
- tokens = [t.strip() for t in cls.REGEX['keywords'].split(txt) if t.strip()]
552
- values = {k.upper(): v for k, v in zip(tokens[::2], tokens[1::2])}
553
- tables = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', values[FROM]) if t.strip()]
554
- for item in tables:
555
- if '=' in item:
556
- a1, f1, a2, f2 = [r.strip() for r in re.split('[().=]', item) if r]
557
- obj1: SQLObject = result[a1]
558
- obj2: SQLObject = result[a2]
559
- PrimaryKey.add(f2, obj2)
560
- ForeignKey(obj2.table_name).add(f1, obj1)
561
- else:
562
- obj = cls(item)
563
- for key in USUAL_KEYS:
564
- if not key in values:
565
- continue
566
- separator = cls.get_separator(key)
567
- obj.values[key] = [
568
- Field.format(fld, obj)
569
- for fld in re.split(separator, values[key])
570
- if (fld != '*' and len(tables) == 1) or obj.match(fld)
571
- ]
572
- result[obj.alias] = obj
573
- return list( result.values() )
777
# parse: builds `parser(txt, cls)` and returns the `queries` list that the
# parser instance filled; SQLParser is used when no parser is given.
# NOTE(review): `parser` is called like a class here, so `type[Parser]` would
# describe the argument more precisely than `Parser`.
+ def parse(cls, txt: str, parser: Parser = SQLParser) -> list[SQLObject]:
778
+ return parser(txt, cls).queries
574
779
 
575
780
  def optimize(self, rules: list[Rule]=None):
576
781
  if not rules:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sql_blocks
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
5
5
  Home-page: https://github.com/julio-cascalles/sql_blocks
6
6
  Author: Júlio Cascalles
@@ -381,3 +381,39 @@ m2 = Select(
381
381
  created_at=[Field, GroupBy, OrderBy]
382
382
  )
383
383
  ```
384
+
385
+ ### 13 - Change parser engine
386
+ ```
387
+ a, c, m = Select.parse(
388
+ """
389
+ Actor(name, id ?age = 40)
390
+ <- Cast(actor_id, movie_id) ->
391
+ Movie(id ^title)
392
+ """,
393
+ Cypher
394
+ # ^^^ recognizes syntax like Neo4J queries
395
+ )
396
+ ```
397
+
398
+ **print(a+c+m)**
399
+ ```
400
+ SELECT
401
+ act.name,
402
+ mov.title
403
+ FROM
404
+ Cast cas
405
+ JOIN Movie mov ON (cas.movie_id = mov.id)
406
+ JOIN Actor act ON (cas.actor_id = act.id)
407
+ WHERE
408
+ act.age = 40
409
+ ORDER BY
410
+ mov.title
411
+ ```
412
+ ---
413
+ > **Separators and meaning:**
414
+ * `( )` Delimits a table and its fields
415
+ * `,` Separates fields
416
+ * `?` For simple conditions (> < = <>)
417
+ * `<-` connects to the table on the left
418
+ * `->` connects to the table on the right
419
+ * `^` Puts the field in the ORDER BY clause
File without changes
File without changes