sqlshell-0.1.8-py3-none-any.whl → sqlshell-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlshell might be problematic. Click here for more details.

@@ -1,50 +1,126 @@
1
1
  import pandas as pd
2
2
  import numpy as np
3
3
  from datetime import datetime, timedelta
4
+ import os
5
+
6
+ # Set random seed for reproducibility
7
+ np.random.seed(42)
8
+
9
+ # Define output directory
10
+ OUTPUT_DIR = 'test_data'
11
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
4
12
 
5
13
  def create_sales_data(num_records=1000):
6
- """Create sample sales data"""
7
- # Generate random dates within the last year
14
+ # Generate dates for the last 365 days
8
15
  end_date = datetime.now()
9
16
  start_date = end_date - timedelta(days=365)
10
- dates = pd.date_range(start=start_date, end=end_date, periods=num_records)
11
-
17
+ dates = [start_date + timedelta(days=x) for x in range(366)]
18
+ random_dates = np.random.choice(dates, num_records)
19
+
20
+ # Create product data
21
+ products = ['Laptop', 'Smartphone', 'Tablet', 'Monitor', 'Keyboard', 'Mouse', 'Headphones', 'Printer']
22
+ product_prices = {
23
+ 'Laptop': (800, 2000),
24
+ 'Smartphone': (400, 1200),
25
+ 'Tablet': (200, 800),
26
+ 'Monitor': (150, 500),
27
+ 'Keyboard': (20, 150),
28
+ 'Mouse': (10, 80),
29
+ 'Headphones': (30, 300),
30
+ 'Printer': (100, 400)
31
+ }
32
+
12
33
  # Generate random data
13
34
  data = {
14
- 'orderid': range(1, num_records + 1),
15
- 'orderdate': dates,
16
- 'customerid': np.random.randint(1, 101, num_records),
17
- 'productid': np.random.randint(1, 51, num_records),
18
- 'quantity': np.random.randint(1, 11, num_records),
19
- 'unitprice': np.random.uniform(10.0, 1000.0, num_records).round(2)
35
+ 'OrderID': range(1, num_records + 1),
36
+ 'Date': random_dates,
37
+ 'ProductID': np.random.randint(1, len(products) + 1, num_records), # Changed to ProductID for joining
38
+ 'Quantity': np.random.randint(1, 11, num_records),
39
+ 'CustomerID': np.random.randint(1, 201, num_records),
40
+ 'Region': np.random.choice(['North', 'South', 'East', 'West'], num_records)
20
41
  }
21
-
22
- return pd.DataFrame(data)
23
42
 
24
- def create_customer_data(num_customers=100):
25
- """Create sample customer data"""
26
- # Generate random customer data
43
+ # Calculate prices based on product
44
+ product_list = [products[pid-1] for pid in data['ProductID']]
45
+ data['Price'] = [np.random.uniform(product_prices[p][0], product_prices[p][1])
46
+ for p in product_list]
47
+ data['TotalAmount'] = [price * qty for price, qty in zip(data['Price'], data['Quantity'])]
48
+
49
+ # Create DataFrame
50
+ df = pd.DataFrame(data)
51
+
52
+ # Round numerical columns
53
+ df['Price'] = df['Price'].round(2)
54
+ df['TotalAmount'] = df['TotalAmount'].round(2)
55
+
56
+ # Sort by Date
57
+ return df.sort_values('Date')
58
+
59
+ def create_customer_data(num_customers=200):
60
+ # Generate customer data
27
61
  data = {
28
- 'customerid': range(1, num_customers + 1),
29
- 'customername': [f"Customer {i}" for i in range(1, num_customers + 1)],
30
- 'email': [f"customer{i}@example.com" for i in range(1, num_customers + 1)],
31
- 'country': np.random.choice(['USA', 'UK', 'Canada', 'Australia', 'Germany'], num_customers),
32
- 'joindate': pd.date_range(start='2020-01-01', periods=num_customers).tolist()
62
+ 'CustomerID': range(1, num_customers + 1),
63
+ 'FirstName': [f'Customer{i}' for i in range(1, num_customers + 1)],
64
+ 'LastName': [f'Lastname{i}' for i in range(1, num_customers + 1)],
65
+ 'Email': [f'customer{i}@example.com' for i in range(1, num_customers + 1)],
66
+ 'JoinDate': [datetime.now() - timedelta(days=np.random.randint(1, 1000))
67
+ for _ in range(num_customers)],
68
+ 'CustomerType': np.random.choice(['Regular', 'Premium', 'VIP'], num_customers),
69
+ 'CreditScore': np.random.randint(300, 851, num_customers)
33
70
  }
34
71
 
35
72
  return pd.DataFrame(data)
36
73
 
37
- def create_product_data(num_products=50):
38
- """Create sample product data"""
39
- categories = ['Electronics', 'Books', 'Clothing', 'Home & Garden', 'Sports']
74
+ def create_product_data():
75
+ # Create detailed product information
76
+ products = {
77
+ 'ProductID': range(1, 9),
78
+ 'ProductName': ['Laptop', 'Smartphone', 'Tablet', 'Monitor', 'Keyboard', 'Mouse', 'Headphones', 'Printer'],
79
+ 'Category': ['Computers', 'Mobile', 'Mobile', 'Accessories', 'Accessories', 'Accessories', 'Audio', 'Peripherals'],
80
+ 'Brand': ['TechPro', 'MobileX', 'TabletCo', 'ViewMax', 'TypeMaster', 'ClickPro', 'SoundMax', 'PrintPro'],
81
+ 'StockQuantity': np.random.randint(50, 500, 8),
82
+ 'MinPrice': [800, 400, 200, 150, 20, 10, 30, 100],
83
+ 'MaxPrice': [2000, 1200, 800, 500, 150, 80, 300, 400],
84
+ 'Weight_kg': [2.5, 0.2, 0.5, 3.0, 0.8, 0.1, 0.3, 5.0],
85
+ 'WarrantyMonths': [24, 12, 12, 36, 12, 12, 24, 12]
86
+ }
87
+
88
+ return pd.DataFrame(products)
89
+
90
+ def create_large_numbers_data(num_records=100):
91
+ """Create a dataset with very large numbers for testing and visualization."""
40
92
 
41
- # Generate random product data
93
+ # Generate random IDs
94
+ ids = range(1, num_records + 1)
95
+
96
+ # Create different columns with large numbers
42
97
  data = {
43
- 'productid': range(1, num_products + 1),
44
- 'productname': [f"Product {i}" for i in range(1, num_products + 1)],
45
- 'category': np.random.choice(categories, num_products),
46
- 'baseprice': np.random.uniform(5.0, 500.0, num_products).round(2),
47
- 'instock': np.random.choice([True, False], num_products, p=[0.8, 0.2])
98
+ 'ID': ids,
99
+ 'Date': pd.date_range(start='2023-01-01', periods=num_records),
100
+ 'SmallValue': np.random.randint(1, 1000, num_records),
101
+ 'MediumValue': np.random.randint(10000, 9999999, num_records),
102
+ 'LargeValue': [int(str(np.random.randint(1, 999)) + str(np.random.randint(0, 9999999)).zfill(7) +
103
+ str(np.random.randint(0, 9999)).zfill(4)) for _ in range(num_records)],
104
+ 'VeryLargeValue': [int(str(np.random.randint(100, 999)) + str(np.random.randint(1000000, 9999999)) +
105
+ str(np.random.randint(1000000, 9999999))) for _ in range(num_records)],
106
+ 'MassiveValue': [int('1' + ''.join([str(np.random.randint(0, 10)) for _ in range(15)])) for _ in range(num_records)],
107
+ 'Category': np.random.choice(['A', 'B', 'C', 'D', 'E'], num_records),
108
+ 'IsActive': np.random.choice([True, False], num_records, p=[0.8, 0.2])
48
109
  }
49
110
 
50
- return pd.DataFrame(data)
111
+ # Create exponential values for scientific notation
112
+ data['ExponentialValue'] = [float(f"{np.random.randint(1, 10)}.{np.random.randint(1, 100):02d}e{np.random.randint(10, 20)}")
113
+ for _ in range(num_records)]
114
+
115
+ # Create monetary values (with decimals)
116
+ data['Revenue'] = [np.random.randint(1000000, 9999999999) + np.random.random() for _ in range(num_records)]
117
+ data['Budget'] = [np.random.randint(10000000, 999999999) + np.random.random() for _ in range(num_records)]
118
+
119
+ # Create DataFrame
120
+ df = pd.DataFrame(data)
121
+
122
+ # Round monetary values to 2 decimal places
123
+ df['Revenue'] = df['Revenue'].round(2)
124
+ df['Budget'] = df['Budget'].round(2)
125
+
126
+ return df
@@ -0,0 +1,5 @@
1
+ """Database management components for SQLShell application."""
2
+
3
+ from sqlshell.db.database_manager import DatabaseManager
4
+
5
+ __all__ = ['DatabaseManager']