varsha-preprocess 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- varsha_preprocess-0.1.1/My_Preprocess1/Column_Transformer.py +58 -0
- varsha_preprocess-0.1.1/My_Preprocess1/__init__.py +9 -0
- varsha_preprocess-0.1.1/My_Preprocess1/encoders/MyLabelEncoder.py +22 -0
- varsha_preprocess-0.1.1/My_Preprocess1/encoders/MyOneHotEncoder.py +38 -0
- varsha_preprocess-0.1.1/My_Preprocess1/encoders/MyOrdinalEncoder.py +53 -0
- varsha_preprocess-0.1.1/My_Preprocess1/encoders/__init__.py +3 -0
- varsha_preprocess-0.1.1/My_Preprocess1/my_pipeline.py +23 -0
- varsha_preprocess-0.1.1/My_Preprocess1/preprocessing/__init__.py +1 -0
- varsha_preprocess-0.1.1/My_Preprocess1/preprocessing/my_scaler.py +38 -0
- varsha_preprocess-0.1.1/PKG-INFO +49 -0
- varsha_preprocess-0.1.1/README.md +37 -0
- varsha_preprocess-0.1.1/setup.cfg +4 -0
- varsha_preprocess-0.1.1/setup.py +19 -0
- varsha_preprocess-0.1.1/varsha_preprocess.egg-info/PKG-INFO +49 -0
- varsha_preprocess-0.1.1/varsha_preprocess.egg-info/SOURCES.txt +16 -0
- varsha_preprocess-0.1.1/varsha_preprocess.egg-info/dependency_links.txt +1 -0
- varsha_preprocess-0.1.1/varsha_preprocess.egg-info/requires.txt +2 -0
- varsha_preprocess-0.1.1/varsha_preprocess.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from .encoders.MyLabelEncoder import MyLabelEncoder
|
|
2
|
+
from .encoders.MyOneHotEncoder import MyOneHotEncoder
|
|
3
|
+
from .encoders.MyOrdinalEncoder import MyOrdinalEncoder
|
|
4
|
+
from .my_pipeline import MyPipeline
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
class MyColumnTransformer:
    """Apply per-column transformers to a DataFrame and stack the results.

    A minimal, from-scratch analogue of sklearn's ColumnTransformer.

    Parameters
    ----------
    transformers : list of (name, transformer, column) tuples
        Each ``transformer`` must expose ``fit_transform``; ``column`` is
        the DataFrame column it is applied to.
    """

    def __init__(self, transformers):
        self.transformers = transformers

    def fit_transform(self, df):
        """Fit/transform each configured column and return the
        horizontally concatenated result as a 2-D numpy array.

        Raises
        ------
        ValueError
            If no transformers are configured (nothing to concatenate).
        """
        outputs = []

        for name, encoder, col in self.transformers:
            # extract the column and run it through its transformer
            transformed = encoder.fit_transform(df[col])

            # Normalise to 2-D: a 1-D output (e.g. a label-encoded
            # Series) would otherwise break axis=1 concatenation.
            arr = np.asarray(transformed)
            if arr.ndim == 1:
                arr = arr.reshape(-1, 1)

            outputs.append(arr)

        if not outputs:
            # np.concatenate([]) would raise anyway; give a clearer message.
            raise ValueError("MyColumnTransformer has no transformers configured")

        # combine all per-column outputs side by side
        return np.concatenate(outputs, axis=1)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# df = pd.read_csv("Practice/SalaryData.csv")
|
|
31
|
+
|
|
32
|
+
# ct = MyColumnTransformer([
|
|
33
|
+
# # ("label", MyLabelEncoder(), "cough"),
|
|
34
|
+
# ("ordinal", MyOneHotEncoder(), "Education Level")
|
|
35
|
+
# # ("onehot", MyOneHotEncoder(), "city")
|
|
36
|
+
# ])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# result = ct.fit_transform(df)
|
|
40
|
+
# print(result)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ct = MyColumnTransformer([
|
|
44
|
+
|
|
45
|
+
# ("education_pipe",
|
|
46
|
+
# MyPipeline([
|
|
47
|
+
# ("ordinal", MyOrdinalEncoder())
|
|
48
|
+
# ]),
|
|
49
|
+
# "Education Level"
|
|
50
|
+
# ),
|
|
51
|
+
|
|
52
|
+
# ("gender_pipe",
|
|
53
|
+
# MyPipeline([
|
|
54
|
+
# ("onehot", MyOneHotEncoder())
|
|
55
|
+
# ]),
|
|
56
|
+
# "Gender"
|
|
57
|
+
# )])
|
|
58
|
+
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from .encoders.MyLabelEncoder import MyLabelEncoder
|
|
2
|
+
from .encoders.MyOneHotEncoder import MyOneHotEncoder
|
|
3
|
+
from .encoders.MyOrdinalEncoder import MyOrdinalEncoder
|
|
4
|
+
|
|
5
|
+
from .preprocessing.my_scaler import standardscaling
|
|
6
|
+
|
|
7
|
+
from .Column_Transformer import MyColumnTransformer
|
|
8
|
+
from .my_pipeline import MyPipeline
|
|
9
|
+
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
class MyLabelEncoder:
    """Encode categorical values as integer codes.

    Codes are assigned in sorted order of the unique categories seen
    during ``fit`` (via ``np.unique``). Unseen categories transform to
    NaN, following ``pandas.Series.map`` semantics.
    """

    def fit(self, y):
        # Sorted unique categories establish the code assignment.
        self.categories = np.unique(y)

    def transform(self, y):
        # Build the category -> code lookup in fit() order, then map.
        codes = {category: code for code, category in enumerate(self.categories)}
        return y.map(codes)

    def fit_transform(self, y):
        self.fit(y)
        return self.transform(y)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# df = pd.read_csv("Practice/covid_data.csv")
|
|
19
|
+
# y = df['has_covid']
|
|
20
|
+
|
|
21
|
+
# le = MyLabelEncoder()
|
|
22
|
+
# print(le.fit_transform(y))
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
class MyOneHotEncoder:
    """One-hot encode a pandas Series into a 0/1 indicator DataFrame.

    One column is produced per category observed during ``fit``; NaN
    values are dropped and never become a category of their own.
    """

    def fit(self, X):
        # Sorted unique non-null values define the indicator columns.
        self.categories = np.unique(X.dropna())

    def transform(self, X):
        # One indicator column per known category: 1 where X equals the
        # category, 0 elsewhere (boolean comparison cast to int).
        indicators = {
            category: (X == category).astype(int)
            for category in self.categories
        }
        return pd.DataFrame(indicators)

    def fit_transform(self, X):
        self.fit(X)
        return self.transform(X)
|
|
25
|
+
|
|
26
|
+
# df = pd.read_csv("Practice/covid_data.csv")
|
|
27
|
+
# X = np.array(df['gender']).reshape(-1,1)
|
|
28
|
+
|
|
29
|
+
# X = [['male']], [['female']]
|
|
30
|
+
# df = pd.read_csv("Practice/SalaryData.csv")
|
|
31
|
+
# X = df['Education Level']
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# print(type(X))
|
|
35
|
+
# ohe = MyOneHotEncoder()
|
|
36
|
+
# print(ohe.fit_transform(X))
|
|
37
|
+
|
|
38
|
+
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
class MyOrdinalEncoder:
    """Encode categories as integers according to a caller-supplied order.

    The i-th category in ``order`` is encoded as ``i``; values not present
    in the order are encoded as ``-1``.
    """

    def __init__(self):
        # category -> integer rank; None until fit() is called.
        self.mapping = None

    def fit(self, X, order):
        """Build the category->rank mapping from ``order``.

        ``X`` is accepted for interface symmetry with the other encoders
        but is not inspected here. Returns ``self`` for chaining.
        """
        self.mapping = {category: rank for rank, category in enumerate(order)}
        return self

    def transform(self, X):
        """Map each value of X to its rank (-1 for unknown values)."""
        if self.mapping is None:
            raise ValueError("first insert the order")

        result = []
        for value in X:
            # BUG FIX: the original condition was
            # `if value in self.mapping is None:` — a chained comparison
            # that is always False, so *every* value was encoded as -1.
            if value in self.mapping:
                result.append(self.mapping[value])
            else:
                result.append(-1)

        return np.array(result).reshape(-1, 1)

    def fit_transform(self, X, order):
        self.fit(X, order)
        return self.transform(X)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# df = pd.read_csv("Practice/covid_data.csv")
|
|
48
|
+
# X = df['cough']
|
|
49
|
+
|
|
50
|
+
# oe = MyOrdinalEncoder()
|
|
51
|
+
|
|
52
|
+
# print(oe.ordinal_encoding(X))
|
|
53
|
+
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from .encoders.MyLabelEncoder import MyLabelEncoder
|
|
2
|
+
from .encoders.MyOneHotEncoder import MyOneHotEncoder
|
|
3
|
+
from .encoders.MyOrdinalEncoder import MyOrdinalEncoder
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
class MyPipeline:
    """Run data through a sequence of (name, transformer) steps.

    Each transformer's ``fit_transform`` output is fed to the next step;
    with no steps the input is returned unchanged.
    """

    def __init__(self, steps):
        self.steps = steps

    def fit_transform(self, data):
        # NOTE: the original re-checked `len(self.steps) == 0` inside the
        # loop body; that branch was dead code (the loop never runs for an
        # empty step list), so it has been removed.
        for name, transformer in self.steps:
            data = transformer.fit_transform(data)

        return data
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .my_scaler import standardscaling
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
class standardscaling:
    """Standardize values to zero mean and unit variance (z-scores).

    Features with zero standard deviation are only centered, never
    divided, so constant columns do not cause division by zero.
    """

    def __init__(self):
        self.mean = 0
        self.standard_deviation = 0

    def fit(self, X):
        """Learn the mean and std of X; returns (mean, std)."""
        # Kept for interface compatibility with the original (unused).
        self.X = np.array(X).reshape(-1, 1)
        self.mean = np.mean(X, axis=0)
        self.standard_deviation = np.std(X, axis=0)
        return self.mean, self.standard_deviation

    def transform(self, X):
        """Return (X - mean) / std, skipping division where std == 0."""
        self.X = np.array(X).reshape(-1, 1)
        # BUG FIX: the original guarded only when *all* stds were zero,
        # so a mix of constant and varying columns divided by zero.
        # Substitute 1 for zero stds so those entries are centred only.
        safe_std = np.where(self.standard_deviation == 0, 1, self.standard_deviation)
        return (X - self.mean) / safe_std

    def fit_transform(self, X):
        self.fit(X)
        return self.transform(X)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# df = pd.read_csv("/home/varsha/PracticePython/100DaysOfMl/Practice/SalaryData.csv")
|
|
29
|
+
|
|
30
|
+
# X = df['Age']
|
|
31
|
+
# # print(X[3])
|
|
32
|
+
# # X = [100,120,130,150]
|
|
33
|
+
|
|
34
|
+
# standardscaler = standardscaling()
|
|
35
|
+
# X_scaled = standardscaler.fit_transform(X)
|
|
36
|
+
|
|
37
|
+
# print(X_scaled)
|
|
38
|
+
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: varsha-preprocess
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Custom preprocessing pipeline library
|
|
5
|
+
Description-Content-Type: text/markdown
|
|
6
|
+
Requires-Dist: numpy
|
|
7
|
+
Requires-Dist: pandas
|
|
8
|
+
Dynamic: description
|
|
9
|
+
Dynamic: description-content-type
|
|
10
|
+
Dynamic: requires-dist
|
|
11
|
+
Dynamic: summary
|
|
12
|
+
|
|
13
|
+
# My_Preprocess
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
A lightweight machine learning preprocessing library built from scratch using NumPy and Pandas.
|
|
17
|
+
|
|
18
|
+
This project is designed to deeply understand how core preprocessing tools like Pipeline, ColumnTransformer, and Encoders work internally — without relying on sklearn.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## ✨ Features
|
|
23
|
+
|
|
24
|
+
- 🔁 Custom Pipeline (sequential transformations)
|
|
25
|
+
- 🧩 ColumnTransformer (parallel column-wise transformations)
|
|
26
|
+
- 🔤 Encoders:
|
|
27
|
+
- Label Encoder
|
|
28
|
+
- One Hot Encoder
|
|
29
|
+
- Ordinal Encoder
|
|
30
|
+
- 📏 Standard Scaler
|
|
31
|
+
- ⚙️ Fully built using NumPy and Pandas
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## 🧠 Why this project?
|
|
36
|
+
|
|
37
|
+
Most ML libraries (like sklearn) hide internal implementation details.
|
|
38
|
+
|
|
39
|
+
This project focuses on:
|
|
40
|
+
- Understanding how transformations are chained
|
|
41
|
+
- Learning how fit / transform logic works
|
|
42
|
+
- Building preprocessing systems from scratch
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## 📦 Installation
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install varsha-preprocess
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# My_Preprocess
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
A lightweight machine learning preprocessing library built from scratch using NumPy and Pandas.
|
|
5
|
+
|
|
6
|
+
This project is designed to deeply understand how core preprocessing tools like Pipeline, ColumnTransformer, and Encoders work internally — without relying on sklearn.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## ✨ Features
|
|
11
|
+
|
|
12
|
+
- 🔁 Custom Pipeline (sequential transformations)
|
|
13
|
+
- 🧩 ColumnTransformer (parallel column-wise transformations)
|
|
14
|
+
- 🔤 Encoders:
|
|
15
|
+
- Label Encoder
|
|
16
|
+
- One Hot Encoder
|
|
17
|
+
- Ordinal Encoder
|
|
18
|
+
- 📏 Standard Scaler
|
|
19
|
+
- ⚙️ Fully built using NumPy and Pandas
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## 🧠 Why this project?
|
|
24
|
+
|
|
25
|
+
Most ML libraries (like sklearn) hide internal implementation details.
|
|
26
|
+
|
|
27
|
+
This project focuses on:
|
|
28
|
+
- Understanding how transformations are chained
|
|
29
|
+
- Learning how fit / transform logic works
|
|
30
|
+
- Building preprocessing systems from scratch
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## 📦 Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install varsha-preprocess
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
# from .My_Preprocess1.my_pipeline import MyPipeline


# Read the README so PyPI can render it as the long description.
with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()

setup(
    name="varsha-preprocess",
    version="0.1.1",
    packages=find_packages(),
    install_requires=["numpy", "pandas"],

    description="Custom preprocessing pipeline library",

    # key part for readme
    long_description=long_description,
    long_description_content_type="text/markdown",
)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: varsha-preprocess
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Custom preprocessing pipeline library
|
|
5
|
+
Description-Content-Type: text/markdown
|
|
6
|
+
Requires-Dist: numpy
|
|
7
|
+
Requires-Dist: pandas
|
|
8
|
+
Dynamic: description
|
|
9
|
+
Dynamic: description-content-type
|
|
10
|
+
Dynamic: requires-dist
|
|
11
|
+
Dynamic: summary
|
|
12
|
+
|
|
13
|
+
# My_Preprocess
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
A lightweight machine learning preprocessing library built from scratch using NumPy and Pandas.
|
|
17
|
+
|
|
18
|
+
This project is designed to deeply understand how core preprocessing tools like Pipeline, ColumnTransformer, and Encoders work internally — without relying on sklearn.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## ✨ Features
|
|
23
|
+
|
|
24
|
+
- 🔁 Custom Pipeline (sequential transformations)
|
|
25
|
+
- 🧩 ColumnTransformer (parallel column-wise transformations)
|
|
26
|
+
- 🔤 Encoders:
|
|
27
|
+
- Label Encoder
|
|
28
|
+
- One Hot Encoder
|
|
29
|
+
- Ordinal Encoder
|
|
30
|
+
- 📏 Standard Scaler
|
|
31
|
+
- ⚙️ Fully built using NumPy and Pandas
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## 🧠 Why this project?
|
|
36
|
+
|
|
37
|
+
Most ML libraries (like sklearn) hide internal implementation details.
|
|
38
|
+
|
|
39
|
+
This project focuses on:
|
|
40
|
+
- Understanding how transformations are chained
|
|
41
|
+
- Learning how fit / transform logic works
|
|
42
|
+
- Building preprocessing systems from scratch
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## 📦 Installation
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install varsha-preprocess
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
setup.py
|
|
3
|
+
My_Preprocess1/Column_Transformer.py
|
|
4
|
+
My_Preprocess1/__init__.py
|
|
5
|
+
My_Preprocess1/my_pipeline.py
|
|
6
|
+
My_Preprocess1/encoders/MyLabelEncoder.py
|
|
7
|
+
My_Preprocess1/encoders/MyOneHotEncoder.py
|
|
8
|
+
My_Preprocess1/encoders/MyOrdinalEncoder.py
|
|
9
|
+
My_Preprocess1/encoders/__init__.py
|
|
10
|
+
My_Preprocess1/preprocessing/__init__.py
|
|
11
|
+
My_Preprocess1/preprocessing/my_scaler.py
|
|
12
|
+
varsha_preprocess.egg-info/PKG-INFO
|
|
13
|
+
varsha_preprocess.egg-info/SOURCES.txt
|
|
14
|
+
varsha_preprocess.egg-info/dependency_links.txt
|
|
15
|
+
varsha_preprocess.egg-info/requires.txt
|
|
16
|
+
varsha_preprocess.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
My_Preprocess1
|