zipcode-features 0.0.4__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.4
2
+ Name: zipcode_features
3
+ Version: 0.0.5
4
+ Summary: A tool to get features based on census data from zipcodes
5
+ Home-page: https://github.com/EricSchles/zipcode_features
6
+ Author: Eric Schles
7
+ Author-email: ericschles@gmail.com
8
+ License: MIT
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.6
12
+ Classifier: Programming Language :: Python :: 3.7
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: zipcodes
20
+ Requires-Dist: pandas
21
+ Requires-Dist: zipcode3
22
+ Dynamic: author
23
+ Dynamic: author-email
24
+ Dynamic: classifier
25
+ Dynamic: description
26
+ Dynamic: description-content-type
27
+ Dynamic: home-page
28
+ Dynamic: license
29
+ Dynamic: license-file
30
+ Dynamic: requires-dist
31
+ Dynamic: summary
32
+
33
+ # zipcode features
34
+
35
+ similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
36
+
37
+ ## Getting CBSA mapping
38
+
39
+ If you need CBSA data you can append it to the dataframe with the following example:
40
+
41
+ ```python
42
+ from zipcode_features import us_get_demographics
43
+ import pandas as pd
44
+
45
+ def _get_cbsa_data():
46
+ return pd.read_excel(
47
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
48
+ sheet_name='Export Worksheet'
49
+ )[["CBSA", "ZIP"]]
50
+
51
+ demo = us_get_demographics(state="NY")
52
+ cbsa_zip_map = _get_cbsa_data()
53
+ df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
54
+ ```
55
+
56
+ For the semantic names you can get them [here](https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf).
57
+
58
+ Here's a python script to parse them:
59
+
60
+ ```python
61
+ import urllib.request
62
+ import PyPDF2
63
+ import json
64
+ import re
65
+ import io
66
+
67
+ def fetch_cbsa_to_json():
68
+ url = "https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf"
69
+
70
+ print("Downloading Census PDF...")
71
+ # Using a User-Agent to ensure the request isn't blocked by the server
72
+ req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
73
+
74
+ try:
75
+ response = urllib.request.urlopen(req)
76
+ pdf_bytes = io.BytesIO(response.read())
77
+ except Exception as e:
78
+ print(f"Failed to download PDF: {e}")
79
+ return
80
+
81
+ print("Parsing PDF...")
82
+ reader = PyPDF2.PdfReader(pdf_bytes)
83
+
84
+ cbsa_mapping = {}
85
+
86
+ # Regular expression to match a 5-digit FIPS/CBSA code followed by the area name
87
+ # Example match: "11460 Ann Arbor, MI"
88
+ pattern = re.compile(r'\b(\d{5})\s+(.+?)(?=\s+\d{5}|\n|$)')
89
+
90
+ for page in reader.pages:
91
+ text = page.extract_text()
92
+ if text:
93
+ matches = pattern.findall(text)
94
+ for code, name in matches:
95
+ # Clean up any trailing spaces or artifacts
96
+ clean_name = name.strip()
97
+ # Exclude standalone numbers or random headers that might get caught
98
+ if len(clean_name) > 2 and not clean_name.isdigit():
99
+ cbsa_mapping[code] = clean_name
100
+
101
+ print(f"Extracted {len(cbsa_mapping)} CBSA codes.")
102
+
103
+ # Save the mapping to a JSON file
104
+ output_file = 'cbsa_codes.json'
105
+ with open(output_file, 'w', encoding='utf-8') as f:
106
+ json.dump(cbsa_mapping, f, indent=4)
107
+
108
+ print(f"Successfully saved to {output_file}")
109
+
110
+ if __name__ == "__main__":
111
+ fetch_cbsa_to_json()
112
+ ```
113
+
114
+ Here's a working example for using this with the above:
115
+
116
+ ```python
117
+ import requests
118
+ from zipcode_features import us_get_demographics
119
+ import pandas as pd
120
+
121
+ def _get_cbsa_data():
122
+ return pd.read_excel(
123
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
124
+ sheet_name='Export Worksheet'
125
+ )[["CBSA", "ZIP"]]
126
+
127
+ demo = us_get_demographics(state="NY")
128
+ cbsa_zip_map = _get_cbsa_data()
129
+ df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
130
+ df = df.drop("ZIP", axis=1)
131
+ mapping = requests.get("https://raw.githubusercontent.com/EricSchles/zipcode_features/refs/heads/main/zipcode_features/cbsa_codes.json").json()
132
+ df["cbsa_name"] = df["CBSA"].map(mapping)
133
+ df = df.drop("CBSA", axis=1)
134
+ ```
135
+
136
+ ## Adding County
137
+
138
+
139
+ ```python
140
+ from zipcode_features import us_get_demographics
141
+ import pandas as pd
142
+
143
+ def _get_fips_data():
144
+ df = pd.read_excel(
145
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/ZIP_COUNTY_122025.xlsx",
146
+ dtype={'ZIP': 'str'},
147
+ sheet_name='Export Worksheet'
148
+ )[["COUNTY", "ZIP"]]
149
+ df["COUNTY"] = df['COUNTY'].astype(str)
150
+ return df.dropna()
151
+
152
+ demo = us_get_demographics(state="NY")
153
+ fips_zip_map = _get_fips_data()
154
+ df = pd.merge(demo, fips_zip_map, how="left", left_on="zipcode", right_on="ZIP")
155
+ df = df.drop("ZIP", axis=1)
156
+ df = df.dropna()
157
+ ```
158
+
159
+ ## Adding Regional Prices
160
+
161
+ ```bash
162
+ python -m pip install beaapi us
163
+ ```
164
+
165
+ ```python
166
+ from zipcode_features import us_get_demographics
167
+ import pandas as pd
168
+ import beaapi
169
+ import us
170
+
171
+ df = us_get_demographics(state="NY")
172
+
173
+ # get your key here: https://apps.bea.gov/API/signup/
174
+ beakey = ""
175
+
176
+ dataset="Regional"
177
+ table = "SARPP"
178
+ regional_cpi = beaapi.get_data(
179
+ userid=beakey,
180
+ method='GetData',
181
+ datasetname=dataset, # National Income and Product Accounts
182
+ tablename=table, # Table 1.1.1
183
+ GeoFips="STATE",
184
+ LineCode="1",
185
+ ResultFormat="json"
186
+ #Frequency='A', # Annual data
187
+ )[["GeoName", "DataValue"]]
188
+ regional_cpi = regional_cpi[regional_cpi["GeoName"] != "United States"]
189
+ regional_cpi["year"] = ["2020", "2021", "2022", "2023", "2024"] * 51
190
+ abbreviations_map = us.states.mapping('name', 'abbr')
191
+ regional_cpi["state"] = regional_cpi["GeoName"].map(abbreviations_map)
192
+ regional_cpi["cpi"] = regional_cpi["DataValue"]
193
+ regional_cpi = regional_cpi.drop("DataValue", axis=1)
194
+ regional_cpi = regional_cpi[regional_cpi["year"] == "2024"]
195
+ regional_cpi["cpi_year"] = regional_cpi["year"]
196
+ regional_cpi.drop("year", axis=1)
197
+ df = pd.merge(df, regional_cpi, how='left', on="state")
198
+ df["regional_cpi"] = df["cpi"]
199
+ df = df.drop("cpi", axis=1)
200
+ ```
201
+
202
+
203
+
@@ -0,0 +1,171 @@
1
+ # zipcode features
2
+
3
+ similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
4
+
5
+ ## Getting CBSA mapping
6
+
7
+ If you need CBSA data you can append it to the dataframe with the following example:
8
+
9
+ ```python
10
+ from zipcode_features import us_get_demographics
11
+ import pandas as pd
12
+
13
+ def _get_cbsa_data():
14
+ return pd.read_excel(
15
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
16
+ sheet_name='Export Worksheet'
17
+ )[["CBSA", "ZIP"]]
18
+
19
+ demo = us_get_demographics(state="NY")
20
+ cbsa_zip_map = _get_cbsa_data()
21
+ df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
22
+ ```
23
+
24
+ The human-readable CBSA names are available [here](https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf).
25
+
26
+ Here's a Python script to parse them:
27
+
28
+ ```python
29
+ import urllib.request
30
+ import PyPDF2
31
+ import json
32
+ import re
33
+ import io
34
+
35
+ def fetch_cbsa_to_json():
36
+ url = "https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf"
37
+
38
+ print("Downloading Census PDF...")
39
+ # Using a User-Agent to ensure the request isn't blocked by the server
40
+ req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
41
+
42
+ try:
43
+ response = urllib.request.urlopen(req)
44
+ pdf_bytes = io.BytesIO(response.read())
45
+ except Exception as e:
46
+ print(f"Failed to download PDF: {e}")
47
+ return
48
+
49
+ print("Parsing PDF...")
50
+ reader = PyPDF2.PdfReader(pdf_bytes)
51
+
52
+ cbsa_mapping = {}
53
+
54
+ # Regular expression to match a 5-digit FIPS/CBSA code followed by the area name
55
+ # Example match: "11460 Ann Arbor, MI"
56
+ pattern = re.compile(r'\b(\d{5})\s+(.+?)(?=\s+\d{5}|\n|$)')
57
+
58
+ for page in reader.pages:
59
+ text = page.extract_text()
60
+ if text:
61
+ matches = pattern.findall(text)
62
+ for code, name in matches:
63
+ # Clean up any trailing spaces or artifacts
64
+ clean_name = name.strip()
65
+ # Exclude standalone numbers or random headers that might get caught
66
+ if len(clean_name) > 2 and not clean_name.isdigit():
67
+ cbsa_mapping[code] = clean_name
68
+
69
+ print(f"Extracted {len(cbsa_mapping)} CBSA codes.")
70
+
71
+ # Save the mapping to a JSON file
72
+ output_file = 'cbsa_codes.json'
73
+ with open(output_file, 'w', encoding='utf-8') as f:
74
+ json.dump(cbsa_mapping, f, indent=4)
75
+
76
+ print(f"Successfully saved to {output_file}")
77
+
78
+ if __name__ == "__main__":
79
+ fetch_cbsa_to_json()
80
+ ```
81
+
82
+ Here's a working example that combines the ZIP-to-CBSA mapping above with the parsed CBSA names:
83
+
84
+ ```python
85
+ import requests
86
+ from zipcode_features import us_get_demographics
87
+ import pandas as pd
88
+
89
+ def _get_cbsa_data():
90
+ return pd.read_excel(
91
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
92
+ sheet_name='Export Worksheet'
93
+ )[["CBSA", "ZIP"]]
94
+
95
+ demo = us_get_demographics(state="NY")
96
+ cbsa_zip_map = _get_cbsa_data()
97
+ df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
98
+ df = df.drop("ZIP", axis=1)
99
+ mapping = requests.get("https://raw.githubusercontent.com/EricSchles/zipcode_features/refs/heads/main/zipcode_features/cbsa_codes.json").json()
100
+ df["cbsa_name"] = df["CBSA"].map(mapping)
101
+ df = df.drop("CBSA", axis=1)
102
+ ```
103
+
104
+ ## Adding County
105
+
106
+
107
+ ```python
108
+ from zipcode_features import us_get_demographics
109
+ import pandas as pd
110
+
111
+ def _get_fips_data():
112
+ df = pd.read_excel(
113
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/ZIP_COUNTY_122025.xlsx",
114
+ dtype={'ZIP': 'str'},
115
+ sheet_name='Export Worksheet'
116
+ )[["COUNTY", "ZIP"]]
117
+ df["COUNTY"] = df['COUNTY'].astype(str)
118
+ return df.dropna()
119
+
120
+ demo = us_get_demographics(state="NY")
121
+ fips_zip_map = _get_fips_data()
122
+ df = pd.merge(demo, fips_zip_map, how="left", left_on="zipcode", right_on="ZIP")
123
+ df = df.drop("ZIP", axis=1)
124
+ df = df.dropna()
125
+ ```
126
+
127
+ ## Adding Regional Prices
128
+
129
+ ```bash
130
+ python -m pip install beaapi us
131
+ ```
132
+
133
+ ```python
134
from zipcode_features import us_get_demographics
import pandas as pd
import beaapi
import us

df = us_get_demographics(state="NY")

# get your key here: https://apps.bea.gov/API/signup/
beakey = ""

dataset = "Regional"
table = "SARPP"
regional_cpi = beaapi.get_data(
    userid=beakey,
    method='GetData',
    datasetname=dataset,  # the BEA "Regional" dataset
    tablename=table,  # the "SARPP" table, requested for every state
    GeoFips="STATE",
    LineCode="1",
    ResultFormat="json"
)[["GeoName", "DataValue"]]

# Drop the national aggregate row; .copy() so the column assignments below
# operate on an independent frame rather than a view of the API result.
regional_cpi = regional_cpi[regional_cpi["GeoName"] != "United States"].copy()

# NOTE(review): this assumes the response holds exactly five yearly rows
# (2020-2024) for each of 51 areas, in that order -- confirm against the
# API response before relying on it.
regional_cpi["year"] = ["2020", "2021", "2022", "2023", "2024"] * 51

abbreviations_map = us.states.mapping('name', 'abbr')
regional_cpi["state"] = regional_cpi["GeoName"].map(abbreviations_map)
regional_cpi["cpi"] = regional_cpi["DataValue"]
regional_cpi = regional_cpi.drop("DataValue", axis=1)

# Keep only the most recent year and remember which year the value is from.
regional_cpi = regional_cpi[regional_cpi["year"] == "2024"].copy()
regional_cpi["cpi_year"] = regional_cpi["year"]
# DataFrame.drop returns a new frame; the result must be assigned back,
# otherwise the "year" column is silently kept.
regional_cpi = regional_cpi.drop("year", axis=1)

df = pd.merge(df, regional_cpi, how='left', on="state")
df["regional_cpi"] = df["cpi"]
df = df.drop("cpi", axis=1)
168
+ ```
169
+
170
+
171
+
@@ -10,7 +10,7 @@ README = (HERE / "README.md").read_text()
10
10
  # This call to setup() does all the work
11
11
  setup(
12
12
  name="zipcode_features",
13
- version="0.0.4",
13
+ version="0.0.5",
14
14
  description="A tool to get features based on census data from zipcodes",
15
15
  long_description=README,
16
16
  long_description_content_type="text/markdown",
@@ -1,9 +1,44 @@
1
- __version__ = '0.0.4'
1
+ __version__ = '0.0.5'
2
2
 
3
3
  import zipcodes
4
4
  from zipcode3.search import SearchEngine
5
5
  import pandas as pd
6
+ import json
6
7
 
8
def zipcode_mapper(x):
    """
    Left-pad a ZIP code with zeros so it is five characters long.

    x is a row-like mapping (for example a DataFrame row) whose "ZIP" entry
    holds the ZIP code as a string. Codes that are already five or more
    characters long are returned unchanged.

    The "ZIP_len" entry that callers may supply is no longer consulted: the
    padding is derived from the string itself, so codes of any short length
    (not only 3 or 4 characters) are padded correctly.
    """
    return x["ZIP"].zfill(5)
15
+
16
def _get_zip_to_cbsa_code() -> dict:
    """
    This method gets a mapping from zipcode to cbsa code
    mapping is of the form:
    {"zip code": "cbsa code"}

    The data is read from "CBSA_ZIP_122025.csv", which must provide "ZIP" and
    "CBSA" columns; both are read as strings. ZIP codes shorter than five
    characters are left-padded with zeros.

    The returned dict is keyed directly by ZIP code so it can be passed to
    pandas' Series.map. (DataFrame.to_dict() would instead produce
    {"ZIP": {row: zip}, "CBSA": {row: cbsa}}, which never matches a ZIP.)
    If a ZIP appears on several rows, the last row wins.
    """
    # Local import: pathlib is not known to be imported at file level.
    from pathlib import Path

    csv_name = "CBSA_ZIP_122025.csv"
    # Prefer a copy that sits next to this module so the lookup works no
    # matter where the caller's working directory is; fall back to the
    # working-directory-relative path used previously.
    # NOTE(review): assumes the CSV is shipped inside the package -- confirm.
    packaged = Path(__file__).resolve().parent / csv_name
    csv_path = packaged if packaged.is_file() else Path(csv_name)

    df = pd.read_csv(csv_path, dtype={'ZIP': str, "CBSA": str})
    return {
        zip_code.zfill(5): cbsa_code
        for zip_code, cbsa_code in zip(df["ZIP"], df["CBSA"])
    }
27
+
28
def _get_cbsa_code_to_cbsa_name() -> dict:
    """
    This method gets a mapping from cbsa code to name
    {cbsa code: cbsa name}

    The data is read from "cbsa_codes.json", a JSON object of code -> name.
    Each name is cleaned up before it is returned: " -" is collapsed to "-"
    and any run of whitespace is reduced to a single space.

    The returned dict is keyed directly by CBSA code so it can be passed to
    pandas' Series.map. (DataFrame.to_dict() would instead produce
    {"code": {row: code}, "name": {row: name}}, which never matches a code.)
    """
    # Local import: pathlib is not known to be imported at file level.
    from pathlib import Path

    json_name = "cbsa_codes.json"
    # Prefer a copy that sits next to this module so the lookup works no
    # matter where the caller's working directory is; fall back to the
    # working-directory-relative path used previously.
    # NOTE(review): assumes the JSON is shipped inside the package -- confirm.
    packaged = Path(__file__).resolve().parent / json_name
    json_path = packaged if packaged.is_file() else Path(json_name)

    # Context manager so the file handle is closed deterministically.
    with open(json_path, encoding="utf-8") as f:
        code_to_name = json.load(f)

    return {
        code: " ".join(name.replace(" -", "-").split())
        for code, name in code_to_name.items()
    }
40
+
41
+
7
42
  def us_get_demographics(state: str, city: str = None, zip_list: list = None) -> pd.DataFrame:
8
43
  """
9
44
  This gets demographic information for associated with zipcodes in the United States of America.
@@ -43,4 +78,10 @@ def us_get_demographics(state: str, city: str = None, zip_list: list = None) ->
43
78
  tmp_dict = zipcode_and_demo[index][1].to_dict()
44
79
  tmp_dict["zip_code"] = zipcode_and_demo[index][0]
45
80
  demographics.append(tmp_dict)
46
- return pd.DataFrame(demographics)
81
+ df = pd.DataFrame(demographics)
82
+ zip_to_cbsa = _get_zip_to_cbsa_code()
83
+ df["cbsa"] = df["zip_code"].map(zip_to_cbsa)
84
+ cbsa_code_to_name = _get_cbsa_code_to_cbsa_name()
85
+ df["cbsa_name"] = df["cbsa"].map(cbsa_code_to_name)
86
+ return df
87
+
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.4
2
+ Name: zipcode_features
3
+ Version: 0.0.5
4
+ Summary: A tool to get features based on census data from zipcodes
5
+ Home-page: https://github.com/EricSchles/zipcode_features
6
+ Author: Eric Schles
7
+ Author-email: ericschles@gmail.com
8
+ License: MIT
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.6
12
+ Classifier: Programming Language :: Python :: 3.7
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: zipcodes
20
+ Requires-Dist: pandas
21
+ Requires-Dist: zipcode3
22
+ Dynamic: author
23
+ Dynamic: author-email
24
+ Dynamic: classifier
25
+ Dynamic: description
26
+ Dynamic: description-content-type
27
+ Dynamic: home-page
28
+ Dynamic: license
29
+ Dynamic: license-file
30
+ Dynamic: requires-dist
31
+ Dynamic: summary
32
+
33
+ # zipcode features
34
+
35
+ similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
36
+
37
+ ## Getting CBSA mapping
38
+
39
+ If you need CBSA data you can append it to the dataframe with the following example:
40
+
41
+ ```python
42
+ from zipcode_features import us_get_demographics
43
+ import pandas as pd
44
+
45
+ def _get_cbsa_data():
46
+ return pd.read_excel(
47
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
48
+ sheet_name='Export Worksheet'
49
+ )[["CBSA", "ZIP"]]
50
+
51
+ demo = us_get_demographics(state="NY")
52
+ cbsa_zip_map = _get_cbsa_data()
53
+ df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
54
+ ```
55
+
56
+ For the semantic names you can get them [here](https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf).
57
+
58
+ Here's a python script to parse them:
59
+
60
+ ```python
61
+ import urllib.request
62
+ import PyPDF2
63
+ import json
64
+ import re
65
+ import io
66
+
67
+ def fetch_cbsa_to_json():
68
+ url = "https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf"
69
+
70
+ print("Downloading Census PDF...")
71
+ # Using a User-Agent to ensure the request isn't blocked by the server
72
+ req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
73
+
74
+ try:
75
+ response = urllib.request.urlopen(req)
76
+ pdf_bytes = io.BytesIO(response.read())
77
+ except Exception as e:
78
+ print(f"Failed to download PDF: {e}")
79
+ return
80
+
81
+ print("Parsing PDF...")
82
+ reader = PyPDF2.PdfReader(pdf_bytes)
83
+
84
+ cbsa_mapping = {}
85
+
86
+ # Regular expression to match a 5-digit FIPS/CBSA code followed by the area name
87
+ # Example match: "11460 Ann Arbor, MI"
88
+ pattern = re.compile(r'\b(\d{5})\s+(.+?)(?=\s+\d{5}|\n|$)')
89
+
90
+ for page in reader.pages:
91
+ text = page.extract_text()
92
+ if text:
93
+ matches = pattern.findall(text)
94
+ for code, name in matches:
95
+ # Clean up any trailing spaces or artifacts
96
+ clean_name = name.strip()
97
+ # Exclude standalone numbers or random headers that might get caught
98
+ if len(clean_name) > 2 and not clean_name.isdigit():
99
+ cbsa_mapping[code] = clean_name
100
+
101
+ print(f"Extracted {len(cbsa_mapping)} CBSA codes.")
102
+
103
+ # Save the mapping to a JSON file
104
+ output_file = 'cbsa_codes.json'
105
+ with open(output_file, 'w', encoding='utf-8') as f:
106
+ json.dump(cbsa_mapping, f, indent=4)
107
+
108
+ print(f"Successfully saved to {output_file}")
109
+
110
+ if __name__ == "__main__":
111
+ fetch_cbsa_to_json()
112
+ ```
113
+
114
+ Here's a working example for using this with the above:
115
+
116
+ ```python
117
+ import requests
118
+ from zipcode_features import us_get_demographics
119
+ import pandas as pd
120
+
121
+ def _get_cbsa_data():
122
+ return pd.read_excel(
123
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
124
+ sheet_name='Export Worksheet'
125
+ )[["CBSA", "ZIP"]]
126
+
127
+ demo = us_get_demographics(state="NY")
128
+ cbsa_zip_map = _get_cbsa_data()
129
+ df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
130
+ df = df.drop("ZIP", axis=1)
131
+ mapping = requests.get("https://raw.githubusercontent.com/EricSchles/zipcode_features/refs/heads/main/zipcode_features/cbsa_codes.json").json()
132
+ df["cbsa_name"] = df["CBSA"].map(mapping)
133
+ df = df.drop("CBSA", axis=1)
134
+ ```
135
+
136
+ ## Adding County
137
+
138
+
139
+ ```python
140
+ from zipcode_features import us_get_demographics
141
+ import pandas as pd
142
+
143
+ def _get_fips_data():
144
+ df = pd.read_excel(
145
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/ZIP_COUNTY_122025.xlsx",
146
+ dtype={'ZIP': 'str'},
147
+ sheet_name='Export Worksheet'
148
+ )[["COUNTY", "ZIP"]]
149
+ df["COUNTY"] = df['COUNTY'].astype(str)
150
+ return df.dropna()
151
+
152
+ demo = us_get_demographics(state="NY")
153
+ fips_zip_map = _get_fips_data()
154
+ df = pd.merge(demo, fips_zip_map, how="left", left_on="zipcode", right_on="ZIP")
155
+ df = df.drop("ZIP", axis=1)
156
+ df = df.dropna()
157
+ ```
158
+
159
+ ## Adding Regional Prices
160
+
161
+ ```bash
162
+ python -m pip install beaapi us
163
+ ```
164
+
165
+ ```python
166
+ from zipcode_features import us_get_demographics
167
+ import pandas as pd
168
+ import beaapi
169
+ import us
170
+
171
+ df = us_get_demographics(state="NY")
172
+
173
+ # get your key here: https://apps.bea.gov/API/signup/
174
+ beakey = ""
175
+
176
+ dataset="Regional"
177
+ table = "SARPP"
178
+ regional_cpi = beaapi.get_data(
179
+ userid=beakey,
180
+ method='GetData',
181
+ datasetname=dataset, # National Income and Product Accounts
182
+ tablename=table, # Table 1.1.1
183
+ GeoFips="STATE",
184
+ LineCode="1",
185
+ ResultFormat="json"
186
+ #Frequency='A', # Annual data
187
+ )[["GeoName", "DataValue"]]
188
+ regional_cpi = regional_cpi[regional_cpi["GeoName"] != "United States"]
189
+ regional_cpi["year"] = ["2020", "2021", "2022", "2023", "2024"] * 51
190
+ abbreviations_map = us.states.mapping('name', 'abbr')
191
+ regional_cpi["state"] = regional_cpi["GeoName"].map(abbreviations_map)
192
+ regional_cpi["cpi"] = regional_cpi["DataValue"]
193
+ regional_cpi = regional_cpi.drop("DataValue", axis=1)
194
+ regional_cpi = regional_cpi[regional_cpi["year"] == "2024"]
195
+ regional_cpi["cpi_year"] = regional_cpi["year"]
196
+ regional_cpi.drop("year", axis=1)
197
+ df = pd.merge(df, regional_cpi, how='left', on="state")
198
+ df["regional_cpi"] = df["cpi"]
199
+ df = df.drop("cpi", axis=1)
200
+ ```
201
+
202
+
203
+
@@ -1,35 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: zipcode_features
3
- Version: 0.0.4
4
- Summary: A tool to get features based on census data from zipcodes
5
- Home-page: https://github.com/EricSchles/zipcode_features
6
- Author: Eric Schles
7
- Author-email: ericschles@gmail.com
8
- License: MIT
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.6
12
- Classifier: Programming Language :: Python :: 3.7
13
- Classifier: Programming Language :: Python :: 3.8
14
- Classifier: Programming Language :: Python :: 3.9
15
- Classifier: Programming Language :: Python :: 3.10
16
- Classifier: Programming Language :: Python :: 3.11
17
- Description-Content-Type: text/markdown
18
- License-File: LICENSE
19
- Requires-Dist: zipcodes
20
- Requires-Dist: pandas
21
- Requires-Dist: zipcode3
22
- Dynamic: author
23
- Dynamic: author-email
24
- Dynamic: classifier
25
- Dynamic: description
26
- Dynamic: description-content-type
27
- Dynamic: home-page
28
- Dynamic: license
29
- Dynamic: license-file
30
- Dynamic: requires-dist
31
- Dynamic: summary
32
-
33
- # zipcode features
34
-
35
- similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
@@ -1,3 +0,0 @@
1
- # zipcode features
2
-
3
- similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
@@ -1,35 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: zipcode_features
3
- Version: 0.0.4
4
- Summary: A tool to get features based on census data from zipcodes
5
- Home-page: https://github.com/EricSchles/zipcode_features
6
- Author: Eric Schles
7
- Author-email: ericschles@gmail.com
8
- License: MIT
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.6
12
- Classifier: Programming Language :: Python :: 3.7
13
- Classifier: Programming Language :: Python :: 3.8
14
- Classifier: Programming Language :: Python :: 3.9
15
- Classifier: Programming Language :: Python :: 3.10
16
- Classifier: Programming Language :: Python :: 3.11
17
- Description-Content-Type: text/markdown
18
- License-File: LICENSE
19
- Requires-Dist: zipcodes
20
- Requires-Dist: pandas
21
- Requires-Dist: zipcode3
22
- Dynamic: author
23
- Dynamic: author-email
24
- Dynamic: classifier
25
- Dynamic: description
26
- Dynamic: description-content-type
27
- Dynamic: home-page
28
- Dynamic: license
29
- Dynamic: license-file
30
- Dynamic: requires-dist
31
- Dynamic: summary
32
-
33
- # zipcode features
34
-
35
- similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)