zipcode-features 0.0.4__tar.gz → 0.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,204 @@
1
+ Metadata-Version: 2.4
2
+ Name: zipcode_features
3
+ Version: 0.0.6
4
+ Summary: A tool to get features based on census data from zipcodes
5
+ Home-page: https://github.com/EricSchles/zipcode_features
6
+ Author: Eric Schles
7
+ Author-email: ericschles@gmail.com
8
+ License: MIT
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.6
12
+ Classifier: Programming Language :: Python :: 3.7
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: zipcodes
20
+ Requires-Dist: pandas
21
+ Requires-Dist: zipcode3==1.0.3
22
+ Requires-Dist: numpy<=2.3.5
23
+ Dynamic: author
24
+ Dynamic: author-email
25
+ Dynamic: classifier
26
+ Dynamic: description
27
+ Dynamic: description-content-type
28
+ Dynamic: home-page
29
+ Dynamic: license
30
+ Dynamic: license-file
31
+ Dynamic: requires-dist
32
+ Dynamic: summary
33
+
34
+ # zipcode features
35
+
36
+ similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
37
+
38
+ ## Getting CBSA mapping
39
+
40
+ If you need CBSA data you can append it to the dataframe with the following example:
41
+
42
+ ```python
43
from zipcode_features import us_get_demographics
import pandas as pd


def _get_cbsa_data():
    """Download the ZIP-to-CBSA crosswalk and keep only the CBSA and ZIP columns."""
    # Read both codes as text (as the county example does for ZIP) so leading
    # zeros are preserved and the join key is a string.
    # NOTE(review): assumes the demographics frame stores ZIP codes as strings - confirm.
    return pd.read_excel(
        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
        sheet_name='Export Worksheet',
        dtype={"CBSA": str, "ZIP": str},
    )[["CBSA", "ZIP"]]


demo = us_get_demographics(state="NY")
cbsa_zip_map = _get_cbsa_data()
# Left join: every demographics row is kept, rows without a crosswalk entry get NaN.
df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
55
+ ```
56
+
57
+ The human-readable CBSA names are published in [this Census PDF](https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf).
58
+
59
+ Here's a python script to parse them:
60
+
61
+ ```python
62
+ import urllib.request
63
+ import PyPDF2
64
+ import json
65
+ import re
66
+ import io
67
+
68
def fetch_cbsa_to_json():
    """Download the Census geography PDF and save its code/name pairs as JSON.

    Every page's extracted text is scanned for a five-digit code followed by
    a name; the resulting mapping is written to ``cbsa_codes.json`` in the
    current working directory.

    Returns:
        The ``{code: name}`` dict that was written, or ``None`` when the
        download failed.
    """
    url = "https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf"

    print("Downloading Census PDF...")
    # Using a User-Agent to ensure the request isn't blocked by the server
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})

    try:
        # The context manager closes the HTTP connection once the body is read.
        with urllib.request.urlopen(req) as response:
            pdf_bytes = io.BytesIO(response.read())
    except OSError as e:
        # URLError/HTTPError and socket-level failures are all OSError subclasses.
        print(f"Failed to download PDF: {e}")
        return None

    print("Parsing PDF...")
    reader = PyPDF2.PdfReader(pdf_bytes)

    cbsa_mapping = {}

    # Regular expression to match a 5-digit FIPS/CBSA code followed by the area name
    # Example match: "11460 Ann Arbor, MI"
    pattern = re.compile(r'\b(\d{5})\s+(.+?)(?=\s+\d{5}|\n|$)')

    for page in reader.pages:
        text = page.extract_text()
        if not text:
            # Nothing extractable on this page.
            continue
        for code, name in pattern.findall(text):
            # Clean up any trailing spaces or artifacts
            clean_name = name.strip()
            # Exclude standalone numbers or random headers that might get caught
            if len(clean_name) > 2 and not clean_name.isdigit():
                cbsa_mapping[code] = clean_name

    print(f"Extracted {len(cbsa_mapping)} CBSA codes.")

    # Save the mapping to a JSON file
    output_file = 'cbsa_codes.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(cbsa_mapping, f, indent=4)

    print(f"Successfully saved to {output_file}")
    return cbsa_mapping
110
+
111
+ if __name__ == "__main__":
112
+ fetch_cbsa_to_json()
113
+ ```
114
+
115
+ Here's a working example for using this with the above:
116
+
117
+ ```python
118
import requests
from zipcode_features import us_get_demographics
import pandas as pd


def _get_cbsa_data():
    """Download the ZIP-to-CBSA crosswalk and keep only the CBSA and ZIP columns."""
    # Codes are read as text: the JSON mapping used below is keyed by strings,
    # so a numeric CBSA column would never match a key.
    return pd.read_excel(
        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
        sheet_name='Export Worksheet',
        dtype={"CBSA": str, "ZIP": str},
    )[["CBSA", "ZIP"]]


demo = us_get_demographics(state="NY")
cbsa_zip_map = _get_cbsa_data()
df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
df = df.drop("ZIP", axis=1)

# Fail loudly on an HTTP error instead of trying to decode an error page as JSON.
response = requests.get(
    "https://raw.githubusercontent.com/EricSchles/zipcode_features/refs/heads/main/zipcode_features/cbsa_codes.json",
    timeout=30,
)
response.raise_for_status()
mapping = response.json()

# Translate each CBSA code to its name; codes missing from the mapping become NaN.
df["cbsa_name"] = df["CBSA"].map(mapping)
df = df.drop("CBSA", axis=1)
135
+ ```
136
+
137
+ ## Adding County
138
+
139
+
140
+ ```python
141
from zipcode_features import us_get_demographics
import pandas as pd


def _get_fips_data():
    """Download the ZIP-to-county crosswalk as text columns, without incomplete rows."""
    # Both codes are read as text so leading zeros are preserved. Converting
    # COUNTY with astype(str) *before* dropna() would turn missing values into
    # the literal string "nan", which dropna() can no longer remove.
    df = pd.read_excel(
        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/ZIP_COUNTY_122025.xlsx",
        dtype={'ZIP': 'str', 'COUNTY': 'str'},
        sheet_name='Export Worksheet'
    )[["COUNTY", "ZIP"]]
    return df.dropna()


demo = us_get_demographics(state="NY")
fips_zip_map = _get_fips_data()
df = pd.merge(demo, fips_zip_map, how="left", left_on="zipcode", right_on="ZIP")
df = df.drop("ZIP", axis=1)
# Drops every row that has a missing value in ANY column, not only COUNTY.
df = df.dropna()
158
+ ```
159
+
160
+ ## Adding Regional Prices
161
+
162
+ ```bash
163
+ python -m pip install beaapi us
164
+ ```
165
+
166
+ ```python
167
from zipcode_features import us_get_demographics
import pandas as pd
import beaapi
import us

df = us_get_demographics(state="NY")

# get your key here: https://apps.bea.gov/API/signup/
beakey = ""

dataset = "Regional"
# NOTE(review): SARPP looks like BEA's state-level regional price parity table - confirm.
table = "SARPP"
regional_cpi = beaapi.get_data(
    userid=beakey,
    method='GetData',
    datasetname=dataset,
    tablename=table,
    GeoFips="STATE",
    LineCode="1",
    ResultFormat="json",
)[["GeoName", "DataValue"]]

# .copy() so the column assignments below act on an independent frame, not a slice.
regional_cpi = regional_cpi[regional_cpi["GeoName"] != "United States"].copy()
# NOTE(review): assumes the API returns exactly five rows (2020-2024) per
# area, ordered by year, for 51 areas - confirm against the actual response.
regional_cpi["year"] = ["2020", "2021", "2022", "2023", "2024"] * 51
abbreviations_map = us.states.mapping('name', 'abbr')
regional_cpi["state"] = regional_cpi["GeoName"].map(abbreviations_map)
regional_cpi["cpi"] = regional_cpi["DataValue"]
regional_cpi = regional_cpi.drop("DataValue", axis=1)
regional_cpi = regional_cpi[regional_cpi["year"] == "2024"].copy()
regional_cpi["cpi_year"] = regional_cpi["year"]
# drop() returns a new frame; the result must be assigned or "year" is kept.
regional_cpi = regional_cpi.drop("year", axis=1)
df = pd.merge(df, regional_cpi, how='left', on="state")
df["regional_cpi"] = df["cpi"]
df = df.drop("cpi", axis=1)
201
+ ```
202
+
203
+
204
+
@@ -0,0 +1,171 @@
1
+ # zipcode features
2
+
3
+ similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
4
+
5
+ ## Getting CBSA mapping
6
+
7
+ If you need CBSA data you can append it to the dataframe with the following example:
8
+
9
+ ```python
10
from zipcode_features import us_get_demographics
import pandas as pd


def _get_cbsa_data():
    """Download the ZIP-to-CBSA crosswalk and keep only the CBSA and ZIP columns."""
    # Read both codes as text (as the county example does for ZIP) so leading
    # zeros are preserved and the join key is a string.
    # NOTE(review): assumes the demographics frame stores ZIP codes as strings - confirm.
    return pd.read_excel(
        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
        sheet_name='Export Worksheet',
        dtype={"CBSA": str, "ZIP": str},
    )[["CBSA", "ZIP"]]


demo = us_get_demographics(state="NY")
cbsa_zip_map = _get_cbsa_data()
# Left join: every demographics row is kept, rows without a crosswalk entry get NaN.
df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
22
+ ```
23
+
24
+ The human-readable CBSA names are published in [this Census PDF](https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf).
25
+
26
+ Here's a python script to parse them:
27
+
28
+ ```python
29
+ import urllib.request
30
+ import PyPDF2
31
+ import json
32
+ import re
33
+ import io
34
+
35
def fetch_cbsa_to_json():
    """Download the Census geography PDF and save its code/name pairs as JSON.

    Every page's extracted text is scanned for a five-digit code followed by
    a name; the resulting mapping is written to ``cbsa_codes.json`` in the
    current working directory.

    Returns:
        The ``{code: name}`` dict that was written, or ``None`` when the
        download failed.
    """
    url = "https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf"

    print("Downloading Census PDF...")
    # Using a User-Agent to ensure the request isn't blocked by the server
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})

    try:
        # The context manager closes the HTTP connection once the body is read.
        with urllib.request.urlopen(req) as response:
            pdf_bytes = io.BytesIO(response.read())
    except OSError as e:
        # URLError/HTTPError and socket-level failures are all OSError subclasses.
        print(f"Failed to download PDF: {e}")
        return None

    print("Parsing PDF...")
    reader = PyPDF2.PdfReader(pdf_bytes)

    cbsa_mapping = {}

    # Regular expression to match a 5-digit FIPS/CBSA code followed by the area name
    # Example match: "11460 Ann Arbor, MI"
    pattern = re.compile(r'\b(\d{5})\s+(.+?)(?=\s+\d{5}|\n|$)')

    for page in reader.pages:
        text = page.extract_text()
        if not text:
            # Nothing extractable on this page.
            continue
        for code, name in pattern.findall(text):
            # Clean up any trailing spaces or artifacts
            clean_name = name.strip()
            # Exclude standalone numbers or random headers that might get caught
            if len(clean_name) > 2 and not clean_name.isdigit():
                cbsa_mapping[code] = clean_name

    print(f"Extracted {len(cbsa_mapping)} CBSA codes.")

    # Save the mapping to a JSON file
    output_file = 'cbsa_codes.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(cbsa_mapping, f, indent=4)

    print(f"Successfully saved to {output_file}")
    return cbsa_mapping
77
+
78
+ if __name__ == "__main__":
79
+ fetch_cbsa_to_json()
80
+ ```
81
+
82
+ Here's a working example for using this with the above:
83
+
84
+ ```python
85
import requests
from zipcode_features import us_get_demographics
import pandas as pd


def _get_cbsa_data():
    """Download the ZIP-to-CBSA crosswalk and keep only the CBSA and ZIP columns."""
    # Codes are read as text: the JSON mapping used below is keyed by strings,
    # so a numeric CBSA column would never match a key.
    return pd.read_excel(
        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
        sheet_name='Export Worksheet',
        dtype={"CBSA": str, "ZIP": str},
    )[["CBSA", "ZIP"]]


demo = us_get_demographics(state="NY")
cbsa_zip_map = _get_cbsa_data()
df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
df = df.drop("ZIP", axis=1)

# Fail loudly on an HTTP error instead of trying to decode an error page as JSON.
response = requests.get(
    "https://raw.githubusercontent.com/EricSchles/zipcode_features/refs/heads/main/zipcode_features/cbsa_codes.json",
    timeout=30,
)
response.raise_for_status()
mapping = response.json()

# Translate each CBSA code to its name; codes missing from the mapping become NaN.
df["cbsa_name"] = df["CBSA"].map(mapping)
df = df.drop("CBSA", axis=1)
102
+ ```
103
+
104
+ ## Adding County
105
+
106
+
107
+ ```python
108
from zipcode_features import us_get_demographics
import pandas as pd


def _get_fips_data():
    """Download the ZIP-to-county crosswalk as text columns, without incomplete rows."""
    # Both codes are read as text so leading zeros are preserved. Converting
    # COUNTY with astype(str) *before* dropna() would turn missing values into
    # the literal string "nan", which dropna() can no longer remove.
    df = pd.read_excel(
        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/ZIP_COUNTY_122025.xlsx",
        dtype={'ZIP': 'str', 'COUNTY': 'str'},
        sheet_name='Export Worksheet'
    )[["COUNTY", "ZIP"]]
    return df.dropna()


demo = us_get_demographics(state="NY")
fips_zip_map = _get_fips_data()
df = pd.merge(demo, fips_zip_map, how="left", left_on="zipcode", right_on="ZIP")
df = df.drop("ZIP", axis=1)
# Drops every row that has a missing value in ANY column, not only COUNTY.
df = df.dropna()
125
+ ```
126
+
127
+ ## Adding Regional Prices
128
+
129
+ ```bash
130
+ python -m pip install beaapi us
131
+ ```
132
+
133
+ ```python
134
from zipcode_features import us_get_demographics
import pandas as pd
import beaapi
import us

df = us_get_demographics(state="NY")

# get your key here: https://apps.bea.gov/API/signup/
beakey = ""

dataset = "Regional"
# NOTE(review): SARPP looks like BEA's state-level regional price parity table - confirm.
table = "SARPP"
regional_cpi = beaapi.get_data(
    userid=beakey,
    method='GetData',
    datasetname=dataset,
    tablename=table,
    GeoFips="STATE",
    LineCode="1",
    ResultFormat="json",
)[["GeoName", "DataValue"]]

# .copy() so the column assignments below act on an independent frame, not a slice.
regional_cpi = regional_cpi[regional_cpi["GeoName"] != "United States"].copy()
# NOTE(review): assumes the API returns exactly five rows (2020-2024) per
# area, ordered by year, for 51 areas - confirm against the actual response.
regional_cpi["year"] = ["2020", "2021", "2022", "2023", "2024"] * 51
abbreviations_map = us.states.mapping('name', 'abbr')
regional_cpi["state"] = regional_cpi["GeoName"].map(abbreviations_map)
regional_cpi["cpi"] = regional_cpi["DataValue"]
regional_cpi = regional_cpi.drop("DataValue", axis=1)
regional_cpi = regional_cpi[regional_cpi["year"] == "2024"].copy()
regional_cpi["cpi_year"] = regional_cpi["year"]
# drop() returns a new frame; the result must be assigned or "year" is kept.
regional_cpi = regional_cpi.drop("year", axis=1)
df = pd.merge(df, regional_cpi, how='left', on="state")
df["regional_cpi"] = df["cpi"]
df = df.drop("cpi", axis=1)
168
+ ```
169
+
170
+
171
+
@@ -10,7 +10,7 @@ README = (HERE / "README.md").read_text()
10
10
  # This call to setup() does all the work
11
11
  setup(
12
12
  name="zipcode_features",
13
- version="0.0.4",
13
+ version="0.0.6",
14
14
  description="A tool to get features based on census data from zipcodes",
15
15
  long_description=README,
16
16
  long_description_content_type="text/markdown",
@@ -32,5 +32,10 @@ setup(
32
32
  "zipcode_features",
33
33
  ],
34
34
  include_package_data=True,
35
- install_requires=["zipcodes", "pandas", "zipcode3"],
35
+ install_requires=[
36
+ "zipcodes",
37
+ "pandas",
38
+ "zipcode3==1.0.3",
39
+ "numpy<=2.3.5"
40
+ ],
36
41
  )
@@ -1,9 +1,44 @@
1
- __version__ = '0.0.4'
1
# Keep in sync with the version passed to setup(); this release is 0.0.6.
__version__ = '0.0.6'
2
2
 
3
3
  import zipcodes
4
4
  from zipcode3.search import SearchEngine
5
5
  import pandas as pd
6
+ import json
6
7
 
8
def zipcode_mapper(x):
    """Return the ZIP code of row *x* left-padded with zeros to five characters.

    Args:
        x: A mapping-like row (e.g. a ``pandas.Series``) with a ``"ZIP"``
            entry. A ``"ZIP_len"`` entry may be present but is not needed.

    Returns:
        The ZIP code as a string of at least five characters; values that
        are already five characters or longer are returned unchanged.
    """
    # zfill covers the 3- and 4-character cases and also shorter inputs.
    return str(x["ZIP"]).zfill(5)
15
+
16
def _get_zip_to_cbsa_code() -> dict:
    """
    This method gets a mapping from zipcode to cbsa code
    mapping is of the form:
    {"zip code": "cbsa code"}

    ZIP codes are zero-padded to five characters. When a ZIP code appears on
    several rows of the CSV, the last row wins.
    """
    import os

    filename = "CBSA_ZIP_122025.csv"
    # Prefer the copy next to this module so the lookup does not depend on the
    # caller's working directory; fall back to the bare relative name otherwise.
    packaged = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
    path = packaged if os.path.exists(packaged) else filename

    df = pd.read_csv(path, dtype={'ZIP': str, "CBSA": str})
    # Rows without both codes cannot contribute a mapping entry.
    df = df.dropna(subset=["ZIP", "CBSA"])
    padded_zips = df["ZIP"].str.zfill(5)
    # DataFrame.to_dict() would give {column: {row_index: value}}; build the
    # flat zip -> cbsa dictionary explicitly instead.
    return dict(zip(padded_zips, df["CBSA"]))
27
+
28
def _get_cbsa_code_to_cbsa_name() -> dict:
    """
    This method gets a mapping from cbsa code to name
    {cbsa code: cbsa name}

    Names are normalized: a space before a hyphen is removed and runs of
    whitespace are collapsed to a single space.
    """
    import os

    filename = "cbsa_codes.json"
    # Prefer the copy next to this module so the lookup does not depend on the
    # caller's working directory; fall back to the bare relative name otherwise.
    packaged = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
    path = packaged if os.path.exists(packaged) else filename

    # The context manager closes the file handle once the JSON is parsed.
    with open(path, encoding="utf-8") as handle:
        code_to_name = json.load(handle)

    # Return a flat {code: name} dict; DataFrame.to_dict() would give
    # {column: {row_index: value}}, which Series.map() cannot use as a lookup.
    return {
        code: " ".join(name.replace(" -", "-").split())
        for code, name in code_to_name.items()
    }
40
+
41
+
7
42
  def us_get_demographics(state: str, city: str = None, zip_list: list = None) -> pd.DataFrame:
8
43
  """
9
44
  This gets demographic information for associated with zipcodes in the United States of America.
@@ -43,4 +78,10 @@ def us_get_demographics(state: str, city: str = None, zip_list: list = None) ->
43
78
  tmp_dict = zipcode_and_demo[index][1].to_dict()
44
79
  tmp_dict["zip_code"] = zipcode_and_demo[index][0]
45
80
  demographics.append(tmp_dict)
46
- return pd.DataFrame(demographics)
81
+ df = pd.DataFrame(demographics)
82
+ zip_to_cbsa = _get_zip_to_cbsa_code()
83
+ df["cbsa"] = df["zip_code"].map(zip_to_cbsa)
84
+ cbsa_code_to_name = _get_cbsa_code_to_cbsa_name()
85
+ df["cbsa_name"] = df["cbsa"].map(cbsa_code_to_name)
86
+ return df
87
+
@@ -0,0 +1,204 @@
1
+ Metadata-Version: 2.4
2
+ Name: zipcode_features
3
+ Version: 0.0.6
4
+ Summary: A tool to get features based on census data from zipcodes
5
+ Home-page: https://github.com/EricSchles/zipcode_features
6
+ Author: Eric Schles
7
+ Author-email: ericschles@gmail.com
8
+ License: MIT
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.6
12
+ Classifier: Programming Language :: Python :: 3.7
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: zipcodes
20
+ Requires-Dist: pandas
21
+ Requires-Dist: zipcode3==1.0.3
22
+ Requires-Dist: numpy<=2.3.5
23
+ Dynamic: author
24
+ Dynamic: author-email
25
+ Dynamic: classifier
26
+ Dynamic: description
27
+ Dynamic: description-content-type
28
+ Dynamic: home-page
29
+ Dynamic: license
30
+ Dynamic: license-file
31
+ Dynamic: requires-dist
32
+ Dynamic: summary
33
+
34
+ # zipcode features
35
+
36
+ similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
37
+
38
+ ## Getting CBSA mapping
39
+
40
+ If you need CBSA data you can append it to the dataframe with the following example:
41
+
42
+ ```python
43
from zipcode_features import us_get_demographics
import pandas as pd


def _get_cbsa_data():
    """Download the ZIP-to-CBSA crosswalk and keep only the CBSA and ZIP columns."""
    # Read both codes as text (as the county example does for ZIP) so leading
    # zeros are preserved and the join key is a string.
    # NOTE(review): assumes the demographics frame stores ZIP codes as strings - confirm.
    return pd.read_excel(
        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
        sheet_name='Export Worksheet',
        dtype={"CBSA": str, "ZIP": str},
    )[["CBSA", "ZIP"]]


demo = us_get_demographics(state="NY")
cbsa_zip_map = _get_cbsa_data()
# Left join: every demographics row is kept, rows without a crosswalk entry get NaN.
df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
55
+ ```
56
+
57
+ The human-readable CBSA names are published in [this Census PDF](https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf).
58
+
59
+ Here's a python script to parse them:
60
+
61
+ ```python
62
+ import urllib.request
63
+ import PyPDF2
64
+ import json
65
+ import re
66
+ import io
67
+
68
def fetch_cbsa_to_json():
    """Download the Census geography PDF and save its code/name pairs as JSON.

    Every page's extracted text is scanned for a five-digit code followed by
    a name; the resulting mapping is written to ``cbsa_codes.json`` in the
    current working directory.

    Returns:
        The ``{code: name}`` dict that was written, or ``None`` when the
        download failed.
    """
    url = "https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf"

    print("Downloading Census PDF...")
    # Using a User-Agent to ensure the request isn't blocked by the server
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})

    try:
        # The context manager closes the HTTP connection once the body is read.
        with urllib.request.urlopen(req) as response:
            pdf_bytes = io.BytesIO(response.read())
    except OSError as e:
        # URLError/HTTPError and socket-level failures are all OSError subclasses.
        print(f"Failed to download PDF: {e}")
        return None

    print("Parsing PDF...")
    reader = PyPDF2.PdfReader(pdf_bytes)

    cbsa_mapping = {}

    # Regular expression to match a 5-digit FIPS/CBSA code followed by the area name
    # Example match: "11460 Ann Arbor, MI"
    pattern = re.compile(r'\b(\d{5})\s+(.+?)(?=\s+\d{5}|\n|$)')

    for page in reader.pages:
        text = page.extract_text()
        if not text:
            # Nothing extractable on this page.
            continue
        for code, name in pattern.findall(text):
            # Clean up any trailing spaces or artifacts
            clean_name = name.strip()
            # Exclude standalone numbers or random headers that might get caught
            if len(clean_name) > 2 and not clean_name.isdigit():
                cbsa_mapping[code] = clean_name

    print(f"Extracted {len(cbsa_mapping)} CBSA codes.")

    # Save the mapping to a JSON file
    output_file = 'cbsa_codes.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(cbsa_mapping, f, indent=4)

    print(f"Successfully saved to {output_file}")
    return cbsa_mapping
110
+
111
+ if __name__ == "__main__":
112
+ fetch_cbsa_to_json()
113
+ ```
114
+
115
+ Here's a working example for using this with the above:
116
+
117
+ ```python
118
import requests
from zipcode_features import us_get_demographics
import pandas as pd


def _get_cbsa_data():
    """Download the ZIP-to-CBSA crosswalk and keep only the CBSA and ZIP columns."""
    # Codes are read as text: the JSON mapping used below is keyed by strings,
    # so a numeric CBSA column would never match a key.
    return pd.read_excel(
        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
        sheet_name='Export Worksheet',
        dtype={"CBSA": str, "ZIP": str},
    )[["CBSA", "ZIP"]]


demo = us_get_demographics(state="NY")
cbsa_zip_map = _get_cbsa_data()
df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
df = df.drop("ZIP", axis=1)

# Fail loudly on an HTTP error instead of trying to decode an error page as JSON.
response = requests.get(
    "https://raw.githubusercontent.com/EricSchles/zipcode_features/refs/heads/main/zipcode_features/cbsa_codes.json",
    timeout=30,
)
response.raise_for_status()
mapping = response.json()

# Translate each CBSA code to its name; codes missing from the mapping become NaN.
df["cbsa_name"] = df["CBSA"].map(mapping)
df = df.drop("CBSA", axis=1)
135
+ ```
136
+
137
+ ## Adding County
138
+
139
+
140
+ ```python
141
from zipcode_features import us_get_demographics
import pandas as pd


def _get_fips_data():
    """Download the ZIP-to-county crosswalk as text columns, without incomplete rows."""
    # Both codes are read as text so leading zeros are preserved. Converting
    # COUNTY with astype(str) *before* dropna() would turn missing values into
    # the literal string "nan", which dropna() can no longer remove.
    df = pd.read_excel(
        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/ZIP_COUNTY_122025.xlsx",
        dtype={'ZIP': 'str', 'COUNTY': 'str'},
        sheet_name='Export Worksheet'
    )[["COUNTY", "ZIP"]]
    return df.dropna()


demo = us_get_demographics(state="NY")
fips_zip_map = _get_fips_data()
df = pd.merge(demo, fips_zip_map, how="left", left_on="zipcode", right_on="ZIP")
df = df.drop("ZIP", axis=1)
# Drops every row that has a missing value in ANY column, not only COUNTY.
df = df.dropna()
158
+ ```
159
+
160
+ ## Adding Regional Prices
161
+
162
+ ```bash
163
+ python -m pip install beaapi us
164
+ ```
165
+
166
+ ```python
167
from zipcode_features import us_get_demographics
import pandas as pd
import beaapi
import us

df = us_get_demographics(state="NY")

# get your key here: https://apps.bea.gov/API/signup/
beakey = ""

dataset = "Regional"
# NOTE(review): SARPP looks like BEA's state-level regional price parity table - confirm.
table = "SARPP"
regional_cpi = beaapi.get_data(
    userid=beakey,
    method='GetData',
    datasetname=dataset,
    tablename=table,
    GeoFips="STATE",
    LineCode="1",
    ResultFormat="json",
)[["GeoName", "DataValue"]]

# .copy() so the column assignments below act on an independent frame, not a slice.
regional_cpi = regional_cpi[regional_cpi["GeoName"] != "United States"].copy()
# NOTE(review): assumes the API returns exactly five rows (2020-2024) per
# area, ordered by year, for 51 areas - confirm against the actual response.
regional_cpi["year"] = ["2020", "2021", "2022", "2023", "2024"] * 51
abbreviations_map = us.states.mapping('name', 'abbr')
regional_cpi["state"] = regional_cpi["GeoName"].map(abbreviations_map)
regional_cpi["cpi"] = regional_cpi["DataValue"]
regional_cpi = regional_cpi.drop("DataValue", axis=1)
regional_cpi = regional_cpi[regional_cpi["year"] == "2024"].copy()
regional_cpi["cpi_year"] = regional_cpi["year"]
# drop() returns a new frame; the result must be assigned or "year" is kept.
regional_cpi = regional_cpi.drop("year", axis=1)
df = pd.merge(df, regional_cpi, how='left', on="state")
df["regional_cpi"] = df["cpi"]
df = df.drop("cpi", axis=1)
201
+ ```
202
+
203
+
204
+
@@ -0,0 +1,4 @@
1
+ zipcodes
2
+ pandas
3
+ zipcode3==1.0.3
4
+ numpy<=2.3.5
@@ -1,35 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: zipcode_features
3
- Version: 0.0.4
4
- Summary: A tool to get features based on census data from zipcodes
5
- Home-page: https://github.com/EricSchles/zipcode_features
6
- Author: Eric Schles
7
- Author-email: ericschles@gmail.com
8
- License: MIT
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.6
12
- Classifier: Programming Language :: Python :: 3.7
13
- Classifier: Programming Language :: Python :: 3.8
14
- Classifier: Programming Language :: Python :: 3.9
15
- Classifier: Programming Language :: Python :: 3.10
16
- Classifier: Programming Language :: Python :: 3.11
17
- Description-Content-Type: text/markdown
18
- License-File: LICENSE
19
- Requires-Dist: zipcodes
20
- Requires-Dist: pandas
21
- Requires-Dist: zipcode3
22
- Dynamic: author
23
- Dynamic: author-email
24
- Dynamic: classifier
25
- Dynamic: description
26
- Dynamic: description-content-type
27
- Dynamic: home-page
28
- Dynamic: license
29
- Dynamic: license-file
30
- Dynamic: requires-dist
31
- Dynamic: summary
32
-
33
- # zipcode features
34
-
35
- similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
@@ -1,3 +0,0 @@
1
- # zipcode features
2
-
3
- similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
@@ -1,35 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: zipcode_features
3
- Version: 0.0.4
4
- Summary: A tool to get features based on census data from zipcodes
5
- Home-page: https://github.com/EricSchles/zipcode_features
6
- Author: Eric Schles
7
- Author-email: ericschles@gmail.com
8
- License: MIT
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.6
12
- Classifier: Programming Language :: Python :: 3.7
13
- Classifier: Programming Language :: Python :: 3.8
14
- Classifier: Programming Language :: Python :: 3.9
15
- Classifier: Programming Language :: Python :: 3.10
16
- Classifier: Programming Language :: Python :: 3.11
17
- Description-Content-Type: text/markdown
18
- License-File: LICENSE
19
- Requires-Dist: zipcodes
20
- Requires-Dist: pandas
21
- Requires-Dist: zipcode3
22
- Dynamic: author
23
- Dynamic: author-email
24
- Dynamic: classifier
25
- Dynamic: description
26
- Dynamic: description-content-type
27
- Dynamic: home-page
28
- Dynamic: license
29
- Dynamic: license-file
30
- Dynamic: requires-dist
31
- Dynamic: summary
32
-
33
- # zipcode features
34
-
35
- similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
@@ -1,3 +0,0 @@
1
- zipcodes
2
- pandas
3
- zipcode3