import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%config Completer.use_jedi = False
pd.options.display.max_columns = None
raw_data = pd.read_csv("flight.csv")
df_prep = raw_data.copy()
df_prep.head()
FACILITY | GHGRP ID | REPORTED ADDRESS | LATITUDE | LONGITUDE | CITY | METRO AREA NAME | STATE | ZIP CODE | PARENT COMPANIES | SUBPARTS | TOTAL REPORTED EMISSIONS, 2011 | TOTAL REPORTED EMISSIONS, 2012 | TOTAL REPORTED EMISSIONS, 2013 | TOTAL REPORTED EMISSIONS, 2014 | TOTAL REPORTED EMISSIONS, 2015 | TOTAL REPORTED EMISSIONS, 2016 | TOTAL REPORTED EMISSIONS, 2017 | TOTAL REPORTED EMISSIONS, 2018 | TOTAL REPORTED EMISSIONS, 2019 | TOTAL REPORTED EMISSIONS, 2020 | TOTAL REPORTED EMISSIONS, 2021 | CHANGE IN EMISSIONS (2020 TO 2021) | CHANGE IN EMISSIONS (2011 TO 2021) | SECTORS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Boston University | 1002274 | 120 Ashford Street | 42.35395 | -71.12204 | Boston | Boston-Cambridge-Newton, MA-NH | MA | 2134 | TRUSTEES OF BOSTON UNIVERSITY (100%) | C | 53,903 | 44,367 | 44,797 | 49,319 | 50,937 | 51,247 | 43,986 | 50,356 | 43,827 | 37,558 | 47,622 | 10,064 | -6,281 | Other |
1 | MIT Central Utility Plant | 1001289 | 59 VASSAR ST | 42.36110 | -71.09340 | CAMBRIDGE | Boston-Cambridge-Newton, MA-NH | MA | 2139 | MASSACHUSETTS INSTITUTE OF TECHNOLOGY (100%) | C | 141,908 | 137,864 | 140,448 | 145,000 | 141,797 | 128,376 | 106,789 | 131,470 | 123,487 | 111,369 | 131,848 | 20,479 | -10,060 | Other |
2 | NORTHEASTERN UNIVERSITY | 1005277 | 360 HUNTINGTON AVENUE | 42.34012 | -71.08933 | BOSTON | Boston-Cambridge-Newton, MA-NH | MA | 2115 | NORTHEASTERN UNIVERSITY (100%) | C | 30,348 | 31,573 | 33,735 | 31,160 | 31,316 | 28,985 | 30,011 | 30,846 | 29,883 | 27,380 | 27,139 | -241 | -3,209 | Other |
3 | The President and Fellows of Harvard Universit... | 1000656 | 46 Blackstone Street | 42.36360 | -71.11610 | Cambridge | Boston-Cambridge-Newton, MA-NH | MA | 2139 | PRESIDENT & FELLOWS OF HARVARD COLLEGE (100%) | C | 74,824 | 64,839 | 69,223 | 69,798 | 69,309 | 80,210 | 79,996 | 82,860 | 83,227 | 73,613 | 76,533 | 2,920 | 1,709 | Other |
# Inspect the frame's dimensions as a (rows, columns) tuple.
df_prep.shape
(4, 25)
# List every column label together with its positional index.
list(enumerate(df_prep.columns))
[(0, 'FACILITY'), (1, 'GHGRP ID'), (2, 'REPORTED ADDRESS'), (3, 'LATITUDE'), (4, 'LONGITUDE'), (5, 'CITY'), (6, 'METRO AREA NAME'), (7, 'STATE'), (8, 'ZIP CODE'), (9, 'PARENT COMPANIES'), (10, 'SUBPARTS'), (11, 'TOTAL REPORTED EMISSIONS, 2011'), (12, 'TOTAL REPORTED EMISSIONS, 2012'), (13, 'TOTAL REPORTED EMISSIONS, 2013'), (14, 'TOTAL REPORTED EMISSIONS, 2014'), (15, 'TOTAL REPORTED EMISSIONS, 2015'), (16, 'TOTAL REPORTED EMISSIONS, 2016'), (17, 'TOTAL REPORTED EMISSIONS, 2017'), (18, 'TOTAL REPORTED EMISSIONS, 2018'), (19, 'TOTAL REPORTED EMISSIONS, 2019'), (20, 'TOTAL REPORTED EMISSIONS, 2020'), (21, 'TOTAL REPORTED EMISSIONS, 2021'), (22, 'CHANGE IN EMISSIONS (2020 TO 2021)'), (23, 'CHANGE IN EMISSIONS (2011 TO 2021)'), (24, 'SECTORS')]
# Drop the SUBPARTS, the two CHANGE IN EMISSIONS and the SECTORS columns
# (positions 10, 22, 23 and 24 of the raw file). They are selected by label
# rather than by position so the drop keeps targeting the same fields even if
# the column order of the CSV changes.
unused_columns = [
    "SUBPARTS",
    "CHANGE IN EMISSIONS (2020 TO 2021)",
    "CHANGE IN EMISSIONS (2011 TO 2021)",
    "SECTORS",
]
df_prep.drop(columns=unused_columns, inplace=True)
df_prep.head()
FACILITY | GHGRP ID | REPORTED ADDRESS | LATITUDE | LONGITUDE | CITY | METRO AREA NAME | STATE | ZIP CODE | PARENT COMPANIES | TOTAL REPORTED EMISSIONS, 2011 | TOTAL REPORTED EMISSIONS, 2012 | TOTAL REPORTED EMISSIONS, 2013 | TOTAL REPORTED EMISSIONS, 2014 | TOTAL REPORTED EMISSIONS, 2015 | TOTAL REPORTED EMISSIONS, 2016 | TOTAL REPORTED EMISSIONS, 2017 | TOTAL REPORTED EMISSIONS, 2018 | TOTAL REPORTED EMISSIONS, 2019 | TOTAL REPORTED EMISSIONS, 2020 | TOTAL REPORTED EMISSIONS, 2021 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Boston University | 1002274 | 120 Ashford Street | 42.35395 | -71.12204 | Boston | Boston-Cambridge-Newton, MA-NH | MA | 2134 | TRUSTEES OF BOSTON UNIVERSITY (100%) | 53,903 | 44,367 | 44,797 | 49,319 | 50,937 | 51,247 | 43,986 | 50,356 | 43,827 | 37,558 | 47,622 |
1 | MIT Central Utility Plant | 1001289 | 59 VASSAR ST | 42.36110 | -71.09340 | CAMBRIDGE | Boston-Cambridge-Newton, MA-NH | MA | 2139 | MASSACHUSETTS INSTITUTE OF TECHNOLOGY (100%) | 141,908 | 137,864 | 140,448 | 145,000 | 141,797 | 128,376 | 106,789 | 131,470 | 123,487 | 111,369 | 131,848 |
2 | NORTHEASTERN UNIVERSITY | 1005277 | 360 HUNTINGTON AVENUE | 42.34012 | -71.08933 | BOSTON | Boston-Cambridge-Newton, MA-NH | MA | 2115 | NORTHEASTERN UNIVERSITY (100%) | 30,348 | 31,573 | 33,735 | 31,160 | 31,316 | 28,985 | 30,011 | 30,846 | 29,883 | 27,380 | 27,139 |
3 | The President and Fellows of Harvard Universit... | 1000656 | 46 Blackstone Street | 42.36360 | -71.11610 | Cambridge | Boston-Cambridge-Newton, MA-NH | MA | 2139 | PRESIDENT & FELLOWS OF HARVARD COLLEGE (100%) | 74,824 | 64,839 | 69,223 | 69,798 | 69,309 | 80,210 | 79,996 | 82,860 | 83,227 | 73,613 | 76,533 |
# Check which dtype pandas inferred for each remaining column.
df_prep.dtypes
FACILITY object GHGRP ID int64 REPORTED ADDRESS object LATITUDE float64 LONGITUDE float64 CITY object METRO AREA NAME object STATE object ZIP CODE int64 PARENT COMPANIES object TOTAL REPORTED EMISSIONS, 2011 object TOTAL REPORTED EMISSIONS, 2012 object TOTAL REPORTED EMISSIONS, 2013 object TOTAL REPORTED EMISSIONS, 2014 object TOTAL REPORTED EMISSIONS, 2015 object TOTAL REPORTED EMISSIONS, 2016 object TOTAL REPORTED EMISSIONS, 2017 object TOTAL REPORTED EMISSIONS, 2018 object TOTAL REPORTED EMISSIONS, 2019 object TOTAL REPORTED EMISSIONS, 2020 object TOTAL REPORTED EMISSIONS, 2021 object dtype: object
# Shorten the yearly emission headers to the bare year by removing the
# "TOTAL REPORTED EMISSIONS, " text from every label that contains it; labels
# without that text map to themselves and are left as they are.
df_prep.rename(
    columns={
        label: label.replace("TOTAL REPORTED EMISSIONS, ", "")
        for label in df_prep.columns
    },
    inplace=True,
)
df_prep.head()
FACILITY | GHGRP ID | REPORTED ADDRESS | LATITUDE | LONGITUDE | CITY | METRO AREA NAME | STATE | ZIP CODE | PARENT COMPANIES | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Boston University | 1002274 | 120 Ashford Street | 42.35395 | -71.12204 | Boston | Boston-Cambridge-Newton, MA-NH | MA | 2134 | TRUSTEES OF BOSTON UNIVERSITY (100%) | 53,903 | 44,367 | 44,797 | 49,319 | 50,937 | 51,247 | 43,986 | 50,356 | 43,827 | 37,558 | 47,622 |
1 | MIT Central Utility Plant | 1001289 | 59 VASSAR ST | 42.36110 | -71.09340 | CAMBRIDGE | Boston-Cambridge-Newton, MA-NH | MA | 2139 | MASSACHUSETTS INSTITUTE OF TECHNOLOGY (100%) | 141,908 | 137,864 | 140,448 | 145,000 | 141,797 | 128,376 | 106,789 | 131,470 | 123,487 | 111,369 | 131,848 |
2 | NORTHEASTERN UNIVERSITY | 1005277 | 360 HUNTINGTON AVENUE | 42.34012 | -71.08933 | BOSTON | Boston-Cambridge-Newton, MA-NH | MA | 2115 | NORTHEASTERN UNIVERSITY (100%) | 30,348 | 31,573 | 33,735 | 31,160 | 31,316 | 28,985 | 30,011 | 30,846 | 29,883 | 27,380 | 27,139 |
3 | The President and Fellows of Harvard Universit... | 1000656 | 46 Blackstone Street | 42.36360 | -71.11610 | Cambridge | Boston-Cambridge-Newton, MA-NH | MA | 2139 | PRESIDENT & FELLOWS OF HARVARD COLLEGE (100%) | 74,824 | 64,839 | 69,223 | 69,798 | 69,309 | 80,210 | 79,996 | 82,860 | 83,227 | 73,613 | 76,533 |
# Remove the thousands separators ("53,903" -> "53903") from every column from
# "2011" onwards so the values can be parsed as numbers afterwards.
#
# The vectorised `.str.replace` is used instead of `DataFrame.applymap` with a
# Python lambda: `applymap` is deprecated in recent pandas releases, and the
# string accessor passes missing values through instead of raising on them.
# Assigning through the column labels (not a `.loc` slice) always replaces the
# columns with the new values.
year_columns = df_prep.loc[:, "2011":].columns
df_prep[year_columns] = df_prep[year_columns].apply(
    lambda column: column.str.replace(",", "", regex=False)
)
df_prep.head()
FACILITY | GHGRP ID | REPORTED ADDRESS | LATITUDE | LONGITUDE | CITY | METRO AREA NAME | STATE | ZIP CODE | PARENT COMPANIES | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Boston University | 1002274 | 120 Ashford Street | 42.35395 | -71.12204 | Boston | Boston-Cambridge-Newton, MA-NH | MA | 2134 | TRUSTEES OF BOSTON UNIVERSITY (100%) | 53903 | 44367 | 44797 | 49319 | 50937 | 51247 | 43986 | 50356 | 43827 | 37558 | 47622 |
1 | MIT Central Utility Plant | 1001289 | 59 VASSAR ST | 42.36110 | -71.09340 | CAMBRIDGE | Boston-Cambridge-Newton, MA-NH | MA | 2139 | MASSACHUSETTS INSTITUTE OF TECHNOLOGY (100%) | 141908 | 137864 | 140448 | 145000 | 141797 | 128376 | 106789 | 131470 | 123487 | 111369 | 131848 |
2 | NORTHEASTERN UNIVERSITY | 1005277 | 360 HUNTINGTON AVENUE | 42.34012 | -71.08933 | BOSTON | Boston-Cambridge-Newton, MA-NH | MA | 2115 | NORTHEASTERN UNIVERSITY (100%) | 30348 | 31573 | 33735 | 31160 | 31316 | 28985 | 30011 | 30846 | 29883 | 27380 | 27139 |
3 | The President and Fellows of Harvard Universit... | 1000656 | 46 Blackstone Street | 42.36360 | -71.11610 | Cambridge | Boston-Cambridge-Newton, MA-NH | MA | 2139 | PRESIDENT & FELLOWS OF HARVARD COLLEGE (100%) | 74824 | 64839 | 69223 | 69798 | 69309 | 80210 | 79996 | 82860 | 83227 | 73613 | 76533 |
# Convert every column from "2011" onwards from strings to numbers.
#
# The columns are assigned through their labels instead of a `.loc` slice:
# slice assignment emits a FutureWarning because newer pandas versions write
# the values into the existing (object-dtype) columns in place, which would
# leave the dtype unchanged. Assigning by column key replaces each column and
# therefore adopts the numeric dtype.
year_columns = df_prep.loc[:, "2011":].columns
df_prep[year_columns] = df_prep[year_columns].apply(pd.to_numeric)
/var/folders/d0/zf8g3jwx1r38bb54k6n7g8wr0000gn/T/ipykernel_24704/1148899735.py:1: FutureWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)` df_prep.loc[:, "2011":] = df_prep.loc[:, "2011":].apply(pd.to_numeric)
# Preview the first rows of the frame.
df_prep.head()
FACILITY | GHGRP ID | REPORTED ADDRESS | LATITUDE | LONGITUDE | CITY | METRO AREA NAME | STATE | ZIP CODE | PARENT COMPANIES | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Boston University | 1002274 | 120 Ashford Street | 42.35395 | -71.12204 | Boston | Boston-Cambridge-Newton, MA-NH | MA | 2134 | TRUSTEES OF BOSTON UNIVERSITY (100%) | 53903 | 44367 | 44797 | 49319 | 50937 | 51247 | 43986 | 50356 | 43827 | 37558 | 47622 |
1 | MIT Central Utility Plant | 1001289 | 59 VASSAR ST | 42.36110 | -71.09340 | CAMBRIDGE | Boston-Cambridge-Newton, MA-NH | MA | 2139 | MASSACHUSETTS INSTITUTE OF TECHNOLOGY (100%) | 141908 | 137864 | 140448 | 145000 | 141797 | 128376 | 106789 | 131470 | 123487 | 111369 | 131848 |
2 | NORTHEASTERN UNIVERSITY | 1005277 | 360 HUNTINGTON AVENUE | 42.34012 | -71.08933 | BOSTON | Boston-Cambridge-Newton, MA-NH | MA | 2115 | NORTHEASTERN UNIVERSITY (100%) | 30348 | 31573 | 33735 | 31160 | 31316 | 28985 | 30011 | 30846 | 29883 | 27380 | 27139 |
3 | The President and Fellows of Harvard Universit... | 1000656 | 46 Blackstone Street | 42.36360 | -71.11610 | Cambridge | Boston-Cambridge-Newton, MA-NH | MA | 2139 | PRESIDENT & FELLOWS OF HARVARD COLLEGE (100%) | 74824 | 64839 | 69223 | 69798 | 69309 | 80210 | 79996 | 82860 | 83227 | 73613 | 76533 |
# Re-check the column dtypes.
df_prep.dtypes
FACILITY object GHGRP ID int64 REPORTED ADDRESS object LATITUDE float64 LONGITUDE float64 CITY object METRO AREA NAME object STATE object ZIP CODE int64 PARENT COMPANIES object 2011 int64 2012 int64 2013 int64 2014 int64 2015 int64 2016 int64 2017 int64 2018 int64 2019 int64 2020 int64 2021 int64 dtype: object
# Line chart of yearly emissions with one line per facility: index the frame
# by facility name, keep only the year columns and transpose so the years run
# along the x-axis.
# The year columns are selected by label ("2011" through "2021") rather than
# as "the last 11 columns", so the chart stays correct when this cell is
# re-run after further columns (e.g. "Cumulative") have been appended.
yearly_emissions = df_prep.set_index("FACILITY").loc[:, "2011":"2021"]
yearly_emissions.T.plot(figsize=(10, 5))
<AxesSubplot: >
# Total emissions per facility over 2011-2021 (row-wise sum of the year
# columns).
# The slice is bounded at "2021": an open-ended "2011": slice would also pick
# up the "Cumulative" column itself once it exists, so re-running this cell
# would add the previous total into the new one.
df_prep["Cumulative"] = df_prep.loc[:, "2011":"2021"].sum(axis=1)
df_prep.head()
FACILITY | GHGRP ID | REPORTED ADDRESS | LATITUDE | LONGITUDE | CITY | METRO AREA NAME | STATE | ZIP CODE | PARENT COMPANIES | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | Cumulative | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Boston University | 1002274 | 120 Ashford Street | 42.35395 | -71.12204 | Boston | Boston-Cambridge-Newton, MA-NH | MA | 2134 | TRUSTEES OF BOSTON UNIVERSITY (100%) | 53903 | 44367 | 44797 | 49319 | 50937 | 51247 | 43986 | 50356 | 43827 | 37558 | 47622 | 517919 |
1 | MIT Central Utility Plant | 1001289 | 59 VASSAR ST | 42.36110 | -71.09340 | CAMBRIDGE | Boston-Cambridge-Newton, MA-NH | MA | 2139 | MASSACHUSETTS INSTITUTE OF TECHNOLOGY (100%) | 141908 | 137864 | 140448 | 145000 | 141797 | 128376 | 106789 | 131470 | 123487 | 111369 | 131848 | 1440356 |
2 | NORTHEASTERN UNIVERSITY | 1005277 | 360 HUNTINGTON AVENUE | 42.34012 | -71.08933 | BOSTON | Boston-Cambridge-Newton, MA-NH | MA | 2115 | NORTHEASTERN UNIVERSITY (100%) | 30348 | 31573 | 33735 | 31160 | 31316 | 28985 | 30011 | 30846 | 29883 | 27380 | 27139 | 332376 |
3 | The President and Fellows of Harvard Universit... | 1000656 | 46 Blackstone Street | 42.36360 | -71.11610 | Cambridge | Boston-Cambridge-Newton, MA-NH | MA | 2139 | PRESIDENT & FELLOWS OF HARVARD COLLEGE (100%) | 74824 | 64839 | 69223 | 69798 | 69309 | 80210 | 79996 | 82860 | 83227 | 73613 | 76533 | 824432 |
import geopandas as gpd
# Turn the table into a GeoDataFrame with one point per facility, built from
# the LONGITUDE (x) and LATITUDE (y) columns.
# The coordinates are plain longitude/latitude degrees, so the CRS is declared
# explicitly as EPSG:4326; without it the geometry carries no CRS and cannot
# be reprojected or reliably combined with other layers.
facility_points = gpd.points_from_xy(df_prep["LONGITUDE"], df_prep["LATITUDE"])
geo_df = gpd.GeoDataFrame(df_prep, geometry=facility_points, crs="EPSG:4326")
geo_df.head()
FACILITY | GHGRP ID | REPORTED ADDRESS | LATITUDE | LONGITUDE | CITY | METRO AREA NAME | STATE | ZIP CODE | PARENT COMPANIES | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | Cumulative | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Boston University | 1002274 | 120 Ashford Street | 42.35395 | -71.12204 | Boston | Boston-Cambridge-Newton, MA-NH | MA | 2134 | TRUSTEES OF BOSTON UNIVERSITY (100%) | 53903 | 44367 | 44797 | 49319 | 50937 | 51247 | 43986 | 50356 | 43827 | 37558 | 47622 | 517919 | POINT (-71.12204 42.35395) |
1 | MIT Central Utility Plant | 1001289 | 59 VASSAR ST | 42.36110 | -71.09340 | CAMBRIDGE | Boston-Cambridge-Newton, MA-NH | MA | 2139 | MASSACHUSETTS INSTITUTE OF TECHNOLOGY (100%) | 141908 | 137864 | 140448 | 145000 | 141797 | 128376 | 106789 | 131470 | 123487 | 111369 | 131848 | 1440356 | POINT (-71.09340 42.36110) |
2 | NORTHEASTERN UNIVERSITY | 1005277 | 360 HUNTINGTON AVENUE | 42.34012 | -71.08933 | BOSTON | Boston-Cambridge-Newton, MA-NH | MA | 2115 | NORTHEASTERN UNIVERSITY (100%) | 30348 | 31573 | 33735 | 31160 | 31316 | 28985 | 30011 | 30846 | 29883 | 27380 | 27139 | 332376 | POINT (-71.08933 42.34012) |
3 | The President and Fellows of Harvard Universit... | 1000656 | 46 Blackstone Street | 42.36360 | -71.11610 | Cambridge | Boston-Cambridge-Newton, MA-NH | MA | 2139 | PRESIDENT & FELLOWS OF HARVARD COLLEGE (100%) | 74824 | 64839 | 69223 | 69798 | 69309 | 80210 | 79996 | 82860 | 83227 | 73613 | 76533 | 824432 | POINT (-71.11610 42.36360) |
import folium
# Read the boundary polygons from the shapefile and render them on their own
# as a quick visual check.
# NOTE(review): the file name looks like a Census cartographic boundary file
# for a whole state rather than Boston only -- confirm the variable name.
Boston_boundaries = gpd.read_file("cb_2022_25_sldl_500k/cb_2022_25_sldl_500k.shp")
Boston_boundaries.explore()

# Base map of the boundaries, centred on the first facility's coordinates,
# with lightly filled polygons and no hover tooltip.
base_layer = Boston_boundaries.explore(
    location=[42.35395, -71.12204],
    zoom_start=12,
    width="60%",
    height="60%",
    tooltip=False,
    style_kwds={"fillOpacity": 0.1},
    highlight_kwds={"fillOpacity": 0},
)

# Draw the facilities onto the base map as circles coloured by their
# cumulative emissions; clicking a circle shows name, address and total.
geo_df.explore(
    m=base_layer,
    column="Cumulative",
    cmap="autumn_r",
    marker_type="circle",
    marker_kwds={"radius": 4828},
    popup=["FACILITY", "REPORTED ADDRESS", "Cumulative"],
    tooltip=False,
)
#base_layer.save("<SomeFilename>.html")
base_layer
from folium.plugins import HeatMap
# One [latitude, longitude, weight] entry per facility, weighted by its
# cumulative emissions.
# NOTE(review): the weights are raw totals (hundreds of thousands); verify how
# HeatMap scales them and whether they should be normalised first.
heatmap_points = [
    list(point)
    for point in zip(
        geo_df["LATITUDE"],
        geo_df["LONGITUDE"],
        geo_df["Cumulative"],
    )
]

# Small light-themed map centred on the first facility's coordinates.
map_layer = folium.Map(
    location=[42.35395, -71.12204],
    zoom_start=11,
    tiles="cartodbpositron",
    width="25%",
    height="25%",
)
HeatMap(
    heatmap_points,
    radius=15,
    blur=5,
).add_to(map_layer)
#map_layer.save("<SomeFilename>.html")
map_layer