Chase Dawson
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import os
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
%matplotlib inline
def read_files(basename, spatial_units):
    """Read zonal-statistics CSVs named 'landsat8_{basename}_{spatial_unit}.csv' into a dict keyed by spatial unit."""
    data = {}
    for spatial_unit in spatial_units:
        path = 'landsat8_{basename}_{spatial_unit}.csv'.format(basename=basename, spatial_unit=spatial_unit)
        df = pd.read_csv(path, index_col=0)
        data[spatial_unit] = df
    return data
def read_spatial_data(basename, clip_coast=False):
    """
    Reads in spatial data that is in the format 'basename_spatialUnit.shp'.

    Parameters
    ----------
    basename : str, required
        Base name of shape files. For example, if the files you want to read are of the format
        'cville_counties.shp', 'cville_blocks.shp', etc. then the basename is 'cville'.
    clip_coast : bool, optional (default is False)
        If True, shp data will be compared to coastline data and oceans will be clipped out.

    Output
    ------
    dictionary containing geopandas dataframes for each spatial unit
    """
    print('Reading spatial data for {basename}...'.format(basename=basename))
    # read in coastline data
    if clip_coast:
        ocean = gpd.read_file('water/ne_10m_ocean.shp')
    # store current working directory
    og_wd = os.getcwd()
    # change working directory to where spatial data is located
    os.chdir("../spatial_units/data")
    # create empty dictionary
    data = {}
    if clip_coast:
        spatial_units = ['blocks', 'blkgps', 'tracts']
        # read in counties and clip first
        print("Reading counties...")
        counties = gpd.read_file(basename + '_counties.shp')
        ocean = ocean.to_crs(counties.crs)
        counties = gpd.overlay(counties, ocean, how='difference')
        data['counties'] = counties
        print("Done.")
        # read in the rest of the shapefiles, clip each to the ocean-clipped counties, store as keys in dict
        for spatial_unit in spatial_units:
            print("Reading {spatial_unit}...".format(spatial_unit=spatial_unit))
            # read in shp file
            shp = gpd.read_file(basename + '_{spatial_unit}.shp'.format(spatial_unit=spatial_unit))
            # convert the clipped counties to the CRS of shp
            counties = counties.to_crs(shp.crs)
            # clip out the ocean by intersecting with the clipped county geometries
            shp = gpd.overlay(shp, counties[['geometry']], how='intersection', keep_geom_type=True)
            # add to dict with spatial unit as key
            data[spatial_unit] = shp
            print("Done.")
    else:
        spatial_units = ['counties', 'blocks', 'blkgps', 'tracts']
        for spatial_unit in spatial_units:
            print("Reading {spatial_unit}...".format(spatial_unit=spatial_unit))
            shp = gpd.read_file(basename + '_{spatial_unit}.shp'.format(spatial_unit=spatial_unit))
            data[spatial_unit] = shp
            print("Done.")
    # reset back to original working directory
    os.chdir(og_wd)
    return data
cville_data = read_files('cville', ['counties', 'tracts', 'blkgps'])
cville_shp = read_spatial_data('cville')
Reading spatial data for cville... Reading counties... Done. Reading blocks... Done. Reading blkgps... Done. Reading tracts... Done.
easternShore_data = read_files('easternShore', ['counties', 'tracts', 'blkgps'])
easternShore_shp = read_spatial_data('eastshore', clip_coast = True)
Reading spatial data for eastshore... Reading counties... Done. Reading blocks... Done. Reading blkgps... Done. Reading tracts... Done.
cols_wanted = {
    'counties': ['STATEFP', 'COUNTYFP', 'COUNTYNS', 'GEOID', 'NAME', 'geometry'],
    'tracts': ['STATEFP', 'COUNTYFP', 'TRACTCE', 'GEOID', 'NAME', 'NAMELSAD', 'geometry'],
    'blkgps': ['STATEFP', 'COUNTYFP', 'TRACTCE', 'BLKGRPCE', 'GEOID', 'NAMELSAD', 'geometry']
}
def merge_geoms(data, shp):
    """Join each zonal-statistics table to its geometries on GEOID and return GeoDataFrames."""
    for spatial_unit in data:
        # keep only the identifying columns and the geometry
        shp[spatial_unit] = shp[spatial_unit][cols_wanted[spatial_unit]]
        # cast GEOID to str on both sides so the join keys match
        data[spatial_unit]["GEOID"] = data[spatial_unit]["GEOID"].astype(str)
        shp[spatial_unit]["GEOID"] = shp[spatial_unit]["GEOID"].astype(str)
        data[spatial_unit] = data[spatial_unit].set_index("GEOID").join(shp[spatial_unit].set_index("GEOID"), how="inner")
        data[spatial_unit] = gpd.GeoDataFrame(data[spatial_unit])
    return data
cville = merge_geoms(cville_data, cville_shp)
easternShore = merge_geoms(easternShore_data, easternShore_shp)
cville['counties'].columns
Index(['file', 'min', 'max', 'mean', 'count', 'median', 'nodata', 'masked_count', 'spatial_unit', 'basename', 'full_path', 'prop_nodata', 'entity_id', 'start_date', 'end_date', 'start_year', 'end_year', 'cloud_cover', 'area', 'shp_percent', 'spatial_bounds', 'spatial_coverage', 'STATEFP', 'COUNTYFP', 'COUNTYNS', 'NAME', 'geometry'], dtype='object')
cville_files = cville['counties'].file.value_counts().index.tolist()
cville_files
['LC08_L2SP_016033_20200906_20200918_02_T1', 'LC08_L2SP_016034_20200906_20200918_02_T1', 'LC08_L2SP_015034_20200713_20200912_02_T1', 'LC08_L2SP_016033_20200922_20201005_02_T1', 'LC08_L2SP_016034_20200720_20210330_02_T1', 'LC08_L2SP_016034_20200704_20200913_02_T1', 'LC08_L2SP_016034_20200922_20201005_02_T1']
import numpy as np
def plot_file(file, data, col="median", cmap="inferno"):
    """Plot choropleths of `col` for one scene at the county, tract, and block-group levels."""
    spatial_units = ['counties', 'tracts', 'blkgps']
    f, axs = plt.subplots(3, figsize=(25, 25))
    plt.subplots_adjust(wspace=0.25, hspace=0.25)
    # set axes index
    axi = 0
    for spatial_unit in spatial_units:
        ax = axs[axi]
        to_plot = data[spatial_unit]
        to_plot = to_plot[to_plot.file == file]
        # set to NaN values whose prop_nodata is above a threshold
        # to_plot['median'] = to_plot.apply(lambda x: x['median'] if x.prop_nodata < nodata_threshold else np.nan, axis=1)
        basename = to_plot.basename.iloc[0]
        date = to_plot.start_date.iloc[0]
        # set labels
        ax.set_title('{file} {basename} {spatial_unit} {date}'.format(file=file, basename=basename, spatial_unit=spatial_unit, date=date))
        ax.set(xlabel='Longitude', ylabel='Latitude')
        # attach a colorbar axis the same height as the map
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.1)
        to_plot.plot(col, cmap=cmap, legend=True, ax=ax, cax=cax, missing_kwds={'color': 'lightgray'})
        # cax.set_ylabel("Degrees Fahrenheit")
        axi += 1
This section examines the median surface temperature. The Landsat 8 dataset consists of scenes; a scene is essentially a picture of some part of the Earth, stored as a raster. From those scenes, we can compute zonal statistics for a given set of geometries, such as county, tract, or block group boundaries. There has been some difficulty computing zonal stats at the block level, so we exclude blocks for now.
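The zonal-statistics step itself isn't shown in this notebook, but a minimal sketch of how statistics like those in the CSVs could be computed, assuming the rasterstats package ('landsat8_scene.tif' below is a hypothetical surface-temperature raster):
from rasterstats import zonal_stats
# geometries are assumed to already be in the raster's CRS
geoms = gpd.read_file('cville_counties.shp')
stats = zonal_stats(geoms, 'landsat8_scene.tif',  # hypothetical scene raster
                    stats=['min', 'max', 'mean', 'count', 'median'])
# zonal_stats returns one dict of statistics per geometry, in order
geom_stats = pd.concat([geoms.reset_index(drop=True), pd.DataFrame(stats)], axis=1)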
The data for a given scene is captured at a specific date and time, so there isn't necessarily a single data source we can use for the surface temperature. In this exploratory part, we iterate over all scenes and all spatial units and produce choropleth plots of the median surface temperature in degrees Fahrenheit. This means there are a LOT of plots!
A given plot is titled with the format "filename basename spatial_unit date", where filename is the name of the file, basename is the name of the region (either "cville" for Charlottesville or "easternShore" for the Eastern Shore), spatial_unit is the spatial unit at which the zonal stats were computed (counties, tracts, or block groups), and date is the date when the data was acquired (or when the satellite took a photo of that region).
If a geographic region (region in this sense means one geometry, so one county, tract, or block group) is fully missing data, it will be grayed out. I discuss and explore missingness in further detail later on.
for file in cville_files:
    plot_file(file, cville)
# plot all data, just for the City of Charlottesville (COUNTYFP 540)
just_cville = {x: cville[x][cville[x].COUNTYFP == "540"] for x in cville}
for file in cville_files:
    plot_file(file, just_cville)
easternShore_files = easternShore['counties'].file.value_counts().index.tolist()
easternShore_files
['LC08_L2SP_014034_20200722_20200911_02_T1']
for file in easternShore_files:
    plot_file(file, easternShore)
Since we're looking at several scenes, each a photo of the Earth at a specific date and time, some regions aren't fully covered by any one scene. This particularly happens with the Charlottesville region, where no single scene fully covers the region. Because of this, I thought it was important to explore the missingness of the data for each file.
In the plots above, some regions might be partially missing data, which is hard to show visually. I grayed out regions that had no data at all, but wasn't able to show which regions were missing a significant share of their data. Below, I explore missingness in the surface temperature data.
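Before the plots, here is a quick numeric summary of the same question, using only columns already present in the merged tables (this assumes prop_nodata is a proportion in [0, 1], so a value of 1 means a geometry is entirely missing):
counties = cville['counties']
missing_summary = counties.groupby('file').agg(
    mean_prop_nodata=('prop_nodata', 'mean'),
    max_prop_nodata=('prop_nodata', 'max'),
    # number of counties a scene misses entirely
    n_fully_missing=('prop_nodata', lambda s: int((s == 1).sum())),
)
missing_summary.sort_values('mean_prop_nodata')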
The first plot below demonstrates how a single scene doesn't fully cover the Charlottesville region. The red boxes represent the spatial coverage of two different scenes, and the blue polygons represent the county lines for the Charlottesville region.
from shapely.geometry import Polygon
import json
one = cville['counties'][cville['counties'].file == cville_files[0]]
one = one.to_crs("EPSG:4326")
one = one.iloc[[0]].drop(columns=['geometry'])
# spatial_coverage holds a JSON list of (lon, lat) corner points; use .iloc
# since the frame is indexed by GEOID, not by position
one['geometry'] = Polygon([(x[0], x[1]) for x in json.loads(one.spatial_coverage.iloc[0])])
two = cville['counties'][cville['counties'].file == cville_files[6]]
two = two.to_crs("EPSG:4326")
two = two.iloc[[0]].drop(columns=['geometry'])
two['geometry'] = Polygon([(x[0], x[1]) for x in json.loads(two.spatial_coverage.iloc[0])])
f, ax = plt.subplots(figsize=(25, 25))
ax.set_title('Spatial Coverage of 2 Scenes Over Charlottesville Region', fontsize=26)
one.plot(ax=ax, color = 'red', alpha = 0.10)
two.plot(ax=ax, color = 'red', alpha = 0.10)
cville['counties'].boundary.plot(ax=ax)
plt.show()
The figures below visualize the proportion of missing data in each geometry for each scene. As you can see, some scenes miss certain regions entirely.
for file in cville_files:
    plot_file(file, cville, col="prop_nodata", cmap="Reds")
With the Eastern Shore, some scenes cover the whole region, so missingness is not as much of a problem. It is puzzling, however, that there is still some missing data; this might come from the underlying data itself.
for file in easternShore_files:
    plot_file(file, easternShore, col="prop_nodata", cmap="Reds")
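One way to chase down that leftover missingness is to list the geometries with nonzero prop_nodata and compare against the scene's cloud cover; a sketch, assuming the Eastern Shore tables share the column schema shown earlier for cville:
leftover = easternShore['counties']
leftover = leftover[leftover.prop_nodata > 0]
# if masked_count and cloud_cover track the gaps, the missing pixels likely
# come from cloud masking in the scene itself rather than scene coverage
leftover[['file', 'NAME', 'prop_nodata', 'masked_count', 'cloud_cover']].sort_values('prop_nodata', ascending=False)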