# Import required packages
import earthpy # Helps with downloading climate data
import pandas as pd # Helps with calculations and data transformations
import matplotlib.pyplot as plt # Create plots
from sklearn.linear_model import LinearRegression # Create interactive plots!
import numpy as np # Helps with calculations and data transformations
import hvplot.pandas # Interactive plotting feature
import holoviews as hv # This helps with formatting the interactive trendline plot
from bokeh.models import NumeralTickFormatter # This helps with formatting the interactive trendline plot

#Load the station data for Aspen from the NCEI website
aspen_station = ('https://www.ncei.noaa.gov/access/services/da'
'ta/v1?dataset=daily-summaries&dataTypes=TOBS&stations=USC00050372&startDate=1980-07-01&endDate=2025-07-31&units=standard')
aspen_station

'https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=TOBS&stations=USC00050372&startDate=1980-07-01&endDate=2025-07-31&units=standard'

# Download the climate data in Python
aspen_climate_df = pd.read_csv(
    aspen_station,
    index_col='DATE',
    parse_dates=True,
    na_values=['NaN']
)

# Check that the download worked
aspen_climate_df.head()

# Save the climate data
aspen_climate_df.to_csv('aspen_temp_data.csv')

# Check that data was imported into a pandas DataFrame
type(aspen_climate_df)

pandas.core.frame.DataFrame

# Remove station column in dataframe 
# We are only using 1 station, so don't need this column
aspen_climate_df = aspen_climate_df[["TOBS"]]
aspen_climate_df

# Rename the column name to a more descriptive name
aspen_units_f = aspen_climate_df.rename(columns={
    'TOBS': 'temp_f',
})
aspen_units_f

#Adding a column of temp converted to C
aspen_units_c_f = aspen_units_f
aspen_units_c_f['temp_c'] = ((aspen_units_f['temp_f']-32)*5/9)

#Create a new dataframe with just temp C
aspen_units_c = aspen_units_c_f[["temp_c"]]
aspen_units_c

# Plot the data using .plot
aspen_units_c.plot(
    y='temp_c',
    title='Aspen Daily Temperature Data From 1980 to 2025',
    xlabel='Date',
    ylabel='Temperature (C)')

<Axes: title={'center': 'Aspen Daily Temperature Data From 1980 to 2025'}, xlabel='Date', ylabel='Temperature (C)'>

# Using pandas to resample from daily to annual temperature
# 'YS' stands for 'Year Begin' in pandas
# Below we are resampling to once a year (YS), and recording the mean temperature of each year
# We are saving the new resampled data in the dataframe called aspen_ann_climate_df
# This new dataframe should have just one temp value per year
aspen_ann_climate_df = (
    aspen_units_c
    .resample('YS') 
    .mean()
)

# Plot the annual data
aspen_ann_climate_df.plot(
    y='temp_c',
    title='Annual Average Temperature in Aspen From 1980 to 2025',
    xlabel='Year',
    ylabel='Temperature (°C)',
    label='Temperature °C'
)

<Axes: title={'center': 'Annual Average Temperature in Aspen From 1980 to 2025'}, xlabel='Year', ylabel='Temperature (°C)'>

# Create an interactive version using hvplot
interactive_plot = aspen_ann_climate_df.hvplot(
    y="temp_c",
    title="Annual Average Temperature in Aspen From 1980 to 2025",
    xlabel="Year",
    ylabel="Temperature (°C)",
    label="Temperature (°C)"
)
interactive_plot

# Workflow for fitting trendline using OLS
# Need to reset the index to make sure that the 'DATE' column is recognized
aspen_ann_climate_df = aspen_ann_climate_df.reset_index()

# Convert 'DATE' column values to pandas datetime objects
# It is easier to extract just the year if the values are in this format
aspen_ann_climate_df["DATE"] = pd.to_datetime(aspen_ann_climate_df["DATE"])

# The line below is extracting just the year (we don't want months or days)
aspen_ann_climate_df["Year"] = aspen_ann_climate_df["DATE"].dt.year

# Remove Nan values. The Linear regression will run into errors if we keep the Nan values
aspen_ann_climate_df_clean = aspen_ann_climate_df.dropna(subset=["temp_c"])

# Assigning the X and y values for the linear regression feature from sklearn
# This creates numpy arrays of the year and temp columns
X = aspen_ann_climate_df_clean[["Year"]].values
y = aspen_ann_climate_df_clean["temp_c"].values

# Method for running the linear regression using sklearn
model = LinearRegression()
model.fit(X, y)

# finding the slope with .coef_
slope = model.coef_[0]

# finding the intercept with .intercept
intercept = model.intercept_

print("Slope:", round(slope, 4), "°C per year")
print("Intercept:", round(intercept, 4))

Slope: 0.0061 °C per year
Intercept: -10.5503

# Create a new figure object using matplotlib and define its size
plt.figure(figsize=(10, 6))

# Plot the temperature data
plt.scatter(
    aspen_ann_climate_df["Year"],
    aspen_ann_climate_df["temp_c"],
    label="Temperature (°C)",
    alpha=0.7
)

# The line below finds the min and max of "year"
# This way, our trendline won't extend off the graph of the temp data
x_vals = np.linspace(
    aspen_ann_climate_df["Year"].min(),
    aspen_ann_climate_df["Year"].max()
)

# Applying the y=mx+b formula for lines
y_trend = slope * x_vals + intercept

# plotting the line
plt.plot(
    x_vals,
    y_trend,
    color="red",
    linewidth=2,
    label="OLS trend line"
)

# Labels and title
plt.title("Annual Average Temperature in Aspen (1980–2025)")
plt.xlabel("Year")
plt.ylabel("Temperature (°C)")
plt.legend()

plt.show()

# With the help of ChatGPT, I created this interactive plot that includes the trendline.
# Ensure Bokeh is the backend for hvplot
hv.extension("bokeh")

# linear regression
X = aspen_ann_climate_df_clean[["Year"]].values
y = aspen_ann_climate_df_clean["temp_c"].values

model = LinearRegression()
model.fit(X, y)

slope = model.coef_[0]
intercept = model.intercept_

print("Slope:", round(slope, 4), "°C/year") # round to 4 decimals
print("Intercept:", round(intercept, 4))

# plot observed data 
interactive_plot = aspen_ann_climate_df_clean.hvplot.scatter(
    x="Year",
    y="temp_c",
    alpha=0.7,
    label="Temperature (°C)",
    xlabel="Year",
    ylabel="Temperature (°C)",
    title="Annual Average Temperature in Aspen (1980–2025)",
    size=20,
    color="blue",
    nan_policy="raise"
)

# Generate trendline points
x_vals = np.linspace(
    aspen_ann_climate_df_clean["Year"].min(),
    aspen_ann_climate_df_clean["Year"].max(),
    100
)
y_trend = slope * x_vals + intercept

# Convert trendline into a dataframe so hvplot can plot it
trend_df = pd.DataFrame({
    "Year": x_vals.astype(int),  # integer years
    "OLS trend line": y_trend
})

trend_plot = trend_df.hvplot.line(
    x="Year",
    y="OLS trend line",
    color="red",
    line_width=3,
    label="OLS trend line"
)

# function to center title and fix year formatting
def format_plot(plot, element):
    plot.state.title.align = "center"
    plot.state.xaxis.formatter = NumeralTickFormatter(format="0")  # force integer year labels

# Combine plots and apply formatting
final_plot = (interactive_plot * trend_plot).opts(
    title="Annual Average Temperature in Aspen (1980–2025)",
    hooks=[format_plot],
    framewise=True,
    padding=0.05,
    legend_position="top_right"
)

import hvplot
hvplot.save(final_plot, "aspen_temp_interactive.html")

final_plot

WARNING:param.main: nan_policy option not found for scatter plot with bokeh; similar options include: []

Slope: 0.0061 °C/year
Intercept: -10.5503

	STATION	TOBS
DATE
1980-07-01	USC00050372	62.0
1980-07-02	USC00050372	55.0
1980-07-03	USC00050372	58.0
1980-07-04	USC00050372	60.0
1980-07-05	USC00050372	50.0

	TOBS
DATE
1980-07-01	62.0
1980-07-02	55.0
1980-07-03	58.0
1980-07-04	60.0
1980-07-05	50.0
...	...
2025-07-27	61.0
2025-07-28	54.0
2025-07-29	62.0
2025-07-30	53.0
2025-07-31	51.0

	temp_f
DATE
1980-07-01	62.0
1980-07-02	55.0
1980-07-03	58.0
1980-07-04	60.0
1980-07-05	50.0
...	...
2025-07-27	61.0
2025-07-28	54.0
2025-07-29	62.0
2025-07-30	53.0
2025-07-31	51.0

	temp_c
DATE
1980-07-01	16.666667
1980-07-02	12.777778
1980-07-03	14.444444
1980-07-04	15.555556
1980-07-05	10.000000
...	...
2025-07-27	16.111111
2025-07-28	12.222222
2025-07-29	16.666667
2025-07-30	11.666667
2025-07-31	10.555556