Commit 4ad77537 authored by Cédric Crettaz

Upload New File.

parent 212129d6
#!/usr/bin/env python3
# coding: utf-8
"""
Developed by Thanasis Anagnostis @ iBO/CERTH AgroIntelligence Lab
T5.2 Weather Forecasting Toolkit
Pilots: Alicante [ES], Brăila [RO], Carouge [CH]
Variables: Temperature [°C], Pressure [Pa], Humidity [%], WindSpeed [m/s]
"""
#####################################
# Import basic libraries
import logging
import os
import sys
import time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from utils import error_fill, import_api, transform_json, city_string
#####################################
# Import deep learning libraries
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Flatten
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
#####################################
plt.style.use("tableau-colorblind10") # https://matplotlib.org/3.1.0/gallery/style_sheets/style_sheets_reference.html
#####################################
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S')
logger = logging.getLogger()
# Load env variables
ORION_HOST = os.getenv('ORION', default='localhost')
ORION_PORT = os.getenv('ORION_PORT', default='1026')
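# NOTE: patch_pred below currently hard-codes the broker address; a minimal sketch of how
# these variables could compose the NGSI-v2 base URL instead (an assumption, not wired in):
# ORION_BASE = 'http://' + ORION_HOST + ':' + ORION_PORT + '/v2/entities'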
# Define training callbacks
# Add early stopping
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto',
baseline=None)
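# NOTE: with the Keras default restore_best_weights=False, training keeps the last-epoch
# weights rather than the best weights observed on val_loss.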
# Appoint architecture variables
# Set the lookback and prediction times
ahead = 24  # hours at the end of the dataset that are kept completely unknown (forecast horizon)
# To make the lookback work, take the daily SEASONALITY into consideration
n_steps = (3 * ahead) + 3  # how many past values are taken to produce a forecast (for 24 hours)
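# e.g. with ahead = 24, n_steps = (3 * 24) + 3 = 75 hourly lookback values per sample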
# Set up network's variables
lstm_units = 12
drop_val = 0.5
nb_epochs = 200
batch = 32
frozen_layers = 4
# Creates dataframe from JSON data
def create_df(data, idx_var, col_var):
# Create the dataframe
df = [] # create empty list to contain all values
for i in idx_var: # iterate over the indices of the variables we want
# print(i, attr_names[i])
df.append(data["attributes"][i]["values"]) # append data to main list
    df = np.array(df).T  # convert to array and transpose
df = pd.DataFrame(df, columns=col_var) # create dataframe
# Change the order of the columns
col_new = ['dateObserved', 'temperature', 'atmosphericPressure', 'relativeHumidity', 'windSpeed']
df = df[col_new]
# Let pandas find the datatypes # OPTIONAL
df = df.infer_objects()
# Change data types of datetime
df['dateObserved'] = pd.to_datetime(df['dateObserved'])
# Set dateObserved (datetime information) as index
df.set_index("dateObserved", inplace=True)
# Sort the dataframe based on the datetime
df.sort_values(by=['dateObserved'], inplace=True)
# Show number of NaNs
# display(df.isnull().sum())
# Handle NaNs per row
# for vv in df.columns:#['atmosphericPressure','relativeHumidity','temperature','windSpeed']:
# #df = df[df[vv].notna()] # keep where there is not NaN
# df[vv].fillna(df[vv].mean(), inplace=True) # Replace the NaN with the column's mean
# Drop remaining NaN
# df.dropna(axis=1, inplace=True) # columns -> bad practice
df.dropna(axis=0, how='any', inplace=True) # rows (even if only one NaN is present)
# Show number of NaNs
# display(df.isnull().sum())
# Drop variables (columns) that contain only zeros
df = df.loc[:, (df != 0).any(axis=0)]
# Drop entries (rows) that contain only zeros
    df = df[(df == 0).sum(axis=1) < len(["temperature", "atmosphericPressure",
                                         "relativeHumidity", "windSpeed"])]
# Change data types of columns to the appropriate ones
df = df.astype("float")
df["relativeHumidity"] = df["relativeHumidity"].astype("int")
# Delete duplicates
df.drop_duplicates(inplace=True)
    # Drop entries (rows) that contain zeros in atmosphericPressure or relativeHumidity (their product is zero iff either value is zero)
df = df.loc[df['atmosphericPressure'] * df['relativeHumidity'] != 0]
# Check for NaNs
print("NaNs in the dataframe =", df.isnull().sum().sum())
if df.isnull().sum().sum() != 0:
sys.exit('Dataframe has NaNs')
return df
#####################################
# Reshapes dataframe for the LSTM
def shape_data(df, wea_var, n_steps=72, ahead=24):
# Isolate single-variable timeseries
train = df[[wea_var]].values
# Normalize values
# Define scaler, fit and transform data
sc = MinMaxScaler(feature_range=(0, 1))
train_scaled = sc.fit_transform(train)
# Set up the data for the LSTM
# Set the lookback and prediction times
# the formula should be changed depending on the forecasting depth we want
# print("Forecasting timescale (in hours):",ahead)
# print("Lookback time (in hours) for input:",n_steps)
    # Create a data structure with n_steps timesteps and 1 output
X_train = []
y_train = []
for i in range(n_steps, len(train_scaled)):
X_train.append(train_scaled[i - n_steps:i, 0])
y_train.append(train_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
    # Reshape to the 3-D (samples, timesteps, features) input the LSTM expects
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
## Prepare the input - Create the output
# Getting the predicted value
inputs = train[len(train) - ahead - n_steps:]
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)
input_list = []
for i in range(n_steps, n_steps + ahead):
input_list.append(inputs[i - n_steps:i, 0])
input_list = np.array(input_list)
input_list = np.reshape(input_list, (input_list.shape[0], input_list.shape[1], 1))
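    # Example: with 1000 hourly rows, n_steps=75 and ahead=24, this yields X_train of
    # shape (925, 75, 1), y_train of shape (925,) and input_list of shape (24, 75, 1)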
return X_train, y_train, input_list, sc
#####################################
# Loads the pre-trained LSTM model and freezes its first nb_layers layers
def load_trained_model(nb_layers, model_name, lstm_units=12, drop_val=0.5):
# load model
regressor = load_model(model_name)
# Freeze layers
for layer in regressor.layers[:nb_layers]:
layer.trainable = False # Freeze all layers until nb_layers
    # Compile the RNN
regressor.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse', 'mae', 'mape']) # , 'acc'])
return regressor
#####################################
# Builds the new model as feature extractor and uses the weights from the saved model
def load_feature_extractor(X_train, regressor, nb_layers, lstm_units, drop_val):
new_regressor = Sequential()
# Adding the first LSTM layer and some Dropout regularisation
    new_regressor.add(LSTM(units=lstm_units, return_sequences=True, input_shape=(X_train.shape[1], 1)))  # i.e. (n_steps, 1)
new_regressor.add(Dropout(drop_val))
# Adding the second LSTM layer and some Dropout regularisation
new_regressor.add(LSTM(units=lstm_units, return_sequences=True))
new_regressor.add(Dropout(drop_val))
# Adding the third LSTM layer and some Dropout regularisation
# regressor.add(LSTM(units = lstm_units, return_sequences = True))
# regressor.add(Dropout(drop_val))
# Flattening the data
new_regressor.add(Flatten())
# Adding the new layer
new_regressor.add(Dense(units=24))
# Adding the output layer
new_regressor.add(Dense(units=1))
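    # Copy the weights of the first nb_layers from the pre-trained model; this assumes the
    # new architecture matches the saved one layer-for-layer up to that depth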
    for i, layer in enumerate(regressor.layers[:nb_layers]):
        new_regressor.layers[i].set_weights(layer.get_weights())
    # Compile the RNN
new_regressor.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse', 'mae', 'mape']) # , 'acc'])
return new_regressor
#####################################
# Prints post-training metrics (relies on the module-level t_arx/t_tel timestamps set around fit)
def print_metrics(history):
    # print(pilot, wea_var)
    print("\nModel training time:", str(round(t_tel - t_arx, 2)), "sec")
print("MSE =", history.history["mse"][-1])
print("MAE =", history.history["mae"][-1])
print("MAPE =", history.history["mape"][-1])
#####################################
# Creates the predictions dataframe
def output_df(predictions, df, wea_var, n_steps=72, ahead=24):
# Set proper indices
input_index = df.index[-(n_steps):] # input
pred_index = pd.date_range(input_index[-1], periods=ahead + 1, freq='H') # predictions
pred_index = pred_index[1:] # drop the first
tot_index = input_index.append(pred_index) # combined
# Transform array predictions to dataframe
df_pred = pd.DataFrame(predictions, index=pred_index, columns=[wea_var]) # create
df_pred.index.name = 'DateTime' # name the index
df_pred.index = df_pred.index.astype(str) + "Z" # rename the index values
df_pred.index = df_pred.index.str.replace(' ', 'T') # replace space with T
# IF NEEDED
# df_pred["DateTime"] = df_pred.index # set DateTimeIndex as column
# df_pred = df_pred[df_pred.columns[::-1]] # reverse the order of columns
# df_pred["DateTime"] = pd.to_datetime(df_pred["DateTime"]) # set datatype to datetime (but remove the Z)
# df_pred["DateTime"] = df_pred["DateTime"].apply(lambda x: x.isoformat().replace("+00:00", "Z")) # replace the +00:00 with a Z again (for some reason)
return df_pred, input_index, pred_index, tot_index
#####################################
# Creates a dataset of the previous inputs and the predictions for visualization
def viz_pred(train, predictions, input_index, pred_index, tot_index, wea_var, n_steps=72, ahead=24):
    df_final_1 = pd.DataFrame(train[-(n_steps):], index=input_index, columns=[wea_var])
    df_final_2 = pd.DataFrame(predictions, index=pred_index, columns=[wea_var])
    df_final = pd.concat([df_final_1, df_final_2])  # DataFrame.append was removed in pandas 2.0
if wea_var == "temperature":
title = 'Temperature Prediction'
yaxis = 'Temperature [°C]'
lookback_label = 'Previous Temperature Values'
pred_val_label = 'Predicted Temperature Values'
pred_trend_label = 'Predicted Temperature Trend'
elif wea_var == "atmosphericPressure":
title = 'Atmospheric Pressure Prediction'
yaxis = 'Pressure [Pa]'
lookback_label = 'Previous Pressure Values'
pred_val_label = 'Predicted Pressure Values'
pred_trend_label = 'Predicted Pressure Trend'
elif wea_var == "relativeHumidity":
title = 'Relative Humidity Prediction'
yaxis = 'Humidity [%]'
lookback_label = 'Previous Humidity Values'
pred_val_label = 'Predicted Humidity Values'
pred_trend_label = 'Predicted Humidity Trend'
elif wea_var == "windSpeed":
title = 'Wind Speed Prediction'
yaxis = 'Wind Speed [m/s]'
lookback_label = 'Previous Wind Speed Values'
pred_val_label = 'Predicted Wind Speed Values'
pred_trend_label = 'Predicted Wind Speed Trend'
else:
print("Variable Error")
sys.exit(-1)
# Create the plot with the lookback window and the 24h predictions
fig = plt.figure(figsize=(10, 2), dpi=80)
ax = fig.add_axes([0, 0, 1, 1])
# Rotate and align the tick labels so they look better
fig.autofmt_xdate()
# The timeseries
plt.plot(df_final[:-(ahead)], label=lookback_label)
plt.plot(df_final[-(ahead + 1):], 'r.', label=pred_val_label)
plt.plot(df_final[-(ahead + 1):], '--', color="black", label=pred_trend_label)
error_fill(pred_index, df_final_2[wea_var], 1 - history.history["mae"][-1], color="grey")
# Reduce the number of ticks
plt.xticks(tot_index.values)
plt.locator_params(axis='x', nbins=21)
plt.title(title + ' - ' + pilot.capitalize())
plt.xlabel('Datetime')
plt.ylabel(yaxis)
plt.legend()
    plt.show()
#####################################
# Patch the WeatherForecast on the platform
def patch_pred(predictions_dict, pilot):
url_patch = ['http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-Day0-0/attrs',
'http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-Day0-1/attrs',
'http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-Day0-2/attrs',
'http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-Day0-3/attrs']
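    # The four Day0-* entities cover the four consecutive 6-hour windows of the 24 h horizon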
    if pilot in ("alicante-1", "alicante-2"):
        pilot = "alicante"
# url_patch = ['http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-1-Day0-0/attrs',
# 'http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-1-Day0-1/attrs',
# 'http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-1-Day0-2/attrs',
# 'http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-1-Day0-3/attrs']
# url_patch = ['http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-2-Day0-0/attrs',
# 'http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-2-Day0-1/attrs',
# 'http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-2-Day0-2/attrs',
# 'http://5.53.108.182:1026/v2/entities/urn:ngsi-ld:WeatherForecast:WeatherForecast-2-Day0-3/attrs']
headers = {
'Fiware-Service': pilot,
'Content-Type': 'application/json',
}
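    # The Fiware-Service header selects the pilot's tenant on the context broker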
    i = 0  # counter stepping through the 6-hour windows (the slice i:i+5 below averages the first 5 hours of each window)
for url_p in url_patch:
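        # NOTE: dayMaximum and dayMinimum are both filled with the same window means below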
data = {"dayMaximum": {
"type": "StructuredValue",
"value": {
"feelsLikeTemperature": float(
list(predictions_dict.items())[0][1]["temperature"].values[int(i):int(i + 5)].mean()),
"temperature": float(
list(predictions_dict.items())[0][1]["temperature"].values[int(i):int(i + 5)].mean()),
"relativeHumidity": float(
list(predictions_dict.items())[1][1]["relativeHumidity"].values[int(i):int(i + 5)].mean())
},
"metadata": {}
},
"dayMinimum": {
"type": "StructuredValue",
"value": {
"feelsLikeTemperature": float(
list(predictions_dict.items())[0][1]["temperature"].values[int(i):int(i + 5)].mean()),
"temperature": float(
list(predictions_dict.items())[0][1]["temperature"].values[int(i):int(i + 5)].mean()),
"relativeHumidity": float(
list(predictions_dict.items())[1][1]["relativeHumidity"].values[int(i):int(i + 5)].mean())
},
"metadata": {}
},
"validFrom": {
"type": "DateTime",
"value": str(list(predictions_dict.items())[0][1].index[int(i)]),
"metadata": {}
},
"validTo": {
"type": "DateTime",
"value": str(list(predictions_dict.items())[0][1].index[int(i + 5)]),
"metadata": {}
},
"dateIssued": {
"type": "DateTime",
"value": str(list(predictions_dict.items())[0][1].index[int(i)]),
"metadata": {}
},
"windSpeed": {
"type": "Number",
"value": float(list(predictions_dict.items())[3][1]["windSpeed"].values[int(i):int(i + 5)].mean()),
"metadata": {}
},
}
i = i + 6
response = requests.request("PATCH", url_p, headers=headers, json=data)
if not (200 <= response.status_code < 300):
print(response.status_code, response.reason)
else:
print("Weather Forecast updated for", pilot.capitalize())
#####################################
# Load data
# Enter variables input
pilots = ["alicante-1", "braila", "carouge"] # alicante-1:city, alicante-2:airport
weather_vars = ["temperature", "relativeHumidity", "atmosphericPressure", "windSpeed"]
predictions_dict = {}
# Loop for all cities and all common weather variables
for pilot in pilots:
# Import the data
data = import_api(pilot)
# Transform JSON attributes
idx_var, col_var = transform_json(data)
# Create the dataset
df = create_df(data, idx_var, col_var)
# Formal pilot name string
pilotname = city_string(pilot)
# Variables' loop
for wea_var in weather_vars:
        # Transform the data for the LSTM
X_train, y_train, input_list, sc = shape_data(df, wea_var, n_steps=n_steps)
# Load the model
regressor = load_trained_model(nb_layers=frozen_layers,
model_name="pretrained_models/" + pilotname + "_" + wea_var + "_saved_model.h5",
lstm_units=lstm_units, drop_val=drop_val)
# Build a new feature extractor
new_regressor = load_feature_extractor(X_train, regressor, frozen_layers, lstm_units, drop_val)
# Measure time of fitting
t_arx = time.time()
# Fitting the RNN to the Training set
history = new_regressor.fit(X_train, y_train, epochs=nb_epochs, batch_size=batch,
validation_split=0.2, callbacks=[early_stopping], verbose=0)
t_tel = time.time()
# Print time and metrics
# print_metrics(history)
# Create the predictions
predictions = new_regressor.predict(input_list)
predictions = sc.inverse_transform(predictions)
# Transform to dataframes with datetime column in the proper format (Zulu)
df_pred, input_index, pred_index, tot_index = output_df(predictions, df, wea_var, n_steps, ahead)
# Visualize the predictions (COMMENT FOR PRODUCTION)
# viz_pred(df[[wea_var]].values, predictions, input_index, pred_index, tot_index, wea_var, n_steps, ahead)
# Append to dictionary
key = pilot + '_' + wea_var
predictions_dict[key] = df_pred
    # Summarize and PATCH the predictions to the platform (disable when not in production)
    patch_pred(predictions_dict, pilot)