Simple Time Series Prediction Using XGBoost (Python)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.metrics import mean_squared_error
# Plot styling shared by the figures in this notebook.
color_pal = sns.color_palette()
plt.style.use('fivethirtyeight')

# Load 'AAPL Hourly.csv' and keep only the closing price, indexed by timestamp.
df = pd.read_csv('AAPL Hourly.csv')
df = df[['timestamp', 'close']]
df = df.set_index('timestamp')
# Parse the index before the first plot so the x-axis is a real time axis
# instead of a sequence of raw timestamp strings.
df.index = pd.to_datetime(df.index)

df.plot(style='.', figsize=(15, 5), color=color_pal[0], title='stock growth')
plt.show()

Simple Time Series Prediction Using XGBoost (Python)

# Preview the first rows of the loaded frame.
df.head()

Simple Time Series Prediction Using XGBoost (Python)

# Average closing price over every row currently in df (computed before any
# train/test split is made).
mean  = df['close'].mean()
mean
162.3033402402023
# Convert the index to datetimes; the date comparisons and the .hour / .month
# style accessors used further down need a DatetimeIndex.
df.index = pd.to_datetime(df.index)

Train / Test Split

# Single source of truth for the chronological hold-out boundary: rows before
# this date train the model, rows on or after it are kept for evaluation.
# ISO format avoids any day/month ambiguity.
SPLIT_DATE = '2023-01-01'

train = df.loc[df.index < SPLIT_DATE]
test = df.loc[df.index >= SPLIT_DATE]

fig, ax = plt.subplots(figsize=(15, 5))
train.plot(ax=ax, label='Training Set', title='Data Train/Test Split')
test.plot(ax=ax, label='Test Set')
# Mark the boundary with a real timestamp rather than relying on string
# conversion by the axis.
ax.axvline(pd.Timestamp(SPLIT_DATE), color='black', ls='--')
ax.legend(['Training Set', 'Test Set'])
plt.show()

Simple Time Series Prediction Using XGBoost (Python)

# Display the frame as the cell output.
df

Simple Time Series Prediction Using XGBoost (Python)

# Select the rows strictly between the two dates and plot that single week.
after_start = df.index > '01/01/2022'
before_end = df.index < '01/08/2022'
df.loc[after_start & before_end].plot(figsize=(15, 5), title='Week Of Data')
plt.show()

Simple Time Series Prediction Using XGBoost (Python)

Feature Creation

def create_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with calendar features derived from its index.

    The input frame is not modified. The index must expose the datetime
    accessors used below (i.e. be a ``DatetimeIndex``).

    Added columns: ``hour``, ``dayofweek``, ``quarter``, ``month``, ``year``,
    ``dayofyear``, ``dayofmonth`` and ``weekofyear`` (ISO week number).
    """
    df = df.copy()  # work on a copy so the caller's frame stays untouched
    df['hour'] = df.index.hour
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['dayofmonth'] = df.index.day
    # isocalendar() returns a frame indexed like df, so the assignment aligns.
    df['weekofyear'] = df.index.isocalendar().week
    return df
# Add the calendar feature columns to the full frame.
df = create_features(df)

Visualize Our Feature / Target Relationship

# Bar chart of the closing price grouped by hour of day.
fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(x='hour', y='close', data=df, ax=ax)  # draw on the axes just created
ax.set_title('price by hour')
plt.show()

Simple Time Series Prediction Using XGBoost (Python)

# Bar chart of the closing price grouped by calendar month.
fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(x='month', y='close', data=df, palette='Blues', ax=ax)  # draw on the axes just created
ax.set_title('by month')
plt.show()

Simple Time Series Prediction Using XGBoost (Python)

Create Our Model

# Build the calendar features separately for each split.
train = create_features(train)
test = create_features(test)

FEATURES = ['dayofyear', 'hour', 'dayofweek', 'quarter', 'month', 'year']
TARGET = 'close'

X_train, y_train = train[FEATURES], train[TARGET]
X_test, y_test = test[FEATURES], test[TARGET]

# NOTE: the training log recorded after this cell was produced with the
# previous settings (base_score taken from the whole dataset and
# early_stopping_rounds=1000); it shows validation RMSE rising between
# round 0 and round 100.
reg = xgb.XGBRegressor(
    # Start boosting from the training-set mean only, so no information from
    # the held-out period leaks into the model.
    base_score=float(y_train.mean()),
    n_estimators=1000,
    # Patience must be smaller than n_estimators; with both at 1000 early
    # stopping could never trigger.
    early_stopping_rounds=50,
    objective='reg:squarederror',
    max_depth=3,
    learning_rate=0.1,
)
reg.fit(
    X_train, y_train,
    # Both sets are reported every `verbose` rounds.
    eval_set=[(X_train, y_train), (X_test, y_test)],
    verbose=100,
)
[0] validation_0-rmse:13.77227 validation_1-rmse:19.10013
[100] validation_0-rmse:2.21008 validation_1-rmse:28.93545
[200] validation_0-rmse:1.69919 validation_1-rmse:29.13252
[300] validation_0-rmse:1.53636 validation_1-rmse:29.19358
[400] validation_0-rmse:1.43590 validation_1-rmse:29.21800
[500] validation_0-rmse:1.36775 validation_1-rmse:29.23179
[600] validation_0-rmse:1.30781 validation_1-rmse:29.24437
[700] validation_0-rmse:1.26732 validation_1-rmse:29.24926
[800] validation_0-rmse:1.23259 validation_1-rmse:29.25145
[900] validation_0-rmse:1.20048 validation_1-rmse:29.25439
[999] validation_0-rmse:1.17463 validation_1-rmse:29.25731
XGBRegressor(base_score=162.3033402402023, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
             early_stopping_rounds=1000, enable_categorical=False,
             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,
             max_delta_step=0, max_depth=3, max_leaves=0, min_child_weight=1,
             missing=nan, monotone_constraints='()', n_estimators=1000,
             n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
             reg_alpha=0, reg_lambda=1, ...)

Forecast on Test

# Store the model's forecast next to the actual values for the test rows.
test = test.assign(prediction=reg.predict(X_test))
test

Simple Time Series Prediction Using XGBoost (Python)

# Average of the predicted values over the test rows.
test['prediction'].mean()
161.7173
import matplotlib.pyplot as plt

# Overlay the predictions on the actual closing price for the test rows.
plt.figure(figsize=(20, 6))
plt.plot(test['close'], label='Actual', color='blue')
plt.plot(test['prediction'], label='Predicted', color='red')
plt.legend()  # without this the label= arguments above are never displayed
plt.show()

Simple Time Series Prediction Using XGBoost (Python)

# Attach the predictions to the full frame by joining on the timestamp index.
# Only the 'prediction' column is brought over, so 'close' and the feature
# columns are not duplicated with _x/_y suffixes. The left join keeps every
# row of df; rows without a prediction get NaN.
df = df.merge(test[['prediction']], how='left', left_index=True, right_index=True)
df.tail()

Simple Time Series Prediction Using XGBoost (Python)

# Plot the actual closing price and keep the axes so more series can be drawn on it.
ax = df[['close']].plot(figsize=(15, 5))

Simple Time Series Prediction Using XGBoost (Python)

# Draw the predictions as dots on the same axes as the truth line.
prediction_series = df['prediction']
prediction_series.plot(ax=ax, style='.')
ax.legend(['Truth Data', 'Predictions'])
ax.set_title('Raw Data and Prediction')
plt.show()
# Zoom in on one week of truth vs. prediction. The window starts at the
# train/test split date (2023-01-01) because predictions only exist for the
# test rows, and the target column in this dataset is 'close'.
week = df.loc[(df.index > '01-01-2023') & (df.index < '01-08-2023')]
ax = week['close'].plot(figsize=(15, 5), title='Week Of Data')
week['prediction'].plot(ax=ax, style='.')  # draw explicitly on the same axes
ax.legend(['Truth Data', 'Prediction'])
plt.show()

Score (RMSE)

# Root-mean-squared error between actual and predicted close on the test rows.
score = np.sqrt(
    mean_squared_error(y_true=test['close'], y_pred=test['prediction'])
)
print(f'RMSE Score on Test set: {score:0.2f}')

RMSE Score on Test set: 28.61

Content generated by AI

Simple Time Series Prediction Using XGBoost (Python)

Edit / Fan Ruqiang

Review / Fan Ruqiang

Verification / Fan Ruqiang

Click below

Follow us

Leave a Comment