# plotly standard imports
import plotly.graph_objs as go
import chart_studio.plotly as py
# Cufflinks wrapper on plotly
import cufflinks
# Data science imports
import pandas as pd
import numpy as np
# Pandas display option: render up to 30 columns when a DataFrame is shown
pd.options.display.max_columns = 30
# Display the output of every expression in a cell ('all'), per the IPython
# InteractiveShell setting below
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
from plotly.offline import iplot, init_notebook_mode
# Put cufflinks and plotly into offline notebook mode.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook - confirm against the plotly/cufflinks docs.
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
# Set the global cufflinks theme ('pearl') and mark charts as world-readable
cufflinks.set_config_file(world_readable=True, theme='pearl')
# Load the prepared train/test DataFrames from the project helper
from src.prepare_datasets import get_prepared_datasets
train_df, test_df = get_prepared_datasets()
# Preview the first rows of the training set
train_df.head()
 | high | low | open | close | volume | MACD | Stochastics Oscillator | ATR |
---|---|---|---|---|---|---|---|---|
0 | 0.006149 | 0.006138 | 0.006149 | 0.006148 | 0.001767 | -0.001024 | 0.700059 | 0.006608 |
1 | 0.006149 | 0.006148 | 0.006148 | 0.006148 | 0.001279 | -0.000851 | 0.879061 | 0.006144 |
2 | 0.006150 | 0.006148 | 0.006149 | 0.006149 | 0.006782 | -0.000737 | 0.979972 | 0.005739 |
3 | 0.006147 | 0.006135 | 0.006147 | 0.006147 | 0.006131 | -0.000706 | 0.962959 | 0.005697 |
4 | 0.006147 | 0.006136 | 0.006147 | 0.006147 | 0.003289 | -0.000691 | 0.945946 | 0.005593 |
# Downsample for plotting: take every 60th row starting at row index 59,
# then draw the columns with subplots enabled
train_df[59::60].iplot(subplots=True)
# Print column dtypes, entry count and memory usage of the training set
train_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1816263 entries, 0 to 1816262 Data columns (total 8 columns): # Column Dtype --- ------ ----- 0 high float64 1 low float64 2 open float64 3 close float64 4 volume float64 5 MACD float64 6 Stochastics Oscillator float64 7 ATR float64 dtypes: float64(8) memory usage: 110.9 MB
# Name of the column used as the prediction target; it selects the targets
# passed to make_generator and the feature drawn by plot_window
target_column = 'close'
import tensorflow as tf
def make_generator(data, targets, shuffle, batch_size=8, sequence_length=33, sequence_stride=1):
    """Build a batched dataset of sliding windows paired with next-step targets.

    Each input is a window of ``sequence_length`` consecutive entries of
    ``data``; its target is the entry of ``targets`` that immediately follows
    the window (e.g. the window ``0..32`` is paired with target ``33``).

    Args:
        data: Sliceable sequence of timesteps to cut windows from.
        targets: Sliceable sequence aligned index-for-index with ``data``.
        shuffle: Whether the generated windows are shuffled.
        batch_size: Number of windows per batch.
        sequence_length: Number of timesteps in each window.
        sequence_stride: Offset between the starts of consecutive windows.

    Returns:
        The dataset created by
        ``tf.keras.preprocessing.timeseries_dataset_from_array``; iterating it
        yields ``(inputs, targets)`` batches.
    """
    # The last `sequence_length` entries are never used as window input.
    window_source = data[:-sequence_length]
    # Shift targets forward so that entry i is the value right after the
    # window that starts at index i.
    window_labels = targets[sequence_length:]

    windowing_options = {
        'sequence_length': sequence_length,
        'sequence_stride': sequence_stride,
        'shuffle': shuffle,
        'batch_size': batch_size,
    }
    return tf.keras.preprocessing.timeseries_dataset_from_array(
        data=window_source,
        targets=window_labels,
        **windowing_options,
    )
# Sanity-check make_generator on a toy series 0..99 that serves as both the
# data and the targets, so the window/target alignment is easy to read off
example_dataset = list(range(100))
print('dataset', example_dataset, '\n')
example_iterator = make_generator(example_dataset, example_dataset, shuffle=False)
# Pull only the first batch.
# NOTE(review): `input` shadows the Python builtin of the same name; later
# cells rely on this variable, so it is left as is here.
input, target = next(iter(example_iterator))
print('Input', input, '\n')
print('Target', target, '\n')
dataset [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] Input tf.Tensor( [[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32] [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33] [ 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34] [ 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35] [ 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36] [ 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37] [ 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38] [ 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39]], shape=(8, 33), dtype=int32) Target tf.Tensor([33 34 35 36 37 38 39 40], shape=(8,), dtype=int32)
# Full slice of the example batch (every window, every timestep)
result = input[:,:]
result
# Last timestep of each window, with a trailing axis appended via tf.newaxis
# (the 8x33 example batch becomes 8x1)
result[:,-1,tf.newaxis]
<tf.Tensor: shape=(8, 33), dtype=int32, numpy= array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], [ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], [ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], [ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], [ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38], [ 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]], dtype=int32)>
<tf.Tensor: shape=(8, 1), dtype=int32, numpy= array([[32], [33], [34], [35], [36], [37], [38], [39]], dtype=int32)>
# Windowed datasets over all columns of each DataFrame; the targets are the
# single target column. Training windows are shuffled, test windows are not.
train_iterator = make_generator(train_df, train_df[[target_column]], shuffle=True)
test_iterator = make_generator(test_df, test_df[[target_column]], shuffle=False)
# First test batch, reused below for plotting
input, target = next(iter(test_iterator))
import matplotlib.pyplot as plt
def plot_window(batches, target, predictions=None):
    """Plot the target feature of every window in a batch, one subplot per window.

    For each window the values of the ``target_column`` feature are drawn as a
    line, the true label is drawn as a green dot one step past the end of the
    window, and, when given, the prediction is drawn as an orange cross at the
    same x position.

    Relies on the module-level ``train_df`` and ``target_column`` to locate the
    feature column inside each timestep.

    Args:
        batches: Tensor exposing ``.numpy()``, indexed as
            ``batches[window][timestep][feature]``.
        target: Tensor exposing ``.numpy()``; ``target[i][0]`` is the label of
            window ``i``.
        predictions: Optional indexable where ``predictions[i][0]`` is the
            predicted value for window ``i``. Skipped when ``None``.
    """
    batches = batches.numpy()
    target = target.numpy()

    window_count = len(batches)
    # The column position does not change between windows, so look it up once.
    feature_index = train_df.columns.get_loc(target_column)

    plt.figure(figsize=(15, window_count * 10))
    for i, batch in enumerate(batches):
        feature = [timestep[feature_index] for timestep in batch]

        # The grid needs one row per window. Sizing it by the window length
        # squeezed the plots into part of the figure and raised a ValueError
        # whenever the batch held more windows than a window has timesteps.
        plt.subplot(window_count, 1, i + 1)
        plt.plot(feature,
                 label='Inputs', marker='.', zorder=-10
                 )

        # The label sits one step after the last input of the window.
        label = target[i][0]
        plt.scatter(len(feature), label,
                    label='Labels', edgecolors='k', c='#2ca02c', s=64
                    )

        if predictions is not None:
            prediction = predictions[i][0]
            plt.scatter(len(feature), prediction,
                        marker='X', edgecolors='k', label='Predictions',
                        c='#ff7f0e', s=64)

        plt.legend()
# Plot the first test batch with its labels (no predictions yet)
plot_window(input, target)
import tensorflow as tf
from src.BaselineModel import Baseline
# Map each column name to its positional index in train_df
column_indices = {name: i for i, name in enumerate(train_df.columns)}
# Baseline model configured with the index of the target column.
# NOTE(review): Baseline is defined in src.BaselineModel; how it uses
# label_index is not visible here - confirm there.
baseline = Baseline(label_index=column_indices[target_column])
# Mean squared error as the loss; mean absolute error and mean squared
# logarithmic error are tracked as additional metrics
baseline.compile(
loss=tf.losses.MeanSquaredError(),
metrics=[tf.metrics.MeanAbsoluteError(), tf.metrics.MeanSquaredLogarithmicError()]
)
# Run the baseline over the whole (unshuffled) test dataset
predictions = baseline.predict(test_iterator, verbose=1, use_multiprocessing=True)
56747/56747 [==============================] - 42s 736us/step
# Inspect the shape of the baseline predictions
predictions.shape
(453976, 1)
# Re-plot the first test batch, now overlaying the baseline predictions.
# plot_window reads predictions[i] for window i of the batch, so only the
# leading entries of `predictions` are drawn.
plot_window(input, target, predictions)