# plotly standard imports
import plotly.graph_objs as go
import chart_studio.plotly as py

# Cufflinks wrapper on plotly
import cufflinks

# Data science imports
import pandas as pd
import numpy as np

# Options for pandas
# Allow up to 30 columns to be shown when a DataFrame is displayed.
pd.options.display.max_columns = 30

# Display all cell outputs
# With 'all', every bare expression in a cell is echoed, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from plotly.offline import iplot, init_notebook_mode
# Put both cufflinks and plotly into offline (in-notebook) rendering mode.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead
# of embedding it in the notebook - confirm.
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

# Set global theme
# NOTE(review): world_readable=True presumably only matters for charts that
# are uploaded online - confirm it is intended.
cufflinks.set_config_file(world_readable=True, theme='pearl')


from src.prepare_datasets import get_prepared_datasets

# Project helper that returns the train and test DataFrames used below.
train_df, test_df = get_prepared_datasets()

# Peek at the first rows to check the columns and value ranges.
train_df.head()


# Plot every 60th row (starting at row 59), one subplot per column, to keep
# the interactive chart light.
# NOTE(review): presumably one point per hour if rows are minutes - confirm.
train_df[59::60].iplot(subplots=True)


# Summary of row count, column dtypes and memory footprint.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1816263 entries, 0 to 1816262
Data columns (total 8 columns):
 #   Column                  Dtype  
---  ------                  -----  
 0   high                    float64
 1   low                     float64
 2   open                    float64
 3   close                   float64
 4   volume                  float64
 5   MACD                    float64
 6   Stochastics Oscillator  float64
 7   ATR                     float64
dtypes: float64(8)
memory usage: 110.9 MB


# Name of the DataFrame column used as the prediction target and as the
# feature drawn by plot_window.
target_column = 'close'


import tensorflow as tf

def make_generator(data, targets, shuffle, batch_size=8, sequence_length=33, sequence_stride=1):
    """Build a batched dataset of sliding windows with next-step targets.

    Each sample is a window of ``sequence_length`` consecutive entries of
    ``data``; its target is the entry of ``targets`` located right after the
    end of that window.

    Args:
        data: Sliceable sequence of timesteps the windows are cut from.
        targets: Sliceable sequence aligned index-for-index with ``data``.
        shuffle: Whether the produced samples are shuffled.
        batch_size: Number of windows per batch.
        sequence_length: Number of timesteps in every window.
        sequence_stride: Offset between the starts of consecutive windows.

    Returns:
        The dataset produced by
        ``tf.keras.preprocessing.timeseries_dataset_from_array``; iterating
        it yields ``(inputs, targets)`` batches.
    """
    # The last `sequence_length` entries have no following target, so they
    # cannot start or extend a window.
    window_source = data[:-sequence_length]
    # Shifting by the window length makes targets[i] the value that comes
    # immediately after the window starting at position i.
    next_step_targets = targets[sequence_length:]

    return tf.keras.preprocessing.timeseries_dataset_from_array(
        data=window_source,
        targets=next_step_targets,
        sequence_length=sequence_length,
        sequence_stride=sequence_stride,
        batch_size=batch_size,
        shuffle=shuffle,
    )

# Toy series 0..99 used as both data and targets, so the windowing is easy to
# verify by eye.
example_dataset = list(range(100))
print('dataset', example_dataset, '\n')

# No shuffling: the first batch then holds the first windows in order.
example_iterator = make_generator(example_dataset, example_dataset, shuffle=False)

# Pull the first (inputs, targets) batch.
# NOTE(review): `input` shadows the Python builtin; later cells reuse this
# name, so it is left unchanged here.
input, target = next(iter(example_iterator))

print('Input', input, '\n')
print('Target', target, '\n')

dataset [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] 

Input tf.Tensor(
[[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
  24 25 26 27 28 29 30 31 32]
 [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
  25 26 27 28 29 30 31 32 33]
 [ 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
  26 27 28 29 30 31 32 33 34]
 [ 3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
  27 28 29 30 31 32 33 34 35]
 [ 4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
  28 29 30 31 32 33 34 35 36]
 [ 5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
  29 30 31 32 33 34 35 36 37]
 [ 6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
  30 31 32 33 34 35 36 37 38]
 [ 7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
  31 32 33 34 35 36 37 38 39]], shape=(8, 33), dtype=int32) 

Target tf.Tensor([33 34 35 36 37 38 39 40], shape=(8,), dtype=int32)


# Full slice over both axes: `result` holds the same values as `input`.
result = input[:,:]
result

# Keep only the last timestep of every window and add a trailing axis, so the
# result has one column per window: shape (batch, 1).
# NOTE(review): presumably mirrors what the Baseline model returns - confirm
# against src/BaselineModel.
result[:,-1,tf.newaxis]

<tf.Tensor: shape=(8, 33), dtype=int32, numpy=
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        33],
       [ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34],
       [ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
        35],
       [ 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36],
       [ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
        37],
       [ 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
        22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
        38],
       [ 7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
        23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
        39]], dtype=int32)>

<tf.Tensor: shape=(8, 1), dtype=int32, numpy=
array([[32],
       [33],
       [34],
       [35],
       [36],
       [37],
       [38],
       [39]], dtype=int32)>


# All columns are used as inputs; the double brackets keep the target as a
# one-column DataFrame, so each target has a trailing axis of size 1.
# Training windows are shuffled, test windows keep their original order.
train_iterator = make_generator(train_df, train_df[[target_column]], shuffle=True)
test_iterator = make_generator(test_df, test_df[[target_column]], shuffle=False)

# First test batch, reused by the plotting cells below.
input, target = next(iter(test_iterator))


import matplotlib.pyplot as plt

def plot_window(batches, target, predictions=None):
    """Plot every input window of a batch with its label and prediction.

    One subplot is drawn per window, stacked vertically. Only the
    ``target_column`` feature of each window is plotted as a line; the label
    (and the prediction, when given) is drawn as a single marker one step
    past the end of the window.

    Relies on the module-level ``train_df`` and ``target_column`` to find the
    position of the plotted feature inside each timestep.

    Args:
        batches: Tensor exposing ``.numpy()``. Assumed to be shaped
            (windows, timesteps, features) with features ordered like
            ``train_df.columns`` - TODO confirm against callers.
        target: Tensor exposing ``.numpy()``; ``target[i][0]`` is used as the
            label of window ``i``.
        predictions: Optional indexable; ``predictions[i][0]`` is drawn as
            the prediction for window ``i``. Nothing is drawn when ``None``.
    """
    windows = batches.numpy()
    labels = target.numpy()
    window_count = len(windows)

    # Ten inches of figure height per window.
    plt.figure(figsize=(15, window_count * 10))

    # The column position is identical for every window: resolve it once.
    feature_index = train_df.columns.get_loc(target_column)

    for i, window in enumerate(windows):
        feature = window[:, feature_index]

        # The grid needs one row per window. Sizing it by the window length
        # left most of the figure empty and raised ValueError as soon as the
        # batch held more windows than a window has timesteps.
        plt.subplot(window_count, 1, i + 1)
        plt.plot(feature, label='Inputs', marker='.', zorder=-10)

        # x = len(feature) places the marker right after the last input step.
        plt.scatter(len(feature), labels[i][0],
                    label='Labels', edgecolors='k', c='#2ca02c', s=64)

        if predictions is not None:
            plt.scatter(len(feature), predictions[i][0],
                        marker='X', edgecolors='k', label='Predictions',
                        c='#ff7f0e', s=64)

        plt.legend()
        
# Draw the first test batch with its labels only (no predictions yet).
plot_window(input, target)


import tensorflow as tf
from src.BaselineModel import Baseline

# Map each column name to its positional index in train_df.
column_indices = {name: i for i, name in enumerate(train_df.columns)}

# The baseline is told which feature position holds the target column.
baseline = Baseline(label_index=column_indices[target_column])
    
# Mean squared error as the loss; mean absolute error and mean squared
# logarithmic error are tracked as additional metrics.
baseline.compile(
    loss=tf.losses.MeanSquaredError(),
    metrics=[tf.metrics.MeanAbsoluteError(), tf.metrics.MeanSquaredLogarithmicError()]
)


# Run the baseline over every batch of the (unshuffled) test iterator.
# NOTE(review): Keras documents use_multiprocessing as applying to generator /
# Sequence inputs only; test_iterator is a tf.data dataset, so the flag is
# presumably a no-op here - confirm.
predictions = baseline.predict(test_iterator, verbose=1, use_multiprocessing=True)

56747/56747 [==============================] - 42s 736us/step


# One prediction row per test window, with a single value each.
predictions.shape

(453976, 1)


# `predictions` covers the whole test set, while `input`/`target` are only the
# first test batch; plot_window reads predictions[i] for each window i of that
# batch, which lines up because test_iterator is not shuffled.
plot_window(input, target, predictions)

	high	low	open	close	volume	MACD	Stochastics Oscillator	ATR
0	0.006149	0.006138	0.006149	0.006148	0.001767	-0.001024	0.700059	0.006608
1	0.006149	0.006148	0.006148	0.006148	0.001279	-0.000851	0.879061	0.006144
2	0.006150	0.006148	0.006149	0.006149	0.006782	-0.000737	0.979972	0.005739
3	0.006147	0.006135	0.006147	0.006147	0.006131	-0.000706	0.962959	0.005697
4	0.006147	0.006136	0.006147	0.006147	0.003289	-0.000691	0.945946	0.005593

Window Generator¶

Explore datasets¶

Calculate batch size¶

How the baseline works¶

Prepare real datasets¶

Try the baseline model¶