Hier ist ein Beispiel für die sequenzielle Filterung mit Holt-Winters. Das gleiche Muster sollte für andere Arten der sequenziellen Modellierung wie den Kalman-Filter funktionieren.
from matplotlib import pyplot
import numpy as np
import tensorflow as tf
# Set TensorFlow's log verbosity to INFO.
tf.logging.set_verbosity(tf.logging.INFO)
# Length of one seasonal cycle, in time steps.
seasonality = 10
def model_fn(features, targets):
    """Defines a basic Holt-Winters sequential filtering model in TensorFlow.

    The series is filtered one observation at a time inside a `tf.while_loop`.
    The training loss is the mean squared one-step-ahead prediction error, and
    the trained parameters are the initial seasonal indices and the three
    smoothing coefficients.
    See http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc435.htm

    Args:
      features: Dict with a "times" entry (integer time indices) and a "values"
        entry (the observations). The initial trend estimate reads the first
        2 * seasonality values, so the series is assumed to be at least that
        long.
      targets: Unused.

    Returns:
      A (predictions, loss, train_op) tuple. `predictions` is a dict whose
      "times" entry holds the offsets 0..29 relative to the step right after
      the last observation and whose "values" entry holds the matching
      forecasts.
    """
    times = features["times"]
    values = features["values"]
    # Initial estimates
    initial_trend = tf.reduce_sum(
        (values[seasonality:2 * seasonality] - values[:seasonality])
        / seasonality ** 2)
    initial_smoothed_observation = values[0]
    # Seasonal indices are multiplicative, so having them near 0 leads to
    # instability
    initial_seasonal_indices = 1. + tf.exp(
        tf.get_variable("initial_seasonal_indices", shape=[seasonality]))
    with tf.variable_scope("smoothing_parameters",
                           initializer=tf.zeros_initializer):
        # Trained scalars for smoothing, transformed to be in (0, 1). The raw
        # variables start at zero, i.e. each coefficient starts at 0.5.
        observation_smoothing = tf.sigmoid(
            tf.get_variable(name="observation_smoothing", shape=[]))
        trend_smoothing = tf.sigmoid(
            tf.get_variable(name="trend_smoothing", shape=[]))
        seasonal_smoothing = tf.sigmoid(
            tf.get_variable(name="seasonal_smoothing", shape=[]))

    def filter_function(
            current_index, seasonal_indices, previous_smoothed_observation,
            previous_trend, previous_loss_sum):
        """Runs one filtering step and accumulates the squared error."""
        current_time = tf.gather(times, current_index)
        current_observation = tf.gather(values, current_index)
        current_season = current_time % seasonality
        # Predict before looking at the observation, then score the prediction
        one_step_ahead_prediction = (
            (previous_smoothed_observation + previous_trend)
            * tf.gather(seasonal_indices, current_season))
        new_loss_sum = previous_loss_sum + (
            one_step_ahead_prediction - current_observation) ** 2
        new_smoothed_observation = (
            (observation_smoothing * current_observation
             / tf.gather(seasonal_indices, current_season))
            + ((1. - observation_smoothing)
               * (previous_smoothed_observation + previous_trend)))
        new_trend = (
            (trend_smoothing
             * (new_smoothed_observation - previous_smoothed_observation))
            + (1. - trend_smoothing) * previous_trend)
        updated_seasonal_index = (
            seasonal_smoothing * current_observation / new_smoothed_observation
            + ((1. - seasonal_smoothing)
               * tf.gather(seasonal_indices, current_season)))
        # Replace only the entry for the current season
        new_seasonal_indices = tf.concat(
            concat_dim=0,
            values=[seasonal_indices[:current_season],
                    [updated_seasonal_index],
                    seasonal_indices[current_season + 1:]])
        # Preserve shape to keep the while_loop shape invariants happy
        new_seasonal_indices.set_shape(seasonal_indices.get_shape())
        return (current_index + 1, new_seasonal_indices,
                new_smoothed_observation, new_trend, new_loss_sum)

    def while_run_condition(current_index, *unused_args):
        """Keeps looping until every observation has been filtered."""
        return current_index < tf.shape(times)[0]

    (_, final_seasonal_indices, final_smoothed_observation, final_trend,
     sum_squared_errors) = tf.while_loop(
         cond=while_run_condition,
         body=filter_function,
         loop_vars=[0, initial_seasonal_indices, initial_smoothed_observation,
                    initial_trend, 0.])
    normalized_loss = sum_squared_errors / tf.cast(tf.shape(times)[0],
                                                   dtype=tf.float32)
    train_op = tf.contrib.layers.optimize_loss(
        loss=normalized_loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.1,
        optimizer="Adam")
    # Forecast the 30 steps that follow the last observation. The m-step-ahead
    # Holt-Winters forecast is (S_t + m * b_t) * I_{t - L + m} with m >= 1, so
    # the horizon starts at 1 (not 0), and the seasonal index is looked up at
    # the absolute time of each forecast rather than assuming that the series
    # ends exactly on a season boundary.
    prediction_times = tf.range(30)
    steps_ahead = prediction_times + 1
    last_time = tf.gather(times, tf.shape(times)[0] - 1)
    prediction_seasons = (last_time + steps_ahead) % seasonality
    prediction_values = (
        (final_smoothed_observation
         + final_trend * tf.cast(steps_ahead, dtype=tf.float32))
        * tf.cast(tf.gather(params=final_seasonal_indices,
                            indices=prediction_seasons),
                  dtype=tf.float32))
    predictions = {"times": prediction_times,
                   "values": prediction_values}
    return predictions, normalized_loss, train_op
# Synthetic series: constant level plus a linear trend, one sine cycle per
# `seasonality` steps, and Gaussian noise
series_length = 50
times = np.arange(series_length, dtype=np.int32)
linear_trend = 0.02 * times
seasonal_cycle = np.sin(times * 2 * np.pi / float(seasonality))
noise = np.random.normal(size=[series_length], scale=0.2)
values = 5. + (linear_trend + seasonal_cycle + noise).astype(np.float32)


def input_fn():
    """Feeds the whole series to the model; the targets dict stays empty."""
    features = {
        "times": tf.convert_to_tensor(times, dtype=tf.int32),
        "values": tf.convert_to_tensor(values, dtype=tf.float32),
    }
    return (features, {})


# Train through a tf.learn Estimator, then request the forecasts
estimator = tf.contrib.learn.Estimator(model_fn=model_fn)
estimator.fit(input_fn=input_fn, steps=500)
predictions = estimator.predict(input_fn=input_fn, as_iterable=False)

# Training data first, forecasts appended after the end of the series
pyplot.plot(range(series_length), values)
pyplot.plot(series_length + predictions["times"], predictions["values"])
pyplot.show()
(Zum Zeitpunkt des Schreibens habe ich TensorFlow 0.11.0rc0 verwendet.)
Output of Holt-Winters on synthetic data: training data followed by predictions.
Allerdings wird dieser Code recht langsam, wenn man ihn auf längere Zeitreihen skaliert. Das Problem besteht darin, dass TensorFlow (und die meisten anderen Werkzeuge zur automatischen Differenzierung) bei sequenziellen Berechnungen (Schleifen) keine gute Leistung erzielt. In der Regel wird dies verbessert, indem man die Daten zu Batches zusammenfasst und große Blöcke auf einmal verarbeitet. Bei sequenziellen Modellen ist das etwas schwierig, da ein Zustand von einem Zeitschritt zum nächsten weitergereicht werden muss.
Ein viel schnellerer (aber vielleicht weniger befriedigender) Ansatz ist die Verwendung eines autoregressiven Modells. Dies hat den zusätzlichen Vorteil, dass es sich in TensorFlow sehr einfach implementieren lässt:
import numpy as np
from matplotlib import pyplot
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)

# Length of one seasonal cycle, in time steps
seasonality = 10

# Synthetic series: constant level plus a linear trend, one sine cycle per
# `seasonality` steps, and Gaussian noise
series_length = 50
times = np.arange(series_length, dtype=np.int32)
linear_trend = 0.02 * times
seasonal_cycle = np.sin(times * 2 * np.pi / float(seasonality))
noise = np.random.normal(size=[series_length], scale=0.2)
values = 5. + (linear_trend + seasonal_cycle + noise).astype(np.float32)

# Mini-batch size and the number of past values used as regression inputs
batch_size = 16
window_size = 10

# Feature column holding one window of past values for the linear regression
input_column = tf.contrib.layers.real_valued_column(
    column_name="input_window", dimension=window_size)
def training_input_fn():
    """Samples a random mini-batch of (window, next value) training pairs.

    Returns:
      A (features, targets) tuple. `features` maps `input_column` to a
      [batch_size, window_size] tensor of consecutive observations; `targets`
      is a [batch_size] tensor with the observation right after each window.
    """
    # A window starting at s has its target at s + window_size, so the largest
    # valid start is series_length - window_size - 1. `maxval` is exclusive
    # when sampling integers, so no further "- 1" is needed; with it, the last
    # observation would never be used as a training target.
    window_starts = tf.random_uniform(
        shape=[batch_size], dtype=tf.int32,
        maxval=series_length - window_size)
    # Broadcast [batch_size, 1] starts against [1, window_size] offsets
    element_indices = (tf.expand_dims(window_starts, 1)
                       + tf.expand_dims(tf.range(window_size), 0))
    return ({input_column: tf.gather(values, element_indices)},
            tf.gather(values, window_starts + window_size))
# Fit a linear autoregressive model on randomly sampled windows
estimator = tf.contrib.learn.LinearRegressor(feature_columns=[input_column])
estimator.fit(input_fn=training_input_fn, steps=500)

# Seed the forecast with the last observed window. Each new prediction is
# appended to this list and becomes part of the input for the next step.
predictions = list(values[-window_size:])


def predict_input_fn():
    """Returns the most recent window of values as a single-example batch."""
    return ({input_column: tf.reshape(predictions[-window_size:],
                                      [1, window_size])}, {})


predict_length = 30
# `range` rather than the Python 2-only `xrange`, so the script also runs on
# Python 3
for _ in range(predict_length):
    prediction = estimator.predict(input_fn=predict_input_fn,
                                   as_iterable=False)
    predictions.append(prediction[0])
# Drop the seed observations and keep only the forecasts
predictions = predictions[-predict_length:]

# Training data first, forecasts appended after the end of the series
pyplot.plot(range(series_length), values)
pyplot.plot(series_length + np.arange(predict_length), predictions)
pyplot.show()
Output of the autoregressive model on the same synthetic dataset.
Beachten Sie: Da das Modell keinen Zustand halten muss, können wir sehr einfach Aktualisierungen per stochastischem Mini-Batch-Gradientenabstieg durchführen.
Für Clustering könnte etwas wie k-means für Zeitreihen funktionieren.