EDIT: editiert ich meinen Code seq2seq Tutorial/Übungen zu machen, hier sind sie: https://github.com/guillaume-chevalier/seq2seq-signal-predictionTensorflow seq2seq mehrdimensionale Regression
Ich versuche, eine Sequenz zu Sequenz (seq2seq) Regression zu tun mit multidimensionale Ein- und Ausgänge. Ich habe versucht, etwas, das den folgenden Verlust im Laufe der Zeit ergibt:
Das Modell nicht vollständig einen Sinus auf all Ein- und Ausgang Dimensionen geklonten vorherzusagen zu lernen, auch wenn ich eine sehr kleine Lernrate versuchen.
Die Tensorflow-Loss-Funktion für RNNs scheint die Fälle zu adressieren, in denen wir direkt Labels oder Wörter einbetten wollen, also habe ich versucht, den Verlust selbst zu berechnen. Insofern weiß ich nicht, wie wir mit der dec_inp (Decoder-Input) -Variable umgehen sollen, was ich versuche zu tun, scheint in Tensorflow noch nicht getan zu sein, aber besonders konzeptionell (siehe Titel). Hier
ist der Tensor Graph:
Es gibt einige Dinge auf dem Graphen würde ich nicht erwartet hätte, wie die Verbindung zwischen dem RMSProp Optimierers und der basic_rnn_seq2seq.
Hier ist, was ich versuchte noch:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import tempfile
import math
rnn_cell = tf.nn.rnn_cell
seq2seq = tf.nn.seq2seq
tf.reset_default_graph()
sess = tf.InteractiveSession()
# Neural net's parameters
seq_length = 5 # Inputs and outputs are sequences of 5 units
batch_size = 1 # Keeping it simple for now
# Each unit in the sequence is a float32 vector of lenght 10:
# Same dimension sizes just for simplicity now
output_dim = hidden_dim = input_dim = 10
# Optmizer:
learning_rate = 0.0007 # Small lr to avoid problem
nb_iters = 2000 # Crank up the iters in consequence
lr_decay = 0.85 # 0.9 default
momentum = 0.01 # 0.0 default
# Create seq2seq's args
enc_inp = [tf.placeholder(tf.float32, shape=(None, input_dim),
name="inp%i" % t)
for t in range(seq_length)]
# sparse "labels" that are not labels:
expected_sparse_output = [tf.placeholder(tf.float32, shape=(None, output_dim),
name="expected_sparse_output%i" % t)
for t in range(seq_length)]
# Decoder input: prepend some "GO" token and drop the final
# There might be a problem there too,
# my outputs are not tokens integer, but float vectors.
dec_inp = [tf.zeros_like(enc_inp[0], dtype=np.float32, name="GO")] + enc_inp[:-1]
# Initial memory value for recurrence.
prev_mem = tf.zeros((batch_size, hidden_dim))
# Create rnn cell and decoder's sequence
cell = rnn_cell.GRUCell(hidden_dim)
# cell = tf.nn.rnn_cell.MultiRNNCell([cell] * layers_stacked_count)
dec_outputs, dec_memory = seq2seq.basic_rnn_seq2seq(
enc_inp,
dec_inp,
cell
)
# Training loss and optimizer
loss = 0
for _y, _Y in zip(dec_outputs, expected_sparse_output):
loss += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(_y, _Y)) # Softmax loss
# loss + tf.reduce_mean(tf.squared_difference(_y, _Y))
# The following commented loss function did not worked because
# I want a sparse output rather than labels
# weights = [tf.ones_like(labels_t, dtype=tf.float32)
# for labels_t in expected_sparse_output]
# loss = seq2seq.sequence_loss(dec_outputs, labels, weights)
tf.scalar_summary("loss", loss)
summary_op = tf.merge_all_summaries()
# optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
# optimizer = tf.train.AdagradOptimizer(learning_rate)
optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=lr_decay, momentum=momentum)
train_op = optimizer.minimize(loss)
logdir = tempfile.mkdtemp()
print logdir
summary_writer = tf.train.SummaryWriter(logdir, sess.graph)
sess.run(tf.initialize_all_variables())
def gen_data_x_y():
"""
Simply returns data of shape:
(seq_lenght, batch_size, output_dim)
X is a sine of domain 0.0*pi to 1.5*pi
Y is a sine of domain 1.5*pi to 3.0*pi
To temporarily deal with the number of dimensions
"""
# Create the sine in x and it's continuation in y
x = np.sin(np.linspace(0.0*math.pi, 1.5*math.pi, seq_length))
y = np.sin(np.linspace(1.5*math.pi, 3.0*math.pi, seq_length))
# Clone the sine for every input_dim.
# Normaly those dims would containt different signals
# happening at the same time of a single timestep of
# a single training example, such as other features of
# the signal such as various moving averages
x = np.array([x for i in range(input_dim)])
y = np.array([y for i in range(output_dim)])
x, y = x.T, y.T
x = np.array([x]*batch_size) # simple for now: batch_size of 1
y = np.array([y]*batch_size)
# shape: (batch_size, seq_lenght, output_dim)
x = np.array(x).transpose((1, 0, 2))
y = np.array(y).transpose((1, 0, 2))
# shape: (seq_lenght, batch_size, output_dim)
# print "X_SHAPE: " + str(x.shape)
return x, y
def train_batch(batch_size):
"""
Training step: we optimize for every outputs Y at once,
feeding all inputs X
I do not know yet how to deal with
the enc_inp tensor declared earlier
"""
X, Y = gen_data_x_y()
feed_dict = {
enc_inp[t]: X[t] for t in range(seq_length)
}
feed_dict.update({expected_sparse_output[t]: Y[t] for t in range(seq_length)})
feed_dict.update({prev_mem: np.zeros((batch_size, hidden_dim))})
_, loss_t, summary = sess.run([train_op, loss, summary_op], feed_dict)
return loss_t, summary
# Train
for t in range(nb_iters):
loss_t, summary = train_batch(batch_size)
print loss_t
summary_writer.add_summary(summary, t)
summary_writer.flush()
# Visualise the loss
# !tensorboard --logdir {logdir}
# Test the training
X, Y = gen_data_x_y()
feed_dict = {
enc_inp[t]: X[t] for t in range(seq_length)
}
# feed_dict.update({expected_sparse_output[t]: Y[t] for t in range(seq_length)})
outputs = sess.run([dec_outputs], feed_dict)
# Evaluate model
np.set_printoptions(suppress=True) # No scientific exponents
expected = Y[:,0,0]
print "Expected: "
print expected
print ""
print "The following results now represents each timesteps of a different output dim:"
mses = []
for i in range(output_dim):
pred = np.array(outputs[0])[:,0,i]
print pred
mse = math.sqrt(np.mean((pred - expected)**2))
print "mse: " + str(mse)
mses.append(mse)
print ""
print ""
print "FINAL MEAN SQUARED ERROR ON RESULT: " + str(np.mean(mses))
die druckt:
/tmp/tmpVbO48U
5.87742
5.87894
5.88054
5.88221
5.88395
[...]
5.71791
5.71791
5.71791
5.71791
5.71791
Expected:
[-1. -0.38268343 0.70710678 0.92387953 0. ]
The following results now represents each timesteps of a different output dim:
[-0.99999893 -0.99999893 0.96527898 0.99995273 -0.01624492]
mse: 0.301258140201
[-0.99999952 -0.99999952 0.98715001 0.9999997 -0.79249388]
mse: 0.467620401096
[-0.99999946 -0.9999994 0.97464144 0.99999654 -0.30602577]
mse: 0.332294862093
[-0.99999893 -0.99999893 0.95765316 0.99917656 0.36947867]
mse: 0.342355383387
[-0.99999964 -0.99999952 0.9847464 0.99999964 -0.70281279]
mse: 0.43769921227
[-0.99999744 -0.9999975 0.97723919 0.99999851 -0.39834118]
mse: 0.351715216206
[-0.99999964 -0.99999952 0.97650111 0.99999803 -0.37042192]
mse: 0.34544431708
[-0.99999648 -0.99999893 0.99999917 0.99999917 0.99999726]
mse: 0.542706750242
[-0.99999917 -0.99999917 0.96115535 0.99984574 0.12008631]
mse: 0.305224828554
[-0.99999952 -0.99999946 0.98291612 0.99999952 -0.62598646]
mse: 0.413473861107
FINAL MEAN SQUARED ERROR ON RESULT: 0.383979297224
Es scheint wie eine kleine Sache in meinem Code fehlt, sonst ein wenig Fehler.
Am Ende möchte ich die N nächsten Koeffizienten einiger STFT auf Aktienmarktdaten vorhersagen, aber im Moment konzentriere ich mich darauf, das Regressionsmodell mit einfachen Daten arbeiten zu lassen, um es dann direkt zu skalieren, ich habe bereits RNNs und ConvNets. Die Änderung des Softmax-Verlustes durch MSE und die Verwendung von L2-Verlust halfen, der Rest funktionierte gut. Danke vielmals! –