Я пытаюсь использовать простую нелинейную регрессию для функции; x2sin(x) с помощью TensorFlow. Пожалуйста, смотрите код и вывод ниже. Я также пробовал: а) увеличить количество точек выборки до 10 000, б) увеличить количество скрытых слоев, в) увеличить/уменьшить скорость обучения и г) tanh вместо ReLU без каких-либо улучшений. Может кто-нибудь попробовать и посмотреть, что не так с подходом?
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
import time
n = 1000
x = np.linspace(0,3.14,n)
x = np.array(x).astype(np.float32)
y = np.sin(x) * np.multiply(x, x)
r = np.random.random(n)
y = y + r
xNorm = (x - np.mean(x)) / (np.amax(x) - np.amin(x))
idxs = np.array(range(n)).astype(np.int32)
tmp = np.zeros((1, n), dtype=np.float32)
tmp[0] = xNorm
xNorm = tmp.T
print(xNorm.shape)
# Shuffle the indexes
np.random.shuffle(idxs)
# Assign 1/6th for validation, and test and the rest for training
nValidIdxs = int(n / 6)
nTestIdxs = int(n / 6)
validIdxs = idxs[0:nValidIdxs]
testIdxs = idxs[nValidIdxs:nValidIdxs + nTestIdxs]
nTrainIdxs = n - nValidIdxs - nTestIdxs
trainIdxs = idxs[nValidIdxs + nTestIdxs:n]
print('Training data points: %d' % nTrainIdxs)
print('Validation data points: %d' % nValidIdxs)
print('Testing data points: %d' % nTestIdxs)
# Split input and output values into the
# training, testing, and validation datasets.
trainX = xNorm[trainIdxs]
testX = xNorm[testIdxs]
validX = xNorm[validIdxs]
trainY = y[trainIdxs]
testY = y[testIdxs]
validY = y[validIdxs]
# This part defines a Neural Network with regularization
# applied to the loss term. SGD batch size is 128 samples.
# In addition, dropout is applied to the hidden layers during
# the training process.
batchSize = 256
nNodes = 20
stdDev = 0.001
regParam = 0.0001
initRate = 0.0001
nLayers = 1
graph = tf.Graph()
tf.set_random_seed(1234)
with graph.as_default():
# Input data. For the training data, we use a placeholder that will be fed
# at run time with a training minibatch.
tfTrainX = tf.placeholder(tf.float32, shape=(batchSize, 1))
tfTrainY = tf.placeholder(tf.float32, shape=(batchSize))
tfValidX = tf.constant(validX)
tfTestX = tf.constant(testX)
tfAllX = tf.constant(xNorm)
# This function defines a deep neural network
# with 3 hidden layers and one output layer.
def deepNeural(dataset):
w = []
b = []
for i in range(nLayers):
w.append(None)
b.append(None)
# Hidden layers
for i in range(nLayers):
if(i == 0):
w[i] = tf.Variable(
tf.truncated_normal([1, nNodes], stddev=stdDev))
logits = tf.matmul(dataset, w[i])
else:
w[i] = tf.Variable(
tf.truncated_normal([nNodes, nNodes], stddev=stdDev))
logits = tf.matmul(logits, w[i])
b[i] = tf.Variable(tf.zeros([nNodes]))
logits = tf.add(logits, b[i])
logits = tf.nn.relu(logits)
# Output layer
wo = tf.ones([nNodes, 1], tf.float32)
logits = tf.matmul(logits, wo)
# Return the output layer
return [logits, w, b, wo]
# This function provides the logits from the output
# layer calculated based upon the passed weights and
# biases for the hidden and output layer calculated
# based upon the loss minimization.
def predict(dataset, w, b, wo):
# Computation for hidden layers
for i in range(nLayers):
if(i == 0):
logits = tf.matmul(dataset, w[i])
else:
logits = tf.matmul(logits, w[i])
logits = tf.add(logits, b[i])
logits = tf.nn.relu(logits)
# Computation for the output layer
return tf.matmul(logits, wo)
logits, w, b, wo = deepNeural(tfTrainX)
loss = 0.5 * tf.reduce_mean(tf.square(logits - tfTrainY))
# Compute regularization term
regTerm = tf.Variable(0.0)
for i in range(nLayers):
regTerm = regTerm + tf.reduce_mean(tf.nn.l2_loss(w[i]))
regTerm = regTerm * regParam
# Add regularization term to loss
loss = loss + regTerm
# Optimizer.
# Exponential decay of learning rate.
globalStep = tf.Variable(0) # count the number of steps taken.
learningRate = tf.train.exponential_decay(initRate, globalStep, 500, 0.96, staircase=True)
optimizer = tf.train.GradientDescentOptimizer(learningRate).minimize(loss)
# Predictions for the training, validation, and test data.
trainPred = logits
validPred = predict(tfValidX, w, b, wo)
testPred = predict(tfTestX, w, b, wo)
allPred = predict(tfAllX, w, b, wo)
def rmse(pred, actual):
#print(pred.shape)
pred = pred.reshape(1,-1)
actual = actual.reshape(1,-1)
return np.sqrt(((pred - actual)**2).mean())
# Run SGD for Neural Network with regularization
numSteps = 5001
startTime = time.time()
predY = np.array([])
validRMSEOld = 0.0
with tf.Session(graph=graph) as session:
tf.global_variables_initializer().run()
print("Initialized")
for step in range(numSteps):
# Pick an offset within the training data, which has been randomized.
# Note: we could use better randomization across epochs.
offset = (step * batchSize) % (trainY.shape[0] - batchSize)
# Generate a minibatch.
batchX = trainX[offset:(offset + batchSize), :]
batchY = trainY[offset:(offset + batchSize)]
# Prepare a dictionary telling the session where to feed the minibatch.
# The key of the dictionary is the placeholder node of the graph to be fed,
# and the value is the numpy array to feed to it.
feedDict = {tfTrainX : batchX, tfTrainY : batchY}
_, l, pred = session.run(
[optimizer, loss, trainPred], feed_dict=feedDict)
if (step % 500 == 0):
print("Minibatch loss at step %d: %f" % (step, l))
print("Minibatch RMSE: %f" % rmse(pred, batchY))
validRMSE = rmse(validPred.eval(), validY)
print("Validation RMSE: %f" % validRMSE)
relChange = (validRMSEOld - validRMSE) / validRMSEOld
if (abs(relChange) < 0.0001 or np.isnan(validRMSE)):
break
else:
validRMSEOld = validRMSE
print("Test RMSE: %f" % rmse(testPred.eval(), testY))
print("Total RMSE: %f" % rmse(allPred.eval(), y))
predY = allPred.eval()
print('Execution time: %f' % (time.time() - startTime))
plt.plot(y, 'ro')
plt.plot(predY, '-', lw=3)
Выход:
(1000, 1)
Точки данных для обучения: 668
Точки данных для проверки: 166
Точки данных для проверки: 166
Инициализировано
Потеря мини-пакета на шаге 0: 3,902083
RMSE мини-пакета: 2,793586
RMSE проверки: 2,771836
Потеря мини-пакета на шаге 500: 1,504731
RMSE мини-пакета: 1,733019
RMSE проверки: 1,693558
Потеря мини-пакета на шаге 1000 : 1.077074
RMSE для мини-пакета: 1.465299
RMSE для проверки: 1.492440
Потери мини-пакета на шаге 1500: 1.064864
RMSE для мини-пакета: 1.456898
RMSE для проверки: 1.464581
Мини-пакет потери на шаге 2000: 1,060161
RMSE мини-пакета: 1,453716
RMSE проверки: 1,461370
Потери мини-пакета на шаге 2500: 1,055446
RMSE мини-пакета: 1,450549
RMSE проверки: 1,461191< br/> Потери мини-пакета на шаге 3000: 1,069557
RMSE мини-пакета: 1,460215
RMSE проверки: 1,461298
RMSE теста: 1,557867
Всего RMSE: 1,473936
Время выполнения: 10,608121