Nonlinear regression with TensorFlow produces a straight line

I am trying to fit a simple nonlinear function, x²·sin(x), with TensorFlow. Please see the code and output below. I have also tried: a) increasing the number of sample points to 10,000, b) increasing the number of hidden layers, c) increasing/decreasing the learning rate, and d) tanh instead of ReLU, all without any improvement. Can someone try this and see what is wrong with the approach?

import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
import time

n = 1000
x = np.linspace(0,3.14,n)
x = np.array(x).astype(np.float32)
y = np.sin(x) * np.multiply(x, x)
r = np.random.random(n)
y = y + r
xNorm = (x - np.mean(x)) / (np.amax(x) - np.amin(x))

idxs = np.array(range(n)).astype(np.int32)
tmp = np.zeros((1, n), dtype=np.float32)
tmp[0] = xNorm
xNorm = tmp.T
print(xNorm.shape)

# Shuffle the indexes
np.random.shuffle(idxs)
# Assign 1/6th each for validation and test, and the rest for training
nValidIdxs = int(n / 6)
nTestIdxs = int(n / 6)
validIdxs = idxs[0:nValidIdxs]
testIdxs = idxs[nValidIdxs:nValidIdxs + nTestIdxs]
nTrainIdxs = n - nValidIdxs - nTestIdxs
trainIdxs = idxs[nValidIdxs + nTestIdxs:n]
print('Training data points: %d' % nTrainIdxs)
print('Validation data points: %d' % nValidIdxs)
print('Testing data points: %d' % nTestIdxs)

# Split input and output values into the
# training, testing, and validation datasets.
trainX = xNorm[trainIdxs]
testX = xNorm[testIdxs]
validX = xNorm[validIdxs]

trainY = y[trainIdxs]
testY = y[testIdxs]
validY = y[validIdxs]

# This part defines a neural network with L2 regularization
# applied to the loss term. The SGD batch size is 256 samples.
batchSize = 256
nNodes = 20
stdDev = 0.001
regParam = 0.0001
initRate = 0.0001
nLayers = 1

graph = tf.Graph()
with graph.as_default():
    # The seed must be set inside the graph context to affect this graph.
    tf.set_random_seed(1234)
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tfTrainX = tf.placeholder(tf.float32, shape=(batchSize, 1))
    tfTrainY = tf.placeholder(tf.float32, shape=(batchSize))
    tfValidX = tf.constant(validX)
    tfTestX = tf.constant(testX)
    tfAllX = tf.constant(xNorm)

    # This function defines a deep neural network
    # with nLayers hidden layers and one output layer.
    def deepNeural(dataset):
        w = []
        b = []
        for i in range(nLayers):
            w.append(None)
            b.append(None)

        # Hidden layers
        for i in range(nLayers):
            if(i == 0):
                w[i] = tf.Variable(
                    tf.truncated_normal([1, nNodes], stddev=stdDev))
                logits = tf.matmul(dataset, w[i])
            else:
                w[i] = tf.Variable(
                    tf.truncated_normal([nNodes, nNodes], stddev=stdDev))
                logits = tf.matmul(logits, w[i])
            b[i] = tf.Variable(tf.zeros([nNodes]))
            logits = tf.add(logits, b[i])
            logits = tf.nn.relu(logits)

        # Output layer
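        # Note: the output weights are constant ones rather than a
        # tf.Variable, so the output layer itself is never trained.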
        wo = tf.ones([nNodes, 1], tf.float32)
        logits = tf.matmul(logits, wo)
        # Return the output layer
        return [logits, w, b, wo]

    # This function computes the output-layer predictions for a given
    # dataset, using the weights and biases learned during training.
    def predict(dataset, w, b, wo):
        # Computation for hidden layers
        for i in range(nLayers):
            if(i == 0):
                logits = tf.matmul(dataset, w[i])
            else:
                logits = tf.matmul(logits, w[i])
            logits = tf.add(logits, b[i])
            logits = tf.nn.relu(logits)
        # Computation for the output layer
        return tf.matmul(logits, wo)

    logits, w, b, wo = deepNeural(tfTrainX)
    loss = 0.5 * tf.reduce_mean(tf.square(logits - tfTrainY))
    # Compute regularization term
    regTerm = tf.Variable(0.0)
    for i in range(nLayers):
        regTerm = regTerm + tf.reduce_mean(tf.nn.l2_loss(w[i]))
    regTerm = regTerm * regParam
    # Add regularization term to loss
    loss = loss + regTerm

    # Optimizer.
    # Exponential decay of learning rate.
    globalStep = tf.Variable(0) # count the number of steps taken.
    learningRate = tf.train.exponential_decay(initRate, globalStep, 500, 0.96, staircase=True)
    optimizer = tf.train.GradientDescentOptimizer(learningRate).minimize(loss)
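    # Note: globalStep is never passed to minimize(), so it stays at 0
    # and the learning rate never actually decays.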

    # Predictions for the training, validation, and test data.
    trainPred = logits
    validPred = predict(tfValidX, w, b, wo)
    testPred = predict(tfTestX, w, b, wo)
    allPred = predict(tfAllX, w, b, wo)

def rmse(pred, actual):
    #print(pred.shape)
    pred = pred.reshape(1,-1)
    actual = actual.reshape(1,-1)
    return np.sqrt(((pred - actual)**2).mean())

# Run SGD for Neural Network with regularization
numSteps = 5001
startTime = time.time()
predY = np.array([])
validRMSEOld = 0.0
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in range(numSteps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batchSize) % (trainY.shape[0] - batchSize)
        # Generate a minibatch.
        batchX = trainX[offset:(offset + batchSize), :]
        batchY = trainY[offset:(offset + batchSize)]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feedDict = {tfTrainX : batchX, tfTrainY : batchY}
        _, l, pred = session.run(
            [optimizer, loss, trainPred], feed_dict=feedDict)
        if (step % 500 == 0):
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch RMSE: %f" % rmse(pred, batchY))
            validRMSE = rmse(validPred.eval(), validY)
            print("Validation RMSE: %f" % validRMSE)
            relChange = (validRMSEOld - validRMSE) / validRMSEOld
            if (abs(relChange) < 0.0001 or np.isnan(validRMSE)):
                break
            else:
                validRMSEOld = validRMSE
    print("Test RMSE: %f" % rmse(testPred.eval(), testY))
    print("Total RMSE: %f" % rmse(allPred.eval(), y))
    predY = allPred.eval()
print('Execution time: %f' % (time.time() - startTime))

plt.plot(y, 'ro')
plt.plot(predY, '-', lw=3)

Output:

(1000, 1)
Training data points: 668
Validation data points: 166
Testing data points: 166
Initialized
Minibatch loss at step 0: 3.902083
Minibatch RMSE: 2.793586
Validation RMSE: 2.771836
Minibatch loss at step 500: 1.504731
Minibatch RMSE: 1.733019
Validation RMSE: 1.693558
Minibatch loss at step 1000: 1.077074
Minibatch RMSE: 1.465299
Validation RMSE: 1.492440
Minibatch loss at step 1500: 1.064864
Minibatch RMSE: 1.456898
Validation RMSE: 1.464581
Minibatch loss at step 2000: 1.060161
Minibatch RMSE: 1.453716
Validation RMSE: 1.461370
Minibatch loss at step 2500: 1.055446
Minibatch RMSE: 1.450549
Validation RMSE: 1.461191
Minibatch loss at step 3000: 1.069557
Minibatch RMSE: 1.460215
Validation RMSE: 1.461298
Test RMSE: 1.557867
Total RMSE: 1.473936
Execution time: 10.608121

[Plot: nonlinear regression results]


ynpandey, 05.12.2016


Answers (1)


The following modified code works. The main problem was the loss function: logits has shape (batchSize, 1) while tfTrainY has shape (batchSize,), so logits - tfTrainY broadcasts to a (batchSize, batchSize) matrix and the loss compares every prediction against every target, which drives the network toward a constant output. The loss should instead be loss = 0.5 * tf.reduce_mean(tf.square(tf.transpose(logits) - tfTrainY)). A short sketch of the shape problem comes first, followed by the full modified code.
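
A minimal NumPy sketch of the shape problem (the shapes are taken from the graph above; the array contents are just placeholders):

import numpy as np

batchSize = 256
logits = np.zeros((batchSize, 1), dtype=np.float32)  # network output
targets = np.zeros((batchSize,), dtype=np.float32)   # tfTrainY

# Without the transpose, the subtraction broadcasts (256, 1) against
# (256,) into a full (256, 256) matrix: every prediction is compared
# with every target, so minimizing the mean favors a constant output,
# i.e. the straight line in the plot.
print((logits - targets).shape)    # (256, 256)

# With the transpose, (1, 256) vs (256,) broadcasts elementwise to
# (1, 256): each prediction is paired with its own target.
print((logits.T - targets).shape)  # (1, 256)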

import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
import time

n = 10000
x = np.linspace(0,3.14,n)
x = np.array(x).astype(np.float32)
y = np.sin(x) * np.multiply(x, x)
r = np.random.random(n)
y = y + r
xNorm = (x - np.mean(x)) / (np.amax(x) - np.amin(x))

idxs = np.array(range(n)).astype(np.int32)
tmp = np.zeros((1, n), dtype=np.float32)
tmp[0] = xNorm
xNorm = tmp.T
print(xNorm.shape)

# Shuffle the indexes
np.random.shuffle(idxs)
# Assign 1/6th each for validation and test, and the rest for training
nValidIdxs = int(n / 6)
nTestIdxs = int(n / 6)
validIdxs = idxs[0:nValidIdxs]
testIdxs = idxs[nValidIdxs:nValidIdxs + nTestIdxs]
nTrainIdxs = n - nValidIdxs - nTestIdxs
trainIdxs = idxs[nValidIdxs + nTestIdxs:n]
print('Training data points: %d' % nTrainIdxs)
print('Validation data points: %d' % nValidIdxs)
print('Testing data points: %d' % nTestIdxs)

# Split input and output values into the
# training, testing, and validation datasets.
trainX = xNorm[trainIdxs]
testX = xNorm[testIdxs]
validX = xNorm[validIdxs]

trainY = y[trainIdxs]
testY = y[testIdxs]
validY = y[validIdxs]

# This part defines a neural network with L2 regularization
# applied to the loss term. The SGD batch size is 256 samples.
batchSize = 256
nNodes = 128
stdDev = 0.1
regParam = 0.0001
initRate = 0.001
nLayers = 3

graph = tf.Graph()
with graph.as_default():
    # The seed must be set inside the graph context to affect this graph.
    tf.set_random_seed(1234)
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tfTrainX = tf.placeholder(tf.float32, shape=(batchSize, 1))
    tfTrainY = tf.placeholder(tf.float32, shape=(batchSize))
    tfValidX = tf.constant(validX)
    tfTestX = tf.constant(testX)
    tfAllX = tf.constant(xNorm)

    # This function defines a deep neural network
    # with 3 hidden layers and one output layer.
    def deepNeural(dataset):
        w = []
        b = []
        for i in range(nLayers):
            w.append(None)
            b.append(None)

        # Hidden layers
        for i in range(nLayers):
            if(i == 0):
                w[i] = tf.Variable(
                    tf.truncated_normal([1, nNodes], stddev=stdDev))
                logits = tf.matmul(dataset, w[i])
            else:
                w[i] = tf.Variable(
                    tf.truncated_normal([nNodes, nNodes], stddev=stdDev))
                logits = tf.matmul(logits, w[i])
            b[i] = tf.Variable(tf.zeros([nNodes]))
            logits = tf.add(logits, b[i])
            logits = tf.nn.tanh(logits)

        # Output layer
        wo = tf.ones([nNodes, 1], tf.float32)
        logits = tf.matmul(logits, wo)
        # Return the output layer
        return [logits, w, b, wo]

    # This function computes the output-layer predictions for a given
    # dataset, using the weights and biases learned during training.
    def predict(dataset, w, b, wo):
        # Computation for hidden layers
        for i in range(nLayers):
            if(i == 0):
                logits = tf.matmul(dataset, w[i])
            else:
                logits = tf.matmul(logits, w[i])
            logits = tf.add(logits, b[i])
            logits = tf.nn.tanh(logits)
        # Computation for the output layer
        return tf.matmul(logits, wo)

    logits, w, b, wo = deepNeural(tfTrainX)
    loss = 0.5 * tf.reduce_mean(tf.square(tf.transpose(logits) - tfTrainY))
    # Compute regularization term
    # Use a plain constant here: a trainable tf.Variable would itself
    # be updated by the optimizer and drift the reported loss.
    regTerm = tf.constant(0.0)
    for i in range(nLayers):
        regTerm = regTerm + tf.reduce_mean(tf.nn.l2_loss(w[i]))
    regTerm = regTerm * regParam
    # Add regularization term to loss
    loss = loss + regTerm

    # Optimizer.
    # Exponential decay of learning rate.
    globalStep = tf.Variable(0, trainable=False) # count the number of steps taken.
    learningRate = tf.train.exponential_decay(initRate, globalStep, 1000, 0.96, staircase=True)
    # Passing global_step makes minimize() increment it, so the decay takes effect.
    optimizer = tf.train.AdamOptimizer(learningRate).minimize(loss, global_step=globalStep)

    # Predictions for the training, validation, and test data.
    trainPred = logits
    validPred = predict(tfValidX, w, b, wo)
    testPred = predict(tfTestX, w, b, wo)
    allPred = predict(tfAllX, w, b, wo)

def rmse(pred, actual):
    #print(pred.shape)
    pred = pred.reshape(1,-1)
    actual = actual.reshape(1,-1)
    return np.sqrt(((pred - actual)**2).mean())

# Run SGD for Neural Network with regularization
numSteps = 10001
startTime = time.time()
predY = np.array([])
validRMSEOld = 0.0
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in range(numSteps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batchSize) % (trainY.shape[0] - batchSize)
        # Generate a minibatch.
        batchX = trainX[offset:(offset + batchSize), :]
        batchY = trainY[offset:(offset + batchSize)]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feedDict = {tfTrainX : batchX, tfTrainY : batchY}
        _, l, pred = session.run(
            [optimizer, loss, trainPred], feed_dict=feedDict)
        if (step % 1000 == 0):
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch RMSE: %f" % rmse(pred, batchY))
            validRMSE = rmse(validPred.eval(), validY)
            print("Validation RMSE: %f" % validRMSE)
            relChange = (validRMSEOld - validRMSE) / validRMSEOld
            if (abs(relChange) < 0.0001 or np.isnan(validRMSE)):
                break
            else:
                validRMSEOld = validRMSE
    print("Test RMSE: %f" % rmse(testPred.eval(), testY))
    print("Total RMSE: %f" % rmse(allPred.eval(), y))
    predY = allPred.eval()
print('Execution time: %f' % (time.time() - startTime))

plt.plot(y, 'ro')
plt.plot(predY, '-', lw=5)

[Plot: output with the modified code]
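
An equivalent fix, not part of the original answer, is to avoid the broadcast altogether by giving the targets an explicit column shape. A self-contained sketch (a single linear layer stands in for deepNeural() just to keep it short):

import tensorflow as tf

batchSize = 256

graph = tf.Graph()
with graph.as_default():
    tfTrainX = tf.placeholder(tf.float32, shape=(batchSize, 1))
    # Column-vector targets: same shape as the network output, so the
    # subtraction below is elementwise and no transpose is needed.
    tfTrainY = tf.placeholder(tf.float32, shape=(batchSize, 1))

    w = tf.Variable(tf.truncated_normal([1, 1], stddev=0.1))
    logits = tf.matmul(tfTrainX, w)

    loss = 0.5 * tf.reduce_mean(tf.square(logits - tfTrainY))

# The minibatch feed must then supply a matching column vector:
# feedDict = {tfTrainX: batchX, tfTrainY: batchY.reshape(-1, 1)}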

ynpandey, 06.12.2016