Backpropagation in a Convolution layer (C++)
I'm trying to implement a CNN from scratch.
I've already implemented a fully connected layer and it works.
However, the convolutional layer's backpropagation doesn't give any positive results.
The layer contains two methods: backward(), which backpropagates the signal to the earlier layers, and fit(), which updates the convolution kernel weights and biases.
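For reference, these are the textbook gradients I am trying to match, written for a single input channel, stride 1 and "valid" padding (the notation is mine, not taken from the code). With input $X$, kernel $W$, bias $b$, pre-activation $Y = X \star W + b$ and activation $f$, the chain rule gives
$$
\frac{\partial L}{\partial Y_{x,y}} = \frac{\partial L}{\partial f(Y)_{x,y}}\, f'(Y_{x,y}), \qquad
\frac{\partial L}{\partial W_{i,j}} = \sum_{x,y} \frac{\partial L}{\partial Y_{x,y}}\, X_{x+i,\,y+j}, \qquad
\frac{\partial L}{\partial b} = \sum_{x,y} \frac{\partial L}{\partial Y_{x,y}},
$$
and the gradient handed back to the previous layer is the "full" correlation
$$
\frac{\partial L}{\partial X_{p,q}} = \sum_{i,j} \frac{\partial L}{\partial Y_{p-i,\,q-j}}\, W_{i,j},
$$
where out-of-range terms are treated as zero. My implementation is below.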
std::vector<std::vector<std::vector<double>>> ConvLayer::backward(std::vector<std::vector<std::vector<double>>> grad)
{
    int gradNumber = grad.size();
    int gradWidth = grad[0].size();
    int gradHeight = grad[0][0].size();
    if(!this->gradInitialized)
    {
        this->gradInitialized = true;
        this->m_t = RandValueGenerator::get_rand_3d_tensor(gradNumber, gradWidth, gradHeight, true);
        this->v_t = RandValueGenerator::get_rand_3d_tensor(gradNumber, gradWidth, gradHeight, true);
    }
    int kernelWidth = this->kernels[0].size();
    int kernelHeight = this->kernels[0][0].size();
    int inputNumber = this->inputSaved.size();
    int widthIterNumber = this->inputSaved[0].size() - kernelWidth + 1;
    int heightIterNumber = this->inputSaved[0][0].size() - kernelHeight + 1;
    std::vector<std::vector<std::vector<double>>> gradUpd;
    gradUpd = RandValueGenerator::get_rand_3d_tensor(gradNumber, kernelWidth, kernelHeight, true);
    // Multiply the incoming gradient by the activation derivative (element-wise).
    #pragma omp parallel for
    for(int i = 0; i < gradNumber; ++i)
    {
        for(int x = 0; x < gradWidth; ++x)
        {
            for(int y = 0; y < gradHeight; ++y)
            {
                double outputInit = this->outputSaved[i][x][y];
                double outputDer = ActivationFunc::calc(this->activationType, outputInit, true);
                grad[i][x][y] = outputDer * grad[i][x][y];
            }
        }
    }
    this->grad = grad;
    // Accumulate the gradient that gets passed on to the previous layer.
    #pragma omp parallel for
    for(int index = 0; index < gradNumber; ++index)
    {
        for(int x = 0; x < gradWidth; ++x)
        {
            for(int y = 0; y < gradHeight; ++y)
            {
                for(int i = 0; i < kernelWidth; ++i)
                {
                    for(int j = 0; j < kernelHeight; ++j)
                    {
                        gradUpd[index][i][j] += grad[index][x][y] * this->kernels[index][i][j];
                    }
                }
            }
        }
    }
    return gradUpd;
}

void ConvLayer::fit(int t, AdamOptimizer& adam)
{
    int gradNumber = grad.size();
    int gradWidth = grad[0].size();
    int gradHeight = grad[0][0].size();
    int inputNumber = this->inputSaved.size();
    int kernelWidth = this->kernels[0].size();
    int kernelHeight = this->kernels[0][0].size();
    int widthIterNumber = this->inputSaved[0].size() - kernelWidth + 1;
    int heightIterNumber = this->inputSaved[0][0].size() - kernelHeight + 1;
    std::vector<std::vector<std::vector<double>>> diff;
    diff = RandValueGenerator::get_rand_3d_tensor(gradNumber, gradWidth, gradHeight);
    // Run the stored gradient through the Adam update rule.
    #pragma omp parallel for
    for(int i = 0; i < gradNumber; ++i)
    {
        for(int x = 0; x < gradWidth; ++x)
        {
            for(int y = 0; y < gradHeight; ++y)
            {
                diff[i][x][y] = adam.calc(t, this->m_t[i][x][y], this->v_t[i][x][y], this->grad[i][x][y]);
            }
        }
    }
    #pragma omp parallel for
    for(int index = 0; index < gradNumber; ++index)
    {
        std::vector<std::vector<double>> kernelDet(kernelWidth, std::vector<double>(kernelHeight));
        std::vector<std::vector<double>> inputNow(this->inputSaved[index % inputNumber]);
        std::vector<std::vector<double>> diffNow(diff[index]);
        // Accumulate the kernel update over every output position.
        for(int x = 0; x < widthIterNumber; ++x)
        {
            for(int y = 0; y < heightIterNumber; ++y)
            {
                for(int i = 0; i < kernelWidth; ++i)
                {
                    for(int j = 0; j < kernelHeight; ++j)
                    {
                        int x_input = x + i, y_input = y + j;
                        int x_grad = x_input, y_grad = y_input;
                        if(x_grad >= gradWidth)
                            x_grad = gradWidth - 1;
                        if(y_grad >= gradHeight)
                            y_grad = gradHeight - 1;
                        kernelDet[i][j] += inputNow[x_input][y_input] * diffNow[x_grad][y_grad];
                    }
                }
            }
        }
        // Average the accumulated update and apply it to the kernel and bias.
        for(int i = 0; i < kernelWidth; ++i)
        {
            for(int j = 0; j < kernelHeight; ++j)
            {
                kernelDet[i][j] /= (widthIterNumber * heightIterNumber);
                this->kernels[index][i][j] += kernelDet[i][j];
                this->biases[index] += kernelDet[i][j];
            }
        }
    }
}
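To make the expected numbers concrete, here is a small standalone finite-difference check of the kernel gradient for a plain 2-D "valid" convolution with a tanh activation. It is only a reference sketch: it does not touch ConvLayer or any of my classes, tanh is just an example activation, and the tiny input/kernel values are made up. It verifies the $\partial L/\partial W$ formula above, which is what I am comparing my fit() logic against:

// Standalone finite-difference check of the kernel gradient for a single
// 2-D "valid" convolution followed by tanh. Reference sketch only; it does
// not use ConvLayer.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

using Matrix = std::vector<std::vector<double>>;

// Loss = sum over all output positions of tanh((X * W)_{x,y} + b).
double loss(const Matrix& X, const Matrix& W, double b)
{
    int kW = W.size(), kH = W[0].size();
    int outW = X.size() - kW + 1, outH = X[0].size() - kH + 1;
    double L = 0.0;
    for(int x = 0; x < outW; ++x)
        for(int y = 0; y < outH; ++y)
        {
            double s = b;
            for(int i = 0; i < kW; ++i)
                for(int j = 0; j < kH; ++j)
                    s += X[x + i][y + j] * W[i][j];
            L += std::tanh(s);
        }
    return L;
}

int main()
{
    Matrix X = {{ 0.1, -0.2,  0.3,  0.4},
                { 0.5,  0.1, -0.6,  0.2},
                {-0.3,  0.7,  0.2, -0.1},
                { 0.4, -0.5,  0.1,  0.3}};
    Matrix W = {{ 0.2, -0.1,  0.3},
                { 0.0,  0.4, -0.2},
                { 0.1,  0.1, -0.3}};
    double b = 0.05;

    int kW = W.size(), kH = W[0].size();
    int outW = X.size() - kW + 1, outH = X[0].size() - kH + 1;

    // Analytic gradient: dL/dW[i][j] = sum_{x,y} f'(pre-activation) * X[x+i][y+j].
    Matrix analytic(kW, std::vector<double>(kH, 0.0));
    for(int x = 0; x < outW; ++x)
        for(int y = 0; y < outH; ++y)
        {
            double s = b;
            for(int i = 0; i < kW; ++i)
                for(int j = 0; j < kH; ++j)
                    s += X[x + i][y + j] * W[i][j];
            double der = 1.0 - std::tanh(s) * std::tanh(s);   // tanh'(s)
            for(int i = 0; i < kW; ++i)
                for(int j = 0; j < kH; ++j)
                    analytic[i][j] += der * X[x + i][y + j];
        }

    // Numerical gradient by central differences.
    double eps = 1e-6, maxErr = 0.0;
    for(int i = 0; i < kW; ++i)
        for(int j = 0; j < kH; ++j)
        {
            Matrix Wp = W, Wm = W;
            Wp[i][j] += eps;
            Wm[i][j] -= eps;
            double numeric = (loss(X, Wp, b) - loss(X, Wm, b)) / (2.0 * eps);
            maxErr = std::max(maxErr, std::fabs(numeric - analytic[i][j]));
        }
    // Should be close to zero (limited only by floating-point error).
    std::cout << "max |numeric - analytic| = " << maxErr << '\n';
}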
The complete code can be found here.
Of course, the code is hard to read and could be formatted better...
But are there any problems with the backpropagation logic itself?
c++ machine-learning neural-network