connected_layer.c
```c
#include <stdlib.h>
#include <assert.h>
#include <math.h>
#include "uwnet.h"

// Add bias terms to a matrix
// matrix xw: partially computed output of layer
// matrix b: bias to add in (should only be one row!)
// returns: y = xw + b
matrix forward_bias(matrix xw, matrix b)
{
    assert(b.rows == 1);
    assert(xw.cols == b.cols);
    matrix y = copy_matrix(xw);
    int i, j;
    for(i = 0; i < xw.rows; ++i){
        for(j = 0; j < xw.cols; ++j){
            y.data[i*y.cols + j] += b.data[j];
        }
    }
    return y;
}

// Calculate dL/db from dL/dy
// matrix dy: derivative of loss wrt xw+b, dL/d(xw+b)
// returns: derivative of loss wrt b, dL/db
matrix backward_bias(matrix dy)
{
    matrix db = make_matrix(1, dy.cols);
    int i, j;
    for(i = 0; i < dy.rows; ++i){
        for(j = 0; j < dy.cols; ++j){
            db.data[j] += dy.data[i*dy.cols + j];
        }
    }
    return db;
}

// Run a connected layer on input
// layer l: layer to run
// matrix x: input to layer
// returns: the result of running the layer, y = xw + b
matrix forward_connected_layer(layer l, matrix x)
{
    // Save our input for the backward pass
    // Probably don't change this
    free_matrix(*l.x);
    *l.x = copy_matrix(x);

    // TODO: 3.1 - run the network forward
    matrix y = matmul(x, l.w);
    y = forward_bias(y, l.b);
    return y;
}
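
// For reference: with y = x*w + b, where x is (batch x inputs), w is
// (inputs x outputs), and b is a single row broadcast over the batch,
// the chain rule gives
//   dL/db = column sums of dL/dy   (what backward_bias computes)
//   dL/dw = x^T * dL/dy
//   dL/dx = dL/dy * w^T
// backward_connected_layer below accumulates dL/db and dL/dw and returns dL/dx.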

// Run a connected layer backward
// layer l: layer to run
// matrix dy: dL/dy for this layer
// returns: dL/dx for this layer
matrix backward_connected_layer(layer l, matrix dy)
{
    matrix x = *l.x;

    // TODO: 3.2
    // Calculate the gradient dL/db for the bias terms using backward_bias
    // and add it into any stored gradient info already in l.db.
    // Then calculate dL/dw. Use axpy to add this dL/dw into any previously
    // stored updates for our weights, which are stored in l.dw.
    // Calculate dL/dx and return it.
    axpy_matrix(1.0, backward_bias(dy), l.db);

    matrix dw = matmul(transpose_matrix(x), dy);
    axpy_matrix(1.0, dw, l.dw);

    matrix dx = matmul(dy, transpose_matrix(l.w));
    return dx;
}

// Update weights and biases of connected layer
// layer l: layer to update
// float rate: SGD learning rate
// float momentum: SGD momentum term
// float decay: L2 regularization term
void update_connected_layer(layer l, float rate, float momentum, float decay)
{
    // TODO: 3.3
    // Apply our updates using our SGD update rule.
    // Assume l.dw = dL/dw - momentum * update_prev.
    // We want l.dw = dL/dw - momentum * update_prev + decay * w,
    // then we update l.w = l.w - rate * l.dw.
    // Lastly, l.dw is the negative update (-update), but for the next iteration
    // we want it to be (-momentum * update), so we just need to scale it a little.
    // Do the same for the biases, but there is no need for weight decay on biases.
    axpy_matrix(decay, l.w, l.dw);
    axpy_matrix(-rate, l.dw, l.w);
    scal_matrix(momentum, l.dw);

    axpy_matrix(-rate, l.db, l.b);
    scal_matrix(momentum, l.db);
}

layer make_connected_layer(int inputs, int outputs)
{
    layer l = {0};
    l.w  = random_matrix(inputs, outputs, sqrtf(2.f/inputs));
    l.dw = make_matrix(inputs, outputs);
    l.b  = make_matrix(1, outputs);
    l.db = make_matrix(1, outputs);
    l.x  = calloc(1, sizeof(matrix));
    l.forward  = forward_connected_layer;
    l.backward = backward_connected_layer;
    l.update   = update_connected_layer;
    return l;
}
```
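In equation form, `update_connected_layer` is plain SGD with momentum and L2 weight decay. Writing $\eta$ for `rate`, $m$ for `momentum`, $\lambda$ for `decay`, and $v_t$ for the value held in `l.dw` at the moment the weights are changed (notation introduced here, not in the code), one step is

$$
v_t = \frac{\partial L}{\partial w} + \lambda\, w_{t-1} + m\, v_{t-1},
\qquad
w_t = w_{t-1} - \eta\, v_t,
$$

with the same rule for the biases except that $\lambda$ is not applied. After the step, `l.dw` is rescaled to $m\, v_t$ so the next backward pass accumulates its gradient on top of the momentum term.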
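As a rough illustration of how the pieces above fit together, here is a minimal single-step sketch. It assumes only what this file defines or uses (the `layer` and `matrix` types, `make_matrix`, `free_matrix`, and the `forward`/`backward`/`update` pointers set in `make_connected_layer`); the function name, input sizes, and hyperparameters are made up for the example, and the real homework drives layers through the rest of uwnet.

```c
#include "uwnet.h"

// Hypothetical helper, for illustration only: one forward/backward/update
// cycle on a single connected layer with a made-up loss gradient.
void connected_layer_demo_step(void)
{
    layer l = make_connected_layer(4, 3);   // 4 inputs -> 3 outputs

    matrix x = make_matrix(2, 4);           // batch of 2 examples (zeros here;
                                            // fill x.data with real inputs)
    matrix y = l.forward(l, x);             // y = x*w + b, shape 2x3

    matrix dy = make_matrix(2, 3);          // stand-in for dL/dy from a loss
    matrix dx = l.backward(l, dy);          // accumulates l.dw, l.db; returns dL/dx

    l.update(l, 0.1f, 0.9f, 0.0005f);       // rate, momentum, decay (arbitrary)

    free_matrix(x);
    free_matrix(y);
    free_matrix(dy);
    free_matrix(dx);
}
```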