Deep Learning 4J Study (1): An XOR Example
1. Maven configuration
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>jiantsing</groupId>
<artifactId>dltest</artifactId>
<version>0.0.1-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<nd4j.version>0.7.2</nd4j.version>
<dl4j.version>0.7.2</dl4j.version>
<datavec.version>0.7.2</datavec.version>
<scala.binary.version>2.10</scala.binary.version>
</properties>
<dependencies>
<dependency>
<groupId>org.nd4j</groupId>
<artifactId>nd4j-native</artifactId>
<version>${nd4j.version}</version>
</dependency>
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>dl4j-spark_${scala.binary.version}</artifactId>
<version>${dl4j.version}</version>
</dependency>
<dependency>
<groupId>org.datavec</groupId>
<artifactId>datavec-spark_${scala.binary.version}</artifactId>
<version>${datavec.version}</version>
</dependency>
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-core</artifactId>
<version>${dl4j.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>1.2.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.jiantsing.test.XorTest</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
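With the shade plugin bound to the package phase and com.jiantsing.test.XorTest declared as the main class, building should just be a plain mvn package, which (assuming the default shade behaviour) produces a self-contained jar under target/ that can be launched directly:
mvn package
java -jar target/dltest-0.0.1-SNAPSHOT.jar
The jar name follows from the artifactId and version declared above.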
2. Test class
package com.jiantsing.test;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration.ListBuilder;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer.Builder;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;
public class XorTest {
public static void main(String[] args) {
// List of input values: 4 training samples with data for 2 input neurons each
// create a 4x2 input matrix, initialized to 0
INDArray input = Nd4j.zeros(4, 2);
// corresponding list of expected output values: 4 training samples with data for 2 output neurons each
// create a 4x2 label matrix, initialized to 0
INDArray labels = Nd4j.zeros(4, 2);
// create first dataset
// when first input=0 and second input=0
/* Input matrix (row index: values):
   row 0: 0 0
   row 1: 1 0
   row 2: 0 1
   row 3: 1 1
   Expected labels (binarized / one-hot: the column holding the 1 is the target class,
   so the number of columns equals the number of classes):
   row 0: 1 0   (XOR = false)
   row 1: 0 1   (XOR = true)
   row 2: 0 1   (XOR = true)
   row 3: 1 0   (XOR = false)
*/
input.putScalar(new int[]{0, 0}, 0);
input.putScalar(new int[]{0, 1}, 0);
// then the first output fires for "false", and the second stays 0
labels.putScalar(new int[]{0, 0}, 1);
labels.putScalar(new int[]{0, 1}, 0);
// when first input=1 and second input=0
input.putScalar(new int[]{1, 0}, 1);
input.putScalar(new int[]{1, 1}, 0);
// then xor is true, therefore the second output neuron fires
labels.putScalar(new int[]{1, 0}, 0);
labels.putScalar(new int[]{1, 1}, 1);
// same as above
input.putScalar(new int[]{2, 0}, 0);
input.putScalar(new int[]{2, 1}, 1);
labels.putScalar(new int[]{2, 0}, 0);
labels.putScalar(new int[]{2, 1}, 1);
// when both inputs fire, xor is false again - the first output should
// fire
input.putScalar(new int[]{3, 0}, 1);
input.putScalar(new int[]{3, 1}, 1);
labels.putScalar(new int[]{3, 0}, 1);
labels.putScalar(new int[]{3, 1}, 0);
// create dataset object
DataSet ds = new DataSet(input, labels);
// Set up network configuration
NeuralNetConfiguration.Builder builder = new NeuralNetConfiguration.Builder();
// how often the training set should be run; we need something above
// 1000 iterations, or a higher learning rate - these values were found by trial and error
builder.iterations(10000); // number of iterations; 1000 or more recommended
// learning rate
builder.learningRate(0.1);
// fixed seed for the random generator, so any run of this program
// brings the same results - may not work if you do something like
// ds.shuffle()
builder.seed(123); // random seed
// not needed here, because this network is too small - but for bigger networks
// drop connect can help the network avoid simply memorizing the training data
builder.useDropConnect(false);
// a standard algorithm for moving on the error-plane, this one works
// best for me, LINE_GRADIENT_DESCENT or CONJUGATE_GRADIENT can do the
// job, too - it's an empirical value which one matches best to
// your problem
builder.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT);
// init the bias with 0 - empirical value, too
builder.biasInit(0); // initial value for the bias terms
// from "http://deeplearning4j.org/architecture": The networks can
// process the input more quickly and more accurately by ingesting
// minibatches 5-10 elements at a time in parallel.
// this example runs better without, because the dataset is smaller than
// the mini batch size
builder.miniBatch(false); // no mini-batching, because this dataset is smaller than a mini-batch
// create a multilayer network with 2 layers (including the output
// layer, excluding the input layer)
ListBuilder listBuilder = builder.list();
DenseLayer.Builder hiddenLayerBuilder = new DenseLayer.Builder(); // fully connected hidden layer
// two input connections - simultaneously defines the number of input
// neurons, because it's the first non-input-layer
hiddenLayerBuilder.nIn(2); // two input values
// number of outgoing connections; nOut simultaneously defines the
// number of neurons in this layer
hiddenLayerBuilder.nOut(8); // number of hidden neurons - a tunable hyperparameter
// put the output through the sigmoid function, to cap the output
// value between 0 and 1
hiddenLayerBuilder.activation(Activation.SIGMOID); // activation function
// randomly initialize weights with values between 0 and 1
hiddenLayerBuilder.weightInit(WeightInit.DISTRIBUTION);
hiddenLayerBuilder.dist(new UniformDistribution(0, 1));
// build and set as layer 0
listBuilder.layer(0, hiddenLayerBuilder.build());
// MCXENT or NEGATIVELOGLIKELIHOOD (both are mathematically equivalent) work ok for this example - this
// function calculates the error-value (aka 'cost' or 'loss function value'), and quantifies the goodness
// or badness of a prediction, in a differentiable way
// For classification (with mutually exclusive classes, like here), use multiclass cross entropy, in conjunction
// with softmax activation function
Builder outputLayerBuilder = new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD);
// must be the same amount as neurons in the layer before
outputLayerBuilder.nIn(8); // must match the hidden layer's nOut
// two neurons in this layer
outputLayerBuilder.nOut(2); // number of output classes, matching the label matrix columns
outputLayerBuilder.activation(Activation.SOFTMAX);
outputLayerBuilder.weightInit(WeightInit.DISTRIBUTION);
outputLayerBuilder.dist(new UniformDistribution(0, 1)); // weights initialized uniformly in [0, 1]
listBuilder.layer(1, outputLayerBuilder.build());
// no pretrain phase for this network
listBuilder.pretrain(false);
// seems to be mandatory
// according to agibsonccc: You typically only use that with
// pretrain(true) when you want to do pretrain/finetune without changing
// the previous layers finetuned weights that's for autoencoders and
// rbms
listBuilder.backprop(true);
// build and init the network, will check if everything is configured
// correctly
MultiLayerConfiguration conf = listBuilder.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
// add a listener which outputs the error every 100 parameter updates
net.setListeners(new ScoreIterationListener(100));
// C&P from GravesLSTMCharModellingExample
// Print the number of parameters in the network (and for each layer)
Layer[] layers = net.getLayers();
int totalNumParams = 0;
for (int i = 0; i < layers.length; i++) {
int nParams = layers[i].numParams(); // a dense layer has nIn*nOut weights plus nOut biases
System.out.println("Number of parameters in layer " + i + ": " + nParams);
totalNumParams += nParams;
}
System.out.println("Total number of network parameters: " + totalNumParams);
// here the actual learning takes place
net.fit(ds);
// create output for every training sample
INDArray output = net.output(ds.getFeatureMatrix());
System.out.println(output); // network outputs for the training inputs
// let Evaluation print stats on how often the right output had the
// highest value
Evaluation eval = new Evaluation(2);
eval.eval(ds.getLabels(), output);
System.out.println(eval.stats()); // print the classification statistics
System.out.println("測試結果:");
INDArray output2 = Nd4j.zeros(2, 2);
output2.putScalar(new int[]{0, 0}, 0);
output2.putScalar(new int[]{0, 1}, 0);
output2.putScalar(new int[]{1, 0}, 1);
output2.putScalar(new int[]{1, 1}, 1);
System.out.println(net.output(output2)); // print predictions for the two test inputs
}
}
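Reading the softmax output is just a matter of taking the column with the highest probability as the predicted class. A minimal sketch of that step, assuming the trained net variable from the example above (Nd4j.argMax reduces along the given dimension):

INDArray testInput = Nd4j.create(new double[]{1, 0}, new int[]{1, 2}); // one sample: x1=1, x2=0
INDArray probabilities = net.output(testInput); // e.g. something close to [[0.01, 0.99]]
int predictedClass = Nd4j.argMax(probabilities, 1).getInt(0); // column index of the maximum
System.out.println("XOR(1,0) predicted class = " + predictedClass); // expected: 1 (true)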
3. Output
Number of parameters in layer 0: 24
Number of parameters in layer 1: 18
Total number of network parameters: 42
[[1.00, 0.00],
[0.00, 1.00],
[0.00, 1.00],
[1.00, 0.00]]
Examples labeled as 0 classified by model as 0: 2 times
Examples labeled as 1 classified by model as 1: 2 times
==========================Scores========================================
Accuracy: 1
Precision: 1
Recall: 1
F1 Score: 1
========================================================================
Test results:
[[1.00, 0.00],
[1.00, 0.00]]
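The parameter counts above follow directly from the layer sizes: a fully connected layer holds nIn * nOut weights plus nOut biases. A quick check of the numbers printed by the program (plain arithmetic, not part of the example code):

int hiddenParams = 2 * 8 + 8; // layer 0: 2 inputs -> 8 hidden neurons = 24 parameters
int outputParams = 8 * 2 + 2; // layer 1: 8 hidden neurons -> 2 outputs = 18 parameters
int totalParams = hiddenParams + outputParams; // 42, matching the total reported by the network

The two test rows [1.00, 0.00] mean the network assigns (almost) all probability to class 0, i.e. XOR is false for the inputs (0,0) and (1,1), which is correct.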
4. Source code