reinforcement with randomising

2014-11-11 15:34:09 +01:00
parent 9ef4274396
commit 42af5a4d2b
10 changed files with 223 additions and 24 deletions
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -2,7 +2,7 @@ include ../Makefile.const

 LIB_DIR = ../lib
 GEN_TESTS=g-01 g-02
-NN_TESTS=nn-01 nn-02 nn-03 nn-04
+NN_TESTS= nn-reinforcement nn-01 nn-02 nn-03 nn-04
 ALL_TESTS=$(NN_TESTS) $(GEN_TESTS)

 LIBS=$(LIB_DIR)/Genetics.a $(LIB_DIR)/NeuronNetwork.a
--- a/tests/nn-reinforcement.cpp
+++ b/tests/nn-reinforcement.cpp
@@ -0,0 +1,94 @@
+#include "../src/NeuronNetwork/FeedForwardQuick"
+#include "../src/NeuronNetwork/Learning/Reinforcement.h"
+#include "../src/NeuronNetwork/Solution.h"
+
+#include <cstdlib>
+#include <ctime>
+#include <iostream>
+#include <vector>
+
+/**
+ * Minimal Problem implementation used by this test: it stores a fixed
+ * vector of bits and hands back a copy of it from representation().
+ */
+class X: public Shin::NeuronNetwork::Problem
+{
+	public:
+		/// Copies only the stored bits; the Problem base is default-constructed.
+		X(const X& other)
+			: q(other.q)
+		{
+		}
+
+		/// Stores a copy of the given bits.
+		X(const std::vector<bool> &bits)
+			: q(bits)
+		{
+		}
+
+		/// Returns a copy of the stored bits.
+		std::vector<bool> representation() const
+		{
+			return q;
+		}
+
+	protected:
+		/// The bits this problem was constructed with.
+		std::vector<bool> q;
+};
+
+/**
+ * Trains a FeedForwardNetworkQuick (constructed with {2,6,2}) through the
+ * Reinforcement learner, alternating between two sample problems.
+ *
+ * Even iterations learn p[0] ({0,0}), odd iterations learn p[1] ({1,1}).
+ * The quality callback reads the loop counter by reference, so it scores
+ * each solution against the problem that is currently being learned.
+ * Progress for both problems is written to std::cerr every 10000 iterations.
+ */
+int main()
+{
+	std::srand(static_cast<unsigned>(std::time(nullptr)));
+
+	std::vector<X> p;
+	p.push_back(X(std::vector<bool>({0,0})));
+	p.push_back(X(std::vector<bool>({1,1})));
+
+	Shin::NeuronNetwork::FeedForwardNetworkQuick q({2,6,2});
+	Shin::NeuronNetwork::Learning::Reinforcement b(q);
+
+	// Shared with the quality callback below: its parity identifies which
+	// problem the solution being scored belongs to.
+	int i=0;
+	b.setQualityFunction(
+		[&i](const Shin::NeuronNetwork::Solution &s)->double
+		{
+			// The score is truncated to a whole number before it is returned.
+			if(i%2==0)
+			{
+				// expecting 1: the score grows with s[0], measured from 0.80
+				const int e=(s[0]-0.80)*15.0;
+				return e;
+			}
+			// expecting 0: the score grows as s[0] drops, measured from 0.20
+			const int e=(0.20-s[0])*15.0;
+			return e;
+		});
+
+	for(i=0;i < 500000000;i++)
+	{
+		// Coefficient schedule: switch the learner's coefficient at fixed
+		// iteration counts and mark each switch in the log.
+		if(i==75000)
+		{
+			std::cerr << "SSSSSS1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n";
+			b.setCoef(1);
+		}
+		if(i==150000)
+		{
+			std::cerr << "SSSSSS1XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n";
+			b.setCoef(0.51);
+		}
+		if(i==300000)
+		{
+			std::cerr << "SSSSSS2XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n";
+			b.setCoef(0.15);
+		}
+
+		b.learn(p[i%2]);
+
+		// Reseed the C random generator from the clock every 100000 iterations.
+		if(i%100000==0)
+			std::srand(static_cast<unsigned>(std::time(nullptr)));
+
+		// Periodic progress report: both input bits and the first output
+		// of the network for each sample problem.
+		if(i%10000==0)
+		{
+			for(int j=0;j<2;j++)
+			{
+				const std::vector<bool> in=p[j].representation();
+				std::cerr << j <<". FOR: [" << in[0] << "," << in[1] << "] res: " << q.solve(p[j])[0] << "\n";
+			}
+		}
+	}
+}