XCSF 1.4.8
XCSF learning classifier system
Loading...
Searching...
No Matches
xcs_rl.c
Go to the documentation of this file.
1/*
2 * This program is free software: you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation, either version 3 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program. If not, see <http://www.gnu.org/licenses/>.
14 */
15
25#include "xcs_rl.h"
26#include "clset.h"
27#include "ea.h"
28#include "env.h"
29#include "pa.h"
30#include "param.h"
31#include "perf.h"
32#include "utils.h"
33
42static double
43xcs_rl_trial(struct XCSF *xcsf, double *error, const bool explore)
44{
46 param_set_explore(xcsf, explore);
48 *error = 0; // mean prediction error over all steps taken
49 double reward = 0;
50 bool done = false;
51 int steps = 0;
52 while (steps < xcsf->TELETRANSPORTATION && !done) {
54 const double *state = env_get_state(xcsf);
55 const int action = xcs_rl_decision(xcsf, state);
56 reward = env_execute(xcsf, action);
57 done = env_is_done(xcsf);
58 xcs_rl_update(xcsf, state, action, reward, done);
59 *error +=
60 xcs_rl_error(xcsf, action, reward, done, env_max_payoff(xcsf));
61 xcs_rl_end_step(xcsf, state, action, reward);
62 ++steps;
63 }
65 *error /= steps;
66 if (!env_multistep(xcsf)) {
67 return (reward > 0) ? 1 : 0;
68 }
69 return steps;
70}
71
77double
79{
80 double error = 0; // prediction error: individual trial
81 double werr = 0; // prediction error: windowed total
82 double tperf = 0; // steps to goal: total over all trials
83 double wperf = 0; // steps to goal: windowed total
84 for (int cnt = 0; cnt < xcsf->MAX_TRIALS; ++cnt) {
85 xcs_rl_trial(xcsf, &error, true); // explore
86 const double perf = xcs_rl_trial(xcsf, &error, false); // exploit
87 wperf += perf;
88 tperf += perf;
89 werr += error;
90 perf_print(xcsf, &wperf, &werr, cnt);
91 }
92 return tperf / xcsf->MAX_TRIALS;
93}
94
103double
104xcs_rl_fit(struct XCSF *xcsf, const double *state, const int action,
105 const double reward)
106{
109 clset_match(xcsf, state, true);
110 pa_build(xcsf, state);
111 const double prediction = pa_val(xcsf, action);
112 const double error = (xcsf->loss_ptr)(xcsf, &prediction, &reward);
113 param_set_explore(xcsf, true); // ensure EA is executed
114 xcs_rl_update(xcsf, state, action, reward, true);
115 xcs_rl_end_step(xcsf, state, action, reward);
117 xcsf->error += (error - xcsf->error) * xcsf->BETA;
118 return error;
119}
120
125void
127{
128 xcsf->prev_reward = 0;
129 xcsf->prev_pred = 0;
130 if (xcsf->x_dim < 1) { // memory allocation guard
131 printf("xcs_rl_init_trial(): error x_dim less than 1\n");
132 xcsf->x_dim = 1;
133 exit(EXIT_FAILURE);
134 }
135 xcsf->prev_state = malloc(sizeof(double) * xcsf->x_dim);
136 clset_init(&xcsf->prev_aset);
137 clset_init(&xcsf->kset);
138}
139
144void
146{
147 clset_free(&xcsf->prev_aset);
148 clset_kill(xcsf, &xcsf->kset);
149 free(xcsf->prev_state);
150}
151
156void
158{
159 clset_init(&xcsf->mset);
160 clset_init(&xcsf->aset);
161}
162
170void
171xcs_rl_end_step(struct XCSF *xcsf, const double *state, const int action,
172 const double reward)
173{
174 clset_free(&xcsf->mset);
175 clset_free(&xcsf->prev_aset);
176 xcsf->prev_aset = xcsf->aset;
177 xcsf->prev_reward = reward;
178 xcsf->prev_pred = pa_val(xcsf, action);
179 memcpy(xcsf->prev_state, state, sizeof(double) * xcsf->x_dim);
180}
181
191void
192xcs_rl_update(struct XCSF *xcsf, const double *state, const int action,
193 const double reward, const bool done)
194{
195 clset_action(xcsf, action); // create action set
196 if (xcsf->prev_aset.list != NULL) { // update previous action set and run EA
197 const double p = xcsf->prev_reward + (xcsf->GAMMA * pa_best_val(xcsf));
198 clset_validate(&xcsf->prev_aset);
199 clset_update(xcsf, &xcsf->prev_aset, xcsf->prev_state, &p, false);
200 if (xcsf->explore) {
201 ea(xcsf, &xcsf->prev_aset);
202 }
203 }
204 if (done) { // in terminal state: update current action set and run EA
205 clset_validate(&xcsf->aset);
206 clset_update(xcsf, &xcsf->aset, state, &reward, true);
207 if (xcsf->explore) {
208 ea(xcsf, &xcsf->aset);
209 }
210 }
211}
212
222double
223xcs_rl_error(struct XCSF *xcsf, const int action, const double reward,
224 const bool done, const double max_p)
225{
226 double error = 0;
227 const double prediction = pa_val(xcsf, action);
228 if (xcsf->prev_aset.list != NULL) {
229 const double p = xcsf->prev_reward + (xcsf->GAMMA * prediction);
230 error += (xcsf->loss_ptr)(xcsf, &xcsf->prev_pred, &p) / max_p;
231 }
232 if (done) {
233 error += (xcsf->loss_ptr)(xcsf, &prediction, &reward) / max_p;
234 }
235 xcsf->error += (error - xcsf->error) * xcsf->BETA;
236 return error;
237}
238
246int
247xcs_rl_decision(struct XCSF *xcsf, const double *state)
248{
249 clset_match(xcsf, state, true);
250 pa_build(xcsf, state);
251 if (xcsf->explore && rand_uniform(0, 1) < xcsf->P_EXPLORE) {
252 return pa_rand_action(xcsf);
253 }
254 return pa_best_action(xcsf);
255}
void clset_kill(const struct XCSF *xcsf, struct Set *set)
Frees the set and the classifiers.
Definition clset.c:590
void clset_update(struct XCSF *xcsf, struct Set *set, const double *x, const double *y, const bool cur)
Provides reinforcement to the set and performs set subsumption.
Definition clset.c:448
void clset_validate(struct Set *set)
Removes classifiers with 0 numerosity from the set.
Definition clset.c:480
void clset_action(struct XCSF *xcsf, const int action)
Constructs the action set from the match set.
Definition clset.c:404
void clset_init(struct Set *set)
Initialises a new set.
Definition clset.c:328
void clset_match(struct XCSF *xcsf, const double *x, const bool cover)
Constructs the match set - forward propagates conditions and actions.
Definition clset.c:356
void clset_free(struct Set *set)
Frees the set, but not the classifiers.
Definition clset.c:572
Functions operating on sets of classifiers.
void ea(struct XCSF *xcsf, const struct Set *set)
Executes the evolutionary algorithm (EA).
Definition ea.c:199
Evolutionary algorithm functions.
Built-in problem environment interface.
static void env_reset(const struct XCSF *xcsf)
Resets the environment.
Definition env.h:116
static const double * env_get_state(const struct XCSF *xcsf)
Returns the current environment perceptions.
Definition env.h:96
static bool env_multistep(const struct XCSF *xcsf)
Returns whether the environment is a multistep problem.
Definition env.h:62
static double env_max_payoff(const struct XCSF *xcsf)
Returns the maximum payoff value possible in the environment.
Definition env.h:85
static bool env_is_done(const struct XCSF *xcsf)
Returns whether the environment is in a terminal state.
Definition env.h:51
static double env_execute(const struct XCSF *xcsf, const int action)
Executes the specified action and returns the payoff.
Definition env.h:74
double pa_best_val(const struct XCSF *xcsf)
Returns the highest value in the prediction array.
Definition pa.c:165
int pa_rand_action(const struct XCSF *xcsf)
Returns a random action from the prediction array.
Definition pa.c:150
double pa_val(const struct XCSF *xcsf, const int action)
Returns the value of a specified action in the prediction array.
Definition pa.c:178
void pa_build(const struct XCSF *xcsf, const double *x)
Builds the prediction array for the specified input.
Definition pa.c:62
int pa_best_action(const struct XCSF *xcsf)
Returns the best action in the prediction array.
Definition pa.c:123
Prediction array functions.
const char * param_set_explore(struct XCSF *xcsf, const bool a)
Definition param.c:881
Functions for setting and printing parameters.
void perf_print(const struct XCSF *xcsf, double *error, double *terror, const int trial)
Displays the current training and test performance.
Definition perf.c:34
System performance printing.
XCSF data structure.
Definition xcsf.h:85
double rand_uniform(const double min, const double max)
Returns a uniform random float [min,max].
Definition utils.c:62
Utility functions for random number handling, etc.
double xcs_rl_fit(struct XCSF *xcsf, const double *state, const int action, const double reward)
Creates and updates an action set for a given (state, action, reward).
Definition xcs_rl.c:104
void xcs_rl_update(struct XCSF *xcsf, const double *state, const int action, const double reward, const bool done)
Provides reinforcement to the sets.
Definition xcs_rl.c:192
int xcs_rl_decision(struct XCSF *xcsf, const double *state)
Selects an action to perform in a reinforcement learning problem.
Definition xcs_rl.c:247
void xcs_rl_init_step(struct XCSF *xcsf)
Initialises a step in a reinforcement learning trial.
Definition xcs_rl.c:157
static double xcs_rl_trial(struct XCSF *xcsf, double *error, const bool explore)
Executes a reinforcement learning trial using a built-in environment.
Definition xcs_rl.c:43
void xcs_rl_end_trial(struct XCSF *xcsf)
Frees memory used by a reinforcement learning trial.
Definition xcs_rl.c:145
double xcs_rl_error(struct XCSF *xcsf, const int action, const double reward, const bool done, const double max_p)
Returns the reinforcement learning system prediction error.
Definition xcs_rl.c:223
double xcs_rl_exp(struct XCSF *xcsf)
Executes a reinforcement learning experiment.
Definition xcs_rl.c:78
void xcs_rl_init_trial(struct XCSF *xcsf)
Initialises a reinforcement learning trial.
Definition xcs_rl.c:126
void xcs_rl_end_step(struct XCSF *xcsf, const double *state, const int action, const double reward)
Ends a step in a reinforcement learning trial.
Definition xcs_rl.c:171
Reinforcement learning functions.