XCSF  1.4.7
XCSF learning classifier system
xcs_rl.c
Go to the documentation of this file.
1 /*
2  * This program is free software: you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation, either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program. If not, see <http://www.gnu.org/licenses/>.
14  */
15 
25 #include "xcs_rl.h"
26 #include "clset.h"
27 #include "ea.h"
28 #include "env.h"
29 #include "pa.h"
30 #include "param.h"
31 #include "perf.h"
32 #include "utils.h"
33 
42 static double
43 xcs_rl_trial(struct XCSF *xcsf, double *error, const bool explore)
44 {
45  env_reset(xcsf);
46  param_set_explore(xcsf, explore);
48  *error = 0; // mean prediction error over all steps taken
49  double reward = 0;
50  bool done = false;
51  int steps = 0;
52  while (steps < xcsf->TELETRANSPORTATION && !done) {
54  const double *state = env_get_state(xcsf);
55  const int action = xcs_rl_decision(xcsf, state);
56  reward = env_execute(xcsf, action);
57  done = env_is_done(xcsf);
58  xcs_rl_update(xcsf, state, action, reward, done);
59  *error +=
60  xcs_rl_error(xcsf, action, reward, done, env_max_payoff(xcsf));
61  xcs_rl_end_step(xcsf, state, action, reward);
62  ++steps;
63  }
65  *error /= steps;
66  if (!env_multistep(xcsf)) {
67  return (reward > 0) ? 1 : 0;
68  }
69  return steps;
70 }
71 
77 double
79 {
80  double error = 0; // prediction error: individual trial
81  double werr = 0; // prediction error: windowed total
82  double tperf = 0; // steps to goal: total over all trials
83  double wperf = 0; // steps to goal: windowed total
84  for (int cnt = 0; cnt < xcsf->MAX_TRIALS; ++cnt) {
85  xcs_rl_trial(xcsf, &error, true); // explore
86  const double perf = xcs_rl_trial(xcsf, &error, false); // exploit
87  wperf += perf;
88  tperf += perf;
89  werr += error;
90  perf_print(xcsf, &wperf, &werr, cnt);
91  }
92  return tperf / xcsf->MAX_TRIALS;
93 }
94 
103 double
104 xcs_rl_fit(struct XCSF *xcsf, const double *state, const int action,
105  const double reward)
106 {
109  clset_match(xcsf, state, true);
110  pa_build(xcsf, state);
111  const double prediction = pa_val(xcsf, action);
112  const double error = (xcsf->loss_ptr)(xcsf, &prediction, &reward);
113  param_set_explore(xcsf, true); // ensure EA is executed
114  xcs_rl_update(xcsf, state, action, reward, true);
115  xcs_rl_end_step(xcsf, state, action, reward);
117  xcsf->error += (error - xcsf->error) * xcsf->BETA;
118  return error;
119 }
120 
125 void
127 {
128  xcsf->prev_reward = 0;
129  xcsf->prev_pred = 0;
130  if (xcsf->x_dim < 1) { // memory allocation guard
131  printf("xcs_rl_init_trial(): error x_dim less than 1\n");
132  xcsf->x_dim = 1;
133  exit(EXIT_FAILURE);
134  }
135  xcsf->prev_state = malloc(sizeof(double) * xcsf->x_dim);
136  clset_init(&xcsf->prev_aset);
137  clset_init(&xcsf->kset);
138 }
139 
144 void
146 {
147  clset_free(&xcsf->prev_aset);
148  clset_kill(xcsf, &xcsf->kset);
149  free(xcsf->prev_state);
150 }
151 
156 void
158 {
159  clset_init(&xcsf->mset);
160  clset_init(&xcsf->aset);
161 }
162 
170 void
171 xcs_rl_end_step(struct XCSF *xcsf, const double *state, const int action,
172  const double reward)
173 {
174  clset_free(&xcsf->mset);
175  clset_free(&xcsf->prev_aset);
176  xcsf->prev_aset = xcsf->aset;
177  xcsf->prev_reward = reward;
178  xcsf->prev_pred = pa_val(xcsf, action);
179  memcpy(xcsf->prev_state, state, sizeof(double) * xcsf->x_dim);
180 }
181 
191 void
192 xcs_rl_update(struct XCSF *xcsf, const double *state, const int action,
193  const double reward, const bool done)
194 {
195  clset_action(xcsf, action); // create action set
196  if (xcsf->prev_aset.list != NULL) { // update previous action set and run EA
197  const double p = xcsf->prev_reward + (xcsf->GAMMA * pa_best_val(xcsf));
198  clset_validate(&xcsf->prev_aset);
199  clset_update(xcsf, &xcsf->prev_aset, xcsf->prev_state, &p, false);
200  if (xcsf->explore) {
201  ea(xcsf, &xcsf->prev_aset);
202  }
203  }
204  if (done) { // in terminal state: update current action set and run EA
205  clset_validate(&xcsf->aset);
206  clset_update(xcsf, &xcsf->aset, state, &reward, true);
207  if (xcsf->explore) {
208  ea(xcsf, &xcsf->aset);
209  }
210  }
211 }
212 
222 double
223 xcs_rl_error(struct XCSF *xcsf, const int action, const double reward,
224  const bool done, const double max_p)
225 {
226  double error = 0;
227  const double prediction = pa_val(xcsf, action);
228  if (xcsf->prev_aset.list != NULL) {
229  const double p = xcsf->prev_reward + (xcsf->GAMMA * prediction);
230  error += (xcsf->loss_ptr)(xcsf, &xcsf->prev_pred, &p) / max_p;
231  }
232  if (done) {
233  error += (xcsf->loss_ptr)(xcsf, &prediction, &reward) / max_p;
234  }
235  xcsf->error += (error - xcsf->error) * xcsf->BETA;
236  return error;
237 }
238 
246 int
247 xcs_rl_decision(struct XCSF *xcsf, const double *state)
248 {
249  clset_match(xcsf, state, true);
250  pa_build(xcsf, state);
251  if (xcsf->explore && rand_uniform(0, 1) < xcsf->P_EXPLORE) {
252  return pa_rand_action(xcsf);
253  }
254  return pa_best_action(xcsf);
255 }
void clset_kill(const struct XCSF *xcsf, struct Set *set)
Frees the set and the classifiers.
Definition: clset.c:590
void clset_update(struct XCSF *xcsf, struct Set *set, const double *x, const double *y, const bool cur)
Provides reinforcement to the set and performs set subsumption.
Definition: clset.c:448
void clset_validate(struct Set *set)
Removes classifiers with 0 numerosity from the set.
Definition: clset.c:480
void clset_action(struct XCSF *xcsf, const int action)
Constructs the action set from the match set.
Definition: clset.c:404
void clset_init(struct Set *set)
Initialises a new set.
Definition: clset.c:328
void clset_match(struct XCSF *xcsf, const double *x, const bool cover)
Constructs the match set - forward propagates conditions and actions.
Definition: clset.c:356
void clset_free(struct Set *set)
Frees the set, but not the classifiers.
Definition: clset.c:572
Functions operating on sets of classifiers.
void ea(struct XCSF *xcsf, const struct Set *set)
Executes the evolutionary algorithm (EA).
Definition: ea.c:199
Evolutionary algorithm functions.
Built-in problem environment interface.
static void env_reset(const struct XCSF *xcsf)
Resets the environment.
Definition: env.h:116
static bool env_multistep(const struct XCSF *xcsf)
Returns whether the environment is a multistep problem.
Definition: env.h:62
static const double * env_get_state(const struct XCSF *xcsf)
Returns the current environment perceptions.
Definition: env.h:96
static double env_max_payoff(const struct XCSF *xcsf)
Returns the maximum payoff value possible in the environment.
Definition: env.h:85
static bool env_is_done(const struct XCSF *xcsf)
Returns whether the environment is in a terminal state.
Definition: env.h:51
static double env_execute(const struct XCSF *xcsf, const int action)
Executes the specified action and returns the payoff.
Definition: env.h:74
Definition: __init__.py:1
double pa_best_val(const struct XCSF *xcsf)
Returns the highest value in the prediction array.
Definition: pa.c:165
int pa_rand_action(const struct XCSF *xcsf)
Returns a random action from the prediction array.
Definition: pa.c:150
double pa_val(const struct XCSF *xcsf, const int action)
Returns the value of a specified action in the prediction array.
Definition: pa.c:178
void pa_build(const struct XCSF *xcsf, const double *x)
Builds the prediction array for the specified input.
Definition: pa.c:62
int pa_best_action(const struct XCSF *xcsf)
Returns the best action in the prediction array.
Definition: pa.c:123
Prediction array functions.
const char * param_set_explore(struct XCSF *xcsf, const bool a)
Definition: param.c:881
Functions for setting and printing parameters.
void perf_print(const struct XCSF *xcsf, double *error, double *terror, const int trial)
Displays the current training and test performance.
Definition: perf.c:34
System performance printing.
XCSF data structure.
Definition: xcsf.h:85
double rand_uniform(const double min, const double max)
Returns a uniform random float [min,max].
Definition: utils.c:62
Utility functions for random number handling, etc.
double xcs_rl_fit(struct XCSF *xcsf, const double *state, const int action, const double reward)
Creates and updates an action set for a given (state, action, reward).
Definition: xcs_rl.c:104
void xcs_rl_update(struct XCSF *xcsf, const double *state, const int action, const double reward, const bool done)
Provides reinforcement to the sets.
Definition: xcs_rl.c:192
int xcs_rl_decision(struct XCSF *xcsf, const double *state)
Selects an action to perform in a reinforcement learning problem.
Definition: xcs_rl.c:247
void xcs_rl_init_step(struct XCSF *xcsf)
Initialises a step in a reinforcement learning trial.
Definition: xcs_rl.c:157
static double xcs_rl_trial(struct XCSF *xcsf, double *error, const bool explore)
Executes a reinforcement learning trial using a built-in environment.
Definition: xcs_rl.c:43
void xcs_rl_end_trial(struct XCSF *xcsf)
Frees memory used by a reinforcement learning trial.
Definition: xcs_rl.c:145
double xcs_rl_error(struct XCSF *xcsf, const int action, const double reward, const bool done, const double max_p)
Returns the reinforcement learning system prediction error.
Definition: xcs_rl.c:223
double xcs_rl_exp(struct XCSF *xcsf)
Executes a reinforcement learning experiment.
Definition: xcs_rl.c:78
void xcs_rl_init_trial(struct XCSF *xcsf)
Initialises a reinforcement learning trial.
Definition: xcs_rl.c:126
void xcs_rl_end_step(struct XCSF *xcsf, const double *state, const int action, const double reward)
Ends a step in a reinforcement learning trial.
Definition: xcs_rl.c:171
Reinforcement learning functions.