xcsf/xcs__rl_8c_source.html

 /*

  * This program is free software: you can redistribute it and/or modify

  * it under the terms of the GNU General Public License as published by

  * the Free Software Foundation, either version 3 of the License, or

  * (at your option) any later version.

  *

  * This program is distributed in the hope that it will be useful,

  * but WITHOUT ANY WARRANTY; without even the implied warranty of

  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

  * GNU General Public License for more details.

  *

  * You should have received a copy of the GNU General Public License

  * along with this program.  If not, see <http://www.gnu.org/licenses/>.

  */


 #include "xcs_rl.h"

 #include "clset.h"

 #include "ea.h"

 #include "env.h"

 #include "pa.h"

 #include "param.h"

 #include "perf.h"

 #include "utils.h"


 /**
  * @brief Executes a reinforcement learning trial using a built-in environment.
  * @details Resets the environment, sets the explore flag and then repeatedly
  * selects an action, executes it, reinforces the sets and accumulates the
  * prediction error. The loop ends when the environment reports a terminal
  * state or when TELETRANSPORTATION steps have been taken.
  * @param [in] xcsf The XCSF data structure.
  * @param [out] error Mean prediction error over all steps taken; left at 0
  * when no step was taken.
  * @param [in] explore Value passed to param_set_explore() for this trial.
  * @return If the environment is not multistep: 1 when the last reward was
  * positive, otherwise 0. If it is multistep: the number of steps taken.
  */
 static double
 xcs_rl_trial(struct XCSF *xcsf, double *error, const bool explore)
 {
     env_reset(xcsf);
     param_set_explore(xcsf, explore);
     xcs_rl_init_trial(xcsf);
     *error = 0; // running total; converted to a mean after the loop
     double reward = 0;
     bool done = false;
     int steps = 0;
     while (steps < xcsf->TELETRANSPORTATION && !done) {
         xcs_rl_init_step(xcsf);
         const double *state = env_get_state(xcsf);
         const int action = xcs_rl_decision(xcsf, state);
         reward = env_execute(xcsf, action);
         done = env_is_done(xcsf);
         xcs_rl_update(xcsf, state, action, reward, done);
         *error +=
             xcs_rl_error(xcsf, action, reward, done, env_max_payoff(xcsf));
         xcs_rl_end_step(xcsf, state, action, reward);
         ++steps;
     }
     xcs_rl_end_trial(xcsf);
     if (steps > 0) { // no steps taken: avoid 0/0, which would yield NaN
         *error /= steps;
     }
     if (!env_multistep(xcsf)) {
         return (reward > 0) ? 1 : 0;
     }
     return steps;
 }


 /**
  * @brief Executes a reinforcement learning experiment.
  * @details Each iteration runs one explore trial followed by one exploit
  * trial. Only the exploit trial's performance and error are accumulated and
  * handed to perf_print().
  * @param [in] xcsf The XCSF data structure.
  * @return The exploit performance summed over all trials divided by
  * MAX_TRIALS.
  */
 double
 xcs_rl_exp(struct XCSF *xcsf)
 {
     double trial_err = 0; // prediction error of the most recent trial
     double window_err = 0; // prediction error: windowed total
     double window_perf = 0; // performance: windowed total
     double total_perf = 0; // performance: total over all trials
     int trial = 0;
     while (trial < xcsf->MAX_TRIALS) {
         // explore trial: its return value is not used and its error is
         // overwritten by the exploit trial below
         xcs_rl_trial(xcsf, &trial_err, true);
         const double score = xcs_rl_trial(xcsf, &trial_err, false);
         window_perf += score;
         total_perf += score;
         window_err += trial_err;
         perf_print(xcsf, &window_perf, &window_err, trial);
         ++trial;
     }
     return total_perf / xcsf->MAX_TRIALS;
 }


 double

 xcs_rl_fit(struct XCSF *xcsf, const double *state, const int action,

            const double reward)

 {

     xcs_rl_init_trial(xcsf);

     xcs_rl_init_step(xcsf);

     clset_match(xcsf, state, true);

     pa_build(xcsf, state);

     const double prediction = pa_val(xcsf, action);

     const double error = (xcsf->loss_ptr)(xcsf, &prediction, &reward);

     param_set_explore(xcsf, true); // ensure EA is executed

     xcs_rl_update(xcsf, state, action, reward, true);

     xcs_rl_end_step(xcsf, state, action, reward);

     xcs_rl_end_trial(xcsf);

     xcsf->error += (error - xcsf->error) * xcsf->BETA;

     return error;

 }


 /**
  * @brief Initialises a reinforcement learning trial.
  * @details Clears the previous reward and prediction, allocates the buffer
  * that stores the previous state (x_dim doubles) and initialises the
  * prev_aset and kset sets. The buffer is released by xcs_rl_end_trial().
  * Terminates the program with EXIT_FAILURE if x_dim is less than 1 or if the
  * allocation fails.
  * @param [in] xcsf The XCSF data structure.
  */
 void
 xcs_rl_init_trial(struct XCSF *xcsf)
 {
     xcsf->prev_reward = 0;
     xcsf->prev_pred = 0;
     if (xcsf->x_dim < 1) { // memory allocation guard
         printf("xcs_rl_init_trial(): error x_dim less than 1\n");
         // NOTE(review): this store has no observable effect before exit();
         // presumably kept to satisfy static analysis - confirm
         xcsf->x_dim = 1;
         exit(EXIT_FAILURE);
     }
     xcsf->prev_state = malloc(sizeof(double) * xcsf->x_dim);
     if (xcsf->prev_state == NULL) { // xcs_rl_end_step() copies into this
         printf("xcs_rl_init_trial(): error allocating prev_state\n");
         exit(EXIT_FAILURE);
     }
     clset_init(&xcsf->prev_aset);
     clset_init(&xcsf->kset);
 }


 /**
  * @brief Frees memory used by a reinforcement learning trial.
  * @details Frees the prev_aset set, kills the kset set (clset_kill() also
  * frees its classifiers) and releases the previous-state buffer allocated by
  * xcs_rl_init_trial().
  * @param [in] xcsf The XCSF data structure.
  */
 void
 xcs_rl_end_trial(struct XCSF *xcsf)
 {
     clset_free(&xcsf->prev_aset);
     clset_kill(xcsf, &xcsf->kset);
     free(xcsf->prev_state);
     // do not leave a dangling pointer behind: a repeated call then performs
     // free(NULL), which is a no-op, instead of a double free
     xcsf->prev_state = NULL;
 }


 /**
  * @brief Initialises a step in a reinforcement learning trial.
  * @details Resets the two per-step sets: aset (the current action set) and
  * mset (presumably the match set).
  * @param [in] xcsf The XCSF data structure.
  */
 void
 xcs_rl_init_step(struct XCSF *xcsf)
 {
     // the two sets are distinct members, so the order is irrelevant
     clset_init(&xcsf->aset);
     clset_init(&xcsf->mset);
 }


 /**
  * @brief Ends a step in a reinforcement learning trial.
  * @details Stores the state, reward and predicted value of this step as the
  * "previous" values, frees the mset set and replaces the previous action set
  * with the current one.
  * @param [in] xcsf The XCSF data structure.
  * @param [in] state The state of this step; x_dim doubles are copied.
  * @param [in] action The action performed in this step.
  * @param [in] reward The reward received in this step.
  */
 void
 xcs_rl_end_step(struct XCSF *xcsf, const double *state, const int action,
                 const double reward)
 {
     // remember this step's state, prediction and reward
     const size_t state_bytes = sizeof(double) * xcsf->x_dim;
     memcpy(xcsf->prev_state, state, state_bytes);
     xcsf->prev_pred = pa_val(xcsf, action);
     xcsf->prev_reward = reward;
     // drop the per-step set and the old previous action set
     clset_free(&xcsf->mset);
     clset_free(&xcsf->prev_aset);
     // the current action set is handed over; it is not freed here
     xcsf->prev_aset = xcsf->aset;
 }


 /**
  * @brief Validates and reinforces one set and, when exploring, runs the EA
  * on it.
  * @param [in] xcsf The XCSF data structure.
  * @param [in] set The set to reinforce.
  * @param [in] x The input state passed to clset_update().
  * @param [in] y The payoff passed to clset_update().
  * @param [in] cur The flag forwarded to clset_update().
  */
 static void
 xcs_rl_reinforce_set(struct XCSF *xcsf, struct Set *set, const double *x,
                      const double *y, const bool cur)
 {
     clset_validate(set);
     clset_update(xcsf, set, x, y, cur);
     if (xcsf->explore) {
         ea(xcsf, set);
     }
 }

 /**
  * @brief Provides reinforcement to the sets.
  * @details Builds the action set for the chosen action. If a previous action
  * set exists it is reinforced with the previous reward plus GAMMA times the
  * best value in the prediction array. If the step is terminal, the current
  * action set is reinforced with the reward itself. The EA runs on each
  * reinforced set when the explore flag is set.
  * @param [in] xcsf The XCSF data structure.
  * @param [in] state The current input state.
  * @param [in] action The action selected in this step.
  * @param [in] reward The reward received for the action.
  * @param [in] done Whether the environment is in a terminal state.
  */
 void
 xcs_rl_update(struct XCSF *xcsf, const double *state, const int action,
               const double reward, const bool done)
 {
     clset_action(xcsf, action); // create action set
     if (xcsf->prev_aset.list != NULL) {
         // previous action set: payoff is computed before the set is touched
         const double payoff =
             xcsf->prev_reward + (xcsf->GAMMA * pa_best_val(xcsf));
         xcs_rl_reinforce_set(xcsf, &xcsf->prev_aset, xcsf->prev_state,
                              &payoff, false);
     }
     if (done) {
         // terminal state: the current action set receives the reward
         xcs_rl_reinforce_set(xcsf, &xcsf->aset, state, &reward, true);
     }
 }


 double

 xcs_rl_error(struct XCSF *xcsf, const int action, const double reward,

              const bool done, const double max_p)

 {

     double error = 0;

     const double prediction = pa_val(xcsf, action);

     if (xcsf->prev_aset.list != NULL) {

         const double p = xcsf->prev_reward + (xcsf->GAMMA * prediction);

         error += (xcsf->loss_ptr)(xcsf, &xcsf->prev_pred, &p) / max_p;

     }

     if (done) {

         error += (xcsf->loss_ptr)(xcsf, &prediction, &reward) / max_p;

     }

     xcsf->error += (error - xcsf->error) * xcsf->BETA;

     return error;

 }


 /**
  * @brief Selects an action to perform in a reinforcement learning problem.
  * @details Builds the match set (with covering enabled) and the prediction
  * array for the state. When the explore flag is set, a random action is
  * chosen if a uniform draw from [0,1] is below P_EXPLORE; in every other
  * case the best action in the prediction array is chosen.
  * @param [in] xcsf The XCSF data structure.
  * @param [in] state The input state.
  * @return The selected action.
  */
 int
 xcs_rl_decision(struct XCSF *xcsf, const double *state)
 {
     clset_match(xcsf, state, true);
     pa_build(xcsf, state);
     if (!xcsf->explore) {
         return pa_best_action(xcsf); // no random number is drawn here
     }
     const bool pick_random = rand_uniform(0, 1) < xcsf->P_EXPLORE;
     return pick_random ? pa_rand_action(xcsf) : pa_best_action(xcsf);
 }

clset_kill
void clset_kill(const struct XCSF *xcsf, struct Set *set)
Frees the set and the classifiers.
Definition: clset.c:590

clset_update
void clset_update(struct XCSF *xcsf, struct Set *set, const double *x, const double *y, const bool cur)
Provides reinforcement to the set and performs set subsumption.
Definition: clset.c:448

clset_validate
void clset_validate(struct Set *set)
Removes classifiers with 0 numerosity from the set.
Definition: clset.c:480

clset_action
void clset_action(struct XCSF *xcsf, const int action)
Constructs the action set from the match set.
Definition: clset.c:404

clset_init
void clset_init(struct Set *set)
Initialises a new set.
Definition: clset.c:328

clset_match
void clset_match(struct XCSF *xcsf, const double *x, const bool cover)
Constructs the match set - forward propagates conditions and actions.
Definition: clset.c:356

clset_free
void clset_free(struct Set *set)
Frees the set, but not the classifiers.
Definition: clset.c:572

clset.h
Functions operating on sets of classifiers.

ea
void ea(struct XCSF *xcsf, const struct Set *set)
Executes the evolutionary algorithm (EA).
Definition: ea.c:199

ea.h
Evolutionary algorithm functions.

env.h
Built-in problem environment interface.

env_reset
static void env_reset(const struct XCSF *xcsf)
Resets the environment.
Definition: env.h:116

env_multistep
static bool env_multistep(const struct XCSF *xcsf)
Returns whether the environment is a multistep problem.
Definition: env.h:62

env_get_state
static const double * env_get_state(const struct XCSF *xcsf)
Returns the current environment perceptions.
Definition: env.h:96

env_max_payoff
static double env_max_payoff(const struct XCSF *xcsf)
Returns the maximum payoff value possible in the environment.
Definition: env.h:85

env_is_done
static bool env_is_done(const struct XCSF *xcsf)
Returns whether the environment is in a terminal state.
Definition: env.h:51

env_execute
static double env_execute(const struct XCSF *xcsf, const int action)
Executes the specified action and returns the payoff.
Definition: env.h:74

xcsf
Definition: __init__.py:1

pa_best_val
double pa_best_val(const struct XCSF *xcsf)
Returns the highest value in the prediction array.
Definition: pa.c:165

pa_rand_action
int pa_rand_action(const struct XCSF *xcsf)
Returns a random action from the prediction array.
Definition: pa.c:150

pa_val
double pa_val(const struct XCSF *xcsf, const int action)
Returns the value of a specified action in the prediction array.
Definition: pa.c:178

pa_build
void pa_build(const struct XCSF *xcsf, const double *x)
Builds the prediction array for the specified input.
Definition: pa.c:62

pa_best_action
int pa_best_action(const struct XCSF *xcsf)
Returns the best action in the prediction array.
Definition: pa.c:123

pa.h
Prediction array functions.

param_set_explore
const char * param_set_explore(struct XCSF *xcsf, const bool a)
Definition: param.c:881

param.h
Functions for setting and printing parameters.

perf_print
void perf_print(const struct XCSF *xcsf, double *error, double *terror, const int trial)
Displays the current training and test performance.
Definition: perf.c:34

perf.h
System performance printing.

XCSF
XCSF data structure.
Definition: xcsf.h:85

rand_uniform
double rand_uniform(const double min, const double max)
Returns a uniform random float [min,max].
Definition: utils.c:62

utils.h
Utility functions for random number handling, etc.

xcs_rl_fit
double xcs_rl_fit(struct XCSF *xcsf, const double *state, const int action, const double reward)
Creates and updates an action set for a given (state, action, reward).
Definition: xcs_rl.c:104

xcs_rl_update
void xcs_rl_update(struct XCSF *xcsf, const double *state, const int action, const double reward, const bool done)
Provides reinforcement to the sets.
Definition: xcs_rl.c:192

xcs_rl_decision
int xcs_rl_decision(struct XCSF *xcsf, const double *state)
Selects an action to perform in a reinforcement learning problem.
Definition: xcs_rl.c:247

xcs_rl_init_step
void xcs_rl_init_step(struct XCSF *xcsf)
Initialises a step in a reinforcement learning trial.
Definition: xcs_rl.c:157

xcs_rl_trial
static double xcs_rl_trial(struct XCSF *xcsf, double *error, const bool explore)
Executes a reinforcement learning trial using a built-in environment.
Definition: xcs_rl.c:43

xcs_rl_end_trial
void xcs_rl_end_trial(struct XCSF *xcsf)
Frees memory used by a reinforcement learning trial.
Definition: xcs_rl.c:145

xcs_rl_error
double xcs_rl_error(struct XCSF *xcsf, const int action, const double reward, const bool done, const double max_p)
Returns the reinforcement learning system prediction error.
Definition: xcs_rl.c:223

xcs_rl_exp
double xcs_rl_exp(struct XCSF *xcsf)
Executes a reinforcement learning experiment.
Definition: xcs_rl.c:78

xcs_rl_init_trial
void xcs_rl_init_trial(struct XCSF *xcsf)
Initialises a reinforcement learning trial.
Definition: xcs_rl.c:126

xcs_rl_end_step
void xcs_rl_end_step(struct XCSF *xcsf, const double *state, const int action, const double reward)
Ends a step in a reinforcement learning trial.
Definition: xcs_rl.c:171

xcs_rl.h
Reinforcement learning functions.