xcsf/xcs__rl_8c_source.html

/*

 * This program is free software: you can redistribute it and/or modify

 * it under the terms of the GNU General Public License as published by

 * the Free Software Foundation, either version 3 of the License, or

 * (at your option) any later version.

 *

 * This program is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 * GNU General Public License for more details.

 *

 * You should have received a copy of the GNU General Public License

 * along with this program.  If not, see <http://www.gnu.org/licenses/>.

 */


#include "xcs_rl.h"

#include "clset.h"

#include "ea.h"

#include "env.h"

#include "pa.h"

#include "param.h"

#include "perf.h"

#include "utils.h"


/**
 * @brief Executes a reinforcement learning trial using a built-in environment.
 * @details Steps are performed until the environment reports a terminal state
 * or the TELETRANSPORTATION step limit is reached.
 * @param [in] xcsf The XCSF data structure.
 * @param [out] error Mean system prediction error over the steps taken; left
 * at 0 when no step was executed.
 * @param [in] explore Whether the trial is run in explore mode.
 * @return For problems that are not multistep: 1 if the last reward was
 * positive, otherwise 0. For multistep problems: the number of steps taken.
 */
static double
xcs_rl_trial(struct XCSF *xcsf, double *error, const bool explore)
{
    env_reset(xcsf);
    param_set_explore(xcsf, explore);
    xcs_rl_init_trial(xcsf);
    *error = 0; // accumulated here, averaged over the steps below
    double reward = 0;
    bool done = false;
    int steps = 0;
    while (steps < xcsf->TELETRANSPORTATION && !done) {
        xcs_rl_init_step(xcsf);
        const double *state = env_get_state(xcsf);
        const int action = xcs_rl_decision(xcsf, state);
        reward = env_execute(xcsf, action);
        done = env_is_done(xcsf);
        xcs_rl_update(xcsf, state, action, reward, done);
        *error +=
            xcs_rl_error(xcsf, action, reward, done, env_max_payoff(xcsf));
        xcs_rl_end_step(xcsf, state, action, reward);
        ++steps;
    }
    xcs_rl_end_trial(xcsf);
    // The loop body never runs when the step limit is not positive; dividing
    // by zero steps would yield NaN and poison the caller's running totals.
    if (steps > 0) {
        *error /= steps;
    }
    if (!env_multistep(xcsf)) {
        return (reward > 0) ? 1 : 0;
    }
    return steps;
}


/**
 * @brief Executes a reinforcement learning experiment.
 * @details Each iteration runs one explore trial followed by one exploit
 * trial. Only the exploit trial's performance and error are accumulated; the
 * explore trial's error is overwritten by the exploit trial.
 * @param [in] xcsf The XCSF data structure.
 * @return The summed exploit-trial performance divided by MAX_TRIALS.
 */
double
xcs_rl_exp(struct XCSF *xcsf)
{
    double trial_err = 0; // prediction error of the most recent trial
    double window_err = 0; // prediction error summed for perf_print()
    double total_perf = 0; // exploit performance summed over all trials
    double window_perf = 0; // exploit performance summed for perf_print()
    int trial = 0;
    while (trial < xcsf->MAX_TRIALS) {
        xcs_rl_trial(xcsf, &trial_err, true); // explore
        const double exploit_perf =
            xcs_rl_trial(xcsf, &trial_err, false); // exploit
        window_perf += exploit_perf;
        total_perf += exploit_perf;
        window_err += trial_err;
        // NOTE(review): the windowed totals are passed by pointer, presumably
        // so perf_print() can reset them after reporting - confirm in perf.c
        perf_print(xcsf, &window_perf, &window_err, trial);
        ++trial;
    }
    return total_perf / xcsf->MAX_TRIALS;
}


double


xcs_rl_fit(struct XCSF *xcsf, const double *state, const int action,

           const double reward)

{

    xcs_rl_init_trial(xcsf);

    xcs_rl_init_step(xcsf);

    clset_match(xcsf, state, true);

    pa_build(xcsf, state);

    const double prediction = pa_val(xcsf, action);

    const double error = (xcsf->loss_ptr)(xcsf, &prediction, &reward);

    param_set_explore(xcsf, true); // ensure EA is executed

    xcs_rl_update(xcsf, state, action, reward, true);

    xcs_rl_end_step(xcsf, state, action, reward);

    xcs_rl_end_trial(xcsf);

    xcsf->error += (error - xcsf->error) * xcsf->BETA;

    return error;

}


/**
 * @brief Initialises a reinforcement learning trial.
 * @details Clears the previous reward and prediction, allocates the buffer
 * holding the previous state (x_dim doubles) and initialises the previous
 * action set and the kill set. The buffer is released by xcs_rl_end_trial().
 * Terminates the program if x_dim is less than 1 or the allocation fails.
 * @param [in] xcsf The XCSF data structure.
 */
void
xcs_rl_init_trial(struct XCSF *xcsf)
{
    xcsf->prev_reward = 0;
    xcsf->prev_pred = 0;
    if (xcsf->x_dim < 1) { // memory allocation guard
        printf("xcs_rl_init_trial(): error x_dim less than 1\n");
        // NOTE(review): dead store ahead of exit(); presumably kept so static
        // analysers see a positive size reaching malloc() - confirm
        xcsf->x_dim = 1;
        exit(EXIT_FAILURE);
    }
    xcsf->prev_state = malloc(sizeof(double) * xcsf->x_dim);
    if (xcsf->prev_state == NULL) {
        // xcs_rl_end_step() copies into this buffer unconditionally, so a
        // failed allocation must not be allowed to continue
        printf("xcs_rl_init_trial(): error allocating prev_state\n");
        exit(EXIT_FAILURE);
    }
    clset_init(&xcsf->prev_aset);
    clset_init(&xcsf->kset);
}


/**
 * @brief Frees memory used by a reinforcement learning trial.
 * @details Releases the previous action set, kills the kill set and frees the
 * previous-state buffer allocated by xcs_rl_init_trial().
 * @param [in] xcsf The XCSF data structure.
 */
void
xcs_rl_end_trial(struct XCSF *xcsf)
{
    clset_free(&xcsf->prev_aset);
    clset_kill(xcsf, &xcsf->kset);
    free(xcsf->prev_state);
    // do not leave a dangling pointer behind: a repeated call then becomes a
    // harmless free(NULL) instead of a double free
    xcsf->prev_state = NULL;
}


/**
 * @brief Initialises a step in a reinforcement learning trial.
 * @details Resets the action set and the match set so that both can be
 * constructed afresh for the current step.
 * @param [in] xcsf The XCSF data structure.
 */
void
xcs_rl_init_step(struct XCSF *xcsf)
{
    clset_init(&xcsf->aset);
    clset_init(&xcsf->mset);
}


/**
 * @brief Ends a step in a reinforcement learning trial.
 * @details Records the state, reward and prediction of this step as the
 * "previous" values for the next step, frees the match set and the old
 * previous action set, and makes the current action set the new previous
 * action set.
 * @param [in] xcsf The XCSF data structure.
 * @param [in] state The current input state (x_dim values are copied).
 * @param [in] action The current action.
 * @param [in] reward The current reward.
 */
void
xcs_rl_end_step(struct XCSF *xcsf, const double *state, const int action,
                const double reward)
{
    // remember this step's scalars and state for the next update
    xcsf->prev_reward = reward;
    xcsf->prev_pred = pa_val(xcsf, action);
    memcpy(xcsf->prev_state, state, sizeof(double) * xcsf->x_dim);
    // the match set is no longer needed
    clset_free(&xcsf->mset);
    // drop the old previous action set, then hand over the current one
    clset_free(&xcsf->prev_aset);
    xcsf->prev_aset = xcsf->aset;
}


/**
 * @brief Validates and reinforces a set, then runs the EA when exploring.
 * @param [in] xcsf The XCSF data structure.
 * @param [in] set The set to reinforce.
 * @param [in] x The input state passed to the set update.
 * @param [in] payoff The payoff passed to the set update.
 * @param [in] cur The flag forwarded as the last argument of clset_update().
 */
static void
xcs_rl_reinforce_set(struct XCSF *xcsf, struct Set *set, const double *x,
                     const double *payoff, const bool cur)
{
    clset_validate(set);
    clset_update(xcsf, set, x, payoff, cur);
    if (xcsf->explore) {
        ea(xcsf, set);
    }
}

/**
 * @brief Provides reinforcement to the sets.
 * @details Builds the action set for the chosen action. If a previous action
 * set exists, it is updated with the previous reward plus the GAMMA-discounted
 * best value in the prediction array. If the step is terminal, the current
 * action set is updated with the reward itself. The EA is run on each updated
 * set when in explore mode.
 * @param [in] xcsf The XCSF data structure.
 * @param [in] state The current input state.
 * @param [in] action The current action.
 * @param [in] reward The current reward.
 * @param [in] done Whether the environment is in a terminal state.
 */
void
xcs_rl_update(struct XCSF *xcsf, const double *state, const int action,
              const double reward, const bool done)
{
    clset_action(xcsf, action); // create action set
    const bool has_previous = (xcsf->prev_aset.list != NULL);
    if (has_previous) { // update previous action set and run EA
        const double discounted =
            xcsf->prev_reward + (xcsf->GAMMA * pa_best_val(xcsf));
        xcs_rl_reinforce_set(xcsf, &xcsf->prev_aset, xcsf->prev_state,
                             &discounted, false);
    }
    if (done) { // in terminal state: update current action set and run EA
        xcs_rl_reinforce_set(xcsf, &xcsf->aset, state, &reward, true);
    }
}


double


xcs_rl_error(struct XCSF *xcsf, const int action, const double reward,

             const bool done, const double max_p)

{

    double error = 0;

    const double prediction = pa_val(xcsf, action);

    if (xcsf->prev_aset.list != NULL) {

        const double p = xcsf->prev_reward + (xcsf->GAMMA * prediction);

        error += (xcsf->loss_ptr)(xcsf, &xcsf->prev_pred, &p) / max_p;

    }

    if (done) {

        error += (xcsf->loss_ptr)(xcsf, &prediction, &reward) / max_p;

    }

    xcsf->error += (error - xcsf->error) * xcsf->BETA;

    return error;

}


/**
 * @brief Selects an action to perform in a reinforcement learning problem.
 * @details Constructs the match set and the prediction array for the state.
 * In explore mode a random action is chosen with probability P_EXPLORE;
 * otherwise the best action in the prediction array is returned.
 * @param [in] xcsf The XCSF data structure.
 * @param [in] state The input state.
 * @return The selected action.
 */
int
xcs_rl_decision(struct XCSF *xcsf, const double *state)
{
    clset_match(xcsf, state, true);
    pa_build(xcsf, state);
    // short-circuit on purpose: a random number is only drawn when exploring
    const bool pick_random =
        xcsf->explore && rand_uniform(0, 1) < xcsf->P_EXPLORE;
    return pick_random ? pa_rand_action(xcsf) : pa_best_action(xcsf);
}


clset_kill
void clset_kill(const struct XCSF *xcsf, struct Set *set)
Frees the set and the classifiers.
Definition clset.c:590

clset_update
void clset_update(struct XCSF *xcsf, struct Set *set, const double *x, const double *y, const bool cur)
Provides reinforcement to the set and performs set subsumption.
Definition clset.c:448

clset_validate
void clset_validate(struct Set *set)
Removes classifiers with 0 numerosity from the set.
Definition clset.c:480

clset_action
void clset_action(struct XCSF *xcsf, const int action)
Constructs the action set from the match set.
Definition clset.c:404

clset_init
void clset_init(struct Set *set)
Initialises a new set.
Definition clset.c:328

clset_match
void clset_match(struct XCSF *xcsf, const double *x, const bool cover)
Constructs the match set - forward propagates conditions and actions.
Definition clset.c:356

clset_free
void clset_free(struct Set *set)
Frees the set, but not the classifiers.
Definition clset.c:572

clset.h
Functions operating on sets of classifiers.

ea
void ea(struct XCSF *xcsf, const struct Set *set)
Executes the evolutionary algorithm (EA).
Definition ea.c:199

ea.h
Evolutionary algorithm functions.

env.h
Built-in problem environment interface.

env_reset
static void env_reset(const struct XCSF *xcsf)
Resets the environment.
Definition env.h:116

env_get_state
static const double * env_get_state(const struct XCSF *xcsf)
Returns the current environment perceptions.
Definition env.h:96

env_multistep
static bool env_multistep(const struct XCSF *xcsf)
Returns whether the environment is a multistep problem.
Definition env.h:62

env_max_payoff
static double env_max_payoff(const struct XCSF *xcsf)
Returns the maximum payoff value possible in the environment.
Definition env.h:85

env_is_done
static bool env_is_done(const struct XCSF *xcsf)
Returns whether the environment is in a terminal state.
Definition env.h:51

env_execute
static double env_execute(const struct XCSF *xcsf, const int action)
Executes the specified action and returns the payoff.
Definition env.h:74

xcsf
Definition __init__.py:1

pa_best_val
double pa_best_val(const struct XCSF *xcsf)
Returns the highest value in the prediction array.
Definition pa.c:165

pa_rand_action
int pa_rand_action(const struct XCSF *xcsf)
Returns a random action from the prediction array.
Definition pa.c:150

pa_val
double pa_val(const struct XCSF *xcsf, const int action)
Returns the value of a specified action in the prediction array.
Definition pa.c:178

pa_build
void pa_build(const struct XCSF *xcsf, const double *x)
Builds the prediction array for the specified input.
Definition pa.c:62

pa_best_action
int pa_best_action(const struct XCSF *xcsf)
Returns the best action in the prediction array.
Definition pa.c:123

pa.h
Prediction array functions.

param_set_explore
const char * param_set_explore(struct XCSF *xcsf, const bool a)
Definition param.c:881

param.h
Functions for setting and printing parameters.

perf_print
void perf_print(const struct XCSF *xcsf, double *error, double *terror, const int trial)
Displays the current training and test performance.
Definition perf.c:34

perf.h
System performance printing.

XCSF
XCSF data structure.
Definition xcsf.h:85

rand_uniform
double rand_uniform(const double min, const double max)
Returns a uniform random float [min,max].
Definition utils.c:62

utils.h
Utility functions for random number handling, etc.

xcs_rl_fit
double xcs_rl_fit(struct XCSF *xcsf, const double *state, const int action, const double reward)
Creates and updates an action set for a given (state, action, reward).
Definition xcs_rl.c:104

xcs_rl_update
void xcs_rl_update(struct XCSF *xcsf, const double *state, const int action, const double reward, const bool done)
Provides reinforcement to the sets.
Definition xcs_rl.c:192

xcs_rl_decision
int xcs_rl_decision(struct XCSF *xcsf, const double *state)
Selects an action to perform in a reinforcement learning problem.
Definition xcs_rl.c:247

xcs_rl_init_step
void xcs_rl_init_step(struct XCSF *xcsf)
Initialises a step in a reinforcement learning trial.
Definition xcs_rl.c:157

xcs_rl_trial
static double xcs_rl_trial(struct XCSF *xcsf, double *error, const bool explore)
Executes a reinforcement learning trial using a built-in environment.
Definition xcs_rl.c:43

xcs_rl_end_trial
void xcs_rl_end_trial(struct XCSF *xcsf)
Frees memory used by a reinforcement learning trial.
Definition xcs_rl.c:145

xcs_rl_error
double xcs_rl_error(struct XCSF *xcsf, const int action, const double reward, const bool done, const double max_p)
Returns the reinforcement learning system prediction error.
Definition xcs_rl.c:223

xcs_rl_exp
double xcs_rl_exp(struct XCSF *xcsf)
Executes a reinforcement learning experiment.
Definition xcs_rl.c:78

xcs_rl_init_trial
void xcs_rl_init_trial(struct XCSF *xcsf)
Initialises a reinforcement learning trial.
Definition xcs_rl.c:126

xcs_rl_end_step
void xcs_rl_end_step(struct XCSF *xcsf, const double *state, const int action, const double reward)
Ends a step in a reinforcement learning trial.
Definition xcs_rl.c:171

xcs_rl.h
Reinforcement learning functions.