52 while (steps < xcsf->TELETRANSPORTATION && !done) {
67 return (reward > 0) ? 1 : 0;
84 for (
int cnt = 0; cnt <
xcsf->MAX_TRIALS; ++cnt) {
92 return tperf /
xcsf->MAX_TRIALS;
111 const double prediction =
pa_val(
xcsf, action);
112 const double error = (
xcsf->loss_ptr)(
xcsf, &prediction, &reward);
128 xcsf->prev_reward = 0;
130 if (
xcsf->x_dim < 1) {
131 printf(
"xcs_rl_init_trial(): error x_dim less than 1\n");
135 xcsf->prev_state = malloc(
sizeof(
double) *
xcsf->x_dim);
149 free(
xcsf->prev_state);
177 xcsf->prev_reward = reward;
179 memcpy(
xcsf->prev_state, state,
sizeof(
double) *
xcsf->x_dim);
193 const double reward,
const bool done)
196 if (
xcsf->prev_aset.list != NULL) {
224 const bool done,
const double max_p)
227 const double prediction =
pa_val(
xcsf, action);
228 if (
xcsf->prev_aset.list != NULL) {
229 const double p =
xcsf->prev_reward + (
xcsf->GAMMA * prediction);
230 error += (
xcsf->loss_ptr)(
xcsf, &
xcsf->prev_pred, &p) / max_p;
233 error += (
xcsf->loss_ptr)(
xcsf, &prediction, &reward) / max_p;
void clset_kill(const struct XCSF *xcsf, struct Set *set)
Frees the set and the classifiers.
void clset_update(struct XCSF *xcsf, struct Set *set, const double *x, const double *y, const bool cur)
Provides reinforcement to the set and performs set subsumption.
void clset_validate(struct Set *set)
Removes classifiers with 0 numerosity from the set.
void clset_action(struct XCSF *xcsf, const int action)
Constructs the action set from the match set.
void clset_init(struct Set *set)
Initialises a new set.
void clset_match(struct XCSF *xcsf, const double *x, const bool cover)
Constructs the match set - forward propagates conditions and actions.
void clset_free(struct Set *set)
Frees the set, but not the classifiers.
Functions operating on sets of classifiers.
void ea(struct XCSF *xcsf, const struct Set *set)
Executes the evolutionary algorithm (EA).
Evolutionary algorithm functions.
Built-in problem environment interface.
static void env_reset(const struct XCSF *xcsf)
Resets the environment.
static bool env_multistep(const struct XCSF *xcsf)
Returns whether the environment is a multistep problem.
static const double * env_get_state(const struct XCSF *xcsf)
Returns the current environment perceptions.
static double env_max_payoff(const struct XCSF *xcsf)
Returns the maximum payoff value possible in the environment.
static bool env_is_done(const struct XCSF *xcsf)
Returns whether the environment is in a terminal state.
static double env_execute(const struct XCSF *xcsf, const int action)
Executes the specified action and returns the payoff.
double pa_best_val(const struct XCSF *xcsf)
Returns the highest value in the prediction array.
int pa_rand_action(const struct XCSF *xcsf)
Returns a random action from the prediction array.
double pa_val(const struct XCSF *xcsf, const int action)
Returns the value of a specified action in the prediction array.
void pa_build(const struct XCSF *xcsf, const double *x)
Builds the prediction array for the specified input.
int pa_best_action(const struct XCSF *xcsf)
Returns the best action in the prediction array.
Prediction array functions.
const char * param_set_explore(struct XCSF *xcsf, const bool a)
Functions for setting and printing parameters.
void perf_print(const struct XCSF *xcsf, double *error, double *terror, const int trial)
Displays the current training and test performance.
System performance printing.
double rand_uniform(const double min, const double max)
Returns a uniformly distributed random double in the range [min,max].
Utility functions for random number handling, etc.
double xcs_rl_fit(struct XCSF *xcsf, const double *state, const int action, const double reward)
Creates and updates an action set for a given (state, action, reward).
void xcs_rl_update(struct XCSF *xcsf, const double *state, const int action, const double reward, const bool done)
Provides reinforcement to the sets.
int xcs_rl_decision(struct XCSF *xcsf, const double *state)
Selects an action to perform in a reinforcement learning problem.
void xcs_rl_init_step(struct XCSF *xcsf)
Initialises a step in a reinforcement learning trial.
static double xcs_rl_trial(struct XCSF *xcsf, double *error, const bool explore)
Executes a reinforcement learning trial using a built-in environment.
void xcs_rl_end_trial(struct XCSF *xcsf)
Frees memory used by a reinforcement learning trial.
double xcs_rl_error(struct XCSF *xcsf, const int action, const double reward, const bool done, const double max_p)
Returns the reinforcement learning system prediction error.
double xcs_rl_exp(struct XCSF *xcsf)
Executes a reinforcement learning experiment.
void xcs_rl_init_trial(struct XCSF *xcsf)
Initialises a reinforcement learning trial.
void xcs_rl_end_step(struct XCSF *xcsf, const double *state, const int action, const double reward)
Ends a step in a reinforcement learning trial.
Reinforcement learning functions.