Policy iteration on FrozenLake-v0 (C++)
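This example solves the 4x4 FrozenLake-v0 environment with policy iteration, a dynamic programming method that alternates policy evaluation and policy improvement until the greedy policy stops changing. The environment is accessed from C++ through the gymfcpp bindings, which wrap the Python implementation of the environment via Boost.Python; for this reason the program starts an embedded Python interpreter before creating the environment, and the executable has to link against Boost.Python and the Python runtime.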
Code
#include "cubeai/base/cubeai_types.h"
#include "cubeai/rl/algorithms/dp/policy_iteration.h"
#include "cubeai/rl/trainers/rl_serial_agent_trainer.h"
#include "cubeai/rl/policies/uniform_discrete_policy.h"
#include "cubeai/rl/policies/stochastic_adaptor_policy.h"
#include "gymfcpp/gymfcpp_types.h"
#include "gymfcpp/frozen_lake_env.h"
#include "gymfcpp/time_step.h"
#include <boost/python.hpp>
#include <iostream>
namespace rl_example_7
{

using cubeai::real_t;
using cubeai::uint_t;
using cubeai::rl::policies::UniformDiscretePolicy;
using cubeai::rl::policies::StochasticAdaptorPolicy;
using cubeai::rl::algos::dp::PolicyIteration;
using cubeai::rl::algos::dp::PolicyIterationConfig;
using cubeai::rl::RLSerialAgentTrainer;
using cubeai::rl::RLSerialTrainerConfig;
using time_step_type = gymfcpp::TimeStep<uint_t>;

}
int main() {

    using namespace rl_example_7;

    // start the embedded Python interpreter required by gymfcpp
    Py_Initialize();
    auto gym_module = boost::python::import("__main__");
    auto gym_namespace = gym_module.attr("__dict__");

    // create and build the 4x4 FrozenLake-v0 environment
    gymfcpp::FrozenLake<4> env("v0", gym_namespace);
    env.make();

    // initial uniform policy over the state-action space and the
    // adaptor the algorithm uses to update it
    UniformDiscretePolicy policy(env.n_states(), env.n_actions());
    StochasticAdaptorPolicy policy_adaptor(env.n_states(), env.n_actions(), policy);

    // configure the algorithm: undiscounted returns, at most 100
    // evaluation sweeps per iteration, convergence tolerance 1.0e-8
    PolicyIterationConfig config;
    config.gamma = 1.0;
    config.n_policy_eval_steps = 100;
    config.tolerance = 1.0e-8;

    PolicyIteration<gymfcpp::FrozenLake<4>,
                    UniformDiscretePolicy,
                    StochasticAdaptorPolicy> algorithm(config, policy, policy_adaptor);

    RLSerialTrainerConfig trainer_config = {10, 10000, 1.0e-8};

    RLSerialAgentTrainer<gymfcpp::FrozenLake<4>,
                         PolicyIteration<gymfcpp::FrozenLake<4>,
                                         UniformDiscretePolicy,
                                         StochasticAdaptorPolicy>> trainer(trainer_config, algorithm);

    // run the training loop and print summary information
    auto info = trainer.train(env);
    std::cout << info << std::endl;
    // save the value function into a csv file
    algorithm.save("policy_iteration_frozen_lake_v0.csv");

    return 0;
}
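For reference, the sketch below illustrates what a policy iteration algorithm of this kind does internally: it alternates policy evaluation, which iterates the Bellman expectation backup for the current policy, with policy improvement, which makes the policy greedy with respect to the computed values, and stops when the policy is stable. This is a minimal self-contained illustration of the technique on a generic tabular MDP, not the cubeai implementation; the Transition struct and the dynamics table P are hypothetical stand-ins for the model information a FrozenLake environment exposes.

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// one possible outcome of an action: (probability, next state, reward, done)
struct Transition { double prob; int next; double reward; bool done; };

// P[s][a] lists the stochastic outcomes of taking action a in state s
using DynamicsTable = std::vector<std::vector<std::vector<Transition>>>;

std::vector<int> policy_iteration(const DynamicsTable& P, double gamma, double tol)
{
    const int n_states  = static_cast<int>(P.size());
    const int n_actions = static_cast<int>(P[0].size());

    std::vector<int> policy(n_states, 0); // arbitrary initial policy
    std::vector<double> V(n_states, 0.0);

    // expected one-step return of action a in state s under the estimate V
    auto q_value = [&](int s, int a) {
        double q = 0.0;
        for (const auto& t : P[s][a])
            q += t.prob * (t.reward + (t.done ? 0.0 : gamma * V[t.next]));
        return q;
    };

    for (bool stable = false; !stable; ) {

        // policy evaluation: iterate the Bellman expectation backup
        for (double delta = tol; delta >= tol; ) {
            delta = 0.0;
            for (int s = 0; s < n_states; ++s) {
                const double v_new = q_value(s, policy[s]);
                delta = std::max(delta, std::abs(v_new - V[s]));
                V[s] = v_new;
            }
        }

        // policy improvement: act greedily with respect to V
        stable = true;
        for (int s = 0; s < n_states; ++s) {
            int best = 0;
            for (int a = 1; a < n_actions; ++a)
                if (q_value(s, a) > q_value(s, best)) best = a;
            if (best != policy[s]) { policy[s] = best; stable = false; }
        }
    }
    return policy;
}

int main() {
    // toy 2-state, 2-action MDP: action 1 in state 0 reaches the terminal
    // state 1 with reward 1; everything else yields reward 0
    DynamicsTable P = {
        { { {1.0, 0, 0.0, false} }, { {1.0, 1, 1.0, true} } }, // state 0
        { { {1.0, 1, 0.0, true} },  { {1.0, 1, 0.0, true} } }  // state 1 (terminal)
    };
    const auto policy = policy_iteration(P, 0.9, 1.0e-8);
    std::cout << "greedy action in state 0: " << policy[0] << std::endl; // prints 1
    return 0;
}

Note that evaluation in this sketch runs to a tolerance, whereas the configuration above caps the number of evaluation sweeps with n_policy_eval_steps; both are common variants of the same scheme.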