4 #include "definitions.hpp" 16 #include "cpp11-range-master/range.hpp" 80 template<
class Sim,
class SampleType=Samples<
typename Sim::State,
typename Sim::Action>>
82 Sim& sim, SampleType& samples,
83 const function<
typename Sim::Action(
typename Sim::State&)>& policy,
84 long horizon,
long runs,
long tran_limit=-1,
prec_t prob_term=0.0,
85 random_device::result_type seed = random_device{}()){
90 default_random_engine generator(seed);
91 uniform_real_distribution<double> distribution(0.0,1.0);
93 for(
auto run=0l; run < runs; run++){
95 typename Sim::State state = sim.init_state();
96 samples.add_initial(state);
98 for(
auto step : range(0l,horizon)){
100 if(sim.end_condition(state) || (tran_limit > 0 && transitions > tran_limit) )
103 auto action = policy(state);
104 auto reward_state = sim.transition(state,action);
106 auto reward = reward_state.first;
107 auto nextstate = move(reward_state.second);
109 samples.add_sample(move(state), move(action), nextstate, reward, 1.0, step, run);
110 state = move(nextstate);
113 if( (prob_term > 0.0) && (distribution(generator) <= prob_term) )
118 if(tran_limit > 0 && transitions > tran_limit)
131 template<
class Sim,
class SampleType=Samples<
typename Sim::State,
typename Sim::Action>>
134 const function<
typename Sim::Action(
typename Sim::State&)>& policy,
135 long horizon,
long runs,
long tran_limit=-1,
prec_t prob_term=0.0,
136 random_device::result_type seed = random_device{}()){
138 SampleType samples = SampleType();
139 simulate(sim, samples, policy, horizon, runs, tran_limit, prob_term, seed);
168 pair<vector<typename Sim::State>,
numvec>
170 const function<
typename Sim::Action(
typename Sim::State&)>& policy,
171 long horizon,
long runs,
prec_t prob_term=0.0,
172 random_device::result_type seed = random_device{}()){
174 long transitions = 0;
176 default_random_engine generator(seed);
177 uniform_real_distribution<double> distribution(0.0,1.0);
180 vector<typename Sim::State> start_states(runs);
183 for(
auto run : range(0l,runs)){
184 typename Sim::State state = sim.init_state();
185 start_states[run] = state;
188 for(
auto step : range(0l,horizon)){
190 if(sim.end_condition(state))
193 auto action = policy(state);
194 auto reward_state = sim.transition(state,action);
196 auto reward = reward_state.first;
197 auto nextstate = move(reward_state.second);
199 runreturn += reward * pow(discount, step);
200 state = move(nextstate);
202 if( (prob_term > 0.0) && (distribution(generator) <= prob_term) )
206 returns[run] = runreturn;
208 return make_pair(move(start_states), move(returns));
// NOTE(review): garbled extraction fragment — the RandomPolicy class header
// (original Simulation.hpp:225 per the index below) was dropped, and the
// numeric prefixes are original source line numbers fused into the text.
// Code bytes are left untouched; only comments are added.

// Aliases exposing the simulator's state/action types.
228 using State =
typename Sim::State;
229 using Action =
typename Sim::Action;
// Constructor: stores the simulator reference and seeds the engine.
231 RandomPolicy(
const Sim& sim, random_device::result_type seed = random_device{}()) :
232 sim(sim), gen(seed){};
// Samples an action index uniformly from [0, action_count(state)-1]
// and returns the corresponding action from the simulator.
235 Action operator() (State state){
236 uniform_int_distribution<long> dst(0,sim.action_count(state)-1);
237 return sim.action(state,dst(gen));
// Random number engine member.
244 default_random_engine gen;
// NOTE(review): garbled extraction fragment — the RandomizedPolicy class
// name line (original Simulation.hpp:258 per the index below) is missing;
// numeric prefixes are original source line numbers. Code bytes untouched.
257 template<
typename Sim>
// Aliases exposing the simulator's state/action types.
261 using State =
typename Sim::State;
262 using Action =
typename Sim::Action;
// Constructor: builds one discrete_distribution per state from the
// provided per-state action probabilities.
277 RandomizedPolicy(
const Sim& sim,
const vector<numvec>& probabilities,random_device::result_type seed = random_device{}()):
278 gen(seed), distributions(probabilities.size()), sim(sim){
280 for(
auto pi : indices(probabilities)){
283 const numvec& prob = probabilities[pi];
284 prec_t sum = accumulate(prob.begin(), prob.end(), 0.0);
// NOTE(review): the condition guarding this throw (original lines
// 285-286, presumably a check that `sum` deviates from 1) was lost
// in extraction — confirm against the original file.
287 throw invalid_argument(
"Action probabilities must sum to 1 in state " + to_string(pi));
289 distributions[pi] = discrete_distribution<long>(prob.begin(), prob.end());
// Samples an action from this state's distribution; the state must be
// castable to a long index within the distributions vector.
294 Action operator() (State state){
296 long sl =
static_cast<long>(state);
297 assert(sl >= 0 &&
size_t(sl) < distributions.size());
299 auto& dst = distributions[sl];
301 return sim.action(state,dst(gen));
// Random number engine member.
307 default_random_engine
gen;
// NOTE(review): garbled extraction fragment — the DeterministicPolicy class
// header (original Simulation.hpp:326 per the index below) and the
// constructor head (see index entry: DeterministicPolicy(const Sim&, indvec))
// are missing; numeric prefixes are original source line numbers.
// Code bytes untouched.
325 template<
typename Sim>
// Aliases exposing the simulator's state/action types.
329 using State =
typename Sim::State;
330 using Action =
typename Sim::Action;
// Constructor initializer list: stores the per-state action indices and
// the simulator reference.
341 actions(actions), sim(sim) {};
// Returns the fixed action assigned to the given state; the state must
// be castable to a long index within the actions vector.
344 Action operator() (State state){
346 long sl =
static_cast<long>(state);
348 assert(sl >= 0 &&
size_t(sl) < actions.size());
351 return sim.action(state,actions[sl]);
// NOTE(review): garbled extraction fragment — the ModelSimulator class
// header (original Simulation.hpp:377 per the index below) and several
// method heads (init_state, transition, see index entries) are missing;
// numeric prefixes are original source line numbers. Code bytes untouched.

// Constructor tail: validates that the initial distribution sums to 1
// within SOLPREC tolerance.
392 random_device::result_type seed = random_device{}()) :
393 gen(seed), mdp(mdp), initial(initial){
395 if(abs(initial.sum_probabilities() - 1) >
SOLPREC)
396 throw invalid_argument(
"Initial transition probabilities must sum to 1");
// Delegating constructor: wraps a mutable MDP pointer as const.
406 ModelSimulator(const_pointer_cast<const MDP>(mdp), initial, seed) {};
// init_state body: samples an initial state index from the initial
// distribution's probabilities.
412 auto dst = discrete_distribution<long>(probs.begin(), probs.end());
413 return inds[dst(gen)];
// transition(state, action): samples the next state and reward.
428 assert(state >= 0 &&
size_t(state) < mdp->size());
429 const auto& mdpstate = (*mdp)[state];
431 assert(action >= 0 &&
size_t(action) < mdpstate.size());
432 const auto& mdpaction = mdpstate[action];
434 if(!mdpstate.is_valid(action))
435 throw invalid_argument(
"Cannot transition using an invalid action");
437 const auto& tran = mdpaction.get_outcome();
439 const numvec& probs = tran.get_probabilities();
440 const numvec& rews = tran.get_rewards();
441 const indvec& inds = tran.get_indices();
// Probability mass not covered by the transition is treated as
// termination.
445 prec_t prob_termination = 1 - tran.sum_probabilities();
447 discrete_distribution<long> dst;
449 if(prob_termination >
SOLPREC){
// NOTE(review): the initialization of `copy_probs` (original lines
// 450-451) was lost in extraction — presumably a copy of `probs`.
452 copy_probs.push_back(prob_termination);
454 dst = discrete_distribution<long>(copy_probs.begin(), copy_probs.end());
456 dst = discrete_distribution<long>(probs.begin(), probs.end());
459 const size_t nextindex = dst(gen);
// An index past the listed transitions denotes the terminal state
// (mdp->size()) with zero reward.
462 const State nextstate = nextindex < inds.size() ?
463 inds[nextindex] : mdp->size();
466 const prec_t reward = nextindex < inds.size() ?
467 rews[nextindex] : 0.0;
469 return make_pair(reward, nextstate);
// end_condition: terminal when past the MDP's states or no actions.
480 {
return (
size_t(s) >= mdp->size()) || (action_count(s) == 0);};
// action_count: number of actions available in the given state.
484 {
return (*mdp)[state].size();};
// Random number engine member.
492 default_random_engine
gen;
// MDP used for the simulation (shared, immutable).
495 shared_ptr<const MDP>
mdp;
shared_ptr< const MDP > mdp
MDP used for the simulation.
Definition: Simulation.hpp:495
A simulator that behaves as the provided MDP.
Definition: Simulation.hpp:377
Action action(State, long index) const
Returns an action with the given index.
Definition: Simulation.hpp:487
State init_state()
Returns a sample from the initial states.
Definition: Simulation.hpp:409
pair< double, State > transition(State state, Action action)
Returns a sample of the reward and a decision state following a state.
Definition: Simulation.hpp:426
const Sim & sim
simulator reference
Definition: Simulation.hpp:313
RandomizedPolicy(const Sim &sim, const vector< numvec > &probabilities, random_device::result_type seed=random_device{}())
Initializes randomized policies, transition probabilities for each state.
Definition: Simulation.hpp:277
const indvec & get_indices() const
Indices with positive probabilities.
Definition: Transition.hpp:323
DeterministicPolicy(const Sim &sim, indvec actions)
Initializes a deterministic policy with the action index to take in each state.
Definition: Simulation.hpp:340
double prec_t
Default precision used throughout the code.
Definition: definitions.hpp:25
ModelSimulator(const shared_ptr< MDP > &mdp, const Transition &initial, random_device::result_type seed=random_device{}())
Build a model simulator and share an MDP.
Definition: Simulation.hpp:405
vector< prec_t > numvec
Default numerical vector.
Definition: definitions.hpp:28
ModelSimulator(const shared_ptr< const MDP > &mdp, const Transition &initial, random_device::result_type seed=random_device{}())
Build a model simulator and share an MDP.
Definition: Simulation.hpp:391
size_t action_count(State state) const
State dependent action list.
Definition: Simulation.hpp:483
Transition initial
Initial distribution.
Definition: Simulation.hpp:498
pair< vector< typename Sim::State >, numvec > simulate_return(Sim &sim, prec_t discount, const function< typename Sim::Action(typename Sim::State &)> &policy, long horizon, long runs, prec_t prob_term=0.0, random_device::result_type seed=random_device{}())
Runs the simulator and computes the returns from the simulation.
Definition: Simulation.hpp:169
const numvec & get_probabilities() const
Returns list of positive probabilities for indexes returned by get_indices.
Definition: Transition.hpp:332
long Action
Type of actions.
Definition: Simulation.hpp:383
const Sim & sim
simulator reference
Definition: Simulation.hpp:359
default_random_engine gen
Random number engine.
Definition: Simulation.hpp:302
A randomized policy that chooses actions according to the provided vector of probabilities.
Definition: Simulation.hpp:258
long State
Type of states.
Definition: Simulation.hpp:381
A deterministic policy that chooses actions according to the provided action index.
Definition: Simulation.hpp:326
default_random_engine gen
Random number engine.
Definition: Simulation.hpp:488
vector< discrete_distribution< long > > distributions
List of discrete distributions for all states.
Definition: Simulation.hpp:310
Represents sparse transition probabilities and rewards from a single state.
Definition: Transition.hpp:31
constexpr prec_t SOLPREC
Default solution precision.
Definition: definitions.hpp:40
indvec actions
List of which action to take in which state.
Definition: Simulation.hpp:352
vector< long > indvec
Default index vector.
Definition: definitions.hpp:31
A random policy with state-dependent action sets which are discrete.
Definition: Simulation.hpp:225
bool end_condition(State s) const
Checks whether the decision state is terminal.
Definition: Simulation.hpp:479
Main namespace which includes modeling and solving functionality.
Definition: Action.hpp:18
void simulate(Sim &sim, SampleType &samples, const function< typename Sim::Action(typename Sim::State &)> &policy, long horizon, long runs, long tran_limit=-1, prec_t prob_term=0.0, random_device::result_type seed=random_device{}())
Definition: Simulation.hpp:81