CRAAM  2.0.0
Robust and Approximate Markov Decision Processes
Simulation.hpp
#pragma once

#include "Samples.hpp"
#include "definitions.hpp"

#include <utility>
#include <vector>
#include <memory>
#include <random>
#include <functional>
#include <cmath>
#include <algorithm>
#include <string>

#include "cpp11-range-master/range.hpp"

namespace craam{
namespace msen {

using namespace std;
using namespace util::lang;

/**
Runs the simulator and records the generated samples.

A run stops when the horizon is reached, when the simulator reports a terminal
state, when the total number of transitions exceeds tran_limit (if positive),
or randomly with probability prob_term after each transition.
*/
template<class Sim, class SampleType=Samples<typename Sim::State, typename Sim::Action>>
void simulate(
            Sim& sim, SampleType& samples,
            const function<typename Sim::Action(typename Sim::State&)>& policy,
            long horizon, long runs, long tran_limit=-1, prec_t prob_term=0.0,
            random_device::result_type seed = random_device{}()){

    long transitions = 0;

    // initialize random numbers to be used with random termination
    default_random_engine generator(seed);
    uniform_real_distribution<double> distribution(0.0,1.0);

    for(auto run=0l; run < runs; run++){

        typename Sim::State state = sim.init_state();
        samples.add_initial(state);

        for(auto step : range(0l,horizon)){
            // check for termination conditions
            if(sim.end_condition(state) || (tran_limit > 0 && transitions > tran_limit))
                break;

            auto action = policy(state);
            auto reward_state = sim.transition(state,action);

            auto reward = reward_state.first;
            auto nextstate = move(reward_state.second);

            samples.add_sample(move(state), move(action), nextstate, reward, 1.0, step, run);
            state = move(nextstate);

            // test the termination probability only after at least one transition
            if( (prob_term > 0.0) && (distribution(generator) <= prob_term) )
                break;
            transitions++;
        }

        if(tran_limit > 0 && transitions > tran_limit)
            break;
    }
}

/**
Runs the simulator and returns a new set of samples.
*/
template<class Sim, class SampleType=Samples<typename Sim::State, typename Sim::Action>>
SampleType simulate(
            Sim& sim,
            const function<typename Sim::Action(typename Sim::State&)>& policy,
            long horizon, long runs, long tran_limit=-1, prec_t prob_term=0.0,
            random_device::result_type seed = random_device{}()){

    SampleType samples = SampleType();
    simulate(sim, samples, policy, horizon, runs, tran_limit, prob_term, seed);
    return samples;
}

/**
Runs the simulator and computes the discounted return of each run, along with
the initial state of each run.
*/
template<class Sim>
pair<vector<typename Sim::State>, numvec>
simulate_return(Sim& sim, prec_t discount,
            const function<typename Sim::Action(typename Sim::State&)>& policy,
            long horizon, long runs, prec_t prob_term=0.0,
            random_device::result_type seed = random_device{}()){

    long transitions = 0;
    // initialize random numbers to be used with random termination
    default_random_engine generator(seed);
    uniform_real_distribution<double> distribution(0.0,1.0);

    // pre-initialize output values
    vector<typename Sim::State> start_states(runs);
    numvec returns(runs);

    for(auto run : range(0l,runs)){
        typename Sim::State state = sim.init_state();
        start_states[run] = state;

        prec_t runreturn = 0;
        for(auto step : range(0l,horizon)){
            // check the termination condition in the current state
            if(sim.end_condition(state))
                break;

            auto action = policy(state);
            auto reward_state = sim.transition(state,action);

            auto reward = reward_state.first;
            auto nextstate = move(reward_state.second);

            runreturn += reward * pow(discount, step);
            state = move(nextstate);

            // test the termination probability only after at least one transition
            if( (prob_term > 0.0) && (distribution(generator) <= prob_term) )
                break;
            transitions++;
        }
        returns[run] = runreturn;
    }
    return make_pair(move(start_states), move(returns));
}

// ************************************************************************************
// **** Random(ized) policies ****
// ************************************************************************************

/**
A random policy with state-dependent, discrete action sets.
*/
template<class Sim>
class RandomPolicy {
public:
    using State = typename Sim::State;
    using Action = typename Sim::Action;

    RandomPolicy(const Sim& sim, random_device::result_type seed = random_device{}()) :
                sim(sim), gen(seed){};

    /** Returns a uniformly random action for the given state. */
    Action operator() (State state){
        uniform_int_distribution<long> dst(0,sim.action_count(state)-1);
        return sim.action(state,dst(gen));
    };

private:
    /// simulator reference
    const Sim& sim;
    /// random number engine
    default_random_engine gen;
};

/**
A randomized policy that chooses actions according to the provided vector of
probabilities (one distribution over actions per state).
*/
template<typename Sim>
class RandomizedPolicy {
public:
    using State = typename Sim::State;
    using Action = typename Sim::Action;

    /** Initializes the randomized policy from action probabilities for each state. */
    RandomizedPolicy(const Sim& sim, const vector<numvec>& probabilities, random_device::result_type seed = random_device{}()):
            gen(seed), distributions(probabilities.size()), sim(sim){

        for(auto pi : indices(probabilities)){

            // check that this distribution is correct
            const numvec& prob = probabilities[pi];
            prec_t sum = accumulate(prob.begin(), prob.end(), 0.0);

            if(abs(sum - 1) > SOLPREC){
                throw invalid_argument("Action probabilities must sum to 1 in state " + to_string(pi));
            }
            distributions[pi] = discrete_distribution<long>(prob.begin(), prob.end());
        }
    };

    Action operator() (State state){
        // check that the state is valid for this policy
        long sl = static_cast<long>(state);
        assert(sl >= 0 && size_t(sl) < distributions.size());

        auto& dst = distributions[sl];
        // existence of the action is checked by the simulator
        return sim.action(state,dst(gen));
    };

protected:
    /// random number engine
    default_random_engine gen;
    /// list of discrete distributions, one for each state
    vector<discrete_distribution<long>> distributions;
    /// simulator reference
    const Sim& sim;
};

/**
A deterministic policy that chooses actions according to the provided action
index for each state.
*/
template<typename Sim>
class DeterministicPolicy {
public:
    using State = typename Sim::State;
    using Action = typename Sim::Action;

    /** Initializes the deterministic policy from a list of actions, one per state. */
    DeterministicPolicy(const Sim& sim, indvec actions):
            actions(actions), sim(sim) {};

    Action operator() (State state){
        // check that the state is valid for this policy
        long sl = static_cast<long>(state);
        assert(sl >= 0 && size_t(sl) < actions.size());

        // existence of the action is checked by the simulator
        return sim.action(state,actions[sl]);
    };

protected:
    /// which action to take in which state
    indvec actions;
    /// simulator reference
    const Sim& sim;
};

// ************************************************************************************
// **** MDP simulation ****
// ************************************************************************************

/**
A simulator that behaves as the provided MDP. A terminal state is a state with
an index equal to the number of states in the MDP; transitioning to it yields
a zero reward.
*/
class ModelSimulator {
public:
    /// Type of states
    typedef long State;
    /// Type of actions
    typedef long Action;

    /** Builds a model simulator and shares the provided MDP. */
    ModelSimulator(const shared_ptr<const MDP>& mdp, const Transition& initial,
                    random_device::result_type seed = random_device{}()) :
            gen(seed), mdp(mdp), initial(initial){

        if(abs(initial.sum_probabilities() - 1) > SOLPREC)
            throw invalid_argument("Initial transition probabilities must sum to 1");
    }

    /** Builds a model simulator and shares the provided MDP. */
    ModelSimulator(const shared_ptr<MDP>& mdp, const Transition& initial, random_device::result_type seed = random_device{}()) :
            ModelSimulator(const_pointer_cast<const MDP>(mdp), initial, seed) {};

    /** Returns a sample from the initial states. */
    State init_state(){
        const numvec& probs = initial.get_probabilities();
        const indvec& inds = initial.get_indices();
        auto dst = discrete_distribution<long>(probs.begin(), probs.end());
        return inds[dst(gen)];
    }

    /**
    Returns a sample of the reward and the next decision state when taking the
    given action in the given state.
    */
    pair<double,State> transition(State state, Action action){

        assert(state >= 0 && size_t(state) < mdp->size());
        const auto& mdpstate = (*mdp)[state];

        assert(action >= 0 && size_t(action) < mdpstate.size());
        const auto& mdpaction = mdpstate[action];

        if(!mdpstate.is_valid(action))
            throw invalid_argument("Cannot transition using an invalid action");

        const auto& tran = mdpaction.get_outcome();

        const numvec& probs = tran.get_probabilities();
        const numvec& rews = tran.get_rewards();
        const indvec& inds = tran.get_indices();

        // if the transition probabilities do not sum to 1, the remainder is
        // used as the probability of terminating
        prec_t prob_termination = 1 - tran.sum_probabilities();

        discrete_distribution<long> dst;

        if(prob_termination > SOLPREC){
            // copy the probabilities (there should be a faster way too)
            numvec copy_probs(probs);
            copy_probs.push_back(prob_termination);

            dst = discrete_distribution<long>(copy_probs.begin(), copy_probs.end());
        }else{
            dst = discrete_distribution<long>(probs.begin(), probs.end());
        }

        const size_t nextindex = dst(gen);

        // check whether to transition to a terminal state
        const State nextstate = nextindex < inds.size() ?
                                    inds[nextindex] : mdp->size();

        // reward is zero when transitioning to a terminal state
        const prec_t reward = nextindex < inds.size() ?
                                    rews[nextindex] : 0.0;

        return make_pair(reward, nextstate);
    }

    /** Checks whether the decision state is terminal. */
    bool end_condition(State s) const
        {return (size_t(s) >= mdp->size()) || (action_count(s) == 0);};

    /** Returns the number of actions available in the given state. */
    size_t action_count(State state) const
        {return (*mdp)[state].size();};

    /** Returns the action with the given index. */
    Action action(State, long index) const
        {return index;};

protected:
    /// random number engine
    default_random_engine gen;
    /// MDP used for the simulation
    shared_ptr<const MDP> mdp;
    /// initial distribution
    Transition initial;
};

} // end namespace msen
} // end namespace craam
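
The header above is typically used by constructing a simulator, wrapping a policy, and calling one of the simulate functions. The following is an illustrative sketch, not code from the library: the function name run_example and the concrete horizon, run count, discount factor, and termination probability are arbitrary choices, and the MDP together with its initial Transition distribution is assumed to be built elsewhere.

// Example usage (illustrative sketch; not part of Simulation.hpp)
#include "Simulation.hpp"

#include <memory>

using namespace craam;
using namespace craam::msen;

// The MDP and its initial distribution are assumed to be constructed elsewhere.
void run_example(const std::shared_ptr<const MDP>& mdp, const Transition& initial){
    // simulator that follows the MDP dynamics; probability mass missing from a
    // transition is treated as termination
    ModelSimulator sim(mdp, initial);

    // uniformly random policy over the simulator's actions
    RandomPolicy<ModelSimulator> random_pol(sim);

    // gather samples: horizon 100, 20 runs, no transition limit,
    // 1% chance of random termination after each transition
    auto samples = simulate(sim, random_pol, 100, 20, -1, 0.01);

    // evaluate a fixed policy that takes action 0 in every state
    DeterministicPolicy<ModelSimulator> det_pol(sim, indvec(mdp->size(), 0));
    auto returns = simulate_return(sim, 0.95, det_pol, 100, 20);
    // returns.first holds the initial state of each run;
    // returns.second holds the corresponding discounted return
}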