CRAAM  2.0.0
Robust and Approximate Markov Decision Processes
Samples.hpp
#pragma once

#include "definitions.hpp"
#include "RMDP.hpp"
#include "modeltools.hpp"

#include <set>
#include <memory>
#include <unordered_map>
#include <functional>
#include <cassert>
#include <utility>
#include <vector>
#include <string>

#include "cpp11-range-master/range.hpp"

namespace craam{

namespace msen{

using namespace util::lang;
using namespace std;

/** Represents a single transition between two states after taking an action. */
template <class State, class Action>
class Sample {
public:
    Sample(State state_from, Action action, State state_to,
           prec_t reward, prec_t weight, long step, long run):
        _state_from(move(state_from)), _action(move(action)),
        _state_to(move(state_to)), _reward(reward), _weight(weight), _step(step), _run(run){
        assert(weight >= 0);};

    /** Original state */
    State state_from() const {return _state_from;};
    /** Action taken */
    Action action() const {return _action;};
    /** Destination state */
    State state_to() const {return _state_to;};
    /** Reward associated with the sample */
    prec_t reward() const {return _reward;};
    /** Sample weight */
    prec_t weight() const {return _weight;};
    /** Number of the step in one execution of the simulation */
    long step() const {return _step;};
    /** Number of the actual execution (run) */
    long run() const {return _run;};

protected:
    /** Original state */
    State _state_from;
    /** Action taken */
    Action _action;
    /** Destination state */
    State _state_to;
    /** Reward associated with the sample */
    prec_t _reward;
    /** Sample weight */
    prec_t _weight;
    /** Number of the step in one execution of the simulation */
    long _step;
    /** Number of the actual execution (run) */
    long _run;
};
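/*
Usage sketch (not part of the original header): constructing a single sample
with integer state and action identifiers. The values below are arbitrary and
only illustrate the constructor's argument order.

\code
Sample<long,long> s(0,    // state_from
                    1,    // action
                    2,    // state_to
                    1.5,  // reward
                    1.0,  // weight
                    0,    // step within the run
                    0);   // run index
\endcode
*/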

/**
General representation of samples; see Sample for the meaning of the
individual values.
*/
template <class State, class Action>
class Samples {
public:

    Samples(): states_from(), actions(), states_to(), rewards(), weights(), runs(), steps(), initial() {};

    /** Adds an initial state */
    void add_initial(const State& decstate){
        this->initial.push_back(decstate);
    };

    /** Adds an initial state */
    void add_initial(State&& decstate){
        this->initial.push_back(move(decstate));
    };

    /** Adds a sample starting in a decision state */
    void add_sample(const Sample<State,Action>& sample){
        states_from.push_back(sample.state_from());
        actions.push_back(sample.action());
        states_to.push_back(sample.state_to());
        rewards.push_back(sample.reward());
        weights.push_back(sample.weight());
        steps.push_back(sample.step());
        runs.push_back(sample.run());
    };

    /** Adds a sample starting in a decision state */
    void add_sample(State state_from, Action action,
                    State state_to, prec_t reward, prec_t weight,
                    long step, long run){

        states_from.push_back(move(state_from));
        actions.push_back(move(action));
        states_to.push_back(move(state_to));
        rewards.push_back(reward);
        weights.push_back(weight);
        steps.push_back(step);
        runs.push_back(run);
    }

    /** Computes the discounted mean return over all the samples */
    prec_t mean_return(prec_t discount){
        prec_t result = 0;
        set<int> runs;

        for(size_t si : indices(*this)){
            auto es = get_sample(si);
            result += es.reward() * pow(discount, es.step());
            runs.insert(es.run());
        }

        result /= runs.size();
        return result;
    };

    /** Number of samples */
    size_t size() const {return states_from.size();};

    /** Access to samples */
    Sample<State,Action> get_sample(long i) const{
        assert(i >= 0 && size_t(i) < size());
        return Sample<State,Action>(states_from[i],actions[i],states_to[i],
                                    rewards[i],weights[i],steps[i],runs[i]);};

    /** Access to samples */
    Sample<State,Action> operator[](long i) const{
        return get_sample(i);
    };

    /** List of initial states */
    const vector<State>& get_initial() const{return initial;};

    const vector<State>& get_states_from() const{return states_from;};
    const vector<Action>& get_actions() const{return actions;};
    const vector<State>& get_states_to() const{return states_to;};
    const vector<prec_t>& get_rewards() const{return rewards;};
    const vector<prec_t>& get_weights() const{return weights;};
    const vector<long>& get_runs() const{return runs;};
    const vector<long>& get_steps() const{return steps;};

protected:

    vector<State> states_from;
    vector<Action> actions;
    vector<State> states_to;
    vector<prec_t> rewards;
    vector<prec_t> weights;
    vector<long> runs;
    vector<long> steps;

    vector<State> initial;
};
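/*
Usage sketch (not part of the original header): populating a Samples object
with integer states and actions and computing the discounted mean return. The
transitions and the discount factor 0.9 are invented for illustration.

\code
Samples<long,long> samples;
samples.add_initial(0);
// run 0: two steps starting from state 0
samples.add_sample(0, 0, 1, 1.0, 1.0, 0, 0);
samples.add_sample(1, 0, 0, 0.5, 1.0, 1, 0);
// mean over runs of sum_t reward_t * discount^t; here 1.0 + 0.5 * 0.9 = 1.45
prec_t ret = samples.mean_return(0.9);
\endcode
*/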

/**
A helper function that constructs an empty samples object for the state and
action types of the simulator that is provided to it. Any arguments are
forwarded to the Samples constructor.
*/
template<class Sim, class... U>
Samples<typename Sim::State, typename Sim::Action> make_samples(U&&... u){
    return Samples<typename Sim::State, typename Sim::Action>(forward<U>(u)...);
}

// **********************************************************************
// ****** Discrete simulation specialization ****************************
// **********************************************************************

/** Samples in which states and actions are identified by integer indices */
using DiscreteSamples = Samples<long,long>;
/** A single sample with integer state and action indices */
using DiscreteSample = Sample<long,long>;

/**
Turns arbitrary samples into discrete ones (states and actions are mapped to
consecutive integer indices), assuming that actions are state independent.
*/
template< typename State,
          typename Action,
          typename SHash = std::hash<State>,
          typename AHash = std::hash<Action>>
class SampleDiscretizerSI {
public:

    /** Constructs new internal discrete samples */
    SampleDiscretizerSI() : discretesamples(make_shared<DiscreteSamples>()),
                            action_map(), state_map() {};

    /** Adds samples to the discrete samples */
    void add_samples(const Samples<State,Action>& samples){

        // initial states
        for(const State& ins : samples.get_initial()){
            discretesamples->add_initial(add_state(ins));
        }

        // samples
        for(auto si : indices(samples)){
            const auto ds = samples.get_sample(si);
            discretesamples->add_sample(
                add_state(ds.state_from()),
                add_action(ds.action()),
                add_state(ds.state_to()),
                ds.reward(), ds.weight(),
                ds.step(), ds.run());
        }
    }

    /** Returns a state index, and creates a new one if it does not exist */
    long add_state(const State& dstate){
        auto iter = state_map.find(dstate);
        long index;
        if(iter == state_map.end()){
            index = state_map.size();
            state_map[dstate] = index;
        }
        else{
            index = iter->second;
        }
        return index;
    }

    /** Returns an action index, and creates a new one if it does not exist */
    long add_action(const Action& action){
        auto iter = action_map.find(action);
        long index;
        if(iter == action_map.end()){
            index = action_map.size();
            action_map[action] = index;
        }
        else{
            index = iter->second;
        }
        return index;
    }

    /** Returns a shared pointer to the discrete samples */
    shared_ptr<DiscreteSamples> get_discrete(){return discretesamples;};

protected:
    shared_ptr<DiscreteSamples> discretesamples;

    unordered_map<Action,long,AHash> action_map;
    unordered_map<State,long,SHash> state_map;
};
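/*
Usage sketch (not part of the original header): discretizing samples whose
states are strings and whose actions are characters. The values are invented
for illustration; each distinct state and action is mapped to a consecutive
integer index.

\code
Samples<string,char> samples;
samples.add_initial("start");
samples.add_sample("start", 'a', "goal", 1.0, 1.0, 0, 0);

SampleDiscretizerSI<string,char> discretizer;
discretizer.add_samples(samples);
shared_ptr<DiscreteSamples> dsamples = discretizer.get_discrete();
// "start" -> 0, "goal" -> 1, 'a' -> 0
\endcode
*/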

/**
Turns arbitrary samples into discrete ones (with consecutive integer indices
assigned to states), assuming that actions are state dependent: action
indices are assigned separately for each originating state.
*/
template<
    typename State,
    typename Action,
    typename SAHash = std::hash<pair<State,
                                     Action>>,
    typename SHash = std::hash<State> >
class SampleDiscretizerSD {
public:

    /** Constructs new internal discrete samples */
    SampleDiscretizerSD() : discretesamples(make_shared<DiscreteSamples>()), action_map(),
                            action_count(), state_map() {};

    /** Adds samples to the discrete samples */
    void add_samples(const Samples<State,Action>& samples){

        // initial states
        for(const auto& ins : samples.get_initial()){
            discretesamples->add_initial(add_state(ins));
        }

        // transition samples
        for(auto si : indices(samples)){

            const auto es = samples.get_sample(si);

            discretesamples->add_sample(add_state(es.state_from()),
                                        add_action(es.state_from(), es.action()),
                                        add_state(es.state_to()),
                                        es.reward(), es.weight(),
                                        es.step(), es.run());
        }
    }

    /** Returns a state index, and creates a new one if it does not exist */
    long add_state(const State& dstate){
        auto iter = state_map.find(dstate);
        long index;
        if(iter == state_map.end()){
            index = state_map.size();
            state_map[dstate] = index;
        }
        else{
            index = iter->second;
        }
        return index;
    }

    /** Returns an action index, and creates a new one if it does not exist */
    long add_action(const State& dstate, const Action& action){
        auto da = make_pair(dstate, action);
        auto iter = action_map.find(da);
        long index;
        if(iter == action_map.end()){
            index = (action_count[dstate]++);
            action_map[da] = index;
        }
        else{
            index = iter->second;
        }
        return index;
    }

    /** Returns a shared pointer to the discrete samples */
    shared_ptr<DiscreteSamples> get_discrete(){return discretesamples;};

protected:
    shared_ptr<DiscreteSamples> discretesamples;

    unordered_map<pair<State,Action>,long,SAHash> action_map;

    /** Keeps the number of actions for each state */
    unordered_map<State,long,SHash> action_count;
    unordered_map<State,long,SHash> state_map;
};
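/*
Usage sketch (not part of the original header): with state-dependent
discretization, action indices restart from 0 for every originating state.
The string states and char actions below are invented for illustration; note
that SAHash must be provided because the standard library does not define
std::hash for pairs.

\code
// hypothetical hash for pair<string,char>, shown only to make the example compile
struct PairHash{
    size_t operator()(const pair<string,char>& p) const{
        return hash<string>()(p.first) ^ (hash<char>()(p.second) << 1);}
};

Samples<string,char> samples;
samples.add_sample("s0", 'x', "s1", 0.0, 1.0, 0, 0);
samples.add_sample("s1", 'x', "s0", 1.0, 1.0, 1, 0);

SampleDiscretizerSD<string,char,PairHash> discretizer;
discretizer.add_samples(samples);
// action 'x' gets index 0 in both "s0" and "s1"
\endcode
*/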


/**
Constructs an MDP from integer samples.

Actions that have no samples for a state are marked as invalid and are not
considered when the MDP is solved.
*/
class SampledMDP{
public:

    /** Constructs an empty MDP from discrete samples */
    SampledMDP(): mdp(make_shared<MDP>()), initial(), state_action_weights() {}

    /**
    Constructs or adds states and actions based on the provided samples.
    Transition probabilities are normalized over all samples added so far.

    \param samples Source of the samples
    */
    void add_samples(const DiscreteSamples& samples){
        // copy the state and action weights to be able to re-normalize
        // the new samples consistently with the old ones
        auto old_state_action_weights = state_action_weights;

        // add transition samples
        for(size_t si : indices(samples)){

            DiscreteSample s = samples.get_sample(si);

            // -----------------
            // Compute sample weights:
            // the idea is to normalize new samples by the same
            // value as the existing samples and then re-normalize;
            // this has linear complexity
            // -----------------

            // weight used to normalize old data
            prec_t weight = 1.0; // this needs to be initialized to 1.0
            // whether the sample weight has been initialized
            bool weight_initialized = false;

            // resize transition counts
            // the actual values are updated later
            if((size_t) s.state_from() >= state_action_weights.size()){
                state_action_weights.resize(s.state_from()+1);

                // we know that the value will not be found in old data
                weight_initialized = true;
            }

            // check if we have something for the action
            numvec& actioncount = state_action_weights[s.state_from()];
            if((size_t)s.action() >= actioncount.size()){
                actioncount.resize(s.action()+1);

                // we know that the value will not be found in old data
                weight_initialized = true;
            }

            // update the new count
            assert(size_t(s.state_from()) < state_action_weights.size());
            assert(size_t(s.action()) < state_action_weights[s.state_from()].size());

            state_action_weights[s.state_from()][s.action()] += s.weight();

            // get the weight of existing transitions;
            // this is only run when we do not know whether there is any prior
            // sample
            if(!weight_initialized &&
                    (size_t(s.state_from()) < old_state_action_weights.size()) &&
                    (size_t(s.action()) < old_state_action_weights[s.state_from()].size())) {

                prec_t cnt = old_state_action_weights[s.state_from()][s.action()];

                // adjust the weight of the new sample to be consistent
                // with the previous normalization (use 1.0 if there are no previous samples)
                if(cnt > 0)
                    weight = 1.0 / cnt;
            }
            // ---------------------

            // adds a transition
            add_transition(*mdp, s.state_from(), s.action(), s.state_to(),
                           weight*s.weight(),
                           s.reward());
        }

        // make sure to set action validity based on whether there have been
        // samples observed for the action
        for(size_t si : indices(*mdp)){
            auto& state = mdp->get_state(si);

            // valid only if there are some samples for the action
            for(size_t ai : indices(state)){
                state.set_valid(ai, state_action_weights[si][ai] > 0);
            }
        }

        // Normalize the transition probabilities and rewards
        mdp->normalize();

        // set initial distribution
        for(long state : samples.get_initial()){
            initial.add_sample(state, 1.0, 0.0);
        }
        initial.normalize();
    }

    /** \returns A constant pointer to the internal MDP */
    shared_ptr<const MDP> get_mdp() const {return const_pointer_cast<const MDP>(mdp);}

    /** \returns A modifiable pointer to the internal MDP. Take care when changing. */
    shared_ptr<MDP> get_mdp_mod() {return mdp;}

    /** Initial distribution */
    Transition get_initial() const {return initial;}

    /** \returns Cumulative sample weights for each state and action */
    vector<vector<prec_t>> get_state_action_weights(){return state_action_weights;}

    /** Returns the number of states in the samples (the highest observed
    state index plus one) */
    long state_count(){return state_action_weights.size();}

protected:

    /** Internal MDP representation */
    shared_ptr<MDP> mdp;

    /** Initial distribution */
    Transition initial;

    /** Sample weights (cumulative for each state and action) */
    vector<vector<prec_t>> state_action_weights;
};
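/*
Usage sketch (not part of the original header): constructing an MDP from
discrete samples and retrieving it. The transitions and the initial state
below are invented for illustration.

\code
DiscreteSamples samples;
samples.add_initial(0);
// two samples for (state 0, action 0): transition probabilities become 0.5/0.5
samples.add_sample(0, 0, 1, 1.0, 1.0, 0, 0);
samples.add_sample(0, 0, 2, 0.0, 1.0, 0, 1);

SampledMDP smdp;
smdp.add_samples(samples);
shared_ptr<const MDP> mdp = smdp.get_mdp();
Transition init = smdp.get_initial();
\endcode
*/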


/*
Constructs a robust MDP from integer samples.

In integer samples, each decision state, expectation state,
and action is identified by an integer.

There is some extra memory overhead in this class over a plain MDP, since it
stores the number of samples observed for each state and action.

Important: Actions that are not sampled (no samples for that state and
action pair) are labeled as invalid and are not included in the computation
of the value function or the solution.
*/
//template<typename Model>
//class SampledRMDP{
//public:
//
//    /** Constructs an empty MDP from discrete samples */
//    SampledRMDP();
//
//    /**
//    Constructs or adds states and actions based on the provided samples. Transition
//    probabilities of the existing samples are normalized.
//
//    \param samples Source of the samples
//    */
//    void add_samples(const DiscreteSamples& samples);
//
//    /** \returns A constant pointer to the internal MDP */
//    shared_ptr<const Model> get_rmdp() const {return const_pointer_cast<const Model>(mdp);}
//
//    /** \returns A modifiable pointer to the internal MDP.
//    Take care when changing. */
//    shared_ptr<Model> get_rmdp_mod() {return mdp;}
//
//    /** Initial distribution */
//    Transition get_initial() const {return initial;}
//
//protected:
//
//    /** Internal MDP representation */
//    shared_ptr<Model> mdp;
//
//    /** Initial distribution */
//    Transition initial;
//
//    /** Sample counts */
//    vector<vector<size_t>> state_action_counts;
//};


} // end namespace msen
} // end namespace craam