CRAAM  2.0.0
Robust and Approximate Markov Decision Processes
Action.hpp
1 #pragma once
2 
3 #include "definitions.hpp"
4 #include "Transition.hpp"
5 
6 #include "cpp11-range-master/range.hpp"
7 #include <utility>
8 #include <vector>
9 #include <limits>
10 #include <cassert>
11 #include <string>
12 #include <numeric>
13 #include <limits>
14 #include <algorithm>
15 #include <stdexcept>
16 #include <cmath>
17 
18 namespace craam {
19 
20 using namespace std;
21 using namespace util::lang;
22 
23 // **************************************************************************************
24 // *** Regular action
25 // **************************************************************************************
26 
32 protected:
35 
36 public:
37 
39  RegularAction() : outcome() {};
40 
42  RegularAction(const Transition& outcome) : outcome(outcome) {};
43 
45  vector<Transition> get_outcomes() const {return vector<Transition>{outcome};};
46 
48  const Transition& get_outcome(long outcomeid) const {assert(outcomeid == 0); return outcome;};
49 
51  Transition& get_outcome(long outcomeid) {assert(outcomeid == 0);return outcome;};
52 
54  const Transition& operator[](long outcomeid) const {return get_outcome(outcomeid);}
55 
57  Transition& operator[](long outcomeid) {return get_outcome(outcomeid);}
58 
60  const Transition& get_outcome() const {return outcome;};
61 
63  Transition& get_outcome() {return outcome;};
64 
69  Transition& create_outcome(long outcomeid){assert(outcomeid == 0);return outcome;}
70 
72  void normalize() {outcome.normalize();};
73 
75  size_t outcome_count() const {return 1;};
76 
77 
79  void to_string(string& result) const{
80  result.append("1(reg)");
81  };
82 
85  bool is_nature_correct(numvec oid) const {return oid.size() == outcome.size();};
86 
88  prec_t mean_reward() const { return outcome.mean_reward();};
89 
94  prec_t mean_reward(numvec natpolicy) const { return outcome.mean_reward(natpolicy);};
95 
97  Transition mean_transition() const {return outcome;};
98 
99 
103  Transition mean_transition(numvec natpolicy) const {
104  return Transition(outcome.get_indices(),natpolicy,numvec(outcome.size(),0.0));
105  };
106 
109  string to_json(long actionid = -1) const{
110  string result{"{"};
111  result += "\"actionid\" : ";
112  result += std::to_string(actionid);
113  result += ",\"transition\" : ";
114  result += outcome.to_json(-1);
115  result += "}";
116  return result;
117  }
118 };
119 
120 
121 // **************************************************************************************
122 // Outcome Management (a helper class)
123 // **************************************************************************************
124 
129 
130 protected:
132  vector<Transition> outcomes;
133 
134 public:
136  OutcomeManagement() : outcomes() {};
137 
139  OutcomeManagement(const vector<Transition>& outcomes) : outcomes(outcomes) {};
140 
142  virtual ~OutcomeManagement() {};
143 
150  virtual Transition& create_outcome(long outcomeid){
151  if(outcomeid < 0)
152  throw invalid_argument("Outcomeid must be non-negative.");
153 
154  if(outcomeid >= (long) outcomes.size())
155  outcomes.resize(outcomeid + 1);
156 
157  return outcomes[outcomeid];
158  }
159 
163  virtual Transition& create_outcome(){return create_outcome(outcomes.size());};
164 
166  const Transition& get_outcome(long outcomeid) const {
167  assert((outcomeid >= 0l && outcomeid < (long) outcomes.size()));
168  return outcomes[outcomeid];};
169 
171  Transition& get_outcome(long outcomeid) {
172  assert((outcomeid >= 0l && outcomeid < (long) outcomes.size()));
173  return outcomes[outcomeid];};
174 
176  const Transition& operator[](long outcomeid) const {return get_outcome(outcomeid);}
177 
179  Transition& operator[](long outcomeid) {return get_outcome(outcomeid);}
180 
182  size_t outcome_count() const {return outcomes.size();};
183 
185  size_t size() const {return outcome_count();};
186 
190  void add_outcome(long outcomeid, const Transition& t){ create_outcome(outcomeid) = t; }
191 
194  void add_outcome(const Transition& t){add_outcome(outcomes.size(), t);};
195 
197  const vector<Transition>& get_outcomes() const {return outcomes;};
198 
200  void normalize(){
201  for(Transition& t : outcomes)
202  t.normalize();
203  }
204 
206  bool is_nature_correct(numvec oid) const
207  {return (oid.size() == outcomes.size());};
208 
210  void to_string(string& result) const{
211  result.append(std::to_string(get_outcomes().size()));
212  }
213 };
214 
215 
216 // **************************************************************************************
217 // Weighted Outcome Action
218 // **************************************************************************************
219 
220 
231 
232 protected:
235 
236 public:
237 
240  : OutcomeManagement(), distribution(0) {};
241 
243  WeightedOutcomeAction(const vector<Transition>& outcomes)
244  : OutcomeManagement(outcomes), distribution(outcomes.size(),
245  1.0 / prec_t(outcomes.size())) {};
246 
248 
272  Transition& create_outcome(long outcomeid) override{
273  if(outcomeid < 0)
274  throw invalid_argument("Outcomeid must be non-negative.");
275  // 1: compute the weight for the new outcome and old ones
276 
277  size_t newsize = outcomeid + 1; // new size of the list of outcomes
278  size_t oldsize = outcomes.size(); // current size of the set
279  if(newsize <= oldsize){// no need to add anything
280  return outcomes[outcomeid];
281  }
282  // new uniform weight for each element
283  prec_t newweight = 1.0/prec_t(outcomeid+1);
284  // check if need to scale the existing weights
285  if(oldsize > 0){
286  auto weightsum = accumulate(distribution.begin(), distribution.end(), 0.0);
287  // only scale when the sum is not zero
288  if(weightsum > 0){
289  prec_t normal = (oldsize * newweight) / weightsum;
290  transform(distribution.begin(), distribution.end(),distribution.begin(),
291  [normal](prec_t x){return x * normal;});
292  }
293  }
294  outcomes.resize(newsize);
295  // got to resize the distribution too and assign weights that are uniform
296  distribution.resize(newsize, newweight);
297  return outcomes[outcomeid];
298  }
299 
315  Transition& create_outcome(long outcomeid, prec_t weight){
316  if(outcomeid < 0)
317  throw invalid_argument("Outcomeid must be non-negative.");
318  assert(weight >= 0 && weight <= 1);
319 
320  if(outcomeid >= static_cast<long>(outcomes.size())){ // needs to resize arrays
321  outcomes.resize(outcomeid+1);
322  distribution.resize(outcomeid+1);
323  }
324  set_distribution(outcomeid, weight);
325  return outcomes[outcomeid];
326  }
327 
335  void set_distribution(const numvec& distribution){
336  if(distribution.size() != outcomes.size())
337  throw invalid_argument("Invalid distribution size.");
338  prec_t sum = accumulate(distribution.begin(),distribution.end(), 0.0);
339  if(sum < 0.99 || sum > 1.001)
340  throw invalid_argument("Distribution does not sum to 1.");
341  if((*min_element(distribution.begin(),distribution.end())) < 0)
342  throw invalid_argument("Distribution must be non-negative.");
343 
344  this->distribution = distribution;
345  }
346 
355  void set_distribution(long outcomeid, prec_t weight){
356  assert(outcomeid >= 0 && (size_t) outcomeid < outcomes.size());
357  distribution[outcomeid] = weight;
358  }
359 
361  const numvec& get_distribution() const {return distribution;};
362 
369  auto weightsum = accumulate(distribution.begin(), distribution.end(), 0.0);
370 
371  if(weightsum > 0.0){
372  for(auto& p : distribution)
373  p /= weightsum;
374  }else{
375  throw invalid_argument("Distribution sums to 0 and cannot be normalized.");
376  }
377  }
378 
383  return abs(1.0-accumulate(distribution.begin(), distribution.end(), 0.0)) < SOLPREC;
384  }
385 
391  distribution.clear();
392  if(outcomes.size() > 0)
393  distribution.resize(outcomes.size(), 1.0/ (prec_t) outcomes.size());
394  }
395 
397  void to_string(string& result) const {
398  result.append(std::to_string(get_outcomes().size()));
399  result.append(" / ");
400  result.append(std::to_string(get_distribution().size()));
401  }
402 
404  prec_t mean_reward(const numvec& outcomedist) const{
405  assert(outcomedist.size() == outcomes.size());
406  prec_t result = 0;
407  for(size_t i = 0; i < outcomes.size(); i++){
408  result += outcomedist[i] * outcomes[i].mean_reward();
409  }
410  return result;
411  }
412 
414  prec_t mean_reward() const {
415  return mean_reward(distribution);
416  }
417 
419  Transition mean_transition(const numvec& outcomedist) const{
420  assert(outcomedist.size() == outcomes.size());
421  Transition result;
422  for(size_t i = 0; i < outcomes.size(); i++)
423  outcomes[i].probabilities_addto(outcomedist[i], result);
424  return result;
425  }
426 
429  return mean_transition(distribution);
430  }
433  string to_json(long actionid = -1) const{
434  string result{"{"};
435  result += "\"actionid\" : ";
436  result += std::to_string(actionid);
437  result += ",\"outcomes\" : [";
438  for(auto oi : indices(outcomes)){
439  const auto& o = outcomes[oi];
440  result +=o.to_json(oi);
441  result +=",";
442  }
443  if(!outcomes.empty()) result.pop_back(); // remove last comma
444  result += "],\"distribution\" : [";
445  for(auto d : distribution){
446  result += std::to_string(d);
447  result += ",";
448  }
449  if(!distribution.empty()) result.pop_back(); // remove last comma
450  result += "]}";
451  return result;
452  }
453 };
454 
455 }
456 
457 
458 
const Transition & get_outcome() const
Returns the single outcome.
Definition: Action.hpp:60
bool is_nature_correct(numvec oid) const
Whether the provided outcomeid is correct.
Definition: Action.hpp:206
virtual Transition & create_outcome()
Creates a new outcome at the end.
Definition: Action.hpp:163
string to_json(long actionid=-1) const
Returns a json representation of action.
Definition: Action.hpp:433
prec_t mean_reward() const
Returns the mean reward from the transition.
Definition: Action.hpp:88
Transition & operator[](long outcomeid)
Returns a transition for the outcome.
Definition: Action.hpp:179
Transition & operator[](long outcomeid)
Returns the outcome.
Definition: Action.hpp:57
Transition & create_outcome(long outcomeid)
Adds a sufficient number of empty outcomes for the outcomeid to be a correct identifier.
Definition: Action.hpp:69
void add_outcome(const Transition &t)
Adds an outcome defined by the transition as the last outcome.
Definition: Action.hpp:194
bool is_distribution_normalized() const
Checks whether the outcome distribution is normalized.
Definition: Action.hpp:382
bool is_nature_correct(numvec oid) const
Whether the provided outcome is valid.
Definition: Action.hpp:85
string to_json(long actionid=-1) const
Returns a json representation of the action.
Definition: Action.hpp:109
const numvec & get_distribution() const
Returns the baseline distribution over outcomes.
Definition: Action.hpp:361
RegularAction()
Creates an empty action.
Definition: Action.hpp:39
const vector< Transition > & get_outcomes() const
Returns the list of outcomes.
Definition: Action.hpp:197
size_t size() const
Returns number of outcomes.
Definition: Action.hpp:185
void set_distribution(const numvec &distribution)
Sets the base distribution over the outcomes.
Definition: Action.hpp:335
prec_t mean_reward(numvec natpolicy) const
Returns the mean reward from the transition.
Definition: Action.hpp:94
void normalize()
Normalizes transitions for outcomes.
Definition: Action.hpp:200
Transition mean_transition(const numvec &outcomedist) const
Returns the mean transition probabilities for the provided nature action.
Definition: Action.hpp:419
Transition & get_outcome(long outcomeid)
Returns the single outcome.
Definition: Action.hpp:51
const indvec & get_indices() const
Indices with positive probabilities.
Definition: Transition.hpp:323
const Transition & get_outcome(long outcomeid) const
Returns a transition for the outcome.
Definition: Action.hpp:166
Transition & create_outcome(long outcomeid) override
Adds a sufficient number (or 0) of empty outcomes/transitions for the provided outcomeid to be a valid identifier.
Definition: Action.hpp:272
virtual ~OutcomeManagement()
Empty virtual destructor.
Definition: Action.hpp:142
double prec_t
Default precision used throughout the code.
Definition: definitions.hpp:25
An action in a robust MDP that allows for outcomes chosen by nature.
Definition: Action.hpp:230
vector< prec_t > numvec
Default numerical vector.
Definition: definitions.hpp:28
void normalize_distribution()
Normalizes outcome weights to sum to one.
Definition: Action.hpp:368
vector< Transition > outcomes
List of possible outcomes.
Definition: Action.hpp:132
void add_outcome(long outcomeid, const Transition &t)
Adds an outcome defined by the transition.
Definition: Action.hpp:190
string to_json(long outcomeid=-1) const
Returns a json representation of transition probabilities.
Definition: Transition.hpp:350
size_t outcome_count() const
Returns number of outcomes.
Definition: Action.hpp:182
void to_string(string &result) const
Appends a string representation to the argument.
Definition: Action.hpp:210
Transition & get_outcome()
Returns the single outcome.
Definition: Action.hpp:63
void to_string(string &result) const
Appends a string representation to the argument.
Definition: Action.hpp:397
void normalize()
Normalizes transition probabilities.
Definition: Action.hpp:72
const Transition & operator[](long outcomeid) const
Returns the outcome.
Definition: Action.hpp:54
size_t size() const
Returns the number of target states with non-zero transition probabilities.
Definition: Transition.hpp:249
Transition mean_transition(numvec natpolicy) const
Returns the mean transition probabilities.
Definition: Action.hpp:103
Represents sparse transition probabilities and rewards from a single state.
Definition: Transition.hpp:31
Transition mean_transition() const
Returns the mean transition probabilities.
Definition: Action.hpp:97
constexpr prec_t SOLPREC
Default solution precision.
Definition: definitions.hpp:40
const Transition & get_outcome(long outcomeid) const
Returns the single outcome.
Definition: Action.hpp:48
void to_string(string &result) const
Appends a string representation to the argument.
Definition: Action.hpp:79
Transition & create_outcome(long outcomeid, prec_t weight)
Adds a sufficient number of empty outcomes/transitions for the provided outcomeid to be a valid identifier.
Definition: Action.hpp:315
void set_distribution(long outcomeid, prec_t weight)
Sets weight for a particular outcome.
Definition: Action.hpp:355
numvec distribution
Weights used in computing the worst/best case.
Definition: Action.hpp:234
Action in a regular MDP.
Definition: Action.hpp:31
const Transition & operator[](long outcomeid) const
Returns a transition for the outcome.
Definition: Action.hpp:176
OutcomeManagement()
Empty list of outcomes.
Definition: Action.hpp:136
size_t outcome_count() const
Returns number of outcomes (1).
Definition: Action.hpp:75
void uniform_distribution()
Sets an initial uniform value for the distribution.
Definition: Action.hpp:390
Transition outcome
Transition probabilities.
Definition: Action.hpp:34
WeightedOutcomeAction(const vector< Transition > &outcomes)
Initializes outcomes to the provided vector.
Definition: Action.hpp:243
vector< Transition > get_outcomes() const
Returns the outcomes.
Definition: Action.hpp:45
A class that manages creation and access to outcomes to be used by actions.
Definition: Action.hpp:128
prec_t mean_reward() const
Returns the mean reward from the transition using the nominal distribution on outcomes.
Definition: Action.hpp:414
void normalize()
Normalizes the transition probabilities to sum to 1.
Definition: Transition.hpp:171
Transition & get_outcome(long outcomeid)
Returns a transition for the outcome.
Definition: Action.hpp:171
prec_t mean_reward(const numvec &outcomedist) const
Returns the mean reward from the transition for the provided nature action.
Definition: Action.hpp:404
OutcomeManagement(const vector< Transition > &outcomes)
Initializes with a list of outcomes.
Definition: Action.hpp:139
Transition mean_transition() const
Returns the mean transition probabilities using the nominal distribution on outcomes.
Definition: Action.hpp:428
virtual Transition & create_outcome(long outcomeid)
Adds a sufficient number of empty outcomes for the outcomeid to be a valid identifier.
Definition: Action.hpp:150
Main namespace, which includes modeling and solving functionality.
Definition: Action.hpp:18
RegularAction(const Transition &outcome)
Initializes outcomes to the provided transition vector.
Definition: Action.hpp:42
WeightedOutcomeAction()
Creates an empty action.
Definition: Action.hpp:239
prec_t mean_reward(const numvec &probabilities) const
Computes the mean return from this transition with custom transition probabilities.
Definition: Transition.hpp:234