CRAAM 2.0.0
Robust and Approximate Markov Decision Processes
Transition.hpp
1 #pragma once
2 
3 #include "definitions.hpp"
4 
5 #include <vector>
6 #include <string>
7 #include <algorithm>
8 #include <stdexcept>
9 #include <numeric>
10 #include <cmath>
11 
12 #include "cpp11-range-master/range.hpp"
13 
14 namespace craam {
15 
16 using namespace std;
17 using namespace util::lang;
18 
20 const prec_t tolerance = 1e-5;
21 
31 class Transition {
32 
33 public:
34  Transition() : indices(0), probabilities(0), rewards(0) {};
35 
46  Transition(const indvec& indices,
47  const numvec& probabilities,
48  const numvec& rewards) : Transition() {
49 
50  if(indices.size() != probabilities.size() || indices.size() != rewards.size())
51  throw invalid_argument("All parameters for the constructor of Transition must have the same size.");
52  auto sorted = sort_indexes(indices);
53  for(auto k : sorted)
54  add_sample(indices[k],probabilities[k],rewards[k]);
55  }
56 
66  Transition(const indvec& indices,
67  const numvec& probabilities) : Transition() {
68 
69  if(indices.size() != probabilities.size())
70  throw invalid_argument("All parameters for the constructor of Transition must have the same size.");
71  auto sorted = sort_indexes(indices);
72  for(auto k : sorted)
73  add_sample(indices[k],probabilities[k],0.0);
74  }
75 
82  Transition(const numvec& probabilities) : Transition() {
83  for(auto k : util::lang::indices(probabilities))
84  add_sample(k, probabilities[k], 0.0);
85  }
86 
116  void add_sample(long stateid, prec_t probability, prec_t reward){
117 
118  if(probability < -0.001) throw invalid_argument("probabilities must be non-negative.");
119  if(stateid < 0) throw invalid_argument("State id must be non-negative.");
120  // if the probability is 0 or negative, just do not add the sample
121  if(probability <= 0) return;
122 
123  // test for the last index; the index is not in the transition yet and belongs at the end
124  if(indices.size() == 0 || this->indices.back() < stateid){
125  indices.push_back(stateid);
126  probabilities.push_back(probability);
127  rewards.push_back(reward);
128  }
129  // the index is already in the transitions, or belongs in the middle
130  else{
131  size_t findex; // lower bound on the index of the element
132  bool present; // whether the index was found
133 
134  // test the last element first for efficiency's sake
135  if(stateid == indices.back()){
136  findex = indices.size() - 1;
137  present = true;
138  }
139  else{
140  // find the first existing index that is not smaller than the new one
141  auto fiter = lower_bound(indices.cbegin(),indices.cend(),stateid);
142  findex = fiter - indices.cbegin();
143  present = (*fiter == stateid);
144  }
145  // there is a transition to this element already
146  if(present){
147  auto p_old = probabilities[findex];
148  probabilities[findex] += probability;
149  auto r_old = rewards[findex];
150  auto new_reward = (p_old * r_old + probability * reward) /
151  (probabilities[findex]);
152  rewards[findex] = new_reward;
153  // the transition is not there, the element needs to be inserted
154  }else{
155  indices.insert(indices.cbegin()+findex,stateid);
156  probabilities.insert(probabilities.cbegin()+findex,probability);
157  rewards.insert(rewards.cbegin()+findex,reward);
158  }
159  }
160 
161  }
162 
163  prec_t sum_probabilities() const{
164  return accumulate(probabilities.cbegin(),probabilities.cend(),0.0);
165  }
166 
171  void normalize(){
172  // nothing to do if there are no transitions
173  if(probabilities.empty())
174  return;
175 
176  prec_t sp = sum_probabilities();
177  if(sp > tolerance){
178  for(auto& p : probabilities)
179  p /= sp;
180  }else{
181  throw invalid_argument("Probabilities sum to 0 (or close) and cannot be normalized.");
182  }
183  }
184 
186  bool is_normalized() const{
187  if(indices.empty()) return true;
188  else return abs(1.0 - sum_probabilities()) < tolerance;
189  }
190 
202  prec_t value(numvec const& valuefunction, prec_t discount, numvec probabilities) const{
203  assert(valuefunction.size() >= probabilities.size());
204  assert(rewards.size() == probabilities.size());
205  assert(probabilities.size() == indices.size());
206 
207  if(indices.empty())
208  throw range_error("No transitions defined for the state-action pair. Cannot compute value.");
209  prec_t value = 0.0;
210 
211  //Note: in simple benchmarks, the simd statement seems to speed up the computation
212  // by a factor of 2-4 with -march=native on a computer with AVX support
213  #pragma omp simd reduction(+:value)
214  for(size_t c = 0; c < size(); c++){
215  value += probabilities[c] * (rewards[c] + discount * valuefunction[indices[c]]);
216  }
217  return value;
218  }
219 
228  prec_t value(numvec const& valuefunction, prec_t discount = 1.0) const{
229 
230  return value(valuefunction, discount, probabilities);
231  }
232 
234  prec_t mean_reward(const numvec& probabilities) const{
235  assert(probabilities.size() == size());
236  if(indices.empty())
237  throw range_error("No transitions defined. Cannot compute mean reward.");
238 
239  return inner_product(cbegin(probabilities), cend(probabilities), cbegin(rewards), 0.0);
240  }
241 
242 
244  prec_t mean_reward() const{
245  return mean_reward(probabilities);
246  }
247 
249  size_t size() const {
250  return indices.size();
251  };
252 
254  bool empty() const {
255  return indices.empty();
256  };
257 
262  long max_index() const {
263  return indices.empty() ? -1 : indices.back();
264  };
265 
273  void probabilities_addto(prec_t scale, numvec& transition) const{
274  for(size_t i : util::lang::indices(*this))
275  transition[indices[i]] += scale*probabilities[i];
276  }
277 
286  void probabilities_addto(prec_t scale, Transition& transition) const{
287  for(size_t i : util::lang::indices(*this))
288  transition.add_sample(indices[i], scale*probabilities[i], scale*rewards[i]);
289  }
290 
296  numvec probabilities_vector(size_t size) const{
297 
298  if(max_index() >= 0 && static_cast<long>(size) <= max_index())
299  throw range_error("Size must be greater than the maximal index");
300  numvec result(size, 0.0);
301  for(size_t i : util::lang::indices(indices))
302  result[indices[i]] = probabilities[i];
303  return result;
304  }
305 
312  numvec rewards_vector(size_t size) const{
313 
314  if(max_index() >= 0 && static_cast<long>(size) <= max_index())
315  throw range_error("Size must be greater than the maximal index");
316  numvec result(size, 0.0);
317  for(size_t i : util::lang::indices(indices))
318  result[indices[i]] = rewards[i];
319  return result;
320  }
321 
323  const indvec& get_indices() const {return indices;};
324 
326  long get_index(long k){assert(k>=0 && k < long(size())); return indices[k];}
327 
332  const numvec& get_probabilities() const {return probabilities;};
337  const numvec& get_rewards() const {return rewards;};
338 
340  void set_reward(long sampleid, prec_t reward) {rewards[sampleid] = reward;};
341 
343  prec_t get_reward(long sampleid) const {
344  assert(sampleid >= 0 && sampleid < long(size()));
345  return rewards[sampleid];
346  };
347 
350  string to_json(long outcomeid = -1) const{
351  string result{"{"};
352  result += "\"outcomeid\" : ";
353  result += std::to_string(outcomeid);
354  result += ",\"stateids\" : [";
355  for(auto i : indices){
356  result += std::to_string(i);
357  result += ",";
358  }
359  if(!indices.empty()) result.pop_back();// remove last comma
360  result += "],\"probabilities\" : [";
361  for(auto p : probabilities){
362  result += std::to_string(p);
363  result += ",";
364  }
365  if(!probabilities.empty()) result.pop_back();// remove last comma
366  result += "],\"rewards\" : [" ;
367  for(auto r : rewards){
368  result += std::to_string(r);
369  result += ",";
370  }
371  if(!rewards.empty()) result.pop_back();// remove last comma
372  result += "]}";
373  return result;
374  }
375 
376 protected:
377 
379  indvec indices;
381  numvec probabilities;
383  numvec rewards;
384 };
385 
386 }
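
The listing above is the complete class. As a quick orientation, the following is a minimal usage sketch (it is not part of Transition.hpp, and the include path is an assumption): it builds a sparse transition, shows how add_sample merges a duplicate index by summing probabilities and probability-weighting rewards, and computes an expected discounted value.

#include "Transition.hpp"
#include <iostream>

int main(){
    using namespace craam;

    // sparse transition to states 1 and 3
    Transition t({1, 3}, {0.4, 0.6}, {2.0, 1.0});

    // adding a sample for an index that is already present merges it:
    // the probabilities are summed and the reward becomes their
    // probability-weighted average
    t.add_sample(3, 0.2, 4.0);

    // rescale so that the probabilities sum to 1 again
    t.normalize();

    // expected discounted value under a value function over states 0..3
    numvec valuefunction{0.0, 1.0, 2.0, 3.0};
    std::cout << t.value(valuefunction, 0.9) << std::endl;
    return 0;
}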
numvec rewards_vector(size_t size) const
Constructs and returns a dense vector of rewards, including entries for states with zero transition probability.
Definition: Transition.hpp:312
Transition(const numvec &probabilities)
Creates a single transition from raw data with uniformly zero rewards, where destination states are i...
Definition: Transition.hpp:82
void set_reward(long sampleid, prec_t reward)
Sets the reward for a transition to a particular state.
Definition: Transition.hpp:340
numvec probabilities_vector(size_t size) const
Constructs and returns a dense vector of probabilities, which includes 0 transition probabilities...
Definition: Transition.hpp:296
void probabilities_addto(prec_t scale, Transition &transition) const
Scales transition probabilities and rewards according to the provided parameter and adds them to the ...
Definition: Transition.hpp:286
const indvec & get_indices() const
Indices with positive probabilities.
Definition: Transition.hpp:323
void probabilities_addto(prec_t scale, numvec &transition) const
Scales transition probabilities according to the provided parameter and adds them to the provided vec...
Definition: Transition.hpp:273
long max_index() const
Returns the maximal index involved in the transition.
Definition: Transition.hpp:262
double prec_t
Default precision used throughout the code.
Definition: definitions.hpp:25
vector< prec_t > numvec
Default numerical vector.
Definition: definitions.hpp:28
string to_json(long outcomeid=-1) const
Returns a JSON representation of the transition probabilities and rewards.
Definition: Transition.hpp:350
const numvec & get_probabilities() const
Returns the list of positive probabilities for the indices returned by get_indices.
Definition: Transition.hpp:332
numvec rewards
List of rewards associated with transitions.
Definition: Transition.hpp:383
numvec probabilities
List of transition probabilities to states.
Definition: Transition.hpp:381
long get_index(long k)
Index of the k-th state with non-zero probability.
Definition: Transition.hpp:326
indvec indices
List of state indices.
Definition: Transition.hpp:379
size_t size() const
Returns the number of target states with non-zero transition probabilities.
Definition: Transition.hpp:249
Represents sparse transition probabilities and rewards from a single state.
Definition: Transition.hpp:31
prec_t value(numvec const &valuefunction, prec_t discount=1.0) const
Computes the value of the transition for the given value function.
Definition: Transition.hpp:228
bool is_normalized() const
Checks whether the transition probabilities sum to 1 (within tolerance).
Definition: Transition.hpp:186
vector< size_t > sort_indexes(vector< T > const &v)
Sort indices by values in ascending order.
Definition: definitions.hpp:69
prec_t value(numvec const &valuefunction, prec_t discount, numvec probabilities) const
Computes the value of the transition for the given value function and custom transition probabilities.
Definition: Transition.hpp:202
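
As an illustrative aside (a sketch under assumptions, not taken from the library documentation; the function name and numbers are hypothetical), this overload evaluates the transition under an alternative distribution over its sparse support, which is how a reweighted, e.g. worst-case, distribution can be plugged into the same value computation:

#include "Transition.hpp"

// Sketch: evaluate a transition under a caller-supplied distribution with
// one entry per index in get_indices(); names and values are illustrative.
craam::prec_t evaluate_with_alternative(){
    craam::Transition t({0, 2}, {0.5, 0.5}, {1.0, 0.0});
    craam::numvec valuefunction{10.0, 0.0, 5.0};
    craam::numvec alternative{0.2, 0.8};   // must satisfy alternative.size() == t.size()
    return t.value(valuefunction, 0.95, alternative);
}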
const numvec & get_rewards() const
Rewards for indices with positive probabilities returned by get_indices.
Definition: Transition.hpp:337
const prec_t tolerance
Tolerance for checking whether a transition probability distribution is normalized.
Definition: Transition.hpp:20
bool empty() const
Checks if the transition is empty.
Definition: Transition.hpp:254
Transition(const indvec &indices, const numvec &probabilities, const numvec &rewards)
Creates a single transition from raw data.
Definition: Transition.hpp:46
void normalize()
Normalizes the transition probabilities to sum to 1.
Definition: Transition.hpp:171
prec_t get_reward(long sampleid) const
Gets the reward for a transition to a particular state.
Definition: Transition.hpp:343
vector< long > indvec
Default index vector.
Definition: definitions.hpp:31
Main namespace which includes modeling and solving functionality.
Definition: Action.hpp:18
prec_t mean_reward() const
Computes the mean return from this transition.
Definition: Transition.hpp:244
void add_sample(long stateid, prec_t probability, prec_t reward)
Adds a single transition probability to the existing probabilities.
Definition: Transition.hpp:116
prec_t mean_reward(const numvec &probabilities) const
Computes the mean return from this transition with custom transition probabilities.
Definition: Transition.hpp:234
Transition(const indvec &indices, const numvec &probabilities)
Creates a single transition from raw data with uniformly zero rewards.
Definition: Transition.hpp:66
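
Finally, a hedged sketch (not part of the library documentation; the function name and the state_count parameter are illustrative) of the dense conversions and the accumulation helper from the listing; both conversion calls require the requested size to exceed max_index():

#include "Transition.hpp"

// Sketch: dense views of a sparse transition; state_count must be larger
// than t.max_index(), otherwise the conversion functions throw range_error.
void dense_views(const craam::Transition& t, size_t state_count){
    craam::numvec dense_probs = t.probabilities_vector(state_count);
    craam::numvec dense_rewards = t.rewards_vector(state_count);

    // accumulate scaled probabilities into an existing dense vector,
    // e.g. when mixing several transitions with weights
    craam::numvec mixture(state_count, 0.0);
    t.probabilities_addto(0.5, mixture);

    (void)dense_probs; (void)dense_rewards;   // silence unused-variable warnings
}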