#include "definitions.hpp"
#include "cpp11-range-master/range.hpp"

Transition() : indices(0), probabilities(0), rewards(0) {};

Transition(const indvec& indices, const numvec& probabilities, const numvec& rewards){
    if(indices.size() != probabilities.size() || indices.size() != rewards.size())
        throw invalid_argument("All parameters for the constructor of Transition must have the same size.");
    for(auto k : util::lang::indices(indices))
        add_sample(indices[k], probabilities[k], rewards[k]);
}

Transition(const indvec& indices, const numvec& probabilities){
    if(indices.size() != probabilities.size())
        throw invalid_argument("All parameters for the constructor of Transition must have the same size.");
    for(auto k : util::lang::indices(indices))
        add_sample(indices[k], probabilities[k], 0.0);
}

Transition(const numvec& probabilities){
    for(auto k : util::lang::indices(probabilities))
        add_sample(k, probabilities[k], 0.0);
}

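A brief usage sketch of the three constructors; the variable names and values are hypothetical and the enclosing namespace is assumed to be in scope:

indvec states = {1, 3, 7};
numvec probs  = {0.2, 0.5, 0.3};
numvec rews   = {1.0, 0.0, -2.0};

Transition t1(states, probs, rews);  // explicit rewards
Transition t2(states, probs);        // rewards default to 0
Transition t3(probs);                // destination states are 0, 1, 2
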
void add_sample(long stateid, prec_t probability, prec_t reward){
    if(probability < -0.001) throw invalid_argument("probabilities must be non-negative.");
    if(stateid < 0) throw invalid_argument("State id must be non-negative.");
    // zero probabilities are not stored
    if(probability <= 0) return;

    // the index is not yet in the transition and belongs at the end
    if(indices.size() == 0 || this->indices.back() < stateid){
        indices.push_back(stateid);
        probabilities.push_back(probability);
        rewards.push_back(reward);
    }
    // the index is already present, or belongs somewhere in the middle
    else{
        size_t findex;   // position of stateid in indices
        bool present;    // whether stateid is already present

        // check the last element first, otherwise do a binary search
        if(stateid == indices.back()){
            findex = indices.size() - 1;
            present = true;
        }else{
            auto fiter = lower_bound(indices.cbegin(), indices.cend(), stateid);
            findex = fiter - indices.cbegin();
            present = (*fiter == stateid);
        }
        if(present){
            // merge: sum the probabilities and take the probability-weighted
            // average of the rewards
            auto p_old = probabilities[findex];
            probabilities[findex] += probability;
            auto r_old = rewards[findex];
            auto new_reward = (p_old * r_old + probability * reward) / (probabilities[findex]);
            rewards[findex] = new_reward;
        }else{
            // insert at the sorted position
            indices.insert(indices.cbegin() + findex, stateid);
            probabilities.insert(probabilities.cbegin() + findex, probability);
            rewards.insert(rewards.cbegin() + findex, reward);
        }
    }
}

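When a sample is added for a state that is already present, the probabilities are summed and the reward becomes the probability-weighted average of the old and new rewards, as in this sketch (hypothetical values):

Transition t;
t.add_sample(4, 0.25, 2.0);
t.add_sample(4, 0.25, 4.0);
// t now holds a single entry for state 4 with probability 0.5 and
// reward (0.25*2.0 + 0.25*4.0) / 0.5 = 3.0
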
prec_t sum_probabilities() const{
    return accumulate(probabilities.cbegin(), probabilities.cend(), 0.0);
}

void normalize(){
    // nothing to do if there are no transitions
    if(probabilities.empty()) return;

    prec_t sp = sum_probabilities();
    if(sp != 0.0){
        for(auto& p : probabilities)
            p /= sp;
    }else{
        throw invalid_argument("Probabilities sum to 0 (or close) and cannot be normalized.");
    }
}

bool is_normalized() const{
    if(indices.empty()) return true;
    else return abs(1.0 - sum_probabilities()) < tolerance;
}

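A sketch of how normalization is typically used (hypothetical values):

Transition t({0, 2}, {1.0, 3.0});   // weights rather than probabilities
// t.is_normalized() == false, since 1.0 + 3.0 != 1
t.normalize();                      // probabilities become {0.25, 0.75}
// t.is_normalized() == true (within tolerance)
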
prec_t value(numvec const& valuefunction, prec_t discount, numvec probabilities) const{
    assert(valuefunction.size() >= probabilities.size());
    assert(rewards.size() == probabilities.size());
    assert(probabilities.size() == indices.size());

    if(empty())
        throw range_error("No transitions defined for the state action-pair. Cannot compute value.");

    prec_t value = 0.0;
    #pragma omp simd reduction(+:value)
    for(size_t c = 0; c < size(); c++){
        value += probabilities[c] * (rewards[c] + discount * valuefunction[indices[c]]);
    }
    return value;
}

prec_t value(numvec const& valuefunction, prec_t discount = 1.0) const
    { return value(valuefunction, discount, probabilities); }

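The computed quantity is sum_c probabilities[c] * (rewards[c] + discount * valuefunction[indices[c]]); a sketch with hypothetical numbers:

Transition t({0, 1}, {0.4, 0.6}, {1.0, 2.0});
numvec V = {10.0, 20.0};
prec_t v = t.value(V, 0.9);
// v = 0.4*(1.0 + 0.9*10.0) + 0.6*(2.0 + 0.9*20.0) = 4.0 + 12.0 = 16.0
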
prec_t mean_reward(const numvec& probabilities) const{
    assert(probabilities.size() == size());
    if(empty())
        throw range_error("No transitions defined. Cannot compute mean reward.");
    return inner_product(cbegin(probabilities), cend(probabilities), cbegin(rewards), 0.0);
}

prec_t mean_reward() const { return mean_reward(probabilities); }

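The mean reward is the probability-weighted average of the rewards; a sketch with hypothetical values:

Transition t({0, 1}, {0.4, 0.6}, {1.0, 2.0});
prec_t r1 = t.mean_reward();            // 0.4*1.0 + 0.6*2.0 = 1.6
prec_t r2 = t.mean_reward({0.5, 0.5});  // 0.5*1.0 + 0.5*2.0 = 1.5 (custom weights)
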
size_t size() const { return indices.size(); }
bool empty() const { return indices.empty(); }
long max_index() const { return indices.empty() ? -1 : indices.back(); }

void probabilities_addto(prec_t scale, numvec& transition) const{
    for(size_t i : util::lang::indices(*this))
        transition[indices[i]] += scale * probabilities[i];
}

void probabilities_addto(prec_t scale, Transition& transition) const{
    for(size_t i : util::lang::indices(*this))
        transition.add_sample(indices[i], scale * probabilities[i], scale * rewards[i]);
}

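These two overloads can accumulate a weighted mixture of transitions, either into another Transition or into a dense vector; a sketch with hypothetical values:

Transition a({0, 1}, {0.5, 0.5}, {1.0, 1.0});
Transition b({1, 2}, {0.3, 0.7}, {2.0, 2.0});

Transition mix;
a.probabilities_addto(0.5, mix);   // add 0.5 * a
b.probabilities_addto(0.5, mix);   // add 0.5 * b
// mix now covers states {0,1,2} with probabilities {0.25, 0.40, 0.35}

numvec dense(3, 0.0);
a.probabilities_addto(1.0, dense); // dense == {0.5, 0.5, 0.0}
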
numvec probabilities_vector(size_t size) const{
    if(max_index() >= 0 && static_cast<long>(size) <= max_index())
        throw range_error("Size must be greater than the maximal index");

    numvec result(size, 0.0);   // dense vector; missing transitions stay 0
    for(size_t i : util::lang::indices(indices))
        result[indices[i]] = probabilities[i];
    return result;
}

numvec rewards_vector(size_t size) const{
    if(max_index() >= 0 && static_cast<long>(size) <= max_index())
        throw range_error("Size must be greater than the maximal index");

    numvec result(size, 0.0);   // dense vector; missing transitions stay 0
    for(size_t i : util::lang::indices(indices))
        result[indices[i]] = rewards[i];
    return result;
}

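A sketch of converting the sparse representation to dense vectors; the requested size must exceed max_index(), and the values are hypothetical:

Transition t({1, 3}, {0.4, 0.6}, {5.0, -1.0});
numvec p = t.probabilities_vector(5);  // {0.0, 0.4, 0.0, 0.6, 0.0}
numvec r = t.rewards_vector(5);        // {0.0, 5.0, 0.0, -1.0, 0.0}
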
long get_index(long k){ assert(k >= 0 && k < long(size())); return indices[k]; }

prec_t get_reward(long sampleid) const{
    assert(sampleid >= 0 && sampleid < long(size()));
    return rewards[sampleid];
}

string to_json(long outcomeid = -1) const{
    string result{"{"};
    result += "\"outcomeid\" : ";
    result += std::to_string(outcomeid);
    result += ",\"stateids\" : [";
    for(auto i : indices){
        result += std::to_string(i);
        result += ",";
    }
    if(!indices.empty()) result.pop_back();   // remove the trailing comma
    result += "],\"probabilities\" : [";
    for(auto p : probabilities){
        result += std::to_string(p);
        result += ",";
    }
    if(!probabilities.empty()) result.pop_back();
    result += "],\"rewards\" : [";
    for(auto r : rewards){
        result += std::to_string(r);
        result += ",";
    }
    if(!rewards.empty()) result.pop_back();
    result += "]}";
    return result;
}

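For a small transition the resulting string has roughly the following shape; double formatting follows std::to_string and the values are hypothetical:

Transition t({0, 1}, {0.4, 0.6});
string s = t.to_json();
// s is approximately:
// {"outcomeid" : -1,"stateids" : [0,1],"probabilities" : [0.400000,0.600000],"rewards" : [0.000000,0.000000]}
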
numvec rewards_vector(size_t size) const
Constructs and returns a dense vector of rewards; entries for states with zero transition probability are 0.
Definition: Transition.hpp:312
Transition(const numvec &probabilities)
Creates a single transition from raw data with uniformly zero rewards, where the destination states are 0, 1, ..., n-1 in order.
Definition: Transition.hpp:82
void set_reward(long sampleid, prec_t reward)
Sets the reward for a transition to a particular state.
Definition: Transition.hpp:340
numvec probabilities_vector(size_t size) const
Constructs and returns a dense vector of probabilities; entries for states with zero transition probability are 0.
Definition: Transition.hpp:296
void probabilities_addto(prec_t scale, Transition &transition) const
Scales transition probabilities and rewards according to the provided parameter and adds them to the provided Transition.
Definition: Transition.hpp:286
const indvec & get_indices() const
Indices with positive probabilities.
Definition: Transition.hpp:323
void probabilities_addto(prec_t scale, numvec &transition) const
Scales transition probabilities according to the provided parameter and adds them to the provided vector.
Definition: Transition.hpp:273
long max_index() const
Returns the maximal index involved in the transition (-1 when the transition is empty).
Definition: Transition.hpp:262
double prec_t
Default precision used throughout the code.
Definition: definitions.hpp:25
vector< prec_t > numvec
Default numerical vector.
Definition: definitions.hpp:28
string to_json(long outcomeid=-1) const
Returns a JSON representation of the transition probabilities and rewards.
Definition: Transition.hpp:350
const numvec & get_probabilities() const
Returns the list of positive probabilities for the indices returned by get_indices.
Definition: Transition.hpp:332
numvec rewards
List of rewards associated with transitions.
Definition: Transition.hpp:383
numvec probabilities
List of transition probabilities to the states in indices.
Definition: Transition.hpp:381
long get_index(long k)
Index of the k-th state with non-zero probability.
Definition: Transition.hpp:326
indvec indices
Sorted list of state indices with positive transition probabilities.
Definition: Transition.hpp:379
size_t size() const
Returns the number of target states with non-zero transition probabilities.
Definition: Transition.hpp:249
Transition
Represents sparse transition probabilities and rewards from a single state.
Definition: Transition.hpp:31
prec_t value(numvec const &valuefunction, prec_t discount=1.0) const
Computes the value of the transition for a value function, using the stored transition probabilities.
Definition: Transition.hpp:228
bool is_normalized() const
Checks whether the transition probabilities sum to 1, up to tolerance.
Definition: Transition.hpp:186
vector< size_t > sort_indexes(vector< T > const &v)
Sort indices by values in ascending order.
Definition: definitions.hpp:69
prec_t value(numvec const &valuefunction, prec_t discount, numvec probabilities) const
Computes the value of the transition for a value function, using the provided custom transition probabilities.
Definition: Transition.hpp:202
const numvec & get_rewards() const
Rewards for indices with positive probabilities returned by get_indices.
Definition: Transition.hpp:337
const prec_t tolerance
Tolerance for checking whether a transition probability distribution is normalized.
Definition: Transition.hpp:20
bool empty() const
Checks if the transition is empty.
Definition: Transition.hpp:254
Transition(const indvec &indices, const numvec &probabilities, const numvec &rewards)
Creates a single transition from raw data.
Definition: Transition.hpp:46
void normalize()
Normalizes the transition probabilities to sum to 1.
Definition: Transition.hpp:171
prec_t get_reward(long sampleid) const
Gets the reward for a transition to a particular state.
Definition: Transition.hpp:343
vector< long > indvec
Default index vector.
Definition: definitions.hpp:31
Main namespace which includes modeling and solving functionality.
Definition: Action.hpp:18
prec_t mean_reward() const
Computes the mean return from this transition.
Definition: Transition.hpp:244
void add_sample(long stateid, prec_t probability, prec_t reward)
Adds a single transition probability to the existing probabilities.
Definition: Transition.hpp:116
prec_t mean_reward(const numvec &probabilities) const
Computes the mean return from this transition with custom transition probabilities.
Definition: Transition.hpp:234
Transition(const indvec &indices, const numvec &probabilities)
Creates a single transition from raw data with uniformly zero rewards.
Definition: Transition.hpp:66
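To tie the pieces together, a short end-to-end sketch; it assumes the header is included as Transition.hpp and the library namespace is in scope, and all values are hypothetical:

#include "Transition.hpp"

Transition t;
t.add_sample(0, 1.0, 0.0);   // un-normalized weights
t.add_sample(2, 3.0, 1.0);
t.normalize();               // probabilities become {0.25, 0.75}

numvec V = {0.0, 0.0, 4.0};
prec_t v = t.value(V, 0.95); // 0.25*(0.0 + 0.95*0.0) + 0.75*(1.0 + 0.95*4.0) = 3.6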