#include "../cpp11-range-master/range.hpp"

namespace craam {
namespace algorithms {
// L1 robust response: the worst case over an L1 ball around p.
inline vec_scal_t robust_l1(const numvec& v, const numvec& p, prec_t threshold){
    assert(v.size() == p.size());
    return worstcase_l1(v, p, threshold);
}

// L1 optimistic response: negate the values, solve the worst case, negate back.
inline vec_scal_t optimistic_l1(const numvec& v, const numvec& p, prec_t threshold){
    assert(v.size() == p.size());
    numvec minusv(v.size());
    transform(begin(v), end(v), begin(minusv), negate<prec_t>());
    auto result = worstcase_l1(minusv, p, threshold);
    return make_pair(result.first, -result.second);
}
// Worst outcome; the threshold is ignored.
template<class T>
inline vec_scal_t robust_unbounded(const numvec& v, const numvec& p, T){
    assert(v.size() == p.size());
    long index = min_element(begin(v), end(v)) - begin(v);
    numvec dist(v.size(), 0.0);
    dist[index] = 1.0;
    return make_pair(dist, v[index]);
}

// Best outcome; the threshold is ignored.
template<class T>
inline vec_scal_t optimistic_unbounded(const numvec& v, const numvec& p, T){
    assert(v.size() == p.size());
    long index = max_element(begin(v), end(v)) - begin(v);
    numvec dist(v.size(), 0.0);
    dist[index] = 1.0;
    return make_pair(dist, v[index]);
}
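To make the four responses concrete, the sketch below evaluates them on a toy q-value vector. It is a minimal, hypothetical example: worstcase_l1 is assumed to be visible from definitions.hpp, and all numbers are made up.

#include <iostream>

void nature_response_demo(){
    using namespace craam;
    using namespace craam::algorithms;

    numvec qvalues = {1.0, 3.0, 2.0};  // values of the successor states
    numvec nominal = {0.2, 0.5, 0.3};  // nominal transition probabilities

    // Robust shifts up to an L1 budget of 0.4 of mass toward low values;
    // optimistic shifts the same budget toward high values.
    vec_scal_t rob = robust_l1(qvalues, nominal, 0.4);
    vec_scal_t opt = optimistic_l1(qvalues, nominal, 0.4);

    // The unbounded responses put all mass on the argmin / argmax.
    vec_scal_t worst = robust_unbounded(qvalues, nominal, 0.0);

    std::cout << rob.second << " <= " << opt.second << std::endl;
    std::cout << worst.second << std::endl;  // prints 1 (the minimum q-value)
}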
// Computes an ambiguous (robust or optimistic) value of a regular action.
template<class T>
inline vec_scal_t value_action(const RegularAction& action, const numvec& valuefunction,
                               prec_t discount, const NatureInstance<T>& nature){
    const numvec& rewards = action.get_outcome().get_rewards();
    const indvec& nonzero_indices = action.get_outcome().get_indices();

    // q-values of the successor states with positive probability;
    // nature then chooses a distribution over these
    numvec qvalues(rewards.size());
    for(size_t i = 0; i < rewards.size(); i++){
        qvalues[i] = rewards[i] + discount * valuefunction[nonzero_indices[i]];
    }
    return nature.first(qvalues, action.get_outcome().get_probabilities(), nature.second);
}
// Computes an ambiguous value of an action whose outcomes are chosen by nature.
template<class T>
inline vec_scal_t value_action(const WeightedOutcomeAction& action, const numvec& valuefunction,
                               prec_t discount, const NatureInstance<T>& nature){
    if(action.get_outcomes().empty())
        throw invalid_argument("Action with no outcomes.");

    numvec outcomevalues(action.size());
    for(size_t i = 0; i < action.size(); i++)
        outcomevalues[i] = action[i].value(valuefunction, discount);

    return nature.first(outcomevalues, action.get_distribution(), nature.second);
}
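A NatureInstance is just a pair of a response function and its threshold, so evaluating a single action under an ambiguous model is one call. A minimal sketch: the 0.2 budget and 0.95 discount are made-up values, and action and valuefunction are assumed to come from an existing model.

prec_t robust_action_value(const RegularAction& action, const numvec& valuefunction){
    NatureInstance<prec_t> nat = make_pair(robust_l1, 0.2);
    vec_scal_t res = value_action(action, valuefunction, 0.95, nat);
    // res.first is nature's distribution over successor states,
    // res.second the corresponding robust value of the action
    return res.second;
}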
// Computes the value of a fixed action and any response of nature.
template<class AType, class T>
inline vec_scal_t value_fix_state(const SAState<AType>& state, numvec const& valuefunction,
                                  prec_t discount, long actionid, const NatureInstance<T>& nature){
    assert(actionid >= 0 && actionid < long(state.size()));
    if(actionid < 0 || actionid >= long(state.size()))
        throw range_error("invalid actionid: " + to_string(actionid) +
                          " for action count: " + to_string(state.get_actions().size()));

    const auto& action = state[actionid];
    // cannot assume that the fixed action is valid
    if(!state.is_valid(actionid))
        throw invalid_argument("Cannot take an invalid action");

    return value_action(action, valuefunction, discount, nature);
}
// Finds the greedy action, nature's response, and the corresponding value
// for the given value function.
template<typename AType, typename T>
inline ind_vec_scal_t value_max_state(const SAState<AType>& state, const numvec& valuefunction,
                                      prec_t discount, const NatureInstance<T>& nature){
    // terminal states have no actions and value 0
    if(state.is_terminal())
        return make_tuple(-1, numvec(), 0);

    prec_t maxvalue = -numeric_limits<prec_t>::infinity();
    long result = -1;
    numvec result_outcome;

    for(size_t i = 0; i < state.get_actions().size(); i++){
        const auto& action = state[i];
        // skip actions that are not valid
        if(!state.is_valid(i)) continue;

        auto value = value_action(action, valuefunction, discount, nature);
        if(value.second > maxvalue){
            maxvalue = value.second;
            result = long(i);
            result_outcome = move(value.first);
        }
    }
    if(result == -1)
        throw invalid_argument("all actions are invalid.");
    return make_tuple(result, result_outcome, maxvalue);
}
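Unpacking the returned tuple looks as follows. Note that ind_vec_scal_t stores the action index as a prec_t (see the typedef in definitions.hpp), so it is cast back to long. This is a sketch: the 0.1 budget and 0.95 discount are made-up values.

prec_t greedy_value(const SAState<RegularAction>& state, const numvec& valuefunction){
    NatureInstance<prec_t> nat = make_pair(robust_l1, 0.1);
    ind_vec_scal_t best = value_max_state(state, valuefunction, 0.95, nat);
    // get<0>: greedy action index (stored as prec_t; -1 for terminal states)
    // get<1>: nature's distribution for that action
    // get<2>: the robust value of the state
    return get<2>(best);
}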
// Empty SolutionRobust for a problem with a given policy and value function.
SolutionRobust(numvec valuefunction, indvec policy) :
    Solution(move(valuefunction), move(policy)),
    natpolicy(this->valuefunction.size(), numvec(0)) {};

// SolutionRobust with nature's policy, residual, and iteration count.
SolutionRobust(numvec valuefunction, indvec policy, vector<numvec> natpolicy,
               prec_t residual = -1, long iterations = -1) :
    Solution(move(valuefunction), move(policy), residual, iterations),
    natpolicy(move(natpolicy)) {};
// Constructs a new robust solution.
SolutionRobust new_solution(size_t statecount, numvec valuefunction) const {
    if(natspec.size() != statecount)
        throw invalid_argument("Size of nature specification does not match the number of states.");
    process_valuefunction(statecount, valuefunction);
    // ...
}
// Computes the Bellman update and stores the best responses of the decision
// maker and of nature in the solution; returns the new state value.
template<class SType>
prec_t update_solution(SolutionRobust& solution, const SType& state, long stateid,
                       const numvec& valuefunction, prec_t discount) const {
    prec_t newvalue = 0;
    if(policy.empty() || policy[stateid] < 0){
        // no policy is given for this state: optimize over all actions
        // ...
    } else {
        // the action is fixed, so only nature best-responds
        tie(solution.natpolicy[stateid], newvalue) =
            value_fix_state(state, valuefunction, discount, policy[stateid], natspec[stateid]);
    }
    return newvalue;
}
// Computes a fixed Bellman update using the current solution policy.
template<class SType>
prec_t update_value(const SolutionRobust& solution, const SType& state, long stateid,
                    const numvec& valuefunction, prec_t discount) const;

// Copies a single nature specification across all states.
template<class T>
PolicyNature<T> uniform_nature(size_t statecount, NatureResponse<T> nature, T threshold){
    return PolicyNature<T>(vector<NatureInstance<T>>(statecount, make_pair(nature, threshold)));
}

// Overload that takes the state count from a model.
template<class Model, class T>
PolicyNature<T> uniform_nature(const Model& m, NatureResponse<T> nature, T threshold){
    return PolicyNature<T>(vector<NatureInstance<T>>(m.state_count(), make_pair(nature, threshold)));
}
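With uniform_nature, applying one response and one budget to a whole model is a single call. A sketch: robust_l1 with budget 0.5 is a made-up choice, and any model exposing state_count() works.

template<class Model>
PolicyNature<prec_t> uniform_robust_l1(const Model& mdp){
    // one L1 budget shared by all states of the model
    return uniform_nature(mdp, robust_l1, 0.5);
}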
// Zips two vectors into a vector of pairs.
template <class T1, class T2>
vector<pair<T1,T2>> zip(const vector<T1>& v1, const vector<T2>& v2){
    assert(v1.size() == v2.size());
    vector<pair<T1,T2>> result(v1.size());
    for(size_t i = 0; i < v1.size(); i++){
        result[i] = make_pair(v1[i], v2[i]);
    }
    return result;
}
// Zips a constant first element with each entry of a vector.
template <class T1, class T2>
vector<pair<T1,T2>> zip(const T1& v1, const vector<T2>& v2){
    vector<pair<T1,T2>> result(v2.size());
    for(size_t i = 0; i < v2.size(); i++){
        result[i] = make_pair(v1, v2[i]);
    }
    return result;
}
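These helpers exist mainly to assemble the per-state nature specification: zipping a vector of responses with a vector of thresholds yields exactly the vector<NatureInstance<T>> that PolicyNature stores. A sketch with made-up thresholds for a three-state model:

PolicyNature<prec_t> example_natspec(){
    vector<NatureResponse<prec_t>> responses = {robust_l1, robust_l1, optimistic_l1};
    vector<prec_t> thresholds = {0.1, 0.2, 0.0};
    // zip produces vector<pair<NatureResponse<prec_t>, prec_t>>,
    // i.e. vector<NatureInstance<prec_t>>, one entry per state
    return PolicyNature<prec_t>(zip(responses, thresholds));
}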
// Gauss-Seidel variant of robust value iteration (not parallelized).
template<class SType, class T = prec_t>
inline auto rsolve_vi(const GRMDP<SType>& mdp, prec_t discount,
                const vector<NatureResponse<T>>& nature, const vector<T>& thresholds,
                numvec valuefunction = numvec(0), const indvec& policy = indvec(0),
                unsigned long iterations = MAXITER, prec_t maxresidual = SOLPREC){
    assert(nature.size() == thresholds.size());
    return vi_gs<SType, PolicyNature<T>>(mdp, discount, move(valuefunction),
                    PolicyNature<T>(policy, zip(nature, thresholds)),
                    iterations, maxresidual);
}

// A second overload with an alternative nature specification forwards identically:
template<class SType, class T = prec_t>
inline auto rsolve_vi(/* ... */){
    assert(nature.size() == thresholds.size());
    return vi_gs<SType, PolicyNature<T>>(mdp, discount, move(valuefunction),
                    /* ... */,
                    iterations, maxresidual);
}
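End to end, solving a robust MDP with a uniform L1 budget then looks like the sketch below. The discount 0.95 and budget 0.3 are made-up values, and the instantiation GRMDP<SAState<RegularAction>> is an assumption about how the model template is typically filled in.

auto solve_robust_vi(const GRMDP<SAState<RegularAction>>& mdp){
    size_t n = mdp.state_count();
    vector<NatureResponse<prec_t>> nature(n, robust_l1); // same response everywhere
    vector<prec_t> thresholds(n, 0.3);                   // same budget everywhere
    // the returned solution carries valuefunction, policy, and natpolicy
    return rsolve_vi(mdp, 0.95, nature, thresholds);
}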
// Modified policy iteration using Jacobi value iteration in the inner loop.
template<class SType, class T = prec_t>
inline auto rsolve_mpi(const GRMDP<SType>& mdp, prec_t discount,
                const vector<NatureResponse<T>>& nature, const vector<T>& thresholds,
                const numvec& valuefunction = numvec(0), const indvec& policy = indvec(0),
                unsigned long iterations_pi = MAXITER, prec_t maxresidual_pi = SOLPREC,
                unsigned long iterations_vi = MAXITER, prec_t maxresidual_vi = SOLPREC/2,
                bool print_progress = false){
    assert(nature.size() == thresholds.size());
    return mpi_jac<SType, PolicyNature<T>>(mdp, discount, valuefunction,
                    PolicyNature<T>(policy, zip(nature, thresholds)),
                    iterations_pi, maxresidual_pi,
                    iterations_vi, maxresidual_vi,
                    print_progress);
}

// A second overload with an alternative nature specification forwards identically:
template<class SType, class T = prec_t>
inline auto rsolve_mpi(/* ... */
                bool print_progress = false){
    assert(nature.size() == thresholds.size());
    return mpi_jac<SType, PolicyNature<T>>(mdp, discount, valuefunction,
                    /* ... */,
                    iterations_pi, maxresidual_pi,
                    iterations_vi, maxresidual_vi,
                    print_progress);
}
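rsolve_mpi takes the same model and nature arguments as rsolve_vi, plus separate iteration and residual limits for the outer policy updates and the inner Jacobi sweeps; modified policy iteration usually converges in fewer sweeps than plain value iteration. Continuing the previous sketch (all limits here are made-up values):

auto solve_robust_mpi(const GRMDP<SAState<RegularAction>>& mdp){
    size_t n = mdp.state_count();
    vector<NatureResponse<prec_t>> nature(n, robust_l1);
    vector<prec_t> thresholds(n, 0.3);
    // outer (policy) loop: at most 100 updates to residual 1e-4;
    // inner (Jacobi value) loop: at most 1000 sweeps to residual 5e-5
    return rsolve_mpi(mdp, 0.95, nature, thresholds,
                      numvec(0), indvec(0),
                      100, 1e-4, 1000, 5e-5, true); // true: print progress
}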
// Converts a string representation of a nature response to the
// corresponding function.
inline NatureResponse<prec_t> string_to_nature(string nature){
    if(nature == "robust_l1") return robust_l1;
    // ... (further names are matched here)
    throw invalid_argument("Unknown nature.");
}

}} // namespace algorithms // namespace craam
SAState
State for sa-rectangular uncertainty (or no uncertainty) in an MDP.
Definition: State.hpp:38
Solution
A solution to a plain MDP.
Definition: values.hpp:211
vec_scal_t value_fix_state(const SAState< AType > &state, numvec const &valuefunction, prec_t discount, long actionid, const NatureInstance< T > &nature)
Computes the value of a fixed action and any response of nature.
Definition: robust_values.hpp:161
auto rsolve_mpi(const GRMDP< SType > &mdp, prec_t discount, const vector< NatureResponse< T >> &nature, const vector< T > &thresholds, const numvec &valuefunction=numvec(0), const indvec &policy=indvec(0), unsigned long iterations_pi=MAXITER, prec_t maxresidual_pi=SOLPREC, unsigned long iterations_vi=MAXITER, prec_t maxresidual_vi=SOLPREC/2, bool print_progress=false)
Modified policy iteration using Jacobi value iteration in the inner loop.
Definition: robust_values.hpp:434
prec_t update_solution(SolutionRobust &solution, const SType &state, long stateid, const numvec &valuefunction, prec_t discount) const
Computes the Bellman update and updates the solution to the best response. It does not update the valu...
Definition: robust_values.hpp:289
size_t size() const
Number of actions.
Definition: State.hpp:57
const numvec & get_distribution() const
Returns the baseline distribution over outcomes.
Definition: Action.hpp:361
const vector< Transition > & get_outcomes() const
Returns the list of outcomes.
Definition: Action.hpp:197
size_t size() const
Returns number of outcomes.
Definition: Action.hpp:185
PolicyNature< T > uniform_nature(size_t statecount, NatureResponse< T > nature, T threshold)
A helper function that simply copies a nature specification across all states.
Definition: robust_values.hpp:318
bool is_terminal() const
True if the state is considered terminal (no actions).
Definition: State.hpp:119
GRMDP
A general robust Markov decision process.
Definition: RMDP.hpp:182
const indvec & get_indices() const
Indices with positive probabilities.
Definition: Transition.hpp:323
SolutionRobust
A robust solution to a robust or regular MDP.
Definition: robust_values.hpp:233
double prec_t
Default precision used throughout the code.
Definition: definitions.hpp:25
WeightedOutcomeAction
An action in a robust MDP that allows for outcomes chosen by nature.
Definition: Action.hpp:230
vector< prec_t > numvec
Default numerical vector.
Definition: definitions.hpp:28
SolutionRobust(numvec valuefunction, indvec policy)
Empty SolutionRobust for a problem with policy and value function.
Definition: robust_values.hpp:245
vec_scal_t optimistic_l1(const numvec &v, const numvec &p, prec_t threshold)
L1 optimistic response.
Definition: robust_values.hpp:50
indvec policy
Index of the action to take for each state.
Definition: values.hpp:215
PolicyNature
The class abstracts some operations of value / policy iteration in order to generalize to various typ...
Definition: robust_values.hpp:260
vec_scal_t robust_l1(const numvec &v, const numvec &p, prec_t threshold)
L1 robust response.
Definition: robust_values.hpp:44
const numvec & get_probabilities() const
Returns the list of positive probabilities for the indices returned by get_indices.
Definition: Transition.hpp:332
SolutionRobust new_solution(size_t statecount, numvec valuefunction) const
Constructs a new robust solution.
Definition: robust_values.hpp:276
pair< numvec, prec_t > vec_scal_t
Pair of a vector and a scalar.
Definition: definitions.hpp:34
size_t state_count() const
Number of states.
Definition: RMDP.hpp:225
bool is_valid(long actionid) const
Returns whether the action is valid.
Definition: State.hpp:100
SolutionRobust()
Empty SolutionRobust.
Definition: robust_values.hpp:239
constexpr prec_t SOLPREC
Default solution precision.
Definition: definitions.hpp:40
const Transition & get_outcome(long outcomeid) const
Returns the single outcome.
Definition: Action.hpp:48
vector< numvec > natpolicy
Randomized policy of nature, probabilities only for states that have non-zero probability in the MDP ...
Definition: robust_values.hpp:236
NatureResponse< prec_t > string_to_nature(string nature)
Converts a string representation of a nature response to the appropriate nature response call...
Definition: robust_values.hpp:480
pair< numvec, prec_t > worstcase_l1(numvec const &z, numvec const &q, prec_t t)
Computes the solution of: min_p p^T z s.t. ||p - q||_1 <= t and p is a probability distribution.
Definition: definitions.hpp:111
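As a worked instance of this optimization: with z = (1, 3), q = (0.5, 0.5), and t = 0.4, the minimizer moves 0.2 of probability mass from the high-value to the low-value component (L1 distance 0.4), giving p = (0.7, 0.3) and objective 0.7*1 + 0.3*3 = 1.6. In code (a sketch; the numbers are made up):

pair<numvec, prec_t> worstcase_demo(){
    numvec z = {1.0, 3.0};   // values
    numvec q = {0.5, 0.5};   // nominal distribution
    // expected result: first = {0.7, 0.3}, second = 1.6
    return worstcase_l1(z, q, 0.4);
}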
vector< NatureInstance< T > > natspec
Specification of nature's response (the function that nature computes; it can differ for each state).
Definition: robust_values.hpp:265
tuple< prec_t, numvec, prec_t > ind_vec_scal_t
Tuple of an index, a vector, and a scalar.
Definition: definitions.hpp:37
auto rsolve_vi(const GRMDP< SType > &mdp, prec_t discount, const vector< NatureResponse< T >> &nature, const vector< T > &thresholds, numvec valuefunction=numvec(0), const indvec &policy=indvec(0), unsigned long iterations=MAXITER, prec_t maxresidual=SOLPREC)
Gauss-Seidel variant of value iteration (not parallelized).
Definition: robust_values.hpp:381
Helper functions.
Definition: State.hpp:204
SolutionRobust(size_t statecount)
Empty SolutionRobust for a problem with statecount states.
Definition: robust_values.hpp:242
vec_scal_t optimistic_unbounded(const numvec &v, const numvec &p, T)
Best outcome; the threshold is ignored.
Definition: robust_values.hpp:71
const numvec & get_rewards() const
Rewards for indices with positive probabilities returned by get_indices.
Definition: Transition.hpp:337
RegularAction
Action in a regular MDP.
Definition: Action.hpp:31
ind_vec_scal_t value_max_state(const SAState< AType > &state, const numvec &valuefunction, prec_t discount, const NatureInstance< T > &nature)
Finds the greedy action and its value for the given value function.
Definition: robust_values.hpp:195
const vector< AType > & get_actions() const
Returns the set of all actions.
Definition: State.hpp:116
vec_scal_t(*)(numvec const &v, numvec const &p, T threshold) NatureResponse
Function representing constraints on nature.
Definition: robust_values.hpp:34
PolicyNature(vector< NatureInstance< T >> natspec)
Constructs the object from a specification of nature; the policy is left unspecified.
Definition: robust_values.hpp:272
prec_t update_value(const SolutionRobust &solution, const SType &state, long stateid, const numvec &valuefunction, prec_t discount) const
Computes a fixed Bellman update using the current solution policy.
Definition: robust_values.hpp:306
PolicyNature(indvec policy, vector< NatureInstance< T >> natspec)
Constructs the object from a policy and a specification of nature.
Definition: robust_values.hpp:268
vector< long > indvec
Default index vector.
Definition: definitions.hpp:31
constexpr unsigned long MAXITER
Default number of iterations.
Definition: definitions.hpp:43
Main namespace, which includes modeling and solving functionality.
Definition: Action.hpp:18
pair< NatureResponse< T >, T > NatureInstance
Represents an instance of nature that can be used to directly compute the response.
Definition: robust_values.hpp:40
vec_scal_t robust_unbounded(const numvec &v, const numvec &p, T)
Worst outcome; the threshold is ignored.
Definition: robust_values.hpp:61
vec_scal_t value_action(const RegularAction &action, const numvec &valuefunction, prec_t discount, const NatureInstance< T > &nature)
Computes an ambiguous value (e.g. robust or optimistic) of the action.
Definition: robust_values.hpp:94