10 #include "../cpp11-range-master/range.hpp"
76         throw invalid_argument("WeightedOutcomeAction with no outcomes");
81         averagevalue += distribution[i] * action[i].value(valuefunction, discount);
97     assert(distribution.size() == action.get_outcomes().size());
98     if(action.get_outcomes().empty()) throw invalid_argument("WeightedOutcomeAction with no outcomes");
100     prec_t averagevalue = 0.0;
102     for(size_t i = 0; i < action.get_outcomes().size(); i++)
103         averagevalue += distribution[i] * action[i].value(valuefunction, discount);
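Both excerpts above average outcome values under a distribution: the value of a weighted-outcome action is the distribution-weighted sum of the values of its outcomes. A minimal standalone sketch of that computation follows; it uses plain std::vector in place of the library's action and transition types, and weighted_action_value is an illustrative name, not part of the library.

    #include <cassert>
    #include <cstddef>
    #include <stdexcept>
    #include <vector>

    using prec_t = double;
    using numvec = std::vector<prec_t>;

    // Distribution-weighted average of per-outcome values, mirroring the excerpt:
    // averagevalue = sum_i distribution[i] * value_of_outcome_i.
    prec_t weighted_action_value(const numvec& outcomevalues, const numvec& distribution) {
        assert(distribution.size() == outcomevalues.size());
        if(outcomevalues.empty())
            throw std::invalid_argument("action with no outcomes");
        prec_t averagevalue = 0.0;
        for(std::size_t i = 0; i < outcomevalues.size(); i++)
            averagevalue += distribution[i] * outcomevalues[i];
        return averagevalue;
    }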
122 template<class AType>
126         return make_pair(-1,0.0);
128     prec_t maxvalue = -numeric_limits<prec_t>::infinity();
131     for(size_t i = 0; i < state.size(); i++){
132         auto const& action = state[i];
137         auto value = value_action(action, valuefunction, discount);
138         if(value >= maxvalue){
146         throw invalid_argument("all actions are invalid.");
148     return make_pair(result, maxvalue);
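The excerpt above is the core of the greedy action search (cf. value_max_state in the reference entries below): scan the actions, track the running maximum of value_action, return the (action index, value) pair, and treat a state with no actions as terminal with value 0. A standalone sketch of the same selection over precomputed action values; greedy_action is an illustrative name.

    #include <cstddef>
    #include <limits>
    #include <utility>
    #include <vector>

    using prec_t = double;

    // Greedy selection over already-computed action values; every entry is assumed valid.
    // Returns (-1, 0.0) for a terminal state with no actions, like the excerpt.
    std::pair<long, prec_t> greedy_action(const std::vector<prec_t>& actionvalues) {
        if(actionvalues.empty())
            return std::make_pair(-1, 0.0);
        prec_t maxvalue = -std::numeric_limits<prec_t>::infinity();
        long result = -1;
        for(std::size_t i = 0; i < actionvalues.size(); i++){
            if(actionvalues[i] >= maxvalue){
                maxvalue = actionvalues[i];
                result = long(i);
            }
        }
        return std::make_pair(result, maxvalue);
    }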
160 template<class AType>
162                               prec_t discount, long actionid) {
166     if(actionid < 0 || actionid >= (long) state.get_actions().size())
167         throw range_error("invalid actionid: " + to_string(actionid) + " for action count: " +
170     const auto& action = state[actionid];
172     if(!state.is_valid(actionid)) throw invalid_argument("Cannot take an invalid action");
187 template<class AType>
190                               long actionid, numvec distribution) {
194     assert(actionid >= 0 && actionid < long(state.size()));
196     if(actionid < 0 || actionid >= long(state.size())) throw range_error("invalid actionid: "
197         + to_string(actionid) + " for action count: " + to_string(state.get_actions().size()) );
199     const auto& action = state[actionid];
201     if(!state.is_valid(actionid)) throw invalid_argument("Cannot take an invalid action");
203     return value_action(action, valuefunction, discount, distribution);
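The two excerpts above evaluate a fixed action: range-check the action index, refuse invalid actions, and delegate to value_action (with an explicit outcome distribution in the second overload). A standalone sketch of the validate-then-evaluate pattern; fixed_action_value and the invalid flags are illustrative stand-ins, not library names.

    #include <stdexcept>
    #include <string>
    #include <vector>

    using prec_t = double;

    // Validate-then-evaluate pattern for a fixed action: range-check the index,
    // refuse actions flagged as invalid, then return the action's value.
    prec_t fixed_action_value(const std::vector<prec_t>& actionvalues,
                              const std::vector<bool>& invalid, long actionid) {
        if(actionid < 0 || actionid >= long(actionvalues.size()))
            throw std::range_error("invalid actionid: " + std::to_string(actionid) +
                                   " for action count: " + std::to_string(actionvalues.size()));
        if(invalid[actionid])
            throw std::invalid_argument("Cannot take an invalid action");
        return actionvalues[actionid];
    }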
221     Solution(): valuefunction(0), policy(0), residual(-1), iterations(-1) {};
224     Solution(size_t statecount): valuefunction(statecount, 0.0), policy(statecount, -1), residual(-1), iterations(-1) {};
228         valuefunction(move(valuefunction)), policy(move(policy)), residual(residual), iterations(iterations) {};
236         if(initial.max_index() >= (long) valuefunction.size()) throw invalid_argument("Too many indexes in the initial distribution.");
237         return initial.value(valuefunction);
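Solution::total_return checks that the initial distribution does not reference a state beyond the end of the value function and then returns the probability-weighted sum of state values. A standalone sketch with the sparse distribution spelled out as (index, probability) pairs; total_return_sketch is a free-function stand-in for the member.

    #include <stdexcept>
    #include <utility>
    #include <vector>

    using prec_t = double;
    using numvec = std::vector<prec_t>;

    // Probability-weighted sum of state values under a sparse initial distribution,
    // rejecting indexes that fall outside the value function (cf. total_return).
    prec_t total_return_sketch(const std::vector<std::pair<long, prec_t>>& initial,
                               const numvec& valuefunction) {
        prec_t result = 0.0;
        for(const auto& entry : initial){
            if(entry.first >= long(valuefunction.size()))
                throw std::invalid_argument("Too many indexes in the initial distribution.");
            result += entry.second * valuefunction[entry.first];
        }
        return result;
    }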
267     Solution new_solution(size_t statecount, numvec valuefunction) const {
268         process_valuefunction(statecount, valuefunction);
269         assert(valuefunction.size() == statecount);
270         Solution solution = Solution(move(valuefunction), process_policy(statecount));
277     template<class SType>
280         assert(stateid < long(solution.policy.size()));
284         if(policy.empty() || policy[stateid] < 0){
287             return value_fix_state(state, valuefunction, discount, policy[stateid]);
294     template<class SType>
301     void process_valuefunction(size_t statecount, numvec& valuefunction) const{
304         if(!valuefunction.empty()){
305             if(valuefunction.size() != statecount) throw invalid_argument("Incorrect dimensions of value function.");
307             valuefunction.assign(statecount, 0.0);
310     indvec process_policy(size_t statecount) const {
313         if(policy.size() != statecount) throw invalid_argument("Incorrect dimensions of policy function.");
316         return indvec(statecount, -1);
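PolicyDeterministic is the response type used by the solvers: update_solution optimizes a state when the partial policy is empty or prescribes -1, and otherwise evaluates the prescribed action via value_fix_state; process_valuefunction and process_policy only normalize the inputs to the right dimensions. A standalone sketch of the dispatch rule; update_state, best_value, and fixed_value are illustrative callbacks, not library names.

    #include <vector>

    using prec_t = double;
    using indvec = std::vector<long>;

    // Partial-policy dispatch: an empty policy or policy[stateid] == -1 means
    // "optimize this state"; otherwise the prescribed action is evaluated as fixed.
    // best_value and fixed_value are illustrative callbacks, not library functions.
    template<class BestFn, class FixedFn>
    prec_t update_state(const indvec& policy, long stateid,
                        BestFn best_value, FixedFn fixed_value) {
        if(policy.empty() || policy[stateid] < 0)
            return best_value(stateid);                 // greedy Bellman update
        return fixed_value(stateid, policy[stateid]);   // evaluate the fixed action
    }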
349 template<class SType, class ResponseType = PolicyDeterministic>
356     typename ResponseType::solution_type solution =
357             response.new_solution(states.size(), move(valuefunction));
363     prec_t residual = numeric_limits<prec_t>::infinity();
366     for(i = 0; i < iterations && residual > maxresidual; i++){
369         for(size_t s = 0l; s < states.size(); s++){
370             prec_t newvalue = response.update_solution(solution, states[s], s, solution.valuefunction, discount);
372             residual = max(residual, abs(solution.valuefunction[s] - newvalue));
373             solution.valuefunction[s] = newvalue;
376     solution.residual = residual;
377     solution.iterations = i;
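vi_gs is Gauss-Seidel value iteration: the value function is updated in place, so states later in the same sweep already see the new values, and the sweep's largest change is recorded as the Bellman residual that drives termination. A standalone sketch on a toy MDP with deterministic transitions (every state is assumed to have at least one action); all names are illustrative.

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <limits>
    #include <vector>

    using prec_t = double;
    using numvec = std::vector<prec_t>;

    // Gauss-Seidel value iteration on a toy MDP with deterministic transitions:
    // next[s][a] is the successor of state s under action a, reward[s][a] the reward.
    // The value function v is updated in place, so later states in the same sweep
    // already see the new values; the sweep's largest change is the Bellman residual.
    numvec gauss_seidel_vi(const std::vector<std::vector<long>>& next,
                           const std::vector<std::vector<prec_t>>& reward,
                           prec_t discount, unsigned long iterations, prec_t maxresidual) {
        numvec v(next.size(), 0.0);
        prec_t residual = std::numeric_limits<prec_t>::infinity();
        for(unsigned long i = 0; i < iterations && residual > maxresidual; i++){
            residual = 0.0;
            for(std::size_t s = 0; s < next.size(); s++){
                // Every state is assumed to have at least one action.
                prec_t newvalue = -std::numeric_limits<prec_t>::infinity();
                for(std::size_t a = 0; a < next[s].size(); a++)
                    newvalue = std::max(newvalue, reward[s][a] + discount * v[next[s][a]]);
                residual = std::max(residual, std::abs(v[s] - newvalue));
                v[s] = newvalue;
            }
        }
        return v;
    }

Because each update overwrites the value it reads, the state loop cannot safely run in parallel, which is why vi_gs is documented as not parallelized, in contrast to the Jacobi sweeps used by mpi_jac below.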
404 template<class SType, class ResponseType = PolicyDeterministic>
409                 bool print_progress=false) {
412     typename ResponseType::solution_type solution =
413             response.new_solution(states.size(), move(valuefunction));
418     numvec oddvalue = solution.valuefunction;
419     numvec evenvalue = oddvalue;
421     numvec residuals(states.size());
424     prec_t residual_pi = numeric_limits<prec_t>::infinity();
429     numvec * sourcevalue = & oddvalue;
430     numvec * targetvalue = & evenvalue;
432     for(i = 0; i < iterations_pi; i++){
435             cout << "Policy iteration " << i << "/" << iterations_pi << ":" << endl;
437         swap(targetvalue, sourcevalue);
439         prec_t residual_vi = numeric_limits<prec_t>::infinity();
442         #pragma omp parallel for
443         for(auto s = 0l; s < long(states.size()); s++){
444             prec_t newvalue = response.update_solution(solution, states[s], s, *sourcevalue, discount);
445             residuals[s] = abs((*sourcevalue)[s] - newvalue);
446             (*targetvalue)[s] = newvalue;
448         residual_pi = *max_element(residuals.cbegin(), residuals.cend());
450         if(print_progress) cout << " Bellman residual: " << residual_pi << endl;
453         if(residual_pi <= maxresidual_pi)
456         if(print_progress) cout << " Value iteration: " << flush;
459         for(size_t j = 0; j < iterations_vi && residual_vi > maxresidual_vi; j++){
460             if(print_progress) cout << "." << flush;
462             swap(targetvalue, sourcevalue);
464             #pragma omp parallel for
465             for(auto s = 0l; s < (long) states.size(); s++){
466                 prec_t newvalue = response.update_value(solution, states[s], s, *sourcevalue, discount);
467                 residuals[s] = abs((*sourcevalue)[s] - newvalue);
468                 (*targetvalue)[s] = newvalue;
470             residual_vi = *max_element(residuals.begin(), residuals.end());
472         if(print_progress) cout << endl << " Residual (fixed policy): " << residual_vi << endl << endl;
474     solution.valuefunction = move(*targetvalue);
475     solution.residual = residual_pi;
476     solution.iterations = i;
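mpi_jac alternates one policy-improving sweep (update_solution) with up to iterations_vi cheaper fixed-policy sweeps (update_value). The sweeps are Jacobi-style: values are read from one buffer and written to the other, with the two pointers swapped before each sweep, which is what makes the #pragma omp parallel for over states safe. A standalone sketch of one such double-buffered sweep; jacobi_sweep and the update callback are illustrative names.

    #include <algorithm>
    #include <cmath>
    #include <utility>
    #include <vector>

    using prec_t = double;
    using numvec = std::vector<prec_t>;

    // One Jacobi sweep: swap the buffers, read every state from *sourcevalue and write
    // *targetvalue, so the iterations of the state loop are independent and the
    // "#pragma omp parallel for" is safe. residuals must have one entry per state.
    // update(s, values) is an illustrative per-state Bellman-style callback.
    template<class UpdateFn>
    prec_t jacobi_sweep(numvec*& sourcevalue, numvec*& targetvalue,
                        numvec& residuals, UpdateFn update) {
        std::swap(targetvalue, sourcevalue);
        #pragma omp parallel for
        for(long s = 0; s < long(sourcevalue->size()); s++){
            prec_t newvalue = update(s, *sourcevalue);
            residuals[s] = std::abs((*sourcevalue)[s] - newvalue);
            (*targetvalue)[s] = newvalue;
        }
        return *std::max_element(residuals.cbegin(), residuals.cend());
    }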
504 template<class SType>
509     return vi_gs<SType, PolicyDeterministic>(mdp, discount, move(valuefunction),
532 template<class SType>
537                 bool print_progress=false) {
539     return mpi_jac<SType, PolicyDeterministic>(mdp, discount, valuefunction, PolicyDeterministic(policy),
540                     iterations_pi, maxresidual_pi,
541                     iterations_vi, maxresidual_vi,
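solve_vi and solve_mpi are thin convenience fronts: they wrap the optional partial policy in a PolicyDeterministic response and forward the discount, value function, and tolerances to vi_gs and mpi_jac. A hedged usage sketch follows; it assumes the declarations above are reachable through values.hpp (the include path and any namespace qualification depend on how the library is installed) and that mdp is an already-built GRMDP model. Only the Solution fields read here (valuefunction, policy, residual, iterations) are taken from this page.

    // Usage sketch under the assumptions stated above; not a verbatim library example.
    #include "values.hpp"   // assumed include path
    #include <cstddef>
    #include <iostream>

    template<class SType>
    void solve_and_report(const GRMDP<SType>& mdp) {
        const prec_t discount = 0.99;
        // Modified policy iteration with the default iteration limits and tolerances.
        auto solution = solve_mpi(mdp, discount);
        std::cout << "iterations: " << solution.iterations
                  << ", residual: " << solution.residual << std::endl;
        for(std::size_t s = 0; s < solution.valuefunction.size(); s++)
            std::cout << "state " << s << ": value = " << solution.valuefunction[s]
                      << ", action = " << solution.policy[s] << std::endl;
    }

solve_vi follows the same pattern with vi_gs, taking a single iteration limit and residual tolerance instead of the separate policy- and value-iteration controls.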
State for sa-rectangular uncertainty (or no uncertainty) in an MDP.
Definition: State.hpp:38
A solution to a plain MDP.
Definition: values.hpp:211
vec_scal_t value_fix_state(const SAState< AType > &state, numvec const &valuefunction, prec_t discount, long actionid, const NatureInstance< T > &nature)
Computes the value of a fixed action and any response of nature.
Definition: robust_values.hpp:161
size_t size() const
Number of actions.
Definition: State.hpp:57
const numvec & get_distribution() const
Returns the baseline distribution over outcomes.
Definition: Action.hpp:361
const vector< Transition > & get_outcomes() const
Returns the list of outcomes.
Definition: Action.hpp:197
prec_t residual
Bellman residual of the computation.
Definition: values.hpp:217
auto solve_mpi(const GRMDP< SType > &mdp, prec_t discount, const numvec &valuefunction=numvec(0), const indvec &policy=indvec(0), unsigned long iterations_pi=MAXITER, prec_t maxresidual_pi=SOLPREC, unsigned long iterations_vi=MAXITER, prec_t maxresidual_vi=SOLPREC/2, bool print_progress=false)
Modified policy iteration using Jacobi value iteration in the inner loop.
Definition: values.hpp:533
bool is_terminal() const
True if the state is considered terminal (no actions).
Definition: State.hpp:119
const vector< SType > & get_states() const
Definition: RMDP.hpp:248
A general robust Markov decision process.
Definition: RMDP.hpp:182
long max_index() const
Returns the maximal index involved in the transition.
Definition: Transition.hpp:262
PolicyDeterministic()
All actions will be optimized.
Definition: values.hpp:260
double prec_t
Default precision used throughout the code.
Definition: definitions.hpp:25
prec_t update_solution(Solution &solution, const SType &state, long stateid, const numvec &valuefunction, prec_t discount) const
Computes the Bellman update and updates the solution to the best response. It does not update the valu...
Definition: values.hpp:278
numvec valuefunction
Value function.
Definition: values.hpp:213
An action in a robust MDP that allows for outcomes chosen by nature.
Definition: Action.hpp:230
auto mpi_jac(const GRMDP< SType > &mdp, prec_t discount, const numvec &valuefunction=numvec(0), const ResponseType &response=PolicyDeterministic(), unsigned long iterations_pi=MAXITER, prec_t maxresidual_pi=SOLPREC, unsigned long iterations_vi=MAXITER, prec_t maxresidual_vi=SOLPREC/2, bool print_progress=false)
Modified policy iteration using Jacobi value iteration in the inner loop.
Definition: values.hpp:405
vector< prec_t > numvec
Default numerical vector.
Definition: definitions.hpp:28
indvec policy
Index of the action to take for each state.
Definition: values.hpp:215
Solution(size_t statecount)
Empty solution for a problem with statecount states.
Definition: values.hpp:224
Definition: values.hpp:252
indvec policy
Partial policy specification (action -1 is ignored and optimized)
Definition: values.hpp:257
size_t state_count() const
Number of states.
Definition: RMDP.hpp:225
bool is_valid(long actionid) const
Returns whether the action is valid.
Definition: State.hpp:100
prec_t update_value(const Solution &solution, const SType &state, long stateid, const numvec &valuefunction, prec_t discount) const
Computes a fixed Bellman update using the current solution policy.
Definition: values.hpp:295
Represents sparse transition probabilities and rewards from a single state.
Definition: Transition.hpp:31
constexpr prec_t SOLPREC
Default solution precision.
Definition: definitions.hpp:40
const Transition & get_outcome(long outcomeid) const
Returns the single outcome.
Definition: Action.hpp:48
Solution(numvec valuefunction, indvec policy, prec_t residual=-1, long iterations=-1)
Solution for a problem with a given value function and policy.
Definition: values.hpp:227
prec_t value(numvec const &valuefunction, prec_t discount, numvec probabilities) const
Computes value for the transition and a value function.
Definition: Transition.hpp:202
long iterations
Number of iterations taken.
Definition: values.hpp:219
Action in a regular MDP.
Definition: Action.hpp:31
ind_vec_scal_t value_max_state(const SAState< AType > &state, const numvec &valuefunction, prec_t discount, const NatureInstance< T > &nature)
Finds the greedy action and its value for the given value function.
Definition: robust_values.hpp:195
const vector< AType > & get_actions() const
Returns set of all actions.
Definition: State.hpp:116
PolicyDeterministic(indvec policy)
A partial policy that can be used to fix some actions; policy[s] = -1 means that the action should be ...
Definition: values.hpp:265
auto vi_gs(const GRMDP< SType > &mdp, prec_t discount, numvec valuefunction=numvec(0), const ResponseType &response=PolicyDeterministic(), unsigned long iterations=MAXITER, prec_t maxresidual=SOLPREC)
Gauss-Seidel variant of value iteration (not parallelized).
Definition: values.hpp:350
vector< long > indvec
Default index vector.
Definition: definitions.hpp:31
constexpr unsigned long MAXITER
Default number of iterations.
Definition: definitions.hpp:43
auto solve_vi(const GRMDP< SType > &mdp, prec_t discount, numvec valuefunction=numvec(0), const indvec &policy=indvec(0), unsigned long iterations=MAXITER, prec_t maxresidual=SOLPREC)
Gauss-Seidel variant of value iteration (not parallelized).
Definition: values.hpp:505
Main namespace which includes modeling and solving functionality.
Definition: Action.hpp:18
prec_t total_return(const Transition &initial) const
Computes the total return of the solution given the initial distribution.
Definition: values.hpp:235
vec_scal_t value_action(const RegularAction &action, const numvec &valuefunction, prec_t discount, const NatureInstance< T > &nature)
Computes an ambiguous value (e.g.
Definition: robust_values.hpp:94