6 #include "../cpp11-range-master/range.hpp" 8 namespace craam{
namespace algorithms{
11 using namespace Eigen;
19 return state.mean_transition(policies.first[index], policies.second[index]);
25 return state.mean_transition(policy[index]);
31 return state.mean_reward(policies.first[index], policies.second[index]);
37 return state.mean_reward(policy[index]);
55 template<
typename SType,
typename Policies>
58 MatrixXd result = MatrixXd::Zero(n,n);
61 #pragma omp parallel for 62 for(
size_t s = 0; s < n; s++){
69 for(
size_t j=0; j < t.
size(); j++)
70 result(s,indexes[j]) = probabilities[j];
72 for(
size_t j=0; j < t.
size(); j++)
73 result(indexes[j],s) = probabilities[j];
90 template<
typename SType,
typename Policy>
96 #pragma omp parallel for 97 for(
size_t s=0; s < n; s++){
98 const SType& state = rmdp[s];
99 if(state.is_terminal())
122 template<
typename SType,
typename Policies>
125 const Policies& policies) {
130 const VectorXd initial_vec = Map<const VectorXd,Unaligned>(ivec.data(),ivec.size());
133 MatrixXd t_mat = MatrixXd::Identity(n,n) - discount *
transition_mat(rmdp, policies,
true);
137 Map<VectorXd,Unaligned>(result.data(),result.size()) = HouseholderQR<MatrixXd>(t_mat).solve(initial_vec);
numvec occfreq_mat(const GRMDP< SType > &rmdp, const Transition &init, prec_t discount, const Policies &policies)
Computes occupancy frequencies using matrix representation of transition probabilities.
Definition: occupancies.hpp:124
numvec rewards_vec(const GRMDP< SType > &rmdp, const Policy &policies)
Constructs the rewards vector, with one entry for each state of the RMDP.
Definition: occupancies.hpp:91
numvec probabilities_vector(size_t size) const
Constructs and returns a dense vector of probabilities, which includes 0 transition probabilities...
Definition: Transition.hpp:296
prec_t mean_reward_state(const SType &state, long index, const pair< indvec, vector< numvec >> &policies)
Helper function to deal with variable indexing.
Definition: occupancies.hpp:30
const vector< SType > & get_states() const
Definition: RMDP.hpp:248
MatrixXd transition_mat(const GRMDP< SType > &rmdp, const Policies &policies, bool transpose=false)
Constructs the transition (or its transpose) matrix for the policy.
Definition: occupancies.hpp:56
A general robust Markov decision process.
Definition: RMDP.hpp:182
const indvec & get_indices() const
Indices with positive probabilities.
Definition: Transition.hpp:323
double prec_t
Default precision used throughout the code.
Definition: definitions.hpp:25
vector< prec_t > numvec
Default numerical vector.
Definition: definitions.hpp:28
const numvec & get_probabilities() const
Returns the list of positive probabilities for the indices returned by get_indices.
Definition: Transition.hpp:332
size_t state_count() const
Number of states.
Definition: RMDP.hpp:225
Transition mean_transition_state(const SType &state, long index, const pair< indvec, vector< numvec >> &policies)
Helper function to deal with variable indexing.
Definition: occupancies.hpp:18
size_t size() const
Returns the number of target states with non-zero transition probabilities.
Definition: Transition.hpp:249
Represents sparse transition probabilities and rewards from a single state.
Definition: Transition.hpp:31
helper functions
Definition: State.hpp:204
vector< long > indvec
Default index vector.
Definition: definitions.hpp:31
Main namespace, which includes modeling and solving functionality.
Definition: Action.hpp:18