CRAAM 2.0.0
Robust and Approximate Markov Decision Processes
occupancies.hpp
#pragma once

#include "../RMDP.hpp"

#include <Eigen/Dense>
#include "../cpp11-range-master/range.hpp"

namespace craam{namespace algorithms{

using namespace std;
using namespace Eigen;

/// Helper functions
namespace internal{

    /// Helper function to deal with variable indexing
    template<class SType>
    inline Transition mean_transition_state(const SType& state, long index,
                    const pair<indvec,vector<numvec>>& policies){
        return state.mean_transition(policies.first[index], policies.second[index]);
    }

    /// Helper function to deal with variable indexing
    template<class SType>
    inline Transition mean_transition_state(const SType& state, long index,
                    const indvec& policy){
        return state.mean_transition(policy[index]);
    }

    /// Helper function to deal with variable indexing
    template<class SType>
    inline prec_t mean_reward_state(const SType& state, long index,
                    const pair<indvec,vector<numvec>>& policies){
        return state.mean_reward(policies.first[index], policies.second[index]);
    }

    /// Helper function to deal with variable indexing
    template<class SType>
    inline prec_t mean_reward_state(const SType& state, long index,
                    const indvec& policy){
        return state.mean_reward(policy[index]);
    }
}

/**
Constructs the transition (or its transpose) matrix for the policy.

\param rmdp Regular or robust MDP
\param policies The policy (indvec) or the pair of the policy and the
                policy of nature (pair<indvec,vector<numvec>>)
\param transpose If true, returns the transpose of the transition matrix
\return Dense matrix of transition probabilities under the policy
*/
template<typename SType, typename Policies>
inline MatrixXd transition_mat(const GRMDP<SType>& rmdp, const Policies& policies,
                bool transpose = false) {
    const size_t n = rmdp.state_count();
    MatrixXd result = MatrixXd::Zero(n,n);

    const auto& states = rmdp.get_states();
    #pragma omp parallel for
    for(size_t s = 0; s < n; s++){
        // transition probabilities from state s under the policy
        const Transition t = internal::mean_transition_state(states[s], s, policies);

        const auto& indexes = t.get_indices();
        const auto& probabilities = t.get_probabilities();

        if(!transpose){
            for(size_t j=0; j < t.size(); j++)
                result(s,indexes[j]) = probabilities[j];
        }else{
            for(size_t j=0; j < t.size(); j++)
                result(indexes[j],s) = probabilities[j];
        }
    }
    return result;
}
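
// Illustration (not from the library): for a two-state model where the policy
// leaves state 0 for state 1 with probability 0.3 and stays otherwise, and
// state 1 is absorbing,
//
//     transition_mat(rmdp, policy)        returns  [ 0.7  0.3 ]
//                                                  [ 0.0  1.0 ]
//
// and transition_mat(rmdp, policy, true) returns its transpose, which is the
// form consumed by occfreq_mat below.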

/**
Constructs the rewards vector for each state for the RMDP.

\param rmdp Regular or robust MDP
\param policies The policy (indvec) or the pair of the policy and the
                policy of nature (pair<indvec,vector<numvec>>)
\return Mean reward in each state under the policy; terminal states get 0
*/
template<typename SType, typename Policy>
inline numvec rewards_vec(const GRMDP<SType>& rmdp, const Policy& policies){

    const auto n = rmdp.state_count();
    numvec rewards(n);

    #pragma omp parallel for
    for(size_t s=0; s < n; s++){
        const SType& state = rmdp[s];
        // terminal states generate no reward
        if(state.is_terminal())
            rewards[s] = 0;
        else
            rewards[s] = internal::mean_reward_state(state, s, policies);
    }
    return rewards;
}

/**
Computes occupancy frequencies using matrix representation of transition
probabilities.

\param rmdp Regular or robust MDP
\param init Initial distribution over states
\param discount Discount factor (gamma)
\param policies The policy (indvec) or the pair of the policy and the
                policy of nature (pair<indvec,vector<numvec>>)
\return Occupancy frequency for each state
*/
template<typename SType, typename Policies>
inline numvec
occfreq_mat(const GRMDP<SType>& rmdp, const Transition& init, prec_t discount,
            const Policies& policies) {
    const auto n = rmdp.state_count();

    // initial distribution as a dense Eigen vector
    const numvec& ivec = init.probabilities_vector(n);
    const VectorXd initial_vec = Map<const VectorXd,Unaligned>(ivec.data(),ivec.size());

    // get transition matrix and construct (I - gamma * P^T)
    MatrixXd t_mat = MatrixXd::Identity(n,n) - discount * transition_mat(rmdp, policies, true);

    // solve the set of linear equations
    numvec result(n,0);
    Map<VectorXd,Unaligned>(result.data(),result.size()) =
                HouseholderQR<MatrixXd>(t_mat).solve(initial_vec);

    return result;
}
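
// Note on the linear algebra above: with P the policy's transition matrix and
// d0 the initial distribution, the discounted occupancy frequency is
//
//     u = sum_{t >= 0} gamma^t (P^T)^t d0 = (I - gamma P^T)^{-1} d0,
//
// so u is the unique solution of (I - gamma P^T) u = d0 for gamma < 1, which
// occfreq_mat obtains through a Householder QR factorization.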

}}
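
A minimal usage sketch, not part of the header: it assumes the usual craam::MDP
alias for GRMDP<RegularState>, a Transition constructor taking (indices,
probabilities, rewards), the include path shown, and a hypothetical build_mdp()
helper standing in for model construction.

// Usage sketch (assumptions noted above): occupancy frequencies and expected
// rewards under a fixed deterministic policy.
#include "craam/algorithms/occupancies.hpp"

#include <iostream>

using namespace craam;
using namespace craam::algorithms;

MDP build_mdp();    // hypothetical: builds the model (definition not shown)

int main(){
    MDP rmdp = build_mdp();
    const auto n = rmdp.state_count();

    indvec policy(n, 0);                 // deterministic policy: action 0 everywhere
    Transition init({0}, {1.0}, {0.0});  // start in state 0 with probability 1 (assumed ctor)

    // discounted state-occupancy frequencies under the policy
    numvec freq = occfreq_mat(rmdp, init, 0.95, policy);
    // mean one-step reward in each state under the same policy
    numvec rew = rewards_vec(rmdp, policy);

    for(size_t s = 0; s < n; s++)
        std::cout << "state " << s << ": freq " << freq[s]
                  << ", reward " << rew[s] << "\n";
    return 0;
}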