CRAAM  2.0.0
Robust and Approximate Markov Decision Processes
State.hpp
1 #pragma once
2 
3 #include "Action.hpp"
4 
5 #include <utility>
6 #include <tuple>
7 #include <vector>
8 #include <stdexcept>
9 #include <limits>
10 #include <string>
11 
12 #include "cpp11-range-master/range.hpp"
13 
14 
15 namespace craam {
16 
17 using namespace std;
18 
19 
20 
21 // **************************************************************************************
22 // SA State (SA rectangular, also used for a regular MDP)
23 // **************************************************************************************
24 
37 template<class AType>
38 class SAState{
39 protected:
41  vector<AType> actions;
44  vector<bool> valid;
45 
46 public:
47 
48  SAState() : actions(0), valid(0) {};
49 
51  SAState(const vector<AType>& actions) : actions(actions), valid(actions.size(),true) { };
52 
54  size_t action_count() const { return actions.size();};
55 
57  size_t size() const { return action_count();};
58 
66  AType& create_action(long actionid){
67  assert(actionid >= 0);
68 
69  // assumes that the default constructor makes the actions invalid
70  if(actionid >= (long) actions.size()){
71  actions.resize(actionid+1);
72  valid.resize(actionid+1, false);
73  }
74 
75  // set only the action that is being added as valid
76  valid[actionid] = true;
77  return actions[actionid];
78  }
79 
81  AType& create_action() {return create_action(actions.size());};
82 
84  const AType& get_action(long actionid) const
85  {assert(actionid >= 0 && size_t(actionid) < action_count());
86  return actions[actionid];};
87 
89  const AType& operator[](long actionid) const {return get_action(actionid);}
90 
92  AType& get_action(long actionid)
93  {assert(actionid >= 0 && size_t(actionid) < action_count());
94  return actions[actionid];};
95 
97  AType& operator[](long actionid) {return get_action(actionid);}
98 
100  bool is_valid(long actionid) const {
101  assert(actionid < long(valid.size()) && actionid >= 0);
102  return valid[actionid];
103  };
104 
109  void set_valid(long actionid, bool value = true){
110  assert(actionid < long(valid.size()) && actionid >= 0);
111  valid[actionid] = value;
112  };
113 
114 
116  const vector<AType>& get_actions() const {return actions;};
117 
119  bool is_terminal() const {return actions.empty();};
120 
122  void normalize(){
123  for(AType& a : actions)
124  a.normalize();
125  }
126 
128  bool is_action_correct(long aid, numvec nataction) const{
129  if( (aid < 0) || ((size_t)aid >= actions.size()))
130  return false;
131 
132  return actions[aid].is_nature_correct(nataction);
133  }
134 
136  bool is_action_correct(long aid) const{
137  if( (aid < 0) || ((size_t)aid >= actions.size()))
138  return false;
139  else
140  return true;
141  }
142 
144  prec_t mean_reward(long actionid, numvec nataction) const{
145  if(is_terminal()) return 0;
146  else return get_action(actionid).mean_reward(nataction);
147  }
148 
150  prec_t mean_reward(long actionid) const{
151  if(is_terminal()) return 0;
152  else return get_action(actionid).mean_reward();
153  }
154 
161  Transition mean_transition(long action, numvec nataction) const{
162  if(is_terminal()) return Transition();
163  else return get_action(action).mean_transition(nataction);
164  }
165 
169  Transition mean_transition(long action) const{
170  if(is_terminal()) return Transition();
171  else return get_action(action).mean_transition();
172  }
173 
176  string to_json(long stateid = -1) const{
177  string result{"{"};
178  result += "\"stateid\" : ";
179  result += std::to_string(stateid);
180  result += ",\"actions\" : [";
181  for(auto ai : indices(actions)){
182  const auto& a = actions[ai];
183  result += a.to_json(ai);
184  result += ",";
185  }
186  if(!actions.empty()) result.pop_back(); // remove last comma
187  result += ("]}");
188  return result;
189  }
190 };
191 
192 // **********************************************************************
193 // ********************* SPECIFIC STATE DEFINITIONS ***************
194 // **********************************************************************
195 
200 }
201 
202 
204 namespace internal{
205  using namespace craam;
206 
208  template<class SType>
209  bool is_action_correct(const SType& state, long stateid, const std::pair<indvec,vector<numvec>>& policies){
210  return state.is_action_correct(policies.first[stateid], policies.second[stateid]);
211  }
212 
214  template<class SType>
215  bool is_action_correct(const SType& state, long stateid, const indvec& policy){
216  return state.is_action_correct(policy[stateid]);
217  }
218 }
State for sa-rectangular uncertainty (or no uncertainty) in an MDP.
Definition: State.hpp:38
const AType & operator[](long actionid) const
Returns an existing action.
Definition: State.hpp:89
size_t size() const
Number of actions.
Definition: State.hpp:57
bool is_terminal() const
True if the state is considered terminal (no actions).
Definition: State.hpp:119
SAState< RegularAction > RegularState
Regular MDP state with no outcomes.
Definition: State.hpp:197
AType & create_action()
Creates an action at the last position of the state.
Definition: State.hpp:81
double prec_t
Default precision used throughout the code.
Definition: definitions.hpp:25
Transition mean_transition(long action) const
Returns the mean transition probabilities following the action and outcome.
Definition: State.hpp:169
size_t action_count() const
Number of actions.
Definition: State.hpp:54
vector< prec_t > numvec
Default numerical vector.
Definition: definitions.hpp:28
Transition mean_transition(long action, numvec nataction) const
Returns the mean transition probabilities following the action and outcome.
Definition: State.hpp:161
SAState< WeightedOutcomeAction > WeightedRobustState
State with uncertain outcomes with L1 constraints on the distribution.
Definition: State.hpp:199
prec_t mean_reward(long actionid, numvec nataction) const
Returns the mean reward following the action (and outcome).
Definition: State.hpp:144
vector< bool > valid
whether actions can be used in computation. If false, that means that they should not be used in algo...
Definition: State.hpp:44
void set_valid(long actionid, bool value=true)
Set action validity.
Definition: State.hpp:109
bool is_valid(long actionid) const
Returns whether the action is valid.
Definition: State.hpp:100
vector< AType > actions
list of actions
Definition: State.hpp:41
bool is_action_correct(long aid) const
Checks whether the prescribed action is correct.
Definition: State.hpp:136
Represents sparse transition probabilities and rewards from a single state.
Definition: Transition.hpp:31
prec_t mean_reward(long actionid) const
Returns the mean reward following the action.
Definition: State.hpp:150
const AType & get_action(long actionid) const
Returns an existing action.
Definition: State.hpp:84
AType & operator[](long actionid)
Returns an existing action.
Definition: State.hpp:97
void normalize()
Normalizes transition probabilities to sum to one.
Definition: State.hpp:122
helper functions
Definition: State.hpp:204
bool is_action_correct(long aid, numvec nataction) const
Checks whether the prescribed action and outcome are correct.
Definition: State.hpp:128
AType & get_action(long actionid)
Returns an existing action.
Definition: State.hpp:92
const vector< AType > & get_actions() const
Returns set of all actions.
Definition: State.hpp:116
AType & create_action(long actionid)
Creates an action given by actionid if it does not exist.
Definition: State.hpp:66
bool is_action_correct(const SType &state, long stateid, const std::pair< indvec, vector< numvec >> &policies)
checks state and policy with a policy of nature
Definition: State.hpp:209
string to_json(long stateid=-1) const
Returns json representation of the state.
Definition: State.hpp:176
SAState(const vector< AType > &actions)
Initializes state with actions and sets them all to valid.
Definition: State.hpp:51
vector< long > indvec
Default index vector.
Definition: definitions.hpp:31
Main namespace which includes modeling and solving functionality.
Definition: Action.hpp:18