CRAAM 2.0.0
Robust and Approximate Markov Decision Processes
RMDP.hpp
1 #pragma once
2 
3 #include "State.hpp"
4 
5 #include <vector>
6 #include <istream>
7 #include <fstream>
8 #include <memory>
9 #include <tuple>
10 #include <cassert>
11 #include <limits>
12 #include <algorithm>
13 #include <string>
14 #include <sstream>
15 #include <utility>
16 #include <iostream>
17 
18 #include "cpp11-range-master/range.hpp"
19 
153 namespace craam {
155 
156 using namespace std;
157 using namespace util::lang;
158 
159 // **************************************************************************************
160 // Generic MDP Class
161 // **************************************************************************************
162 
181 template<class SType>
182 class GRMDP{
183 protected:
185  vector<SType> states;
186 
187 public:
188 
190  typedef indvec policy_det;
192  typedef vector<numvec> policy_rand;
193 
201  GRMDP(long state_count) : states(state_count){};
202 
204  GRMDP() : states() {};
205 
211  SType& create_state(long stateid){
212  assert(stateid >= 0);
213  if(stateid >= (long) states.size())
214  states.resize(stateid + 1);
215  return states[stateid];
216  }
217 
222  SType& create_state(){ return create_state(states.size());};
223 
225  size_t state_count() const {return states.size();};
226 
228  size_t size() const {return state_count();};
229 
231  const SType& get_state(long stateid) const {
232  assert(stateid >= 0 && size_t(stateid) < state_count());
233  return states[stateid];};
234 
236  const SType& operator[](long stateid) const {return get_state(stateid);};
237 
238 
240  SType& get_state(long stateid) {
241  assert(stateid >= 0 && size_t(stateid) < state_count());
242  return states[stateid];};
243 
245  SType& operator[](long stateid){return get_state(stateid);};
246 
248  const vector<SType>& get_states() const {return states;};
249 
256  bool is_normalized() const{
257  for(auto const& s : states){
258  for(auto const& a : s.get_actions()){
259  for(auto const& t : a.get_outcomes()){
260  if(!t.is_normalized()) return false;
261  } } }
262  return true;
263  }
264 
266  void normalize(){
267  for(SType& s : states)
268  s.normalize();
269  }
270 
284  template<typename Policy>
285  long is_policy_correct(const Policy& policies) const{
286  for(auto si : indices(states) ){
287  // ignore terminal states
288  if(states[si].is_terminal())
289  continue;
290 
291  // call function of the state
292  if(!states[si].is_action_correct(policies))
293  return si;
294  }
295  return -1;
296  }
297 
298  // ----------------------------------------------
299  // Reading and writing files
300  // ----------------------------------------------
301 
319  void to_csv(ostream& output, bool header = true) const{
320  // write the header if so requested
321  if(header){
322  output << "idstatefrom," << "idaction," <<
323  "idoutcome," << "idstateto," << "probability," << "reward" << endl;
324  }
325 
326  //idstatefrom
327  for(size_t i = 0l; i < this->states.size(); i++){
328  const auto& actions = (this->states[i]).get_actions();
329  //idaction
330  for(size_t j = 0; j < actions.size(); j++){
331  const auto& outcomes = actions[j].get_outcomes();
332  //idoutcome
333  for(size_t k = 0; k < outcomes.size(); k++){
334  const auto& tran = outcomes[k];
335 
336  const auto& indices = tran.get_indices();
337  const auto& rewards = tran.get_rewards();
338  const auto& probabilities = tran.get_probabilities();
339  //idstateto
340  for (size_t l = 0; l < tran.size(); l++){
341  output << i << ',' << j << ',' << k << ',' << indices[l] << ','
342  << probabilities[l] << ',' << rewards[l] << endl;
343  }
344  }
345  }
346  }
347  }
348 
354  void to_csv_file(const string& filename, bool header = true) const{
355  ofstream ofs(filename, ofstream::out);
356  to_csv(ofs,header);
357  ofs.close();
358  }
359 
360  // string representation
365  string to_string() const{
366  string result;
367 
368  for(size_t si : indices(states)){
369  const auto& s = get_state(si);
370  result += (std::to_string(si));
371  result += (" : ");
372  result += (std::to_string(s.action_count()));
373  result += ("\n");
374  for(size_t ai : indices(s)){
375  result += (" ");
376  result += (std::to_string(ai));
377  result += (" : ");
378  const auto& a = s.get_action(ai);
379  a.to_string(result);
380  result += ("\n");
381  }
382  }
383  return result;
384  }
385 
390  string to_json() const{
391  string result{"{\"states\" : ["};
392  for(auto si : indices(states)){
393  const auto& s = states[si];
394  result += s.to_json(si);
395  result += ",";
396  }
397  if(!states.empty()) result.pop_back(); // remove last comma
398  result += "]}";
399  return result;
400 
401  }
402 };
403 
404 // **********************************************************************
405 // ********************* TEMPLATE DECLARATIONS ********************
406 // **********************************************************************
407 
411 typedef GRMDP<RegularState> MDP;
412 
416 typedef GRMDP<WeightedRobustState> RMDP;
417 
418 }
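
A minimal usage sketch (not part of RMDP.hpp): it exercises only the GRMDP interface listed above together with the MDP typedef. Populating transitions goes through the State/Action classes declared in State.hpp, which are not shown here; the output file name is arbitrary.

#include "RMDP.hpp"
#include <iostream>

int main(){
    craam::MDP mdp;                  // GRMDP<RegularState>
    mdp.create_state(2);             // ensures states 0, 1 and 2 exist
    std::cout << mdp.state_count() << std::endl;   // prints 3

    // transitions would be added here via the State/Action interface (State.hpp)

    if(!mdp.is_normalized())
        mdp.normalize();             // rescale transition probabilities to sum to one

    mdp.to_csv_file("mdp.csv");      // idstatefrom,idaction,idoutcome,idstateto,probability,reward
    std::cout << mdp.to_json() << std::endl;
    return 0;
}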
GRMDP(long state_count)
Constructs the RMDP with a pre-allocated number of states.
Definition: RMDP.hpp:201
indvec policy_det
Decision-maker's policy: Which action to take in which state.
Definition: RMDP.hpp:190
const vector< SType > & get_states() const
Definition: RMDP.hpp:248
A general robust Markov decision process.
Definition: RMDP.hpp:182
size_t size() const
Number of states.
Definition: RMDP.hpp:228
bool is_normalized() const
Check if all transitions in the process sum to one.
Definition: RMDP.hpp:256
const SType & get_state(long stateid) const
Retrieves an existing state.
Definition: RMDP.hpp:231
vector< SType > states
Internal list of states.
Definition: RMDP.hpp:185
void to_csv_file(const string &filename, bool header=true) const
Saves the transition probabilities and rewards to a CSV file.
Definition: RMDP.hpp:354
GRMDP< RegularState > MDP
Regular MDP with discrete actions and one outcome per action.
Definition: RMDP.hpp:411
size_t state_count() const
Number of states.
Definition: RMDP.hpp:225
GRMDP()
Constructs an empty RMDP.
Definition: RMDP.hpp:204
SType & create_state()
Creates a new state at the end of the state list.
Definition: RMDP.hpp:222
string to_string() const
Returns a brief string representation of the RMDP.
Definition: RMDP.hpp:365
SType & create_state(long stateid)
Ensures that the MDP state exists; if it does not, it is created.
Definition: RMDP.hpp:211
SType & operator[](long stateid)
Retrieves an existing state.
Definition: RMDP.hpp:245
string to_json() const
Returns a json representation of the RMDP.
Definition: RMDP.hpp:390
vector< long > indvec
Default index vector.
Definition: definitions.hpp:31
GRMDP< WeightedRobustState > RMDP
An uncertain MDP with outcomes and weights.
Definition: RMDP.hpp:416
long is_policy_correct(const Policy &policies) const
Checks if the policy and nature&#39;s policy are both correct.
Definition: RMDP.hpp:285
const SType & operator[](long stateid) const
Retrieves an existing state.
Definition: RMDP.hpp:236
SType & get_state(long stateid)
Retrieves an existing state.
Definition: RMDP.hpp:240
Main namespace which includes modeling and solving functionality.
Definition: Action.hpp:18
void normalize()
Normalize all transitions to sum to one for all states, actions, outcomes.
Definition: RMDP.hpp:266
vector< numvec > policy_rand
Nature's policy: Which outcome to take in which state.
Definition: RMDP.hpp:192
void to_csv(ostream &output, bool header=true) const
Saves the model to a stream as a simple csv file.
Definition: RMDP.hpp:319
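
For orientation, each row emitted by to_csv corresponds to one transition sample, in the column order produced by the nested loops in the listing (source state, action, outcome, target state, probability, reward). The values below are hypothetical and only illustrate the layout:

idstatefrom,idaction,idoutcome,idstateto,probability,reward
0,0,0,1,0.6,10.0
0,0,0,2,0.4,0.0
1,0,0,0,1.0,-1.0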