CRAAM 2.0.0
Robust and Approximate Markov Decision Processes
RMDP.hpp
1 #pragma once
2 
3 #include "State.hpp"
4 
5 #include <vector>
6 #include <istream>
7 #include <fstream>
8 #include <memory>
9 #include <tuple>
10 #include <cassert>
11 #include <limits>
12 #include <algorithm>
13 #include <string>
14 #include <sstream>
15 #include <utility>
16 #include <iostream>
17 
18 #include "cpp11-range-master/range.hpp"
19 
153 namespace craam {
155 
156 using namespace std;
157 using namespace util::lang;
158 
159 // **************************************************************************************
160 // Generic MDP Class
161 // **************************************************************************************
162 
181 template<class SType>
182 class GRMDP{
183 protected:
185  vector<SType> states;
186 
187 public:
188 
190  typedef indvec policy_det;
192  typedef vector<numvec> policy_rand;
193 
201  GRMDP(long state_count) : states(state_count){};
202 
204  GRMDP() : states() {};
205 
211  SType& create_state(long stateid){
212  assert(stateid >= 0);
213  if(stateid >= (long) states.size())
214  states.resize(stateid + 1);
215  return states[stateid];
216  }
217 
222  SType& create_state(){ return create_state(states.size());};
223 
225  size_t state_count() const {return states.size();};
226 
228  size_t size() const {return state_count();};
229 
231  const SType& get_state(long stateid) const {
232  assert(stateid >= 0 && size_t(stateid) < state_count());
233  return states[stateid];};
234 
236  const SType& operator[](long stateid) const {return get_state(stateid);};
237 
238 
240  SType& get_state(long stateid) {
241  assert(stateid >= 0 && size_t(stateid) < state_count());
242  return states[stateid];};
243 
245  SType& operator[](long stateid){return get_state(stateid);};
246 
248  const vector<SType>& get_states() const {return states;};
249 
256  bool is_normalized() const{
257  for(auto const& s : states){
258  for(auto const& a : s.get_actions()){
259  for(auto const& t : a.get_outcomes()){
260  if(!t.is_normalized()) return false;
261  } } }
262  return true;
263  }
264 
266  void normalize(){
267  for(SType& s : states)
268  s.normalize();
269  }
270 
284  template<typename Policy>
285  long is_policy_correct(const Policy& policies) const{
286  for(auto si : indices(states) ){
287  // ignore terminal states
288  if(states[si].is_terminal())
289  continue;
290 
291  // call function of the state
292  if(!states[si].is_action_correct(policies))
293  return si;
294  }
295  return -1;
296  }
297 
298  // ----------------------------------------------
299  // Reading and writing files
300  // ----------------------------------------------
301 
319  void to_csv(ostream& output, bool header = true) const{
320  // write the header if so requested
321  if(header){
322  output << "idstatefrom," << "idaction," <<
323  "idoutcome," << "idstateto," << "probability," << "reward" << endl;
324  }
325 
326  //idstatefrom
327  for(size_t i = 0l; i < this->states.size(); i++){
328  const auto& actions = (this->states[i]).get_actions();
329  //idaction
330  for(size_t j = 0; j < actions.size(); j++){
331  const auto& outcomes = actions[j].get_outcomes();
332  //idoutcome
333  for(size_t k = 0; k < outcomes.size(); k++){
334  const auto& tran = outcomes[k];
335 
336  const auto& indices = tran.get_indices();
337  const auto& rewards = tran.get_rewards();
338  const auto& probabilities = tran.get_probabilities();
339  //idstateto
340  for (size_t l = 0; l < tran.size(); l++){
341  output << i << ',' << j << ',' << k << ',' << indices[l] << ','
342  << probabilities[l] << ',' << rewards[l] << endl;
343  }
344  }
345  }
346  }
347  }
348 
354  void to_csv_file(const string& filename, bool header = true) const{
355  ofstream ofs(filename, ofstream::out);
356  to_csv(ofs,header);
357  ofs.close();
358  }
359 
360  // string representation
365  string to_string() const{
366  string result;
367 
368  for(size_t si : indices(states)){
369  const auto& s = get_state(si);
370  result += (std::to_string(si));
371  result += (" : ");
372  result += (std::to_string(s.action_count()));
373  result += ("\n");
374  for(size_t ai : indices(s)){
375  result += (" ");
376  result += (std::to_string(ai));
377  result += (" : ");
378  const auto& a = s.get_action(ai);
379  a.to_string(result);
380  result += ("\n");
381  }
382  }
383  return result;
384  }
385 
390  string to_json() const{
391  string result{"{\"states\" : ["};
392  for(auto si : indices(states)){
393  const auto& s = states[si];
394  result += s.to_json(si);
395  result += ",";
396  }
397  if(!states.empty()) result.pop_back(); // remove last comma
398  result += "]}";
399  return result;
400 
401  }
402 };
403 
404 // **********************************************************************
405 // ********************* TEMPLATE DECLARATIONS ********************
406 // **********************************************************************
407 
411 typedef GRMDP<RegularState> MDP;
412 
416 typedef GRMDP<WeightedRobustState> RMDP;
417 
418 }
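
A minimal usage sketch (not part of RMDP.hpp): it exercises only the GRMDP interface listed above together with the MDP typedef. Populating transitions goes through the State/Action classes declared in State.hpp, which are not shown here; the output file name is arbitrary.

#include "RMDP.hpp"
#include <iostream>

int main(){
    craam::MDP mdp;                  // GRMDP<RegularState>
    mdp.create_state(2);             // ensures states 0, 1 and 2 exist
    std::cout << mdp.state_count() << std::endl;   // prints 3

    // transitions would be added here via the State/Action interface (State.hpp)

    if(!mdp.is_normalized())
        mdp.normalize();             // rescale transition probabilities to sum to one

    mdp.to_csv_file("mdp.csv");      // idstatefrom,idaction,idoutcome,idstateto,probability,reward
    std::cout << mdp.to_json() << std::endl;
    return 0;
}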
GRMDP(long state_count)
Constructs the RMDP with a pre-allocated number of states.
Definition: RMDP.hpp:201
indvec policy_det
Decision-maker's policy: Which action to take in which state.
Definition: RMDP.hpp:190
const vector< SType > & get_states() const
Definition: RMDP.hpp:248
A general robust Markov decision process.
Definition: RMDP.hpp:182
size_t size() const
Number of states.
Definition: RMDP.hpp:228
bool is_normalized() const
Check if all transitions in the process sum to one.
Definition: RMDP.hpp:256
const SType & get_state(long stateid) const
Retrieves an existing state.
Definition: RMDP.hpp:231
vector< SType > states
Internal list of states.
Definition: RMDP.hpp:185
void to_csv_file(const string &filename, bool header=true) const
Saves the transition probabilities and rewards to a CSV file.
Definition: RMDP.hpp:354
GRMDP< RegularState > MDP
Regular MDP with discrete actions and one outcome per action.
Definition: RMDP.hpp:411
size_t state_count() const
Number of states.
Definition: RMDP.hpp:225
GRMDP()
Constructs an empty RMDP.
Definition: RMDP.hpp:204
SType & create_state()
Creates a new state at the end of the state list.
Definition: RMDP.hpp:222
string to_string() const
Returns a brief string representation of the RMDP.
Definition: RMDP.hpp:365
SType & create_state(long stateid)
Ensures that the MDP state exists; if it does not, it is created.
Definition: RMDP.hpp:211
SType & operator[](long stateid)
Retrieves an existing state.
Definition: RMDP.hpp:245
string to_json() const
Returns a json representation of the RMDP.
Definition: RMDP.hpp:390
vector< long > indvec
Default index vector.
Definition: definitions.hpp:31
GRMDP< WeightedRobustState > RMDP
An uncertain MDP with outcomes and weights.
Definition: RMDP.hpp:416
long is_policy_correct(const Policy &policies) const
Checks if the policy and nature&#39;s policy are both correct.
Definition: RMDP.hpp:285
const SType & operator[](long stateid) const
Retrieves an existing state.
Definition: RMDP.hpp:236
SType & get_state(long stateid)
Retrieves an existing state.
Definition: RMDP.hpp:240
Main namespace which includes modeling and solving functionality.
Definition: Action.hpp:18
void normalize()
Normalize all transitions to sum to one for all states, actions, outcomes.
Definition: RMDP.hpp:266
vector< numvec > policy_rand
Nature's policy: Which outcome to take in which state.
Definition: RMDP.hpp:192
void to_csv(ostream &output, bool header=true) const
Saves the model to a stream as a simple csv file.
Definition: RMDP.hpp:319
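
For orientation, each row emitted by to_csv corresponds to one transition sample, in the column order produced by the nested loops in the listing (source state, action, outcome, target state, probability, reward). The values below are hypothetical and only illustrate the layout:

idstatefrom,idaction,idoutcome,idstateto,probability,reward
0,0,0,1,0.6,10.0
0,0,0,2,0.4,0.0
1,0,0,0,1.0,-1.0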