18 #include "cpp11-range-master/range.hpp" 181 template<
class SType>
201 GRMDP(
long state_count) : states(state_count){};
212 assert(stateid >= 0);
213 if(stateid >= (
long) states.size())
214 states.resize(stateid + 1);
215 return states[stateid];
228 size_t size()
const {
return state_count();};
232 assert(stateid >= 0 &&
size_t(stateid) < state_count());
233 return states[stateid];};
236 const SType&
operator[](
long stateid)
const {
return get_state(stateid);};
241 assert(stateid >= 0 &&
size_t(stateid) < state_count());
242 return states[stateid];};
257 for(
auto const& s : states){
258 for(
auto const& a : s.get_actions()){
259 for(
auto const& t : a.get_outcomes()){
260 if(!t.is_normalized())
return false;
267 for(SType& s : states)
284 template<
typename Policy>
286 for(
auto si : indices(states) ){
288 if(states[si].is_terminal())
292 if(!states[si].is_action_correct(policies))
319 void to_csv(ostream& output,
bool header =
true)
const{
322 output <<
"idstatefrom," <<
"idaction," <<
323 "idoutcome," <<
"idstateto," <<
"probability," <<
"reward" << endl;
327 for(
size_t i = 0l; i < this->states.size(); i++){
328 const auto& actions = (this->states[i]).get_actions();
330 for(
size_t j = 0; j < actions.size(); j++){
331 const auto& outcomes = actions[j].get_outcomes();
333 for(
size_t k = 0; k < outcomes.size(); k++){
334 const auto& tran = outcomes[k];
336 auto& indices = tran.get_indices();
337 const auto& rewards = tran.get_rewards();
338 const auto& probabilities = tran.get_probabilities();
340 for (
size_t l = 0; l < tran.size(); l++){
341 output << i <<
',' << j <<
',' << k <<
',' << indices[l] <<
',' 342 << probabilities[l] <<
',' << rewards[l] << endl;
354 void to_csv_file(
const string& filename,
bool header =
true)
const{
355 ofstream ofs(filename, ofstream::out);
368 for(
size_t si : indices(states)){
369 const auto& s = get_state(si);
370 result += (std::to_string(si));
372 result += (std::to_string(s.action_count()));
374 for(
size_t ai : indices(s)){
376 result += (std::to_string(ai));
378 const auto& a = s.get_action(ai);
391 string result{
"{\"states\" : ["};
392 for(
auto si : indices(states)){
393 const auto& s = states[si];
394 result += s.to_json(si);
397 if(!states.empty()) result.pop_back();
GRMDP(long state_count)
Constructs the RMDP with a pre-allocated number of states.
Definition: RMDP.hpp:201
indvec policy_det
Decision-maker's policy: Which action to take in which state.
Definition: RMDP.hpp:190
const vector< SType > & get_states() const
Definition: RMDP.hpp:248
A general robust Markov decision process.
Definition: RMDP.hpp:182
size_t size() const
Number of states.
Definition: RMDP.hpp:228
bool is_normalized() const
Check if all transitions in the process sum to one.
Definition: RMDP.hpp:256
const SType & get_state(long stateid) const
Retrieves an existing state.
Definition: RMDP.hpp:231
vector< SType > states
Internal list of states.
Definition: RMDP.hpp:185
void to_csv_file(const string &filename, bool header=true) const
Saves the transition probabilities and rewards to a CSV file.
Definition: RMDP.hpp:354
GRMDP< RegularState > MDP
Regular MDP with discrete actions and one outcome per action.
Definition: RMDP.hpp:411
size_t state_count() const
Number of states.
Definition: RMDP.hpp:225
GRMDP()
Constructs an empty RMDP.
Definition: RMDP.hpp:204
SType & create_state()
Creates a new state at the end of the states.
Definition: RMDP.hpp:222
string to_string() const
Returns a brief string representation of the RMDP.
Definition: RMDP.hpp:365
SType & create_state(long stateid)
Ensures that the MDP state exists; if it does not, it is created.
Definition: RMDP.hpp:211
SType & operator[](long stateid)
Retrieves an existing state.
Definition: RMDP.hpp:245
string to_json() const
Returns a JSON representation of the RMDP.
Definition: RMDP.hpp:390
vector< long > indvec
Default index vector.
Definition: definitions.hpp:31
GRMDP< WeightedRobustState > RMDP
An uncertain MDP with outcomes and weights.
Definition: RMDP.hpp:416
long is_policy_correct(const Policy &policies) const
Checks if the policy and nature's policy are both correct.
Definition: RMDP.hpp:285
const SType & operator[](long stateid) const
Retrieves an existing state.
Definition: RMDP.hpp:236
SType & get_state(long stateid)
Retrieves an existing state.
Definition: RMDP.hpp:240
Main namespace, which includes modeling and solving functionality.
Definition: Action.hpp:18
void normalize()
Normalize all transitions to sum to one for all states, actions, outcomes.
Definition: RMDP.hpp:266
vector< numvec > policy_rand
Nature's policy: Which outcome to take in which state.
Definition: RMDP.hpp:192
void to_csv(ostream &output, bool header=true) const
Saves the model to a stream as a simple CSV file.
Definition: RMDP.hpp:319