4 #include "Transition.hpp"
5 #include "modeltools.hpp"
6 #include "algorithms/values.hpp"
7 #include "algorithms/robust_values.hpp"
8 #include "algorithms/occupancies.hpp"
17 #include "cpp11-range-master/range.hpp"
28 T max_value(vector<T> x){
29     return (x.size() > 0) ? *max_element(x.begin(), x.end()) : -1;
55     mdp(mdp), state2observ(state2observ), initial(initial),
56     obscount(1+max_value(state2observ)),
57     action_counts(obscount, -1){
58     check_parameters(*mdp, state2observ, initial);
60     for(auto state : range((size_t) 0, mdp->state_count())){
61         auto obs = state2observ[state];
64         auto ac = mdp->get_state(state).action_count();
65         if(action_counts[obs] >= 0){
66             if(action_counts[obs] != (long) ac){
67                 throw invalid_argument("Inconsistent number of actions: " + to_string(ac) +
68                                        " instead of " + to_string(action_counts[obs]) +
69                                        " in state " + to_string(state));
72             action_counts[obs] = ac;
89     : MDPI(make_shared<const MDP>(mdp), state2observ, initial){}
91     size_t obs_count() const { return obscount; };
92     size_t state_count() const { return mdp->state_count(); };
93     long state2obs(long state){ return state2observ[state]; };
94     size_t action_count(long obsid) { return action_counts[obsid]; };
103     indvec statepol(state_count());
104     obspol2statepol(obspol, statepol);
115     assert(obspol.size() == (size_t) obscount);
116     assert(mdp->state_count() == statepol.size());
118     for(auto s : range((size_t) 0, state_count())){
119         statepol[s] = obspol[state2observ[s]];
129     if((size_t) tran.max_index() >= state_count())
130         throw invalid_argument("Transition to a non-existing state.");
132     for(auto i : range((size_t) 0, tran.size())){
137         result.add_sample(state2obs(state), prob, reward);
143     shared_ptr<const MDP> get_mdp() { return mdp; };
150     indvec policy(obscount, -1);
152     default_random_engine gen(seed);
154     for(auto obs : range(0l, obscount)){
155         auto ac = action_counts[obs];
159         uniform_int_distribution<int> dist(0, ac-1);
160         policy[obs] = dist(gen);
174 return sol.total_return(initial);
186     void to_csv(ostream& output_mdp, ostream& output_state2obs, ostream& output_initial,
187                 bool headers = true) const{
189         mdp->to_csv(output_mdp, headers);
192         output_state2obs << "idstate,idobs" << endl;
194         for(auto i : indices(state2observ)){
195             output_state2obs << i << "," << state2observ[i] << endl;
200         output_initial << "idstate,probability" << endl;
202         const indvec& inindices = initial.get_indices();
203         const numvec& probabilities = initial.get_probabilities();
205         for(auto i : indices(inindices)){
206             output_initial << inindices[i] << "," << probabilities[i] << endl;
219     void to_csv_file(const string& output_mdp, const string& output_state2obs,
220                      const string& output_initial, bool headers = true) const{
223         ofstream ofs_mdp(output_mdp),
224                  ofs_state2obs(output_state2obs),
225                  ofs_initial(output_initial);
228         to_csv(ofs_mdp, ofs_state2obs, ofs_initial, headers);
231         ofs_mdp.close(); ofs_state2obs.close(); ofs_initial.close();
244     template<typename T = MDPI>
245     static unique_ptr<T> from_csv(istream& input_mdp, istream& input_state2obs,
246                                   istream& input_initial, bool headers = true){
253         if(headers) input_state2obs >> line;
256         input_state2obs >> line;
257         while(input_state2obs.good()){
259             stringstream linestream(line);
261             getline(linestream, cellstring, ',');
262             auto idstate = stoi(cellstring);
263             getline(linestream, cellstring, ',');
264             auto idobs = stoi(cellstring);
265             state2obs[idstate] = idobs;
267             input_state2obs >> line;
271         if(headers) input_initial >> line;
274         input_initial >> line;
275         while(input_initial.good()){
277             stringstream linestream(line);
279             getline(linestream, cellstring, ',');
280             auto idstate = stoi(cellstring);
281             getline(linestream, cellstring, ',');
282             auto prob = stof(cellstring);
285             input_initial >> line;
288         shared_ptr<const MDP> csmdp = make_shared<const MDP>(std::move(mdp));
289         return make_unique<T>(csmdp, state2obs, initial);
292     template<typename T = MDPI>
294                                        const string& input_state2obs,
295                                        const string& input_initial,
296                                        bool headers = true){
298         ifstream ifs_mdp(input_mdp),
299                  ifs_state2obs(input_state2obs),
300                  ifs_initial(input_initial);
303         return from_csv<T>(ifs_mdp, ifs_state2obs, ifs_initial, headers);
308     shared_ptr<const MDP> mdp;
326         throw invalid_argument("Number of observation indexes must match the number of states.");
328     if(state2observ.size() == 0)
329         throw invalid_argument("Cannot have empty observations.");
330     if(*min_element(state2observ.begin(), state2observ.end()) < 0)
331         throw invalid_argument("Observation indexes must be non-negative");
334         throw invalid_argument("An initial transition to a non-existent state.");
336         throw invalid_argument("The initial transition must be normalized.");
354     : MDPI(mdp, state2observ, initial), robust_mdp(), state2outcome(mdp->state_count(), -1){
355     initialize_robustmdp();
363     : MDPI(mdp, state2observ, initial), robust_mdp(), state2outcome(mdp.state_count(), -1){
364     initialize_robustmdp();
379     if(weights.size() != state_count()){
380         throw invalid_argument("Size of distribution must match the number of states.");
384     for(size_t i : indices(weights)){
385         const auto rmdp_stateid = state2observ[i];
386         const auto rmdp_outcomeid = state2outcome[i];
389         auto& rstate = robust_mdp.get_state(rmdp_stateid);
390         for(size_t ai : indices(rstate)){
391             rstate.get_action(ai).set_distribution(rmdp_outcomeid, weights[i]);
396     for(size_t si : indices(robust_mdp)){
397         auto& s = robust_mdp.get_state(si);
398         for(size_t ai : indices(s)){
399             auto& a = s.get_action(ai);
401             const numvec& dist = a.get_distribution();
402             if(accumulate(dist.begin(), dist.end(), 0.0) > 0.0){
403                 a.normalize_distribution();
407                 a.uniform_distribution();
429     if(initobspol.size() > 0 && initobspol.size() != obs_count()){
430         throw invalid_argument("Initial policy must be defined for all observations.");
433     indvec obspol(initobspol);
434     if(obspol.size() == 0){
435         obspol.resize(obs_count(), 0);
437     indvec statepol(state_count(), 0);
438     obspol2statepol(obspol, statepol);
441     const Transition oinitial = transition2obs(initial);
443     for(auto iter : range(0l, iterations)){
449         update_importance_weights(importanceweights);
451         auto&& s = mpi_jac(robust_mdp, discount);
456         obspol2statepol(obspol, statepol);
479     if(initobspol.size() > 0 && initobspol.size() != obs_count()){
480         throw invalid_argument("Initial policy must be defined for all observations.");
483     indvec obspol(initobspol);
484     if(obspol.size() == 0){
485         obspol.resize(obs_count(), 0);
487     indvec statepol(state_count(), 0);
488     obspol2statepol(obspol, statepol);
490     for(auto iter : range(0l, iterations)){
497         update_importance_weights(importanceweights);
506         obspol2statepol(obspol, statepol);
513     static unique_ptr<MDPI_R> from_csv(istream& input_mdp, istream& input_state2obs,
514                                        istream& input_initial, bool headers = true){
516         return MDPI::from_csv<MDPI_R>(input_mdp, input_state2obs, input_initial, headers);
521                                        const string& input_state2obs,
522                                        const string& input_initial,
523                                        bool headers = true){
524         return MDPI::from_csv_file<MDPI_R>(input_mdp, input_state2obs, input_initial, headers);
536     auto obs_count = *max_element(state2observ.begin(), state2observ.end()) + 1;
539     indvec outcome_count(obs_count, 0);
541     for(size_t state_index : indices(*mdp)){
542         auto obs = state2observ[state_index];
548         for(auto action_index : range(0l, action_counts[obs])){
550             const Transition& old_tran = mdp->get_state(state_index).get_action(action_index).get_outcome();
555             for(auto k : range((size_t) 0, old_tran.size())){
562         state2outcome[state_index] = outcome_count[obs]++;
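Putting the listing together, a minimal usage sketch might look as follows. It assumes an MDP object named mdp has already been constructed elsewhere, that the library's namespaces are in scope, and that Transition is default-constructible (as the listing suggests); the concrete numbers and variable names are illustrative only.

    // Minimal sketch: four states grouped into two observations.
    indvec state2observ{0, 0, 1, 1};          // states 0,1 -> observation 0; states 2,3 -> observation 1

    Transition initial;                        // assumed default-constructible
    initial.add_sample(0, 0.5, 0.0);           // start in state 0 or state 2 with equal probability
    initial.add_sample(2, 0.5, 0.0);

    MDPI_R mdpi(mdp, state2observ, initial);   // also builds the internal robust MDP

    // Reweighting heuristic: 100 iterations, discount 0.9; returns one action per observation.
    indvec obspol = mdpi.solve_reweighted(100, 0.9);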
numvec occfreq_mat(const GRMDP< SType > &rmdp, const Transition &init, prec_t discount, const Policies &policies)
Computes occupancy frequencies using matrix representation of transition probabilities.
Definition: occupancies.hpp:124
Transition transition2obs(const Transition &tran)
Converts a transition from states to observations, adding probabilities of individual states...
Definition: ImMDP.hpp:128
indvec obspol2statepol(const indvec &obspol) const
Converts a policy defined in terms of observations to a policy defined in terms of states...
Definition: ImMDP.hpp:102
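A quick illustration of this conversion, with hypothetical values and assuming an MDPI instance mdpi whose state2observ is {0, 0, 1, 1}:

    indvec obspol{2, 0};                              // one action per observation
    indvec statepol = mdpi.obspol2statepol(obspol);   // yields {2, 2, 0, 0}: one action per state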
void to_csv_file(const string &output_mdp, const string &output_state2obs, const string &output_initial, bool headers=true) const
Saves the MDPI to a set of 3 csv files, for transitions, observations, and the initial distribution...
Definition: ImMDP.hpp:219
static unique_ptr< T > from_csv(istream &input_mdp, istream &input_state2obs, istream &input_initial, bool headers=true)
Loads an MDPI from a set of 3 csv files, for transitions, observations, and the initial distribution...
Definition: ImMDP.hpp:245
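A loading sketch under the assumption that the three CSV files exist on disk; the file names are placeholders. The stream-based overload is shown first, followed by the file-name convenience wrapper for the robust variant:

    ifstream fmdp("mdp.csv"), fobs("state2obs.csv"), finit("initial.csv");
    unique_ptr<MDPI> mdpi = MDPI::from_csv(fmdp, fobs, finit, true);

    // Convenience call taking file names directly and constructing the robust variant:
    unique_ptr<MDPI_R> mdpir = MDPI_R::from_csv_file("mdp.csv", "state2obs.csv", "initial.csv");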
Represents an MDP with implementability constraints.
Definition: ImMDP.hpp:37
PolicyNature< T > uniform_nature(size_t statecount, NatureResponse< T > nature, T threshold)
A helper function that simply copies a nature specification across all states.
Definition: robust_values.hpp:318
Transition get_initial() const
Initial distribution of MDP.
Definition: ImMDP.hpp:146
long obscount
number of observations
Definition: ImMDP.hpp:314
A general robust Markov decision process.
Definition: RMDP.hpp:182
const indvec & get_indices() const
Indices with positive probabilities.
Definition: Transition.hpp:323
Transition initial
initial distribution
Definition: ImMDP.hpp:312
long max_index() const
Returns the maximal index involved in the transition.
Definition: Transition.hpp:262
shared_ptr< const MDP > get_mdp()
Internal MDP representation.
Definition: ImMDP.hpp:143
double prec_t
Default precision used throughout the code.
Definition: definitions.hpp:25
auto mpi_jac(const GRMDP< SType > &mdp, prec_t discount, const numvec &valuefunction=numvec(0), const ResponseType &response=PolicyDeterministic(), unsigned long iterations_pi=MAXITER, prec_t maxresidual_pi=SOLPREC, unsigned long iterations_vi=MAXITER, prec_t maxresidual_vi=SOLPREC/2, bool print_progress=false)
Modified policy iteration using Jacobi value iteration in the inner loop.
Definition: values.hpp:405
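This is the solver invoked on the robust MDP at ImMDP.hpp:451. A minimal sketch of calling it directly, assuming an RMDP rmdp and an initial Transition initial are available, the algorithms namespace is in scope, and the solution object exposes total_return as used at ImMDP.hpp:174:

    auto sol = mpi_jac(rmdp, 0.95);             // default iteration counts and residuals (MAXITER, SOLPREC)
    prec_t value = sol.total_return(initial);   // discounted return from the initial distribution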
vector< prec_t > numvec
Default numerical vector.
Definition: definitions.hpp:28
indvec state2outcome
Maps the index of the mdp state to the index of the observation within the state corresponding to the...
Definition: ImMDP.hpp:532
shared_ptr< const MDP > mdp
the underlying MDP
Definition: ImMDP.hpp:308
indvec action_counts
number of actions for each observation
Definition: ImMDP.hpp:316
MDPI_R(const MDP &mdp, const indvec &state2observ, const Transition &initial)
Calls the base constructor and also constructs the corresponding robust MDP.
Definition: ImMDP.hpp:362
MDPI(const MDP &mdp, const indvec &state2observ, const Transition &initial)
Constructs the MDP with implementability constraints.
Definition: ImMDP.hpp:88
vec_scal_t robust_l1(const numvec &v, const numvec &p, prec_t threshold)
L1 robust response.
Definition: robust_values.hpp:44
const numvec & get_probabilities() const
Returns list of positive probabilities for indexes returned by get_indices.
Definition: Transition.hpp:332
size_t state_count() const
Number of states.
Definition: RMDP.hpp:225
void to_csv(ostream &output_mdp, ostream &output_state2obs, ostream &output_initial, bool headers=true) const
Saves the MDPI to a set of 3 csv files, for transitions, observations, and the initial distribution...
Definition: ImMDP.hpp:186
void initialize_robustmdp()
Constructs a robust version of the implementable MDP.
Definition: ImMDP.hpp:534
static void check_parameters(const MDP &mdp, const indvec &state2observ, const Transition &initial)
Checks whether the parameters are correct.
Definition: ImMDP.hpp:322
indvec solve_robust(long iterations, prec_t threshold, prec_t discount, const indvec &initobspol=indvec(0))
Uses a robust MDP formulation to solve the MDPI.
Definition: ImMDP.hpp:477
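A usage sketch, assuming mdpi is an existing MDPI_R; the threshold is the budget handed to the robust nature response (e.g. robust_l1 above):

    // 50 outer iterations, robustness threshold 1.0, discount 0.99;
    // the initial observation policy defaults to all zeros.
    indvec obspol = mdpi.solve_robust(50, 1.0, 0.99);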
Main namespace for algorithms that operate on MDPs and RMDPs.
Definition: occupancies.hpp:8
MDPI_R(const shared_ptr< const MDP > &mdp, const indvec &state2observ, const Transition &initial)
Calls the base constructor and also constructs the corresponding robust MDP.
Definition: ImMDP.hpp:353
size_t size() const
Returns the number of target states with non-zero transition probabilities.
Definition: Transition.hpp:249
Represents sparse transition probabilities and rewards from a single state.
Definition: Transition.hpp:31
constexpr prec_t SOLPREC
Default solution precision.
Definition: definitions.hpp:40
Model & from_csv(Model &mdp, istream &input, bool header=true)
Loads a GRMDP definition from a simple CSV file.
Definition: modeltools.hpp:82
bool is_normalized() const
Definition: Transition.hpp:186
SType & create_state(long stateid)
Ensures that the MDP state exists; if it does not, it is created.
Definition: RMDP.hpp:211
const numvec & get_rewards() const
Rewards for indices with positive probabilities returned by get_indices.
Definition: Transition.hpp:337
const RMDP & get_robust_mdp() const
Definition: ImMDP.hpp:367
void update_importance_weights(const numvec &weights)
Updates the weights on outcomes in the robust MDP based on the state weights provided.
Definition: ImMDP.hpp:378
Model & from_csv_file(Model &mdp, const string &filename, bool header=true)
Loads the transition probabilities and rewards from a CSV file.
Definition: modeltools.hpp:127
void obspol2statepol(const indvec &obspol, indvec &statepol) const
Converts a policy defined in terms of observations to a policy defined in terms of states...
Definition: ImMDP.hpp:114
RMDP robust_mdp
Robust representation of the MDPI.
Definition: ImMDP.hpp:525
AType & create_action(long actionid)
Creates an action given by actionid if it does not exist.
Definition: State.hpp:66
indvec solve_reweighted(long iterations, prec_t discount, const indvec &initobspol=indvec(0))
Uses a simple iterative algorithm to solve the MDPI.
Definition: ImMDP.hpp:428
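A sketch showing the optional warm start; the initial policy must have one entry per observation (checked at ImMDP.hpp:429), and mdpi is again an assumed MDPI_R instance:

    indvec warmstart(mdpi.obs_count(), 0);                        // start every observation at action 0
    indvec obspol = mdpi.solve_reweighted(20, 0.95, warmstart);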
static unique_ptr< MDPI_R > from_csv_file(const string &input_mdp, const string &input_state2obs, const string &input_initial, bool headers=true)
Loads the class from a set of CSV files.
Definition: ImMDP.hpp:520
vector< long > indvec
Default index vector.
Definition: definitions.hpp:31
An MDP with implementability constraints.
Definition: ImMDP.hpp:345
constexpr unsigned long MAXITER
Default number of iterations.
Definition: definitions.hpp:43
Main namespace, which includes modeling and solving functionality.
Definition: Action.hpp:18
prec_t total_return(prec_t discount, prec_t precision=SOLPREC) const
Computes the return of an observation policy.
Definition: ImMDP.hpp:172
MDPI(const shared_ptr< const MDP > &mdp, const indvec &state2observ, const Transition &initial)
Constructs the MDP with implementability constraints.
Definition: ImMDP.hpp:54
indvec state2observ
maps index of a state to the index of the observation
Definition: ImMDP.hpp:310
void add_sample(long stateid, prec_t probability, prec_t reward)
Adds a single transition probability to the existing probabilities.
Definition: Transition.hpp:116
indvec random_policy(random_device::result_type seed=random_device{}())
Constructs a random observation policy.
Definition: ImMDP.hpp:149
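A short sketch; fixing the seed makes the drawn policy reproducible, while omitting it seeds from std::random_device (the instance name mdpi is illustrative):

    indvec pol_fixed  = mdpi.random_policy(2016);   // reproducible
    indvec pol_random = mdpi.random_policy();       // seeded from random_device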