3 #include "definitions.hpp" 5 #include "modeltools.hpp" 9 #include <unordered_map> 16 #include "cpp11-range-master/range.hpp" 45 template <
class State,
class Action>
48 Sample(State state_from, Action action, State state_to,
50 _state_from(move(state_from)), _action(move(action)),
51 _state_to(move(state_to)), _reward(reward), _weight(weight), _step(step), _run(run){
52 assert(weight >= 0);};
57 Action
action()
const {
return _action;};
65 long step()
const {
return _step;};
67 long run()
const {
return _run;};
95 template <
class State,
class Action>
99 Samples(): states_from(), actions(), states_to(), rewards(), weights(), runs(), steps(), initial() {};
104 this->initial.push_back(decstate);
109 this->initial.push_back(decstate);
115 actions.push_back(sample.
action());
116 states_to.push_back(sample.
state_to());
117 rewards.push_back(sample.
reward());
118 weights.push_back(sample.
weight());
119 steps.push_back(sample.
step());
120 runs.push_back(sample.
run());
126 long step,
long run){
128 states_from.push_back(move(state_from));
129 actions.push_back(move(action));
130 states_to.push_back(move(state_to));
131 rewards.push_back(reward);
132 weights.push_back(weight);
133 steps.push_back(step);
145 for(
size_t si : indices(*
this)){
146 auto es = get_sample(si);
147 result += es.reward() * pow(discount,es.step());
148 runs.insert(es.run());
151 result /= runs.size();
156 size_t size()
const {
return states_from.size();};
160 assert(i >=0 &&
size_t(i) < size());
162 rewards[i],weights[i],steps[i],runs[i]);};
166 return get_sample(i);
172 const vector<State>& get_states_from()
const{
return states_from;};
173 const vector<Action>& get_actions()
const{
return actions;};
174 const vector<State>& get_states_to()
const{
return states_to;};
175 const vector<prec_t>& get_rewards()
const{
return rewards;};
176 const vector<prec_t>& get_weights()
const{
return weights;};
177 const vector<long>& get_runs()
const{
return runs;};
178 const vector<long>& get_steps()
const{
return steps;};
182 vector<State> states_from;
183 vector<Action> actions;
184 vector<State> states_to;
185 vector<prec_t> rewards;
186 vector<prec_t> weights;
190 vector<State> initial;
197 template<
class Sim,
class... U>
245 template<
typename State,
247 typename SHash = std::hash<State>,
248 typename AHash = std::hash<Action>>
254 action_map(), state_map() {};
261 discretesamples->add_initial(add_state(ins));
265 for(
auto si : indices(samples)){
267 discretesamples->add_sample(
268 add_state(ds.state_from()),
269 add_action(ds.action()),
270 add_state(ds.state_to()),
271 ds.reward(), ds.weight(),
272 ds.step(), ds.run());
279 auto iter = state_map.find(dstate);
281 if(iter == state_map.end()){
282 index = state_map.size();
283 state_map[dstate] = index;
286 index = iter->second;
293 auto iter = action_map.find(action);
295 if(iter == action_map.end()){
296 index = action_map.size();
297 action_map[action] = index;
300 index = iter->second;
309 shared_ptr<DiscreteSamples> discretesamples;
311 unordered_map<Action,long,AHash> action_map;
312 unordered_map<State,long,SHash> state_map;
349 typename SAHash = std::hash<pair<State,
351 typename SHash = std::hash<State> >
357 action_count(), state_map() {};
364 discretesamples->add_initial(add_state(ins));
368 for(
auto si : indices(samples)){
372 discretesamples->add_sample(add_state(es.state_from()),
373 add_action(es.state_from(), es.action()),
374 add_state(es.state_to()),
375 es.reward(), es.weight(),
376 es.step(), es.run());
382 auto iter = state_map.find(dstate);
384 if(iter == state_map.end()){
385 index = state_map.size();
386 state_map[dstate] = index;
389 index = iter->second;
396 auto da = make_pair(dstate, action);
397 auto iter = action_map.find(da);
399 if(iter == action_map.end()){
400 index = (action_count[dstate]++);
401 action_map[da] = index;
404 index = iter->second;
413 shared_ptr<DiscreteSamples> discretesamples;
415 unordered_map<pair<State,Action>,long,SAHash> action_map;
419 unordered_map<State,long,SHash> state_map;
518 auto old_state_action_weights = state_action_weights;
521 for(
size_t si : indices(samples)){
536 bool weight_initialized =
false;
540 if((
size_t) s.
state_from() >= state_action_weights.size()){
541 state_action_weights.resize(s.
state_from()+1);
544 weight_initialized =
true;
549 if((
size_t)s.
action() >= actioncount.size()){
550 actioncount.resize(s.
action()+1);
553 weight_initialized =
true;
557 assert(
size_t(s.
state_from()) < state_action_weights.size());
565 if(!weight_initialized &&
566 (
size_t(s.
state_from()) < old_state_action_weights.size()) &&
573 weight = 1.0 /
prec_t(cnt);
585 for(
size_t si : indices(*mdp)){
586 auto& state = mdp->get_state(si);
589 for(
size_t ai : indices(state)){
590 state.set_valid(ai, state_action_weights[si][ai] > 0);
599 initial.add_sample(state, 1.0, 0.0);
606 shared_ptr<const MDP>
get_mdp()
const {
return const_pointer_cast<
const MDP>(mdp);}
void add_transition(Model &mdp, long fromid, long actionid, long outcomeid, long toid, prec_t probability, prec_t reward)
Adds a transition probability and reward for a particular outcome.
Definition: modeltools.hpp:39
shared_ptr< const MDP > get_mdp() const
Definition: Samples.hpp:606
Transition initial
Initial distribution.
Definition: Samples.hpp:630
Sample< State, Action > operator[](long i) const
Access to samples.
Definition: Samples.hpp:165
State _state_to
Destination state.
Definition: Samples.hpp:75
SampleDiscretizerSD()
Constructs new internal discrete samples.
Definition: Samples.hpp:356
A general robust Markov decision process.
Definition: RMDP.hpp:182
prec_t _weight
Sample weight.
Definition: Samples.hpp:79
shared_ptr< MDP > get_mdp_mod()
Definition: Samples.hpp:610
long state_count()
Returns the number of states in the samples (the highest observed index).
Definition: Samples.hpp:623
long step() const
Number of the step in one execution of the simulation.
Definition: Samples.hpp:65
long add_state(const State &dstate)
Returns a state index, and creates a new one if it does not exist.
Definition: Samples.hpp:381
double prec_t
Default precision used throughout the code.
Definition: definitions.hpp:25
vector< prec_t > numvec
Default numerical vector.
Definition: definitions.hpp:28
Action _action
Action taken.
Definition: Samples.hpp:73
prec_t reward() const
Reward associated with the sample.
Definition: Samples.hpp:61
Transition get_initial() const
Definition: Samples.hpp:613
prec_t _reward
Reward associated with the sample.
Definition: Samples.hpp:77
long add_state(const State &dstate)
Returns a state index, and creates a new one if it does not exist.
Definition: Samples.hpp:278
long run() const
Number of the actual execution.
Definition: Samples.hpp:67
shared_ptr< MDP > mdp
Internal MDP representation.
Definition: Samples.hpp:627
long add_action(const Action &action)
Returns an action index, and creates a new one if it does not exist.
Definition: Samples.hpp:292
vector< vector< prec_t > > state_action_weights
Sample counts.
Definition: Samples.hpp:633
SampledMDP()
Constructs an empty MDP from discrete samples.
Definition: Samples.hpp:462
Constructs an MDP from integer samples.
Definition: Samples.hpp:458
void add_samples(const Samples< State, Action > &samples)
Adds samples to the discrete samples.
Definition: Samples.hpp:257
prec_t weight() const
Sample weight.
Definition: Samples.hpp:63
Turns arbitrary samples to discrete ones (with continuous numbers assigned to states) assuming that a...
Definition: Samples.hpp:352
const vector< State > & get_initial() const
List of initial states.
Definition: Samples.hpp:170
SampleDiscretizerSI()
Constructs new internal discrete samples.
Definition: Samples.hpp:253
Represents sparse transition probabilities and rewards from a single state.
Definition: Transition.hpp:31
State state_from() const
Original state.
Definition: Samples.hpp:55
shared_ptr< DiscreteSamples > get_discrete()
Returns a shared pointer to the discrete samples.
Definition: Samples.hpp:306
prec_t mean_return(prec_t discount)
Computes the discounted mean return over all the samples.
Definition: Samples.hpp:141
void add_samples(const DiscreteSamples &samples)
Constructs or adds states and actions based on the provided samples.
Definition: Samples.hpp:516
void add_initial(const State &decstate)
Adds an initial state.
Definition: Samples.hpp:103
void add_initial(State &&decstate)
Adds an initial state.
Definition: Samples.hpp:108
Samples< typename Sim::State, typename Sim::Action > make_samples(U &&... u)
A helper function that constructs a samples object based on the simulator that is provided to it...
Definition: Samples.hpp:198
Sample< State, Action > get_sample(long i) const
Access to samples.
Definition: Samples.hpp:159
long _step
Number of the step in one execution of the simulation.
Definition: Samples.hpp:81
void add_samples(const Samples< State, Action > &samples)
Adds samples to the discrete samples.
Definition: Samples.hpp:360
long _run
Number of the actual execution.
Definition: Samples.hpp:83
void add_sample(const Sample< State, Action > &sample)
Adds a sample starting in a decision state.
Definition: Samples.hpp:113
shared_ptr< DiscreteSamples > get_discrete()
Returns a shared pointer to the discrete samples.
Definition: Samples.hpp:410
General representation of samples: See Sample for definitions of individual values.
Definition: Samples.hpp:96
Action action() const
Action taken.
Definition: Samples.hpp:57
State state_to() const
Destination state.
Definition: Samples.hpp:59
long add_action(const State &dstate, const Action &action)
Returns an action index, and creates a new one if it does not exist.
Definition: Samples.hpp:395
void add_sample(State state_from, Action action, State state_to, prec_t reward, prec_t weight, long step, long run)
Adds a sample starting in a decision state.
Definition: Samples.hpp:124
Represents a single transition between two states after taking an action: where: ...
Definition: Samples.hpp:46
Turns arbitrary samples to discrete ones assuming that actions are state independent.
Definition: Samples.hpp:249
unordered_map< State, long, SHash > action_count
keeps the number of actions for each state
Definition: Samples.hpp:418
vector< vector< prec_t > > get_state_action_weights()
Definition: Samples.hpp:617
Main namespace which includes modeling and solving functionality.
Definition: Action.hpp:18
size_t size() const
Number of samples.
Definition: Samples.hpp:156