CRAAM  2.0.0
Robust and Approximate Markov Decision Processes
Samples.hpp
#pragma once

#include "definitions.hpp"
#include "RMDP.hpp"
#include "modeltools.hpp"

#include <set>
#include <memory>
#include <unordered_map>
#include <functional>
#include <cassert>
#include <utility>
#include <vector>
#include <string>

#include "cpp11-range-master/range.hpp"

namespace craam{

namespace msen{

using namespace util::lang;
using namespace std;

/** Represents a single transition between two states after taking an action. */
template <class State, class Action>
class Sample {
public:
    Sample(State state_from, Action action, State state_to,
           prec_t reward, prec_t weight, long step, long run):
        _state_from(move(state_from)), _action(move(action)),
        _state_to(move(state_to)), _reward(reward), _weight(weight), _step(step), _run(run){
        assert(weight >= 0);};

    /** Original state */
    State state_from() const {return _state_from;};
    /** Action taken */
    Action action() const {return _action;};
    /** Destination state */
    State state_to() const {return _state_to;};
    /** Reward associated with the sample */
    prec_t reward() const {return _reward;};
    /** Sample weight */
    prec_t weight() const {return _weight;};
    /** Number of the step in one execution of the simulation */
    long step() const {return _step;};
    /** Number of the actual execution (run) */
    long run() const {return _run;};

protected:
    /** Original state */
    State _state_from;
    /** Action taken */
    Action _action;
    /** Destination state */
    State _state_to;
    /** Reward associated with the sample */
    prec_t _reward;
    /** Sample weight */
    prec_t _weight;
    /** Number of the step in one execution of the simulation */
    long _step;
    /** Number of the actual execution (run) */
    long _run;
};
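/*
Usage sketch (not part of the original header): constructing a single sample
with integer state and action identifiers. The values below are arbitrary and
only illustrate the constructor's argument order.

\code
Sample<long,long> s(0,    // state_from
                    1,    // action
                    2,    // state_to
                    1.5,  // reward
                    1.0,  // weight
                    0,    // step within the run
                    0);   // run index
\endcode
*/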

/**
General representation of samples; see Sample for the meaning of the
individual values.
*/
template <class State, class Action>
class Samples {
public:

    Samples(): states_from(), actions(), states_to(), rewards(), weights(), runs(), steps(), initial() {};

    /** Adds an initial state */
    void add_initial(const State& decstate){
        this->initial.push_back(decstate);
    };

    /** Adds an initial state */
    void add_initial(State&& decstate){
        this->initial.push_back(move(decstate));
    };

    /** Adds a sample starting in a decision state */
    void add_sample(const Sample<State,Action>& sample){
        states_from.push_back(sample.state_from());
        actions.push_back(sample.action());
        states_to.push_back(sample.state_to());
        rewards.push_back(sample.reward());
        weights.push_back(sample.weight());
        steps.push_back(sample.step());
        runs.push_back(sample.run());
    };

    /** Adds a sample starting in a decision state */
    void add_sample(State state_from, Action action,
                    State state_to, prec_t reward, prec_t weight,
                    long step, long run){

        states_from.push_back(move(state_from));
        actions.push_back(move(action));
        states_to.push_back(move(state_to));
        rewards.push_back(reward);
        weights.push_back(weight);
        steps.push_back(step);
        runs.push_back(run);
    }

    /** Computes the discounted mean return over all the samples */
    prec_t mean_return(prec_t discount){
        prec_t result = 0;
        set<int> runs;

        for(size_t si : indices(*this)){
            auto es = get_sample(si);
            result += es.reward() * pow(discount, es.step());
            runs.insert(es.run());
        }

        result /= runs.size();
        return result;
    };

    /** Number of samples */
    size_t size() const {return states_from.size();};

    /** Access to samples */
    Sample<State,Action> get_sample(long i) const{
        assert(i >= 0 && size_t(i) < size());
        return Sample<State,Action>(states_from[i],actions[i],states_to[i],
                                    rewards[i],weights[i],steps[i],runs[i]);};

    /** Access to samples */
    Sample<State,Action> operator[](long i) const{
        return get_sample(i);
    };

    /** List of initial states */
    const vector<State>& get_initial() const{return initial;};

    const vector<State>& get_states_from() const{return states_from;};
    const vector<Action>& get_actions() const{return actions;};
    const vector<State>& get_states_to() const{return states_to;};
    const vector<prec_t>& get_rewards() const{return rewards;};
    const vector<prec_t>& get_weights() const{return weights;};
    const vector<long>& get_runs() const{return runs;};
    const vector<long>& get_steps() const{return steps;};

protected:

    vector<State> states_from;
    vector<Action> actions;
    vector<State> states_to;
    vector<prec_t> rewards;
    vector<prec_t> weights;
    vector<long> runs;
    vector<long> steps;

    vector<State> initial;
};
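/*
Usage sketch (not part of the original header): populating a Samples object
with integer states and actions and computing the discounted mean return. The
transitions and the discount factor 0.9 are invented for illustration.

\code
Samples<long,long> samples;
samples.add_initial(0);
// run 0: two steps starting from state 0
samples.add_sample(0, 0, 1, 1.0, 1.0, 0, 0);
samples.add_sample(1, 0, 0, 0.5, 1.0, 1, 0);
// mean over runs of sum_t reward_t * discount^t; here 1.0 + 0.5 * 0.9 = 1.45
prec_t ret = samples.mean_return(0.9);
\endcode
*/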

/**
A helper function that constructs an empty samples object for the state and
action types of the simulator that is provided to it. Any arguments are
forwarded to the Samples constructor.
*/
template<class Sim, class... U>
Samples<typename Sim::State, typename Sim::Action> make_samples(U&&... u){
    return Samples<typename Sim::State, typename Sim::Action>(forward<U>(u)...);
}

// **********************************************************************
// ****** Discrete simulation specialization ****************************
// **********************************************************************

/** Samples in which states and actions are identified by integer indices */
using DiscreteSamples = Samples<long,long>;
/** A single sample with integer state and action indices */
using DiscreteSample = Sample<long,long>;

/**
Turns arbitrary samples into discrete ones (states and actions are mapped to
consecutive integer indices), assuming that actions are state independent.
*/
template< typename State,
          typename Action,
          typename SHash = std::hash<State>,
          typename AHash = std::hash<Action>>
class SampleDiscretizerSI {
public:

    /** Constructs new internal discrete samples */
    SampleDiscretizerSI() : discretesamples(make_shared<DiscreteSamples>()),
                            action_map(), state_map() {};

    /** Adds samples to the discrete samples */
    void add_samples(const Samples<State,Action>& samples){

        // initial states
        for(const State& ins : samples.get_initial()){
            discretesamples->add_initial(add_state(ins));
        }

        // samples
        for(auto si : indices(samples)){
            const auto ds = samples.get_sample(si);
            discretesamples->add_sample(
                add_state(ds.state_from()),
                add_action(ds.action()),
                add_state(ds.state_to()),
                ds.reward(), ds.weight(),
                ds.step(), ds.run());
        }
    }

    /** Returns a state index, and creates a new one if it does not exist */
    long add_state(const State& dstate){
        auto iter = state_map.find(dstate);
        long index;
        if(iter == state_map.end()){
            index = state_map.size();
            state_map[dstate] = index;
        }
        else{
            index = iter->second;
        }
        return index;
    }

    /** Returns an action index, and creates a new one if it does not exist */
    long add_action(const Action& action){
        auto iter = action_map.find(action);
        long index;
        if(iter == action_map.end()){
            index = action_map.size();
            action_map[action] = index;
        }
        else{
            index = iter->second;
        }
        return index;
    }

    /** Returns a shared pointer to the discrete samples */
    shared_ptr<DiscreteSamples> get_discrete(){return discretesamples;};

protected:
    shared_ptr<DiscreteSamples> discretesamples;

    unordered_map<Action,long,AHash> action_map;
    unordered_map<State,long,SHash> state_map;
};
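/*
Usage sketch (not part of the original header): discretizing samples whose
states are strings and whose actions are characters. The values are invented
for illustration; each distinct state and action is mapped to a consecutive
integer index.

\code
Samples<string,char> samples;
samples.add_initial("start");
samples.add_sample("start", 'a', "goal", 1.0, 1.0, 0, 0);

SampleDiscretizerSI<string,char> discretizer;
discretizer.add_samples(samples);
shared_ptr<DiscreteSamples> dsamples = discretizer.get_discrete();
// "start" -> 0, "goal" -> 1, 'a' -> 0
\endcode
*/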

/**
Turns arbitrary samples into discrete ones (with consecutive integer indices
assigned to states), assuming that actions are state dependent: action
indices are assigned separately for each originating state.
*/
template<
    typename State,
    typename Action,
    typename SAHash = std::hash<pair<State,
                                     Action>>,
    typename SHash = std::hash<State> >
class SampleDiscretizerSD {
public:

    /** Constructs new internal discrete samples */
    SampleDiscretizerSD() : discretesamples(make_shared<DiscreteSamples>()), action_map(),
                            action_count(), state_map() {};

    /** Adds samples to the discrete samples */
    void add_samples(const Samples<State,Action>& samples){

        // initial states
        for(const auto& ins : samples.get_initial()){
            discretesamples->add_initial(add_state(ins));
        }

        // transition samples
        for(auto si : indices(samples)){

            const auto es = samples.get_sample(si);

            discretesamples->add_sample(add_state(es.state_from()),
                                        add_action(es.state_from(), es.action()),
                                        add_state(es.state_to()),
                                        es.reward(), es.weight(),
                                        es.step(), es.run());
        }
    }

    /** Returns a state index, and creates a new one if it does not exist */
    long add_state(const State& dstate){
        auto iter = state_map.find(dstate);
        long index;
        if(iter == state_map.end()){
            index = state_map.size();
            state_map[dstate] = index;
        }
        else{
            index = iter->second;
        }
        return index;
    }

    /** Returns an action index, and creates a new one if it does not exist */
    long add_action(const State& dstate, const Action& action){
        auto da = make_pair(dstate, action);
        auto iter = action_map.find(da);
        long index;
        if(iter == action_map.end()){
            index = (action_count[dstate]++);
            action_map[da] = index;
        }
        else{
            index = iter->second;
        }
        return index;
    }

    /** Returns a shared pointer to the discrete samples */
    shared_ptr<DiscreteSamples> get_discrete(){return discretesamples;};

protected:
    shared_ptr<DiscreteSamples> discretesamples;

    unordered_map<pair<State,Action>,long,SAHash> action_map;

    /** Keeps the number of actions for each state */
    unordered_map<State,long,SHash> action_count;
    unordered_map<State,long,SHash> state_map;
};
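/*
Usage sketch (not part of the original header): with state-dependent
discretization, action indices restart from 0 for every originating state.
The string states and char actions below are invented for illustration; note
that SAHash must be provided because the standard library does not define
std::hash for pairs.

\code
// hypothetical hash for pair<string,char>, shown only to make the example compile
struct PairHash{
    size_t operator()(const pair<string,char>& p) const{
        return hash<string>()(p.first) ^ (hash<char>()(p.second) << 1);}
};

Samples<string,char> samples;
samples.add_sample("s0", 'x', "s1", 0.0, 1.0, 0, 0);
samples.add_sample("s1", 'x', "s0", 1.0, 1.0, 1, 0);

SampleDiscretizerSD<string,char,PairHash> discretizer;
discretizer.add_samples(samples);
// action 'x' gets index 0 in both "s0" and "s1"
\endcode
*/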


/**
Constructs an MDP from integer samples.

Actions that have no samples for a state are marked as invalid and are not
considered when the MDP is solved.
*/
class SampledMDP{
public:

    /** Constructs an empty MDP from discrete samples */
    SampledMDP(): mdp(make_shared<MDP>()), initial(), state_action_weights() {}

    /**
    Constructs or adds states and actions based on the provided samples.
    Transition probabilities are normalized over all samples added so far.

    \param samples Source of the samples
    */
    void add_samples(const DiscreteSamples& samples){
        // copy the state and action weights to be able to re-normalize
        // the new samples consistently with the old ones
        auto old_state_action_weights = state_action_weights;

        // add transition samples
        for(size_t si : indices(samples)){

            DiscreteSample s = samples.get_sample(si);

            // -----------------
            // Compute sample weights:
            // the idea is to normalize new samples by the same
            // value as the existing samples and then re-normalize;
            // this has linear complexity
            // -----------------

            // weight used to normalize old data
            prec_t weight = 1.0; // this needs to be initialized to 1.0
            // whether the sample weight has been initialized
            bool weight_initialized = false;

            // resize transition counts
            // the actual values are updated later
            if((size_t) s.state_from() >= state_action_weights.size()){
                state_action_weights.resize(s.state_from()+1);

                // we know that the value will not be found in old data
                weight_initialized = true;
            }

            // check if we have something for the action
            numvec& actioncount = state_action_weights[s.state_from()];
            if((size_t)s.action() >= actioncount.size()){
                actioncount.resize(s.action()+1);

                // we know that the value will not be found in old data
                weight_initialized = true;
            }

            // update the new count
            assert(size_t(s.state_from()) < state_action_weights.size());
            assert(size_t(s.action()) < state_action_weights[s.state_from()].size());

            state_action_weights[s.state_from()][s.action()] += s.weight();

            // get the weight of existing transitions;
            // this is only run when we do not know whether there is any prior
            // sample
            if(!weight_initialized &&
                    (size_t(s.state_from()) < old_state_action_weights.size()) &&
                    (size_t(s.action()) < old_state_action_weights[s.state_from()].size())) {

                prec_t cnt = old_state_action_weights[s.state_from()][s.action()];

                // adjust the weight of the new sample to be consistent
                // with the previous normalization (use 1.0 if there are no previous samples)
                if(cnt > 0)
                    weight = 1.0 / cnt;
            }
            // ---------------------

            // adds a transition
            add_transition(*mdp, s.state_from(), s.action(), s.state_to(),
                           weight*s.weight(),
                           s.reward());
        }

        // make sure to set action validity based on whether there have been
        // samples observed for the action
        for(size_t si : indices(*mdp)){
            auto& state = mdp->get_state(si);

            // valid only if there are some samples for the action
            for(size_t ai : indices(state)){
                state.set_valid(ai, state_action_weights[si][ai] > 0);
            }
        }

        // Normalize the transition probabilities and rewards
        mdp->normalize();

        // set initial distribution
        for(long state : samples.get_initial()){
            initial.add_sample(state, 1.0, 0.0);
        }
        initial.normalize();
    }

    /** \returns A constant pointer to the internal MDP */
    shared_ptr<const MDP> get_mdp() const {return const_pointer_cast<const MDP>(mdp);}

    /** \returns A modifiable pointer to the internal MDP. Take care when changing. */
    shared_ptr<MDP> get_mdp_mod() {return mdp;}

    /** Initial distribution */
    Transition get_initial() const {return initial;}

    /** \returns Cumulative sample weights for each state and action */
    vector<vector<prec_t>> get_state_action_weights(){return state_action_weights;}

    /** Returns the number of states in the samples (the highest observed
    state index plus one) */
    long state_count(){return state_action_weights.size();}

protected:

    /** Internal MDP representation */
    shared_ptr<MDP> mdp;

    /** Initial distribution */
    Transition initial;

    /** Sample weights (cumulative for each state and action) */
    vector<vector<prec_t>> state_action_weights;
};
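/*
Usage sketch (not part of the original header): constructing an MDP from
discrete samples and retrieving it. The transitions and the initial state
below are invented for illustration.

\code
DiscreteSamples samples;
samples.add_initial(0);
// two samples for (state 0, action 0): transition probabilities become 0.5/0.5
samples.add_sample(0, 0, 1, 1.0, 1.0, 0, 0);
samples.add_sample(0, 0, 2, 0.0, 1.0, 0, 1);

SampledMDP smdp;
smdp.add_samples(samples);
shared_ptr<const MDP> mdp = smdp.get_mdp();
Transition init = smdp.get_initial();
\endcode
*/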


/*
Constructs a robust MDP from integer samples.

In integer samples, each decision state, expectation state,
and action is identified by an integer.

There is some extra memory overhead in this class over a plain MDP, since it
stores the number of samples observed for each state and action.

Important: Actions that are not sampled (no samples for that state and
action pair) are labeled as invalid and are not included in the computation
of the value function or the solution.
*/
//template<typename Model>
//class SampledRMDP{
//public:
//
//    /** Constructs an empty MDP from discrete samples */
//    SampledRMDP();
//
//    /**
//    Constructs or adds states and actions based on the provided samples. Transition
//    probabilities of the existing samples are normalized.
//
//    \param samples Source of the samples
//    */
//    void add_samples(const DiscreteSamples& samples);
//
//    /** \returns A constant pointer to the internal MDP */
//    shared_ptr<const Model> get_rmdp() const {return const_pointer_cast<const Model>(mdp);}
//
//    /** \returns A modifiable pointer to the internal MDP.
//    Take care when changing. */
//    shared_ptr<Model> get_rmdp_mod() {return mdp;}
//
//    /** Initial distribution */
//    Transition get_initial() const {return initial;}
//
//protected:
//
//    /** Internal MDP representation */
//    shared_ptr<Model> mdp;
//
//    /** Initial distribution */
//    Transition initial;
//
//    /** Sample counts */
//    vector<vector<size_t>> state_action_counts;
//};


} // end namespace msen
} // end namespace craam