theory MDP_fin imports "MDP-Rewards.MDP_reward" begin
locale MDP_on = MDP_act_disc arb_act A K r l for
A and
K :: "'s ::countable × 'a ::countable ==> 's pmf"and r l arb_act + fixes S :: "'s set" assumes
fin_states: "finite S"and
fin_actions: "∧s. finite (A s)"and
K_closed: "set_pmf (K (s,a)) ⊆ S" begin
lemmaLb_indep: assumes"∧s. s ∈ S ==> apply_bfun v s = apply_bfun v' s" and"s ∈ S" shows"Lb v s = Lb v' s" proof - have"measure_pmf.expectation (K (s, a)) (apply_bfun v) = measure_pmf.expectation (K (s, a)) (apply_bfun v')"for a using assms K_closed subsetD by (auto intro!: AE_pmfI Bochner_Integration.integral_cong_AE) thus ?thesis unfoldingLb_eq_SUP_La La_intby auto qed
end
locale MDP_nat_type = MDP_act A K for A :: "nat ==> nat set"and K + assumes A_fin : "∧s. finite (A s)"
locale MDP_nat = MDP_nat_type + fixesstates :: nat assumes K_closed: "∀s < states. set_pmf (K (s,a)) ⊆ {0..<states}" assumes K_closed_compl: "∀s≥ states. set_pmf (K (s,a)) ⊆ {states..}" assumes A_outside: "∧s. s ≥ states ==> A s = {0}"
locale MDP_nat_disc = MDP_nat arb_act A K states + MDP_act_disc arb_act A K r l for A K r l arb_act states + assumes reward_zero_outside: "∀s≥states. r (s,a) = 0" begin lemmaLb_eq_La_max': "Lb v s = (MAX a ∈ A s. La a v s)" unfoldingLb_eq_La_max using finite_arg_max_eq_Max[of "A s""λa. La a v s"] A_ne A_fin by auto
abbreviation"state_space ≡ {0..<states}"
lemma set_pmf_Xn': "s ∉ state_space ==> set_pmf (Xn' p s i) ⊆ {states..}" using K_closed_compl by (induction i arbitrary: p s) (auto dest!: subsetD simp: Suc_Xn' linorder_not_less)
lemma set_pmf_Pn': "s ∉ state_space ==> (∀sa ∈ set_pmf (Pn' p s i). fst sa∉ state_space)" using set_pmf_Xn'[unfolded Xn'_Pn'] by fastforce
lemma reward_Pn'_notin: "s ∉ state_space ==> (∀sa ∈ set_pmf (Pn' p s i). r sa = 0)" using set_pmf_Pn' reward_zero_outside by (fastforce simp: linorder_not_less)
lemma ν_zero_notin: assumes"s ∉ state_space" shows"ν p s = 0" proof - have"ν_fin p n s = 0"for n using assms reward_Pn'_notin by (auto simp: ν_fin_eq_Pn intro!: sum.neutral integral_eq_zero_AE AE_pmfI) thus ?thesis unfolding ν_defby auto qed
lemma ν_opt_zero_notin: assumes"s ∉ state_space" shows"ν_opt s = 0" unfolding ν_opt_def using assms ν_zero_notin policies_ne by auto
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.