Quelle MDP_fin.thy

Sprache: Isabelle

theory MDP_fin
  imports
  "MDP-Rewards.MDP_reward"
begin

locale MDP_on = MDP_act_disc arb_act A K r l
  for
    A and
    K :: "'s ::countable × 'a ::countable ==> 's pmf" and r l arb_act +
  fixes S :: "'s set"
  assumes
    fin_states: "finite S" and
    fin_actions: "∧s. finite (A s)" and
    K_closed: "set_pmf (K (s,a)) ⊆ S"
begin

lemma L_{b_indep}:
  assumes "∧s. s ∈ S ==> apply_bfun v s = apply_bfun v' s"
    and "s ∈ S"
  shows "L_b v s = L_b v' s"
proof -
  have "measure_pmf.expectation (K (s, a)) (apply_bfun v) = measure_pmf.expectation (K (s, a)) (apply_bfun v')" for a
    using assms K_closed subsetD
    by (auto intro!: AE_pmfI Bochner_Integration.integral_cong_AE)
  thus ?thesis
    unfolding L_{b_eq_SUP_L}_a L_{a_int} by auto
qed

end

locale MDP_nat_type = MDP_act A K
  for A :: "nat ==> nat set" and K +
  assumes A_fin : "∧s. finite (A s)"

locale MDP_nat = MDP_nat_type +
  fixes states :: nat
  assumes K_closed: "∀s < states. set_pmf (K (s,a)) ⊆ {0..<states}"
  assumes K_closed_compl: "∀s≥ states. set_pmf (K (s,a)) ⊆ {states..}"
  assumes A_outside: "∧s. s ≥ states ==> A s = {0}"

locale MDP_nat_disc = MDP_nat arb_act A K states + MDP_act_disc arb_act A K r l
  for A K r l arb_act states +
  assumes reward_zero_outside: "∀s≥states. r (s,a) = 0"
begin
lemma L_{b_eq_L}_{a_max}': "L_b v s = (MAX a ∈ A s. L_a a v s)"
  unfolding L_{b_eq_L}_{a_max}
  using finite_arg_max_eq_Max[of "A s" "λa. L_a a v s"] A_ne A_fin
  by auto

abbreviation "state_space ≡ {0..<states}"

lemma set_pmf_Xn': "s ∉ state_space ==> set_pmf (Xn' p s i) ⊆ {states..}"
  using K_closed_compl
  by (induction i arbitrary: p s) (auto dest!: subsetD simp: Suc_Xn' linorder_not_less)

lemma set_pmf_Pn': "s ∉ state_space ==> (∀sa ∈ set_pmf (Pn' p s i). fst sa∉ state_space)"
  using set_pmf_Xn'[unfolded Xn'_Pn'] by fastforce

lemma reward_Pn'_notin: "s ∉ state_space ==> (∀sa ∈ set_pmf (Pn' p s i). r sa = 0)"
  using set_pmf_Pn' reward_zero_outside by (fastforce simp: linorder_not_less)

lemma ν_zero_notin:
  assumes "s ∉ state_space"
  shows "ν p s = 0"
proof -
  have "ν_fin p n s = 0" for n
    using assms reward_Pn'_notin
    by (auto simp: ν_fin_eq_Pn intro!: sum.neutral integral_eq_zero_AE AE_pmfI)
  thus ?thesis
    unfolding ν_def by auto
qed

lemma ν_opt_zero_notin:
  assumes "s ∉ state_space"
  shows "ν_opt s = 0"
  unfolding ν_opt_def using assms ν_zero_notin policies_ne by auto

end

end

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.3 Sekunden ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.