Quelle MDP_reward.thy

Sprache: Isabelle

(* Author: Maximilian Schäffeler *)

theory MDP_reward
  imports
    Bounded_Functions
    MDP_reward_Util
    Blinfun_Util
    MDP_disc
begin

section ‹Markov Decision Processes with Rewards›

(* Locale extending discrete_MDP (instantiated with A and K) by a reward
   function r on state-action pairs and a real discount factor l.
   Assumptions made here: l is nonnegative (a simp rule) and the range of r
   is bounded. No upper bound on l is assumed in this locale. *)
locale MDP_reward = discrete_MDP A K
  for
    A and
    K :: "'s ::countable × 'a ::countable ==> 's pmf" +
  fixes
    r :: "('s × 'a) ==> real" and
    l :: real
  assumes
    zero_le_disc [simp]: "0 ≤ l" and
    r_bounded: "bounded (range r)"
begin

text ‹
This extension to the basic MDPs is formalized with another locale.
It assumes the existence of a reward function @{term r} which takes a state-action pair to a real
number. We assume that the function is bounded @{prop r_bounded}.

Furthermore, we fix a discounting factor @{term l}, where @{term "0 ≤ l ∧ l < 1"}.
›

subsection ‹Util›
subsubsection ‹Basic Properties of rewards›
(* The reward function is a member of bfun; follows from r_bounded. *)
lemma r_bfun: "r ∈ bfun"
  using r_bounded
  by auto

(* The image of an arbitrary set X under r is bounded, not only the full range. *)
lemma r_bounded': "bounded (r ` X)"
  by (auto intro: r_bounded bounded_subset)

(* r_M: supremum of the absolute reward over all state-action pairs. *)
definition "r_M = (⊔sa. ∣r sa∣)"

(* Every absolute reward is at most r_M. *)
lemma abs_r_le_r_M: "∣r sa∣ ≤ r_M"
  using bounded_norm_le_SUP_norm r_bounded r_{M_def} by fastforce

(* r_M equals its own absolute value; registered as a simp rule. *)
lemma abs_r_{M_eq_r}_M [simp]: "∣r_M∣ = r_M"
  using abs_r_le_r_M by fastforce

(* r_M is nonnegative (consequence of the previous lemma). *)
lemma r_{M_nonneg}: "0 ≤ r_M"
  using abs_r_{M_eq_r}_M by linarith

(* The reward at index i of a trace t is Borel-measurable w.r.t. S
   (S is declared outside this file). *)
lemma measurable_r_nth [measurable]: "(λt. r (t !! i)) ∈ borel_measurable S"
  by measurable

(* The reward at index i of a trace is integrable w.r.t. T p s;
   the proof uses the bound abs_r_le_r_M. *)
lemma integrable_r_nth [simp]: "integrable (T p s) (λt. r (t !! i))"
  by (fastforce simp: bounded_iff intro: abs_r_le_r_M)

(* The expected absolute reward in state s under an action distribution d
   is at most r_M. *)
lemma expectation_abs_r_le: "measure_pmf.expectation d (λa. ∣r (s, a)∣) ≤ r_M"
  using abs_r_le_r_M
  by (fastforce intro!: measure_pmf.integral_le_const measure_pmf.integrable_const_bound)

(* The absolute value of the expectation of r under any pmf d is at most r_M. *)
lemma abs_exp_r_le: "∣measure_pmf.expectation d r∣ ≤ r_M"
  using abs_r_le_r_M
  by (fastforce intro!: measure_pmf.integral_le_const order.trans[OF integral_abs_bound] measure_pmf.integrable_const_bound)

subsubsection ‹Infinite discounted sums›
(* Powers of the discount factor can be pulled out of an absolute value
   (relies on l being nonnegative, which is a simp rule of the locale). *)
lemma abs_disc_eq[simp]: "∣l ^ i * x∣ = l ^ i * ∣x∣"
  by (auto simp: abs_mult)

(* The same fact for norms of scalar multiples l^t *_R F. *)
lemma norm_l_pow_eq[simp]: "norm (l^t *_R F) = l^t * norm F"
  by auto

subsection ‹Total Reward for Single Traces›

(* ν_trace_fin t N: discounted sum of the rewards at the first N indices of trace t. *)
abbreviation "ν_trace_fin t N ≡ ∑i < N. l ^ i * r (t !! i)"
(* ν_trace t: the corresponding infinite discounted sum over all indices. *)
abbreviation "ν_trace t ≡ ∑i. l ^ i * r (t !! i)"

(* The finite discounted trace reward is bounded in absolute value by the
   finite sum of l^i * r_M. *)
lemma abs_ν_trace_fin_le: "∣ν_trace_fin t N∣ ≤ (∑i < N. l^i * r_M)"
  by (auto intro!: sum_mono order.trans[OF sum_abs] mult_left_mono abs_r_le_r_M)

(* The infinite discounted trace reward is Borel-measurable w.r.t. S. *)
lemma measurable_suminf_reward[measurable]: "ν_trace ∈ borel_measurable S"
  by measurable

(* The finite discounted trace reward is integrable w.r.t. T p s. *)
lemma integrable_ν_trace_fin: "integrable (T p s) (λt. ν_trace_fin t N)"
  by (fastforce simp: bounded_iff intro: abs_ν_trace_fin_le)

(* Everything inside this context is stated for one fixed policy p. *)
context
  fixes p :: "('s, 'a) pol"
begin

subsection ‹Expected Finite-Horizon Discounted Reward›
(* ν_fin n s: integral of the n-step discounted trace reward w.r.t. T p s. *)
definition "ν_fin n s = ∫t. ν_trace_fin t n ∂T p s"

(* The bound for single traces carries over to the integral. *)
lemma abs_ν_fin_le: "∣ν_fin N s∣ ≤ (∑i<N. l^i * r_M)"
  unfolding ν_fin_def
  using abs_ν_trace_fin_le
  by (fastforce intro!: prob_space.integral_le_const order_trans[OF integral_abs_bound])

(* For each horizon N, ν_fin N is a member of bfun (as a function of the state). *)
lemma ν_fin_bfun: "(λs. ν_fin N s) ∈ bfun"
  by (auto intro!: abs_ν_fin_le)

(* Lift ν_fin to the bounded-function type; well-definedness is ν_fin_bfun. *)
lift_definition ν_{b_fin} :: "nat ==> 's ==>_b real" is ν_fin
  using ν_fin_bfun .

(* Recursion step: horizon n+1 adds the discounted integral of the reward at index n. *)
lemma ν_fin_Suc[simp]: "ν_fin (Suc n) s = ν_fin n s + l ^ n * ∫t. r (t !! n) ∂T p s"
  by (simp add: ν_fin_def)

(* Horizon 0 yields reward 0. *)
lemma ν_fin_zero[simp]: "ν_fin 0 s = 0"
  by (simp add: ν_fin_def)

(* ν_fin written as a finite discounted sum of expectations of r under Pn' p s i
   (Pn' is defined outside this file); proved by induction on n. *)
lemma ν_fin_eq_Pn: "ν_fin n s = (∑i<n. l^i * measure_pmf.expectation (Pn' p s i) r)"
  by (induction n) (auto simp: Pn'_eq_T integral_distr)
end

subsection ‹Expected Total Discounted Reward›

(* ν p s: the limit (via lim) of the finite-horizon values ν_fin p n s as n grows. *)
definition "ν p s = lim (λn. ν_fin p n s)"

(* Alias for the defining equation of ν. *)
lemmas ν_eq_lim = ν_def

(* ν as an infinite discounted sum of expectations of r under Pn' p s i. *)
lemma ν_eq_Pn: "ν p s = (∑i. l^i * measure_pmf.expectation (Pn' p s i) r)"
  by (simp add: ν_fin_eq_Pn ν_eq_lim suminf_eq_lim)

subsection ‹Reward of a Decision Rule›
(* Everything inside this context is stated for one fixed decision rule d. *)
context
  fixes d :: "('s, 'a) dec"
begin
(* r_dec s: the reward r (s, a) integrated over the action distribution d s. *)
abbreviation "r_dec s ≡ ∫a. r (s, a) ∂d s"

(* The reward of a decision rule is bounded in absolute value by r_M. *)
lemma abs_r_dec_le: "∣r_dec s∣ ≤ r_M"
  using expectation_abs_r_le integral_abs_bound order_trans by fast

(* r_dec expressed as the expectation of r under K0' d s
   (K0' is defined outside this file). *)
lemma r_dec_eq_r_K0: "r_dec s = measure_pmf.expectation (K0' d s) r"
  by (simp add: K0'_def)

(* r_dec is a member of bfun. *)
lemma r_dec_bfun: "r_dec ∈ bfun"
  using abs_r_dec_le by (auto intro!: bfun_normI)

(* Lift r_dec to the bounded-function type; well-definedness is r_dec_bfun. *)
lift_definition r_dec_b :: "'s ==>_b real" is "r_dec"
  using r_dec_bfun .

(* Make the representation equation of r_dec_b and bfun.Bfun_inverse simp rules. *)
declare r_dec_b.rep_eq[simp] bfun.Bfun_inverse[simp]

(* The norm of r_dec_b is at most r_M. *)
lemma norm_r_dec_le: "norm r_dec_b ≤ r_M"
  by (simp add: abs_r_dec_le norm_bound)
end

(* For a decision rule built with mk_dec_det from d, the reward in state s
   is the reward of the pair (s, d s). *)
lemma r_dec_det [simp]: "r_dec (mk_dec_det d) s = r (s, d s)"
  unfolding mk_dec_det_def by auto

subsection ‹Transition Probability Matrix for MDPs›

(* Everything inside this context is stated for a fixed sequence p of
   decision rules indexed by natural numbers. *)
context
  fixes p :: "nat ==> ('s, 'a) dec"
begin
(* P_X n: push_exp applied to the map s ↦ Xn' (mk_markovian p) s n
   (push_exp, Xn' and mk_markovian are defined outside this file). *)
definition "P_X n = push_exp (λs. Xn' (mk_markovian p) s n)"

(* At index 0, P_X is the identity. *)
lemma P_{X_0}[simp]: "P_X 0 = id"
  by (simp add: P_{X_def})

(* P_X t is a bounded linear map. *)
lemma P_{X_bounded_linear}[simp]: "bounded_linear (P_X t)"
  unfolding P_{X_def} by simp

(* The operator norm of P_X t is 1. *)
lemma norm_P_X [simp]: "onorm (P_X t) = 1"
  unfolding P_{X_def} by simp

(* Consequently P_X never increases the norm of its argument. *)
lemma norm_P_{X_apply}[simp]: "norm (P_X n x) ≤ norm x"
  using onorm[OF P_{X_bounded_linear}] by simp

(* Applying P_X t to the reward of the t-th decision rule stays within r_M. *)
lemma P_{X_bound_r}: "norm (P_X t (r_dec_b (p t))) ≤ r_M"
  using norm_P_{X_apply} norm_r_dec_le order.trans by blast

(* The sequence t ↦ P_X t (r_dec_b (p t)) has bounded range. *)
lemma P_{X_bounded_r}: "bounded (range (λt. (P_X t (r_dec_b (p t)))))"
  using P_{X_bound_r} by (auto intro!: boundedI)

end

(* Pointwise formula: for the policy mk_markovian p, the finite-horizon value at
   state s is the finite discounted sum of P_X p i applied to the reward of p i. *)
lemma ν_fin_elem: "ν_fin (mk_markovian p) n s = (∑i<n. l^i * P_X p i (r_dec_b (p i)) s)"
  unfolding P_{X_def} ν_fin_eq_Pn Pn'_markovian_eq_Xn'_bind measure_pmf_bind
  using measure_pmf_in_subprob_algebra abs_r_le_r_M
  by (subst integral_bind) (auto simp: r_dec_eq_r_K0)

(* The same identity at the level of the bounded-function type. *)
lemma ν_{b_fin_eq_}P_X: "ν_{b_fin} (mk_markovian p) n = (∑i<n. l^i *_R P_X p i (r_dec_b (p i)))"
  by (auto simp: ν_fin_elem sum_apply_bfun ν_{b_fin}.rep_eq)

(* The same identity for the unlifted ν_fin, derived from the lifted version. *)
lemma ν_fin_eq_P_X: "ν_fin (mk_markovian p) n = (∑i<n. l^i *_R P_X p i (r_dec_b (p i)))"
  by (metis ν_{b_fin}.rep_eq ν_{b_fin_eq_}P_X)

text ‹
@{term "P₁ d v"} defines for each state the expected value of @{term v}
after taking a single step in the MDP according to the decision rule @{term d}.
›

(* Everything inside this context is stated for one fixed decision rule d. *)
context
  fixes d :: "('s, 'a) dec"
begin
(* P₁: push_exp (K_st d) packaged as a bounded linear operator on bounded
   functions; the proof obligation is discharged by push_exp_bounded_linear. *)
lift_definition P₁ :: "('s ==>_b real) ==>_L ('s ==>_b real)" is "push_exp (K_st d)"
  using push_exp_bounded_linear .

(* P₁ maps the constant function 1 to itself. *)
lemma P₁_bfun_one [simp]:"P₁ 1 = 1"
  by (auto simp: P₁.rep_eq)

(* The same holds for every power of P₁. *)
lemma P₁_pow_bfun_one [simp]: "(P₁^^t) 1 = 1"
  by (induction t) auto

(* Applying the n-th power of the operator equals iterating its application n times. *)
lemma P₁_pow: "blinfun_apply (P₁ ^^ n) = blinfun_apply P₁ ^^ n"
  by (induction n) auto

(* The operator norm of P₁ is 1. *)
lemma norm_P₁ [simp]: "norm P₁ = 1"
  by (simp add: norm_blinfun.rep_eq P₁.rep_eq)
end

(* Unfolding P_X at the front: n+1 steps under p equal one step with p 0
   followed by n steps under the shifted sequence. *)
lemma P_{X_Suc}: "P_X p (Suc n) v = P₁ (p 0) ((P_X (λn. p (Suc n)) n) v)"
  unfolding P_{X_def} P₁.rep_eq
  by (fastforce intro!: abs_le_norm_bfun integral_bind[where K = "count_space UNIV"]
      simp: measure_pmf_in_subprob_algebra measure_pmf_bind Suc_Xn'_markovian)

(* Unfolding P_X at the back: n+1 steps equal P_X p n applied after P₁ (p n).
   Proved by induction on n with p generalised. *)
lemma P_{X_Suc}': "P_X p (Suc n) v = P_X p n (P₁ (p n) v)"
proof (induction n arbitrary: p)
  case 0
  thus ?case
    by (simp add: P_{X_Suc})
next
  case (Suc n)
  thus ?case
    by (metis P_{X_Suc})
qed

(* For a constant sequence of decision rules, P_X is the n-th power of P₁. *)
lemma P_{X_const}: "P_X (λ_. d) n = P₁ d ^^ n"
  by (induction n) (auto simp add: P₁_pow P_{X_Suc})

(* Same statement as P_{X_const}, with the decision rule named p. *)
lemma P_{X_sconst}: "P_X (λ_. p) n = P₁ p ^^n"
  using P_{X_const}.

(* The operator norm of every power of P₁ d is 1 (via norm_P_X). *)
lemma norm_P_n[simp]: "onorm (P₁ d ^^ n) = 1"
  using norm_P_X[of "λ_. d"] by (auto simp: P_{X_sconst})

(* The same fact stated with norm on the operator type. *)
lemma norm_P₁_pow [simp]: "norm (P₁ d ^^ t) = 1"
  by (simp add: norm_blinfun.rep_eq)

(* P_{X_Suc}' read from right to left. *)
lemma P_{X_Suc_n_elem}: "P_X p n (P₁ (p n) v) = P_X p (Suc n) v"
  using P_{X_Suc}' P₁.rep_eq by auto

(* One step of P_X under p is application of P₁ (p 0). *)
lemma P₁_eq_P_{X_one}: "blinfun_apply (P₁ (p 0)) = P_X p 1"
  by (auto simp: P_{X_Suc}' P₁.rep_eq)

(* P₁ d maps nonnegative functions to nonnegative functions. *)
lemma P₁_pos: "0 ≤ u ==> 0 ≤ P₁ d u"
  by (auto simp: P₁.rep_eq less_eq_bfun_def)

(* The same fact packaged as the predicate nonneg_blinfun. *)
lemma P₁_nonneg: "nonneg_blinfun (P₁ d)"
  by (simp add: P₁_pos nonneg_blinfun_def)

(* Nonnegativity is preserved by every power of P₁ d. *)
lemma P₁_n_pos: "0 ≤ u ==> 0 ≤ (P₁ d ^^ n) u"
  by (induction n) (auto simp: P₁.rep_eq less_eq_bfun_def)

(* Powers of P₁ d satisfy nonneg_blinfun. *)
lemma P₁_n_nonneg: "nonneg_blinfun (P₁ d ^^ n)"
  by (simp add: P₁_n_pos nonneg_blinfun_def)

(* Nonnegativity is also preserved after scaling the n-th power by l^n. *)
lemma P₁_n_disc_pos: "0 ≤ u ==> 0 ≤ (l^n *_R P₁ d ^^n) u"
  by (auto simp: P₁_n_pos scaleR_nonneg_nonneg blinfun.scaleR_left)

(* ... and by finite sums of such discounted powers. *)
lemma P₁_sum_pos: "0 ≤ u ==> 0 ≤ (∑t≤n. l^t *_R (P₁ d ^^ t)) u"
  using P₁_n_pos P₁_pos
  by (induction n) (auto simp: blinfun.add_left blinfun.scaleR_left scaleR_nonneg_nonneg)

(* For nonnegative u, the finite discounted sum applied to u dominates u
   (proved by induction on n, adding one nonnegative term per step). *)
lemma P₁_sum_ge:
  assumes "0 ≤ u"
  shows "u ≤ (∑t≤n. l^t *_R P₁ d ^^t) u"
  using P₁_n_disc_pos[OF assms, of "Suc _"]
  by (induction n) (auto intro: add_increasing2 simp add: blinfun.add_left)

subsection ‹The Bellman Operator›
(* L d v: the reward of decision rule d plus the discounted one-step
   expectation P₁ d applied to v. *)
definition "L d v ≡ r_dec_b d + l *_R P₁ d v"

(* Norm bound for L d v in terms of r_M and the norm of v. *)
lemma norm_L_le: "norm (L d v) ≤ r_M + l * norm v"
  using norm_blinfun[of "P₁ d"] norm_P₁ norm_r_dec_le
  by (auto intro!: norm_add_rule_thm mult_left_mono simp: L_def)

(* The same bound pointwise, for the absolute value at a state s. *)
lemma abs_L_le: "∣L d v s∣ ≤ r_M + l * norm v"
  using order.trans[OF norm_le_norm_bfun norm_L_le] by auto

subsubsection ‹Bellman Operator for Single Actions›
(* L_a a v s: immediate reward of (s, a) plus the discounted expectation of v
   under the transition distribution K (s,a). *)
abbreviation "L_a a v s ≡ r (s, a) + l * measure_pmf.expectation (K (s,a)) v"

(* Absolute bound for L_a when v is a bounded function. *)
lemma L_{a_le}:
  fixes v :: "'s ==>_b real"
  shows "∣L_a a v s∣ ≤ r_M + l * norm v"
  using abs_r_le_r_M
  by (fastforce intro: order_trans[OF abs_triangle_ineq] order_trans[OF integral_abs_bound]
      add_mono mult_mono measure_pmf.integral_le_const abs_le_norm_bfun
      simp: abs_mult)

(* As a function of the action, L_a has bounded range. *)
lemma L_{a_bounded}:
  "bounded (range (λa. L_a a (apply_bfun v) s))"
  using L_{a_le} by (auto intro!: boundedI)

(* Integrating L_a over an action distribution d splits into the integral of the
   reward and the discounted iterated integral of v. The two show-steps establish
   the integrability side conditions of integral_add. *)
lemma L_{a_int}:
  fixes d :: "'a pmf" and v :: "'s ==>_b real"
  shows "(∫a. L_a a v s ∂d) = (∫a. r (s, a) ∂d) + l * ∫a. ∫s'. v s' ∂K (s, a) ∂d"
proof (subst Bochner_Integration.integral_add)
  show "integrable d (λa. r (s, a))"
    using abs_r_le_r_M by (fastforce intro!: bounded_integrable simp: bounded_iff)
  show "integrable d (λa. l * ∫s'. v s' ∂K (s, a))"
    by (intro bounded_integrable)
      (auto intro!: mult_mono order_trans[OF integral_abs_bound] boundedI[of _ "l * norm v"]
        measure_pmf.integral_le_const simp: abs_le_norm_bfun abs_mult)
qed auto

(* L d v at state s is the expectation of L_a over the action distribution d s. *)
lemma L_eq_L_a: "L d v s = measure_pmf.expectation (d s) (λa. L_a a v s)"
  unfolding L_{a_int} L_def K_st_def P₁.rep_eq
  by (auto simp: measure_pmf_bind integral_measure_pmf_bind[where B = "norm v"] abs_le_norm_bfun)

(* For a decision rule built with mk_dec_det, L reduces to L_a of the chosen action. *)
lemma L_eq_L_{a_det}: "L (mk_dec_det d) v s = L_a (d s) v s"
  by (auto simp: L_eq_L_a mk_dec_det_def)

(* Conversely, an expectation of L_a under p is L for the decision rule that
   uses p at s and return_pmf (SOME a. a ∈ A t) at every other state t. *)
lemma L_{a_eq_L}: "measure_pmf.expectation p (λa. L_a a (apply_bfun v) s) =
  L (λt. if t = s then p else return_pmf (SOME a. a ∈ A t)) v s"
  unfolding L_eq_L_a by auto

(* One-sided (upper) pointwise bound for L d v. *)
lemma L_le: "L d v s ≤ r_M + l * norm v"
  unfolding L_def
  using norm_P₁ norm_blinfun[of "(P₁ d)"] abs_r_dec_le
  by (fastforce intro: order_trans[OF le_norm_bfun] add_mono mult_left_mono dest: abs_le_D1)

(* One-sided (upper) bound for L_a, derived from the absolute bound L_{a_le}. *)
lemma L_{a_le}': "L_a a (apply_bfun v) s ≤ r_M + l * norm v"
  using L_{a_le} abs_le_D1 by blast

subsection ‹Optimality Equations›

(* Supremum over all decision rules in D_R of the per-rule operator applied to v at s.
   NOTE(review): as rendered here this definition reuses the name L of the
   per-decision-rule operator defined earlier, so L_def is ambiguous in this text.
   The two were presumably distinct symbols originally; confirm against the
   upstream theory. *)
definition "L (v :: 's ==>_b real) s = (⊔d ∈ D_R. L d v s)"

(* The supremum operator yields a member of bfun. *)
lemma L_bfun: "L v ∈ bfun"
  unfolding L_def using abs_L_le ex_dec by (fastforce intro!: cSup_abs_le bfun_normI)

(* Lift the supremum operator to the bounded-function type; uses L_bfun. *)
lift_definition L_b :: "('s ==>_b real) ==> 's ==>_b real" is L
  using L_bfun .

(* For fixed v and s, the values L p v s range over a bounded set as p varies. *)
lemma L_bounded[simp, intro]: "bounded (range (λp. L p v s))"
  using abs_L_le by (auto intro!: boundedI)

(* The same for the image of an arbitrary set X. *)
lemma L_bounded'[simp, intro]: "bounded ((λp. L p v s) ` X)"
  by (auto intro: bounded_subset)

(* In particular such images are bounded above, as needed for cSUP rules. *)
lemma L_bdd_above[simp, intro]: "bdd_above ((λp. L p v s) ` X)"
  by (auto intro: bounded_imp_bdd_above)

(* For any d satisfying is_dec, L d v is below L_b v. *)
lemma L_le_L_b: "is_dec d ==> L d v ≤ L_b v"
  by (fastforce simp: L_b.rep_eq L_def intro!: cSUP_upper)

subsubsection ‹Equivalences involving @{const L_b}›

(* The supremum over decision rules in D_R of L d v s equals the supremum, over
   all action distributions pa whose support lies in A s, of the integral of L_a.
   Proved by antisymmetry:
   - "≤": each decision rule n in D_R is matched by an admissible distribution
     (via L_eq_L_a);
   - "≥": each admissible distribution n is extended to a decision rule ?p that
     uses n at s and some admissible distribution (chosen with SOME) elsewhere. *)
lemma SUP_step_MR_eq:
  "L v s = (⊔pa ∈ {pa. set_pmf pa ⊆ A s}. (∫a. L_a a v s ∂measure_pmf pa))"
  unfolding L_def
proof (intro antisym)
  show "(⊔d∈D_R. L d v s) ≤ (⊔pa ∈ {pa. set_pmf pa ⊆ A s}. ∫a. L_a a v s ∂measure_pmf pa)"
  proof (rule cSUP_mono)
    show "D_R ≠ {}"
      using D_{R_ne} .
  next show "bdd_above ((λpa. ∫a. L_a a v s ∂measure_pmf pa) ` {pa. set_pmf pa ⊆ A s})"
      using L_{a_bounded} L_{a_le}
      by (auto intro!: order_trans[OF integral_abs_bound]
          bounded_imp_bdd_above boundedI[where B = "r_M + l * norm v"]
          measure_pmf.integral_le_const bounded_integrable)
  next show "∃m∈{pa. set_pmf pa ⊆ A s}. L n v s ≤ ∫a. L_a a v s ∂measure_pmf m" if "n ∈ D_R" for n
      using that
      by (fastforce simp: L_eq_L_a L_{a_int} is_dec_def)
  qed
next
  (* The index set of admissible action distributions at s is nonempty. *)
  have aux: "{pa. set_pmf pa ⊆ A s} ≠ {}"
    using D_{R_ne} is_dec_def by auto
  show "(⊔pa∈{pa. set_pmf pa ⊆ A s}. ∫a. L_a a v s ∂measure_pmf pa) ≤ (⊔d∈D_R. L d v s)"
  proof (intro cSUP_least[OF aux] cSUP_upper2)
    fix n
    assume h: "n ∈ {pa. set_pmf pa ⊆ A s}"
    let ?p = "(λs'. if s = s' then n else SOME a. set_pmf a ⊆ A s')"
    (* Every state has some admissible action distribution, so SOME is well-behaved.
       Named ex_pmf (not aux) to avoid shadowing the outer fact. *)
    have ex_pmf: "∃a. set_pmf a ⊆ A sa" for sa
      using ex_dec is_dec_def by blast
    show "?p ∈ D_R"
      unfolding is_dec_def using h someI_ex[OF ex_pmf] by auto
    thus "(∫a. L_a a v s ∂n) ≤ L ?p v s"
      by (auto simp: L_eq_L_a)
    show "bdd_above ((λd. L d v s) ` D_R)"
      by (fastforce intro!: bounded_imp_bdd_above simp: bounded_def)
  qed
qed

(* SUP_step_MR_eq restated for the lifted operator L_b. *)
lemma L_{b_eq_SUP_L}_a: "L_b v s = (⊔p ∈ {p. set_pmf p ⊆ A s}. ∫a. L_a a v s ∂measure_pmf p)"
  using SUP_step_MR_eq L_b.rep_eq by presburger

(* The supremum over rules in D_D (wrapped by mk_dec_det) of L equals the
   supremum of L_a over the actions in A s. Both directions go through cSUP_mono:
   a rule n is matched by its action n s, and an action is matched by a rule
   that picks it at s and some action from A s' elsewhere. *)
lemma SUP_step_det_eq: "(⊔d ∈ D_D. L (mk_dec_det d) v s) = (⊔a ∈ A s. L_a a v s)"
proof (intro antisym cSUP_mono)
  show "bdd_above ((λa. L_a a v s) ` A s)"
    using L_{a_bounded} by (fastforce intro!: bounded_imp_bdd_above simp: bounded_def)
  show "bdd_above ((λd. L (mk_dec_det d) v s) ` D_D)"
    by (auto intro!: bounded_imp_bdd_above boundedI abs_L_le)
  show "∃m∈A s. L (mk_dec_det n) v s ≤ L_a m v s" if "n ∈ D_D" for n
    using that is_dec_det_def by (auto simp: L_eq_L_{a_det} intro: bexI[of _ "n s"])
  show "∃m∈D_D. L_a n v s ≤ L (mk_dec_det m) v s" if "n ∈ A s" for n
    using that A_ne
    by (fastforce simp: L_eq_L_{a_det} is_dec_det_def some_in_eq
        intro!: bexI[of _ "λs'. if s = s' then _ else SOME a. a ∈ A s'"])
qed (auto simp: A_ne)

(* L_a, as a function of the action, is integrable w.r.t. any pmf x.
   The two summands (reward and expectation of v) are handled separately. *)
lemma integrable_L_a: "integrable (measure_pmf x) (λa. L_a a (apply_bfun v) s)"
proof (intro Bochner_Integration.integrable_add integrable_mult_right)
  show "integrable (measure_pmf x) (λx. r (s, x))"
    using abs_r_le_r_M
    by (auto intro: measure_pmf.integrable_const_bound[of _ "r_M"])
next
  show "integrable (measure_pmf x) (λx. measure_pmf.expectation (K (s, x)) v)"
    by (auto intro!: bounded_integrable boundedI order.trans[OF integral_abs_bound]
        measure_pmf.integral_le_const abs_le_norm_bfun)
qed

(* The supremum of expected L_a over action distributions supported in A s
   equals the supremum of L_a over the single actions in A s.
   The "≤" direction uses lemma_4_3_1 (defined outside this file); the "≥"
   direction rewrites both sides with the two SUP_step lemmas. *)
lemma SUP_L_{a_eq_det}:
  fixes v :: "'s ==>_b real"
  shows "(⊔p∈{p. set_pmf p ⊆ A s}. ∫a. L_a a v s ∂measure_pmf p) = (⊔a∈A s. L_a a v s)"
proof (intro antisym)
  show "(⊔pa∈{pa. set_pmf pa ⊆ A s}. measure_pmf.expectation pa (λa. L_a a v s))
    ≤ (⊔a∈A s. L_a a v s)"
    using ex_dec is_dec_def integrable_L_a A_ne L_{a_bounded}
    by (fastforce intro: bounded_range_subset intro!: cSUP_least lemma_4_3_1)
  show "(⊔a∈A s. L_a a v s) ≤ (⊔p∈{p. set_pmf p ⊆ A s}. ∫a. L_a a v s ∂measure_pmf p)"
    unfolding SUP_step_MR_eq[symmetric] SUP_step_det_eq[symmetric] L_def
    using ex_dec_det by (fastforce intro!: cSUP_mono)
qed

(* Hence the supremum operator can be computed over D_D-rules only. *)
lemma L_eq_SUP_det: "L v s = (⊔d ∈ D_D. L (mk_dec_det d) v s)"
  using SUP_step_MR_eq SUP_step_det_eq SUP_L_{a_eq_det} by auto

(* The same statement for the lifted operator L_b. *)
lemma L_{b_eq_SUP_det}: "L_b v s = (⊔d ∈ D_D. L (mk_dec_det d) v s)"
  using L_eq_SUP_det unfolding L_b.rep_eq by auto

subsection ‹Monotonicity›

(* P_X p n is monotone. *)
lemma P_{X_mono}[intro]: "a ≤ b ==> P_X p n a ≤ P_X p n b"
  by (fastforce simp: P_{X_def} intro: integral_mono)

(* P₁ p is monotone (from nonneg_blinfun). *)
lemma P₁_mono[intro]: "a ≤ b ==> P₁ p a ≤ P₁ p b"
  using P₁_nonneg by auto

(* L d is monotone in its value argument. *)
lemma L_mono[intro]: "u ≤ v ==> L d u ≤ L d v"
  unfolding L_def by (auto intro: scaleR_left_mono)

(* L_b is monotone. *)
lemma L_{b_mono}[intro]: "u ≤ v ==> L_b u ≤ L_b v"
  using  ex_dec L_mono[of u v]
  by (fastforce intro!: cSUP_mono simp: L_b.rep_eq L_def)

(* If L_b v ≤ v, then L d v ≤ v for every decision rule d in D_R. *)
lemma step_mono:
  assumes "L_b v ≤ v" "d ∈ D_R"
  shows "L d v ≤ v"
  using assms L_le_L_b order.trans by blast

(* If v ≤ L_b v, then for every e > 0 there is a rule d in D_D whose
   L (mk_dec_det d) v is within e of dominating v. *)
lemma step_mono_elem_det:
  assumes "v ≤ L_b v" "e > 0"
  shows "∃d∈D_D. v ≤ L (mk_dec_det d) v + e *_R 1"
proof -
  (* Pointwise, v s is below the supremum of L_a over the actions in A s. *)
  have "v s ≤ (⊔a∈A s. L_a a v s)" for s
    using SUP_step_det_eq L_{b_eq_SUP_det} assms(1) by fastforce
  (* Hence some action in A s gets strictly above v s - e. *)
  hence "∃a∈A s. v s - e < L_a a v s" for s
    using A_ne L_{a_le}'
    by (subst less_cSUP_iff[symmetric]) (fastforce simp: assms add_strict_increasing algebra_simps intro!: bdd_above.I2)+
  hence aux: "∃a∈A s. v s ≤ L_a a v s + e" for s
    by (auto simp: diff_less_eq intro: less_imp_le)
  (* Collect the per-state choices into a single rule d. *)
  then obtain d where "is_dec_det d" "v s ≤ L (mk_dec_det d) v s + e" for s
    by (metis L_eq_L_{a_det} is_dec_det_def)
  thus ?thesis
    by fastforce
qed

(* The same statement with the witness taken from D_R. *)
lemma step_mono_elem:
  assumes "v ≤ L_b v" "e > 0"
  shows "∃d∈D_R. v ≤ L d v + e *_R 1"
  using assms step_mono_elem_det by blast

(* If L_b v ≤ v and p ∈ Π_M_R, then applying L (p n) before P_X p n
   does not increase the result. *)
lemma P_{X_L_le}:
  assumes "L_b v ≤ v" "p ∈ Π_M_R"
  shows "P_X p n (L (p n) v) ≤ P_X p n v"
  using assms step_mono by auto

end

(* Locale extending MDP_reward with the additional assumption that the
   discount factor l is strictly less than 1 (registered as a simp rule). *)
locale MDP_reward_disc = MDP_reward A K r l
  for
    A and
    K :: "'s ::countable × 'a ::countable ==> 's pmf" and
    r l +
  assumes
    disc_lt_one [simp]: "l < 1"
begin

(* is_opt_act v s a: a is an arg-max of L_a (for v at s) among the actions in A s. *)
definition "is_opt_act v s = is_arg_max (λa. L_a a v s) (λa. a ∈ A s)"
(* opt_acts v s: the set of all actions satisfying is_opt_act v s. *)
abbreviation "opt_acts v s ≡ {a. is_opt_act v s a}"

(* The series of l^i times a constant x is summable. *)
lemma summable_disc [intro, simp]: "summable (λi. l ^ i * x)"
  by (simp add: mult.commute)

(* Discounted rewards along any sequence sa of state-action pairs are summable,
   in all three variants (absolute value outside, inside, or absent).
   The first variant is shown by comparison with l^i * r_M; the other two follow. *)
lemma summable_r_disc[intro, simp]:
  "summable (λi. ∣l ^ i * r (sa i)∣)"
  "summable (λi. l ^ i * ∣r (sa i)∣)"
  "summable (λi. l ^ i * r (sa i))"
proof -
  show "summable (λi. ∣l ^ i * r (sa i)∣)"
    using abs_r_le_r_M
    by (fastforce intro!: mult_left_mono summable_comparison_test'[OF summable_disc])
  thus "summable (λi. l ^ i * r (sa i))" "summable (λi. l ^ i * ∣r (sa i)∣)"
    by (auto intro: summable_rabs_cancel)
qed

(* Intro rule: move the discount power inside the norm (constant F). *)
lemma summable_norm_disc_I[intro]:
  assumes "summable (λt. (l^t * norm F))"
  shows "summable (λt. norm (l^t *_R F))"
  using assms by auto

(* Intro rule: the same for a sequence F t. *)
lemma summable_norm_disc_I'[intro]:
  assumes "summable (λt. (l^t * norm (F t)))"
  shows "summable (λt. norm (l^t *_R F t))"
  using assms by auto

(* A sequence with bounded range has a summable discounted norm series
   (via Abel_lemma with radius 1). *)
lemma summable_discI [intro]:
  assumes "bounded (range F)"
  shows "summable (λt. l^t * norm (F t))"
proof -
  obtain b where "norm (F x) ≤ b" for x
    using assms by (auto simp: bounded_iff)
  thus ?thesis
    using Abel_lemma[of l 1 F b] by (auto simp: mult.commute)
qed

(* For Banach-space valued sequences with bounded range, the discounted
   series itself is summable. *)
lemma summable_disc_reward [intro]:
  assumes "bounded (range (F :: nat ==> 'b :: banach))"
  shows "summable (λt. l^t *_R (F t))"
  using assms by (auto intro: summable_norm_cancel)

(* Discounted norms of the values of a bounded function on nat are summable. *)
lemma summable_norm_bfun_disc: "summable (λt. l^t * norm (apply_bfun f t))"
  using norm_le_norm_bfun
  by (auto simp: mult.commute[of "l^_"] intro!: Abel_lemma[of _ 1 _ "norm f"])

(* ... and so are the discounted values themselves. *)
lemma summable_bfun_disc [simp]: "summable (λt. l^t * (apply_bfun f t))"
proof -
  have "norm (l^t * apply_bfun f t) = l^t * norm (apply_bfun f t)" for t
    by (auto simp: abs_mult)
  hence "summable (λt. norm (l^t * (apply_bfun f t)))"
    by (auto simp only: abs_mult)
  thus ?thesis
    by (auto intro: summable_norm_cancel)
qed

(* If norm f ≤ B, the discounted sum of the norms of f's values is at most
   the discounted sum of the constant B. *)
lemma norm_bfun_disc_le: "norm f ≤ B ==> (∑x. l^x * norm (apply_bfun f x)) ≤ (∑x. l^x * B)"
  by (fastforce intro!: suminf_le mult_left_mono norm_le_norm_bfun intro: order.trans)

(* The same bound for the discounted sum of the values themselves. *)
lemma norm_bfun_disc_le': "norm f ≤ B ==> (∑x. l^x * (apply_bfun f x)) ≤ (∑x. l^x * B)"
  by (auto simp: mult_left_mono intro!: suminf_le order.trans[OF _ norm_bfun_disc_le])

(* Closed form of the discounted sum of a constant B. *)
lemma sum_disc_lim_l: "(∑x. l^x * B) = B /(1-l)"
  by (simp add: suminf_mult2[symmetric] summable_geometric suminf_geometric[of l])

(* Bound of a discounted sum by norm f / (1-l).
   The fact sum_disc_lim used here is not defined in this file. *)
lemma sum_disc_bound: "(∑x. l^x * apply_bfun f x) ≤ (norm f) /(1-l)"
  using norm_bfun_disc_le' sum_disc_lim  by auto

(* Norm bound for a discounted series of bounded functions whose norms are
   uniformly bounded by B: triangle inequality for series, termwise bound,
   then the closed form. *)
lemma sum_disc_bound':
  fixes f :: "nat ==> 'b ==>_b real"
  assumes h: "∀n. norm (f n) ≤ B"
  shows "norm (∑x. l^x *_R f x) ≤ B /(1-l)"
proof -
  have "norm (∑x. l^x *_R f x) ≤ (∑x. norm (l^x *_R f x))"
    using h
    by (fastforce intro!: boundedI summable_norm)
  also have "… ≤ (∑x. l^x * B)"
    using h
    by (auto intro!: suminf_le boundedI simp: mult_mono')
  also have "… = B /(1-l)"
    by (simp add: sum_disc_lim)
  finally show "norm (∑x. l^x *_R f x) ≤ B /(1-l)" .
qed

(* The infinite discounted trace reward is bounded in absolute value by the
   discounted sum of r_M. *)
lemma abs_ν_trace_le: "∣ν_trace t∣ ≤ (∑i. l ^ i * r_M)"
  by (auto intro!: abs_r_le_r_M mult_left_mono order_trans[OF summable_rabs] suminf_le)

(* Hence it is integrable w.r.t. T p s. *)
lemma integrable_ν_trace: "integrable (T p s) ν_trace"
  by (fastforce simp: bounded_iff intro: abs_ν_trace_le)

(* Everything inside this context is stated for one fixed policy p. *)
context
  fixes p :: "('s, 'a) pol"
begin

(* ν p s is the integral of the infinite discounted trace reward w.r.t. T p s.
   The finite-horizon values converge to that integral by dominated
   convergence, with the constant dominating bound ∑N. l^N * r_M. *)
lemma ν_eq_ν_trace: "ν p s = ∫t. ν_trace t ∂T p s"
proof -
  have "(λn. ν_fin p n s) <---- ∫t. ν_trace t ∂T p s"
    unfolding ν_fin_def
  proof(intro integral_dominated_convergence)
    show "AE x in T p s. ν_trace_fin x <---- ν_trace x"
      using summable_LIMSEQ by blast
  next
    have "(∑i<N. l ^ i * r_M) ≤ (∑N. l ^ N * r_M)" for N
      by (auto intro: sum_le_suminf simp: r_{M_nonneg})
    thus "AE x in T p s. norm (ν_trace_fin x N) ≤ (∑N. l ^ N * r_M)" for N
      using order_trans[OF abs_ν_trace_fin_le] by fastforce
  qed auto
  thus ?thesis
    using ν_eq_lim limI by fastforce
qed

(* Absolute bound on ν by the discounted sum of r_M. *)
lemma abs_ν_le: "∣ν p s∣ ≤ (∑i. l^i * r_M)"
  unfolding ν_eq_Pn
  using abs_exp_r_le
  by (fastforce intro!: order.trans[OF summable_rabs] suminf_le summable_comparison_test'[OF summable_disc] mult_left_mono)

(* One-sided (upper) version of the previous bound. *)
lemma ν_le: "ν p s ≤ (∑i. l^i * r_M)"
  by (auto intro: abs_ν_le abs_le_D1)

(* 6.1.2 in Puterman *)
lemma ν_bfun: "ν p ∈ bfun"
  by (auto intro!: abs_ν_le)

(* Lift ν p to the bounded-function type; well-definedness is ν_bfun. *)
lift_definition ν_b :: "'s ==>_b real" is "ν p"
  using ν_bfun by blast

(* Norm bound for ν_b in closed form. *)
lemma norm_ν_le: "norm ν_b ≤ r_M / (1-l)"
  using abs_ν_le sum_disc_lim
  by (auto simp: ν_b.rep_eq norm_bfun_def' intro: cSUP_least)
end

(* At state s, the policy mk_markovian (as_markovian p (return_pmf s)) has the
   same value as p (as_markovian is defined outside this file). *)
lemma ν_as_markovian: "ν (mk_markovian (as_markovian p (return_pmf s))) s = ν p s"
  by (auto simp: ν_eq_Pn Pn_as_markovian_eq Pn'_def)

(* The same statement for the lifted value ν_b. *)
lemma ν_{b_as_markovian}: "ν_b (mk_markovian (as_markovian p (return_pmf s))) s = ν_b p s"
  using ν_as_markovian by (auto simp: ν_b.rep_eq)

subsection ‹Optimal Reward›

(* ν_MD s: supremum of ν over the policies mk_markovian_det p with p ∈ Π_M_D. *)
definition "ν_MD s ≡ ⊔p ∈ Π_M_D. ν (mk_markovian_det p) s"
(* ν_opt s: supremum of ν over all policies in Π_H_R. *)
definition "ν_opt s ≡ ⊔p ∈ Π_H_R. ν p s"

(* ν_opt is a member of bfun. *)
lemma ν_opt_bfun: "ν_opt ∈ bfun"
  using abs_ν_le policies_ne
  by (fastforce simp: ν_opt_def intro!: order_trans[OF cSup_abs_le] bfun_normI)

(* Lift ν_opt to the bounded-function type; well-definedness is ν_opt_bfun. *)
lift_definition ν_{b_opt} :: "'s ==>_b real" is ν_opt
  using ν_opt_bfun .

(* The lifted optimal value is the supremum of the lifted policy values. *)
lemma ν_{b_opt_eq}: "ν_{b_opt} s = (⊔p ∈ Π_H_R. ν_b p s)"
  using ν_b.rep_eq ν_{b_opt}.rep_eq ν_opt_def by presburger

(* Any p satisfying is_policy has value at most ν_opt. *)
lemma ν_le_ν_opt [intro]:
  assumes "is_policy p"
  shows "ν p s ≤ ν_opt s"
  unfolding ν_opt_def using abs_ν_le assms
  by (force intro: cSUP_upper intro!: bounded_imp_bdd_above boundedI)

(* The same comparison on the bounded-function type, for p ∈ Π_H_R. *)
lemma ν_{b_le_opt} [intro]: "p ∈ Π_H_R ==> ν_b p ≤ ν_{b_opt}"
  using ν_le by (fastforce simp: ν_b.rep_eq ν_{b_opt}.rep_eq)

(* Special case: policies mk_markovian_det p with p ∈ Π_M_D. *)
lemma ν_{b_le_opt_MD} [intro]: "p ∈ Π_M_D ==> ν_b (mk_markovian_det p) ≤ ν_{b_opt}"
  by (auto simp: mk_markovian_det_def is_dec_det_def is_dec_def is_policy_def)

(* Special case: policies mk_stationary_det d with is_dec_det d. *)
lemma ν_{b_le_opt_DD} [intro]: "is_dec_det d ==> ν_b (mk_stationary_det d) ≤ ν_{b_opt}"
  by (auto simp add: is_policy_def mk_markovian_def)

(* Special case: policies mk_stationary d with is_dec d. *)
lemma ν_{b_le_opt_DR} [intro]: "is_dec d ==> ν_b (mk_stationary d) ≤ ν_{b_opt}"
  by (auto simp add: is_policy_def mk_markovian_def)

(* The optimal value can be computed as a supremum over Π_M_R only.
   "≤": every policy n in Π_H_R is matched, at state s, by the policy obtained
   through as_markovian (ν_{b_as_markovian}).
   "≥": every mk_markovian p with p ∈ Π_M_R is itself a policy in Π_H_R. *)
lemma ν_{b_opt_eq_MR}: "ν_{b_opt} s = (⊔p ∈ Π_M_R. ν_b (mk_markovian p) s)"
proof (rule antisym)
  show "ν_{b_opt} s ≤ (⊔p∈Π_M_R. ν_b (mk_markovian p) s)"
    unfolding ν_{b_opt_eq}
  proof (rule cSUP_mono)
    show "Π_H_R ≠ {}"
      using policies_ne by simp
    show "bdd_above ((λp. ν_b (mk_markovian p) s) ` Π_M_R)"
      by (auto intro!: boundedI bounded_imp_bdd_above abs_ν_le simp: ν_b.rep_eq)
    show "n ∈ Π_H_R ==> ∃m∈Π_M_R. ν_b n s ≤ ν_b (mk_markovian m) s" for n
      using is_Π_M_{R_as_markovian} by (subst ν_{b_as_markovian}[symmetric]) fastforce
  qed
  show "(⊔p∈Π_M_R. ν_b (mk_markovian p) s) ≤ ν_{b_opt} s"
    using Π_M_{R_ne} Π_M_{R_imp_policies}
    by (auto intro!: cSUP_mono bounded_imp_bdd_above boundedI abs_ν_le simp: ν_{b_opt_eq}  ν_b.rep_eq)
qed

(* The discounted norms of P_X p t applied to the reward of p t are summable. *)
lemma summable_norm_disc_reward'[simp]: "summable (λt. l^t * norm (P_X p t (r_dec_b (p t))))"
  using P_{X_bounded_r} by auto

(* ... and so is the discounted series itself. *)
lemma summable_disc_reward_P_X [simp]: "summable (λt. l^t *_R P_X p t (r_dec_b (p t)))"
  using summable_disc_reward P_{X_bounded_r} by blast

(* The partial sums of that series converge to its infinite sum. *)
lemma disc_reward_tendsto:
  "(λn. ∑t<n. l^t *_R P_X p t (r_dec_b (p t))) <---- (∑t. l^t *_R P_X p t (r_dec_b (p t)))"
  by (simp add: summable_LIMSEQ)

(* The value of mk_markovian p is the infinite discounted series of P_X applied
   to the rewards; shown pointwise first, then lifted via suminf_apply_bfun. *)
lemma ν_eq_P_X: "ν (mk_markovian p) = (∑i. l^i *_R P_X p i (r_dec_b (p i)))"
proof -
  have "ν (mk_markovian p) s = (∑i. l^i * P_X p i (r_dec_b (p i)) s)" for s
    unfolding ν_b.rep_eq P_{X_def} ν_eq_Pn Pn'_markovian_eq_Xn'_bind measure_pmf_bind
    using measure_pmf_in_subprob_algebra abs_r_le_r_M
    by (subst integral_bind) (auto simp: r_dec_eq_r_K0)
  thus ?thesis
    by (auto simp: suminf_apply_bfun)
qed

(* The same identity for the lifted value ν_b. *)
lemma ν_{b_eq_}P_X: "ν_b (mk_markovian p) = (∑i. l^i *_R P_X p i (r_dec_b (p i)))"
  by (auto simp: ν_eq_P_X ν_b.rep_eq)

(* The lifted finite-horizon values converge to the lifted value. *)
lemma ν_{b_fin_tendsto_}ν_b: "(ν_{b_fin} (mk_markovian p)) <---- ν_b (mk_markovian p)"
  using disc_reward_tendsto ν_{b_eq_}P_X ν_{b_fin_eq_}P_X
  by presburger

(* The discounted one-step operator has norm strictly below 1. *)
lemma norm_P₁_l_less: "norm (l *_R P₁ d) < 1"
  by auto
(* The partial sums (up to and including n) of the discounted powers of P₁ d
   converge to the infinite series. *)
lemma disc_P₁_tendsto: "(λn. (∑t≤n. l^t *_R P₁ d ^^t)) <---- (∑t. l^t *_R P₁ d ^^t)"
  by (fastforce simp: bounded_iff intro: summable_LIMSEQ')

(* The same fact phrased with lim. *)
lemma disc_P₁_lim: "lim (λn. (∑t≤n. l^t *_R P₁ d ^^ t)) = (∑t. l^t *_R P₁ d ^^t)"
  using limI disc_P₁_tendsto
  by blast

(* ... and as convergence of the sequence of partial sums. *)
lemma convergent_disc_P₁: "convergent (λn. (∑t≤n. l^t *_R P₁ d ^^t))"
  using convergentI disc_P₁_tendsto
  by blast

(* For nonnegative u, the infinite discounted series applied to u dominates u:
   the bound holds for every partial sum (P₁_sum_ge) and passes to the
   pointwise limit. *)
lemma P₁_suminf_ge:
  assumes "0 ≤ u" shows "u ≤ (∑t. l^t *_R P₁ d ^^t) u"
proof -
  have aux: "∧x. (λn. (∑t≤n. l^t *_R P₁ d ^^t) u x) <---- (∑t. l^t *_R P₁ d ^^t) u x"
    using bfun_tendsto_apply_bfun disc_P₁_lim lim_blinfun_apply[OF convergent_disc_P₁]
    by fastforce
  have "∧n. u ≤ (∑t≤n. l^t *_R P₁ d ^^t) u"
    using P₁_sum_ge[OF assms] by auto
  thus ?thesis
    by (auto intro!: LIMSEQ_le_const[OF aux])
qed

(* In particular the series maps nonnegative functions to nonnegative functions. *)
lemma P₁_suminf_pos:
  assumes "0 ≤ u"
  shows "0 ≤ (∑t. l^t *_R P₁ d ^^t) u"
  using P₁_suminf_ge[of u] assms order.trans by auto

(* Monotonicity of the series, obtained by applying positivity to u - v. *)
lemma lemma_6_1_2_b:
  assumes "v ≤ u"
  shows "(∑t. l^t *_R P₁ d ^^t) v ≤ (∑t. l^t *_R P₁ d ^^t) u"
proof -
  have "0 ≤ (∑n. l ^ n *_R P₁ d ^^ n) (u - v)"
    using P₁_suminf_pos assms by simp
  thus ?thesis
    by (simp add: blinfun.diff_right)
qed

(* The value of mk_stationary d is the discounted series of powers of P₁ d
   applied to the reward of d. First the series of applied operators is
   obtained from ν_{b_eq_}P_X, then application is pulled out of the sum. *)
lemma ν_stationary: "ν_b (mk_stationary d) = (∑t. l^t *_R (P₁ d ^^ t)) (r_dec_b d)"
proof -
  have "ν_b (mk_stationary d) = (∑t. (l ^ t *_R (P₁ d ^^ t)) (r_dec_b d))"
    by (simp add: ν_{b_eq_}P_X scaleR_blinfun.rep_eq P_{X_sconst})
  also have "... = (∑t. (l ^ t *_R (P₁ d ^^ t))) (r_dec_b d)"
    by (subst bounded_linear.suminf[where f = "λx. blinfun_apply x (r_dec_b d)"])
      (auto intro!: bounded_linear.suminf boundedI)
  finally show ?thesis .
qed

(* The same value written with inv_L applied to id_blinfun - l *_R P₁ d
   (inv_L is defined outside this file). *)
lemma ν_stationary_inv: "ν_b (mk_stationary d) = inv_L (id_blinfun - l *_R P₁ d) (r_dec_b d)"
  by (auto simp: ν_stationary inv_{L_inf_sum} blincomp_scaleR_right)

text ‹The value of a markovian policy can be expressed in terms of @{const L}.›

(* One-step decomposition: the value of mk_markovian p is L (p 0) applied to
   the value of the policy built from the shifted sequence. The proof splits off
   the head of the series, rewrites the tail with P_{X_Suc}, and pulls
   P₁ (p 0) out of the infinite sum. *)
lemma ν_step: "ν_b (mk_markovian p) = L (p 0) (ν_b (mk_markovian (λn. p (Suc n))))"
proof -
  have s: "summable (λt. l^t *_R (P_X p (Suc t) (r_dec_b (p (Suc t)))))"
    using P_{X_bound_r} by (auto intro!: boundedI[of _ r_M])
  have
    "ν_b (mk_markovian p) = r_dec_b (p 0) + (∑t. l ^ (Suc t) *_R P_X p (Suc t) (r_dec_b (p (Suc t))))"
    by (subst suminf_split_head) (auto simp: ν_{b_eq_}P_X)
  also have
    "… = r_dec_b (p 0) + l *_R (∑t. P₁ (p 0) (l^t *_R P_X (λn. p (Suc n)) t (r_dec_b (p (Suc t)))))"
    using suminf_scaleR_right[OF s] by (auto simp: P_{X_Suc} blinfun.scaleR_right)
  also have
    "… = L (p 0) (ν_b (mk_markovian (λn. p (Suc n))))"
    using blinfun.bounded_linear_right bounded_linear.suminf[of "blinfun_apply (P₁ (p 0))"]
    by (fastforce simp add: ν_{b_eq_}P_X L_def)
  finally show ?thesis .
qed

(* The value of mk_stationary d is a fixed point of L d. *)
lemma L_ν_fix: "ν_b (mk_stationary d) = L d (ν_b (mk_stationary d))"
  using ν_step .

(* Conversely, any fixed point of L p equals the value of mk_stationary p:
   the fixed-point equation is rearranged and then inverted using
   inv_norm_le' (defined outside this file) together with norm_P₁_l_less. *)
lemma L_fix_ν:
  assumes "L p v = v"
  shows "v = ν_b (mk_stationary p)"
proof -
  have "r_dec_b p = (id_blinfun - l *_R P₁ p) v"
    using assms by (auto simp: eq_diff_eq L_def blinfun.diff_left blinfun.scaleR_left)
  hence "v = (∑t. (l *_R P₁ p)^^t) (r_dec_b p)"
    using inv_norm_le'(2)[OF norm_P₁_l_less] by auto
  thus "v = ν_b (mk_stationary p)"
    by (auto simp: ν_stationary blincomp_scaleR_right)
qed

(* Combined: v is a fixed point of L d iff v is the value of mk_stationary d. *)
lemma L_ν_fix_iff: "L d v = v ⟷ v = ν_b (mk_stationary d)"
  using L_fix_ν L_ν_fix by auto

subsection ‹Properties of Solutions of the Optimality Equations›

(* P_d p n v: P_X p n v scaled by the n-th power of the discount factor. *)
abbreviation "P_d p n v ≡ l^n *_R P_X p n v"

(* P_d p n v tends to 0 as n grows: its norm is bounded by l^n * norm v,
   which tends to 0. *)
lemma P_{d_lim}: "(λn. (P_d p n v)) <---- 0"
proof -
  have "(λn. l^n * norm v) <---- 0"
    by (auto intro!: tendsto_eq_intros)
  moreover have "norm (P_d p n v) ≤ l^n * norm v" for p n
    by (simp add: mult_mono')
  ultimately have "(λn. norm (P_d p n v)) <---- 0" for p
    by (auto simp: Lim_transform_bound[where g = "λn. (l^n * norm v)"])
  thus "(λn. (P_d p n v)) <---- 0" for p
    using tendsto_norm_zero_cancel by fast
qed

(* 6.2.2 a) in Puterman *)

(* If L_b v ≤ v, then v dominates the optimal value ν_{b_opt}.
   It suffices to bound the value of every mk_markovian p with p ∈ Π_M_R
   (by ν_{b_opt_eq_MR}). *)
lemma L_dec_ge_opt:
  assumes "L_b v ≤ v"
  shows "ν_{b_opt} ≤ v"
proof -
  have "ν_b (mk_markovian p) ≤ v" if "p ∈ Π_M_R" for p
  proof -
    let ?p = "mk_markovian p"
    (* Key invariant, by induction on n: the n-step value plus the discounted
       P_X p n v never exceeds v. *)
    have aux: "ν_{b_fin} ?p n + l^n *_R P_X p n v ≤ v" for n
    proof (induction n)
      case (Suc n)
      have "P_X p n (r_dec_b (p n)) + l *_R (P_X p (Suc n) v) ≤ P_X p n v"
        using P_{X_L_le} assms that by (simp add: P_{X_Suc_n_elem} L_def linear_simps)
      hence "ν_{b_fin} ?p (n + 1) + l^(n + 1) *_R (P_X p (n + 1) v) ≤ ν_{b_fin} ?p n + l^n *_R (P_X p n v)"
        by (auto simp del: scaleR_scaleR intro: scaleR_left_mono simp: ν_{b_fin_eq_}P_X
            mult.commute[of l] scaleR_add_right[symmetric] scaleR_scaleR[symmetric])
      also have "… ≤ v"
        using Suc.IH by (auto simp: ν_{b_fin_eq_}P_X)
      finally show ?case
        by auto
    qed (auto simp: ν_{b_fin_eq_}P_X)
    (* The left-hand side of the invariant converges pointwise to ν_b ?p s,
       because the P_d term vanishes in the limit (P_{d_lim}). *)
    have 1: "(λn. (ν_{b_fin} ?p n + P_d p n v) s) <---- ν_b ?p s" for s
      using bfun_tendsto_apply_bfun Limits.tendsto_add[OF ν_{b_fin_tendsto_}ν_b P_{d_lim}] by fastforce
    (* Pass the invariant to the limit. *)
    have "ν_b ?p s ≤ v s" for s
      using that aux assms by (fastforce intro!: lim_mono[OF _ 1, of  _ _ "λn. v s"])
    thus ?thesis
      using that by blast
  qed
  thus ?thesis
    using policies_ne by (fastforce simp: is_policy_def ν_{b_opt_eq_MR} intro!: cSUP_least)
qed

(* If v ≤ L_b v, then v is dominated by the optimal value ν_{b_opt}.
   First an e/(1-l) approximation is shown for every e > 0 (le_elem), then it is
   rescaled to an e approximation, and field_le_epsilon removes the e. *)
lemma L_inc_le_opt:
  assumes "v ≤ L_b v"
  shows "v ≤ ν_{b_opt}"
proof -
  have le_elem: "v s ≤ ν_{b_opt} s + (e/(1-l))" if "e > 0" for s e
  proof -
    (* Pick a decision rule d that is e-close to attaining L_b v. *)
    obtain d where "d ∈ D_R" and hd: "v ≤ L d v + e *_R 1"
      using assms step_mono_elem ‹e > 0› by blast
    let ?Pinf = "(∑i. l^i *_R P₁ d^^i)"
    have "v ≤ r_dec_b d + l *_R (P₁ d) v + e *_R 1"
      using hd L_def by fastforce
    hence "(id_blinfun - l *_R P₁ d) v ≤ r_dec_b d + e *_R 1"
      by (auto simp: blinfun.diff_left blinfun.scaleR_left algebra_simps)
    (* Apply the monotone series ?Pinf to both sides (lemma_6_1_2_b). *)
    hence "?Pinf ((id_blinfun - l *_R P₁ d) v) ≤ ?Pinf (r_dec_b d + e *_R 1)"
      using lemma_6_1_2_b P₁_def hd by auto
    (* On the left, ?Pinf cancels id_blinfun - l *_R P₁ d (via inv_norm_le'). *)
    hence "v ≤ ?Pinf (r_dec_b d + e *_R 1)"
      using inv_norm_le'(2)[of "l *_R P₁ d"] by (auto simp: blincomp_scaleR_right)
    also have "… = ν_b (mk_stationary d) + e *_R ?Pinf 1"
      by (simp add: ν_stationary blinfun.add_right blinfun.scaleR_right)
    also have "… = ν_b (mk_stationary d) + e *_R (∑i. (l^i *_R ((P₁ d^^i))) 1)"
      using convergent_disc_P₁
      by (auto simp: summable_iff_convergent' bounded_linear.suminf[of "λx. blinfun_apply x 1"])
    (* Powers of P₁ d fix the constant 1, leaving a geometric series. *)
    also have "… = ν_b (mk_stationary d) + e *_R (∑i. (l^i *_R 1))"
      by (auto simp: scaleR_blinfun.rep_eq)
    also have "… ≤ (ν_b (mk_stationary d) + (e / (1-l)) *_R 1)"
      by (auto simp: bounded_linear.suminf[symmetric, where f = "λx. x *_R 1"]
          suminf_geometric bounded_linear_scaleR_left summable_geometric)
    finally have "v s ≤ (ν_b (mk_stationary d) + (e/(1-l)) *_R 1) s"
      by auto
    thus "v s ≤ ν_{b_opt} s + (e/(1-l))"
      using ‹d ∈ D_R› ν_{b_le_opt}
      by (auto simp: is_policy_def mk_markovian_def less_eq_bfun_def intro: order_trans)
  qed
  (* Rescale: le_elem with e * (1 - l) in place of e gives an e-approximation. *)
  have "v s ≤ ν_{b_opt} s + e" if "e > 0" for s e
  proof -
    have "e * (1 - l) > 0"
      by (simp add: ‹0 < e›)
    thus "v s ≤ ν_{b_opt} s + e"
      using disc_lt_one that le_elem by (fastforce split: if_splits)
  qed
  thus ?thesis
    by (fastforce intro: field_le_epsilon)
qed
(* Every fixed point of L_b coincides with ν_{b_opt}.
   The equality is obtained by antisymmetry from the two one-sided results
   L_dec_ge_opt and L_inc_le_opt, both instantiated with the assumption. *)
lemma L_fix_imp_opt:
  assumes "v = L_b v"
  shows "v = ν_{b_opt}"
  using assms dual_order.antisym[OF L_dec_ge_opt L_inc_le_opt] by auto

(* The image of an arbitrary set X under P₁ is bounded.
   Used below to justify pulling constants out of suprema over D_R. *)
lemma bounded_P: "bounded (P₁ ` X)"
  by (auto simp: bounded_iff)

subsection ‹Solutions to the Optimality Equation›
subsubsection ‹@{const L_b} and @{const L} are Contraction Mappings›
(* Register both boundedness facts as introduction rules so that the automated
   proof methods in the following lemmas can apply them without explicit mention. *)
declare bounded_apply_blinfun[intro] bounded_apply_bfun'[intro]

(* Contraction estimate for L_b: the distance between L_b v and L_b u is at most
   l times the distance between v and u.
   Proof outline:
   1. a pointwise bound at a state s under the extra assumption L_b u s ≤ L_b v s,
      established by a calculational chain over suprema indexed by D_R;
   2. the mirrored case, obtained from step 1 with dist_commute;
   3. linearity of the order covers every state, and dist_bound lifts the
      pointwise bound to the distance between the two functions. *)
lemma contraction_L: "dist (L_b v) (L_b u) ≤ l * dist v u"
proof -
  (* Step 1: pointwise bound in the case L_b u s ≤ L_b v s. *)
  have "dist (L_b v s) (L_b u s) ≤ l * dist v u" if "L_b u s ≤ L_b v s" for s v u
  proof -
    (* Difference of the two suprema is bounded by the supremum of the differences. *)
    have "dist (L_b v s) (L_b u s) ≤ (⊔d ∈ D_R. L d v s - L d u s)"
      using ex_dec that by (fastforce intro!: le_SUP_diff' simp: dist_real_def L_b.rep_eq L_def)
    (* After unfolding L_def only the l-scaled P₁ part of the difference remains. *)
    also have "… = (⊔d ∈ D_R. l * (P₁ d (v - u) s))"
      by (auto simp: L_def right_diff_distrib blinfun.diff_right)
    (* Pull the factor l out of the supremum; needs D_R non-empty and boundedness. *)
    also have "… = l * (⊔d ∈ D_R. P₁ d (v - u) s)"
      using D_{R_ne} bounded_P by (fastforce intro: bounded_SUP_mul)
    also have "… ≤ l * norm (⊔d ∈ D_R. P₁ d (v - u) s)"
      by (simp add: mult_left_mono)
    (* Move the norm inside the supremum. *)
    also have "… ≤ l * (⊔d ∈ D_R. norm ((P₁ d (v - u)) s))"
    proof -
      have "bounded ((λx. norm ((P₁ x (v - u)) s)) ` D_R)"
        using bounded_apply_bfun' bounded_P bounded_apply_blinfun bounded_norm_comp by metis
      thus ?thesis
        using D_{R_ne} ex_dec bounded_norm_comp by (fastforce intro!: mult_left_mono)
    qed
    (* Replace the value at s by the norm of the whole function (abs_le_norm_bfun). *)
    also have "… ≤ l * (⊔p ∈ D_R. norm (P₁ p ((v - u))))"
      using D_{R_ne} abs_le_norm_bfun bounded_P
      by (fastforce simp: bounded_norm_comp intro!: bounded_imp_bdd_above mult_left_mono cSUP_mono)
    (* Applying P₁ p does not increase the norm (norm_push_exp_le_norm). *)
    also have "… ≤ l * (⊔p ∈ D_R. norm ((v - u)))"
      using norm_push_exp_le_norm D_{R_ne}
      by (fastforce simp: P₁.rep_eq intro!: mult_left_mono cSUP_mono)
    also have "… = l * dist v u"
      by (auto simp: dist_norm)
    finally show ?thesis .
  qed
  (* Step 2: both orderings of L_b u s and L_b v s, the second one by symmetry of dist. *)
  hence "L_b u s ≤ L_b v s ==> dist (L_b v s) (L_b u s) ≤ l * dist v u"
    "L_b v s ≤ L_b u s ==> dist (L_b v s) (L_b u s) ≤ l * dist v u" for u v s
    by (fastforce simp: dist_commute)+
  (* Step 3: one of the two orderings always holds; lift the pointwise bound. *)
  thus ?thesis
    using linear[of "L_b u _"] by (fastforce intro: dist_bound)
qed

(* L_b satisfies the predicate is_contraction.
   After unfolding is_contraction_def the claim follows from the contraction
   estimate together with the facts zero_le_disc and disc_lt_one about l. *)
lemma is_contraction_L: "is_contraction L_b"
  using contraction_L zero_le_disc disc_lt_one unfolding is_contraction_def by blast

(* Contraction estimate for the operator L p of a fixed p: the distance between
   L p v and L p u is at most l times the distance between v and u.
   NOTE(review): this lemma carries the same name as the earlier contraction
   lemma stated for L_b; presumably the two operators were written with
   different symbols originally and the names were conflated by text
   extraction. Confirm against the original theory file. *)
lemma contraction_L: "dist (L p v) (L p u) ≤ l * dist v u"
proof -
  (* One-sided pointwise bound on the difference, assuming L p u s ≤ L p v s. *)
  have aux: "L p v s - L p u s ≤ l * dist v u" if lea: "L p v s ≥ L p u s" for v s u
  proof -
    (* Unfolding L_def leaves only the l-scaled difference of the P₁ terms. *)
    have "L p v s - L p u s = (l *_R (P₁ p v - P₁ p u)) s"
      by (simp add: L_def scale_right_diff_distrib)
    also have "… ≤ l * norm (P₁ p (v - u) s)"
      by (auto simp: blinfun.diff_right intro!: mult_left_mono)
    (* The value at s is bounded by the norm of the function (abs_le_norm_bfun). *)
    also have "… ≤ l * norm (P₁ p (v - u))"
      using abs_le_norm_bfun by (auto intro!: mult_left_mono)
    (* Applying P₁ p does not increase the norm (norm_push_exp_le_norm). *)
    also have "… ≤ l * dist v u"
      by (simp add: P₁.rep_eq mult_left_mono norm_push_exp_le_norm dist_norm)
    finally show ?thesis
      by auto
  qed
  (* Pointwise distance bound: case split on the order, using aux in both directions. *)
  have "dist (L p v s) (L p u s) ≤ l * dist v u" for v s u
    using aux[of v _ u] aux[of u _ v]
    by (cases "L p v s ≥ L p u s") (auto simp: dist_real_def dist_commute)
  (* Lift the pointwise bound to the distance between the two functions. *)
  thus "dist (L p v) (L p u) ≤ l * dist v u"
    by (simp add: dist_bound)
qed

(* The operator L p satisfies the predicate is_contraction, for every p.
   NOTE(review): the name duplicates the earlier lemma stated for L_b; presumably
   an artefact of text extraction that merged two distinct symbols. Confirm
   against the original theory file. *)
lemma is_contraction_L: "is_contraction (L p)"
  unfolding is_contraction_def using contraction_L disc_lt_one zero_le_disc by blast

subsubsection ‹Existence of a Fixpoint of @{const L_b}›
(* Two consequences of instantiating the fact banach' with the contraction L_b:
   (1) L_b has exactly one fixed point;
   (2) for any start value v, the iterates of L_b tend to that fixed point,
       written here with the definite description THE.
   The arrow in the second statement is presumably the sequential-limit
   notation, garbled by text extraction. *)
lemma L_{b_conv}:
  "∃!v. L_b v = v" "(λn. (L_b ^^ n) v) <---- (THE v. L_b v = v)"
  using banach'[OF is_contraction_L] by auto

(* v is a fixed point of L_b exactly when v equals ν_{b_opt}.
   Declared as a simp rule, so fixed-point equations of L_b are rewritten
   automatically in later proofs. *)
lemma L_{b_fix_iff_opt} [simp]: "L_b v = v ⟷ v = ν_{b_opt}"
  using banach'(1) is_contraction_L L_fix_imp_opt by metis

(* ν_{b_opt} is the value picked by the definite description of the fixed point
   of L_b. Closed by auto alone, i.e. from the simp rules in scope. *)
lemma ν_{b_opt_fix}: "ν_{b_opt} = (THE v. L_b v = v)"
  by auto

(* ν_{b_opt} is itself a fixed point of L_b. Declared as a simp rule. *)
lemma L_{b_opt} [simp]: "L_b ν_{b_opt} = ν_{b_opt}"
  by auto

(* For any start value v, the iterates of L_b tend to ν_{b_opt}.
   Combines the second part of the convergence lemma with the identification of
   ν_{b_opt} as the fixed point. *)
lemma L_{b_lim}: "(λn. (L_b ^^ n) v) <---- ν_{b_opt}"
  using L_{b_conv}(2) ν_{b_opt_fix} by presburger

(* ν_b p equals ν_{b_opt} if and only if ν_b p is a fixed point of L_b.
   The name suggests a numbered theorem from a reference text. *)
lemma thm_6_2_6: "ν_b p = ν_{b_opt} ⟷ L_b (ν_b p) = ν_b p"
  by force

(* Variant of thm_6_2_6 with the left-hand side stated for ν p and ν_opt instead
   of ν_b p and ν_{b_opt}; the two forms are related through the rep_eq facts of
   ν_b and ν_{b_opt}. *)
lemma thm_6_2_6': "ν p = ν_opt ⟷ L_b (ν_b p) = ν_b p"
  using thm_6_2_6 ν_b.rep_eq ν_{b_opt}.rep_eq by fastforce

subsection ‹Existence of Optimal Policies›

(* ν_improving v d holds when, at every state s, the decision rule d is an
   argument of the maximum of the map sending a rule d' to L d' v s, taken over
   the rules in D_R. *)
definition "ν_improving v d ⟷ (∀s. is_arg_max (λd. (L d v) s) (λd. d ∈ D_R) d)"

(* Unfolded characterisation of ν_improving: d belongs to D_R and L d v
   dominates L d' v at every state, for every d' in D_R. *)
lemma ν_improving_iff: "ν_improving v d ⟷ d ∈ D_R ∧ (∀d' ∈ D_R. ∀s. L d' v s ≤ L d v s)"
  by (auto simp: ν_improving_def is_arg_max_linorder)

(* An improving decision rule is a member of D_R. Registered as a destruction
   rule for the classical reasoner.
   NOTE(review): the lemma name mentions D_MR while the conclusion is stated for
   D_R; possibly a leftover from an earlier naming scheme. Confirm. *)
lemma ν_improving_D_MR[dest]: "ν_improving v d ==> d ∈ D_R"
  by (auto simp add: ν_improving_iff)

(* If d is improving for v, then L d v dominates L d' v at every state s, for
   any competitor d' in D_R. *)
lemma ν_improving_ge: "ν_improving v d ==> d' ∈ D_R ==> L d' v s ≤ L d v s"
  by (auto simp: ν_improving_iff)

(* If d is improving for v, then L_b v equals L d v.
   The proof uses cSup_eq_maximum after unfolding L_b via its rep_eq. *)
lemma ν_improving_imp_L_b: "ν_improving v d ==> L_b v = L d v"
  by (fastforce intro!: cSup_eq_maximum simp: ν_improving_iff L_b.rep_eq L_def)

(* Converse of ν_improving_imp_L_b: a rule d in D_R whose operator L d agrees
   with L_b on v is improving for v. The second assumption is used right to left
   as a rewrite rule, together with L_le_L_b. *)
lemma L_{b_imp_}ν_improving:
  assumes "d ∈ D_R" "L_b v = L d v"
  shows "ν_improving v d"
  using assms L_le_L_b by (auto simp: ν_improving_iff assms(2)[symmetric])

(* For a rule d in D_R, being improving for v is equivalent to L_b v = L d v.
   Combines the two implications proved just before. *)
lemma ν_improving_alt:
  assumes "d ∈ D_R"
  shows "ν_improving v d ⟷ L_b v = L d v"
  using L_{b_imp_}ν_improving ν_improving_imp_L_b assms by blast

(* A decision rule is conserving when it is improving for ν_{b_opt}. *)
definition "ν_conserving d = ν_improving (ν_{b_opt}) d"

(* Unfolded characterisation of ν_conserving: d belongs to D_R and
   L d ν_{b_opt} dominates L d' ν_{b_opt} at every state, for every d' in D_R. *)
lemma ν_conserving_iff: "ν_conserving d ⟷ d ∈ D_R ∧ (∀d' ∈ D_R. ∀s. L d' ν_{b_opt} s ≤ L d ν_{b_opt} s)"
  by (auto simp: ν_conserving_def ν_improving_iff)

(* Pointwise domination for conserving rules: the specialisation of
   ν_improving_ge to the value ν_{b_opt}. *)
lemma ν_conserving_ge: "ν_conserving d ==> d' ∈ D_R ==> L d' ν_{b_opt} s ≤ L d ν_{b_opt} s"
  by (auto simp: ν_conserving_iff intro: ν_improving_ge)

(* For a conserving rule d, ν_{b_opt} is a fixed point of L d.
   Declared as a simp rule. *)
lemma ν_conserving_imp_L_b [simp]: "ν_conserving d ==> L d ν_{b_opt} = ν_{b_opt}"
  using ν_improving_imp_L_b by (fastforce simp: ν_conserving_def)

(* A rule d in D_R whose operator L d agrees with L_b on ν_{b_opt} is conserving.
   Specialisation of the corresponding lemma for improving rules. *)
lemma L_{b_imp_}ν_conserving:
  assumes "d ∈ D_R" "L_b ν_{b_opt} = L d ν_{b_opt}"
  shows "ν_conserving d"
  using L_{b_imp_}ν_improving assms by (auto simp: ν_conserving_def)

(* For a rule d in D_R, being conserving is equivalent to
   L_b ν_{b_opt} = L d ν_{b_opt}. Obtained from ν_improving_alt after unfolding
   the definition of ν_conserving. *)
lemma ν_conserving_alt:
  assumes "d ∈ D_R"
  shows "ν_conserving d ⟷ L_b ν_{b_opt} = L d ν_{b_opt}"
  unfolding ν_conserving_def using ν_improving_alt assms by auto

(* For a rule d in D_R, being conserving is equivalent to ν_{b_opt} being a
   fixed point of L d. Derived from ν_conserving_alt by auto. *)
lemma ν_conserving_alt':
  assumes "d ∈ D_R"
  shows "ν_conserving d ⟷ L d ν_{b_opt} = ν_{b_opt}"
  using assms ν_conserving_alt by auto

subsubsection ‹Conserving Decision Rules are Optimal›

(* If every v admits an improving rule of the form mk_dec_det d, then a
   conserving rule of that form exists.
   After unfolding ν_conserving_def this is the assumption instantiated at
   ν_{b_opt}. *)
theorem ex_improving_imp_conserving:
  assumes "∧v. ∃d. ν_improving v (mk_dec_det d)"
  shows "∃d. ν_conserving (mk_dec_det d)"
  by (simp add: assms ν_conserving_def)

(* If mk_dec_det d is conserving, then ν_b of the policy mk_stationary_det d
   equals ν_{b_opt}.
   Follows from the fixed-point fact for conserving rules together with
   L_ν_fix_iff. Declared as a simp rule. *)
theorem conserving_imp_opt[simp]:
  assumes "ν_conserving (mk_dec_det d)"
  shows "ν_b (mk_stationary_det d) = ν_{b_opt}"
  using L_ν_fix_iff ν_conserving_imp_L_b[OF assms] by simp

(* Existential form of conserving_imp_opt: from the existence of a conserving
   rule mk_dec_det d one obtains a d in D_D for which ν_b of the policy
   mk_stationary_det d equals ν_{b_opt}. *)
lemma conserving_imp_opt':
  assumes "∃d. ν_conserving (mk_dec_det d)"
  shows "∃d ∈ D_D. (ν_b (mk_stationary_det d)) = ν_{b_opt}"
  using assms by (fastforce simp: ν_conserving_def)

(* If every v admits an improving rule of the form mk_dec_det d, then at each
   state s the value ν_{b_opt} s is the supremum, over d in D_D, of ν_b of the
   policy mk_stationary_det d evaluated at s. *)
theorem improving_att_imp_det_opt:
  assumes "∧v. ∃d. ν_improving v (mk_dec_det d)"
  shows "ν_{b_opt} s = (⊔d ∈ D_D. ν_b (mk_stationary_det d) s)"
proof -
  (* Pick a conserving rule; its existence follows from the assumption. *)
  obtain d where d: "ν_conserving (mk_dec_det d)"
    using assms ex_improving_imp_conserving by auto
  (* The underlying function d is a member of D_D. *)
  hence "d ∈ D_D"
    using ν_conserving_iff is_dec_mk_dec_det_iff by blast
  (* The supremum is attained at d (conserving_imp_opt) and bounded above by
     ν_{b_opt} s, so cSup_eq_maximum applies with that value as the maximum. *)
  thus ?thesis
    using Π_M_{R_imp_policies} ν_{b_le_opt}
    by (fastforce intro!: cSup_eq_maximum[where z = "ν_{b_opt} s", symmetric]
        simp: conserving_imp_opt[OF d] image_iff)
qed

(* If L_b v is attained by some rule d in D_R, then it is also attained by a
   rule of the form mk_dec_det d' with d' in D_D. *)
lemma L_{b_sup_att_dec}:
  assumes "d ∈ D_R" "L_b v = L d v"
  shows "∃d' ∈ D_D. L_b v = L (mk_dec_det d') v"
proof -
  (* At each state s some action in A s reproduces the value L d v s; this is
     delegated to lemma_4_3_1' after rewriting with L_eq_L_a. *)
  have "∃a∈ A s. L d v s = L_a a v s" for s
    unfolding L_eq_L_a
    using assms is_dec_def L_{a_bounded} A_ne L_b.rep_eq L_def
    by (intro lemma_4_3_1')
      (auto intro: bounded_range_subset simp: assms(2)[symmetric] L_eq_L_a[symmetric] SUP_step_MR_eq)
  (* Turn the statewise existence into a single function d' choosing an action
     per state. *)
  then obtain d' where d: "d' s ∈ A s" "L d v s = L_a (d' s) v s" for s
    by metis
  thus ?thesis
    using assms d
    by (fastforce simp: is_dec_det_def mk_dec_det_def L_eq_L_a)
qed

(* Restatement of the preceding lemma in terms of ν_improving: if L_b v is
   attained by a rule in D_R, then some mk_dec_det d' with d' in D_D is
   improving for v. *)
lemma L_{b_sup_att_dec}':
  assumes "d ∈ D_R" "L_b v = L d v"
  shows "∃d' ∈ D_D. ν_improving v (mk_dec_det d')"
  using L_{b_sup_att_dec} ν_improving_alt assms by force

subsubsection ‹Deterministic Decision Rules are Optimal›

(* If some policy p in Π_H_R satisfies ν_b p = ν_{b_opt}, then there is a d in
   D_D such that ν_b of the policy mk_stationary_det d equals ν_{b_opt} as well.
   Proof outline:
   1. aux: for each state s, the first decision rule of as_markovian p started
      from return_pmf s leaves ν_{b_opt} unchanged at s;
   2. that rule acts like p [] under L, so ν_{b_opt} is a fixed point of L (p []);
   3. L_{b_sup_att_dec} yields a rule of the form mk_dec_det d with the same
      property, and the conserving-rule lemmas finish the proof. *)
lemma opt_imp_opt_dec_det:
  assumes "p ∈ Π_H_R" "ν_b p = ν_{b_opt}"
  shows "∃d ∈ D_D. ν_b (mk_stationary_det d) = ν_{b_opt}"
proof -
  have aux: "L (as_markovian p (return_pmf s) 0) ν_{b_opt} s = ν_{b_opt} s" for s
  proof -
    let ?ps = "as_markovian p (return_pmf s)"
    (* The policy built from the shifted rule sequence is bounded by ν_{b_opt}. *)
    have markovian_suc_le: "ν_b (mk_markovian (λn. as_markovian p (return_pmf s) (Suc n))) ≤ ν_{b_opt}"
      using is_Π_M_{R_as_markovian} assms by (auto simp: is_policy_def mk_markovian_def)
    have aux_le: "∧x f g. f ≤ g ==> apply_bfun f x ≤ apply_bfun g x"
      unfolding less_eq_bfun_def by auto
    (* Lower bound: ν_{b_opt} s ≤ L (?ps 0) ν_{b_opt} s, via one unfolding step
       (ν_step) and monotonicity. *)
    have "ν_{b_opt} s = ν_b (mk_markovian ?ps) s"
      using assms ν_{b_as_markovian} by metis
    also have "… = L (?ps 0) (ν_b (mk_markovian (λn. ?ps (Suc n)))) s"
      using ν_step by blast
    also have "… ≤ L (?ps 0) (ν_{b_opt}) s"
      unfolding L_def using markovian_suc_le P₁_mono by (auto intro!: mult_left_mono)
    finally have "ν_{b_opt} s ≤ L (?ps 0) (ν_{b_opt}) s" .
    (* Upper bound: L of any rule in D_R is below L_b, and L_b fixes ν_{b_opt}. *)
    have "as_markovian p (return_pmf s) 0 ∈ D_R"
      using is_Π_M_{R_as_markovian} assms by fast
    have "L (?ps 0) ν_{b_opt} ≤ ν_{b_opt}"
      using ‹?ps 0 ∈ D_R› L_le_L_b[of "?ps 0" "ν_{b_opt}"] by simp
    thus "L (?ps 0) ν_{b_opt} s = ν_{b_opt} s"
      using ‹ν_{b_opt} s ≤ (L (?ps 0) ν_{b_opt}) s› by (auto intro!: antisym)
  qed
  (* At state s, L of the first markovian rule agrees with L of p []. *)
  have "L (p []) v s = L (as_markovian p (return_pmf s) 0) v s" for v s
    by (auto simp: L_def P₁.rep_eq K_st_def)
  hence "L (p []) ν_{b_opt} = ν_{b_opt}"
    using aux by auto
  (* Replace p [] by a rule of the form mk_dec_det d. *)
  hence "∃d ∈ D_D. L (mk_dec_det d) ν_{b_opt} = ν_{b_opt}"
    using L_{b_sup_att_dec} assms(1) L_{b_opt} is_policy_def mem_Collect_eq by metis
  thus ?thesis
    using conserving_imp_opt' ν_conserving_alt' by blast
qed

subsubsection ‹Optimal Decision Rules for Finite Action Spaces›

(* 6.2.10 *)
(* If every action set A s is finite, then at each state s some action a in A s
   attains the value L_b v s, i.e. L_a a v s = L_b v s.
   After unfolding L_b into a supremum over actions, the supremum over a finite
   non-empty set is rewritten as a maximum, which is attained at arg_max_on. *)
lemma ex_opt_act:
assumes "∧s. finite (A s)"
shows "∃a ∈ A s. L_a a (v :: _ ==>_b _) s = L_b v s"
      unfolding L_b.rep_eq L_eq_SUP_det SUP_step_det_eq
      using arg_max_on_in[OF assms A_ne]
      by (auto simp: cSup_eq_Sup_fin Sup_fin_Max assms A_ne finite_arg_max_eq_Max[symmetric])

(* If every action set A s is finite, then some d in D_D satisfies
   L (mk_dec_det d) v = L_b v.
   The witness is the function that, at each state s, selects via the SOME
   operator an action a in A s with L_a a v s = L_b v s; such an action exists
   by ex_opt_act. The remaining goals are discharged with someI_ex. *)
lemma ex_opt_dec_det:
assumes "∧s. finite (A s)"
shows "∃d ∈ D_D. L (mk_dec_det d) (v :: _ ==>_b _) = L_b v"
  unfolding is_dec_det_def mk_dec_det_def
  using ex_opt_act[OF assms]  someI_ex
  apply (auto intro!: exI[of _ ‹λs. SOME a. a ∈ A s ∧ L_a a v s = L_b v s›] bfun_eqI)
   apply (smt (verit, best) someI_ex)
  apply (subst L_eq_L_a)
  apply (subst expectation_return_pmf)
  by (smt (verit, best) someI_ex)

(* If every action set A s is finite, then some d in D_D satisfies
   ν_{b_opt} = ν_b (mk_stationary_det d).
   Follows by metis from ex_opt_dec_det, the fixed-point property of ν_{b_opt}
   and L_ν_fix_iff. *)
lemma thm_6_2_10:
  assumes "∧s. finite (A s)"
  shows "∃d ∈ D_D. ν_{b_opt} = ν_b (mk_stationary_det d)"
  using assms conserving_imp_opt' L_{b_opt} L_ν_fix_iff ex_opt_dec_det
  by metis

subsubsection ‹Existence of Epsilon-Optimal Policies›

(* For every e > 0 there is a d in D_D whose operator L (mk_dec_det d) comes
   within e of L_b on v, in the sense L_b v ≤ L (mk_dec_det d) v + e *_R 1. *)
lemma ex_det_eps:
  assumes "0 < e"
  shows "∃d ∈ D_D. L_b v ≤ L (mk_dec_det d) v + e *_R 1"
proof -
  (* Statewise: some action in A s comes within e of L_b v s. *)
  have "∃a ∈ A s. L_b v s ≤ L_a a v s + e" for s
  proof -
    have "bdd_above ((λa. L_a a v s) ` A s)"
      using L_{a_le} by (auto intro!: boundedI bounded_imp_bdd_above)
    (* L_b v s - e lies strictly below the supremum over A s, hence strictly
       below one of its members (less_cSUP_iff). *)
    hence "∃a ∈ A s. L_b v s - e < L_a a v s"
      unfolding L_b.rep_eq L_eq_SUP_det SUP_step_det_eq
      by (auto simp: less_cSUP_iff[OF A_ne, symmetric] ‹0 < e›)
    thus "∃a ∈ A s. L_b v s ≤ L_a a v s + e"
      by force
  qed
  (* Collect the statewise choices into one function; metis performs the choice. *)
  thus ?thesis
    unfolding mk_dec_det_def is_dec_det_def
    by (auto simp: L_def P₁.rep_eq bind_return_pmf K_st_def less_eq_bfun_def) metis
qed

(* For every eps > 0 there is a d in D_D with
   ν_{b_opt} ≤ ν_b (mk_stationary_det d) + eps *_R 1.
   Proof outline:
   1. apply ex_det_eps with the smaller tolerance (1-l) times eps to obtain d;
   2. rearrange the resulting inequality and apply the series ∑i. ?lK ^^ i to
      both sides (lemma_6_1_2_b), where ?lK abbreviates l *_R P₁ ?d;
   3. the series applied to (id_blinfun - ?lK) is simplified with inv_norm_le';
   4. the series applied to the constant 1 is a geometric series with value
      1 / (1-l), so the error term collapses to eps *_R 1. *)
lemma thm_6_2_11:
  assumes "eps > 0"
  shows "∃d ∈ D_D. ν_{b_opt} ≤ ν_b (mk_stationary_det d) + eps *_R 1"
proof -
  have "(1-l) * eps > 0"
    by (simp add: assms)
  (* Step 1: rule d within tolerance (1-l) times eps of L_b at ν_{b_opt}. *)
  then obtain d where "d ∈ D_D" and d: "L_b ν_{b_opt} ≤ L (mk_dec_det d) ν_{b_opt} + ((1-l)*eps) *_R 1"
    using ex_det_eps[of _ ν_{b_opt}] by auto
  let ?d = "mk_dec_det d"
  let ?lK = "l *_R P₁ ?d"
  let ?lK_opt = "l *_R P₁ ?d ν_{b_opt}"
  (* Step 2: unfold L, isolate r_dec_b ?d, then apply the series. *)
  have "ν_{b_opt} ≤ r_dec_b ?d + ?lK_opt + ((1-l)*eps) *_R 1"
    using L_def L_fix_imp_opt d by simp
  hence "ν_{b_opt} - ?lK_opt - ((1-l)*eps) *_R 1 ≤ r_dec_b ?d"
    by (simp add: cancel_ab_semigroup_add_class.diff_right_commute diff_le_eq)
  hence "(∑i. ?lK ^^ i) (ν_{b_opt} - ?lK_opt - ((1-l)*eps) *_R 1) ≤ ν_b (mk_stationary ?d)"
    using lemma_6_1_2_b suminf_cong by (simp add: blincomp_scaleR_right ν_stationary)
  hence "((∑i. ?lK ^^ i) o_L (id_blinfun - ?lK)) ν_{b_opt} - (∑i. ?lK ^^ i) (((1-l)*eps) *_R 1)
    ≤ (ν_b (mk_stationary ?d))"
    by (simp add: blinfun.diff_right blinfun.diff_left blinfun.scaleR_left)
  (* Step 3: simplify the composition with inv_norm_le'. *)
  hence le: "ν_{b_opt} - (∑i. ?lK ^^ i) (((1-l)*eps) *_R 1) ≤ ν_b (mk_stationary ?d)"
    by (auto simp: inv_norm_le')
  have s: "summable (λi. (l *_R P₁ ?d)^^i)"
    using convergent_disc_P₁ summable_iff_convergent'
    by (simp add: blincomp_scaleR_right summable_iff_convergent')
  (* Step 4: evaluate the series on the scaled constant function. *)
  have "(∑i. ?lK ^^ i) (((1-l)*eps) *_R 1) = eps *_R 1"
  proof -
    have "(∑i. ?lK ^^ i) (((1-l)*eps) *_R 1) = ((1-l)*eps) *_R (∑i. ?lK^^i) 1"
      using blinfun.scaleR_right by blast
    (* Exchange application and infinite sum; uses summability fact s. *)
    also have "… = ((1-l)*eps) *_R (∑i. (?lK^^i) 1) "
      using s by (auto simp: bounded_linear.suminf[of "λx. blinfun_apply x 1"])
    also have "… = ((1-l)*eps) *_R (∑i. (l ^ i)) *_R 1"
      by (auto simp: blinfun.scaleR_left blincomp_scaleR_right bounded_linear_scaleR_left
          bounded_linear.suminf[of "λx. x *_R 1"])
    also have "… = ((1-l)*eps) *_R (1 / (1-l)) *_R 1"
      by (simp add: suminf_geometric)
    also have "… = eps *_R 1"
      using disc_lt_one ‹0 < (1 - l) * eps› by auto
    finally show ?thesis .
  qed
  thus ?thesis
    using ‹d ∈ D_D› diff_le_eq le
    by auto
qed

(* Distance version of ex_det_eps: for every e > 0 there is a d in D_D such that
   the distance between L_b v and L (mk_dec_det d) v is at most e. *)
lemma ex_det_dist_eps:
  assumes "0 < (e :: real)"
  shows "∃d ∈ D_D. dist (L_b v) (L (mk_dec_det d) v) ≤ e"
proof -
  (* d is within e from below (ex_det_eps) and never exceeds L_b v (L_le_L_b). *)
  obtain d where "d ∈ D_D" "L (mk_dec_det d) v ≤ (L_b v)"
    and h2: "L_b v ≤ L (mk_dec_det d) v + e *_R 1"
    using assms ex_det_eps L_le_L_b by blast
  (* The difference is sandwiched between 0 and e *_R 1 ... *)
  hence "0 ≤ L_b v - L (mk_dec_det d) v"
    by simp
  moreover have "L_b v - L (mk_dec_det d) v ≤ e *_R 1"
    using h2 by (simp add: add.commute diff_le_eq)
  (* ... so its absolute value is at most e at every state. *)
  ultimately have "∀s. ∣(L_b v) s - L (mk_dec_det d) v s∣ ≤ e"
    unfolding less_eq_bfun_def by auto
  (* The distance is a supremum of these pointwise distances (dist_bfun.rep_eq). *)
  hence "dist (L_b v) (L (mk_dec_det d) v) ≤ e"
    unfolding dist_bfun.rep_eq by (auto intro!: cSUP_least simp: dist_real_def)
  thus ?thesis
    using ‹d ∈ D_D›
    by auto
qed

(* Auxiliary fact about reals: if x < y, some positive eps still satisfies
   x + eps ≤ y. *)
lemma less_imp_ex_add_le: "(x :: real) < y ==> ∃eps>0. x + eps ≤ y"
  by (meson field_le_epsilon less_le_not_le nle_le)

(* At each state s, ν_{b_opt} s is bounded above by the supremum, over d in D_D,
   of ν_b of the policy mk_stationary_det d evaluated at s.
   The proof rewrites the goal with le_cSUP_iff: every y strictly below
   ν_{b_opt} s must be strictly below some member of the family. The two side
   conditions of le_cSUP_iff are discharged after the next keyword. *)
lemma ν_{b_opt_le_det}: "ν_{b_opt} s ≤ (⊔d ∈ D_D. ν_b (mk_stationary_det d) s)"
proof (subst le_cSUP_iff, safe)
  fix y
  assume "y < ν_{b_opt} s"
  (* Leave a positive gap eps between y and ν_{b_opt} s. *)
  then obtain eps where 1: "y ≤ ν_{b_opt} s - eps" and "eps > 0"
    using less_imp_ex_add_le by force
  hence "eps / 2 > 0" by auto
  (* thm_6_2_11 with tolerance eps / 2 gives a rule d that lands inside the gap. *)
  obtain d where "d ∈ D_D" and "ν_{b_opt} s ≤ ν_b (mk_stationary_det d) s + eps / 2"
    using thm_6_2_11[OF ‹eps / 2 > 0›] by fastforce
  hence "y < ν_b (mk_stationary_det d) s"
    using ‹eps > 0› by (auto simp: diff_less_eq intro: le_less_trans[OF 1])
  thus "∃i∈D_D. y < ν_b (mk_stationary_det i) s"
    using ‹d ∈ D_D› by blast
next
  (* Side condition: the index set is non-empty. *)
  show "D_D = {} ==> False"
    using D_det_ne by blast
  (* Side condition: the family is bounded above. *)
  show "bdd_above ((λd. ν_b (mk_stationary_det d) s) ` D_D)"
    by (auto intro!: bounded_imp_bdd_above boundedI abs_ν_le simp: ν_b.rep_eq)
qed

(* At each state s, ν_{b_opt} s equals the supremum, over d in D_D, of ν_b of the
   policy mk_stationary_det d evaluated at s.
   One direction is ν_{b_opt_le_det}; the other uses cSUP_least with the bound
   ν_{b_le_opt_DD} and non-emptiness of D_D. *)
lemma ν_{b_opt_eq_det}: "ν_{b_opt} s = (⊔d ∈ D_D. ν_b (mk_stationary_det d) s)"
  using ν_{b_le_opt_DD} D_det_ne
  by (fastforce intro!: antisym[OF ν_{b_opt_le_det}] cSUP_least)

(* unused, delete? *)
(* For a start function v0 in bfun, iterating the map sending v to L (Bfun v)
   yields a sequence that converges uniformly on UNIV to ν_opt. *)
lemma lemma_6_3_1_a:
  assumes "v0 ∈ bfun"
  shows "uniform_limit UNIV (λn. ((λv. L (Bfun v)) ^^ n) v0) ν_opt sequentially"
proof -
  (* On members of bfun the iteration coincides with iterating L_b on Bfun v0. *)
  have L_Bfun_eq: "v0 ∈ bfun ==> ((λv. L (Bfun v))^^n) v0 = (L_b ^^n) (Bfun v0)" for n
    by (induction n) (auto simp: L_b.rep_eq apply_bfun_inverse)
  (* Convergence of the L_b iterates, turned into a uniform limit. *)
  have "uniform_limit UNIV (λn. (L_b ^^ n) (Bfun v0)) ν_{b_opt} sequentially"
    by (intro tendsto_bfun_uniform_limit[OF L_{b_lim}])
  hence "uniform_limit UNIV (λn. (L_b ^^ n) (Bfun v0)) ν_opt sequentially"
    by (simp add: ν_opt_bfun ν_{b_opt}.rep_eq)
  thus ?thesis
    by (auto simp: assms L_Bfun_eq)
qed

(* General fact for sequences in a real normed vector space: if f converges to
   some y, then the distance between consecutive members f n and f (Suc n)
   tends to 0. Built from tendsto_diff, tendsto_norm and LIMSEQ_Suc. *)
lemma dist_Suc_tendsto_zero:
  assumes "(λn. f n) <---- (y::_::real_normed_vector)"
  shows "(λn. dist (f n) (f (Suc n))) <---- 0"
  using assms tendsto_diff tendsto_norm LIMSEQ_Suc by (fastforce simp: dist_norm)

(* The distance between consecutive iterates of L_b tends to 0; an instance of
   dist_Suc_tendsto_zero applied to the convergent sequence of iterates. *)
lemma dist_L_{b_tendsto}: "(λn. dist ((L_b^^n) v) ((L_b^^(Suc n)) v)) <---- 0"
  using L_{b_lim} by (fast intro!: dist_Suc_tendsto_zero)

(* max_L_ex s v states that the map sending an action a to L_a a v s has an
   argument of the maximum on the action set A s (predicate has_arg_max). *)
definition "max_L_ex s v ≡ has_arg_max (λa. L_a a v s) (A s)"

(* With second argument 0, ν_{b_fin} p is 0. Declared as a simp rule. *)
lemma ν_{b_fin_zero}[simp]: "ν_{b_fin} p 0 = 0"
  by (auto simp: ν_{b_fin}.rep_eq)

(* Successor step for ν_{b_fin} of the policy mk_stationary d: going from n to
   Suc n adds the n-th power of l *_R P₁ d applied to r_dec_b d.
   Declared as a simp rule. *)
lemma ν_{b_fin_Suc}[simp]:
  "ν_{b_fin} (mk_stationary d) (Suc n) = ν_{b_fin} (mk_stationary d) n + ((l *_R P₁ d)^^ n) (r_dec_b d)"
  by (auto simp: P_{X_sconst} ν_{b_fin}.rep_eq ν_fin_eq_P_X blincomp_scaleR_right blinfun.scaleR_left)

(* Closed form for ν_{b_fin} of the policy mk_stationary d at n: the sum of the
   powers of l *_R P₁ d with index below n, applied to r_dec_b d.
   Proved by induction on n using the zero and successor simp rules. *)
lemma ν_{b_fin_eq}: "ν_{b_fin} (mk_stationary d) n = (∑i < n. ((l *_R P₁ d)^^ i)) (r_dec_b d)"
  by (induction n) (auto simp add: plus_blinfun.rep_eq)

(* Closed form for the m-fold iterate of L d applied to v: ν_{b_fin} of the
   policy mk_stationary d at m, plus the m-th power of l *_R P₁ d applied to v.
   Proved by induction on m with v generalised; the base case is closed by simp. *)
lemma L_iter: "(L d ^^ m) v = ν_{b_fin} (mk_stationary d) m + ((l *_R P₁ d)^^ m) v"
proof (induction m arbitrary: v)
  case (Suc m)
  (* Peel the extra application off on the inside, so that the induction
     hypothesis applies to the argument L d v. *)
  have "(L d ^^ Suc m) v = (L d ^^ m) (L d v)"
    by (simp add: funpow_Suc_right del: funpow.simps)
  also have "… = ν_{b_fin} (mk_stationary d) m + ((l *_R P₁ d) ^^ m) (L d v)"
    using Suc by simp
  (* Unfold L d v and regroup the terms into the Suc m form. *)
  also have "… = ν_{b_fin} (mk_stationary d) (Suc m) + ((l *_R P₁ d) ^^ Suc m) v"
    unfolding L_def
    by (auto simp: P₁_pow blinfun.bilinear_simps blincomp_scaleR_right funpow_swap1)
  finally show ?case .
qed simp

(* For fixed N and s, the values of ν_{b_fin} (mk_stationary x) N at s form a
   bounded set as x ranges over an arbitrary set X. Uses the bound abs_ν_fin_le. *)
lemma bounded_stationary_ν_{b_fin}: "bounded ((λx. (ν_{b_fin} (mk_stationary x) N) s) ` X)"
  using ν_{b_fin}.rep_eq abs_ν_fin_le by (auto intro!: boundedI)

(* For fixed m, v and s, the values of the m-th power of l *_R P₁ x applied to v
   at s form a bounded set as x ranges over an arbitrary set X.
   The explicit bound supplied to boundedI is l ^ m times norm v. *)
lemma bounded_disc_P₁: "bounded ((λx. (((l *_R P₁ x) ^^ m) v) s) ` X)"
  by (auto simp: P_{X_const}[symmetric] blinfun.bilinear_simps blincomp_scaleR_right
      intro!: boundedI[of _  "l ^ m * norm v"] mult_left_mono order.trans[OF abs_le_norm_bfun])

(* Variant of bounded_disc_P₁ without the factor l: the values of the m-th
   power of P₁ x applied to v at s form a bounded set as x ranges over X.
   The explicit bound supplied to boundedI is norm v. *)
lemma bounded_disc_P₁': "bounded ((λx. ((P₁ x ^^ m) v) s) ` X)"
  by (auto simp: P_{X_const}[symmetric] intro!: boundedI[of _  "norm v"] order.trans[OF abs_le_norm_bfun])

(* For a decision rule d (is_dec d), the n-fold iterate of L d is bounded above
   by the n-fold iterate of L_b, on any v.
   Induction on n; each step chains L_mono with L_le_L_b via order_trans. *)
lemma L_iter_le_L_b: "is_dec d ==> (L d ^^ n) v ≤ (L_b ^^ n) v"
  using order_trans[OF L_mono L_le_L_b] by (induction n) auto

end

subsection ‹More Restrictive MDP Locales›
(* Extension of discrete_MDP by the assumption that the action set A s is finite
   at every state s.
   NOTE(review): the assumption carries no name, so it can only be referred to
   through the automatically generated axioms fact of the locale. *)
locale MDP_fin_acts = discrete_MDP +
  assumes "∧s. finite (A s)"

(* Extension of MDP_reward_disc by the assumption Sup_att: for every state s and
   every v, the predicate max_L_ex s v holds, i.e. the map sending an action a
   to L_a a v s has an argument of the maximum on A s. *)
locale MDP_att_L = MDP_reward_disc A K r l
  for
    A and
    K :: "'s ::countable × 'a ::countable ==> 's pmf" and
    r and l +
  assumes Sup_att: "max_L_ex (s :: 's) v"
begin
(* If a is an argument of the maximum of L_a on A s, then L_b v s is the value
   of L_a at that action. *)
theorem L_{b_eq_argmax_L}_a:
  fixes v :: "'s ==>_b real"
  assumes "is_arg_max (λa. L_a a v s) (λa. a ∈ A s) a"
  shows "L_b v s = L_a a v s"
  using L_{a_le} assms A_ne L_b.rep_eq L_eq_SUP_det SUP_step_det_eq
  by (auto intro!: cSUP_upper2 antisym cSUP_least simp: is_arg_max_linorder)

(* The action chosen by arg_max_on dominates every action a in A s. *)
lemma L_{a_le_arg_max}: "a ∈ A s ==> L_a a v s ≤ L_a (arg_max_on (λa. L_a a v s) (A s)) v s"
  using Sup_att app_arg_max_ge[OF Sup_att[unfolded max_L_ex_def]]
  by (simp add: arg_max_on_def)

(* When f has an argument of the maximum on Q, arg_max_on f Q is a member of Q.
   NOTE(review): a fact named arg_max_on_in is already used in the proof of
   ex_opt_act earlier in this file, so one of that name presumably exists
   outside this locale; check for unintended shadowing. *)
lemma arg_max_on_in: "has_arg_max f Q ==> arg_max_on f Q ∈ Q"
  using has_arg_max_arg_max by (auto simp: arg_max_on_def)

(* L_b v s is the value of L_a at the action chosen by arg_max_on. *)
lemma L_{b_eq_L}_{a_max}: "L_b v s = L_a (arg_max_on (λa. L_a a v s) (A s)) v s"
  using app_arg_max_eq_SUP[symmetric] Sup_att max_L_ex_def
  by (auto simp: L_{b_eq_SUP_det} SUP_step_det_eq)

(* Some d in D_D satisfies L_b v = L (mk_dec_det d) v. The witness selects, at
   each state, the action chosen by arg_max_on. *)
lemma ex_opt_det: "∃d ∈ D_D. L_b v = L (mk_dec_det d) v"
proof -
  define d where "d = (λs. arg_max_on (λa. L_a a v s) (A s))"
  have "L_b v s = L (mk_dec_det d) v s" for s
    by (auto simp: d_def L_{b_eq_L}_{a_max} L_eq_L_{a_det})
  moreover have "d ∈ D_D"
    using Sup_att arg_max_on_in by (auto simp: d_def is_dec_det_def max_L_ex_def)
  ultimately show ?thesis
    by auto
qed

(* Some d in D_D gives a rule mk_dec_det d that is improving for v. *)
lemma ex_improving_det: "∃d ∈ D_D. ν_improving v (mk_dec_det d)"
  using ν_improving_alt ex_opt_det by auto
end

(* Extension of discrete_MDP by a fixed selection function arb_act on sets of
   actions: applied to a non-empty set X it returns a member of X. The
   assumption is available as the simp rule arb_act_in. *)
locale MDP_act = discrete_MDP A K for A :: "'s::countable ==> 'a::countable set" and K +
  fixes arb_act ::  "'a set ==> 'a"
  assumes arb_act_in[simp]: "X ≠ {} ==> arb_act X ∈ X"

(* Combination of the locales MDP_act and MDP_att_L over the same A, K, r and l:
   a selection function arb_act is available and the maximum of L_a over A s is
   attained. The predicates is_opt_act and opt_acts used below are defined
   outside the visible part of the file. *)
locale MDP_act_disc = MDP_act A K + MDP_att_L A K r l
  for A :: "'s::countable ==> 'a::countable set" and K r l
begin

(* The action that arb_act selects from opt_acts v s satisfies is_opt_act v s.
   The proof instantiates arb_act_in with the set of all arguments of the
   maximum of L_a on A s, which is non-empty by Sup_att. *)
lemma is_opt_act_some: "is_opt_act v s (arb_act (opt_acts v s))"
  using arb_act_in[of "{a. is_arg_max (λa. L_a a v s) (λa. a ∈ A s) a}"] Sup_att has_arg_max_def
  unfolding max_L_ex_def is_opt_act_def by auto

(* The selected action is a member of A s. *)
lemma some_opt_acts_in_A: "arb_act (opt_acts v s) ∈ A s"
  using is_opt_act_some unfolding is_opt_act_def is_arg_max_def by auto

(* The rule that picks arb_act (opt_acts v0 s) at every state s, wrapped by
   mk_dec_det, is improving for v0. *)
lemma ν_improving_opt_acts: "ν_improving v0 (mk_dec_det (λs. arb_act (opt_acts (apply_bfun v0) s)))"
  using is_opt_act_def is_opt_act_some some_opt_acts_in_A
  by (subst ν_improving_alt) (fastforce simp: L_eq_L_{a_det} L_{b_eq_argmax_L}_a is_dec_det_def)+

end

(* Specialisation of MDP_reward_disc in which both the state type and the
   action type belong to the type class finite. No further assumptions are
   added. *)
locale MDP_finite_type = MDP_reward_disc A K r l
  for A and K :: "'s :: finite × 'a :: finite ==> 's pmf" and r l

end

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.54 Sekunden ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.