theory MDP_reward imports
Bounded_Functions
MDP_reward_Util
Blinfun_Util
MDP_disc begin
section‹Markov Decision Processes with Rewards›
locale MDP_reward = discrete_MDP A K for
A and
K :: "'s ::countable × 'a ::countable ==> 's pmf" + fixes
r :: "('s × 'a) ==> real"and
l :: real assumes
zero_le_disc [simp]: "0 ≤ l"and
r_bounded: "bounded (range r)" begin
text‹
extension to the basic MDPs is formalized with another locale.
assumes the existence of a reward function @{term r} which takes a state-action pair to a real
. We assume that the function is bounded @{prop r_bounded}.
, we fix a discounting factor @{term l}, where @{term "0 ≤ l ∧ l < 1"}. ›
subsection‹Util› subsubsection‹Basic Properties of rewards› lemma r_bfun: "r ∈ bfun" using r_bounded by auto
lemma abs_r_le_rM: "∣r sa∣≤ rM" using bounded_norm_le_SUP_norm r_bounded rM_defby fastforce
lemma abs_rM_eq_rM [simp]: "∣rM∣ = rM" using abs_r_le_rMby fastforce
lemma rM_nonneg: "0 ≤ rM" using abs_rM_eq_rMby linarith
lemma measurable_r_nth [measurable]: "(λt. r (t !! i)) ∈ borel_measurable S" by measurable
lemma integrable_r_nth [simp]: "integrable (T p s) (λt. r (t !! i))" by (fastforce simp: bounded_iff intro: abs_r_le_rM)
lemma expectation_abs_r_le: "measure_pmf.expectation d (λa. ∣r (s, a)∣) ≤ rM" using abs_r_le_rM by (fastforce intro!: measure_pmf.integral_le_const measure_pmf.integrable_const_bound)
lemma abs_exp_r_le: "∣measure_pmf.expectation d r∣≤ rM" using abs_r_le_rM by (fastforce intro!: measure_pmf.integral_le_const order.trans[OF integral_abs_bound] measure_pmf.integrable_const_bound)
subsubsection‹Infinite disounted sums› lemma abs_disc_eq[simp]: "∣l ^ i * x∣ = l ^ i * ∣x∣" by (auto simp: abs_mult)
lemma norm_l_pow_eq[simp]: "norm (l^t *R F) = l^t * norm F" by auto
subsection‹Total Reward for Single Traces›
abbreviation"ν_trace_fin t N ≡∑i < N. l ^ i * r (t !! i)" abbreviation"ν_trace t ≡∑i. l ^ i * r (t !! i)"
lemma abs_ν_trace_fin_le: "∣ν_trace_fin t N∣≤ (∑i < N. l^i * rM)" by (auto intro!: sum_mono order.trans[OF sum_abs] mult_left_mono abs_r_le_rM)
lemma measurable_suminf_reward[measurable]: "ν_trace ∈ borel_measurable S" by measurable
lemma integrable_ν_trace_fin: "integrable (T p s) (λt. ν_trace_fin t N)" by (fastforce simp: bounded_iff intro: abs_ν_trace_fin_le)
context fixes p :: "('s, 'a) pol" begin
subsection‹Expected Finite-Horizon Discounted Reward› definition"ν_fin n s = ∫t. ν_trace_fin t n ∂T p s"
lemma abs_ν_fin_le: "∣ν_fin N s∣≤ (∑i<N. l^i * rM)" unfolding ν_fin_def using abs_ν_trace_fin_le by (fastforce intro!: prob_space.integral_le_const order_trans[OF integral_abs_bound])
lemma ν_fin_bfun: "(λs. ν_fin N s) ∈ bfun" by (auto intro!: abs_ν_fin_le)
lemma ν_fin_elem: "ν_fin (mk_markovian p) n s = (∑i<n. l^i * PX p i (r_decb (p i)) s)" unfoldingPX_def ν_fin_eq_Pn Pn'_markovian_eq_Xn'_bind measure_pmf_bind using measure_pmf_in_subprob_algebra abs_r_le_rM by (subst integral_bind) (auto simp: r_dec_eq_r_K0)
lemma νb_fin_eq_PX: "νb_fin (mk_markovian p) n = (∑i<n. l^i *RPX p i (r_decb (p i)))" by (auto simp: ν_fin_elem sum_apply_bfun νb_fin.rep_eq)
lemma ν_fin_eq_PX: "ν_fin (mk_markovian p) n = (∑i<n. l^i *RPX p i (r_decb (p i)))" by (metis νb_fin.rep_eq νb_fin_eq_PX)
text‹
{term "P1 d v"} defines for each state the expected value of @{term v}
taking a single step in the MDP according to the decision rule @{term d}. ›
context fixes d :: "('s, 'a) dec" begin
lift_definition P1 :: "('s ==>b real) ==>L ('s ==>b real)"is"push_exp (K_st d)" using push_exp_bounded_linear .
lemmaP1_bfun_one [simp]:"P1 1 = 1" by (auto simp: P1.rep_eq)
lemmaP1_pow_bfun_one [simp]: "(P1^^t) 1 = 1" by (induction t) auto
lemmaP1_pow: "blinfun_apply (P1 ^^ n) = blinfun_apply P1 ^^ n" by (induction n) auto
lemma norm_P1 [simp]: "norm P1 = 1" by (simp add: norm_blinfun.rep_eq P1.rep_eq) end
lemmaPX_Suc: "PX p (Suc n) v = P1 (p 0) ((PX (λn. p (Suc n)) n) v)" unfoldingPX_defP1.rep_eq by (fastforce intro!: abs_le_norm_bfun integral_bind[where K = "count_space UNIV"]
simp: measure_pmf_in_subprob_algebra measure_pmf_bind Suc_Xn'_markovian)
lemmaPX_Suc': "PX p (Suc n) v = PX p n (P1 (p n) v)" proof (induction n arbitrary: p) case0 thus ?case by (simp add: PX_Suc) next case (Suc n) thus ?case by (metis PX_Suc) qed
lemmaPX_const: "PX (λ_. d) n = P1 d ^^ n" by (induction n) (auto simp add: P1_pow PX_Suc)
lemmaPX_sconst: "PX (λ_. p) n = P1 p ^^n" usingPX_const.
lemma norm_P_n[simp]: "onorm (P1 d ^^ n) = 1" using norm_PX[of "λ_. d"] by (auto simp: PX_sconst)
lemma norm_P1_pow [simp]: "norm (P1 d ^^ t) = 1" by (simp add: norm_blinfun.rep_eq)
lemmaPX_Suc_n_elem: "PX p n (P1 (p n) v) = PX p (Suc n) v" usingPX_Suc' P1.rep_eq by auto
lemmaP1_eq_PX_one: "blinfun_apply (P1 (p 0)) = PX p 1" by (auto simp: PX_Suc' P1.rep_eq)
lemmaP1_pos: "0 ≤ u ==> 0 ≤P1 d u" by (auto simp: P1.rep_eq less_eq_bfun_def)
lemmaP1_nonneg: "nonneg_blinfun (P1 d)" by (simp add: P1_pos nonneg_blinfun_def)
lemmaP1_n_pos: "0 ≤ u ==> 0 ≤ (P1 d ^^ n) u" by (induction n) (auto simp: P1.rep_eq less_eq_bfun_def)
lemmaP1_n_nonneg: "nonneg_blinfun (P1 d ^^ n)" by (simp add: P1_n_pos nonneg_blinfun_def)
lemmaP1_n_disc_pos: "0 ≤ u ==> 0 ≤ (l^n *RP1 d ^^n) u" by (auto simp: P1_n_pos scaleR_nonneg_nonneg blinfun.scaleR_left)
lemmaP1_sum_pos: "0 ≤ u ==> 0 ≤ (∑t≤n. l^t *R (P1 d ^^ t)) u" usingP1_n_pos P1_pos by (induction n) (auto simp: blinfun.add_left blinfun.scaleR_left scaleR_nonneg_nonneg)
lemmaP1_sum_ge: assumes"0 ≤ u" shows"u ≤ (∑t≤n. l^t *RP1 d ^^t) u" usingP1_n_disc_pos[OF assms, of "Suc _"] by (induction n) (auto intro: add_increasing2 simp add: blinfun.add_left)
subsection‹The Bellman Operator› definition"L d v ≡ r_decb d + l *RP1 d v"
lemma norm_L_le: "norm (L d v) ≤ rM + l * norm v" using norm_blinfun[of "P1 d"] norm_P1 norm_r_dec_le by (auto intro!: norm_add_rule_thm mult_left_mono simp: L_def)
lemma abs_L_le: "∣L d v s∣≤ rM + l * norm v" using order.trans[OF norm_le_norm_bfun norm_L_le] by auto
subsubsection‹Bellman Operator for Single Actions› abbreviation"La a v s ≡ r (s, a) + l * measure_pmf.expectation (K (s,a)) v"
lemma La_le: fixes v :: "'s ==>b real" shows"∣La a v s∣≤ rM + l * norm v" using abs_r_le_rM by (fastforce intro: order_trans[OF abs_triangle_ineq] order_trans[OF integral_abs_bound]
add_mono mult_mono measure_pmf.integral_le_const abs_le_norm_bfun
simp: abs_mult)
lemma La_bounded: "bounded (range (λa. La a (apply_bfun v) s))" using La_leby (auto intro!: boundedI)
lemma La_int: fixes d :: "'a pmf"and v :: "'s ==>b real" shows"(∫a. La a v s ∂d) = (∫a. r (s, a) ∂d) + l * ∫a. ∫s'. v s' ∂K (s, a) ∂d" proof (subst Bochner_Integration.integral_add) show"integrable d (λa. r (s, a))" using abs_r_le_rMby (fastforce intro!: bounded_integrable simp: bounded_iff) show"integrable d (λa. l * ∫s'. v s' ∂K (s, a))" by (intro bounded_integrable)
(auto intro!: mult_mono order_trans[OF integral_abs_bound] boundedI[of _ "l * norm v"]
measure_pmf.integral_le_const simp: abs_le_norm_bfun abs_mult) qed auto
lemma L_eq_La: "L d v s = measure_pmf.expectation (d s) (λa. La a v s)" unfolding La_int L_def K_st_def P1.rep_eq by (auto simp: measure_pmf_bind integral_measure_pmf_bind[where B = "norm v"] abs_le_norm_bfun)
lemma L_eq_La_det: "L (mk_dec_det d) v s = La (d s) v s" by (auto simp: L_eq_La mk_dec_det_def)
lemma La_eq_L: "measure_pmf.expectation p (λa. La a (apply_bfun v) s) = L (λt. if t = s then p else return_pmf (SOME a. a ∈ A t)) v s" unfolding L_eq_Laby auto
lemma L_le: "L d v s ≤ rM + l * norm v" unfolding L_def using norm_P1 norm_blinfun[of "(P1 d)"] abs_r_dec_le by (fastforce intro: order_trans[OF le_norm_bfun] add_mono mult_left_mono dest: abs_le_D1)
lemma La_le': "La a (apply_bfun v) s ≤ rM + l * norm v" using La_le abs_le_D1 by blast
subsection‹Optimality Equations›
definition"L (v :: 's ==>b real) s = (⊔d ∈ DR. L d v s)"
lemmaL_bfun: "L v ∈ bfun" unfoldingL_defusing abs_L_le ex_dec by (fastforce intro!: cSup_abs_le bfun_normI)
lemma L_bounded[simp, intro]: "bounded (range (λp. L p v s))" using abs_L_le by (auto intro!: boundedI)
lemma L_bounded'[simp, intro]: "bounded ((λp. L p v s) ` X)" by (auto intro: bounded_subset)
lemma L_bdd_above[simp, intro]: "bdd_above ((λp. L p v s) ` X)" by (auto intro: bounded_imp_bdd_above)
lemma L_le_Lb: "is_dec d ==> L d v ≤Lb v" by (fastforce simp: Lb.rep_eq L_def intro!: cSUP_upper)
subsubsection‹Equivalences involving @{const Lb}›
lemma SUP_step_MR_eq: "L v s = (⊔pa ∈ {pa. set_pmf pa ⊆ A s}. (∫a. La a v s ∂measure_pmf pa))" unfoldingL_def proof (intro antisym) show"(⊔d∈DR. L d v s) ≤ (⊔pa ∈ {pa. set_pmf pa ⊆ A s}. ∫a. La a v s ∂measure_pmf pa)" proof (rule cSUP_mono) show"DR≠ {}" using DR_ne . nextshow"bdd_above ((λpa. ∫a. La a v s ∂measure_pmf pa) ` {pa. set_pmf pa ⊆ A s})" using La_bounded La_le by (auto intro!: order_trans[OF integral_abs_bound]
bounded_imp_bdd_above boundedI[where B = "rM + l * norm v"]
measure_pmf.integral_le_const bounded_integrable) nextshow"∃m∈{pa. set_pmf pa ⊆ A s}. L n v s ≤∫a. La a v s ∂measure_pmf m"if"n ∈DR"for n using that by (fastforce simp: L_eq_La La_int is_dec_def) qed next have aux: "{pa. set_pmf pa ⊆ A s} ≠ {}" using DR_ne is_dec_def by auto show"(⊔pa∈{pa. set_pmf pa ⊆ A s}. ∫a. La a v s ∂measure_pmf pa) ≤ (⊔d∈DR. L d v s)" proof (intro cSUP_least[OF aux] cSUP_upper2) fix n assume h: "n ∈ {pa. set_pmf pa ⊆ A s}" let ?p = "(λs'. if s = s' then n else SOME a. set_pmf a ⊆ A s')" have aux: "∃a. set_pmf a ⊆ A sa"for sa using ex_dec is_dec_def by blast show"?p ∈ DR" unfolding is_dec_def using h someI_ex[OF aux] by auto thus"(∫a. La a v s ∂n) ≤ L ?p v s" by (auto simp: L_eq_La) show"bdd_above ((λd. L d v s) ` DR)" by (fastforce intro!: bounded_imp_bdd_above simp: bounded_def) next qed qed
lemmaLb_eq_SUP_La: "Lb v s = (⊔p ∈ {p. set_pmf p ⊆ A s}. ∫a. La a v s ∂measure_pmf p)" using SUP_step_MR_eq Lb.rep_eq by presburger
lemma SUP_step_det_eq: "(⊔d ∈ DD. L (mk_dec_det d) v s) = (⊔a ∈ A s. La a v s)" proof (intro antisym cSUP_mono) show"bdd_above ((λa. La a v s) ` A s)" using La_boundedby (fastforce intro!: bounded_imp_bdd_above simp: bounded_def) show"bdd_above ((λd. L (mk_dec_det d) v s) ` DD)" by (auto intro!: bounded_imp_bdd_above boundedI abs_L_le) show"∃m∈A s. L (mk_dec_det n) v s ≤ La m v s"if"n ∈ DD"for n using that is_dec_det_def by (auto simp: L_eq_La_det intro: bexI[of _ "n s"]) show"∃m∈DD. La n v s ≤ L (mk_dec_det m) v s"if"n ∈ A s"for n using that A_ne by (fastforce simp: L_eq_La_det is_dec_det_def some_in_eq
intro!: bexI[of _ "λs'. if s = s' then _ else SOME a. a ∈ A s'"]) qed (auto simp: A_ne)
lemma integrable_La: "integrable (measure_pmf x) (λa. La a (apply_bfun v) s)" proof (intro Bochner_Integration.integrable_add integrable_mult_right) show"integrable (measure_pmf x) (λx. r (s, x))" using abs_r_le_rM by (auto intro: measure_pmf.integrable_const_bound[of _ "rM"]) next show"integrable (measure_pmf x) (λx. measure_pmf.expectation (K (s, x)) v)" by (auto intro!: bounded_integrable boundedI order.trans[OF integral_abs_bound]
measure_pmf.integral_le_const abs_le_norm_bfun) qed
lemma SUP_La_eq_det: fixes v :: "'s ==>b real" shows"(⊔p∈{p. set_pmf p ⊆ A s}. ∫a. La a v s ∂measure_pmf p) = (⊔a∈A s. La a v s)" proof (intro antisym) show"(⊔pa∈{pa. set_pmf pa ⊆ A s}. measure_pmf.expectation pa (λa. La a v s)) ≤ (⊔a∈A s. La a v s)" using ex_dec is_dec_def integrable_La A_ne La_bounded by (fastforce intro: bounded_range_subset intro!: cSUP_least lemma_4_3_1) show"(⊔a∈A s. La a v s) ≤ (⊔p∈{p. set_pmf p ⊆ A s}. ∫a. La a v s ∂measure_pmf p)" unfolding SUP_step_MR_eq[symmetric] SUP_step_det_eq[symmetric] L_def using ex_dec_det by (fastforce intro!: cSUP_mono) qed
lemmaL_eq_SUP_det: "L v s = (⊔d ∈ DD. L (mk_dec_det d) v s)" using SUP_step_MR_eq SUP_step_det_eq SUP_La_eq_detby auto
lemmaLb_eq_SUP_det: "Lb v s = (⊔d ∈ DD. L (mk_dec_det d) v s)" usingL_eq_SUP_det unfoldingLb.rep_eq by auto
subsection‹Monotonicity›
lemmaPX_mono[intro]: "a ≤ b ==>PX p n a ≤PX p n b" by (fastforce simp: PX_def intro: integral_mono)
lemmaP1_mono[intro]: "a ≤ b ==>P1 p a ≤P1 p b" usingP1_nonneg by auto
lemma L_mono[intro]: "u ≤ v ==> L d u ≤ L d v" unfolding L_def by (auto intro: scaleR_left_mono)
lemmaLb_mono[intro]: "u ≤ v ==>Lb u ≤Lb v" using ex_dec L_mono[of u v] by (fastforce intro!: cSUP_mono simp: Lb.rep_eq L_def)
lemma step_mono: assumes"Lb v ≤ v""d ∈ DR" shows"L d v ≤ v" using assms L_le_Lb order.trans by blast
lemma step_mono_elem_det: assumes"v ≤Lb v""e > 0" shows"∃d∈DD. v ≤ L (mk_dec_det d) v + e *R 1" proof - have"v s ≤ (⊔a∈A s. La a v s)"for s using SUP_step_det_eq Lb_eq_SUP_det assms(1) by fastforce hence"∃a∈A s. v s - e < La a v s"for s using A_ne La_le' by (subst less_cSUP_iff[symmetric]) (fastforce simp: assms add_strict_increasing algebra_simps intro!: bdd_above.I2)+ hence aux: "∃a∈A s. v s ≤ La a v s + e"for s by (auto simp: diff_less_eq intro: less_imp_le) thenobtain d where"is_dec_det d""v s ≤ L (mk_dec_det d) v s + e"for s by (metis L_eq_La_det is_dec_det_def) thus ?thesis by fastforce qed
lemma step_mono_elem: assumes"v ≤Lb v""e > 0" shows"∃d∈DR. v ≤ L d v + e *R 1" using assms step_mono_elem_det by blast
lemmaPX_L_le: assumes"Lb v ≤ v""p ∈ ΠMR" shows"PX p n (L (p n) v) ≤PX p n v" using assms step_mono by auto
end
locale MDP_reward_disc = MDP_reward A K r l for
A and
K :: "'s ::countable × 'a ::countable ==> 's pmf"and
r l + assumes
disc_lt_one [simp]: "l < 1" begin
definition"is_opt_act v s = is_arg_max (λa. La a v s) (λa. a ∈ A s)" abbreviation"opt_acts v s ≡ {a. is_opt_act v s a}"
lemma summable_disc [intro, simp]: "summable (λi. l ^ i * x)" by (simp add: mult.commute)
lemma summable_r_disc[intro, simp]: "summable (λi. ∣l ^ i * r (sa i)∣)" "summable (λi. l ^ i * ∣r (sa i)∣)" "summable (λi. l ^ i * r (sa i))" proof - show"summable (λi. ∣l ^ i * r (sa i)∣)" using abs_r_le_rM by (fastforce intro!: mult_left_mono summable_comparison_test'[OF summable_disc]) thus"summable (λi. l ^ i * r (sa i))""summable (λi. l ^ i * ∣r (sa i)∣)" by (auto intro: summable_rabs_cancel) qed
lemma summable_norm_disc_I[intro]: assumes"summable (λt. (l^t * norm F))" shows"summable (λt. norm (l^t *R F))" using assms by auto
lemma summable_norm_disc_I'[intro]: assumes"summable (λt. (l^t * norm (F t)))" shows"summable (λt. norm (l^t *R F t))" using assms by auto
lemma summable_discI [intro]: assumes"bounded (range F)" shows"summable (λt. l^t * norm (F t))" proof - obtain b where"norm (F x) ≤ b"for x using assms by (auto simp: bounded_iff) thus ?thesis using Abel_lemma[of l 1 F b] by (auto simp: mult.commute) qed
lemma summable_norm_bfun_disc: "summable (λt. l^t * norm (apply_bfun f t))" using norm_le_norm_bfun by (auto simp: mult.commute[of "l^_"] intro!: Abel_lemma[of _ 1 _ "norm f"])
lemma summable_bfun_disc [simp]: "summable (λt. l^t * (apply_bfun f t))" proof - have"norm (l^t * apply_bfun f t) = l^t * norm (apply_bfun f t)"for t by (auto simp: abs_mult) hence"summable (λt. norm (l^t * (apply_bfun f t)))" by (auto simp only: abs_mult) thus ?thesis by (auto intro: summable_norm_cancel) qed
lemma norm_bfun_disc_le: "norm f ≤ B ==> (∑x. l^x * norm (apply_bfun f x)) ≤ (∑x. l^x * B)" by (fastforce intro!: suminf_le mult_left_mono norm_le_norm_bfun intro: order.trans)
lemma norm_bfun_disc_le': "norm f ≤ B ==> (∑x. l^x * (apply_bfun f x)) ≤ (∑x. l^x * B)" by (auto simp: mult_left_mono intro!: suminf_le order.trans[OF _ norm_bfun_disc_le])
lemma sum_disc_lim_l: "(∑x. l^x * B) = B /(1-l)" by (simp add: suminf_mult2[symmetric] summable_geometric suminf_geometric[of l])
lemma sum_disc_bound: "(∑x. l^x * apply_bfun f x) ≤ (norm f) /(1-l)" using norm_bfun_disc_le' sum_disc_lim by auto
lemma sum_disc_bound': fixes f :: "nat ==> 'b ==>b real" assumes h: "∀n. norm (f n) ≤ B" shows"norm (∑x. l^x *R f x) ≤ B /(1-l)" proof - have"norm (∑x. l^x *R f x) ≤ (∑x. norm (l^x *R f x))" using h by (fastforce intro!: boundedI summable_norm) alsohave"…≤ (∑x. l^x * B)" using h by (auto intro!: suminf_le boundedI simp: mult_mono') alsohave"… = B /(1-l)" by (simp add: sum_disc_lim) finallyshow"norm (∑x. l^x *R f x) ≤ B /(1-l)" . qed
lemma abs_ν_trace_le: "∣ν_trace t∣≤ (∑i. l ^ i * rM)" by (auto intro!: abs_r_le_rM mult_left_mono order_trans[OF summable_rabs] suminf_le)
lemma integrable_ν_trace: "integrable (T p s) ν_trace" by (fastforce simp: bounded_iff intro: abs_ν_trace_le)
context fixes p :: "('s, 'a) pol" begin
lemma ν_eq_ν_trace: "ν p s = ∫t. ν_trace t ∂T p s" proof - have"(λn. ν_fin p n s) <----∫t. ν_trace t ∂T p s" unfolding ν_fin_def proof(intro integral_dominated_convergence) show"AE x in T p s. ν_trace_fin x <---- ν_trace x" using summable_LIMSEQ by blast next have"(∑i<N. l ^ i * rM) ≤ (∑N. l ^ N * rM)"for N by (auto intro: sum_le_suminf simp: rM_nonneg) thus"AE x in T p s. norm (ν_trace_fin x N) ≤ (∑N. l ^ N * rM)"for N using order_trans[OF abs_ν_trace_fin_le] by fastforce qed auto thus ?thesis using ν_eq_lim limI by fastforce qed
lemma abs_ν_le: "∣ν p s∣≤ (∑i. l^i * rM)" unfolding ν_eq_Pn using abs_exp_r_le by (fastforce intro!: order.trans[OF summable_rabs] suminf_le summable_comparison_test'[OF summable_disc] mult_left_mono)
lemma ν_le: "ν p s ≤ (∑i. l^i * rM)" by (auto intro: abs_ν_le abs_le_D1)
(* 6.1.2 in Puterman *) lemma ν_bfun: "ν p ∈ bfun" by (auto intro!: abs_ν_le)
lift_definition νb :: "'s ==>b real"is"ν p" using ν_bfun by blast
lemma norm_ν_le: "norm νb≤ rM / (1-l)" using abs_ν_le sum_disc_lim by (auto simp: νb.rep_eq norm_bfun_def' intro: cSUP_least) end
lemma ν_as_markovian: "ν (mk_markovian (as_markovian p (return_pmf s))) s = ν p s" by (auto simp: ν_eq_Pn Pn_as_markovian_eq Pn'_def)
lemma νb_as_markovian: "νb (mk_markovian (as_markovian p (return_pmf s))) s = νb p s" using ν_as_markovian by (auto simp: νb.rep_eq)
subsection‹Optimal Reward›
definition"ν_MD s ≡⊔p ∈ ΠMD. ν (mk_markovian_det p) s" definition"ν_opt s ≡⊔p ∈ ΠHR. ν p s"
lemma ν_opt_bfun: "ν_opt ∈ bfun" using abs_ν_le policies_ne by (fastforce simp: ν_opt_def intro!: order_trans[OF cSup_abs_le] bfun_normI)
lift_definition νb_opt :: "'s ==>b real"is ν_opt using ν_opt_bfun .
lemma νb_opt_eq: "νb_opt s = (⊔p ∈ ΠHR. νb p s)" using νb.rep_eq νb_opt.rep_eq ν_opt_def by presburger
lemma ν_le_ν_opt [intro]: assumes"is_policy p" shows"ν p s ≤ ν_opt s" unfolding ν_opt_def using abs_ν_le assms by (force intro: cSUP_upper intro!: bounded_imp_bdd_above boundedI)
lemma νb_le_opt [intro]: "p ∈ ΠHR==> νb p ≤ νb_opt" using ν_le by (fastforce simp: νb.rep_eq νb_opt.rep_eq)
lemma νb_le_opt_DD [intro]: "is_dec_det d ==> νb (mk_stationary_det d) ≤ νb_opt" by (auto simp add: is_policy_def mk_markovian_def)
lemma νb_le_opt_DR [intro]: "is_dec d ==> νb (mk_stationary d) ≤ νb_opt" by (auto simp add: is_policy_def mk_markovian_def)
lemma νb_opt_eq_MR: "νb_opt s = (⊔p ∈ ΠMR. νb (mk_markovian p) s)" proof (rule antisym) show"νb_opt s ≤ (⊔p∈ΠMR. νb (mk_markovian p) s)" unfolding νb_opt_eq proof (rule cSUP_mono) show"ΠHR≠ {}" using policies_ne by simp show"bdd_above ((λp. νb (mk_markovian p) s) ` ΠMR)" by (auto intro!: boundedI bounded_imp_bdd_above abs_ν_le simp: νb.rep_eq) show"n ∈ ΠHR==>∃m∈ΠMR. νb n s ≤ νb (mk_markovian m) s"for n using is_ΠMR_as_markovianby (subst νb_as_markovian[symmetric]) fastforce qed show"(⊔p∈ΠMR. νb (mk_markovian p) s) ≤ νb_opt s" using ΠMR_ne ΠMR_imp_policies by (auto intro!: cSUP_mono bounded_imp_bdd_above boundedI abs_ν_le simp: νb_opt_eq νb.rep_eq) qed
lemma summable_norm_disc_reward'[simp]: "summable (λt. l^t * norm (PX p t (r_decb (p t))))" usingPX_bounded_rby auto
lemma summable_disc_reward_PX [simp]: "summable (λt. l^t *RPX p t (r_decb (p t)))" using summable_disc_reward PX_bounded_rby blast
lemma disc_reward_tendsto: "(λn. ∑t<n. l^t *RPX p t (r_decb (p t))) <---- (∑t. l^t *RPX p t (r_decb (p t)))" by (simp add: summable_LIMSEQ)
lemma ν_eq_PX: "ν (mk_markovian p) = (∑i. l^i *RPX p i (r_decb (p i)))" proof - have"ν (mk_markovian p) s = (∑i. l^i * PX p i (r_decb (p i)) s)"for s unfolding νb.rep_eq PX_def ν_eq_Pn Pn'_markovian_eq_Xn'_bind measure_pmf_bind using measure_pmf_in_subprob_algebra abs_r_le_rM by (subst integral_bind) (auto simp: r_dec_eq_r_K0) thus ?thesis by (auto simp: suminf_apply_bfun) qed
lemma νb_eq_PX: "νb (mk_markovian p) = (∑i. l^i *RPX p i (r_decb (p i)))" by (auto simp: ν_eq_PX νb.rep_eq)
lemma νb_fin_tendsto_νb: "(νb_fin (mk_markovian p)) <---- νb (mk_markovian p)" using disc_reward_tendsto νb_eq_PX νb_fin_eq_PX by presburger
lemma norm_P1_l_less: "norm (l *RP1 d) < 1" by auto lemma disc_P1_tendsto: "(λn. (∑t≤n. l^t *RP1 d ^^t)) <---- (∑t. l^t *RP1 d ^^t)" by (fastforce simp: bounded_iff intro: summable_LIMSEQ')
lemma disc_P1_lim: "lim (λn. (∑t≤n. l^t *RP1 d ^^ t)) = (∑t. l^t *RP1 d ^^t)" using limI disc_P1_tendsto by blast
lemma convergent_disc_P1: "convergent (λn. (∑t≤n. l^t *RP1 d ^^t))" using convergentI disc_P1_tendsto by blast
lemmaP1_suminf_ge: assumes"0 ≤ u"shows"u ≤ (∑t. l^t *RP1 d ^^t) u" proof - have aux: "∧x. (λn. (∑t≤n. l^t *RP1 d ^^t) u x) <---- (∑t. l^t *RP1 d ^^t) u x" using bfun_tendsto_apply_bfun disc_P1_lim lim_blinfun_apply[OF convergent_disc_P1] by fastforce have"∧n. u ≤ (∑t≤n. l^t *RP1 d ^^t) u" usingP1_sum_ge[OF assms] by auto thus ?thesis by (auto intro!: LIMSEQ_le_const[OF aux]) qed
lemmaP1_suminf_pos: assumes"0 ≤ u" shows"0 ≤ (∑t. l^t *RP1 d ^^t) u" usingP1_suminf_ge[of u] assms order.trans by auto
lemma lemma_6_1_2_b: assumes"v ≤ u" shows"(∑t. l^t *RP1 d ^^t) v ≤ (∑t. l^t *RP1 d ^^t) u" proof - have"0 ≤ (∑n. l ^ n *RP1 d ^^ n) (u - v)" usingP1_suminf_pos assms by simp thus ?thesis by (simp add: blinfun.diff_right) qed
lemma ν_stationary: "νb (mk_stationary d) = (∑t. l^t *R (P1 d ^^ t)) (r_decb d)" proof - have"νb (mk_stationary d) = (∑t. (l ^ t *R (P1 d ^^ t)) (r_decb d))" by (simp add: νb_eq_PX scaleR_blinfun.rep_eq PX_sconst) alsohave"... = (∑t. (l ^ t *R (P1 d ^^ t))) (r_decb d)" by (subst bounded_linear.suminf[where f = "λx. blinfun_apply x (r_decb d)"])
(auto intro!: bounded_linear.suminf boundedI) finallyshow ?thesis . qed
lemma ν_stationary_inv: "νb (mk_stationary d) = invL (id_blinfun - l *RP1 d) (r_decb d)" by (auto simp: ν_stationary invL_inf_sum blincomp_scaleR_right)
text‹The value of a markovian policy can be expressed in terms of @{const L}.›
lemma ν_step: "νb (mk_markovian p) = L (p 0) (νb (mk_markovian (λn. p (Suc n))))" proof - have s: "summable (λt. l^t *R (PX p (Suc t) (r_decb (p (Suc t)))))" usingPX_bound_rby (auto intro!: boundedI[of _ rM]) have "νb (mk_markovian p) = r_decb (p 0) + (∑t. l ^ (Suc t) *RPX p (Suc t) (r_decb (p (Suc t))))" by (subst suminf_split_head) (auto simp: νb_eq_PX) alsohave "… = r_decb (p 0) + l *R (∑t. P1 (p 0) (l^t *RPX (λn. p (Suc n)) t (r_decb (p (Suc t)))))" using suminf_scaleR_right[OF s] by (auto simp: PX_Suc blinfun.scaleR_right) alsohave "… = L (p 0) (νb (mk_markovian (λn. p (Suc n))))" using blinfun.bounded_linear_right bounded_linear.suminf[of "blinfun_apply (P1 (p 0))"] by (fastforce simp add: νb_eq_PX L_def) finallyshow ?thesis . qed
lemma L_ν_fix: "νb (mk_stationary d) = L d (νb (mk_stationary d))" using ν_step .
lemma L_fix_ν: assumes"L p v = v" shows"v = νb (mk_stationary p)" proof - have"r_decb p = (id_blinfun - l *RP1 p) v" using assms by (auto simp: eq_diff_eq L_def blinfun.diff_left blinfun.scaleR_left) hence"v = (∑t. (l *RP1 p)^^t) (r_decb p)" using inv_norm_le'(2)[OF norm_P1_l_less] by auto thus"v = νb (mk_stationary p)" by (auto simp: ν_stationary blincomp_scaleR_right) qed
lemma L_ν_fix_iff: "L d v = v ⟷ v = νb (mk_stationary d)" using L_fix_ν L_ν_fixby auto
subsection‹Properties of Solutions of the Optimality Equations›
abbreviation"Pd p n v ≡ l^n *RPX p n v"
lemmaPd_lim: "(λn. (Pd p n v)) <---- 0" proof - have"(λn. l^n * norm v) <---- 0" by (auto intro!: tendsto_eq_intros) moreoverhave"norm (Pd p n v) ≤ l^n * norm v"for p n by (simp add: mult_mono') ultimatelyhave"(λn. norm (Pd p n v)) <---- 0"for p by (auto simp: Lim_transform_bound[where g = "λn. (l^n * norm v)"]) thus"(λn. (Pd p n v)) <---- 0"for p using tendsto_norm_zero_cancel by fast qed
(* 6.2.2 a) in Puterman *)
lemmaL_dec_ge_opt: assumes"Lb v ≤ v" shows"νb_opt≤ v" proof - have"νb (mk_markovian p) ≤ v"if"p ∈ ΠMR"for p proof - let ?p = "mk_markovian p" have aux: "νb_fin ?p n + l^n *RPX p n v ≤ v"for n proof (induction n) case (Suc n) have"PX p n (r_decb (p n)) + l *R (PX p (Suc n) v) ≤PX p n v" usingPX_L_le assms that by (simp add: PX_Suc_n_elem L_def linear_simps) hence"νb_fin ?p (n + 1) + l^(n + 1) *R (PX p (n + 1) v) ≤ νb_fin ?p n + l^n *R (PX p n v)" by (auto simp del: scaleR_scaleR intro: scaleR_left_mono simp: νb_fin_eq_PX
mult.commute[of l] scaleR_add_right[symmetric] scaleR_scaleR[symmetric]) alsohave"…≤ v" using Suc.IH by (auto simp: νb_fin_eq_PX) finallyshow ?case by auto qed (auto simp: νb_fin_eq_PX) have1: "(λn. (νb_fin ?p n + Pd p n v) s) <---- νb ?p s"for s using bfun_tendsto_apply_bfun Limits.tendsto_add[OF νb_fin_tendsto_νbPd_lim] by fastforce have"νb ?p s ≤ v s"for s using that aux assms by (fastforce intro!: lim_mono[OF _ 1, of _ _ "λn. v s"]) thus ?thesis using that by blast qed thus ?thesis using policies_ne by (fastforce simp: is_policy_def νb_opt_eq_MR intro!: cSUP_least) qed
lemmaL_inc_le_opt: assumes"v ≤Lb v" shows"v ≤ νb_opt" proof - have le_elem: "v s ≤ νb_opt s + (e/(1-l))"if"e > 0"for s e proof - obtain d where"d ∈ DR"and hd: "v ≤ L d v + e *R 1" using assms step_mono_elem ‹e > 0›by blast let ?Pinf = "(∑i. l^i *RP1 d^^i)" have"v ≤ r_decb d + l *R (P1 d) v + e *R 1" using hd L_def by fastforce hence"(id_blinfun - l *RP1 d) v ≤ r_decb d + e *R 1" by (auto simp: blinfun.diff_left blinfun.scaleR_left algebra_simps) hence"?Pinf ((id_blinfun - l *RP1 d) v) ≤ ?Pinf (r_decb d + e *R 1)" using lemma_6_1_2_b P1_def hd by auto hence"v ≤ ?Pinf (r_decb d + e *R 1)" using inv_norm_le'(2)[of "l *RP1 d"] by (auto simp: blincomp_scaleR_right) alsohave"… = νb (mk_stationary d) + e *R ?Pinf 1" by (simp add: ν_stationary blinfun.add_right blinfun.scaleR_right) alsohave"… = νb (mk_stationary d) + e *R (∑i. (l^i *R ((P1 d^^i))) 1)" using convergent_disc_P1 by (auto simp: summable_iff_convergent' bounded_linear.suminf[of "λx. blinfun_apply x 1"]) alsohave"… = νb (mk_stationary d) + e *R (∑i. (l^i *R 1))" by (auto simp: scaleR_blinfun.rep_eq) alsohave"…≤ (νb (mk_stationary d) + (e / (1-l)) *R 1)" by (auto simp: bounded_linear.suminf[symmetric, where f = "λx. x *R 1"]
suminf_geometric bounded_linear_scaleR_left summable_geometric) finallyhave"v s ≤ (νb (mk_stationary d) + (e/(1-l)) *R 1) s" by auto thus"v s ≤ νb_opt s + (e/(1-l))" using‹d ∈ DR› νb_le_opt by (auto simp: is_policy_def mk_markovian_def less_eq_bfun_def intro: order_trans) qed have"v s ≤ νb_opt s + e"if"e > 0"for s e proof - have"e * (1 - l) > 0" by (simp add: ‹0 < e›) thus"v s ≤ νb_opt s + e" using disc_lt_one that le_elem by (fastforce split: if_splits) qed thus ?thesis by (fastforce intro: field_le_epsilon) qed lemmaL_fix_imp_opt: assumes"v = Lb v" shows"v = νb_opt" using assms dual_order.antisym[OF L_dec_ge_opt L_inc_le_opt] by auto
lemma bounded_P: "bounded (P1 ` X)" by (auto simp: bounded_iff)
subsection‹Solutions to the Optimality Equation› subsubsection‹@{const Lb} and @{const L} are Contraction Mappings› declare bounded_apply_blinfun[intro] bounded_apply_bfun'[intro]
lemma contraction_L: "dist (Lb v) (Lb u) ≤ l * dist v u" proof - have"dist (Lb v s) (Lb u s) ≤ l * dist v u"if"Lb u s ≤Lb v s"for s v u proof - have"dist (Lb v s) (Lb u s) ≤ (⊔d ∈ DR. L d v s - L d u s)" using ex_dec that by (fastforce intro!: le_SUP_diff' simp: dist_real_def Lb.rep_eq L_def) alsohave"… = (⊔d ∈ DR. l * (P1 d (v - u) s))" by (auto simp: L_def right_diff_distrib blinfun.diff_right) alsohave"… = l * (⊔d ∈ DR. P1 d (v - u) s)" using DR_ne bounded_P by (fastforce intro: bounded_SUP_mul) alsohave"…≤ l * norm (⊔d ∈ DR. P1 d (v - u) s)" by (simp add: mult_left_mono) alsohave"…≤ l * (⊔d ∈ DR. norm ((P1 d (v - u)) s))" proof - have"bounded ((λx. norm ((P1 x (v - u)) s)) ` DR)" using bounded_apply_bfun' bounded_P bounded_apply_blinfun bounded_norm_comp by metis thus ?thesis using DR_ne ex_dec bounded_norm_comp by (fastforce intro!: mult_left_mono) qed alsohave"…≤ l * (⊔p ∈ DR. norm (P1 p ((v - u))))" using DR_ne abs_le_norm_bfun bounded_P by (fastforce simp: bounded_norm_comp intro!: bounded_imp_bdd_above mult_left_mono cSUP_mono) alsohave"…≤ l * (⊔p ∈ DR. norm ((v - u)))" using norm_push_exp_le_norm DR_ne by (fastforce simp: P1.rep_eq intro!: mult_left_mono cSUP_mono) alsohave"… = l * dist v u" by (auto simp: dist_norm) finallyshow ?thesis . qed hence"Lb u s ≤Lb v s ==> dist (Lb v s) (Lb u s) ≤ l * dist v u" "Lb v s ≤Lb u s ==> dist (Lb v s) (Lb u s) ≤ l * dist v u"for u v s by (fastforce simp: dist_commute)+ thus ?thesis using linear[of "Lb u _"] by (fastforce intro: dist_bound) qed
lemma is_contraction_L: "is_contraction Lb" using contraction_L zero_le_disc disc_lt_one unfolding is_contraction_def by blast
lemma contraction_L: "dist (L p v) (L p u) ≤ l * dist v u" proof - have aux: "L p v s - L p u s ≤ l * dist v u"if lea: "L p v s ≥ L p u s"for v s u proof - have"L p v s - L p u s = (l *R (P1 p v - P1 p u)) s" by (simp add: L_def scale_right_diff_distrib) alsohave"…≤ l * norm (P1 p (v - u) s)" by (auto simp: blinfun.diff_right intro!: mult_left_mono) alsohave"…≤ l * norm (P1 p (v - u))" using abs_le_norm_bfun by (auto intro!: mult_left_mono) alsohave"…≤ l * dist v u" by (simp add: P1.rep_eq mult_left_mono norm_push_exp_le_norm dist_norm) finallyshow ?thesis by auto qed have"dist (L p v s) (L p u s) ≤ l * dist v u"for v s u using aux[of v _ u] aux[of u _ v] by (cases "L p v s ≥ L p u s") (auto simp: dist_real_def dist_commute) thus"dist (L p v) (L p u) ≤ l * dist v u" by (simp add: dist_bound) qed
lemma is_contraction_L: "is_contraction (L p)" unfolding is_contraction_def using contraction_L disc_lt_one zero_le_disc by blast
subsubsection‹Existence of a Fixpoint of @{const Lb}› lemmaLb_conv: "∃!v. Lb v = v""(λn. (Lb ^^ n) v) <---- (THE v. Lb v = v)" using banach'[OF is_contraction_L] by auto
lemmaLb_fix_iff_opt [simp]: "Lb v = v ⟷ v = νb_opt" using banach'(1) is_contraction_LL_fix_imp_opt by metis
lemma νb_opt_fix: "νb_opt = (THE v. Lb v = v)" by auto
lemma thm_6_2_6: "νb p = νb_opt⟷Lb (νb p) = νb p" by force
lemma thm_6_2_6': "ν p = ν_opt ⟷Lb (νb p) = νb p" using thm_6_2_6 νb.rep_eq νb_opt.rep_eq by fastforce
subsection‹Existence of Optimal Policies›
definition"ν_improving v d ⟷ (∀s. is_arg_max (λd. (L d v) s) (λd. d ∈ DR) d)"
lemma ν_improving_iff: "ν_improving v d ⟷ d ∈ DR∧ (∀d' ∈ DR. ∀s. L d' v s ≤ L d v s)" by (auto simp: ν_improving_def is_arg_max_linorder)
lemma ν_improving_D_MR[dest]: "ν_improving v d ==> d ∈ DR" by (auto simp add: ν_improving_iff)
lemma ν_improving_ge: "ν_improving v d ==> d' ∈ DR==> L d' v s ≤ L d v s" by (auto simp: ν_improving_iff)
lemma ν_improving_imp_Lb: "ν_improving v d ==>Lb v = L d v" by (fastforce intro!: cSup_eq_maximum simp: ν_improving_iff Lb.rep_eq L_def)
lemmaLb_imp_ν_improving: assumes"d ∈ DR""Lb v = L d v" shows"ν_improving v d" using assms L_le_Lbby (auto simp: ν_improving_iff assms(2)[symmetric])
lemma ν_improving_alt: assumes"d ∈ DR" shows"ν_improving v d ⟷Lb v = L d v" usingLb_imp_ν_improving ν_improving_imp_Lb assms by blast
definition"ν_conserving d = ν_improving (νb_opt) d"
lemma ν_conserving_iff: "ν_conserving d ⟷ d ∈ DR∧ (∀d' ∈ DR. ∀s. L d' νb_opt s ≤ L d νb_opt s)" by (auto simp: ν_conserving_def ν_improving_iff)
lemma ν_conserving_ge: "ν_conserving d ==> d' ∈ DR==> L d' νb_opt s ≤ L d νb_opt s" by (auto simp: ν_conserving_iff intro: ν_improving_ge)
lemma ν_conserving_imp_Lb [simp]: "ν_conserving d ==> L d νb_opt = νb_opt" using ν_improving_imp_Lbby (fastforce simp: ν_conserving_def)
lemmaLb_imp_ν_conserving: assumes"d ∈ DR""Lb νb_opt = L d νb_opt" shows"ν_conserving d" usingLb_imp_ν_improving assms by (auto simp: ν_conserving_def)
lemma ν_conserving_alt: assumes"d ∈ DR" shows"ν_conserving d ⟷Lb νb_opt = L d νb_opt" unfolding ν_conserving_def using ν_improving_alt assms by auto
lemma ν_conserving_alt': assumes"d ∈ DR" shows"ν_conserving d ⟷ L d νb_opt = νb_opt" using assms ν_conserving_alt by auto
subsubsection‹Conserving Decision Rules are Optimal›
theorem ex_improving_imp_conserving: assumes"∧v. ∃d. ν_improving v (mk_dec_det d)" shows"∃d. ν_conserving (mk_dec_det d)" by (simp add: assms ν_conserving_def)
theorem conserving_imp_opt[simp]: assumes"ν_conserving (mk_dec_det d)" shows"νb (mk_stationary_det d) = νb_opt" using L_ν_fix_iff ν_conserving_imp_Lb[OF assms] by simp
lemma conserving_imp_opt': assumes"∃d. ν_conserving (mk_dec_det d)" shows"∃d ∈ DD. (νb (mk_stationary_det d)) = νb_opt" using assms by (fastforce simp: ν_conserving_def)
theorem improving_att_imp_det_opt: assumes"∧v. ∃d. ν_improving v (mk_dec_det d)" shows"νb_opt s = (⊔d ∈ DD. νb (mk_stationary_det d) s)" proof - obtain d where d: "ν_conserving (mk_dec_det d)" using assms ex_improving_imp_conserving by auto hence"d ∈ DD" using ν_conserving_iff is_dec_mk_dec_det_iff by blast thus ?thesis using ΠMR_imp_policies νb_le_opt by (fastforce intro!: cSup_eq_maximum[where z = "νb_opt s", symmetric]
simp: conserving_imp_opt[OF d] image_iff) qed
lemmaLb_sup_att_dec: assumes"d ∈ DR""Lb v = L d v" shows"∃d' ∈ DD. Lb v = L (mk_dec_det d') v" proof - have"∃a∈ A s. L d v s = La a v s"for s unfolding L_eq_La using assms is_dec_def La_bounded A_ne Lb.rep_eq L_def by (intro lemma_4_3_1')
(auto intro: bounded_range_subset simp: assms(2)[symmetric] L_eq_La[symmetric] SUP_step_MR_eq) thenobtain d' where d: "d' s ∈ A s""L d v s = La (d' s) v s"for s by metis thus ?thesis using assms d by (fastforce simp: is_dec_det_def mk_dec_det_def L_eq_La) qed
lemmaLb_sup_att_dec': assumes"d ∈ DR""Lb v = L d v" shows"∃d' ∈ DD. ν_improving v (mk_dec_det d')" usingLb_sup_att_dec ν_improving_alt assms by force
subsubsection‹Deterministic Decision Rules are Optimal›
lemma opt_imp_opt_dec_det: assumes"p ∈ ΠHR""νb p = νb_opt" shows"∃d ∈ DD. νb (mk_stationary_det d) = νb_opt" proof - have aux: "L (as_markovian p (return_pmf s) 0) νb_opt s = νb_opt s"for s proof - let ?ps = "as_markovian p (return_pmf s)" have markovian_suc_le: "νb (mk_markovian (λn. as_markovian p (return_pmf s) (Suc n))) ≤ νb_opt" using is_ΠMR_as_markovian assms by (auto simp: is_policy_def mk_markovian_def) have aux_le: "∧x f g. f ≤ g ==> apply_bfun f x ≤ apply_bfun g x" unfolding less_eq_bfun_def by auto have"νb_opt s = νb (mk_markovian ?ps) s" using assms νb_as_markovianby metis alsohave"… = L (?ps 0) (νb (mk_markovian (λn. ?ps (Suc n)))) s" using ν_step by blast alsohave"…≤ L (?ps 0) (νb_opt) s" unfolding L_def using markovian_suc_le P1_mono by (auto intro!: mult_left_mono) finallyhave"νb_opt s ≤ L (?ps 0) (νb_opt) s" . have"as_markovian p (return_pmf s) 0 ∈ DR" using is_ΠMR_as_markovian assms by fast have"L (?ps 0) νb_opt≤ νb_opt" using‹?ps 0 ∈ DR› L_le_Lb[of "?ps 0""νb_opt"] by simp thus"L (?ps 0) νb_opt s = νb_opt s" using‹νb_opt s ≤ (L (?ps 0) νb_opt) s›by (auto intro!: antisym) qed have"L (p []) v s = L (as_markovian p (return_pmf s) 0) v s"for v s by (auto simp: L_def P1.rep_eq K_st_def) hence"L (p []) νb_opt = νb_opt" using aux by auto hence"∃d ∈ DD. L (mk_dec_det d) νb_opt = νb_opt" usingLb_sup_att_dec assms(1) Lb_opt is_policy_def mem_Collect_eq by metis thus ?thesis using conserving_imp_opt' ν_conserving_alt' by blast qed
subsubsection‹Optimal Decision Rules for Finite Action Spaces›
(* 6.2.10 *) lemma ex_opt_act: assumes"∧s. finite (A s)" shows"∃a ∈ A s. La a (v :: _ ==>b _) s = Lb v s" unfoldingLb.rep_eq L_eq_SUP_det SUP_step_det_eq using arg_max_on_in[OF assms A_ne] by (auto simp: cSup_eq_Sup_fin Sup_fin_Max assms A_ne finite_arg_max_eq_Max[symmetric])
lemma ex_opt_dec_det: assumes"∧s. finite (A s)" shows"∃d ∈ DD. L (mk_dec_det d) (v :: _ ==>b _) = Lb v" unfolding is_dec_det_def mk_dec_det_def using ex_opt_act[OF assms] someI_ex apply (auto intro!: exI[of _ ‹λs. SOME a. a ∈ A s ∧ La a v s = Lb v s›] bfun_eqI) apply (smt (verit, best) someI_ex) apply (subst L_eq_La) apply (subst expectation_return_pmf) by (smt (verit, best) someI_ex)
lemma thm_6_2_10: assumes"∧s. finite (A s)" shows"∃d ∈ DD. νb_opt = νb (mk_stationary_det d)" using assms conserving_imp_opt' Lb_opt L_ν_fix_iff ex_opt_dec_det by metis
subsubsection‹Existence of Epsilon-Optimal Policies›
lemma ex_det_eps: assumes"0 < e" shows"∃d ∈ DD. Lb v ≤ L (mk_dec_det d) v + e *R 1" proof - have"∃a ∈ A s. Lb v s ≤ La a v s + e"for s proof - have"bdd_above ((λa. La a v s) ` A s)" using La_leby (auto intro!: boundedI bounded_imp_bdd_above) hence"∃a ∈ A s. Lb v s - e < La a v s" unfoldingLb.rep_eq L_eq_SUP_det SUP_step_det_eq by (auto simp: less_cSUP_iff[OF A_ne, symmetric] ‹0 < e›) thus"∃a ∈ A s. Lb v s ≤ La a v s + e" by force qed thus ?thesis unfolding mk_dec_det_def is_dec_det_def by (auto simp: L_def P1.rep_eq bind_return_pmf K_st_def less_eq_bfun_def) metis qed
lemma thm_6_2_11: assumes"eps > 0" shows"∃d ∈ DD. νb_opt≤ νb (mk_stationary_det d) + eps *R 1" proof - have"(1-l) * eps > 0" by (simp add: assms) thenobtain d where"d ∈ DD"and d: "Lb νb_opt≤ L (mk_dec_det d) νb_opt + ((1-l)*eps) *R 1" using ex_det_eps[of _ νb_opt] by auto let ?d = "mk_dec_det d" let ?lK = "l *RP1 ?d" let ?lK_opt = "l *RP1 ?d νb_opt" have"νb_opt≤ r_decb ?d + ?lK_opt + ((1-l)*eps) *R 1" using L_def L_fix_imp_opt d by simp hence"νb_opt - ?lK_opt - ((1-l)*eps) *R 1 ≤ r_decb ?d" by (simp add: cancel_ab_semigroup_add_class.diff_right_commute diff_le_eq) hence"(∑i. ?lK ^^ i) (νb_opt - ?lK_opt - ((1-l)*eps) *R 1) ≤ νb (mk_stationary ?d)" using lemma_6_1_2_b suminf_cong by (simp add: blincomp_scaleR_right ν_stationary) hence"((∑i. ?lK ^^ i) oL (id_blinfun - ?lK)) νb_opt - (∑i. ?lK ^^ i) (((1-l)*eps) *R 1) ≤ (νb (mk_stationary ?d))" by (simp add: blinfun.diff_right blinfun.diff_left blinfun.scaleR_left) hence le: "νb_opt - (∑i. ?lK ^^ i) (((1-l)*eps) *R 1) ≤ νb (mk_stationary ?d)" by (auto simp: inv_norm_le') have s: "summable (λi. (l *RP1 ?d)^^i)" using convergent_disc_P1 summable_iff_convergent' by (simp add: blincomp_scaleR_right summable_iff_convergent') have"(∑i. ?lK ^^ i) (((1-l)*eps) *R 1) = eps *R 1" proof - have"(∑i. ?lK ^^ i) (((1-l)*eps) *R 1) = ((1-l)*eps) *R (∑i. ?lK^^i) 1" using blinfun.scaleR_right by blast alsohave"… = ((1-l)*eps) *R (∑i. (?lK^^i) 1) " using s by (auto simp: bounded_linear.suminf[of "λx. blinfun_apply x 1"]) alsohave"… = ((1-l)*eps) *R (∑i. (l ^ i)) *R 1" by (auto simp: blinfun.scaleR_left blincomp_scaleR_right bounded_linear_scaleR_left
bounded_linear.suminf[of "λx. x *R 1"]) alsohave"… = ((1-l)*eps) *R (1 / (1-l)) *R 1" by (simp add: suminf_geometric) alsohave"… = eps *R 1" using disc_lt_one ‹0 < (1 - l) * eps›by auto finallyshow ?thesis . qed thus ?thesis using‹d ∈ DD› diff_le_eq le by auto qed
lemma ex_det_dist_eps: assumes"0 < (e :: real)" shows"∃d ∈ DD. dist (Lb v) (L (mk_dec_det d) v) ≤ e" proof - obtain d where"d ∈ DD""L (mk_dec_det d) v ≤ (Lb v)" and h2: "Lb v ≤ L (mk_dec_det d) v + e *R 1" using assms ex_det_eps L_le_Lbby blast hence"0 ≤Lb v - L (mk_dec_det d) v" by simp moreoverhave"Lb v - L (mk_dec_det d) v ≤ e *R 1" using h2 by (simp add: add.commute diff_le_eq) ultimatelyhave"∀s. ∣(Lb v) s - L (mk_dec_det d) v s∣≤ e" unfolding less_eq_bfun_def by auto hence"dist (Lb v) (L (mk_dec_det d) v) ≤ e" unfolding dist_bfun.rep_eq by (auto intro!: cSUP_least simp: dist_real_def) thus ?thesis using‹d ∈ DD› by auto qed
lemma less_imp_ex_add_le: "(x :: real) < y ==>∃eps>0. x + eps ≤ y" by (meson field_le_epsilon less_le_not_le nle_le)
lemma νb_opt_le_det: "νb_opt s ≤ (⊔d ∈ DD. νb (mk_stationary_det d) s)" proof (subst le_cSUP_iff, safe) fix y assume"y < νb_opt s" thenobtain eps where1: "y ≤ νb_opt s - eps"and"eps > 0" using less_imp_ex_add_le by force hence"eps / 2 > 0"by auto obtain d where"d ∈ DD"and"νb_opt s ≤ νb (mk_stationary_det d) s + eps / 2" using thm_6_2_11[OF ‹eps / 2 > 0›] by fastforce hence"y < νb (mk_stationary_det d) s" using‹eps > 0›by (auto simp: diff_less_eq intro: le_less_trans[OF 1]) thus"∃i∈DD. y < νb (mk_stationary_det i) s" using‹d ∈ DD›by blast next show"DD = {} ==> False" using D_det_ne by blast show"bdd_above ((λd. νb (mk_stationary_det d) s) ` DD)" by (auto intro!: bounded_imp_bdd_above boundedI abs_ν_le simp: νb.rep_eq) qed
lemma νb_opt_eq_det: "νb_opt s = (⊔d ∈ DD. νb (mk_stationary_det d) s)" using νb_le_opt_DD D_det_ne by (fastforce intro!: antisym[OF νb_opt_le_det] cSUP_least)
lemma νb_fin_eq: "νb_fin (mk_stationary d) n = (∑i < n. ((l *RP1 d)^^ i)) (r_decb d)" by (induction n) (auto simp add: plus_blinfun.rep_eq)
lemma L_iter: "(L d ^^ m) v = νb_fin (mk_stationary d) m + ((l *RP1 d)^^ m) v" proof (induction m arbitrary: v) case (Suc m) have"(L d ^^ Suc m) v = (L d ^^ m) (L d v)" by (simp add: funpow_Suc_right del: funpow.simps) alsohave"… = νb_fin (mk_stationary d) m + ((l *RP1 d) ^^ m) (L d v)" using Suc by simp alsohave"… = νb_fin (mk_stationary d) (Suc m) + ((l *RP1 d) ^^ Suc m) v" unfolding L_def by (auto simp: P1_pow blinfun.bilinear_simps blincomp_scaleR_right funpow_swap1) finallyshow ?case . qed simp
lemma bounded_stationary_νb_fin: "bounded ((λx. (νb_fin (mk_stationary x) N) s) ` X)" using νb_fin.rep_eq abs_ν_fin_le by (auto intro!: boundedI)
locale MDP_att_L = MDP_reward_disc A K r l for
A and
K :: "'s ::countable × 'a ::countable ==> 's pmf"and
r and l + assumes Sup_att: "max_L_ex (s :: 's) v" begin theoremLb_eq_argmax_La: fixes v :: "'s ==>b real" assumes"is_arg_max (λa. La a v s) (λa. a ∈ A s) a" shows"Lb v s = La a v s" using La_le assms A_ne Lb.rep_eq L_eq_SUP_det SUP_step_det_eq by (auto intro!: cSUP_upper2 antisym cSUP_least simp: is_arg_max_linorder)
lemma La_le_arg_max: "a ∈ A s ==> La a v s ≤ La (arg_max_on (λa. La a v s) (A s)) v s" using Sup_att app_arg_max_ge[OF Sup_att[unfolded max_L_ex_def]] by (simp add: arg_max_on_def)
lemma arg_max_on_in: "has_arg_max f Q ==> arg_max_on f Q ∈ Q" using has_arg_max_arg_max by (auto simp: arg_max_on_def)
lemmaLb_eq_La_max: "Lb v s = La (arg_max_on (λa. La a v s) (A s)) v s" using app_arg_max_eq_SUP[symmetric] Sup_att max_L_ex_def by (auto simp: Lb_eq_SUP_det SUP_step_det_eq)
lemma ex_opt_det: "∃d ∈ DD. Lb v = L (mk_dec_det d) v" proof -
define d where"d = (λs. arg_max_on (λa. La a v s) (A s))" have"Lb v s = L (mk_dec_det d) v s"for s by (auto simp: d_def Lb_eq_La_max L_eq_La_det) moreoverhave"d ∈ DD" using Sup_att arg_max_on_in by (auto simp: d_def is_dec_det_def max_L_ex_def) ultimatelyshow ?thesis by auto qed
lemma ex_improving_det: "∃d ∈ DD. ν_improving v (mk_dec_det d)" using ν_improving_alt ex_opt_det by auto end
locale MDP_act = discrete_MDP A K for A :: "'s::countable ==> 'a::countable set"and K + fixes arb_act :: "'a set ==> 'a" assumes arb_act_in[simp]: "X ≠ {} ==> arb_act X ∈ X"
locale MDP_act_disc = MDP_act A K + MDP_att_L A K r l for A :: "'s::countable ==> 'a::countable set"and K r l begin
lemma is_opt_act_some: "is_opt_act v s (arb_act (opt_acts v s))" using arb_act_in[of "{a. is_arg_max (λa. La a v s) (λa. a ∈ A s) a}"] Sup_att has_arg_max_def unfolding max_L_ex_def is_opt_act_def by auto
lemma some_opt_acts_in_A: "arb_act (opt_acts v s) ∈ A s" using is_opt_act_some unfolding is_opt_act_def is_arg_max_def by auto
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.