(* Title: HOL/Probability/Information.thy Author: Johannes Hölzl, TU München Author: Armin Heller, TU München *)
section‹Information theory›
theory Information imports
Independent_Family begin
subsection"Information theory"
locale information_space = prob_space + fixes b :: real assumes b_gt_1: "1 < b"
text‹Introduce some simplification rules for logarithm of base 🍋‹b›.› lemmas log_simps = log_mult log_inverse log_divide
subsection"Kullback$-$Leibler divergence"
text‹The Kullback$-$Leibler divergence is also known as relative entropy or Kullback$-$Leibler distance.›
definition "entropy_density b M N = log b ∘ enn2real ∘ RN_deriv M N"
definition "KL_divergence b M N = integral🪙L N (entropy_density b M N)"
lemma measurable_entropy_density[measurable]: "entropy_density b M N ∈ borel_measurable M" unfolding entropy_density_def by auto
lemma (in sigma_finite_measure) KL_density: fixes f :: "'a ==> real" assumes"1 < b" assumes f[measurable]: "f ∈ borel_measurable M"and nn: "AE x in M. 0 ≤ f x" shows"KL_divergence b M (density M f) = (∫x. f x * log b (f x) ∂M)" unfolding KL_divergence_def proof (subst integral_real_density) show [measurable]: "entropy_density b M (density M (λx. ennreal (f x))) ∈ borel_measurable M" using f by (auto simp: comp_def entropy_density_def) have"density M (RN_deriv M (density M f)) = density M f" using f nn by (intro density_RN_deriv_density) auto thenhave eq: "AE x in M. RN_deriv M (density M f) x = f x" using f nn by (intro density_unique) auto have"AE x in M. f x * entropy_density b M (density M (λx. ennreal (f x))) x = f x * log b (f x)" using eq nn by (auto simp: entropy_density_def) thenshow"(∫x. f x * entropy_density b M (density M (λx. ennreal (f x))) x ∂M) = (∫x. f x * log b (f x) ∂M)" by (intro integral_cong_AE) measurable qed fact+
lemma (in sigma_finite_measure) KL_density_density: fixes f g :: "'a ==> real" assumes"1 < b" assumes f: "f ∈ borel_measurable M""AE x in M. 0 ≤ f x" assumes g: "g ∈ borel_measurable M""AE x in M. 0 ≤ g x" assumes ac: "AE x in M. f x = 0 ⟶ g x = 0" shows"KL_divergence b (density M f) (density M g) = (∫x. g x * log b (g x / f x) ∂M)" proof - interpret Mf: sigma_finite_measure "density M f" using f by (subst sigma_finite_iff_density_finite) auto have"KL_divergence b (density M f) (density M g) = KL_divergence b (density M f) (density (density M f) (λx. g x / f x))" using f g ac by (subst density_density_divide) simp_all alsohave"… = (∫x. (g x / f x) * log b (g x / f x) ∂density M f)" using f g ‹1 🚫›by (intro Mf.KL_density) (auto simp: AE_density) alsohave"… = (∫x. g x * log b (g x / f x) ∂M)" using ac f g ‹1 🚫›by (subst integral_density) (auto intro!: integral_cong_AE) finallyshow ?thesis . qed
lemma (in information_space) KL_gt_0: fixes D :: "'a ==> real" assumes"prob_space (density M D)" assumes D: "D ∈ borel_measurable M""AE x in M. 0 ≤ D x" assumes int: "integrable M (λx. D x * log b (D x))" assumes A: "density M D ≠ M" shows"0 < KL_divergence b M (density M D)" proof - interpret N: prob_space "density M D"by fact
obtain A where"A ∈ sets M""emeasure (density M D) A ≠ emeasure M A" using measure_eqI[of "density M D" M] ‹density M D ≠ M›by auto
let ?D_set = "{x∈space M. D x ≠ 0}" have [simp, intro]: "?D_set ∈ sets M" using D by auto
have D_neg: "(∫🪙+ x. ennreal (- D x) ∂M) = 0" using D by (subst nn_integral_0_iff_AE) (auto simp: ennreal_neg)
have"(∫🪙+ x. ennreal (D x) ∂M) = emeasure (density M D) (space M)" using D by (simp add: emeasure_density cong: nn_integral_cong) thenhave D_pos: "(∫🪙+ x. ennreal (D x) ∂M) = 1" using N.emeasure_space_1 by simp
have"integrable M D" using D D_pos D_neg unfolding real_integrable_def real_lebesgue_integral_def by simp_all thenhave"integral🪙L M D = 1" using D D_pos D_neg by (simp add: real_lebesgue_integral_def)
have"0 ≤ 1 - measure M ?D_set" using prob_le_1 by (auto simp: field_simps) alsohave"… = (∫ x. D x - indicator ?D_set x ∂M)" using‹integrable M D›‹integral🪙L M D = 1› by (simp add: emeasure_eq_measure) alsohave"… < (∫ x. D x * (ln b * log b (D x)) ∂M)" proof (rule integral_less_AE) show"integrable M (λx. D x - indicator ?D_set x)" using‹integrable M D›by (auto simp: less_top[symmetric]) next from integrable_mult_left(1)[OF int, of "ln b"] show"integrable M (λx. D x * (ln b * log b (D x)))" by (simp add: ac_simps) next show"emeasure M {x∈space M. D x ≠ 1 ∧ D x ≠ 0} ≠ 0" proof assume eq_0: "emeasure M {x∈space M. D x ≠ 1 ∧ D x ≠ 0} = 0" thenhave disj: "AE x in M. D x = 1 ∨ D x = 0" using D(1) by (auto intro!: AE_I[OF subset_refl] sets.sets_Collect)
have"emeasure M {x∈space M. D x = 1} = (∫🪙+ x. indicator {x∈space M. D x = 1} x ∂M)" using D(1) by auto alsohave"… = (∫🪙+ x. ennreal (D x) ∂M)" using disj by (auto intro!: nn_integral_cong_AE simp: indicator_def one_ennreal_def) finallyhave"AE x in M. D x = 1" using D D_pos by (intro AE_I_eq_1) auto thenhave"(∫🪙+x. indicator A x∂M) = (∫🪙+x. ennreal (D x) * indicator A x∂M)" by (intro nn_integral_cong_AE) (auto simp: one_ennreal_def[symmetric]) alsohave"… = density M D A" using‹A ∈ sets M› D by (simp add: emeasure_density) finallyshow False using‹A ∈ sets M›‹emeasure (density M D) A ≠ emeasure M A›by simp qed show"{x∈space M. D x ≠ 1 ∧ D x ≠ 0} ∈ sets M" using D(1) by (auto intro: sets.sets_Collect_conj)
have False if Dt: "t ∈ space M""D t ≠ 1""D t ≠ 0""0 ≤ D t" and eq: "D t - indicator ?D_set t = D t * (ln b * log b (D t))"for t proof - have"D t - 1 = D t - indicator ?D_set t" using Dt by simp alsonote eq alsohave"D t * (ln b * log b (D t)) = - D t * ln (1 / D t)" using b_gt_1 ‹D t ≠ 0›‹0 ≤ D t› by (simp add: log_def ln_div less_le) finallyhave"ln (1 / D t) = 1 / D t - 1" using‹D t ≠ 0›by (auto simp: field_simps) from ln_eq_minus_one[OF _ this] ‹D t ≠ 0›‹0 ≤ D t›‹D t ≠ 1› show False by auto qed with D(2) show"AE t in M. t ∈ {x∈space M. D x ≠ 1 ∧ D x ≠ 0} ⟶ D t - indicator ?D_set t ≠ D t * (ln b * log b (D t))" by fastforce
show"AE t in M. D t - indicator ?D_set t ≤ D t * (ln b * log b (D t))" using D(2) AE_space proof eventually_elim fix t assume"t ∈ space M""0 ≤ D t" show"D t - indicator ?D_set t ≤ D t * (ln b * log b (D t))" proof cases assume asm: "D t ≠ 0" thenhave"0 < D t"using‹0 ≤ D t›by auto thenhave"0 < 1 / D t"by auto have"D t - indicator ?D_set t ≤ - D t * (1 / D t - 1)" using asm ‹t ∈ space M›by (simp add: field_simps) alsohave"- D t * (1 / D t - 1) ≤ - D t * ln (1 / D t)" using ln_le_minus_one ‹0 🚫 / D t›by (intro mult_left_mono_neg) auto alsohave"… = D t * (ln b * log b (D t))" using‹0 🚫 t› b_gt_1 by (simp_all add: log_def ln_div) finallyshow ?thesis by simp qed simp qed qed alsohave"… = (∫ x. ln b * (D x * log b (D x)) ∂M)" by (simp add: ac_simps) alsohave"… = ln b * (∫ x. D x * log b (D x) ∂M)" using int by simp finallyshow ?thesis using b_gt_1 D by (subst KL_density) (auto simp: zero_less_mult_iff) qed
lemma (in sigma_finite_measure) KL_same_eq_0: "KL_divergence b M M = 0" proof - have"AE x in M. 1 = RN_deriv M M x" proof (rule RN_deriv_unique) show"density M (λx. 1) = M" by (simp add: density_1) qed auto thenhave"AE x in M. log b (enn2real (RN_deriv M M x)) = 0" by (elim AE_mp) simp from integral_cong_AE[OF _ _ this] have"integral🪙L M (entropy_density b M M) = 0" by (simp add: entropy_density_def comp_def) thenshow"KL_divergence b M M = 0" unfolding KL_divergence_def by auto qed
lemma (in information_space) KL_eq_0_iff_eq: fixes D :: "'a ==> real" assumes"prob_space (density M D)" assumes D: "D ∈ borel_measurable M""AE x in M. 0 ≤ D x" assumes int: "integrable M (λx. D x * log b (D x))" shows"KL_divergence b M (density M D) = 0 ⟷ density M D = M" using KL_same_eq_0[of b] KL_gt_0[OF assms] by (auto simp: less_le)
lemma (in information_space) KL_eq_0_iff_eq_ac: fixes D :: "'a ==> real" assumes"prob_space N" assumes ac: "absolutely_continuous M N""sets N = sets M" assumes int: "integrable N (entropy_density b M N)" shows"KL_divergence b M N = 0 ⟷ N = M" proof - interpret N: prob_space N by fact have"finite_measure N"by unfold_locales from real_RN_deriv[OF this ac] obtain D where D: "random_variable borel D" "AE x in M. RN_deriv M N x = ennreal (D x)" "AE x in N. 0 < D x" "∧x. 0 ≤ D x" by this auto
have"N = density M (RN_deriv M N)" using ac by (rule density_RN_deriv[symmetric]) alsohave"… = density M D" using D by (auto intro!: density_cong) finallyhave N: "N = density M D" .
from absolutely_continuous_AE[OF ac(2,1) D(2)] D b_gt_1 ac measurable_entropy_density have"integrable N (λx. log b (D x))" by (intro integrable_cong_AE[THEN iffD2, OF _ _ _ int])
(auto simp: N entropy_density_def) with D b_gt_1 have"integrable M (λx. D x * log b (D x))" by (subst integrable_real_density[symmetric]) (auto simp: N[symmetric] comp_def) with‹prob_space N› D show ?thesis unfolding N by (intro KL_eq_0_iff_eq) auto qed
lemma (in information_space) KL_nonneg: assumes"prob_space (density M D)" assumes D: "D ∈ borel_measurable M""AE x in M. 0 ≤ D x" assumes int: "integrable M (λx. D x * log b (D x))" shows"0 ≤ KL_divergence b M (density M D)" using KL_gt_0[OF assms] by (cases "density M D = M") (auto simp: KL_same_eq_0)
lemma (in sigma_finite_measure) KL_density_density_nonneg: fixes f g :: "'a ==> real" assumes"1 < b" assumes f: "f ∈ borel_measurable M""AE x in M. 0 ≤ f x""prob_space (density M f)" assumes g: "g ∈ borel_measurable M""AE x in M. 0 ≤ g x""prob_space (density M g)" assumes ac: "AE x in M. f x = 0 ⟶ g x = 0" assumes int: "integrable M (λx. g x * log b (g x / f x))" shows"0 ≤ KL_divergence b (density M f) (density M g)" proof - interpret Mf: prob_space "density M f"by fact interpret Mf: information_space "density M f" b by standard fact have eq: "density (density M f) (λx. g x / f x) = density M g" (is"?DD = _") using f g ac by (subst density_density_divide) simp_all
have"0 ≤ KL_divergence b (density M f) (density (density M f) (λx. g x / f x))" proof (rule Mf.KL_nonneg) show"prob_space ?DD"unfolding eq by fact from f g show"(λx. g x / f x) ∈ borel_measurable (density M f)" by auto show"AE x in density M f. 0 ≤ g x / f x" using f g by (auto simp: AE_density) show"integrable (density M f) (λx. g x / f x * log b (g x / f x))" using‹1 🚫› f g ac by (subst integrable_density)
(auto intro!: integrable_cong_AE[THEN iffD2, OF _ _ _ int] measurable_If) qed alsohave"… = KL_divergence b (density M f) (density M g)" using f g ac by (subst density_density_divide) simp_all finallyshow ?thesis . qed
subsection‹Finite Entropy›
definition (in information_space) finite_entropy :: "'b measure ==> ('a ==> 'b) ==> ('b ==> real) ==> bool" where "finite_entropy S X f ⟷ distributed M S X f ∧ integrable S (λx. f x * log b (f x)) ∧ (∀x∈space S. 0 ≤ f x)"
lemma (in information_space) finite_entropy_simple_function: assumes X: "simple_function M X" shows"finite_entropy (count_space (X`space M)) X (λa. measure M {x ∈ space M. X x = a})" unfolding finite_entropy_def proof safe have [simp]: "finite (X ` space M)" using X by (auto simp: simple_function_def) thenshow"integrable (count_space (X ` space M)) (λx. prob {xa ∈ space M. X xa = x} * log b (prob {xa ∈ space M. X xa = x}))" by (rule integrable_count_space) have d: "distributed M (count_space (X ` space M)) X (λx. ennreal (if x ∈ X`space M then prob {xa ∈ space M. X xa = x} else 0))" by (rule distributed_simple_function_superset[OF X]) (auto intro!: arg_cong[where f=prob]) show"distributed M (count_space (X ` space M)) X (λx. ennreal (prob {xa ∈ space M. X xa = x}))" by (rule distributed_cong_density[THEN iffD1, OF _ _ _ d]) auto qed (rule measure_nonneg)
lemma ac_fst: assumes"sigma_finite_measure T" shows"absolutely_continuous S (distr (S ⨂🪙M T) S fst)" proof - interpret sigma_finite_measure T by fact
{ fix A assume A: "A ∈ sets S""emeasure S A = 0" thenhave"fst -` A ∩ space (S ⨂🪙M T) = A × space T" by (auto simp: space_pair_measure dest!: sets.sets_into_space) with A have"emeasure (S ⨂🪙M T) (fst -` A ∩ space (S ⨂🪙M T)) = 0" by (simp add: emeasure_pair_measure_Times) } thenshow ?thesis unfolding absolutely_continuous_def by (metis emeasure_distr measurable_fst null_setsD1 null_setsD2 null_setsI sets_distr subsetI) qed
lemma ac_snd: assumes"sigma_finite_measure T" shows"absolutely_continuous T (distr (S ⨂🪙M T) T snd)" proof - interpret sigma_finite_measure T by fact
{ fix A assume A: "A ∈ sets T""emeasure T A = 0" thenhave"snd -` A ∩ space (S ⨂🪙M T) = space S × A" by (auto simp: space_pair_measure dest!: sets.sets_into_space) with A have"emeasure (S ⨂🪙M T) (snd -` A ∩ space (S ⨂🪙M T)) = 0" by (simp add: emeasure_pair_measure_Times) } thenshow ?thesis unfolding absolutely_continuous_def by (metis emeasure_distr measurable_snd null_setsD1 null_setsD2 null_setsI sets_distr subsetI) qed
lemma (in information_space) finite_entropy_integrable: "finite_entropy S X Px ==> integrable S (λx. Px x * log b (Px x))" unfolding finite_entropy_def by auto
lemma (in information_space) finite_entropy_distributed: "finite_entropy S X Px ==> distributed M S X Px" unfolding finite_entropy_def by auto
lemma (in information_space) finite_entropy_nn: "finite_entropy S X Px ==> x ∈ space S ==> 0 ≤ Px x" by (auto simp: finite_entropy_def)
lemma (in information_space) finite_entropy_measurable: "finite_entropy S X Px ==> Px ∈ S →🪙M borel" using distributed_real_measurable[of S Px M X]
finite_entropy_nn[of S X Px] finite_entropy_distributed[of S X Px] by auto
lemma (in information_space) subdensity_finite_entropy: fixes g :: "'b ==> real"and f :: "'c ==> real" assumes T: "T ∈ measurable P Q" assumes f: "finite_entropy P X f" assumes g: "finite_entropy Q Y g" assumes Y: "Y = T ∘ X" shows"AE x in P. g (T x) = 0 ⟶ f x = 0" using subdensity[OF T, of M X "λx. ennreal (f x)" Y "λx. ennreal (g x)"]
finite_entropy_distributed[OF f] finite_entropy_distributed[OF g]
finite_entropy_nn[OF f] finite_entropy_nn[OF g]
assms by auto
lemma (in information_space) finite_entropy_integrable_transform: "finite_entropy S X Px ==> distributed M T Y Py ==> (∧x. x ∈ space T ==> 0 ≤ Py x) ==> X = (λx. f (Y x)) ==> f ∈ measurable T S ==> integrable T (λx. Py x * log b (Px (f x)))" using distributed_transform_integrable[of M T Y Py S X Px f "λx. log b (Px x)"] using distributed_real_measurable[of S Px M X] by (auto simp: finite_entropy_def)
subsection‹Mutual Information›
definition (in prob_space) "mutual_information b S T X Y = KL_divergence b (distr M S X ⨂🪙M distr M T Y) (distr M (S ⨂🪙M T) (λx. (X x, Y x)))"
lemma (in information_space) mutual_information_indep_vars: fixes S T X Y defines"P ≡ distr M S X ⨂🪙M distr M T Y" defines"Q ≡ distr M (S ⨂🪙M T) (λx. (X x, Y x))" shows"indep_var S X T Y ⟷ (random_variable S X ∧ random_variable T Y ∧ absolutely_continuous P Q ∧ integrable Q (entropy_density b P Q) ∧ mutual_information b S T X Y = 0)" unfolding indep_var_distribution_eq proof safe assume rv[measurable]: "random_variable S X""random_variable T Y"
interpret X: prob_space "distr M S X" by (rule prob_space_distr) fact interpret Y: prob_space "distr M T Y" by (rule prob_space_distr) fact interpret XY: pair_prob_space "distr M S X""distr M T Y"by standard interpret P: information_space P b unfolding P_def by standard (rule b_gt_1)
interpret Q: prob_space Q unfolding Q_def by (rule prob_space_distr) simp
{ assume"distr M S X ⨂🪙M distr M T Y = distr M (S ⨂🪙M T) (λx. (X x, Y x))" thenhave [simp]: "Q = P"unfolding Q_def P_def by simp
show ac: "absolutely_continuous P Q"by (simp add: absolutely_continuous_def) thenhave ed: "entropy_density b P Q ∈ borel_measurable P" by simp
have"AE x in P. 1 = RN_deriv P Q x" proof (rule P.RN_deriv_unique) show"density P (λx. 1) = Q" unfolding‹Q = P›by (intro measure_eqI) (auto simp: emeasure_density) qed auto thenhave ae_0: "AE x in P. entropy_density b P Q x = 0" by (auto simp: entropy_density_def) thenhave"integrable P (entropy_density b P Q) ⟷ integrable Q (λx. 0::real)" using ed unfolding‹Q = P›by (intro integrable_cong_AE) auto thenshow"integrable Q (entropy_density b P Q)"by simp
from ae_0 have"mutual_information b S T X Y = (∫x. 0 ∂P)" unfolding mutual_information_def KL_divergence_def P_def[symmetric] Q_def[symmetric] ‹Q = P› by (intro integral_cong_AE) auto thenshow"mutual_information b S T X Y = 0" by simp }
{ assume ac: "absolutely_continuous P Q" assume int: "integrable Q (entropy_density b P Q)" assume I_eq_0: "mutual_information b S T X Y = 0"
have eq: "Q = P" proof (rule P.KL_eq_0_iff_eq_ac[THEN iffD1]) show"prob_space Q"by unfold_locales show"absolutely_continuous P Q"by fact show"integrable Q (entropy_density b P Q)"by fact show"sets Q = sets P"by (simp add: P_def Q_def sets_pair_measure) show"KL_divergence b P Q = 0" using I_eq_0 unfolding mutual_information_def by (simp add: P_def Q_def) qed thenshow"distr M S X ⨂🪙M distr M T Y = distr M (S ⨂🪙M T) (λx. (X x, Y x))" unfolding P_def Q_def .. } qed
abbreviation (in information_space)
mutual_information_Pow (‹I'(_ ; _')›) where "I(X ; Y) ≡ mutual_information b (count_space (X`space M)) (count_space (Y`space M)) X Y"
lemma (in information_space) fixes Pxy :: "'b × 'c ==> real"and Px :: "'b ==> real"and Py :: "'c ==> real" assumes S: "sigma_finite_measure S"and T: "sigma_finite_measure T" assumes Fx: "finite_entropy S X Px"and Fy: "finite_entropy T Y Py" assumes Fxy: "finite_entropy (S ⨂🪙M T) (λx. (X x, Y x)) Pxy" defines"f ≡ λx. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" shows mutual_information_distr': "mutual_information b S T X Y = integral🪙L (S ⨂??M T) f" (is"?M = ?R") and mutual_information_nonneg': "0 ≤ mutual_information b S T X Y" proof - have Px: "distributed M S X Px"and Px_nn: "∧x. x ∈ space S ==> 0 ≤ Px x" using Fx by (auto simp: finite_entropy_def) have Py: "distributed M T Y Py"and Py_nn: "∧x. x ∈ space T ==> 0 ≤ Py x" using Fy by (auto simp: finite_entropy_def) have Pxy: "distributed M (S ⨂🪙M T) (λx. (X x, Y x)) Pxy" and Pxy_nn: "∧x. x ∈ space (S ⨂🪙M T) ==> 0 ≤ Pxy x" "∧x y. x ∈ space S ==> y ∈ space T ==> 0 ≤ Pxy (x, y)" using Fxy by (auto simp: finite_entropy_def space_pair_measure)
have [measurable]: "Px ∈ S →🪙M borel" using Px Px_nn by (intro distributed_real_measurable) have [measurable]: "Py ∈ T →🪙M borel" using Py Py_nn by (intro distributed_real_measurable) have measurable_Pxy[measurable]: "Pxy ∈ (S ⨂🪙M T) →🪙M borel" using Pxy Pxy_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure)
have X[measurable]: "random_variable S X" using Px by auto have Y[measurable]: "random_variable T Y" using Py by auto interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret ST: pair_sigma_finite S T .. interpret X: prob_space "distr M S X"using X by (rule prob_space_distr) interpret Y: prob_space "distr M T Y"using Y by (rule prob_space_distr) interpret XY: pair_prob_space "distr M S X""distr M T Y" .. let ?P = "S ⨂🪙M T" let ?D = "distr M ?P (λx. (X x, Y x))"
{ fix A assume"A ∈ sets S" with X[THEN measurable_space] Y[THEN measurable_space] have"emeasure (distr M S X) A = emeasure ?D (A × space T)" by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } note marginal_eq1 = this
{ fix A assume"A ∈ sets T" with X[THEN measurable_space] Y[THEN measurable_space] have"emeasure (distr M T Y) A = emeasure ?D (space S × A)" by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } note marginal_eq2 = this
have distr_eq: "distr M S X ⨂🪙M distr M T Y = density ?P (λx. ennreal (Px (fst x) * Py (snd x)))" unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] proof (subst pair_measure_density) show"(λx. ennreal (Px x)) ∈ borel_measurable S""(λy. ennreal (Py y)) ∈ borel_measurable T" using Px Py by (auto simp: distributed_def) show"sigma_finite_measure (density T Py)"unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. show"density (S ⨂🪙M T) (λ(x, y). ennreal (Px x) * ennreal (Py y)) = density (S ⨂🪙M T) (λx. ennreal (Px (fst x) * Py (snd x)))" using Px_nn Py_nn by (auto intro!: density_cong simp: distributed_def ennreal_mult space_pair_measure) qed fact
from Px Py have f: "(λx. Px (fst x) * Py (snd x)) ∈ borel_measurable ?P" by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') have PxPy_nonneg: "AE x in ?P. 0 ≤ Px (fst x) * Py (snd x)" using Px_nn Py_nn by (auto simp: space_pair_measure)
have A: "(AE x in ?P. Px (fst x) = 0 ⟶ Pxy x = 0)" by (rule subdensity_real[OF measurable_fst Pxy Px]) (insert Px_nn Pxy_nn, auto simp: space_pair_measure) moreover have B: "(AE x in ?P. Py (snd x) = 0 ⟶ Pxy x = 0)" by (rule subdensity_real[OF measurable_snd Pxy Py]) (insert Py_nn Pxy_nn, auto simp: space_pair_measure) ultimatelyhave ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 ⟶ Pxy x = 0" by auto
show"?M = ?R" unfolding M f_def using Pxy_nn Px_nn Py_nn by (intro ST.KL_density_density b_gt_1 f PxPy_nonneg ac) (auto simp: space_pair_measure)
have X: "X = fst ∘ (λx. (X x, Y x))"and Y: "Y = snd ∘ (λx. (X x, Y x))" by auto
have"integrable (S ⨂🪙M T) (λx. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)))" using finite_entropy_integrable[OF Fxy] using finite_entropy_integrable_transform[OF Fx Pxy, of fst] using finite_entropy_integrable_transform[OF Fy Pxy, of snd] by (simp add: Pxy_nn) moreoverhave"f ∈ borel_measurable (S ⨂🪙M T)" unfolding f_def using Px Py Pxy by (auto intro: distributed_real_measurable measurable_fst'' measurable_snd''
intro!: borel_measurable_times borel_measurable_log borel_measurable_divide) ultimatelyhave int: "integrable (S ⨂🪙M T) f" proof (rule integrable_cong_AE_imp) show"AE x in S ⨂🪙M T. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)) = f x" using A B by (auto simp: f_def field_simps space_pair_measure log_mult log_divide) qed
show"0 ≤ ?M"unfolding M proof (intro ST.KL_density_density_nonneg) show"prob_space (density (S ⨂🪙M T) (λx. ennreal (Pxy x))) " unfolding distributed_distr_eq_density[OF Pxy, symmetric] using distributed_measurable[OF Pxy] by (rule prob_space_distr) show"prob_space (density (S ⨂🪙M T) (λx. ennreal (Px (fst x) * Py (snd x))))" unfolding distr_eq[symmetric] by unfold_locales show"integrable (S ⨂🪙M T) (λx. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))))" using int unfolding f_def . qed (insert ac, auto simp: b_gt_1 Px_nn Py_nn Pxy_nn space_pair_measure) qed
lemma (in information_space) fixes Pxy :: "'b × 'c ==> real"and Px :: "'b ==> real"and Py :: "'c ==> real" assumes"sigma_finite_measure S""sigma_finite_measure T" assumes Px: "distributed M S X Px"and Px_nn: "∧x. x ∈ space S ==> 0 ≤ Px x" and Py: "distributed M T Y Py"and Py_nn: "∧y. y ∈ space T ==> 0 ≤ Py y" and Pxy: "distributed M (S ⨂🪙M T) (λx. (X x, Y x)) Pxy" and Pxy_nn: "∧x y. x ∈ space S ==> y ∈ space T ==> 0 ≤ Pxy (x, y)" defines"f ≡ λx. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" shows mutual_information_distr: "mutual_information b S T X Y = integral🪙L (S ⨂🪙M T) f" (is"?M = ?R") and mutual_information_nonneg: "integrable (S ⨂🪙M T) f ==> 0 ≤ mutual_information b S T X Y" proof - have X[measurable]: "random_variable S X" using Px by (auto simp: distributed_def) have Y[measurable]: "random_variable T Y" using Py by (auto simp: distributed_def) have [measurable]: "Px ∈ S →🪙M borel" using Px Px_nn by (intro distributed_real_measurable) have [measurable]: "Py ∈ T →🪙M borel" using Py Py_nn by (intro distributed_real_measurable) have measurable_Pxy[measurable]: "Pxy ∈ (S ⨂🪙M T) →🪙M borel" using Pxy Pxy_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure)
interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret ST: pair_sigma_finite S T .. interpret X: prob_space "distr M S X"using X by (rule prob_space_distr) interpret Y: prob_space "distr M T Y"using Y by (rule prob_space_distr) interpret XY: pair_prob_space "distr M S X""distr M T Y" .. let ?P = "S ⨂🪙M T" let ?D = "distr M ?P (λx. (X x, Y x))"
{ fix A assume"A ∈ sets S" with X[THEN measurable_space] Y[THEN measurable_space] have"emeasure (distr M S X) A = emeasure ?D (A × space T)" by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } note marginal_eq1 = this
{ fix A assume"A ∈ sets T" with X[THEN measurable_space] Y[THEN measurable_space] have"emeasure (distr M T Y) A = emeasure ?D (space S × A)" by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } note marginal_eq2 = this
have distr_eq: "distr M S X ⨂🪙M distr M T Y = density ?P (λx. ennreal (Px (fst x) * Py (snd x)))" unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] proof (subst pair_measure_density) show"(λx. ennreal (Px x)) ∈ borel_measurable S""(λy. ennreal (Py y)) ∈ borel_measurable T" using Px Py by (auto simp: distributed_def) show"sigma_finite_measure (density T Py)"unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. show"density (S ⨂🪙M T) (λ(x, y). ennreal (Px x) * ennreal (Py y)) = density (S ⨂🪙M T) (λx. ennreal (Px (fst x) * Py (snd x)))" using Px_nn Py_nn by (auto intro!: density_cong simp: distributed_def ennreal_mult space_pair_measure) qed fact
from Px Py have f: "(λx. Px (fst x) * Py (snd x)) ∈ borel_measurable ?P" by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') have PxPy_nonneg: "AE x in ?P. 0 ≤ Px (fst x) * Py (snd x)" using Px_nn Py_nn by (auto simp: space_pair_measure)
have"(AE x in ?P. Px (fst x) = 0 ⟶ Pxy x = 0)" by (rule subdensity_real[OF measurable_fst Pxy Px]) (insert Px_nn Pxy_nn, auto simp: space_pair_measure) moreover have"(AE x in ?P. Py (snd x) = 0 ⟶ Pxy x = 0)" by (rule subdensity_real[OF measurable_snd Pxy Py]) (insert Py_nn Pxy_nn, auto simp: space_pair_measure) ultimatelyhave ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 ⟶ Pxy x = 0" by auto
show"?M = ?R" unfolding M f_def using b_gt_1 f PxPy_nonneg ac Pxy_nn by (intro ST.KL_density_density) (auto simp: space_pair_measure)
assume int: "integrable (S ⨂🪙M T) f" show"0 ≤ ?M"unfolding M proof (intro ST.KL_density_density_nonneg) show"prob_space (density (S ⨂🪙M T) (λx. ennreal (Pxy x))) " unfolding distributed_distr_eq_density[OF Pxy, symmetric] using distributed_measurable[OF Pxy] by (rule prob_space_distr) show"prob_space (density (S ⨂🪙M T) (λx. ennreal (Px (fst x) * Py (snd x))))" unfolding distr_eq[symmetric] by unfold_locales show"integrable (S ⨂🪙M T) (λx. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))))" using int unfolding f_def . qed (insert ac, auto simp: b_gt_1 Px_nn Py_nn Pxy_nn space_pair_measure) qed
lemma (in information_space) fixes Pxy :: "'b × 'c ==> real"and Px :: "'b ==> real"and Py :: "'c ==> real" assumes"sigma_finite_measure S""sigma_finite_measure T" assumes Px[measurable]: "distributed M S X Px"and Px_nn: "∧x. x ∈ space S ==> 0 ≤ Px x" and Py[measurable]: "distributed M T Y Py"and Py_nn: "∧x. x ∈ space T ==> 0 ≤ Py x" and Pxy[measurable]: "distributed M (S ⨂🪙M T) (λx. (X x, Y x)) Pxy" and Pxy_nn: "∧x. x ∈ space (S ⨂🪙M T) ==> 0 ≤ Pxy x" assumes ae: "AE x in S. AE y in T. Pxy (x, y) = Px x * Py y" shows mutual_information_eq_0: "mutual_information b S T X Y = 0" proof - interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret ST: pair_sigma_finite S T .. note
distributed_real_measurable[OF Px_nn Px, measurable]
distributed_real_measurable[OF Py_nn Py, measurable]
distributed_real_measurable[OF Pxy_nn Pxy, measurable]
have"AE x in S ⨂🪙M T. Px (fst x) = 0 ⟶ Pxy x = 0" by (rule subdensity_real[OF measurable_fst Pxy Px]) (auto simp: Px_nn Pxy_nn space_pair_measure) moreover have"AE x in S ⨂🪙M T. Py (snd x) = 0 ⟶ Pxy x = 0" by (rule subdensity_real[OF measurable_snd Pxy Py]) (auto simp: Py_nn Pxy_nn space_pair_measure) moreover have"AE x in S ⨂🪙M T. Pxy x = Px (fst x) * Py (snd x)" by (intro ST.AE_pair_measure) (auto simp: ae intro!: measurable_snd'' measurable_fst'') ultimatelyhave"AE x in S ⨂🪙M T. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) = 0" by eventually_elim simp thenhave"(∫x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) ∂(S ⨂🪙M T)) = (∫x. 0 ∂(S ⨂🪙M T))" by (intro integral_cong_AE) auto thenshow ?thesis by (subst mutual_information_distr[OF assms(1-8)]) (auto simp add: space_pair_measure) qed
lemma (in information_space) mutual_information_simple_distributed: assumes X: "simple_distributed M X Px"and Y: "simple_distributed M Y Py" assumes XY: "simple_distributed M (λx. (X x, Y x)) Pxy" shows"I(X ; Y) = (∑(x, y)∈(λx. (X x, Y x))`space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" proof (subst mutual_information_distr[OF _ _ simple_distributed[OF X] _ simple_distributed[OF Y] _ simple_distributed_joint[OF XY]]) note fin = simple_distributed_joint_finite[OF XY, simp] show"sigma_finite_measure (count_space (X ` space M))" by (simp add: sigma_finite_measure_count_space_finite) show"sigma_finite_measure (count_space (Y ` space M))" by (simp add: sigma_finite_measure_count_space_finite) let ?Pxy = "λx. (if x ∈ (λx. (X x, Y x)) ` space M then Pxy x else 0)" let ?f = "λx. ?Pxy x * log b (?Pxy x / (Px (fst x) * Py (snd x)))" have"∧x. ?f x = (if x ∈ (λx. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) else 0)" by auto with fin show"(∫ x. ?f x ∂(count_space (X ` space M) ⨂🪙M count_space (Y ` space M))) = (∑(x, y)∈(λx. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" by (auto simp add: pair_measure_count_space lebesgue_integral_count_space_finite sum.If_cases split_beta'
intro!: sum.cong) qed (insert X Y XY, auto simp: simple_distributed_def)
lemma (in information_space) fixes Pxy :: "'b × 'c ==> real"and Px :: "'b ==> real"and Py :: "'c ==> real" assumes Px: "simple_distributed M X Px"and Py: "simple_distributed M Y Py" assumes Pxy: "simple_distributed M (λx. (X x, Y x)) Pxy" assumes ae: "∀x∈space M. Pxy (X x, Y x) = Px (X x) * Py (Y x)" shows mutual_information_eq_0_simple: "I(X ; Y) = 0" proof (subst mutual_information_simple_distributed[OF Px Py Pxy]) have"(∑(x, y)∈(λx. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) = (∑(x, y)∈(λx. (X x, Y x)) ` space M. 0)" by (intro sum.cong) (auto simp: ae) thenshow"(∑(x, y)∈(λx. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) = 0"by simp qed
subsection‹Entropy›
definition (in prob_space) entropy :: "real ==> 'b measure ==> ('a ==> 'b) ==> real"where "entropy b S X = - KL_divergence b S (distr M S X)"
abbreviation (in information_space)
entropy_Pow (‹H'(_')›) where "H(X) ≡ entropy b (count_space (X`space M)) X"
lemma (in prob_space) distributed_RN_deriv: assumes X: "distributed M S X Px" shows"AE x in S. RN_deriv S (density S Px) x = Px x" proof - have"distributed M S X (RN_deriv S (density S Px))" by (metis RN_derivI assms borel_measurable_RN_deriv distributed_def) thenshow ?thesis using assms distributed_unique by blast qed
lemma (in information_space) fixes X :: "'a ==> 'b" assumes X[measurable]: "distributed M MX X f"and nn: "∧x. x ∈ space MX ==> 0 ≤ f x" shows entropy_distr: "entropy b MX X = - (∫x. f x * log b (f x) ∂MX)" (is ?eq) proof - note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] note ae = distributed_RN_deriv[OF X] note distributed_real_measurable[OF nn X, measurable]
have ae_eq: "AE x in distr M MX X. log b (enn2real (RN_deriv MX (distr M MX X) x)) = log b (f x)" unfolding distributed_distr_eq_density[OF X] using D ae by (auto simp: AE_density)
have int_eq: "(∫ x. f x * log b (f x) ∂MX) = (∫ x. log b (f x) ∂distr M MX X)" unfolding distributed_distr_eq_density[OF X] using D by (subst integral_density) (auto simp: nn)
show ?eq unfolding entropy_def KL_divergence_def entropy_density_def comp_def int_eq neg_equal_iff_equal using ae_eq by (intro integral_cong_AE) (auto simp: nn) qed
lemma (in information_space) entropy_le: fixes Px :: "'b ==> real"and MX :: "'b measure" assumes X[measurable]: "distributed M MX X Px"and Px_nn[simp]: "∧x. x ∈ space MX ==> 0 ≤ Px x" and fin: "emeasure MX {x ∈ space MX. Px x ≠ 0} ≠ top" and int: "integrable MX (λx. - Px x * log b (Px x))" shows"entropy b MX X ≤ log b (measure MX {x ∈ space MX. Px x ≠ 0})" proof - note Px = distributed_borel_measurable[OF X] interpret X: prob_space "distr M MX X" using distributed_measurable[OF X] by (rule prob_space_distr)
have" - log b (measure MX {x ∈ space MX. Px x ≠ 0}) = - log b (∫ x. indicator {x ∈ space MX. Px x ≠ 0} x ∂MX)" using Px Px_nn fin by (auto simp: measure_def) alsohave"- log b (∫ x. indicator {x ∈ space MX. Px x ≠ 0} x ∂MX) = - log b (∫ x. 1 / Px x ∂distr M MX X)" proof - have"integral🪙L MX (indicator {x ∈ space MX. Px x ≠ 0}) = LINT x|MX. Px x *🪙R (1 / Px x)" by (rule Bochner_Integration.integral_cong) auto alsohave"... = LINT x|density MX (λx. ennreal (Px x)). 1 / Px x" by (rule integral_density [symmetric]) (use Px Px_nn in auto) finallyshow ?thesis unfolding distributed_distr_eq_density[OF X] by simp qed alsohave"…≤ (∫ x. - log b (1 / Px x) ∂distr M MX X)" proof (rule X.jensens_inequality[of "λx. 1 / Px x""{0<..}" 0 1 "λx. - log b x"]) show"AE x in distr M MX X. 1 / Px x ∈ {0<..}" unfolding distributed_distr_eq_density[OF X] using Px by (auto simp: AE_density) have [simp]: "∧x. x ∈ space MX ==> ennreal (if Px x = 0 then 0 else 1) = indicator {x ∈ space MX. Px x ≠ 0} x" by (auto simp: one_ennreal_def) have"(∫🪙+ x. ennreal (- (if Px x = 0 then 0 else 1)) ∂MX) = (∫🪙+ x. 0 ∂MX)" by (intro nn_integral_cong) (auto simp: ennreal_neg) thenshow"integrable (distr M MX X) (λx. 1 / Px x)" unfolding distributed_distr_eq_density[OF X] using Px by (auto simp: nn_integral_density real_integrable_def fin ennreal_neg ennreal_mult[symmetric]
cong: nn_integral_cong) have"integrable MX (λx. Px x * log b (1 / Px x)) = integrable MX (λx. - Px x * log b (Px x))" using Px by (intro integrable_cong_AE) (auto simp: log_divide_pos log_recip) thenshow"integrable (distr M MX X) (λx. - log b (1 / Px x))" unfolding distributed_distr_eq_density[OF X] using Px int by (subst integrable_real_density) auto qed (auto simp: minus_log_convex[OF b_gt_1]) alsohave"… = (∫ x. log b (Px x) ∂distr M MX X)" unfolding distributed_distr_eq_density[OF X] using Px by (intro integral_cong_AE) (auto simp: AE_density log_divide_pos) alsohave"… = - entropy b MX X" unfolding distributed_distr_eq_density[OF X] using Px by (subst entropy_distr[OF X]) (auto simp: integral_density) finallyshow ?thesis by simp qed
lemma (in information_space) entropy_le_space: fixes Px :: "'b ==> real"and MX :: "'b measure" assumes X: "distributed M MX X Px"and Px_nn[simp]: "∧x. x ∈ space MX ==> 0 ≤ Px x" and fin: "finite_measure MX" and int: "integrable MX (λx. - Px x * log b (Px x))" shows"entropy b MX X ≤ log b (measure MX (space MX))" proof - note Px = distributed_borel_measurable[OF X] interpret finite_measure MX by fact have"entropy b MX X ≤ log b (measure MX {x ∈ space MX. Px x ≠ 0})" using int X by (intro entropy_le) auto alsohave"…≤ log b (measure MX (space MX))" using Px distributed_imp_emeasure_nonzero[OF X] by (intro Transcendental.log_mono)
(auto intro!: finite_measure_mono b_gt_1 less_le[THEN iffD2]
simp: emeasure_eq_measure cong: conj_cong) finallyshow ?thesis . qed
lemma (in information_space) entropy_uniform: assumes X: "distributed M MX X (λx. indicator A x / measure MX A)" (is"distributed _ _ _ ?f") shows"entropy b MX X = log b (measure MX A)" proof (subst entropy_distr[OF X]) have [simp]: "emeasure MX A ≠∞" using uniform_distributed_params[OF X] by (auto simp add: measure_def) have eq: "(∫ x. indicator A x / measure MX A * log b (indicator A x / measure MX A)∂MX) = (∫ x. (- log b (measure MX A) / measure MX A) * indicator A x ∂MX)" using uniform_distributed_params[OF X] by (intro Bochner_Integration.integral_cong) (auto split: split_indicator simp: log_divide_pos zero_less_measure_iff) show"- (∫ x. indicator A x / measure MX A * log b (indicator A x / measure MX A) ∂MX) = log b (measure MX A)" unfolding eq using uniform_distributed_params[OF X] by (subst Bochner_Integration.integral_mult_right) (auto simp: measure_def less_top[symmetric] intro!: integrable_real_indicator) qed simp
lemma (in information_space) entropy_simple_distributed: "simple_distributed M X f ==>H(X) = - (∑x∈X`space M. f x * log b (f x))" by (subst entropy_distr[OF simple_distributed])
(auto simp add: lebesgue_integral_count_space_finite)
lemma (in information_space) entropy_le_card_not_0: assumes X: "simple_distributed M X f" shows"H(X) ≤ log b (card (X ` space M ∩ {x. f x ≠ 0}))" proof - let ?X = "count_space (X`space M)" have"H(X) ≤ log b (measure ?X {x ∈ space ?X. f x ≠ 0})" by (rule entropy_le[OF simple_distributed[OF X]])
(insert X, auto simp: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space) alsohave"measure ?X {x ∈ space ?X. f x ≠ 0} = card (X ` space M ∩ {x. f x ≠ 0})" by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def Int_def) finallyshow ?thesis . qed
lemma (in information_space) entropy_le_card: assumes X: "simple_distributed M X f" shows"H(X) ≤ log b (real (card (X ` space M)))" proof - let ?X = "count_space (X`space M)" have"H(X) ≤ log b (measure ?X (space ?X))" by (rule entropy_le_space[OF simple_distributed[OF X]])
(insert X, auto simp: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space finite_measure_count_space) alsohave"measure ?X (space ?X) = card (X ` space M)" by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def) finallyshow ?thesis . qed
subsection‹Conditional Mutual Information›
definition (in prob_space) "conditional_mutual_information b MX MY MZ X Y Z ≡ mutual_information b MX (MY ⨂🪙M MZ) X (λx. (Y x, Z x)) - mutual_information b MX MZ X Z"
abbreviation (in information_space)
conditional_mutual_information_Pow (‹I'( _ ; _ | _ ')›) where "I(X ; Y | Z) ≡ conditional_mutual_information b (count_space (X ` space M)) (count_space (Y ` space M)) (count_space (Z ` space M)) X Y Z"
lemma (in information_space) assumes S: "sigma_finite_measure S"and T: "sigma_finite_measure T"and P: "sigma_finite_measure P" assumes Px[measurable]: "distributed M S X Px" and Px_nn[simp]: "∧x. x ∈ space S ==> 0 ≤ Px x" assumes Pz[measurable]: "distributed M P Z Pz" and Pz_nn[simp]: "∧z. z ∈ space P ==> 0 ≤ Pz z" assumes Pyz[measurable]: "distributed M (T ⨂🪙M P) (λx. (Y x, Z x)) Pyz" and Pyz_nn[simp]: "∧y z. y ∈ space T ==> z ∈ space P ==> 0 ≤ Pyz (y, z)" assumes Pxz[measurable]: "distributed M (S ⨂🪙M P) (λx. (X x, Z x)) Pxz" and Pxz_nn[simp]: "∧x z. x ∈ space S ==> z ∈ space P ==> 0 ≤ Pxz (x, z)" assumes Pxyz[measurable]: "distributed M (S ⨂🪙M T ⨂🪙M P) (λx. (X x, Y x, Z x)) Pxyz" and Pxyz_nn[simp]: "∧x y z. x ∈ space S ==> y ∈ space T ==> z ∈ space P ==> 0 ≤ Pxyz (x, y, z)" assumes I1: "integrable (S ⨂🪙M T ⨂🪙M P) (λ(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" assumes I2: "integrable (S ⨂🪙M T ⨂🪙M P) (λ(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" shows conditional_mutual_information_generic_eq: "conditional_mutual_information b S T P X Y Z = (∫(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) ∂(S ⨂🪙M T ⨂🪙M P))" (is"?eq") and conditional_mutual_information_generic_nonneg: "0 ≤ conditional_mutual_information b S T P X Y Z" (is"?nonneg") proof - have [measurable]: "Px ∈ S →🪙M borel" using Px Px_nn by (intro distributed_real_measurable) have [measurable]: "Pz ∈ P →🪙M borel" using Pz Pz_nn by (intro distributed_real_measurable) have measurable_Pyz[measurable]: "Pyz ∈ (T ⨂🪙M P) →🪙M borel" using Pyz Pyz_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure) have measurable_Pxz[measurable]: "Pxz ∈ (S ⨂🪙M P) →🪙M borel" using Pxz Pxz_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure) have measurable_Pxyz[measurable]: "Pxyz ∈ (S ⨂🪙M T ⨂🪙M P) →🪙M borel" using Pxyz Pxyz_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure)
interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret P: sigma_finite_measure P by fact interpret TP: pair_sigma_finite T P .. interpret SP: pair_sigma_finite S P .. interpret ST: pair_sigma_finite S T .. interpret SPT: pair_sigma_finite "S ⨂🪙M P" T .. interpret STP: pair_sigma_finite S "T ⨂🪙M P" .. interpret TPS: pair_sigma_finite "T ⨂🪙M P" S .. have TP: "sigma_finite_measure (T ⨂🪙M P)" .. have SP: "sigma_finite_measure (S ⨂🪙M P)" .. have YZ: "random_variable (T ⨂🪙M P) (λx. (Y x, Z x))" using Pyz by (simp add: distributed_measurable)
from Pxz Pxyz have distr_eq: "distr M (S ⨂🪙M P) (λx. (X x, Z x)) = distr (distr M (S ⨂🪙M T ⨂🪙M P) (λx. (X x, Y x, Z x))) (S ⨂🪙M P) (λ(x, y, z). (x, z))" by (simp add: comp_def distr_distr)
have"mutual_information b S P X Z = (∫x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) ∂(S ⨂🪙M P))" by (rule mutual_information_distr[OF S P Px Px_nn Pz Pz_nn Pxz Pxz_nn]) alsohave"… = (∫(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) ∂(S ⨂🪙M T ⨂🪙M P))" using b_gt_1 Pxz Px Pz by (subst distributed_transform_integral[OF Pxyz _ Pxz _, where T="λ(x, y, z). (x, z)"])
(auto simp: split_beta' space_pair_measure) finallyhave mi_eq: "mutual_information b S P X Z = (∫(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) ∂(S ⨂🪙M T ⨂🪙M P))" .
have ae1: "AE x in S ⨂🪙M T ⨂🪙M P. Px (fst x) = 0 ⟶ Pxyz x = 0" by (intro subdensity_real[of fst, OF _ Pxyz Px]) (auto simp: space_pair_measure) moreoverhave ae2: "AE x in S ⨂🪙M T ⨂🪙M P. Pz (snd (snd x)) = 0 ⟶ Pxyz x = 0" by (intro subdensity_real[of "λx. snd (snd x)", OF _ Pxyz Pz]) (auto simp: space_pair_measure) moreoverhave ae3: "AE x in S ⨂🪙M T ⨂🪙M P. Pxz (fst x, snd (snd x)) = 0 ⟶ Pxyz x = 0" by (intro subdensity_real[of "λx. (fst x, snd (snd x))", OF _ Pxyz Pxz]) (auto simp: space_pair_measure) moreoverhave ae4: "AE x in S ⨂🪙M T ⨂🪙M P. Pyz (snd x) = 0 ⟶ Pxyz x = 0" by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) (auto simp: space_pair_measure) ultimatelyhave ae: "AE x in S ⨂🪙M T ⨂🪙M P. Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) - Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " using AE_space proof eventually_elim case (elim x) show ?case proof cases assume"Pxyz x ≠ 0" with elim have"0 < Px (fst x)""0 < Pz (snd (snd x))""0 < Pxz (fst x, snd (snd x))" "0 < Pyz (snd x)""0 < Pxyz x" by (auto simp: space_pair_measure less_le) thenshow ?thesis using b_gt_1 by (simp add: log_simps less_imp_le field_simps) qed simp qed with I1 I2 show ?eq unfolding conditional_mutual_information_def apply (subst mi_eq) apply (subst mutual_information_distr[OF S TP Px Px_nn Pyz _ Pxyz]) apply (auto simp: space_pair_measure) apply (subst Bochner_Integration.integral_diff[symmetric]) apply (auto intro!: integral_cong_AE simp: split_beta' simp del: Bochner_Integration.integral_diff) done
let ?P = "density (S ⨂🪙M T ⨂🪙M P) Pxyz" interpret P: prob_space ?P unfolding distributed_distr_eq_density[OF Pxyz, symmetric] by (rule prob_space_distr) simp
let ?Q = "density (T ⨂🪙M P) Pyz" interpret Q: prob_space ?Q unfolding distributed_distr_eq_density[OF Pyz, symmetric] by (rule prob_space_distr) simp
let ?f = "λ(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)"
from subdensity_real[of snd, OF _ Pyz Pz _ AE_I2 AE_I2] have aeX1: "AE x in T ⨂🪙M P. Pz (snd x) = 0 ⟶ Pyz x = 0" by (auto simp: comp_def space_pair_measure) have aeX2: "AE x in T ⨂🪙M P. 0 ≤ Pz (snd x)" using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def)
have aeX3: "AE y in T ⨂🪙M P. (∫🪙+ x. ennreal (Pxz (x, snd y)) ∂S) = ennreal (Pz (snd y))" using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] by (intro TP.AE_pair_measure) auto
have"(∫🪙+ x. ?f x ∂?P) ≤ (∫🪙+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) ∂(S ⨂??M T ⨂🪙M P))" by (subst nn_integral_density)
(auto intro!: nn_integral_mono simp: space_pair_measure ennreal_mult[symmetric]) alsohave"… = (∫🪙+(y, z). (∫🪙+ x. ennreal (Pxz (x, z)) * ennreal (Pyz (y, z) / Pz z) ∂S) ∂(T ⨂🪙M P))" by (subst STP.nn_integral_snd[symmetric])
(auto simp add: split_beta' ennreal_mult[symmetric] space_pair_measure intro!: nn_integral_cong) alsohave"… = (∫🪙+x. ennreal (Pyz x) * 1 ∂T ⨂🪙M P)" proof - have D: "(∫🪙+ x. ennreal (Pxz (x, b)) * ennreal (Pyz (a, b) / Pz b) ∂S) = ennreal (Pyz (a, b))" if"Pz b = 0 ⟶ Pyz (a, b) = 0""a ∈ space T ∧ b ∈ space P" "(∫🪙+ x. ennreal (Pxz (x, b)) ∂S) = ennreal (Pz b)" for a b using that by (subst nn_integral_multc) (auto split: prod.split simp: ennreal_mult[symmetric]) show ?thesis apply (rule nn_integral_cong_AE) using aeX1 aeX3 by (force simp add: space_pair_measure D) qed alsohave"… = 1" using Q.emeasure_space_1 distributed_distr_eq_density[OF Pyz] by (subst nn_integral_density[symmetric]) auto finallyhave le1: "(∫🪙+ x. ?f x ∂?P) ≤ 1" . alsohave"… < ∞"by simp finallyhave fin: "(∫🪙+ x. ?f x ∂?P) ≠∞"by simp
have"(∫🪙+ x. ennreal (Pxyz x) * ennreal (Pxz (fst x, snd (snd x)) * Pyz (snd x) / (Pz (snd (snd x)) * Pxyz x)) ∂(S ⨂🪙M T ⨂🪙M P)) ≠ 0" proof let ?g = "λx. ennreal (Pxyz x) * (Pxz (fst x, snd (snd x)) * Pyz (snd x) / (Pz (snd (snd x)) * Pxyz x))" assume"(∫🪙+x. ?g x ∂(S ⨂🪙M T ⨂🪙M P)) = 0" thenhave"AE x in S ⨂🪙M T ⨂🪙M P. ?g x = 0" by (intro nn_integral_0_iff_AE[THEN iffD1]) auto thenhave"AE x in S ⨂🪙M T ⨂🪙M P. Pxyz x = 0" using ae2 ae3 ae4 by (auto split: if_split_asm simp: mult_le_0_iff divide_le_0_iff space_pair_measure) thenhave"(∫🪙+ x. ennreal (Pxyz x) ∂S ⨂🪙M T ⨂🪙M P) = 0" by (subst nn_integral_cong_AE[of _ "λx. 0"]) auto with P.emeasure_space_1 show False by (subst (asm) emeasure_density) (auto cong: nn_integral_cong) qed thenhave pos: "(∫🪙+x. ?f x ∂?P) ≠ 0" by (subst nn_integral_density) (simp_all add: split_beta')
have neg: "(∫🪙+ x. - ?f x ∂?P) = 0" by (subst nn_integral_0_iff_AE) (auto simp: space_pair_measure ennreal_neg)
have I3: "integrable (S ⨂🪙M T ⨂🪙M P) (λ(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ Bochner_Integration.integrable_diff[OF I1 I2]]) using ae apply (auto simp: split_beta') done
have"- log b 1 ≤ - log b (integral🪙L ?P ?f)" proof (intro le_imp_neg_le Transcendental.log_mono[OF b_gt_1]) haveIf: "integrable ?P ?f" unfolding real_integrable_def proof (intro conjI) from neg show"(∫🪙+ x. - ?f x ∂?P) ≠∞" by simp from fin show"(∫🪙+ x. ?f x ∂?P) ≠∞" by simp qed simp thenhave"(∫🪙+ x. ?f x ∂?P) = (∫x. ?f x ∂?P)" apply (rule nn_integral_eq_integral) apply (auto simp: space_pair_measure ennreal_neg) done with pos le1 show"0 < (∫x. ?f x ∂?P)""(∫x. ?f x ∂?P) ≤ 1" by (simp_all add: one_ennreal.rep_eq zero_less_iff_neq_zero[symmetric]) qed alsohave"- log b (integral🪙L ?P ?f) ≤ (∫ x. - log b (?f x) ∂?P)" proof (rule P.jensens_inequality[where a=0 and b=1 and I="{0<..}"]) show"AE x in ?P. ?f x ∈ {0<..}" unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]] using ae1 ae2 ae3 ae4 by (auto simp: space_pair_measure less_le) show"integrable ?P ?f" unfolding real_integrable_def using fin neg by (auto simp: split_beta') have"integrable (S ⨂🪙M T ⨂🪙M P) (λx. Pxyz x * - log b (?f x))" apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3]) by (auto simp: log_mult log_divide field_simps) then show"integrable ?P (λx. - log b (?f x))" by (subst integrable_real_density) (auto simp: space_pair_measure) qed (auto simp: b_gt_1 minus_log_convex) alsohave"… = conditional_mutual_information b S T P X Y Z" unfolding‹?eq› apply (subst integral_real_density) apply simp apply (force simp: space_pair_measure) apply simp apply (intro integral_cong_AE) by (auto simp: field_simps log_divide) finallyshow ?nonneg by simp qed
lemma (in information_space) fixes Px :: "_ ==> real" assumes S: "sigma_finite_measure S"and T: "sigma_finite_measure T"and P: "sigma_finite_measure P" assumes Fx: "finite_entropy S X Px" assumes Fz: "finite_entropy P Z Pz" assumes Fyz: "finite_entropy (T ⨂🪙M P) (λx. (Y x, Z x)) Pyz" assumes Fxz: "finite_entropy (S ⨂🪙M P) (λx. (X x, Z x)) Pxz" assumes Fxyz: "finite_entropy (S ⨂🪙M T ⨂🪙M P) (λx. (X x, Y x, Z x)) Pxyz" shows conditional_mutual_information_generic_eq': "conditional_mutual_information b S T P X Y Z = (∫(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) ∂(S ⨂🪙M T ⨂🪙M P))" (is"?eq") and conditional_mutual_information_generic_nonneg': "0 ≤ conditional_mutual_information b S T P X Y Z" (is"?nonneg") proof - note Px = Fx[THEN finite_entropy_distributed, measurable] note Pz = Fz[THEN finite_entropy_distributed, measurable] note Pyz = Fyz[THEN finite_entropy_distributed, measurable] note Pxz = Fxz[THEN finite_entropy_distributed, measurable] note Pxyz = Fxyz[THEN finite_entropy_distributed, measurable]
interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret P: sigma_finite_measure P by fact interpret TP: pair_sigma_finite T P .. interpret SP: pair_sigma_finite S P .. interpret ST: pair_sigma_finite S T .. interpret SPT: pair_sigma_finite "S ⨂🪙M P" T .. interpret STP: pair_sigma_finite S "T ⨂🪙M P" .. interpret TPS: pair_sigma_finite "T ⨂🪙M P" S .. have TP: "sigma_finite_measure (T ⨂🪙M P)" .. have SP: "sigma_finite_measure (S ⨂🪙M P)" ..
from Pxz Pxyz have distr_eq: "distr M (S ⨂🪙M P) (λx. (X x, Z x)) = distr (distr M (S ⨂🪙M T ⨂🪙M P) (λx. (X x, Y x, Z x))) (S ⨂🪙M P) (λ(x, y, z). (x, z))" by (simp add: distr_distr comp_def)
have"mutual_information b S P X Z = (∫x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) ∂(S ⨂🪙M P))" using Px Px_nn Pz Pz_nn Pxz Pxz_nn by (rule mutual_information_distr[OF S P]) (auto simp: space_pair_measure) alsohave"… = (∫(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) ∂(S ⨂🪙M T ⨂🪙M P))" using b_gt_1 Pxz Pxz_nn Pxyz Pxyz_nn by (subst distributed_transform_integral[OF Pxyz _ Pxz, where T="λ(x, y, z). (x, z)"])
(auto simp: split_beta') finallyhave mi_eq: "mutual_information b S P X Z = (∫(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) ∂(S ⨂🪙M T ⨂🪙M P))" .
have ae1: "AE x in S ⨂🪙M T ⨂🪙M P. Px (fst x) = 0 ⟶ Pxyz x = 0" by (intro subdensity_finite_entropy[of fst, OF _ Fxyz Fx]) auto moreoverhave ae2: "AE x in S ⨂🪙M T ⨂🪙M P. Pz (snd (snd x)) = 0 ⟶ Pxyz x = 0" by (intro subdensity_finite_entropy[of "λx. snd (snd x)", OF _ Fxyz Fz]) auto moreoverhave ae3: "AE x in S ⨂🪙M T ⨂🪙M P. Pxz (fst x, snd (snd x)) = 0 ⟶ Pxyz x = 0" by (intro subdensity_finite_entropy[of "λx. (fst x, snd (snd x))", OF _ Fxyz Fxz]) auto moreoverhave ae4: "AE x in S ⨂🪙M T ⨂🪙M P. Pyz (snd x) = 0 ⟶ Pxyz x = 0" by (intro subdensity_finite_entropy[of snd, OF _ Fxyz Fyz]) auto ultimatelyhave ae: "AE x in S ⨂🪙M T ⨂🪙M P. Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) - Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) = Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " using AE_space proof eventually_elim case (elim x) show ?case proof cases assume"Pxyz x ≠ 0" with elim have"0 < Px (fst x)""0 < Pz (snd (snd x))""0 < Pxz (fst x, snd (snd x))" "0 < Pyz (snd x)""0 < Pxyz x" using Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn by (auto simp: space_pair_measure less_le) thenshow ?thesis using b_gt_1 by (simp add: log_simps less_imp_le field_simps) qed simp qed
have"integrable (S ⨂🪙M T ⨂🪙M P) (λx. Pxyz x * log b (Pxyz x) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pyz (snd x)))" using finite_entropy_integrable[OF Fxyz] using finite_entropy_integrable_transform[OF Fx Pxyz Pxyz_nn, of fst] using finite_entropy_integrable_transform[OF Fyz Pxyz Pxyz_nn, of snd] by simp moreoverhave"(λ(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z)))) ∈ borel_measurable (S ⨂🪙M T ⨂🪙M P)" using Pxyz Px Pyz by simp ultimatelyhave I1: "integrable (S ⨂🪙M T ⨂🪙M P) (λ(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" apply (rule integrable_cong_AE_imp) using ae1 ae4 Px_nn Pyz_nn Pxyz_nn by (auto simp: log_divide log_mult field_simps) have"integrable (S ⨂🪙M T ⨂🪙M P) (λx. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))" using finite_entropy_integrable_transform[OF Fxz Pxyz Pxyz_nn, of "λx. (fst x, snd (snd x))"] using finite_entropy_integrable_transform[OF Fx Pxyz Pxyz_nn, of fst] using finite_entropy_integrable_transform[OF Fz Pxyz Pxyz_nn, of "snd ∘ snd"] by simp moreoverhave"(λ(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z))) ∈borel_measurable (S ⨂🪙M T ⨂🪙M P)" using Pxyz Px Pz by auto ultimatelyhave I2: "integrable (S ⨂🪙M T ⨂🪙M P) (λ(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" apply (rule integrable_cong_AE_imp) using ae1 ae2 ae3 ae4 Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn apply(auto simp: field_simps log_divide log_mult) done from ae I1 I2 show ?eq unfolding conditional_mutual_information_def mi_eq apply (subst mutual_information_distr[OF S TP Px Px_nn Pyz Pyz_nn Pxyz Pxyz_nn]; simp add: space_pair_measure) apply (subst Bochner_Integration.integral_diff[symmetric]) apply (auto intro!: integral_cong_AE simp: split_beta' simp del: Bochner_Integration.integral_diff) done
let ?P = "density (S ⨂🪙M T ⨂🪙M P) Pxyz" interpret P: prob_space ?P unfolding distributed_distr_eq_density[OF Pxyz, symmetric] by (rule prob_space_distr) simp
let ?Q = "density (T ⨂🪙M P) Pyz" interpret Q: prob_space ?Q unfolding distributed_distr_eq_density[OF Pyz, symmetric] by (rule prob_space_distr) simp
let ?f = "λ(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)"
from subdensity_finite_entropy[of snd, OF _ Fyz Fz] have aeX1: "AE x in T ⨂🪙M P. Pz (snd x) = 0 ⟶ Pyz x = 0"by (auto simp: comp_def) have aeX2: "AE x in T ⨂🪙M P. 0 ≤ Pz (snd x)" using Pz by (intro TP.AE_pair_measure) (auto intro: Pz_nn)
have aeX3: "AE y in T ⨂🪙M P. (∫🪙+ x. ennreal (Pxz (x, snd y)) ∂S) = ennreal (Pz (snd y))" using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] by (intro TP.AE_pair_measure) (auto ) have"(∫🪙+ x. ?f x ∂?P) ≤ (∫🪙+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) ∂(S ⨂??M T ⨂🪙M P))" using Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn by (subst nn_integral_density)
(auto intro!: nn_integral_mono simp: space_pair_measure ennreal_mult[symmetric]) alsohave"… = (∫🪙+(y, z). ∫🪙+ x. ennreal (Pxz (x, z)) * ennreal (Pyz (y, z) / Pz z) ∂S ∂T ⨂🪙M P)" using Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn by (subst STP.nn_integral_snd[symmetric])
(auto simp add: split_beta' ennreal_mult[symmetric] space_pair_measure intro!: nn_integral_cong) alsohave"… = (∫🪙+x. ennreal (Pyz x) * 1 ∂T ⨂🪙M P)" proof - have *: "(∫🪙+ x. ennreal (Pxz (x, b)) * ennreal (Pyz (a, b) / Pz b) ∂S) = ennreal (Pyz (a, b))" if"Pz b = 0 ⟶ Pyz (a, b) = 0""0 ≤ Pz b""a ∈ space T ∧ b ∈ space P" "(∫🪙+ x. ennreal (Pxz (x, b)) ∂S) = ennreal (Pz b)"for a b using Pyz_nn[of "(a,b)"] that by (subst nn_integral_multc) (auto simp: space_pair_measure ennreal_mult[symmetric]) show ?thesis using aeX1 aeX2 aeX3 AE_space by (force simp: * space_pair_measure intro: nn_integral_cong_AE) qed alsohave"… = 1" using Q.emeasure_space_1 Pyz_nn distributed_distr_eq_density[OF Pyz] by (subst nn_integral_density[symmetric]) auto finallyhave le1: "(∫🪙+ x. ?f x ∂?P) ≤ 1" . alsohave"… < ∞"by simp finallyhave fin: "(∫🪙+ x. ?f x ∂?P) ≠∞"by simp
have"(∫🪙+ x. ?f x ∂?P) ≠ 0" using Pxyz_nn apply (subst nn_integral_density) apply (simp_all add: split_beta' ennreal_mult'[symmetric] cong: nn_integral_cong) proof let ?g = "λx. ennreal (if Pxyz x = 0 then 0 else Pxz (fst x, snd (snd x)) * Pyz (snd x) / Pz (snd (snd x)))" assume"(∫🪙+ x. ?g x ∂(S ⨂🪙M T ⨂🪙M P)) = 0" thenhave"AE x in S ⨂🪙M T ⨂🪙M P. ?g x = 0" by (intro nn_integral_0_iff_AE[THEN iffD1]) auto thenhave"AE x in S ⨂🪙M T ⨂🪙M P. Pxyz x = 0" using ae1 ae2 ae3 ae4 by (insert Px_nn Pz_nn Pxz_nn Pyz_nn,
auto split: if_split_asm simp: mult_le_0_iff divide_le_0_iff space_pair_measure) thenhave"(∫🪙+ x. ennreal (Pxyz x) ∂S ⨂🪙M T ⨂🪙M P) = 0" by (subst nn_integral_cong_AE[of _ "λx. 0"]) auto with P.emeasure_space_1 show False by (subst (asm) emeasure_density) (auto cong: nn_integral_cong) qed thenhave pos: "0 < (∫🪙+ x. ?f x ∂?P)" by (simp add: zero_less_iff_neq_zero)
have neg: "(∫🪙+ x. - ?f x ∂?P) = 0" using Pz_nn Pxz_nn Pyz_nn Pxyz_nn by (intro nn_integral_0_iff_AE[THEN iffD2])
(auto simp: split_beta' AE_density space_pair_measure intro!: AE_I2 ennreal_neg)
have I3: "integrable (S ⨂🪙M T ⨂🪙M P) (λ(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ Bochner_Integration.integrable_diff[OF I1 I2]]) using ae by (auto simp: split_beta')
have"- log b 1 ≤ - log b (integral🪙L ?P ?f)" proof (intro le_imp_neg_le Transcendental.log_mono[OF b_gt_1]) haveIf: "integrable ?P ?f" using neg fin by (force simp add: real_integrable_def) thenhave"(∫🪙+ x. ?f x ∂?P) = (∫x. ?f x ∂?P)" using Pz_nn Pxz_nn Pyz_nn Pxyz_nn by (intro nn_integral_eq_integral)
(auto simp: AE_density space_pair_measure) with pos le1 show"0 < (∫x. ?f x ∂?P)""(∫x. ?f x ∂?P) ≤ 1" by (simp_all add: ) qed alsohave"- log b (integral🪙L ?P ?f) ≤ (∫ x. - log b (?f x) ∂?P)" proof (rule P.jensens_inequality[where a=0 and b=1 and I="{0<..}"]) show"AE x in ?P. ?f x ∈ {0<..}" unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]] using ae1 ae2 ae3 ae4 by (insert Pxyz_nn Pyz_nn Pz_nn Pxz_nn, auto simp: space_pair_measure less_le) show"integrable ?P ?f" unfolding real_integrable_def using fin neg by (auto simp: split_beta') have"integrable (S ⨂🪙M T ⨂🪙M P) (λx. Pxyz x * - log b (?f x))" apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3]) using Pz_nn Pxz_nn Pyz_nn Pxyz_nn ae2 ae3 ae4 by (auto simp: log_divide field_simps) thenshow"integrable ?P (λx. - log b (?f x))" using Pxyz_nn by (auto simp: integrable_real_density) qed (auto simp: b_gt_1 minus_log_convex) alsohave"… = conditional_mutual_information b S T P X Y Z" unfolding‹?eq› using Pz_nn Pxz_nn Pyz_nn Pxyz_nn apply (subst integral_real_density) apply simp apply simp apply simp apply (intro integral_cong_AE) using ae1 ae2 ae3 ae4 by (auto simp: log_divide zero_less_mult_iff field_simps) finallyshow ?nonneg by simp qed
lemma (in information_space) conditional_mutual_information_eq: assumes Pz: "simple_distributed M Z Pz" assumes Pyz: "simple_distributed M (λx. (Y x, Z x)) Pyz" assumes Pxz: "simple_distributed M (λx. (X x, Z x)) Pxz" assumes Pxyz: "simple_distributed M (λx. (X x, Y x, Z x)) Pxyz" shows"I(X ; Y | Z) = (∑(x, y, z)∈(λx. (X x, Y x, Z x))`space M. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" proof (subst conditional_mutual_information_generic_eq[OF _ _ _ _ _
simple_distributed[OF Pz] _ simple_distributed_joint[OF Pyz] _ simple_distributed_joint[OF Pxz] _
simple_distributed_joint2[OF Pxyz]]) note simple_distributed_joint2_finite[OF Pxyz, simp] show"sigma_finite_measure (count_space (X ` space M))" by (simp add: sigma_finite_measure_count_space_finite) show"sigma_finite_measure (count_space (Y ` space M))" by (simp add: sigma_finite_measure_count_space_finite) show"sigma_finite_measure (count_space (Z ` space M))" by (simp add: sigma_finite_measure_count_space_finite) have"count_space (X ` space M) ⨂🪙M count_space (Y ` space M) ⨂🪙M count_space (Z ` space M) = count_space (X`space M × Y`space M × Z`space M)"
(is"?P = ?C") by (simp add: pair_measure_count_space)
let ?Px = "λx. measure M (X -` {x} ∩ space M)" have"(λx. (X x, Z x)) ∈ measurable M (count_space (X ` space M) ⨂🪙M count_space (Z ` space M))" using simple_distributed_joint[OF Pxz] by (rule distributed_measurable) from measurable_comp[OF this measurable_fst] have"random_variable (count_space (X ` space M)) X" by (simp add: comp_def) thenhave"simple_function M X" unfolding simple_function_def by (auto simp: measurable_count_space_eq2) thenhave"simple_distributed M X ?Px" by (rule simple_distributedI) (auto simp: measure_nonneg) thenshow"distributed M (count_space (X ` space M)) X ?Px" by (rule simple_distributed)
let ?f = "(λx. if x ∈ (λx. (X x, Y x, Z x)) ` space M then Pxyz x else 0)" let ?g = "(λx. if x ∈ (λx. (Y x, Z x)) ` space M then Pyz x else 0)" let ?h = "(λx. if x ∈ (λx. (X x, Z x)) ` space M then Pxz x else 0)" show "integrable ?P (λ(x, y, z). ?f (x, y, z) * log b (?f (x, y, z) / (?Px x * ?g (y, z))))" "integrable ?P (λ(x, y, z). ?f (x, y, z) * log b (?h (x, z) / (?Px x * Pz z)))" by (auto intro!: integrable_count_space simp: pair_measure_count_space) let ?i = "λx y z. ?f (x, y, z) * log b (?f (x, y, z) / (?h (x, z) * (?g (y, z) / Pz z)))" let ?j = "λx y z. Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z)))" have"(λ(x, y, z). ?i x y z) = (λx. if x ∈ (λx. (X x, Y x, Z x)) ` space M then ?j (fst x) (fst (snd x)) (snd (snd x)) else 0)" by (auto intro!: ext) thenshow"(∫ (x, y, z). ?i x y z ∂?P) = (∑(x, y, z)∈(λx. (X x, Y x, Z x)) ` space M. ?j x y z)" by (auto intro!: sum.cong simp add: ‹?P = ?C› lebesgue_integral_count_space_finite simple_distributed_finite sum.If_cases split_beta') qed (insert Pz Pyz Pxz Pxyz, auto intro: measure_nonneg)
lemma (in information_space) conditional_mutual_information_nonneg: assumes X: "simple_function M X"and Y: "simple_function M Y"and Z: "simple_function M Z" shows"0 ≤I(X ; Y | Z)" proof - have [simp]: "count_space (X ` space M) ⨂🪙M count_space (Y ` space M) ⨂🪙M count_space (Z ` space M) = count_space (X`space M × Y`space M × Z`space M)" by (simp add: pair_measure_count_space X Y Z simple_functionD) note sf = sigma_finite_measure_count_space_finite[OF simple_functionD(1)] note sd = simple_distributedI[OF _ _ refl] note sp = simple_function_Pair show ?thesis apply (rule conditional_mutual_information_generic_nonneg[OF sf[OF X] sf[OF Y] sf[OF Z]]) apply (force intro: simple_distributed[OF sd[OF X]]) apply simp apply (force intro: simple_distributed[OF sd[OF Z]]) apply simp apply (force intro: simple_distributed_joint[OF sd[OF sp[OF Y Z]]]) apply simp apply (force intro: simple_distributed_joint[OF sd[OF sp[OF X Z]]]) apply simp apply (fastforce intro: simple_distributed_joint2[OF sd[OF sp[OF X sp[OF Y Z]]]]) apply (auto intro!: integrable_count_space simp: X Y Z simple_functionD) done qed
subsection‹Conditional Entropy›
definition (in prob_space) "conditional_entropy b S T X Y = - (∫(x, y). log b (enn2real (RN_deriv (S ⨂🪙M T) (distr M (S ⨂🪙M T) (λx. (X x, Y x))) (x, y)) / enn2real (RN_deriv T (distr M T Y) y)) ∂distr M (S ⨂🪙M T) (λx. (X x, Y x)))"
abbreviation (in information_space)
conditional_entropy_Pow (‹H'(_ | _')›) where "H(X | Y) ≡ conditional_entropy b (count_space (X`space M)) (count_space (Y`space M)) X Y"
lemma (in information_space) conditional_entropy_generic_eq: fixes Pxy :: "_ ==> real"and Py :: "'c ==> real" assumes S: "sigma_finite_measure S"and T: "sigma_finite_measure T" assumes Py[measurable]: "distributed M T Y Py"and Py_nn[simp]: "∧x. x ∈ space T ==>0 ≤ Py x" assumes Pxy[measurable]: "distributed M (S ⨂🪙M T) (λx. (X x, Y x)) Pxy" and Pxy_nn[simp]: "∧x y. x ∈ space S ==> y ∈ space T ==> 0 ≤ Pxy (x, y)" shows"conditional_entropy b S T X Y = - (∫(x, y). Pxy (x, y) * log b (Pxy (x, y) / Py y) ∂(S ⨂🪙M T))" proof - interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret ST: pair_sigma_finite S T ..
have [measurable]: "Py ∈ T →🪙M borel" using Py Py_nn by (intro distributed_real_measurable) have measurable_Pxy[measurable]: "Pxy ∈ (S ⨂🪙M T) →🪙M borel" using Pxy Pxy_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure)
have"AE x in density (S ⨂🪙M T) (λx. ennreal (Pxy x)). Pxy x = enn2real (RN_deriv (S ⨂🪙M T) (distr M (S ⨂🪙M T) (λx. (X x, Y x))) x)" unfolding AE_density[OF distributed_borel_measurable, OF Pxy] unfolding distributed_distr_eq_density[OF Pxy] using distributed_RN_deriv[OF Pxy] by auto moreover have"AE x in density (S ⨂🪙M T) (λx. ennreal (Pxy x)). Py (snd x) = enn2real (RN_deriv T (distr M T Y) (snd x))" unfolding AE_density[OF distributed_borel_measurable, OF Pxy] unfolding distributed_distr_eq_density[OF Py] using distributed_RN_deriv[OF Py] by (force intro: ST.AE_pair_measure) ultimately have"conditional_entropy b S T X Y = - (∫x. Pxy x * log b (Pxy x / Py (snd x)) ∂(S ⨂🪙M T))" unfolding conditional_entropy_def neg_equal_iff_equal apply (subst integral_real_density[symmetric]) apply (auto simp: distributed_distr_eq_density[OF Pxy] space_pair_measure
intro!: integral_cong_AE) done thenshow ?thesis by (simp add: split_beta') qed
lemma (in information_space) conditional_entropy_eq_entropy: fixes Px :: "'b ==> real"and Py :: "'c ==> real" assumes S: "sigma_finite_measure S"and T: "sigma_finite_measure T" assumes Py[measurable]: "distributed M T Y Py" and Py_nn[simp]: "∧x. x ∈ space T ==> 0 ≤ Py x" assumes Pxy[measurable]: "distributed M (S ⨂🪙M T) (λx. (X x, Y x)) Pxy" and Pxy_nn[simp]: "∧x y. x ∈ space S ==> y ∈ space T ==> 0 ≤ Pxy (x, y)" assumes I1: "integrable (S ⨂🪙M T) (λx. Pxy x * log b (Pxy x))" assumes I2: "integrable (S ⨂🪙M T) (λx. Pxy x * log b (Py (snd x)))" shows"conditional_entropy b S T X Y = entropy b (S ⨂🪙M T) (λx. (X x, Y x)) - entropy b T Y" proof - interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret ST: pair_sigma_finite S T ..
have [measurable]: "Py ∈ T →🪙M borel" using Py Py_nn by (intro distributed_real_measurable) have measurable_Pxy[measurable]: "Pxy ∈ (S ⨂🪙M T) →🪙M borel" using Pxy Pxy_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure)
have"entropy b T Y = - (∫y. Py y * log b (Py y) ∂T)" by (rule entropy_distr[OF Py Py_nn]) alsohave"… = - (∫(x,y). Pxy (x,y) * log b (Py y) ∂(S ⨂🪙M T))" using b_gt_1 by (subst distributed_transform_integral[OF Pxy _ Py, where T=snd])
(auto intro!: Bochner_Integration.integral_cong simp: space_pair_measure) finallyhave e_eq: "entropy b T Y = - (∫(x,y). Pxy (x,y) * log b (Py y) ∂(S ⨂🪙M T))" .
have **: "∧x. x ∈ space (S ⨂🪙M T) ==> 0 ≤ Pxy x" by (auto simp: space_pair_measure)
have ae2: "AE x in S ⨂🪙M T. Py (snd x) = 0 ⟶ Pxy x = 0" by (intro subdensity_real[of snd, OF _ Pxy Py])
(auto intro: measurable_Pair simp: space_pair_measure) moreoverhave ae4: "AE x in S ⨂🪙M T. 0 ≤ Py (snd x)" using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'') ultimatelyhave"AE x in S ⨂🪙M T. 0 ≤ Pxy x ∧ 0 ≤ Py (snd x) ∧ (Pxy x = 0 ∨ (Pxy x ≠ 0 ⟶ 0 < Pxy x ∧ 0 < Py (snd x)))" by (auto simp: space_pair_measure less_le) thenhave ae: "AE x in S ⨂🪙M T. Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / Py (snd x))" by (auto simp: log_simps field_simps b_gt_1) have"conditional_entropy b S T X Y = - (∫x. Pxy x * log b (Pxy x) - Pxy x * log b (Py (snd x)) ∂(S ⨂🪙M T))" unfolding conditional_entropy_generic_eq[OF S T Py Py_nn Pxy Pxy_nn, simplified] neg_equal_iff_equal using ae by (force intro: integral_cong_AE) alsohave"… = - (∫x. Pxy x * log b (Pxy x) ∂(S ⨂🪙M T)) - - (∫x. Pxy x * log b (Py (snd x)) ∂(S ⨂🪙M T))" by (simp add: Bochner_Integration.integral_diff[OF I1 I2]) finallyshow ?thesis using conditional_entropy_generic_eq[OF S T Py Py_nn Pxy Pxy_nn, simplified]
entropy_distr[OF Pxy **, simplified] e_eq by (simp add: split_beta') qed
lemma (in information_space) conditional_entropy_eq_entropy_simple: assumes X: "simple_function M X"and Y: "simple_function M Y" shows"H(X | Y) = entropy b (count_space (X`space M) ⨂🪙M count_space (Y`space M)) (λx. (X x, Y x)) - H(Y)" proof - have"count_space (X ` space M) ⨂🪙M count_space (Y ` space M) = count_space (X`space M × Y`space M)"
(is"?P = ?C") using X Y by (simp add: simple_functionD pair_measure_count_space) show ?thesis by (rule conditional_entropy_eq_entropy sigma_finite_measure_count_space_finite
simple_functionD X Y simple_distributed simple_distributedI[OF _ _ refl]
simple_distributed_joint simple_function_Pair integrable_count_space measure_nonneg)+
(auto simp: ‹?P = ?C› measure_nonneg intro!: integrable_count_space simple_functionD X Y) qed
lemma (in information_space) conditional_entropy_eq: assumes Y: "simple_distributed M Y Py" assumes XY: "simple_distributed M (λx. (X x, Y x)) Pxy" shows"H(X | Y) = - (∑(x, y)∈(λx. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / Py y))" proof (subst conditional_entropy_generic_eq[OF _ _
simple_distributed[OF Y] _ simple_distributed_joint[OF XY]]) have"finite ((λx. (X x, Y x))`space M)" using XY unfolding simple_distributed_def by auto from finite_imageI[OF this, of fst] have [simp]: "finite (X`space M)" by (simp add: image_comp comp_def) note Y[THEN simple_distributed_finite, simp] show"sigma_finite_measure (count_space (X ` space M))" by (simp add: sigma_finite_measure_count_space_finite) show"sigma_finite_measure (count_space (Y ` space M))" by (simp add: sigma_finite_measure_count_space_finite) let ?f = "(λx. if x ∈ (λx. (X x, Y x)) ` space M then Pxy x else 0)" have"count_space (X ` space M) ⨂🪙M count_space (Y ` space M) = count_space (X`space M × Y`space M)"
(is"?P = ?C") using Y by (simp add: simple_distributed_finite pair_measure_count_space) have eq: "(λ(x, y). ?f (x, y) * log b (?f (x, y) / Py y)) = (λx. if x ∈ (λx. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / Py (snd x)) else 0)" by auto from Y show"- (∫ (x, y). ?f (x, y) * log b (?f (x, y) / Py y) ∂?P) = - (∑(x, y)∈(λx. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / Py y))" by (auto intro!: sum.cong simp add: ‹?P = ?C› lebesgue_integral_count_space_finite simple_distributed_finite eq sum.If_cases split_beta') qed (use Y XY in auto)
lemma (in information_space) conditional_mutual_information_eq_conditional_entropy: assumes X: "simple_function M X"and Y: "simple_function M Y" shows"I(X ; X | Y) = H(X | Y)" proof -
define Py where"Py x = (if x ∈ Y`space M then measure M (Y -` {x} ∩ space M) else 0)"for x
define Pxy where"Pxy x = (if x ∈ (λx. (X x, Y x))`space M then measure M ((λx. (X x, Y x)) -` {x} ∩ space M) else 0)" for x
define Pxxy where"Pxxy x = (if x ∈ (λx. (X x, X x, Y x))`space M then measure M ((λx. (X x, X x, Y x)) -` {x}∩ space M) else 0)" for x let ?M = "X`space M × X`space M × Y`space M"
note XY = simple_function_Pair[OF X Y] note XXY = simple_function_Pair[OF X XY] have Py: "simple_distributed M Y Py" using Y by (rule simple_distributedI) (auto simp: Py_def measure_nonneg) have Pxy: "simple_distributed M (λx. (X x, Y x)) Pxy" using XY by (rule simple_distributedI) (auto simp: Pxy_def measure_nonneg) have Pxxy: "simple_distributed M (λx. (X x, X x, Y x)) Pxxy" using XXY by (rule simple_distributedI) (auto simp: Pxxy_def measure_nonneg) have eq: "(λx. (X x, X x, Y x)) ` space M = (λ(x, y). (x, x, y)) ` (λx. (X x, Y x)) ` space M" by auto have inj: "∧A. inj_on (λ(x, y). (x, x, y)) A" by (auto simp: inj_on_def) have Pxxy_eq: "∧x y. Pxxy (x, x, y) = Pxy (x, y)" by (auto simp: Pxxy_def Pxy_def intro!: arg_cong[where f=prob]) have"AE x in count_space ((λx. (X x, Y x))`space M). Py (snd x) = 0 ⟶ Pxy x = 0" using Py Pxy by (intro subdensity_real[of snd, OF _ Pxy[THEN simple_distributed] Py[THEN simple_distributed]])
(auto intro: measurable_Pair simp: AE_count_space) thenshow ?thesis apply (subst conditional_mutual_information_eq[OF Py Pxy Pxy Pxxy]) apply (subst conditional_entropy_eq[OF Py Pxy]) apply (auto intro!: sum.cong simp: Pxxy_eq sum_negf[symmetric] eq sum.reindex[OF inj]
log_simps zero_less_mult_iff zero_le_mult_iff field_simps mult_less_0_iff AE_count_space) done qed
lemma (in information_space) conditional_entropy_nonneg: assumes X: "simple_function M X"and Y: "simple_function M Y"shows"0 ≤H(X | Y)" using conditional_mutual_information_eq_conditional_entropy[OF X Y] conditional_mutual_information_nonneg[OF X X Y] by simp
subsection‹Equalities›
lemma (in information_space) mutual_information_eq_entropy_conditional_entropy_distr: fixes Px :: "'b ==> real"and Py :: "'c ==> real"and Pxy :: "('b × 'c) ==> real" assumes S: "sigma_finite_measure S"and T: "sigma_finite_measure T" assumes Px[measurable]: "distributed M S X Px" and Px_nn[simp]: "∧x. x ∈ space S ==> 0 ≤ Px x" and Py[measurable]: "distributed M T Y Py" and Py_nn[simp]: "∧x. x ∈ space T ==> 0 ≤ Py x" and Pxy[measurable]: "distributed M (S ⨂🪙M T) (λx. (X x, Y x)) Pxy" and Pxy_nn[simp]: "∧x y. x ∈ space S ==> y ∈ space T ==> 0 ≤ Pxy (x, y)" assumes Ix: "integrable(S ⨂🪙M T) (λx. Pxy x * log b (Px (fst x)))" assumes Iy: "integrable(S ⨂🪙M T) (λx. Pxy x * log b (Py (snd x)))" assumes Ixy: "integrable(S ⨂🪙M T) (λx. Pxy x * log b (Pxy x))" shows"mutual_information b S T X Y = entropy b S X + entropy b T Y - entropy b (S ⨂🪙M T) (λx. (X x, Y x))" proof - have [measurable]: "Px ∈ S →🪙M borel" using Px Px_nn by (intro distributed_real_measurable) have [measurable]: "Py ∈ T →🪙M borel" using Py Py_nn by (intro distributed_real_measurable) have measurable_Pxy[measurable]: "Pxy ∈ (S ⨂🪙M T) →🪙M borel" using Pxy Pxy_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure)
have X: "entropy b S X = - (∫x. Pxy x * log b (Px (fst x)) ∂(S ⨂🪙M T))" using b_gt_1 apply (subst entropy_distr[OF Px Px_nn], simp) apply (subst distributed_transform_integral[OF Pxy _ Px, where T=fst]) apply (auto intro!: integral_cong simp: space_pair_measure) done
have Y: "entropy b T Y = - (∫x. Pxy x * log b (Py (snd x)) ∂(S ⨂🪙M T))" using b_gt_1 apply (subst entropy_distr[OF Py Py_nn], simp) apply (subst distributed_transform_integral[OF Pxy _ Py, where T=snd]) apply (auto intro!: integral_cong simp: space_pair_measure) done
interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret ST: pair_sigma_finite S T .. have ST: "sigma_finite_measure (S ⨂🪙M T)" ..
have XY: "entropy b (S ⨂🪙M T) (λx. (X x, Y x)) = - (∫x. Pxy x * log b (Pxy x) ∂(S ⨂🪙M T))" by (subst entropy_distr[OF Pxy]) (auto intro!: integral_cong simp: space_pair_measure)
have"AE x in S ⨂🪙M T. Px (fst x) = 0 ⟶ Pxy x = 0" by (intro subdensity_real[of fst, OF _ Pxy Px]) (auto intro: measurable_Pair simp: space_pair_measure) moreoverhave"AE x in S ⨂🪙M T. Py (snd x) = 0 ⟶ Pxy x = 0" by (intro subdensity_real[of snd, OF _ Pxy Py]) (auto intro: measurable_Pair simp: space_pair_measure) moreoverhave"AE x in S ⨂🪙M T. 0 ≤ Px (fst x)" using Px by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_fst'') moreoverhave"AE x in S ⨂🪙M T. 0 ≤ Py (snd x)" using Py by (intro ST.AE_pair_measure) (auto simp: comp_def intro!: measurable_snd'') ultimatelyhave"AE x in S ⨂🪙M T. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)) = Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))"
(is"AE x in _. ?f x = ?g x") using AE_space proof eventually_elim case (elim x) show ?case proof cases assume"Pxy x ≠ 0" with elim have"0 < Px (fst x)""0 < Py (snd x)""0 < Pxy x" by (auto simp: space_pair_measure less_le) thenshow ?thesis using b_gt_1 by (simp add: log_simps less_imp_le field_simps) qed simp qed
have"entropy b S X + entropy b T Y - entropy b (S ⨂🪙M T) (λx. (X x, Y x)) = integral🪙L (S ⨂🪙M T) ?f" unfolding X Y XY apply (subst Bochner_Integration.integral_diff) apply (intro Bochner_Integration.integrable_diff Ixy Ix Iy)+ apply (subst Bochner_Integration.integral_diff) apply (intro Ixy Ix Iy)+ apply (simp add: field_simps) done alsohave"… = integral🪙L (S ⨂🪙M T) ?g" using‹AE x in _. ?f x = ?g x›by (intro integral_cong_AE) auto alsohave"… = mutual_information b S T X Y" by (rule mutual_information_distr[OF S T Px _ Py _ Pxy _ , symmetric])
(auto simp: space_pair_measure) finallyshow ?thesis .. qed
lemma (in information_space) mutual_information_eq_entropy_conditional_entropy': fixes Px :: "'b ==> real"and Py :: "'c ==> real"and Pxy :: "('b × 'c) ==> real" assumes S: "sigma_finite_measure S"and T: "sigma_finite_measure T" assumes Px: "distributed M S X Px""∧x. x ∈ space S ==> 0 ≤ Px x" and Py: "distributed M T Y Py""∧x. x ∈ space T ==> 0 ≤ Py x" assumes Pxy: "distributed M (S ⨂🪙M T) (λx. (X x, Y x)) Pxy" "∧x. x ∈ space (S ⨂🪙M T) ==> 0 ≤ Pxy x" assumes Ix: "integrable(S ⨂🪙M T) (λx. Pxy x * log b (Px (fst x)))" assumes Iy: "integrable(S ⨂🪙M T) (λx. Pxy x * log b (Py (snd x)))" assumes Ixy: "integrable(S ⨂🪙M T) (λx. Pxy x * log b (Pxy x))" shows"mutual_information b S T X Y = entropy b S X - conditional_entropy b S T X Y" using
mutual_information_eq_entropy_conditional_entropy_distr[OF S T Px Py Pxy Ix Iy Ixy]
conditional_entropy_eq_entropy[OF S T Py Pxy Ixy Iy] by (simp add: space_pair_measure)
lemma (in information_space) mutual_information_eq_entropy_conditional_entropy: assumes sf_X: "simple_function M X"and sf_Y: "simple_function M Y" shows"I(X ; Y) = H(X) - H(X | Y)" proof - have X: "simple_distributed M X (λx. measure M (X -` {x} ∩ space M))" using sf_X by (rule simple_distributedI) (auto simp: measure_nonneg) have Y: "simple_distributed M Y (λx. measure M (Y -` {x} ∩ space M))" using sf_Y by (rule simple_distributedI) (auto simp: measure_nonneg) have sf_XY: "simple_function M (λx. (X x, Y x))" using sf_X sf_Y by (rule simple_function_Pair) thenhave XY: "simple_distributed M (λx. (X x, Y x)) (λx. measure M ((λx. (X x, Y x)) -` {x} ∩ space M))" by (rule simple_distributedI) (auto simp: measure_nonneg) from simple_distributed_joint_finite[OF this, simp] have eq: "count_space (X ` space M) ⨂🪙M count_space (Y ` space M) = count_space (X ` space M × Y ` space M)" by (simp add: pair_measure_count_space)
have"I(X ; Y) = H(X) + H(Y) - entropy b (count_space (X`space M) ⨂🪙M count_space (Y`space M)) (λx. (X x, Y x))" using sigma_finite_measure_count_space_finite
sigma_finite_measure_count_space_finite
simple_distributed[OF X] measure_nonneg simple_distributed[OF Y] measure_nonneg simple_distributed_joint[OF XY] by (rule mutual_information_eq_entropy_conditional_entropy_distr)
(auto simp: eq integrable_count_space measure_nonneg) thenshow ?thesis unfolding conditional_entropy_eq_entropy_simple[OF sf_X sf_Y] by simp qed
lemma (in information_space) mutual_information_nonneg_simple: assumes sf_X: "simple_function M X"and sf_Y: "simple_function M Y" shows"0 ≤I(X ; Y)" proof - have X: "simple_distributed M X (λx. measure M (X -` {x} ∩ space M))" using sf_X by (rule simple_distributedI) (auto simp: measure_nonneg) have Y: "simple_distributed M Y (λx. measure M (Y -` {x} ∩ space M))" using sf_Y by (rule simple_distributedI) (auto simp: measure_nonneg)
have sf_XY: "simple_function M (λx. (X x, Y x))" using sf_X sf_Y by (rule simple_function_Pair) thenhave XY: "simple_distributed M (λx. (X x, Y x)) (λx. measure M ((λx. (X x, Y x)) -` {x} ∩ space M))" by (rule simple_distributedI) (auto simp: measure_nonneg)
from simple_distributed_joint_finite[OF this, simp] have eq: "count_space (X ` space M) ⨂🪙M count_space (Y ` space M) = count_space (X ` space M × Y ` space M)" by (simp add: pair_measure_count_space)
show ?thesis by (rule mutual_information_nonneg[OF _ _ simple_distributed[OF X] _ simple_distributed[OF Y] _ simple_distributed_joint[OF XY]])
(simp_all add: eq integrable_count_space sigma_finite_measure_count_space_finite measure_nonneg) qed
lemma (in information_space) conditional_entropy_less_eq_entropy: assumes X: "simple_function M X"and Z: "simple_function M Z" shows"H(X | Z) ≤H(X)" proof - have"0 ≤I(X ; Z)"using X Z by (rule mutual_information_nonneg_simple) alsohave"I(X ; Z) = H(X) - H(X | Z)"using mutual_information_eq_entropy_conditional_entropy[OF assms] . finallyshow ?thesis by auto qed
lemma (in information_space) fixes Px :: "'b ==> real"and Py :: "'c ==> real"and Pxy :: "('b × 'c) ==> real" assumes S: "sigma_finite_measure S"and T: "sigma_finite_measure T" assumes Px: "finite_entropy S X Px"and Py: "finite_entropy T Y Py" assumes Pxy: "finite_entropy (S ⨂🪙M T) (λx. (X x, Y x)) Pxy" shows"conditional_entropy b S T X Y ≤ entropy b S X" proof - have"0 ≤ mutual_information b S T X Y" by (rule mutual_information_nonneg') fact+ alsohave"… = entropy b S X - conditional_entropy b S T X Y" proof (intro mutual_information_eq_entropy_conditional_entropy') show"integrable (S ⨂🪙M T) (λx. Pxy x * log b (Px (fst x)))" "integrable (S ⨂🪙M T) (λx. Pxy x * log b (Py (snd x)))" "integrable (S ⨂🪙M T) (λx. Pxy x * log b (Pxy x))" using assms by (auto intro!: finite_entropy_integrable finite_entropy_distributed
finite_entropy_integrable_transform[OF Px] finite_entropy_integrable_transform[OF Py]
intro: finite_entropy_nn) qed (use assms Px Py Pxy finite_entropy_nn finite_entropy_distributed in auto) finallyshow ?thesis by auto qed
lemma (in information_space) entropy_chain_rule: assumes X: "simple_function M X"and Y: "simple_function M Y" shows"H(λx. (X x, Y x)) = H(X) + H(Y|X)" proof - note XY = simple_distributedI[OF simple_function_Pair[OF X Y] measure_nonneg refl] note YX = simple_distributedI[OF simple_function_Pair[OF Y X] measure_nonneg refl] note simple_distributed_joint_finite[OF this, simp] let ?f = "λx. prob ((λx. (X x, Y x)) -` {x} ∩ space M)" let ?g = "λx. prob ((λx. (Y x, X x)) -` {x} ∩ space M)" let ?h = "λx. if x ∈ (λx. (Y x, X x)) ` space M then prob ((λx. (Y x, X x)) -` {x} ∩ space M) else 0" have"H(λx. (X x, Y x)) = - (∑x∈(λx. (X x, Y x)) ` space M. ?f x * log b (?f x))" using XY by (rule entropy_simple_distributed) alsohave"… = - (∑x∈(λ(x, y). (y, x)) ` (λx. (X x, Y x)) ` space M. ?g x * log b (?g x))" by (subst (2) sum.reindex) (auto simp: inj_on_def intro!: sum.cong arg_cong[where f="λA. prob A * log b (prob A)"]) alsohave"… = - (∑x∈(λx. (Y x, X x)) ` space M. ?h x * log b (?h x))" by (auto intro!: sum.cong) alsohave"… = entropy b (count_space (Y ` space M) ⨂🪙M count_space (X ` space M)) (λx. (Y x, X x))" by (subst entropy_distr[OF simple_distributed_joint[OF YX]])
(auto simp: pair_measure_count_space sigma_finite_measure_count_space_finite lebesgue_integral_count_space_finite
cong del: sum.cong_simp intro!: sum.mono_neutral_left measure_nonneg) finallyhave"H(λx. (X x, Y x)) = entropy b (count_space (Y ` space M) ⨂🪙M count_space (X ` space M)) (λx. (Y x, X x))" . thenshow ?thesis unfolding conditional_entropy_eq_entropy_simple[OF Y X] by simp qed
lemma (in information_space) entropy_partition: assumes X: "simple_function M X" shows"H(X) = H(f ∘ X) + H(X|f ∘ X)" proof - note fX = simple_function_compose[OF X, of f] have eq: "(λx. ((f ∘ X) x, X x)) ` space M = (λx. (f x, x)) ` X ` space M"by auto have inj: "∧A. inj_on (λx. (f x, x)) A" by (auto simp: inj_on_def)
have"H(X) = - (∑x∈X ` space M. prob (X -` {x} ∩ space M) * log b (prob (X -` {x}∩ space M)))" by (simp add: entropy_simple_distributed[OF simple_distributedI[OF X measure_nonneg refl]]) alsohave"… = - (∑x∈(λx. ((f ∘ X) x, X x)) ` space M. prob ((λx. ((f ∘ X) x, X x)) -` {x} ∩ space M) * log b (prob ((λx. ((f ∘ X) x, X x)) -` {x} ∩ space M)))" unfolding eq apply (subst sum.reindex[OF inj]) apply (auto intro!: sum.cong arg_cong[where f="λA. prob A * log b (prob A)"]) done alsohave"... = H(λx. ((f ∘ X) x, X x))" using entropy_simple_distributed[OF simple_distributedI[OF simple_function_Pair[OF fX X] measure_nonneg refl]] by fastforce alsohave"… = H(f ∘ X) + H(X|f ∘ X)" using X entropy_chain_rule by blast finallyshow ?thesis . qed
corollary (in information_space) entropy_data_processing: assumes"simple_function M X"shows"H(f ∘ X) ≤H(X)" by (smt (verit) assms conditional_entropy_nonneg entropy_partition simple_function_compose)
corollary (in information_space) entropy_of_inj: assumes X: "simple_function M X"and inj: "inj_on f (X`space M)" shows"H(f ∘ X) = H(X)" proof (rule antisym) show"H(f ∘ X) ≤H(X)"using entropy_data_processing[OF X] . next have sf: "simple_function M (f ∘ X)" using X by auto have"H(X) = H(the_inv_into (X`space M) f ∘ (f ∘ X))" unfolding o_assoc apply (subst entropy_simple_distributed[OF simple_distributedI[OF X measure_nonneg refl]]) apply (subst entropy_simple_distributed[OF simple_distributedI[OF simple_function_compose[OF X]], where f="λx. prob (X -` {x} ∩ space M)"]) apply (auto intro!: sum.cong arg_cong[where f=prob] image_eqI simp: the_inv_into_f_f[OF inj] comp_def measure_nonneg) done alsohave"... ≤H(f ∘ X)" using entropy_data_processing[OF sf] . finallyshow"H(X) ≤H(f ∘ X)" . qed
end
Messung V0.5 in Prozent
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen0.71Angebot
(Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können 2026-04-27)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.