text‹Instead, we use a recursive definition:› fun sublist_at :: "'a list ==> 'a list ==> nat ==> bool"where "sublist_at (x#xs) (y#ys) 0 ⟷ x=y ∧ sublist_at xs ys 0" | "sublist_at xs (y#ys) (Suc i) ⟷ sublist_at xs ys i" | "sublist_at [] ys 0 ⟷ True" | "sublist_at _ [] _ ⟷ False"
text‹In the relevant cases, both definitions agree:› lemma"i ≤ length ys ==> sublist_at xs ys i ⟷ sublist_at' xs ys i" unfolding sublist_at'_def by (induction xs ys i rule: sublist_at.induct) auto
text‹However, the new definition has some reasonable properties:› subsubsection‹Properties› lemma sublist_lengths: "sublist_at xs ys i ==> i + length xs ≤ length ys" by (induction xs ys i rule: sublist_at.induct) auto
lemma Nil_is_sublist: "sublist_at ([] :: 'x list) ys i ⟷ i ≤ length ys" by (induction"[] :: 'x list" ys i rule: sublist_at.induct) auto
text‹Furthermore, we need:› lemma sublist_step[intro]: "[i + length xs < length ys; sublist_at xs ys i; ys!(i + length xs) = x]==> sublist_at (xs@[x]) ys i" apply (induction xs ys i rule: sublist_at.induct) apply auto using sublist_at.elims(3) by fastforce
lemma all_positions_sublist: "[i + length xs ≤ length ys; ∀jj<length xs. ys!(i+jj) = xs!jj]==> sublist_at xs ys i" proof (induction xs rule: rev_induct) case Nil thenshow ?caseby (simp add: Nil_is_sublist) next case (snoc x xs) from‹i + length (xs @ [x]) ≤ length ys›have"i + length xs ≤ length ys"by simp moreoverhave"∀jj<length xs. ys!(i + jj) = xs!jj" by (simp add: nth_append snoc.prems(2)) ultimatelyhave"sublist_at xs ys i" using snoc.IH by simp thenshow ?case using snoc.prems by auto qed
lemma sublist_all_positions: "sublist_at xs ys i ==>∀jj<length xs. ys!(i+jj) = xs!jj" by (induction xs ys i rule: sublist_at.induct) (auto simp: nth_Cons')
text‹It also connects well to theory @{theory "HOL-Library.Sublist"} (compare @{thm[source] sublist_def}):› lemma sublist_at_altdef: "sublist_at xs ys i ⟷ (∃ps ss. ys = ps@xs@ss ∧ i = length ps)" proof (induction xs ys i rule: sublist_at.induct) case (2 ss t ts i) show"sublist_at ss (t#ts) (Suc i) ⟷ (∃xs ys. t#ts = xs@ss@ys ∧ Suc i = length xs)"
(is"?lhs ⟷ ?rhs") proof assume ?lhs thenhave"sublist_at ss ts i"by simp with"2.IH"obtain xs where"∃ys. ts = xs@ss@ys ∧ i = length xs"by auto thenhave"∃ys. t#ts = (t#xs)@ss@ys ∧ Suc i = length (t#xs)"by simp thenshow ?rhs by blast next assume ?rhs thenobtain xs where"∃ys. t#ts = xs@ss@ys ∧ length xs = Suc i" by (blast dest: sym) thenhave"∃ys. ts = (tl xs)@ss@ys ∧ i = length (tl xs)" by (auto simp add: length_Suc_conv) thenhave"∃xs ys. ts = xs@ss@ys ∧ i = length xs"by blast with"2.IH"show ?lhs by simp qed qed auto
text‹
We use the Isabelle Refinement Framework (Theory @{theory Refine_Monadic.Refine_Monadic}) to
phrase the specification and the algorithm. › text‹@{term s} for "searchword" / "searchlist", @{term t} for "text"› definition"kmp_SPEC s t = SPEC (λ None ==>∄i. sublist_at s t i | Some i ==> sublist_at s t i ∧ (∀ii<i. ¬sublist_at s t ii))"
lemma is_arg_min_id: "is_arg_min id P i ⟷ P i ∧ (∀ii<i. ¬P ii)" unfolding is_arg_min_def by auto
lemma kmp_result: "kmp_SPEC s t = RETURN (if sublist s t then Some (LEAST i. sublist_at s t i) else None)" unfolding kmp_SPEC_def sublist_iff_sublist_at apply (auto intro: LeastI dest: not_less_Least split: option.splits) by (meson LeastI nat_neq_iff not_less_Least)
corollary weak_kmp_SPEC: "kmp_SPEC s t ≤ SPEC (λpos. pos≠None ⟷ Sublist.sublist s t)" by (simp add: kmp_result)
text‹Since KMP is a direct advancement of the naive "test-all-starting-positions" approach, we provide it here for comparison:›
subsection‹Invariants› definition"I_out_na s t ≡ λ(i,j,pos). (∀ii<i. ¬sublist_at s t ii) ∧ (case pos of None ==> j = 0 | Some p ==> p=i ∧ sublist_at s t i)" definition"I_in_na s t i ≡ λ(j,pos). case pos of None ==> j < length s ∧ (∀jj<j. t!(i+jj) = s!(jj)) | Some p ==> sublist_at s t i"
subsection‹Algorithm›
(*Algorithm is common knowledge \<longrightarrow> remove citation here, move explanations to KMP below?*) text‹The following definition is taken from Helmut Seidl's lecture on algorithms and data structurescite‹GAD› except that we
▪ output the identified position @{term ‹pos :: nat option›} instead of just @{const True}
▪ use @{term ‹pos :: nat option›} as break-flag to support the abort within the loops
▪ rewrite @{prop ‹i ≤ length t - length s›} in the first while-condition to @{prop ‹i + length s ≤ length t›} to avoid having to use @{typ int} for list indexes (or the additional precondition @{prop ‹length s ≤ length t›}) ›
definition"naive_algorithm s t ≡ do { let i=0; let j=0; let pos=None; (_,_,pos) ← WHILEIT (I_out_na s t) (λ(i,_,pos). i + length s ≤ length t ∧ pos=None) (λ(i,j,pos). do { (_,pos) ← WHILEIT (I_in_na s t i) (λ(j,pos). t!(i+j) = s!j ∧ pos=None) (λ(j,_). do { let j=j+1; if j=length s then RETURN (j,Some i) else RETURN (j,None) }) (j,pos); if pos=None then do { let i = i + 1; let j = 0; RETURN (i,j,None) } else RETURN (i,j,Some i) }) (i,j,pos);
RETURN pos }"
subsection‹Correctness›
text‹The basic lemmas on @{const sublist_at} from the previous chapter together with @{theory Refine_Monadic.Refine_Monadic}'s verification condition generator / solver suffice:›
lemma"s ≠ [] ==> naive_algorithm s t ≤ kmp_SPEC s t" unfolding naive_algorithm_def kmp_SPEC_def I_out_na_def I_in_na_def apply (refine_vcg
WHILEIT_rule[where R="measure (λ(i,_,pos). length t - i + (if pos = None then 1 else 0))"]
WHILEIT_rule[where R="measure (λ(j,_::nat option). length s - j)"]
) apply (vc_solve solve: asm_rl)
subgoal by (metis add_Suc_right all_positions_sublist less_antisym)
subgoal using less_Suc_eq by blast
subgoal by (metis less_SucE sublist_all_positions)
subgoal by (auto split: option.splits) (metis sublist_lengths add_less_cancel_right leI le_less_trans) done
text‹Note that the precondition cannot be removed without an extra branch: If @{prop ‹s = []›}, the inner while-condition accesses out-of-bound memory. This will apply to KMP, too.›
section‹Knuth--Morris--Pratt algorithm›
text‹Just like our templatescite‹KMP77›cite‹GAD›, we first verify the main routine and discuss the computation of the auxiliary values @{term ‹f s›} only in a later section.›
interpretation border_order: order border strict_border by standard (auto simp: border_def suffix_def strict_border_def) interpretation border_bot: order_bot Nil border strict_border by standard (simp add: border_def)
lemma all_positions_drop_length_take: "[i ≤ length w; i ≤ length x; ∀j<i. x ! j = w ! (length w + j - i)] ==> drop (length w - i) w = take i x" by (cases "i = length x") (auto intro: nth_equalityI)
lemma all_positions_suffix_take: "[i ≤ length w; i ≤ length x; ∀j<i. x ! j = w ! (length w + j - i)] ==> suffix (take i x) w" by (metis all_positions_drop_length_take suffix_drop)
lemma border_example: "{b. border b ''aabaabaa''} = {'''', ''a'', ''aa'', ''aabaa'', ''aabaabaa''}"
(is"{b. border b ?l} = {?take0, ?take1, ?take2, ?take5, ?l}") proof show"{?take0, ?take1, ?take2, ?take5, ?l} ⊆ {b. border b ?l}" by simp eval have"¬border ''aab'' ?l""¬border ''aaba'' ?l""¬border ''aabaab'' ?l""¬border ''aabaaba'' ?l" by eval+ moreoverhave"{b. border b ?l} ⊆ set (prefixes ?l)" using border_def in_set_prefixes by blast ultimatelyshow"{b. border b ?l} ⊆ {?take0, ?take1, ?take2, ?take5, ?l}" by auto qed
corollary strict_border_example: "{b. strict_border b ''aabaabaa''} = {'''', ''a'', ''aa'', ''aabaa''}"
(is"?l = ?r") proof have"?l ⊆ {b. border b ''aabaabaa''}" by auto alsohave"… = {'''', ''a'', ''aa'', ''aabaa'', ''aabaabaa''}" by (fact border_example) finallyshow"?l ⊆ ?r"by auto show"?r ⊆ ?l"by simp eval qed
corollary"intrinsic_border ''aabaabaa'' = ''aabaa''" proof - ― ‹We later obtain a fast algorithm for that.› have exhaust: "strict_border b ''aabaabaa'' ⟷ b ∈ {'''', ''a'', ''aa'', ''aabaa''}"for b using strict_border_example by auto thenhave "¬is_arg_max length (λb. strict_border b ''aabaabaa'') ''''" "¬is_arg_max length (λb. strict_border b ''aabaabaa'') ''a''" "¬is_arg_max length (λb. strict_border b ''aabaabaa'') ''aa''" "is_arg_max length (λb. strict_border b ''aabaabaa'') ''aabaa''" unfolding is_arg_max_linorder by auto moreoverhave"strict_border (intrinsic_border ''aabaabaa'') ''aabaabaa''" using intrinsic_borderI' by blast note this[unfolded exhaust] ultimatelyshow ?thesis by simp (metis list.discI nonempty_is_arg_max_ib) qed
subsection‹Main routine›
text‹The following is Seidl's "border"-tablecite‹GAD› (values shifted by 1 so we don't need @{typ int}),
equivalently, "f" from Knuth's, Morris' and Pratt's papercite‹KMP77› (with indexes starting at 0).› funf :: "'a list ==> nat ==> nat"where "f s 0 = 0" ― ‹This increments the compare position while @{prop ‹j=(0::nat)›}› | "f s j = length (intrinsic_border (take j s)) + 1" text‹Note that we use their "next" only implicitly.›
subsubsection‹Invariants› definition"I_outer s t ≡ λ(i,j,pos). (∀ii<i. ¬sublist_at s t ii) ∧ (case pos of None ==> (∀jj<j. t!(i+jj) = s!(jj)) ∧ j < length s | Some p ==> p=i ∧ sublist_at s t i)" text‹For the inner loop, we can reuse @{const I_in_na}.›
subsubsection‹Algorithm› text‹First, we use the non-evaluable function @{const f} directly:› definition"kmp s t ≡ do { ASSERT (s ≠ []); let i=0; let j=0; let pos=None; (_,_,pos) ← WHILEIT (I_outer s t) (λ(i,j,pos). i + length s ≤ length t ∧ pos=None) (λ(i,j,pos). do { ASSERT (i + length s ≤ length t); (j,pos) ← WHILEIT (I_in_na s t i) (λ(j,pos). t!(i+j) = s!j ∧ pos=None) (λ(j,pos). do { let j=j+1; if j=length s then RETURN (j,Some i) else RETURN (j,None) }) (j,pos); if pos=None then do { ASSERT (j < length s); let i = i + (j - f s j + 1); let j = max 0 (f s j - 1); ― ‹‹max› not necessary› RETURN (i,j,None) } else RETURN (i,j,Some i) }) (i,j,pos);
RETURN pos }"
subsubsection‹Correctness› lemmaf_eq_0_iff_j_eq_0[simp]: "f s j = 0 ⟷ j = 0" by (cases j) simp_all
lemma j_le_f_le: "j ≤ length s ==>f s j ≤ j" apply (cases j) apply simp_all by (metis Suc_leI intrinsic_border_less length_take list.size(3) min.absorb2 nat.simps(3) not_less)
lemma j_le_f_le': "0 < j ==> j ≤ length s ==>f s j - 1 < j" by (metis diff_less j_le_f_le le_eq_less_or_eq less_imp_diff_less less_one)
lemmaf_le: "s ≠ [] ==>f s j - 1 < length s" by (cases j) (simp_all add: intrinsic_border_take_less)
lemma reuse_matches: assumes j_le: "j ≤ length s" and old_matches: "∀jj<j. t ! (i + jj) = s ! jj" shows"∀jj<f s j - 1. t ! (i + (j - f s j + 1) + jj) = s ! jj"
(is"∀jj<?j'. t ! (?i' + jj) = s ! jj") proof (cases "j>0") assume"j>0" havef_le: "f s j ≤ j" by (simp add: j_le j_le_f_le) with old_matches have1: "∀jj<?j'. t ! (?i' + jj) = s ! (j - f s j + 1 + jj)" by (metis ab_semigroup_add_class.add.commute add.assoc diff_diff_cancel less_diff_conv) have [simp]: "length (take j s) = j""length (intrinsic_border (take j s)) = ?j'" by (simp add: j_le) (metis ‹0 < j› diff_add_inverse2 f.elims nat_neq_iff) thenhave"∀jj<?j'. take j s ! jj = take j s ! (j - (f s j - 1) + jj)" by (metis intrinsic_borderI' ‹0 < j› border_positions length_greater_0_conv strict_border_def) thenhave"∀jj<?j'. take j s ! jj = take j s ! (j - f s j + 1 + jj)" by (simp add: f_le) thenhave2: "∀jj<?j'. s ! (j - f s j + 1 + jj) = s ! jj" usingf_le by simp from12show ?thesis by simp qed simp
theorem shift_safe: assumes "∀ii<i. ¬sublist_at s t ii" "t!(i+j) ≠ s!j"and
[simp]: "j < length s"and
matches: "∀jj<j. t!(i+jj) = s!jj" defines
assignment: "i' ≡ i + (j - f s j + 1)" shows "∀ii<i'. ¬sublist_at s t ii" proof (standard, standard) fix ii assume"ii < i'" then consider ― ‹The position falls into one of three categories:›
(old) "ii < i" |
(current) "ii = i" |
(skipped) "ii > i" by linarith thenshow"¬sublist_at s t ii" proof cases case old ― ‹Old position, use invariant.› with‹∀ii<i. ¬sublist_at s t ii›show ?thesis by simp next case current ― ‹The mismatch occurred while testing this alignment.› with‹t!(i+j) ≠ s!j›show ?thesis using sublist_all_positions[of s t i] by auto next case skipped ― ‹The skipped positions.› thenhave"0<j" using‹ii < i'› assignment by linarith thenhave less_j[simp]: "j + i - ii < j"and le_s: "j + i - ii ≤ length s" using‹ii < i'› assms(3) skipped by linarith+ notef_le[simp] = j_le_f_le[OF assms(3)[THEN less_imp_le]] have"0 < f s j" using‹0 < j›f_eq_0_iff_j_eq_0 neq0_conv by blast thenhave"j + i - ii > f s j - 1" using‹ii < i'› assignment f_le by linarith thenhave contradiction_goal: "j + i - ii > length (intrinsic_border (take j s))" by (metis f.elims ‹0 < j› add_diff_cancel_right' not_gr_zero) show ?thesis proof assume"sublist_at s t ii" note sublist_all_positions[OF this] with le_s have a: "∀jj < j+i-ii. t!(ii+jj) = s!jj" by simp have ff1: "¬ ii < i" by (metis not_less_iff_gr_or_eq skipped) thenhave"i + (ii - i + jj) = ii + jj"for jj by (metis add.assoc add_diff_inverse_nat) thenhave"¬ jj < j + i - ii ∨ t ! (ii + jj) = s ! (ii - i + jj)"if"ii - i + jj < j"for jj using that ff1 by (metis matches) thenhave"¬ jj < j + i - ii ∨ t ! (ii + jj) = s ! (ii - i + jj)"for jj using ff1 by auto with matches have"∀jj < j+i-ii. t!(ii+jj) = s!(ii-i+jj)"by metis thenhave"∀jj < j+i-ii. s!jj = s!(ii-i+jj)" using a by auto thenhave"∀jj < j+i-ii. (take j s)!jj = (take j s)!(ii-i+jj)" using‹i<ii\›by auto with positions_strict_border[of "j+i-ii""take j s", simplified] have"strict_border (take (j+i-ii) s) (take j s)". note intrinsic_border_max[OF this] alsonote contradiction_goal alsohave"j+i-ii ≤ length s"by (fact le_s) ultimately show False by simp qed qed qed
lemma kmp_correct: "s ≠ [] ==> kmp s t ≤ kmp_SPEC s t" unfolding kmp_def kmp_SPEC_def I_outer_def I_in_na_def apply (refine_vcg
WHILEIT_rule[where R="measure (λ(i,_,pos). length t - i + (if pos = None then 1 else 0))"]
WHILEIT_rule[where R="measure (λ(j,_::nat option). length s - j)"]
) apply (vc_solve solve: asm_rl)
subgoal by (metis add_Suc_right all_positions_sublist less_antisym)
subgoal using less_antisym by blast
subgoal for i jout j using shift_safe[of i s t j] by fastforce
subgoal for i jout j using reuse_matches[of j s t i] f_le by simp
subgoal by (auto split: option.splits) (metis sublist_lengths add_less_cancel_right leI le_less_trans) done
subsubsection‹Storing the @{const f}-values› text‹We refine the algorithm to compute the @{const f}-values only once at the start:› definition compute_fs_SPEC :: "'a list ==> nat list nres"where "compute_fs_SPEC s ≡ SPEC (λfs. length fs = length s + 1 ∧ (∀j≤length s. fs!j = f s j))"
definition"kmp1 s t ≡ do { ASSERT (s ≠ []); let i=0; let j=0; let pos=None; fs ← compute_fs_SPEC (butlast s); ― ‹At the last char, we abort instead.› (_,_,pos) ← WHILEIT (I_outer s t) (λ(i,j,pos). i + length s ≤ length t ∧ pos=None) (λ(i,j,pos). do { ASSERT (i + length s ≤ length t); (j,pos) ← WHILEIT (I_in_na s t i) (λ(j,pos). t!(i+j) = s!j ∧ pos=None) (λ(j,pos). do { let j=j+1; if j=length s then RETURN (j,Some i) else RETURN (j,None) }) (j,pos); if pos=None then do { ASSERT (j < length fs); let i = i + (j - fs!j + 1); let j = max 0 (fs!j - 1); ― ‹‹max› not necessary› RETURN (i,j,None) } else RETURN (i,j,Some i) }) (i,j,pos);
RETURN pos }"
lemmaf_butlast[simp]: "j < length s ==>f (butlast s) j = f s j" by (cases j) (simp_all add: take_butlast)
text‹Next, an algorithm that satisfies @{const compute_fs_SPEC}:› subsection‹Computing @{const f}› subsubsection‹Invariants›
definition"I_out_cb s ≡ λ(fs,i,j). length s + 1 = length fs ∧ (∀jj<j. fs!jj = f s jj) ∧ fs!(j-1) = i ∧ 0 < j" definition"I_in_cb s j ≡ λi. if j=1 then i=0 ― ‹first iteration› else strict_border (take (i-1) s) (take (j-1) s) ∧ f s j ≤ i + 1"
subsubsection‹Algorithm›
text‹Again, we follow Seidlcite‹GAD›, p.582. Apart from the +1-shift, we make another modification:
of directly setting @{term ‹fs!1›}, we let the first loop-iteration (if there is one) do that for us.
allows us to remove the precondition @{prop ‹s ≠ []›}, as the index bounds are respected even in that corner case.›
definition compute_fs :: "'a list ==> nat list nres"where "compute_fs s = do { let fs=replicate (length s + 1) 0; ― ‹only the first 0 is needed› let i=0; let j=1; (fs,_,_) ← WHILEIT (I_out_cb s) (λ(fs,_,j). j < length fs) (λ(fs,i,j). do { i ← WHILEIT (I_in_cb s j) (λi. i>0 ∧ s!(i-1) ≠ s!(j-1)) (λi. do { ASSERT (i-1 < length fs); let i=fs!(i-1); RETURN i }) i; let i=i+1; ASSERT (j < length fs); let fs=fs[j:=i]; let j=j+1; RETURN (fs,i,j) }) (fs,i,j); RETURN fs }"
corollary strict_border_step: "strict_border xs ys ⟷ strict_border (xs@[ys!length xs]) (ys@[ys!length xs])" unfolding strict_border_def using border_step by auto
lemma ib_butlast: "length w ≥ 2 ==> length (intrinsic_border w) ≤ length (intrinsic_border (butlast w)) + 1" proof - assume"length w ≥ 2" thenhave"w ≠ []"by auto thenhave"strict_border (intrinsic_border w) w" by (fact intrinsic_borderI') with‹2 ≤ length w›have"strict_border (butlast (intrinsic_border w)) (butlast w)" by (metis One_nat_def border_bot.bot.not_eq_extremum butlast.simps(1) len_greater_imp_nonempty length_butlast lessI less_le_trans numerals(2) strict_border_butlast zero_less_diff) thenhave"length (butlast (intrinsic_border w)) ≤ length (intrinsic_border (butlast w))" using intrinsic_border_max by blast thenshow ?thesis by simp qed
corollaryf_Suc(*rm*): "Suc i \<le> length w \<Longrightarrow> \<ff> w (Suc i) \<le> \<ff> w i + 1" apply (cases i) apply (simp_all add: take_Suc0) by (metis One_nat_def Suc_eq_plus1 Suc_to_right butlast_take diff_is_0_eq ib_butlast length_take min.absorb2 nat.simps(3) not_less_eq_eq numerals(2))
lemmaf_step_bound(*rm*): assumes"j ≤ length w" shows"f w j ≤f w (j-1) + 1" using assms[THEN j_le_f_le] f_Suc assms by (metis One_nat_def Suc_pred le_SucI not_gr_zero trans_le_add2)
lemma border_take_f: "border (take (f s i - 1) s ) (take i s)" apply (cases i, simp_all) by (metis intrinsic_borderI' border_order.order.eq_iff border_order.less_imp_le border_positions nat.simps(3) nat_le_linear positions_border take_all take_eq_Nil take_length_ib zero_less_Suc)
corollaryf_strict_borderI: "y = f s (i-1) ==> strict_border (take (i-1) s) (take (j-1) s) ==> strict_border (take (y-1) s) (take (j-1) s)" using border_order.less_le_not_le border_order.order.trans border_take_fby blast
corollary strict_border_take_f: "0 < i ==> i ≤ length s ==> strict_border (take (f s i - 1) s ) (take i s)" by (meson border_order.less_le_not_le border_take_f border_take_lengths j_le_f_le' leD)
lemmaf_is_max: "j ≤ length s ==> strict_border b (take j s) ==>f s j ≥ length b + 1" by (metis f.elims add_le_cancel_right add_less_same_cancel2 border_length_r_less intrinsic_border_max length_take min_absorb2 not_add_less2)
theorem skipping_ok: assumes j_bounds[simp]: "1 < j""j ≤ length s" and mismatch: "s!(i-1) ≠ s!(j-1)" and greater_checked: "f s j ≤ i + 1" and"strict_border (take (i-1) s) (take (j-1) s)" shows"f s j ≤f s (i-1) + 1" proof (rule ccontr) assume"¬f s j ≤f s (i-1) + 1" thenhave i_bounds: "0 < i""i ≤ length s" using greater_checked assms(5) take_Nil by fastforce+ thenhave i_less_j: "i < j" using assms(5) border_length_r_less nz_le_conv_less by auto from‹¬f s j ≤f s (i-1) + 1› greater_checked consider
(tested) "f s j = i + 1" ― ‹This contradicts @{thm mismatch}› |
(skipped) "f s (i-1) + 1 < f s j""f s j ≤ i"
― ‹This contradicts @{thm f_is_max[of "i-1" s]}› by linarith thenshow False proof cases case tested thenhave"f s j - 1 = i"by simp moreovernote border_positions[OF border_take_f[of s j, unfolded this]] ultimatelyhave"take j s ! (i-1) = s!(j-1)"using i_bounds i_less_j by simp with‹i < j›have"s!(i-1) = s!(j-1)" by (simp add: less_imp_diff_less) with mismatch show False.. next case skipped let ?border = "take (i-1) s"
― ‹This border of @{term ‹take (j-1) s›} could not be extended to a border of @{term ‹take j s›} due to the mismatch.› let ?impossible = "take (f s j - 2) s"
― ‹A strict border longer than @{term ‹intrinsic_border ?border›}, a contradiction.› have"length (take j s) = j" by simp have"f s j - 2 < i - 1" using skipped by linarith thenhave less_s: "f s j - 2 < length s""i - 1 < length s" using‹i < j› j_bounds(2) by linarith+ thenhave strict: "length ?impossible < length ?border" using‹f s j - 2 < i - 1›by auto moreover { have"prefix ?impossible (take j s)" using prefix_length_prefix take_is_prefix by (metis (no_types, lifting) ‹length (take j s) = j› j_bounds(2) diff_le_self j_le_f_le length_take less_s(1) min_simps(2) order_trans) moreoverhave"prefix ?border (take j s)" by (metis (no_types, lifting) ‹length (take j s) = j› diff_le_self i_less_j le_trans length_take less_or_eq_imp_le less_s(2) min_simps(2) prefix_length_prefix take_is_prefix) ultimatelyhave"prefix ?impossible ?border" using strict less_imp_le_nat prefix_length_prefix by blast
} moreover { have"suffix (take (f s j - 1) s) (take j s)"using border_take_f by (auto simp: border_def) note suffix_butlast[OF this] thenhave"suffix ?impossible (take (j-1) s)" by (metis One_nat_def j_bounds(2) butlast_take diff_diff_left f_le len_greater_imp_nonempty less_or_eq_imp_le less_s(2) one_add_one) thenhave"suffix ?impossible (take (j-1) s)""suffix ?border (take (j-1) s)" using assms(5) by auto from suffix_length_suffix[OF this strict[THEN less_imp_le]] have"suffix ?impossible ?border".
} ultimatelyhave"strict_border ?impossible ?border" unfolding strict_border_def[unfolded border_def] by blast notef_is_max[of "i-1" s, OF _ this] thenhave"length (take (f s j - 2) s) + 1 ≤f s (i-1)" using less_imp_le_nat less_s(2) by blast thenhave"f s j - 1 ≤f s (i-1)" by (simp add: less_s(1)) thenhave"f s j ≤f s (i-1) + 1" using le_diff_conv by blast with skipped(1) show False by linarith qed qed
lemma extend_border: assumes"j ≤ length s" assumes"s!(i-1) = s!(j-1)" assumes"strict_border (take (i-1) s) (take (j-1) s)" assumes"f s j ≤ i + 1" shows"f s j = i + 1" proof - from assms(3) have pos_in_range: "i - 1 < length s ""length (take (i-1) s) = i - 1" using border_length_r_less min_less_iff_conj by auto with strict_border_step[THEN iffD1, OF assms(3)] have"strict_border (take (i-1) s @ [s!(i-1)]) (take (j-1) s @ [s!(i-1)])" by (metis assms(3) border_length_r_less length_take min_less_iff_conj nth_take) with pos_in_range have"strict_border (take i s) (take (j-1) s @ [s!(i-1)])" by (metis Suc_eq_plus1 Suc_pred add.left_neutral border_bot.bot.not_eq_extremum border_order.less_asym neq0_conv take_0 take_Suc_conv_app_nth) thenhave"strict_border (take i s) (take (j-1) s @ [s!(j-1)])" by (simp only: ‹s!(i-1) = s!(j-1)›) thenhave"strict_border (take i s) (take j s)" by (metis One_nat_def Suc_pred assms(1,3) diff_le_self less_le_trans neq0_conv nz_le_conv_less strict_border_imp_nonempty take_Suc_conv_app_nth take_eq_Nil) withf_is_max[OF assms(1) this] have"f s j ≥ i + 1" using Suc_leI by fastforce with‹f s j ≤ i + 1›show ?thesis using le_antisym by presburger qed
lemma compute_fs_correct: "compute_fs s ≤ compute_fs_SPEC s" unfolding compute_fs_SPEC_def compute_fs_def I_out_cb_def I_in_cb_def apply (simp, refine_vcg
WHILEIT_rule[where R="measure (λ(fs,i,j). length s + 1 - j)"]
WHILEIT_rule[where R="measure id"] ― ‹@{term ‹i::nat›} decreases with every iteration.›
) apply (vc_solve, fold One_nat_def)
subgoal for b j by (rule strict_border_take_f, auto)
subgoal by (metis Suc_eq_plus1 f_step_bound less_Suc_eq_le)
subgoal by fastforce
subgoal by (metis (no_types, lifting) One_nat_def Suc_lessD Suc_pred border_length_r_less f_strict_borderI length_take less_Suc_eq less_Suc_eq_le min.absorb2)
subgoal for b j i by (metis (no_types, lifting) One_nat_def Suc_diff_1 Suc_eq_plus1 Suc_leI border_take_lengths less_Suc_eq_le less_antisym skipping_ok strict_border_def)
subgoal by (metis Suc_diff_1 border_take_lengths j_le_f_le less_Suc_eq_le strict_border_def)
subgoal for b j i jj by (metis Suc_eq_plus1 Suc_eq_plus1_left add.right_neutral extend_border f_eq_0_iff_j_eq_0 j_le_f_le le_zero_eq less_Suc_eq less_Suc_eq_le nth_list_update_eq nth_list_update_neq)
subgoal by linarith done
subsubsection‹Index shift› text‹To avoid inefficiencies, we refine @{const compute_fs} to take @{term s}
of @{term ‹butlast s›} (it still only uses @{term ‹butlast s›}).› definition compute_butlast_fs :: "'a list ==> nat list nres"where "compute_butlast_fs s = do { let fs=replicate (length s) 0; let i=0; let j=1; (fs,_,_) ← WHILEIT (I_out_cb (butlast s)) (λ(b,i,j). j < length b) (λ(fs,i,j). do { ASSERT (j < length fs); i ← WHILEIT (I_in_cb (butlast s) j) (λi. i>0 ∧ s!(i-1) ≠ s!(j-1)) (λi. do { ASSERT (i-1 < length fs); let i=fs!(i-1); RETURN i }) i; let i=i+1; ASSERT (j < length fs); let fs=fs[j:=i]; let j=j+1; RETURN (fs,i,j) }) (fs,i,j); RETURN fs }"
lemma compute_fs_inner_bounds: assumes"I_out_cb s (fs,ix,j)" assumes"j < length fs" assumes"I_in_cb s j i" shows"i-1 < length s""j-1 < length s" using assms by (auto simp: I_out_cb_def I_in_cb_def split: if_splits)
lemma compute_butlast_fs_refine[refine]: assumes"(s,s') ∈ br butlast ((≠) [])" shows"compute_butlast_fs s ≤⇓ Id (compute_fs_SPEC s')" proof - have"compute_butlast_fs s ≤⇓ Id (compute_fs s')" unfolding compute_butlast_fs_def compute_fs_def apply (refine_rcg) apply (refine_dref_type) using assms apply (vc_solve simp: in_br_conv) apply (metis Suc_pred length_greater_0_conv replicate_Suc) by (metis One_nat_def compute_fs_inner_bounds nth_butlast) alsonote compute_fs_correct finallyshow ?thesis by simp qed
subsection‹Conflation› text‹We replace @{const compute_fs_SPEC} with @{const compute_butlast_fs}› definition"kmp2 s t ≡ do { ASSERT (s ≠ []); let i=0; let j=0; let pos=None; fs ← compute_butlast_fs s; (_,_,pos) ← WHILEIT (I_outer s t) (λ(i,j,pos). i + length s ≤ length t ∧ pos=None) (λ(i,j,pos). do { ASSERT (i + length s ≤ length t ∧ pos=None); (j,pos) ← WHILEIT (I_in_na s t i) (λ(j,pos). t!(i+j) = s!j ∧ pos=None) (λ(j,pos). do { let j=j+1; if j=length s then RETURN (j,Some i) else RETURN (j,None) }) (j,pos); if pos=None then do { ASSERT (j < length fs); let i = i + (j - fs!j + 1); let j = max 0 (fs!j - 1); ― ‹‹max› not necessary› RETURN (i,j,None) } else RETURN (i,j,Some i) }) (i,j,pos);
RETURN pos }"
text‹Using @{thm [source] compute_butlast_fs_refine} (it has attribute @{attribute refine}), the proof is trivial:› lemma kmp2_refine: "kmp2 s t ≤ kmp1 s t" apply (rule refine_IdD) unfolding kmp2_def kmp1_def apply refine_rcg apply refine_dref_type apply (vc_solve simp: in_br_conv) done
lemma kmp2_correct: "s ≠ [] ==> kmp2 s t ≤ kmp_SPEC s t" proof - assume"s ≠ []" have"kmp2 s t ≤ kmp1 s t"by (fact kmp2_refine) alsohave"... ≤ kmp s t"by (fact kmp1_refine) alsohave"... ≤ kmp_SPEC s t"by (fact kmp_correct[OF ‹s ≠ []›]) finallyshow ?thesis. qed
text‹For convenience, we also remove the precondition:› definition"kmp3 s t ≡ do { if s=[] then RETURN (Some 0) else kmp2 s t }"
lemma kmp3_correct: "kmp3 s t ≤ kmp_SPEC s t" unfolding kmp3_def by (simp add: kmp2_correct) (simp add: kmp_SPEC_def)
section‹Refinement to Imperative/HOL›
lemma eq_id_param: "((=), (=)) ∈ Id → Id → Id"by simp
lemma kmp_inner_in_bound: assumes"i + length s ≤ length t" assumes"I_in_na s t i (j,None)" shows"i + j < length t""j < length s" using assms by (auto simp: I_in_na_def)
subsection‹Overall Correctness Theorem› text‹The following theorem relates the final Imperative HOL algorithm to its specification,
using, beyond basic HOL concepts
▪ Hoare triples for Imperative/HOL, provided by the Separation Logic Framework for Imperative/HOL (Theory @{theory Separation_Logic_Imperative_HOL.Sep_Main});
▪ The assertion @{const arl_assn} to specify array-lists, which we use to represent the input strings of the algorithm;
▪ The @{const sublist_at} function that we defined in section \ref{sec:spec}. › theorem kmp_impl_correct: "< arl_assn id_assn s si * arl_assn id_assn t ti > kmp_impl si ti <λr. arl_assn id_assn s si * arl_assn id_assn t ti * ↑( case r of None ==>∄i. sublist_at s t i | Some i ==> sublist_at s t i ∧ (∀ii<i. ¬ sublist_at s t ii) )>t" by (sep_auto
simp: pure_def kmp_SPEC_def
split: option.split
heap: kmp_impl_correct'[THEN hfrefD, THEN hn_refineD, of "(s,t)""(si,ti)", simplified])
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.