Quelle KMP.thy

Sprache: Isabelle

theory KMP
  imports Refine_Imperative_HOL.IICF
    "HOL-Library.Sublist"
begin

declare len_greater_imp_nonempty[simp del] min_absorb2[simp]
no_notation Ref.update (‹_ := _› 62)

section‹Specification›text_raw‹\label{sec:spec}›

subsection‹Sublist-predicate with a position check›

subsubsection‹Definition›

text ‹One could define›
definition "sublist_at' xs ys i ≡ take (length xs) (drop i ys) = xs"

text‹However, this doesn't handle out-of-bound indexes uniformly:›
value[nbe] "sublist_at' [] [a] 5"
value[nbe] "sublist_at' [a] [a] 5"
value[nbe] "sublist_at' [] [] 5"

text‹Instead, we use a recursive definition:›
fun sublist_at :: "'a list ==> 'a list ==> nat ==> bool" where
  "sublist_at (x#xs) (y#ys) 0 ⟷ x=y ∧ sublist_at xs ys 0" |
  "sublist_at xs (y#ys) (Suc i) ⟷ sublist_at xs ys i" |
  "sublist_at [] ys 0 ⟷ True" |
  "sublist_at _ [] _ ⟷ False"

text‹In the relevant cases, both definitions agree:›
lemma "i ≤ length ys ==> sublist_at xs ys i ⟷ sublist_at' xs ys i"
  unfolding sublist_at'_def
  by (induction xs ys i rule: sublist_at.induct) auto

text‹However, the new definition has some reasonable properties:›
subsubsection‹Properties›
lemma sublist_lengths: "sublist_at xs ys i ==> i + length xs ≤ length ys"
  by (induction xs ys i rule: sublist_at.induct) auto

lemma Nil_is_sublist: "sublist_at ([] :: 'x list) ys i ⟷ i ≤ length ys"
  by (induction "[] :: 'x list" ys i rule: sublist_at.induct) auto

text‹Furthermore, we need:›
lemma sublist_step[intro]:
  "[i + length xs < length ys; sublist_at xs ys i; ys!(i + length xs) = x] ==> sublist_at (xs@[x]) ys i"
  apply (induction xs ys i rule: sublist_at.induct)
      apply auto
  using sublist_at.elims(3) by fastforce

lemma all_positions_sublist:
"[i + length xs ≤ length ys; ∀jj<length xs. ys!(i+jj) = xs!jj] ==> sublist_at xs ys i"
proof (induction xs rule: rev_induct)
  case Nil
  then show ?case by (simp add: Nil_is_sublist)
next
  case (snoc x xs)
  from ‹i + length (xs @ [x]) ≤ length ys› have "i + length xs ≤ length ys" by simp
  moreover have "∀jj<length xs. ys!(i + jj) = xs!jj"
    by (simp add: nth_append snoc.prems(2))
  ultimately have "sublist_at xs ys i"
    using snoc.IH by simp
  then show ?case
    using snoc.prems by auto
qed

lemma sublist_all_positions: "sublist_at xs ys i ==> ∀jj<length xs. ys!(i+jj) = xs!jj"
  by (induction xs ys i rule: sublist_at.induct) (auto simp: nth_Cons')

text‹It also connects well to theory @{theory "HOL-Library.Sublist"} (compare @{thm[source] sublist_def}):›
lemma sublist_at_altdef:
  "sublist_at xs ys i ⟷ (∃ps ss. ys = ps@xs@ss ∧ i = length ps)"
proof (induction xs ys i rule: sublist_at.induct)
  case (2 ss t ts i)
  show "sublist_at ss (t#ts) (Suc i) ⟷ (∃xs ys. t#ts = xs@ss@ys ∧ Suc i = length xs)"
    (is "?lhs ⟷ ?rhs")
  proof
    assume ?lhs
    then have "sublist_at ss ts i" by simp
    with "2.IH" obtain xs where "∃ys. ts = xs@ss@ys ∧ i = length xs" by auto
    then have "∃ys. t#ts = (t#xs)@ss@ys ∧ Suc i = length (t#xs)" by simp
    then show ?rhs by blast
  next
    assume ?rhs
    then obtain xs where "∃ys. t#ts = xs@ss@ys ∧ length xs = Suc i"
      by (blast dest: sym)
    then have "∃ys. ts = (tl xs)@ss@ys ∧ i = length (tl xs)"
      by (auto simp add: length_Suc_conv)
    then have "∃xs ys. ts = xs@ss@ys ∧ i = length xs" by blast
    with "2.IH" show ?lhs by simp
  qed
qed auto

corollary sublist_iff_sublist_at: "Sublist.sublist xs ys ⟷ (∃i. sublist_at xs ys i)"
  by (simp add: sublist_at_altdef Sublist.sublist_def)

subsection‹Sublist-check algorithms›

text‹
We use the Isabelle Refinement Framework (Theory @{theory Refine_Monadic.Refine_Monadic}) to
phrase the specification and the algorithm.
›
text‹@{term s} for "searchword" / "searchlist", @{term t} for "text"›
definition "kmp_SPEC s t = SPEC (λ
  None ==> ∄i. sublist_at s t i |
  Some i ==> sublist_at s t i ∧ (∀ii<i. ¬sublist_at s t ii))"

lemma is_arg_min_id: "is_arg_min id P i ⟷ P i ∧ (∀ii<i. ¬P ii)"
  unfolding is_arg_min_def by auto

lemma kmp_result: "kmp_SPEC s t =
  RETURN (if sublist s t then Some (LEAST i. sublist_at s t i) else None)"
  unfolding kmp_SPEC_def sublist_iff_sublist_at
  apply (auto intro: LeastI dest: not_less_Least split: option.splits)
  by (meson LeastI nat_neq_iff not_less_Least)

corollary weak_kmp_SPEC: "kmp_SPEC s t ≤ SPEC (λpos. pos≠None ⟷ Sublist.sublist s t)"
  by (simp add: kmp_result)

lemmas kmp_SPEC_altdefs =
  kmp_SPEC_def[folded is_arg_min_id]
  kmp_SPEC_def[folded sublist_iff_sublist_at]
  kmp_result

section‹Naive algorithm›

text‹Since KMP is a direct advancement of the naive "test-all-starting-positions" approach, we provide it here for comparison:›

subsection‹Invariants›
definition "I_out_na s t ≡ λ(i,j,pos).
  (∀ii<i. ¬sublist_at s t ii) ∧
  (case pos of None ==> j = 0
    | Some p ==> p=i ∧ sublist_at s t i)"
definition "I_in_na s t i ≡ λ(j,pos).
  case pos of None ==> j < length s ∧ (∀jj<j. t!(i+jj) = s!(jj))
    | Some p ==> sublist_at s t i"

subsection‹Algorithm›

(*Algorithm is common knowledge \<longrightarrow> remove citation here, move explanations to KMP below?*)
text‹The following definition is taken from Helmut Seidl's lecture on algorithms and data structurescite‹GAD› except that we
▪ output the identified position @{term ‹pos :: nat option›} instead of just @{const True}
▪ use @{term ‹pos :: nat option›} as break-flag to support the abort within the loops
▪ rewrite @{prop ‹i ≤ length t - length s›} in the first while-condition to @{prop ‹i + length s ≤ length t›} to avoid having to use @{typ int} for list indexes (or the additional precondition @{prop ‹length s ≤ length t›})
›

definition "naive_algorithm s t ≡ do {
  let i=0;
  let j=0;
  let pos=None;
  (_,_,pos) ← WHILEIT (I_out_na s t) (λ(i,_,pos). i + length s ≤ length t ∧ pos=None) (λ(i,j,pos). do {
    (_,pos) ← WHILEIT (I_in_na s t i) (λ(j,pos). t!(i+j) = s!j ∧ pos=None) (λ(j,_). do {
      let j=j+1;
      if j=length s then RETURN (j,Some i) else RETURN (j,None)
    }) (j,pos);
    if pos=None then do {
      let i = i + 1;
      let j = 0;
      RETURN (i,j,None)
    } else RETURN (i,j,Some i)
  }) (i,j,pos);

  RETURN pos
}"

subsection‹Correctness›

text‹The basic lemmas on @{const sublist_at} from the previous chapter together with @{theory Refine_Monadic.Refine_Monadic}'s verification condition generator / solver suffice:›

lemma "s ≠ [] ==> naive_algorithm s t ≤ kmp_SPEC s t"
  unfolding naive_algorithm_def kmp_SPEC_def I_out_na_def I_in_na_def
  apply (refine_vcg
    WHILEIT_rule[where R="measure (λ(i,_,pos). length t - i + (if pos = None then 1 else 0))"]
    WHILEIT_rule[where R="measure (λ(j,_::nat option). length s - j)"]
    )
                 apply (vc_solve solve: asm_rl)
  subgoal by (metis add_Suc_right all_positions_sublist less_antisym)
  subgoal using less_Suc_eq by blast
  subgoal by (metis less_SucE sublist_all_positions)
  subgoal by (auto split: option.splits) (metis sublist_lengths add_less_cancel_right leI le_less_trans)
  done

text‹Note that the precondition cannot be removed without an extra branch: If @{prop ‹s = []›}, the inner while-condition accesses out-of-bound memory. This will apply to KMP, too.›

section‹Knuth--Morris--Pratt algorithm›

text‹Just like our templatescite‹KMP77›cite‹GAD›, we first verify the main routine and discuss the computation of the auxiliary values @{term ‹f s›} only in a later section.›

subsection‹Preliminaries: Borders of lists›

definition "border xs ys ⟷ prefix xs ys ∧ suffix xs ys"
definition "strict_border xs ys ⟷ border xs ys ∧ length xs < length ys"
definition "intrinsic_border ls ≡ ARG_MAX length b. strict_border b ls"

subsubsection‹Properties›

interpretation border_order: order border strict_border
  by standard (auto simp: border_def suffix_def strict_border_def)
interpretation border_bot: order_bot Nil border strict_border
  by standard (simp add: border_def)

lemma borderE[elim]:
  fixes xs ys :: "'a list"
  assumes "border xs ys"
  obtains "prefix xs ys" and "suffix xs ys"
  using assms unfolding border_def by blast

lemma strict_borderE[elim]:
  fixes xs ys :: "'a list"
  assumes "strict_border xs ys"
  obtains "border xs ys" and "length xs < length ys"
  using assms unfolding strict_border_def by blast

lemma strict_border_simps[simp]:
  "strict_border xs [] ⟷ False"
  "strict_border [] (x # xs) ⟷ True"
  by (simp_all add: strict_border_def)

lemma strict_border_prefix: "strict_border xs ys ==> strict_prefix xs ys"
  and strict_border_suffix: "strict_border xs ys ==> strict_suffix xs ys"
  and strict_border_imp_nonempty: "strict_border xs ys ==> ys ≠ []"
  and strict_border_prefix_suffix: "strict_border xs ys ⟷ strict_prefix xs ys ∧ strict_suffix xs ys"
  by (auto simp: border_order.order.strict_iff_order border_def)

lemma border_length_le: "border xs ys ==> length xs ≤ length ys"
  unfolding border_def by (simp add: prefix_length_le)

lemma border_length_r_less (*rm*): "\<forall>xs. strict_border xs ys \<longrightarrow> length xs < length ys"
  using strict_borderE by auto

lemma border_positions: "border xs ys ==> ∀i<length xs. ys!i = ys!(length ys - length xs + i)"
  unfolding border_def
  by (metis diff_add_inverse diff_add_inverse2 length_append not_add_less1 nth_append prefixE suffixE)

lemma all_positions_drop_length_take: "[i ≤ length w; i ≤ length x;
  ∀j<i. x ! j = w ! (length w + j - i)]
    ==> drop (length w - i) w = take i x"
  by (cases "i = length x") (auto intro: nth_equalityI)

lemma all_positions_suffix_take: "[i ≤ length w; i ≤ length x;
  ∀j<i. x ! j = w ! (length w + j - i)]
    ==> suffix (take i x) w"
  by (metis all_positions_drop_length_take suffix_drop)

lemma suffix_butlast: "suffix xs ys ==> suffix (butlast xs) (butlast ys)"
  unfolding suffix_def by (metis append_Nil2 butlast.simps(1) butlast_append)

lemma positions_border: "∀j<l. w!j = w!(length w - l + j) ==> border (take l w) w"
  by (cases "l < length w") (simp_all add: border_def all_positions_suffix_take take_is_prefix)

lemma positions_strict_border: "l < length w ==> ∀j<l. w!j = w!(length w - l + j) ==> strict_border (take l w) w"
  by (simp add: positions_border strict_border_def)

lemmas intrinsic_borderI = arg_max_natI[OF _ border_length_r_less, folded intrinsic_border_def]

lemmas intrinsic_borderI' = border_bot.bot.not_eq_extremum[THEN iffD1, THEN intrinsic_borderI]

lemmas intrinsic_border_max = arg_max_nat_le[OF _ border_length_r_less, folded intrinsic_border_def]

lemma nonempty_is_arg_max_ib: "ys ≠ [] ==> is_arg_max length (λxs. strict_border xs ys) (intrinsic_border ys)"
  by (simp add: intrinsic_borderI' intrinsic_border_max is_arg_max_linorder)

lemma intrinsic_border_less: "w ≠ [] ==> length (intrinsic_border w) < length w"
  using intrinsic_borderI[of w] border_length_r_less intrinsic_borderI' by blast

lemma intrinsic_border_take_less: "j > 0 ==> w ≠ [] ==> length (intrinsic_border (take j w)) < length w"
  by (metis intrinsic_border_less length_take less_not_refl2 min_less_iff_conj take_eq_Nil)

subsubsection‹Examples›

lemma border_example: "{b. border b ''aabaabaa''} = {'''', ''a'', ''aa'', ''aabaa'', ''aabaabaa''}"
  (is "{b. border b ?l} = {?take0, ?take1, ?take2, ?take5, ?l}")
proof
  show "{?take0, ?take1, ?take2, ?take5, ?l} ⊆ {b. border b ?l}"
    by simp eval
  have "¬border ''aab'' ?l" "¬border ''aaba'' ?l" "¬border ''aabaab'' ?l" "¬border ''aabaaba'' ?l"
    by eval+
  moreover have "{b. border b ?l} ⊆ set (prefixes ?l)"
    using border_def in_set_prefixes by blast
  ultimately show "{b. border b ?l} ⊆ {?take0, ?take1, ?take2, ?take5, ?l}"
    by auto
qed

corollary strict_border_example: "{b. strict_border b ''aabaabaa''} = {'''', ''a'', ''aa'', ''aabaa''}"
  (is "?l = ?r")
proof
  have "?l ⊆ {b. border b ''aabaabaa''}"
    by auto
  also have "… = {'''', ''a'', ''aa'', ''aabaa'', ''aabaabaa''}"
    by (fact border_example)
  finally show "?l ⊆ ?r" by auto
  show "?r ⊆ ?l" by simp eval
qed

corollary "intrinsic_border ''aabaabaa'' = ''aabaa''"
proof - ― ‹We later obtain a fast algorithm for that.›
  have exhaust: "strict_border b ''aabaabaa'' ⟷ b ∈ {'''', ''a'', ''aa'', ''aabaa''}" for b
    using strict_border_example by auto
  then have
    "¬is_arg_max length (λb. strict_border b ''aabaabaa'') ''''"
    "¬is_arg_max length (λb. strict_border b ''aabaabaa'') ''a''"
    "¬is_arg_max length (λb. strict_border b ''aabaabaa'') ''aa''"
    "is_arg_max length (λb. strict_border b ''aabaabaa'') ''aabaa''"
    unfolding is_arg_max_linorder by auto
  moreover have "strict_border (intrinsic_border ''aabaabaa'') ''aabaabaa''"
    using intrinsic_borderI' by blast
  note this[unfolded exhaust]
  ultimately show ?thesis
    by simp (metis list.discI nonempty_is_arg_max_ib)
qed

subsection‹Main routine›

text‹The following is Seidl's "border"-tablecite‹GAD› (values shifted by 1 so we don't need @{typ int}),
equivalently, "f" from Knuth's, Morris' and Pratt's papercite‹KMP77› (with indexes starting at 0).›
fun f :: "'a list ==> nat ==> nat" where
  "f s 0 = 0" ― ‹This increments the compare position while @{prop ‹j=(0::nat)›}› |
  "f s j = length (intrinsic_border (take j s)) + 1"
text‹Note that we use their "next" only implicitly.›

subsubsection‹Invariants›
definition "I_outer s t ≡ λ(i,j,pos).
  (∀ii<i. ¬sublist_at s t ii) ∧
  (case pos of None ==> (∀jj<j. t!(i+jj) = s!(jj)) ∧ j < length s
    | Some p ==> p=i ∧ sublist_at s t i)"
text‹For the inner loop, we can reuse @{const I_in_na}.›

subsubsection‹Algorithm›
text‹First, we use the non-evaluable function @{const f} directly:›
definition "kmp s t ≡ do {
  ASSERT (s ≠ []);
  let i=0;
  let j=0;
  let pos=None;
  (_,_,pos) ← WHILEIT (I_outer s t) (λ(i,j,pos). i + length s ≤ length t ∧ pos=None) (λ(i,j,pos). do {
    ASSERT (i + length s ≤ length t);
    (j,pos) ← WHILEIT (I_in_na s t i) (λ(j,pos). t!(i+j) = s!j ∧ pos=None) (λ(j,pos). do {
      let j=j+1;
      if j=length s then RETURN (j,Some i) else RETURN (j,None)
    }) (j,pos);
    if pos=None then do {
      ASSERT (j < length s);
      let i = i + (j - f s j + 1);
      let j = max 0 (f s j - 1); ― ‹‹max› not necessary›
      RETURN (i,j,None)
    } else RETURN (i,j,Some i)
  }) (i,j,pos);

  RETURN pos
}"

subsubsection‹Correctness›
lemma f_eq_0_iff_j_eq_0[simp]: "f s j = 0 ⟷ j = 0"
  by (cases j) simp_all

lemma j_le_f_le: "j ≤ length s ==> f s j ≤ j"
  apply (cases j)
  apply simp_all
  by (metis Suc_leI intrinsic_border_less length_take list.size(3) min.absorb2 nat.simps(3) not_less)

lemma j_le_f_le': "0 < j ==> j ≤ length s ==> f s j - 1 < j"
  by (metis diff_less j_le_f_le le_eq_less_or_eq less_imp_diff_less less_one)

lemma f_le: "s ≠ [] ==> f s j - 1 < length s"
  by (cases j) (simp_all add: intrinsic_border_take_less)

(*
  Only needed for run-time analysis
lemma "p576 et seq":
  assumes
    "j \<le> length s" and
    assignments:
    "i' = i + (j + 1 - \<ff> s j)"
    "j' = max 0 (\<ff> s j - 1)"
  shows
    sum_no_decrease: "i' + j' \<ge> i + j" and
    i_increase: "i' > i"
  using assignments by (simp_all add: j_le_\<ff>_le[OF assms(1), THEN le_imp_less_Suc])
*)

lemma reuse_matches:
  assumes j_le: "j ≤ length s"
  and old_matches: "∀jj<j. t ! (i + jj) = s ! jj"
  shows "∀jj<f s j - 1. t ! (i + (j - f s j + 1) + jj) = s ! jj"
    (is "∀jj<?j'. t ! (?i' + jj) = s ! jj")
proof (cases "j>0")
  assume "j>0"
  have f_le: "f s j ≤ j"
    by (simp add: j_le j_le_f_le)
  with old_matches have 1: "∀jj<?j'. t ! (?i' + jj) = s ! (j - f s j + 1 + jj)"
    by (metis ab_semigroup_add_class.add.commute add.assoc diff_diff_cancel less_diff_conv)
  have [simp]: "length (take j s) = j" "length (intrinsic_border (take j s)) = ?j'"
    by (simp add: j_le) (metis ‹0 < j› diff_add_inverse2 f.elims nat_neq_iff)
  then have "∀jj<?j'. take j s ! jj = take j s ! (j - (f s j - 1) + jj)"
    by (metis intrinsic_borderI' ‹0 < j› border_positions length_greater_0_conv strict_border_def)
  then have "∀jj<?j'. take j s ! jj = take j s ! (j - f s j + 1 + jj)"
    by (simp add: f_le)
  then have 2: "∀jj<?j'. s ! (j - f s j + 1 + jj) = s ! jj"
    using f_le by simp
  from 1 2 show ?thesis by simp
qed simp

theorem shift_safe:
  assumes
    "∀ii<i. ¬sublist_at s t ii"
    "t!(i+j) ≠ s!j" and
    [simp]: "j < length s" and
    matches: "∀jj<j. t!(i+jj) = s!jj"
  defines
    assignment: "i' ≡ i + (j - f s j + 1)"
  shows
    "∀ii<i'. ¬sublist_at s t ii"
proof (standard, standard)
  fix ii
  assume "ii < i'"
  then consider ― ‹The position falls into one of three categories:›
    (old) "ii < i" |
    (current) "ii = i" |
    (skipped) "ii > i"
    by linarith
  then show "¬sublist_at s t ii"
  proof cases
    case old ― ‹Old position, use invariant.›
    with ‹∀ii<i. ¬sublist_at s t ii› show ?thesis by simp
  next
    case current ― ‹The mismatch occurred while testing this alignment.›
    with ‹t!(i+j) ≠ s!j› show ?thesis
      using sublist_all_positions[of s t i] by auto
  next
    case skipped ― ‹The skipped positions.›
    then have "0<j"
      using ‹ii < i'› assignment by linarith
    then have less_j[simp]: "j + i - ii < j" and le_s: "j + i - ii ≤ length s"
      using ‹ii < i'› assms(3) skipped by linarith+
    note f_le[simp] = j_le_f_le[OF assms(3)[THEN less_imp_le]]
    have "0 < f s j"
      using ‹0 < j› f_eq_0_iff_j_eq_0 neq0_conv by blast
    then have "j + i - ii > f s j - 1"
      using ‹ii < i'› assignment f_le by linarith
    then have contradiction_goal: "j + i - ii > length (intrinsic_border (take j s))"
      by (metis f.elims ‹0 < j› add_diff_cancel_right' not_gr_zero)
    show ?thesis
    proof
      assume "sublist_at s t ii"
      note sublist_all_positions[OF this]
      with le_s have a: "∀jj < j+i-ii. t!(ii+jj) = s!jj"
        by simp
      have ff1: "¬ ii < i"
        by (metis not_less_iff_gr_or_eq skipped)
      then have "i + (ii - i + jj) = ii + jj" for jj
        by (metis add.assoc add_diff_inverse_nat)
      then have "¬ jj < j + i - ii ∨ t ! (ii + jj) = s ! (ii - i + jj)" if "ii - i + jj < j" for jj
        using that ff1 by (metis matches)
      then have "¬ jj < j + i - ii ∨ t ! (ii + jj) = s ! (ii - i + jj)" for jj
        using ff1 by auto
      with matches have "∀jj < j+i-ii. t!(ii+jj) = s!(ii-i+jj)" by metis
      then have "∀jj < j+i-ii. s!jj = s!(ii-i+jj)"
        using a by auto
      then have "∀jj < j+i-ii. (take j s)!jj = (take j s)!(ii-i+jj)"
        using ‹i<ii\› by auto
      with positions_strict_border[of "j+i-ii" "take j s", simplified]
      have "strict_border (take (j+i-ii) s) (take j s)".
      note intrinsic_border_max[OF this]
      also note contradiction_goal
      also have "j+i-ii ≤ length s" by (fact le_s)
      ultimately
      show False by simp
    qed
  qed
qed

lemma kmp_correct: "s ≠ []
  ==> kmp s t ≤ kmp_SPEC s t"
  unfolding kmp_def kmp_SPEC_def I_outer_def I_in_na_def
  apply (refine_vcg
    WHILEIT_rule[where R="measure (λ(i,_,pos). length t - i + (if pos = None then 1 else 0))"]
    WHILEIT_rule[where R="measure (λ(j,_::nat option). length s - j)"]
    )
                   apply (vc_solve solve: asm_rl)
  subgoal by (metis add_Suc_right all_positions_sublist less_antisym)
  subgoal using less_antisym by blast
  subgoal for i jout j using shift_safe[of i s t j] by fastforce
  subgoal for i jout j using reuse_matches[of j s t i] f_le by simp
  subgoal by (auto split: option.splits) (metis sublist_lengths add_less_cancel_right leI le_less_trans)
  done

subsubsection‹Storing the @{const f}-values›
text‹We refine the algorithm to compute the @{const f}-values only once at the start:›
definition compute_fs_SPEC :: "'a list ==> nat list nres" where
  "compute_fs_SPEC s ≡ SPEC (λfs. length fs = length s + 1 ∧ (∀j≤length s. fs!j = f s j))"

definition "kmp1 s t ≡ do {
  ASSERT (s ≠ []);
  let i=0;
  let j=0;
  let pos=None;
  fs ← compute_fs_SPEC (butlast s); ― ‹At the last char, we abort instead.›
  (_,_,pos) ← WHILEIT (I_outer s t) (λ(i,j,pos). i + length s ≤ length t ∧ pos=None) (λ(i,j,pos). do {
    ASSERT (i + length s ≤ length t);
    (j,pos) ← WHILEIT (I_in_na s t i) (λ(j,pos). t!(i+j) = s!j ∧ pos=None) (λ(j,pos). do {
      let j=j+1;
      if j=length s then RETURN (j,Some i) else RETURN (j,None)
    }) (j,pos);
    if pos=None then do {
      ASSERT (j < length fs);
      let i = i + (j - fs!j + 1);
      let j = max 0 (fs!j - 1); ― ‹‹max› not necessary›
      RETURN (i,j,None)
    } else RETURN (i,j,Some i)
  }) (i,j,pos);

  RETURN pos
}"

lemma f_butlast[simp]: "j < length s ==> f (butlast s) j = f s j"
  by (cases j) (simp_all add: take_butlast)

lemma kmp1_refine: "kmp1 s t ≤ kmp s t"
  apply (rule refine_IdD)
  unfolding kmp1_def kmp_def Let_def compute_fs_SPEC_def nres_monad_laws
  apply (intro ASSERT_refine_right ASSERT_refine_left)
   apply simp
  apply (rule Refine_Basic.intro_spec_refine)
  apply refine_rcg
                apply refine_dref_type
                apply vc_solve
  done

text‹Next, an algorithm that satisfies @{const compute_fs_SPEC}:›
subsection‹Computing @{const f}›
subsubsection‹Invariants›

definition "I_out_cb s ≡ λ(fs,i,j).
  length s + 1 = length fs ∧
  (∀jj<j. fs!jj = f s jj) ∧
  fs!(j-1) = i ∧
  0 < j"
definition "I_in_cb s j ≡ λi.
  if j=1 then i=0 ― ‹first iteration›
  else
    strict_border (take (i-1) s) (take (j-1) s) ∧
    f s j ≤ i + 1"

subsubsection‹Algorithm›

text‹Again, we follow Seidlcite‹GAD›, p.582. Apart from the +1-shift, we make another modification:
of directly setting @{term ‹fs!1›}, we let the first loop-iteration (if there is one) do that for us.
allows us to remove the precondition @{prop ‹s ≠ []›}, as the index bounds are respected even in that corner case.›

definition compute_fs :: "'a list ==> nat list nres" where
  "compute_fs s = do {
  let fs=replicate (length s + 1) 0; ― ‹only the first 0 is needed›
  let i=0;
  let j=1;
  (fs,_,_) ← WHILEIT (I_out_cb s) (λ(fs,_,j). j < length fs) (λ(fs,i,j). do {
    i ← WHILEIT (I_in_cb s j) (λi. i>0 ∧ s!(i-1) ≠ s!(j-1)) (λi. do {
      ASSERT (i-1 < length fs);
      let i=fs!(i-1);
      RETURN i
    }) i;
    let i=i+1;
    ASSERT (j < length fs);
    let fs=fs[j:=i];
    let j=j+1;
    RETURN (fs,i,j)
  }) (fs,i,j);

  RETURN fs
}"

subsubsection‹Correctness›
lemma take_length_ib[simp]:
  assumes "0 < j" "j ≤ length s"
    shows "take (length (intrinsic_border (take j s))) s = intrinsic_border (take j s)"
proof -
  from assms have "prefix (intrinsic_border (take j s)) (take j s)"
    by (metis intrinsic_borderI' border_def list.size(3) neq0_conv not_less strict_border_def take_eq_Nil)
  also have "prefix (take j s) s"
    by (simp add: ‹j ≤ length s› take_is_prefix)
  finally show ?thesis
    by (metis append_eq_conv_conj prefixE)
qed

lemma ib_singleton[simp]: "intrinsic_border [z] = []"
  by (metis intrinsic_border_less length_Cons length_greater_0_conv less_Suc0 list.size(3))

lemma border_butlast: "border xs ys ==> border (butlast xs) (butlast ys)"
  apply (auto simp: border_def)
   apply (metis butlast_append prefixE prefix_order.eq_refl prefix_prefix prefixeq_butlast)
  apply (metis Sublist.suffix_def append.right_neutral butlast.simps(1) butlast_append)
  done

corollary strict_border_butlast: "xs ≠ [] ==> strict_border xs ys ==> strict_border (butlast xs) (butlast ys)"
  unfolding strict_border_def by (simp add: border_butlast less_diff_conv)

lemma border_take_lengths: "i ≤ length s ==> border (take i s) (take j s) ==> i ≤ j"
  using border_length_le by fastforce

lemma border_step: "border xs ys ⟷ border (xs@[ys!length xs]) (ys@[ys!length xs])"
  apply (auto simp: border_def suffix_def)
  using append_one_prefix prefixE apply fastforce
  using append_prefixD apply blast
  done

corollary strict_border_step: "strict_border xs ys ⟷ strict_border (xs@[ys!length xs]) (ys@[ys!length xs])"
  unfolding strict_border_def using border_step by auto

lemma ib_butlast: "length w ≥ 2 ==> length (intrinsic_border w) ≤ length (intrinsic_border (butlast w)) + 1"
proof -
  assume "length w ≥ 2"
  then have "w ≠ []" by auto
  then have "strict_border (intrinsic_border w) w"
    by (fact intrinsic_borderI')
  with ‹2 ≤ length w› have "strict_border (butlast (intrinsic_border w)) (butlast w)"
    by (metis One_nat_def border_bot.bot.not_eq_extremum butlast.simps(1) len_greater_imp_nonempty length_butlast lessI less_le_trans numerals(2) strict_border_butlast zero_less_diff)
  then have "length (butlast (intrinsic_border w)) ≤ length (intrinsic_border (butlast w))"
    using intrinsic_border_max by blast
  then show ?thesis
    by simp
qed

corollary f_Suc(*rm*): "Suc i \<le> length w \<Longrightarrow> \<ff> w (Suc i) \<le> \<ff> w i + 1"
  apply (cases i)
   apply (simp_all add: take_Suc0)
  by (metis One_nat_def Suc_eq_plus1 Suc_to_right butlast_take diff_is_0_eq ib_butlast length_take min.absorb2 nat.simps(3) not_less_eq_eq numerals(2))

lemma f_step_bound(*rm*):
  assumes "j ≤ length w"
  shows "f w j ≤ f w (j-1) + 1"
  using assms[THEN j_le_f_le] f_Suc assms
  by (metis One_nat_def Suc_pred le_SucI not_gr_zero trans_le_add2)

lemma border_take_f: "border (take (f s i - 1) s ) (take i s)"
  apply (cases i, simp_all)
  by (metis intrinsic_borderI' border_order.order.eq_iff border_order.less_imp_le border_positions nat.simps(3) nat_le_linear positions_border take_all take_eq_Nil take_length_ib zero_less_Suc)

corollary f_strict_borderI: "y = f s (i-1) ==> strict_border (take (i-1) s) (take (j-1) s) ==> strict_border (take (y-1) s) (take (j-1) s)"
  using border_order.less_le_not_le border_order.order.trans border_take_f by blast

corollary strict_border_take_f: "0 < i ==> i ≤ length s ==> strict_border (take (f s i - 1) s ) (take i s)"
  by (meson border_order.less_le_not_le border_take_f border_take_lengths j_le_f_le' leD)

lemma f_is_max: "j ≤ length s ==> strict_border b (take j s) ==> f s j ≥ length b + 1"
  by (metis f.elims add_le_cancel_right add_less_same_cancel2 border_length_r_less intrinsic_border_max length_take min_absorb2 not_add_less2)

theorem skipping_ok:
  assumes j_bounds[simp]: "1 < j" "j ≤ length s"
    and mismatch: "s!(i-1) ≠ s!(j-1)"
    and greater_checked: "f s j ≤ i + 1"
    and "strict_border (take (i-1) s) (take (j-1) s)"
  shows "f s j ≤ f s (i-1) + 1"
proof (rule ccontr)
  assume "¬f s j ≤ f s (i-1) + 1"
  then have i_bounds: "0 < i" "i ≤ length s"
    using greater_checked assms(5) take_Nil by fastforce+
  then have i_less_j: "i < j"
    using assms(5) border_length_r_less nz_le_conv_less by auto
  from ‹¬f s j ≤ f s (i-1) + 1› greater_checked consider
    (tested) "f s j = i + 1" ― ‹This contradicts @{thm mismatch}› |
    (skipped) "f s (i-1) + 1 < f s j" "f s j ≤ i"
      ― ‹This contradicts @{thm f_is_max[of "i-1" s]}›
    by linarith
  then show False
  proof cases
    case tested
    then have "f s j - 1 = i" by simp
    moreover note border_positions[OF border_take_f[of s j, unfolded this]]
    ultimately have "take j s ! (i-1) = s!(j-1)" using i_bounds i_less_j by simp
    with ‹i < j› have "s!(i-1) = s!(j-1)"
      by (simp add: less_imp_diff_less)
    with mismatch show False..
  next
    case skipped
    let ?border = "take (i-1) s"
      ― ‹This border of @{term ‹take (j-1) s›} could not be extended to a border of @{term ‹take j s›} due to the mismatch.›
    let ?impossible = "take (f s j - 2) s"
      ― ‹A strict border longer than @{term ‹intrinsic_border ?border›}, a contradiction.›
    have "length (take j s) = j"
      by simp
    have "f s j - 2 < i - 1"
      using skipped by linarith
    then have less_s: "f s j - 2 < length s" "i - 1 < length s"
      using ‹i < j› j_bounds(2) by linarith+
    then have strict: "length ?impossible < length ?border"
      using ‹f s j - 2 < i - 1› by auto
    moreover {
      have "prefix ?impossible (take j s)"
        using prefix_length_prefix take_is_prefix
        by (metis (no_types, lifting) ‹length (take j s) = j› j_bounds(2) diff_le_self j_le_f_le length_take less_s(1) min_simps(2) order_trans)
      moreover have "prefix ?border (take j s)"
        by (metis (no_types, lifting) ‹length (take j s) = j› diff_le_self i_less_j le_trans length_take less_or_eq_imp_le less_s(2) min_simps(2) prefix_length_prefix take_is_prefix)
      ultimately have "prefix ?impossible ?border"
        using strict less_imp_le_nat prefix_length_prefix by blast
    } moreover {
      have "suffix (take (f s j - 1) s) (take j s)" using border_take_f
        by (auto simp: border_def)
      note suffix_butlast[OF this]
      then have "suffix ?impossible (take (j-1) s)"
        by (metis One_nat_def j_bounds(2) butlast_take diff_diff_left f_le len_greater_imp_nonempty less_or_eq_imp_le less_s(2) one_add_one)
      then have "suffix ?impossible (take (j-1) s)" "suffix ?border (take (j-1) s)"
        using assms(5) by auto
      from suffix_length_suffix[OF this strict[THEN less_imp_le]]
        have "suffix ?impossible ?border".
    }
    ultimately have "strict_border ?impossible ?border"
      unfolding strict_border_def[unfolded border_def] by blast
    note f_is_max[of "i-1" s, OF _ this]
    then have "length (take (f s j - 2) s) + 1 ≤ f s (i-1)"
      using less_imp_le_nat less_s(2) by blast
    then have "f s j - 1 ≤ f s (i-1)"
      by (simp add: less_s(1))
    then have "f s j ≤ f s (i-1) + 1"
      using le_diff_conv by blast
    with skipped(1) show False
      by linarith
  qed
qed

lemma extend_border:
  assumes "j ≤ length s"
  assumes "s!(i-1) = s!(j-1)"
  assumes "strict_border (take (i-1) s) (take (j-1) s)"
  assumes "f s j ≤ i + 1"
  shows "f s j = i + 1"
proof -
  from assms(3) have pos_in_range: "i - 1 < length s " "length (take (i-1) s) = i - 1"
    using border_length_r_less min_less_iff_conj by auto
  with strict_border_step[THEN iffD1, OF assms(3)] have "strict_border (take (i-1) s @ [s!(i-1)]) (take (j-1) s @ [s!(i-1)])"
    by (metis assms(3) border_length_r_less length_take min_less_iff_conj nth_take)
  with pos_in_range have "strict_border (take i s) (take (j-1) s @ [s!(i-1)])"
    by (metis Suc_eq_plus1 Suc_pred add.left_neutral border_bot.bot.not_eq_extremum border_order.less_asym neq0_conv take_0 take_Suc_conv_app_nth)
  then have "strict_border (take i s) (take (j-1) s @ [s!(j-1)])"
    by (simp only: ‹s!(i-1) = s!(j-1)›)
  then have "strict_border (take i s) (take j s)"
    by (metis One_nat_def Suc_pred assms(1,3) diff_le_self less_le_trans neq0_conv nz_le_conv_less strict_border_imp_nonempty take_Suc_conv_app_nth take_eq_Nil)
  with f_is_max[OF assms(1) this] have "f s j ≥ i + 1"
    using Suc_leI by fastforce
  with ‹f s j ≤ i + 1› show ?thesis
    using le_antisym by presburger
qed

lemma compute_fs_correct: "compute_fs s ≤ compute_fs_SPEC s"
  unfolding compute_fs_SPEC_def compute_fs_def I_out_cb_def I_in_cb_def
  apply (simp, refine_vcg
    WHILEIT_rule[where R="measure (λ(fs,i,j). length s + 1 - j)"]
    WHILEIT_rule[where R="measure id"] ― ‹@{term ‹i::nat›} decreases with every iteration.›
    )
                      apply (vc_solve, fold One_nat_def)
  subgoal for b j by (rule strict_border_take_f, auto)
  subgoal by (metis Suc_eq_plus1 f_step_bound less_Suc_eq_le)
  subgoal by fastforce
  subgoal
    by (metis (no_types, lifting) One_nat_def Suc_lessD Suc_pred border_length_r_less f_strict_borderI length_take less_Suc_eq less_Suc_eq_le min.absorb2)
  subgoal for b j i
    by (metis (no_types, lifting) One_nat_def Suc_diff_1 Suc_eq_plus1 Suc_leI border_take_lengths less_Suc_eq_le less_antisym skipping_ok strict_border_def)
  subgoal by (metis Suc_diff_1 border_take_lengths j_le_f_le less_Suc_eq_le strict_border_def)
  subgoal for b j i jj
    by (metis Suc_eq_plus1 Suc_eq_plus1_left add.right_neutral extend_border f_eq_0_iff_j_eq_0 j_le_f_le le_zero_eq less_Suc_eq less_Suc_eq_le nth_list_update_eq nth_list_update_neq)
  subgoal by linarith
  done

subsubsection‹Index shift›
text‹To avoid inefficiencies, we refine @{const compute_fs} to take @{term s}
of @{term ‹butlast s›} (it still only uses @{term ‹butlast s›}).›
definition compute_butlast_fs :: "'a list ==> nat list nres" where
  "compute_butlast_fs s = do {
  let fs=replicate (length s) 0;
  let i=0;
  let j=1;
  (fs,_,_) ← WHILEIT (I_out_cb (butlast s)) (λ(b,i,j). j < length b) (λ(fs,i,j). do {
    ASSERT (j < length fs);
    i ← WHILEIT (I_in_cb (butlast s) j) (λi. i>0 ∧ s!(i-1) ≠ s!(j-1)) (λi. do {
      ASSERT (i-1 < length fs);
      let i=fs!(i-1);
      RETURN i
    }) i;
    let i=i+1;
    ASSERT (j < length fs);
    let fs=fs[j:=i];
    let j=j+1;
    RETURN (fs,i,j)
  }) (fs,i,j);

  RETURN fs
}"

lemma compute_fs_inner_bounds:
  assumes "I_out_cb s (fs,ix,j)"
  assumes "j < length fs"
  assumes "I_in_cb s j i"
  shows "i-1 < length s" "j-1 < length s"
  using assms
    by (auto simp: I_out_cb_def I_in_cb_def split: if_splits)

lemma compute_butlast_fs_refine[refine]:
  assumes "(s,s') ∈ br butlast ((≠) [])"
  shows "compute_butlast_fs s ≤ ⇓ Id (compute_fs_SPEC s')"
proof -
  have "compute_butlast_fs s ≤ ⇓ Id (compute_fs s')"
    unfolding compute_butlast_fs_def compute_fs_def
    apply (refine_rcg)
              apply (refine_dref_type)
    using assms apply (vc_solve simp: in_br_conv)
     apply (metis Suc_pred length_greater_0_conv replicate_Suc)
    by (metis One_nat_def compute_fs_inner_bounds nth_butlast)
  also note compute_fs_correct
  finally show ?thesis by simp
qed

subsection‹Conflation›
text‹We replace @{const compute_fs_SPEC} with @{const compute_butlast_fs}›
definition "kmp2 s t ≡ do {
  ASSERT (s ≠ []);
  let i=0;
  let j=0;
  let pos=None;
  fs ← compute_butlast_fs s;
  (_,_,pos) ← WHILEIT (I_outer s t) (λ(i,j,pos). i + length s ≤ length t ∧ pos=None) (λ(i,j,pos). do {
    ASSERT (i + length s ≤ length t ∧ pos=None);
    (j,pos) ← WHILEIT (I_in_na s t i) (λ(j,pos). t!(i+j) = s!j ∧ pos=None) (λ(j,pos). do {
      let j=j+1;
      if j=length s then RETURN (j,Some i) else RETURN (j,None)
    }) (j,pos);
    if pos=None then do {
      ASSERT (j < length fs);
      let i = i + (j - fs!j + 1);
      let j = max 0 (fs!j - 1); ― ‹‹max› not necessary›
      RETURN (i,j,None)
    } else RETURN (i,j,Some i)
  }) (i,j,pos);

  RETURN pos
}"

text‹Using @{thm [source] compute_butlast_fs_refine} (it has attribute @{attribute refine}), the proof is trivial:›
lemma kmp2_refine: "kmp2 s t ≤ kmp1 s t"
  apply (rule refine_IdD)
  unfolding kmp2_def kmp1_def
  apply refine_rcg
                  apply refine_dref_type
                  apply (vc_solve simp: in_br_conv)
  done

lemma kmp2_correct: "s ≠ []
  ==> kmp2 s t ≤ kmp_SPEC s t"
proof -
  assume "s ≠ []"
  have "kmp2 s t ≤ kmp1 s t" by (fact kmp2_refine)
  also have "... ≤ kmp s t" by (fact kmp1_refine)
  also have "... ≤ kmp_SPEC s t" by (fact kmp_correct[OF ‹s ≠ []›])
  finally show ?thesis.
qed

text‹For convenience, we also remove the precondition:›
definition "kmp3 s t ≡ do {
  if s=[] then RETURN (Some 0) else kmp2 s t
}"

lemma kmp3_correct: "kmp3 s t ≤ kmp_SPEC s t"
  unfolding kmp3_def by (simp add: kmp2_correct) (simp add: kmp_SPEC_def)

section ‹Refinement to Imperative/HOL›

lemma eq_id_param: "((=), (=)) ∈ Id → Id → Id" by simp

lemmas in_bounds_aux = compute_fs_inner_bounds[of "butlast s" for s, simplified]

sepref_definition compute_butlast_fs_impl is compute_butlast_fs :: "(arl_assn id_assn)^k →_a array_assn nat_assn"
  unfolding compute_butlast_fs_def
  supply in_bounds_aux[dest]
  supply eq_id_param[where 'a='a, sepref_import_param]
  apply (rewrite array_fold_custom_replicate)
  by sepref


declare compute_butlast_fs_impl.refine[sepref_fr_rules]

sepref_register compute_fs

lemma kmp_inner_in_bound:
  assumes "i + length s ≤ length t"
  assumes "I_in_na s t i (j,None)"
  shows "i + j < length t" "j < length s"
  using assms
  by (auto simp: I_in_na_def)

sepref_definition kmp_impl is "uncurry kmp3" :: "(arl_assn id_assn)^k *_a (arl_assn id_assn)^k →_a option_assn nat_assn"
  unfolding kmp3_def kmp2_def
  apply (simp only: max_0L) ― ‹Avoid the unneeded @{const max}›
  apply (rewrite in "WHILEIT (I_in_na _ _ _) 🍋" conj_commute)
  apply (rewrite in "WHILEIT (I_in_na _ _ _) 🍋" short_circuit_conv)
  supply kmp_inner_in_bound[dest]
  supply option.splits[split]
  supply eq_id_param[where 'a='a, sepref_import_param]
  by sepref

export_code kmp_impl in SML_imp module_name KMP

lemma kmp3_correct':
  "(uncurry kmp3, uncurry kmp_SPEC) ∈ Id ×_r Id →_f ⟨Id⟩nres_rel"
  apply (intro frefI nres_relI; clarsimp)
  apply (fact kmp3_correct)
  done

lemmas kmp_impl_correct' = kmp_impl.refine[FCOMP kmp3_correct']

subsection ‹Overall Correctness Theorem›
text ‹The following theorem relates the final Imperative HOL algorithm to its specification,
using, beyond basic HOL concepts
▪ Hoare triples for Imperative/HOL, provided by the Separation Logic Framework for Imperative/HOL (Theory @{theory Separation_Logic_Imperative_HOL.Sep_Main});
▪ The assertion @{const arl_assn} to specify array-lists, which we use to represent the input strings of the algorithm;
▪ The @{const sublist_at} function that we defined in section \ref{sec:spec}.
›
theorem kmp_impl_correct:
  "< arl_assn id_assn s si * arl_assn id_assn t ti >
       kmp_impl si ti
   <λr. arl_assn id_assn s si * arl_assn id_assn t ti * ↑(
      case r of None ==> ∄i. sublist_at s t i
              | Some i ==> sublist_at s t i ∧ (∀ii<i. ¬ sublist_at s t ii)
    )>_t"
  by (sep_auto
    simp: pure_def kmp_SPEC_def
    split: option.split
    heap:  kmp_impl_correct'[THEN hfrefD, THEN hn_refineD, of "(s,t)" "(si,ti)", simplified])

definition "kmp_string_impl ≡ kmp_impl :: (char array × nat) ==> _"

section ‹Tests of Generated ML-Code›

ML_val ‹
fun str2arl s = (Array.fromList (@{code String.explode} s), @{code nat_of_integer} (String.size s))
fun kmp s t = map_option @{code integer_of_nat} (@{code kmp_string_impl} (str2arl s) (str2arl t) ())

val test1 = kmp "anas" "bananas"
val test2 = kmp "" "bananas"
val test3 = kmp "hide_fact" (File.read @{file ‹~~/src/HOL/Main.thy›})
val test4 = kmp "sorry" (File.read @{file ‹~~/src/HOL/HOL.thy›})
›

end

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.23 Sekunden ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.