Quellcodebibliothek Statistik Leitseite products/Sources/formale Sprachen/Isabelle/Archive-of-Formal-Proofs/thys/Earley_Parser/   (Sammlung formaler Beweise Version 2026-5©)  Datei vom 29.4.2026 mit Größe 21 kB image not shown  

Quelle  Earley.thy

  Sprache: Isabelle
 

theory Earley
  imports
    Derivations
begin

section Slices

fun slice :: "'a list ==> nat ==> nat ==> 'a list" where
  "slice [] _ _ = []"
"slice (x#xs) _ 0 = []"
"slice (x#xs) 0 (Suc b) = x # slice xs 0 b"
"slice (x#xs) (Suc a) (Suc b) = slice xs a b"

syntax
  slice :: "'a list ==> nat ==> nat ==> 'a list" (_'/_ [1000,0,01000)

notation (latex output)
  "slice" (_'/_ [1000,0,01000)

lemma slice_drop_take:
  "slice xs a b = drop a (take b xs)"
  by (induction xs a b rule: slice.induct) auto

lemma slice_append_aux:
  "Suc b c ==> slice (x#xs) (Suc b) c = slice xs b (c-1)"
  using Suc_le_D by fastforce

lemma slice_concat:
  "a b ==> b c ==> slice xs a b @ slice xs b c = slice xs a c"
proof (induction xs a b arbitrary: c rule: slice.induct)
  case (3 b x xs)
  then show ?case
      using Suc_le_D by(fastforce simp: slice_append_aux)
qed (auto simp: slice_append_aux)

lemma slice_concat_Ex:
  "a c ==> slice xs a c = ys @ zs ==> b. ys = slice xs a b zs = slice xs b c a b b c"
proof (induction xs a c arbitrary: ys zs rule: slice.induct)
  case (3 x xs b)
  show ?case
  proof (cases ys)
    case Nil
    then obtain zs' where "x # slice xs 0 b = x # zs'" "x # zs' = zs"
      using "3.prems"(2by auto
    thus ?thesis
      using Nil by force
  next
    case (Cons y ys')
    then obtain ys' where "x # slice xs 0 b = x # ys' @ zs" "x # ys' = ys"
      using "3.prems"(2by auto
    thus ?thesis
      using "3.IH"[of ys' zs] by force
  qed
next
  case (4 a b x xs)
  thus ?case
    by (auto, metis slice.simps(4) Suc_le_mono)
qed auto

lemma slice_nth:
  "a < length xs ==> slice xs a (a+1) = [xs!a]"
  unfolding slice_drop_take
  by (metis Cons_nth_drop_Suc One_nat_def diff_add_inverse drop_take take_Suc_Cons take_eq_Nil)

lemma slice_append_nth:
  "a b ==> b < length xs ==> slice xs a b @ [xs!b] = slice xs a (b+1)"
  by (metis le_add1 slice_concat slice_nth)

lemma slice_empty:
  "b a ==> slice xs a b = []"
  by (simp add: slice_drop_take)

lemma slice_id[simp]:
  "slice xs 0 (length xs) = xs"
  by (simp add: slice_drop_take)

lemma slice_singleton:
  "b length xs ==> [x] = slice xs a b ==> b = a + 1"
  by (induction xs a b rule: slice.induct) (auto simp: slice_drop_take)


section Earley recognizer

subsection Earley items

definition lhs_rule :: "'a rule ==> 'a" where
  "lhs_rule fst"

definition rhs_rule :: "'a rule ==> 'a list" where
  "rhs_rule snd"

datatype 'a item = 
  Item (rule_item: "'a rule") (dot_item : nat) (start_item : nat) (end_item : nat)

definition lhs_item :: "'a item ==> 'a" where
  "lhs_item x lhs_rule (rule_item x)"

definition rhs_item :: "'a item ==> 'a list" where
  "rhs_item x rhs_rule (rule_item x)"

definition α_item :: "'a item ==> 'a list" where
  "α_item x take (dot_item x) (rhs_item x)"

definition β_item :: "'a item ==> 'a list" where 
  "β_item x drop (dot_item x) (rhs_item x)"

definition is_complete :: "'a item ==> bool" where
  "is_complete x dot_item x length (rhs_item x)"

definition next_symbol :: "'a item ==> 'a option" where
  "next_symbol x if is_complete x then None else Some (rhs_item x ! dot_item x)"

lemmas item_defs = lhs_item_def rhs_item_def α_item_def β_item_def lhs_rule_def rhs_rule_def

definition is_finished :: "'a cfg ==> 'a list ==> 'a item ==> bool" where
  "is_finished G ψ x
    lhs_item x = S G
    start_item x = 0
    end_item x = length ψ
    is_complete x"

definition recognizing :: "'a item set ==> 'a cfg ==> 'a list ==> bool" where
  "recognizing I G ψ x I. is_finished G ψ x"

inductive_set Earley :: "'a cfg ==> 'a list ==> 'a item set"
  for G :: "'a cfg" and ψ :: "'a list" where
    Init: "r set (R G) ==> fst r = S G ==>
      Item r 0 0 0 Earley G ψ"
  | Scan: "x = Item r b i j ==> x Earley G ψ ==>
    ψ!j = a ==> j < length ψ ==> next_symbol x = Some a ==>
      Item r (b + 1) i (j + 1) Earley G ψ"
  | Predict: "x = Item r b i j ==> x Earley G ψ ==>
    r' set (R G) ==> next_symbol x = Some (lhs_rule r') ==>
      Item r' 0 j j Earley G ψ"
  | Complete: "x = Item rx bx i j ==> x Earley G ψ ==> y = Item ry by j k ==> y Earley G ψ ==>
      is_complete y ==> next_symbol x = Some (lhs_item y) ==>
        Item rx (bx + 1) i k Earley G ψ"


subsection Well-formedness

definition wf_item :: "'a cfg ==> 'a list => 'a item ==> bool" where 
  "wf_item G ψ x
    rule_item x set (R G)
    dot_item x length (rhs_item x)
    start_item x end_item x
    end_item x length ψ"

lemma wf_Init:
  assumes "r set (R G)" "fst r = S G"
  shows "wf_item G ψ (Item r 0 0 0)"
  using assms unfolding wf_item_def by simp

lemma wf_Scan:
  assumes "x = Item r b i j" "wf_item G ψ x" "ψ!j = a" "j < length ψ" "next_symbol x = Some a"
  shows "wf_item G ψ (Item r (b + 1) i (j+1))"
  using assms unfolding wf_item_def by (auto simp: item_defs is_complete_def next_symbol_def split: if_splits)

lemma wf_Predict:
  assumes "x = Item r b i j" "wf_item G ψ x" "r' set (R G)" "next_symbol x = Some (lhs_rule r')"
  shows "wf_item G ψ (Item r' 0 j j)"
  using assms unfolding wf_item_def by simp

lemma wf_Complete:
  assumes "x = Item rx bx i j" "wf_item G ψ x" "y = Item ry by j k" "wf_item G ψ y"
  assumes "is_complete y" "next_symbol x = Some (lhs_item y)"
  shows "wf_item G ψ (Item rx (bx + 1) i k)"
  using assms unfolding wf_item_def is_complete_def next_symbol_def rhs_item_def
  by (auto split: if_splits)

lemma wf_Earley:
  assumes "x Earley G ψ"
  shows "wf_item G ψ x"
  using assms wf_Init wf_Scan wf_Predict wf_Complete
  by (induction rule: Earley.induct) fast+


subsection Soundness

definition sound_item :: "'a cfg ==> 'a list ==> 'a item ==> bool" where
  "sound_item G ψ x G [lhs_item x] ==>* (slice ψ (start_item x) (end_item x) @ β_item x)"

lemma sound_Init:
  assumes "r set (R G)" "fst r = S G"
  shows "sound_item G ψ (Item r 0 0 0)"
proof -
  let ?x = "Item r 0 0 0"
  have "(lhs_item ?x, β_item ?x) set (R G)"
    using assms(1by (simp add: item_defs)
  hence "derives G [lhs_item ?x] (β_item ?x)"
    using derives_if_valid_rule by metis
  thus "sound_item G ψ ?x"
    unfolding sound_item_def by (simp add: slice_empty)
qed

lemma sound_Scan:
  assumes "x = Item r b i j" "wf_item G ψ x" "sound_item G ψ x"
  assumes "ψ!j = a" "j < length ψ" "next_symbol x = Some a"
  shows "sound_item G ψ (Item r (b+1) i (j+1))"
proof -
  define x' where [simp]: "x' = Item r (b+1) i (j+1)"
  obtain β_item' where *: "β_item x = a # β_item'" "β_item x' = β_item'"
    using assms(1,6apply (auto simp: item_defs next_symbol_def is_complete_def split: if_splits)
    by (metis Cons_nth_drop_Suc leI)
  have "slice ψ i j @ β_item x = slice ψ i (j+1) @ β_item'"
    using * assms(1,2,4,5by (auto simp: slice_append_nth wf_item_def)
  moreover have "derives G [lhs_item x] (slice ψ i j @ β_item x)"
    using assms(1,3) sound_item_def by force
  ultimately show ?thesis
    using assms(1) * by (auto simp: item_defs sound_item_def)
qed

lemma sound_Predict:
  assumes "x = Item r b i j" "wf_item G ψ x" "sound_item G ψ x"
  assumes "r' set (R G)" "next_symbol x = Some (lhs_rule r')"
  shows "sound_item G ψ (Item r' 0 j j)"
  using assms by (auto simp: sound_item_def derives_if_valid_rule slice_empty item_defs)

lemma sound_Complete:
  assumes "x = Item rx bx i j" "wf_item G ψ x" "sound_item G ψ x"
  assumes "y = Item ry by j k" "wf_item G ψ y" "sound_item G ψ y"
  assumes "is_complete y" "next_symbol x = Some (lhs_item y)"
  shows "sound_item G ψ (Item rx (bx + 1) i k)"
proof -
  have "derives G [lhs_item y] (slice ψ j k)"
    using assms(4,6,7by (auto simp: sound_item_def is_complete_def item_defs)
  then obtain E where E: "Derivation G [lhs_item y] E (slice ψ j k)"
    using derives_implies_Derivation by blast
  have "derives G [lhs_item x] (slice ψ i j @ β_item x)"
    using assms(1,3,4by (auto simp: sound_item_def)
  moreover have 0"β_item x = (lhs_item y) # tl (β_item x)"
    using assms(8apply (auto simp: next_symbol_def is_complete_def item_defs split: if_splits)
    by (metis drop_eq_Nil hd_drop_conv_nth leI list.collapse)
  ultimately obtain D where D: 
    "Derivation G [lhs_item x] D (slice ψ i j @ [lhs_item y] @ (tl (β_item x)))"
    using derives_implies_Derivation by (metis append_Cons append_Nil)
  obtain F where F:
    "Derivation G [lhs_item x] F (slice ψ i j @ slice ψ j k @ tl (β_item x))"
    using Derivation_append_rewrite D E
    by metis
  moreover have "i j"
    using assms(1,2) wf_item_def by force
  moreover have "j k"
    using assms(4,5) wf_item_def by force
  ultimately have "derives G [lhs_item x] (slice ψ i k @ tl (β_item x))"
    by (metis Derivation_implies_derives append.assoc slice_concat)
  thus "sound_item G ψ (Item rx (bx + 1) i k)"
    using assms(1,4by (auto simp: sound_item_def item_defs drop_Suc tl_drop)
qed

lemma sound_Earley:
  assumes "x Earley G ψ" "wf_item G ψ x"
  shows "sound_item G ψ x"
  using assms
proof (induction rule: Earley.induct)
  case (Init r)
  thus ?case
    using sound_Init by blast
next
  case (Scan x r b i j a)
  thus ?case
    using wf_Earley sound_Scan by fast
next
  case (Predict x r b i j r')
  thus ?case
    using wf_Earley sound_Predict by blast
next
  case (Complete x rx bx i j y ry by k)
  thus ?case
    using wf_Earley sound_Complete by metis
qed

theorem soundness_Earley:
  assumes "recognizing (Earley G ψ) G ψ"
  shows "G [S G] ==>* ψ"
proof -
  obtain x where x: "x Earley G ψ" "is_finished G ψ x"
    using assms recognizing_def by blast
  hence "sound_item G ψ x"
    using wf_Earley sound_Earley by blast
  thus ?thesis
    unfolding sound_item_def using x by (auto simp: is_finished_def is_complete_def item_defs)
qed


subsection Completeness

definition partially_completed :: "nat ==> 'a cfg ==> 'a list ==> 'a item set ==> ('a derivation ==> bool) ==> bool" where
  "partially_completed k G ψ I P r b i' i j x a D.
    i j j k k length ψ
    x = Item r b i' i x I next_symbol x = Some a
    Derivation G [a] D (slice ψ i j) P D
    Item r (b+1) i' j I"

lemma partially_completed_upto:
  assumes "j k" "k length ψ"
  assumes "x = Item (N,α) d i j" "x I" "x I. wf_item G ψ x"
  assumes "Derivation G (β_item x) D (slice ψ j k)"
  assumes "partially_completed k G ψ I (λD'. length D' length D)"
  shows "Item (N,α) (length α) i k I"
  using assms
proof (induction "β_item x" arbitrary: d i j k N α x D)
  case Nil
  have "α_item x = α"
    using Nil(1,4unfolding α_item_def β_item_def rhs_item_def rhs_rule_def by simp
  hence "x = Item (N,α) (length α) i j"
    using Nil.hyps Nil.prems(3-5unfolding wf_item_def item_defs by auto
  have "Derivation G [] D (slice ψ j k)"
    using Nil.hyps Nil.prems(6by auto
  hence "slice ψ j k = []"
    using Derivation_from_empty by blast
  hence "j = k"
    unfolding slice_drop_take using Nil.prems(1,2by simp
  thus ?case
    using x = Item (N, α) (length α) i j Nil.prems(4by blast
next
  case (Cons b bs)
  obtain j' E F where *: 
    "Derivation G [b] E (slice ψ j j')"
    "Derivation G bs F (slice ψ j' k)"
    "j j'" "j' k" "length E length D" "length F length D"
    using Derivation_concat_split[of G "[b]" bs D "slice ψ j k"] slice_concat_Ex
    using Cons.hyps(2) Cons.prems(1,6)
    by (smt (verit, ccfv_threshold) Cons_eq_appendI append_self_conv2)
  have "next_symbol x = Some b"
    using Cons.hyps(2unfolding item_defs(4) next_symbol_def is_complete_def by (auto, metis nth_via_drop)
  hence "Item (N, α) (d+1) i j' I"
    using Cons.prems(7unfolding partially_completed_def
    using Cons.prems(2,3,4) *(1,3-5by blast
  moreover have "partially_completed k G ψ I (λD'. length D' length F)"
    using Cons.prems(7) *(6unfolding partially_completed_def by fastforce
  moreover have "bs = β_item (Item (N,α) (d+1) i j')"
    using Cons.hyps(2) Cons.prems(3unfolding item_defs(4) rhs_item_def 
    by (auto, metis List.list.sel(3) drop_Suc drop_tl)
  ultimately show ?case
    using Cons.hyps(1) *(2,4) Cons.prems(2,3,5) wf_item_def by blast
qed

lemma partially_completed_Earley:
  "partially_completed k G ψ (Earley G ψ) (λ_. True)"
unfolding partially_completed_def
proof (intro allI impI)
  fix r b i' i j x a D
  assume
    "i j j k k length ψ
     x = Item r b i' i x Earley G ψ
     next_symbol x = Some a
     Derivation G [a] D (slice ψ i j) True"
  thus "Item r (b + 1) i' j Earley G ψ"
  proof (induction "length D" arbitrary: r b i' i j x a D rule: nat_less_induct)
    case 1
    show ?case
    proof cases
      assume "D = []"
      hence "[a] = slice ψ i j"
        using "1.prems" by force
      moreover have "j length ψ"
        using le_trans "1.prems" by blast
      ultimately have "j = i+1"
        using slice_singleton by metis
      hence "i < length ψ"
        using j length ψ by simp
      hence "ψ!i = a"
        using slice_nth [a] = slice ψ i j j = i + 1 by fastforce
      hence "Item r (b + 1) i' j Earley G ψ"
        using Earley.Scan "1.prems" i < length ψ j = i + 1 by metis
      thus ?thesis
        by (simp add: j = i + 1)
    next
      assume "¬ D = []"
      then obtain d D' where "D = d # D'"
        by (meson List.list.exhaust)
      then obtain α where *: "Derives1 G [a] (fst d) (snd d) α" "Derivation G α D' (slice ψ i j)"
        using "1.prems" by auto
      hence rule: "(a, α) set (R G)" "fst d = 0" "snd d = (a ,α)"
        using *(1unfolding Derives1_def by (simp add: Cons_eq_append_conv)+
        define y where y_def: "y = Item (a ,α) 0 i i"
        have "length D' < length D"
          using D = d # D' by fastforce
        hence "partially_completed k G ψ (Earley G ψ) (λE. length E length D')"
          unfolding partially_completed_def using "1.hyps" order_le_less_trans by (smt (verit, best))
        hence "partially_completed j G ψ (Earley G ψ) (λE. length E length D')"
          unfolding partially_completed_def using "1.prems" by force
        moreover have "Derivation G (β_item y) D' (slice ψ i j)"
          using *(2by (auto simp: item_defs y_def)
        moreover have "y Earley G ψ"
          using y_def "1.prems" rule by (auto simp: item_defs Earley.Predict)
        moreover have "j length ψ"
          using "1.prems" by simp
        ultimately have "Item (a,α) (length α) i j Earley G ψ"
          using partially_completed_upto "1.prems" wf_Earley y_def by metis
        moreover have x: "x = Item r b i' i" "x Earley G ψ"
          using "1.prems" by blast+
        moreover have "next_symbol x = Some a"
          using "1.prems" by linarith
        ultimately show ?thesis
          using Earley.Complete[OF x] by (auto simp: is_complete_def item_defs)
      qed
    qed
  qed

theorem completeness_Earley:
  assumes "G [S G] ==>* ψ" "is_word G ψ"
  shows "recognizing (Earley G ψ) G ψ"
proof -
  obtain α D where *: "(S G ,α) set (R G)" "Derivation G α D ψ"
    using Derivation_S1 assms derives_implies_Derivation by metis
  define x where x_def: "x = Item (S G, α) 0 0 0"
  have "partially_completed (length ψ) G ψ (Earley G ψ) (λ_. True)"
    using assms(2) partially_completed_Earley by blast
  hence 0"partially_completed (length ψ) G ψ (Earley G ψ) (λD'. length D' length D)"
    unfolding partially_completed_def by blast
  have 1"x Earley G ψ"
    using x_def Earley.Init *(1by fastforce
  have 2"Derivation G (β_item x) D (slice ψ 0 (length ψ))"
    using *(2) x_def by (simp add: item_defs)
  have "Item (S G,α) (length α) 0 (length ψ) Earley G ψ"
    using partially_completed_upto[OF _ _ _ _ _ 2 0] wf_Earley 1 x_def by auto
  then show ?thesis
    unfolding recognizing_def is_finished_def by (auto simp: is_complete_def item_defs, force)
qed


subsection Correctness

theorem correctness_Earley:
  assumes "is_word G ψ"
  shows "recognizing (Earley G ψ) G ψ G [S G] ==>* ψ"
  using assms soundness_Earley completeness_Earley by blast


subsection Finiteness

lemma finiteness_empty:
  "set (R G) = {} ==> finite { x | x. wf_item G ψ x }"
  unfolding wf_item_def by simp

fun item_intro :: "'a rule × nat × nat × nat ==> 'a item" where
  "item_intro (rule, dot, origin, ends) = Item rule dot origin ends" 

lemma finiteness_nonempty:
  assumes "set (R G) {}"
  shows "finite { x. wf_item G ψ x }"
proof -
  define M where "M = Max { length (rhs_rule r) | r. r set (R G) }"
  define Top where "Top = (set (R G) × {0..M} × {0..length ψ} × {0..length ψ})"
  hence "finite Top"
    using finite_cartesian_product finite by blast
  have "inj_on item_intro Top"
    unfolding Top_def inj_on_def by simp
  hence "finite (item_intro ` Top)"
    using finite_image_iff finite Top by auto
  have "{ x | x. wf_item G ψ x } item_intro ` Top"
  proof standard
    fix x
    assume "x { x | x. wf_item G ψ x }"
    then obtain rule dot origin endp where *: "x = Item rule dot origin endp"
      "rule set (R G)" "dot length (rhs_item x)" "origin length ψ" "endp length ψ"
      unfolding wf_item_def using item.exhaust_sel le_trans by blast
    hence "length (rhs_rule rule) { length (rhs_rule r) | r. r set (R G) }"
      using *(1,2) rhs_item_def by blast
    moreover have "finite { length (rhs_rule r) | r. r set (R G) }"
      using finite finite_image_set[of "λx. x set (R G)"by fastforce
    ultimately have "M length (rhs_rule rule)"
      unfolding M_def by simp
    hence "dot M"
      using *(1,3) rhs_item_def by (metis item.sel(1) le_trans)
    hence "(rule, dot, origin, endp) Top"
      using *(2,4,5unfolding Top_def by simp
    thus "x item_intro ` Top"
      using *(1by force
  qed
  thus ?thesis
    using finite (item_intro ` Top) rev_finite_subset by auto
qed

lemma finiteness_UNIV_wf_item:
  "finite { x. wf_item G ψ x }"
  using finiteness_empty finiteness_nonempty by fastforce

theorem finiteness_Earley:
  "finite (Earley G ψ)"
  using finiteness_UNIV_wf_item wf_Earley rev_finite_subset by (metis mem_Collect_eq subsetI)

end

Messung V0.5 in Prozent
C=97 H=99 G=97

¤ Dauer der Verarbeitung: 0.14 Sekunden  (vorverarbeitet am  2026-06-10) ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.