Theory Coinductive_Language

theory Coinductive_Language
imports BNF_Corec
(*<*)
section {* A Codatatype of Formal Languages *}

theory Coinductive_Language
imports "../../BNF_Corec"
begin

hide_const (open) Inter
(*>*)

section {* Introduction *}

text {*
We define formal languages as a codataype of infinite trees branching over the
alphabet @{typ 'a}. Each node in such a tree indicates whether the path to this
node constitutes a word inside or outside of the language.

*}

codatatype 'a language = Lang (𝔬: bool) (𝔡: "'a ⇒ 'a language")

text {* 
This codatatype is isormorphic to the set of lists representation of languages,
but caters for definitions by corecursion and proofs by coinduction.

Regular operations on languages are then defined by corecursion.
The standard definitions of concatenation and
iteration from the coalgebraic literature are not primitively corecursive---they
require guardedness up-to union/concatenation. As an exercise in coinduction we also prove the
axioms of Kleene algebra for the defined regular operations.

Furthermore, a language for context-free grammars given by productions in Greibach
normal form and an initial nonterminal is constructed by corecursion,
yielding an executable decision procedure for the word problem without further ado.
*}
(*<*)
(* custom coinduction theorem (getting rid of rel_fun) *)
declare language.coinduct_strong[unfolded rel_fun_def, simplified, case_names Lang, coinduct pred]
(*>*)

section {* Regular Languages *}

corec Zero :: "'a language" where
  "Zero = Lang False (λ_. Zero)"

lemma Zero_sel[simp]:
  "𝔬 Zero = False"
  "𝔡 Zero = (λ_. Zero)"
  by (subst (1) Zero.code; simp)+

corec One :: "'a language" where
  "One = Lang True (λ_. Zero)"

lemma One_sel[simp]:
  "𝔬 One = True"
  "𝔡 One = (λ_. Zero)"
  by (subst (1) One.code; simp)+

corec Atom :: "'a ⇒ 'a language" where
  "Atom a = Lang False (λb. if a = b then One else Zero)"

lemma Atom_sel[simp]:
  "𝔬 (Atom a) = False"
  "𝔡 (Atom a) = (λb. if a = b then One else Zero)"
  by (subst (1) Atom.code; simp)+

corec (friend) Plus :: "'a language ⇒ 'a language ⇒ 'a language" where
  "Plus r s = Lang (𝔬 r ∨ 𝔬 s) (λa. Plus (𝔡 r a) (𝔡 s a))"

lemma Plus_sel[simp]:
  "𝔬 (Plus r s) = (𝔬 r ∨ 𝔬 s)"
  "𝔡 (Plus r s) = (λa. Plus (𝔡 r a) (𝔡 s a))"
  by (subst (1) Plus.code; simp)+

theorem Plus_ZeroL[simp]: "Plus Zero r = r"
  by (coinduction arbitrary: r) simp

theorem Plus_ZeroR[simp]: "Plus r Zero = r"
  by (coinduction arbitrary: r) simp

theorem Plus_assoc: "Plus (Plus r s) t = Plus r (Plus s t)"
  by (coinduction arbitrary: r s t) auto

theorem Plus_comm: "Plus r s = Plus s r"
  by (coinduction arbitrary: r s) auto

lemma Plus_rotate: "Plus r (Plus s t) = Plus s (Plus r t)"
  using Plus_assoc Plus_comm by metis

theorem Plus_idem: "Plus r r = r"
  by (coinduction arbitrary: r) auto

lemma Plus_idem_assoc: "Plus r (Plus r s) = Plus r s"
  by (metis Plus_assoc Plus_idem)

lemmas Plus_ACI[simp] = Plus_rotate Plus_comm Plus_assoc Plus_idem_assoc Plus_idem

lemma Plus_OneL[simp]: "𝔬 r ⟹ Plus One r = r"
  by (coinduction arbitrary: r) auto

lemma Plus_OneR[simp]: "𝔬 r ⟹ Plus r One = r"
  by (coinduction arbitrary: r) auto

text {*
  Concatenation is not primitively corecursive---the corecursive call of its derivative is
  guarded by @{term Plus}.
*}

corec (friend) Times :: "'a language ⇒ 'a language ⇒ 'a language" where
  "Times r s = Lang (𝔬 r ∧ 𝔬 s) (λa. if 𝔬 r then Plus (Times (𝔡 r a) s) (𝔡 s a) else Times (𝔡 r a) s)"

lemma Times_sel[simp]:
  "𝔬 (Times r s) = (𝔬 r ∧ 𝔬 s)"
  "𝔡 (Times r s) = (λa. if 𝔬 r then Plus (Times (𝔡 r a) s) (𝔡 s a) else Times (𝔡 r a) s)"
  by (subst (1) Times.code; simp)+

theorem Times_ZeroL[simp]: "Times Zero r = Zero"
  by coinduction simp

theorem Times_ZeroR[simp]: "Times r Zero = Zero"
  by (coinduction arbitrary: r) auto

theorem Times_OneL[simp]: "Times One r = r"
  by (coinduction arbitrary: r rule: language.coinduct_strong) (simp add: rel_fun_def)

theorem Times_OneR[simp]: "Times r One = r"
  by (coinduction arbitrary: r) simp

theorem Times_PlusL[simp]: "Times (Plus r s) t = Plus (Times r t) (Times s t)"
  by (coinduction arbitrary: r s rule: language.coinduct_upto)
    (fastforce intro: language.cong_Plus language.cong_base language.cong_refl)

theorem Times_PlusR[simp]: "Times r (Plus s t) = Plus (Times r s) (Times r t)"
  by (coinduction arbitrary: r s rule: language.coinduct_upto)
    (fastforce intro: language.cong_Plus language.cong_base language.cong_refl)

theorem Times_assoc[simp]: "Times (Times r s) t = Times r (Times s t)"
  by (coinduction arbitrary: r s t rule: language.coinduct_upto)
    (fastforce intro: language.cong_Plus language.cong_base language.cong_refl)

text {*
  Similarly to @{term Times}, iteration is not primitively corecursive (guardedness by
  @{term Times} is required).
*}

corec (friend) Star :: "'a language ⇒ 'a language" where
  "Star r = Lang True (λa. Times (𝔡 r a) (Star r))"

lemma Star_sel[simp]:
  "𝔬 (Star r) = True"
  "𝔡 (Star r) = (λa. Times (𝔡 r a) (Star r))"
  by (subst (1) Star.code; simp)+

lemma Star_Zero[simp]: "Star Zero = One"
  by coinduction auto

lemma Star_One[simp]: "Star One = One"
  by coinduction auto

lemma Star_unfoldL: "Star r = Plus One (Times r (Star r))"
  by coinduction auto

corec (friend) Inter :: "'a language ⇒ 'a language ⇒ 'a language" where
  "Inter r s = Lang (𝔬 r ∧ 𝔬 s) (λa. Inter (𝔡 r a) (𝔡 s a))"

lemma Inter_sel[simp]:
  "𝔬 (Inter r s) = (𝔬 r ∧ 𝔬 s)"
  "𝔡 (Inter r s) = (λa. Inter (𝔡 r a) (𝔡 s a))"
  by (subst (1) Inter.code; simp)+

corec (friend) Not :: "'a language ⇒ 'a language" where
  "Not r = Lang (¬ 𝔬 r) (λa. Not (𝔡 r a))"

lemma Not_sel[simp]:
  "𝔬 (Not r) = (¬ 𝔬 r)"
  "𝔡 (Not r) = (λa. Not (𝔡 r a))"
  by (subst (1) Not.code; simp)+


corec Full :: "'a language" (*") where
  "Full = Lang True (λ_. Full)"

lemma Full_sel[simp]:
  "𝔬 Full = True"
  "𝔡 Full = (λ_. Full)"
  by (subst (1) Full.code; simp)+

text {*
  Shuffle product is not primitively corecursive---the corecursive call of its derivative is
  guarded by @{term Plus}.
*}

corec (friend) Shuffle :: "'a language ⇒ 'a language ⇒ 'a language" where
  "Shuffle r s = Lang (𝔬 r ∧ 𝔬 s) (λa. Plus (Shuffle (𝔡 r a) s) (Shuffle r (𝔡 s a)))"

lemmas language_coinduct_upto = language.coinduct_upto[unfolded rel_fun_def, simplified, case_names Lang, consumes 1]

lemma Shuffle_sel[simp]:
  "𝔬 (Shuffle r s) = (𝔬 r ∧ 𝔬 s)"
  "𝔡 (Shuffle r s) = (λa. Plus (Shuffle (𝔡 r a) s) (Shuffle r (𝔡 s a)))"
  by (subst (1) Shuffle.code; simp)+

theorem Shuffle_ZeroL[simp]: "Shuffle Zero r = Zero"
  by (coinduction arbitrary: r rule: language_coinduct_upto)
    (auto 0 4 intro: language.cong_Plus language.cong_trans language.cong_base language.cong_refl)

theorem Shuffle_ZeroR[simp]: "Shuffle r Zero = Zero"
  by (coinduction arbitrary: r rule: language_coinduct_upto)
    (auto 0 4 intro: language.cong_Plus language.cong_trans language.cong_base language.cong_refl)


theorem Shuffle_OneL[simp]: "Shuffle One r = r"
  by (coinduction arbitrary: r) simp

theorem Shuffle_OneR[simp]: "Shuffle r One = r"
  by (coinduction arbitrary: r) simp

theorem Shuffle_PlusL[simp]: "Shuffle (Plus r s) t = Plus (Shuffle r t) (Shuffle s t)"
  by (coinduction arbitrary: r s t rule: language.coinduct_upto)
    (auto 0 4 intro: language.cong_refl language.cong_base language.cong_trans language.cong_Plus
       intro!: exI[where 'a = "'a language"])

theorem Shuffle_PlusR[simp]: "Shuffle r (Plus s t) = Plus (Shuffle r s) (Shuffle r t)"
  by (coinduction arbitrary: r s t rule: language.coinduct_upto)
    (auto 0 4 intro: language.cong_refl language.cong_base language.cong_trans language.cong_Plus
       intro!: exI[where 'a = "'a language"])

theorem Shuffle_assoc[simp]: "Shuffle (Shuffle r s) t = Shuffle r (Shuffle s t)"
  by (coinduction arbitrary: r s t rule: language_coinduct_upto)
     (fastforce intro: language.cong_trans language.cong_base language.cong_refl language.cong_Plus)



lemma Star_unfoldR: "Star r = Plus One (Times (Star r) r)"
proof (coinduction arbitrary: r rule: language_coinduct_upto)
  case Lang
  { fix a have "Plus (Times (𝔡 r a) (Times (Star r) r)) (𝔡 r a) = 
      Times (𝔡 r a) (Plus One (Times (Star r) r))" by simp
  }
  then show ?case by (auto intro: language.cong_intros simp del: Times_PlusR)
qed

lemma Star_Star[simp]: "Star (Star r) = Star r"
  by (subst Star_unfoldL, coinduction arbitrary: r rule: language_coinduct_upto)
    (auto intro: language.cong_intros)

lemma Times_Star[simp]: "Times (Star r) (Star r) = Star r"
proof (coinduction arbitrary: r rule: language_coinduct_upto)
  case Lang
  have *: "⋀r s. Plus (Times r s) r = Times r (Plus s One)" by simp
  show ?case by (auto intro: language.cong_intros simp del: Times_PlusR Plus_ACI simp: Times_PlusR[symmetric] *)
qed

instantiation language :: (type) "{semiring_1, order}"
begin

lemma Zero_One[simp]: "Zero ≠ One"
  by (metis One_sel(1) Zero_sel(1))

definition "zero_language = Zero"
definition "one_language = One"
definition "plus_language = Plus"
definition "times_language = Times"

definition "less_eq_language r s = (Plus r s = s)"
definition "less_language r s = (Plus r s = s ∧ r ≠ s)"

lemmas language_defs = zero_language_def one_language_def plus_language_def times_language_def
  less_eq_language_def less_language_def

instance proof intro_classes
  fix x y z :: "'a language" assume "x ≤ y" "y ≤ z"
  then show "x ≤ z" unfolding language_defs by (metis Plus_assoc)
next
  fix x y z :: "'a language"
  show "x + y + z = x + (y + z)" unfolding language_defs by (rule Plus_assoc)
qed (auto simp: language_defs)

end

text {*
  We prove the missing axioms of Kleene Algebras about @{term Star}, as well as monotonicity
  properties and three standard interesting rules: bisimulation, sliding, and denesting.
*}

theorem le_StarL: "Plus One (Times r (Star r)) ≤ Star r"
  by (rule order_eq_refl[OF Star_unfoldL[symmetric]])

theorem le_StarR: "Plus One (Times (Star r) r) ≤ Star r"
  by (rule order_eq_refl[OF Star_unfoldR[symmetric]])

theorem ardenL: "Plus r (Times s x) ≤ x ⟹ Times (Star s) r ≤ x"
unfolding language_defs
proof (coinduction arbitrary: r s x rule: language_coinduct_upto)
  case Lang
  then have "𝔬 r ⟹ 𝔬 x" by (metis Plus_sel(1))
  then show ?case
    by (subst (3 4) Lang[symmetric])
      (auto intro: language.cong_base simp: Times_PlusR[symmetric] simp del: Times_PlusR
        intro!: exI[where 'a = "'a language"] Lang[simplified Plus_ACI]
          language.cong_Plus language.cong_Times language.cong_refl[OF refl])
qed

theorem ardenR: "Plus r (Times x s) ≤ x ⟹ Times r (Star s) ≤ x"
unfolding language_defs
proof (coinduction arbitrary: r s x rule: language_coinduct_upto)
  case Lang
  let ?R = "(λL K. ∃r s. L = Plus (Times r (Star s)) K ∧ Plus r (Plus (Times K s) K) = K)"
  have "⋀a. 𝔬 x ⟹ Plus (𝔡 r a) (Plus (𝔡 s a) (Plus (𝔡 x a) (Times (𝔡 x a) s))) = 𝔡 x a"
    by (subst (1 2) Lang[symmetric]) auto
  moreover
  have "⋀a. 𝔬 x ⟹ Plus (𝔡 s a) (𝔡 x a) = 𝔡 x a"
    by (subst (1 2) Lang[symmetric]) auto
  then have "⋀a. ?R (Plus (Times (𝔡 r a) (Star s)) (𝔡 x a)) (𝔡 x a)"
    by (subst Lang[symmetric]) (auto simp del: Plus_comm)
  moreover
  from Lang have "𝔬 r ⟹ 𝔬 x" by (metis Plus_sel(1))
  ultimately show ?case
    by (auto 0 4 simp: Times_PlusL[symmetric] simp del: Times_PlusL
      intro: language.cong_base intro: exI[where 'a = "'a language"])
qed

lemma ge_One[simp]: "One ≤ r ⟷ 𝔬 r"
  unfolding less_eq_language_def by (metis One_sel(1) Plus_sel(1) Plus_OneL)

lemma Plus_mono[intro]: "⟦r1 ≤ s1; r2 ≤ s2⟧ ⟹ Plus r1 r2 ≤ Plus s1 s2"
  unfolding less_eq_language_def by (metis Plus_assoc Plus_comm)

lemma Plus_upper: "⟦r1 ≤ s; r2 ≤ s⟧ ⟹ Plus r1 r2 ≤ s"
  by (metis Plus_mono Plus_idem)

lemma le_PlusL: "r ≤ Plus r s"
  by (metis Plus_idem_assoc less_eq_language_def)

lemma le_PlusR: "s ≤ Plus r s"
  by (metis Plus_comm Plus_idem_assoc less_eq_language_def)

lemma Times_mono[intro]: "⟦r1 ≤ s1; r2 ≤ s2⟧ ⟹ Times r1 r2 ≤ Times s1 s2"
proof (unfold less_eq_language_def)
  assume s1[symmetric]: "Plus r1 s1 = s1" and s2[symmetric]: "Plus r2 s2 = s2"
  have "Plus (Times r1 r2) (Times s1 s2) =
    Plus (Times r1 r2) (Plus (Times r1 r2) (Plus (Times s1 r2) (Plus (Times r1 s2) (Times s1 s2))))"
    by (subst s1, subst s2) auto
  also have "… = Plus (Times r1 r2) (Plus (Times s1 r2) (Plus (Times r1 s2) (Times s1 s2)))"
    by (metis Plus_idem Plus_assoc)
  also have "… = Times s1 s2" by (subst s1, subst s2) auto
  finally show "Plus (Times r1 r2) (Times s1 s2) = Times s1 s2" .
qed

lemma le_TimesL: "𝔬 s ⟹ r ≤ Times r s"
  by (metis Plus_OneL Times_OneR Times_mono le_PlusL order_refl)

lemma le_TimesR: "𝔬 r ⟹ s ≤ Times r s"
  by (metis Plus_OneR Times_OneL Times_mono le_PlusR order_refl)

lemma le_Star: "s ≤ Star s"
  by (subst Star_unfoldL, subst Star_unfoldL) (auto intro: order_trans[OF le_PlusL le_PlusR])

lemma Star_mono:
  assumes rs: "r ≤ s"
  shows "Star r ≤ Star s"
proof -
  have "Star r = Plus One (Times (Star r) r)" by (rule Star_unfoldR)
  also have "… ≤ Plus One (Times (Star r) s)" by (blast intro: rs)
  also have "Times (Star r) s ≤ Star s"
  proof (rule ardenL[OF Plus_upper[OF le_Star]])
    have "Times r (Star s) ≤ Times s (Star s)" by (blast intro: rs)
    also have "Times s (Star s) ≤ Plus One (Times s (Star s))" by (rule le_PlusR)
    finally show "Times r (Star s) ≤ Star s" by (subst (2) Star_unfoldL)
  qed
  finally show ?thesis by auto
qed

lemma Inter_mono: "⟦r1 ≤ s1; r2 ≤ s2⟧ ⟹ Inter r1 r2 ≤ Inter s1 s2"
unfolding less_eq_language_def proof (coinduction arbitrary: r1 r2 s1 s2)
  case Lang
  then have "𝔬 (Plus r1 s1) = 𝔬 s1" "𝔬 (Plus r2 s2) = 𝔬 s2"
        "∀a. 𝔡 (Plus r1 s1) a = 𝔡 s1 a" "∀a. 𝔡 (Plus r2 s2) a = 𝔡 s2 a" by simp_all
  then show ?case by fastforce
qed

lemma Not_antimono: "r ≤ s ⟹ Not s ≤ Not r"
unfolding less_eq_language_def proof (coinduction arbitrary: r s)
  case Lang
  then have "𝔬 (Plus r s) = 𝔬 s" "∀a. 𝔡 (Plus r s) a = 𝔡 s a" by simp_all
  then show ?case by auto
qed

lemma Not_Plus[simp]: "Not (Plus r s) = Inter (Not r) (Not s)"
  by (coinduction arbitrary: r s) auto

lemma Not_Inter[simp]: "Not (Inter r s) = Plus (Not r) (Not s)"
  by (coinduction arbitrary: r s) auto

lemma Inter_assoc[simp]: "Inter (Inter r s) t = Inter r (Inter s t)"
  by (coinduction arbitrary: r s t) auto

lemma Inter_comm: "Inter r s = Inter s r"
  by (coinduction arbitrary: r s) auto

lemma Inter_idem[simp]: "Inter r r = r"
  by (coinduction arbitrary: r) auto

lemma Inter_ZeroL[simp]: "Inter Zero r = Zero"
  by (coinduction arbitrary: r) auto

lemma Inter_ZeroR[simp]: "Inter r Zero = Zero"
  by (coinduction arbitrary: r) auto

lemma Inter_FullL[simp]: "Inter Full r = r"
  by (coinduction arbitrary: r) auto

lemma Inter_FullR[simp]: "Inter r Full = r"
  by (coinduction arbitrary: r) auto

lemma Plus_FullL[simp]: "Plus Full r = Full"
  by (coinduction arbitrary: r) auto

lemma Plus_FullR[simp]: "Plus r Full = Full"
  by (coinduction arbitrary: r) auto

lemma Not_Not[simp]: "Not (Not r) = r"
  by (coinduction arbitrary: r) auto

lemma Not_Zero[simp]: "Not Zero = Full"
  by coinduction simp

lemma Not_Full[simp]: "Not Full = Zero"
  by coinduction simp

lemma bisimulation:
  assumes "Times r s = Times s t"
  shows "Times (Star r) s = Times s (Star t)"
proof (rule antisym[OF ardenL[OF Plus_upper[OF le_TimesL]] ardenR[OF Plus_upper[OF le_TimesR]]])
  have "Times r (Times s (Star t)) = Times s (Times t (Star t))" (is "?L = _")
    by (simp only: assms Times_assoc[symmetric])
  also have "… ≤ Times s (Star t)" (is "_ ≤ ?R")
    by (rule Times_mono[OF order_refl ord_le_eq_trans[OF le_PlusR Star_unfoldL[symmetric]]])
  finally show "?L ≤ ?R" .
next
  have "Times (Times (Star r) s) t = Times (Times (Star r) r) s" (is "?L = _")
    by (simp only: assms Times_assoc)
  also have "… ≤ Times (Star r) s" (is "_ ≤ ?R")
    by (rule Times_mono[OF ord_le_eq_trans[OF le_PlusR Star_unfoldR[symmetric]] order_refl])
  finally show "?L ≤ ?R" .
qed simp_all

lemma sliding: "Times (Star (Times r s)) r = Times r (Star (Times s r))"
proof (rule antisym[OF ardenL[OF Plus_upper[OF le_TimesL]] ardenR[OF Plus_upper[OF le_TimesR]]])
  have "Times (Times r s) (Times r (Star (Times s r))) =
    Times r (Times (Times s r) (Star (Times s r)))" (is "?L = _") by simp
  also have "… ≤ Times r (Star (Times s r))" (is "_ ≤ ?R")
    by (rule Times_mono[OF order_refl ord_le_eq_trans[OF le_PlusR Star_unfoldL[symmetric]]])
  finally show "?L ≤ ?R" .
next
  have "Times (Times (Star (Times r s)) r) (Times s r) =
    Times (Times (Star (Times r s)) (Times r s)) r" (is "?L = _") by simp
  also have "… ≤ Times (Star (Times r s)) r" (is "_ ≤ ?R")
    by (rule Times_mono[OF ord_le_eq_trans[OF le_PlusR Star_unfoldR[symmetric]] order_refl])
  finally show "?L ≤ ?R" .
qed simp_all

lemma denesting: "Star (Plus r s) = Times (Star r) (Star (Times s (Star r)))"
proof (rule antisym[OF _ ardenR[OF Plus_upper[OF Star_mono[OF le_PlusL]]]])
  have "Star (Plus r s) = Times (Star (Plus r s)) One" by simp
  also have "… ≤ Times (Star r) (Star (Times s (Star r)))"
  proof (rule ardenL[OF Plus_upper])
    show "Times (Plus r s) (Times (Star r) (Star (Times s (Star r)))) ≤
      Times (Star r) (Star (Times s (Star r)))" (is "Times _ ?L ≤ ?R")
    proof (subst Times_PlusL, rule Plus_upper)
      show "Times s ?L ≤ ?R"
        by (subst (5) Star_unfoldL, rule order_trans[OF order_trans[OF _ le_PlusR] le_TimesR]) auto
    qed (subst (4) Times_Star[symmetric], auto simp del: Times_Star intro: le_Star)
  qed simp
  finally show "Star (Plus r s) ≤ Times (Star r) (Star (Times s (Star r)))" .
next
  have "Times (Star (Plus r s)) (Times s (Star r)) ≤ Times (Star (Plus r s)) (Star (Plus r s))"
    by (subst (4) Star_unfoldL, rule Times_mono[OF order_refl
      order_trans[OF Times_mono[OF le_PlusR Star_mono[OF le_PlusL]] le_PlusR]])
  also have "… = Star (Plus r s)" by simp
  finally show "Times (Star (Plus r s)) (Times s (Star r)) ≤ Star (Plus r s)" .
qed

text {*
It is useful to lift binary operators @{term Plus} and @{term Times}
to $n$-ary operators (that take a list as input).
*}

corec (friend) PLUS :: "'a language list ⇒ 'a language" where
  "PLUS xs = Lang (∃x∈set xs. 𝔬 x) (λa. PLUS (map (λr. 𝔡 r a) xs))"

lemma PLUS_sel[simp]:
  "𝔬 (PLUS xs) = (∃x∈set xs. 𝔬 x)"
  "𝔡 (PLUS xs) = (λa. PLUS (map (λr. 𝔡 r a) xs))"
  by (subst PLUS.code; simp)+

primrec tails where
  "tails [] = [[]]"
| "tails (x # xs) = (x # xs) # tails xs"

lemma tails_snoc[simp]: "tails (xs @ [x]) = map (λys. ys @ [x]) (tails xs) @ [[]]"
  by (induct xs) auto

lemma length_tails[simp]: "length (tails xs) = Suc (length xs)"
  by (induct xs) auto

corec TIMES :: "'a language list ⇒ 'a language" where
  "TIMES xs = Lang (∀x∈set xs. 𝔬 x) (λa. (let n = length (takeWhile 𝔬 xs)
  in PLUS (map (λzs. TIMES (𝔡 (hd zs) a # tl zs)) (take (Suc n) (tails (xs @ [One]))))))"

lemma TIMES_sel[simp]:
  "𝔬 (TIMES xs) = (∀x∈set xs. 𝔬 x)"
  "𝔡 (TIMES xs) a = (let n = length (takeWhile 𝔬 xs)
  in PLUS (map (λzs. TIMES (𝔡 (hd zs) a # tl zs)) (take (Suc n) (tails (xs @ [One])))))"
  by (subst TIMES.code; simp)+


section {* Context Free Languages *}

text {*
A context-free grammar consists of a list of productions for every nonterminal
and an initial nonterminal. The productions are required to be in weak Greibach
normal form, i.e. each right hand side of a production must either be empty or
start with a terminal.
*}

locale cfg =
fixes init :: "'n::enum"
and   prod :: "'n ⇒ ('t + 'n) list list"
assumes weakGreibach: "∀N. ∀rhs ∈ set (prod N). case rhs of (Inr N # _) ⇒ False | _ ⇒ True"

context
fixes init :: "'n::enum"
and   prod :: "'n ⇒ ('t + 'n) list list"
begin

private abbreviation "𝔬n N ≡ ([] ∈ set (prod N))"
private fun 𝔬r where
  "𝔬r [] = True"
| "𝔬r (Inl _ # _) = False"
| "𝔬r (Inr N # xs) = (𝔬n N ∧ 𝔬r xs)"
abbreviation "𝔬P P ≡ fold (op ∨) (map 𝔬r P) False"

private abbreviation "𝔡n N a ≡
  List.map_filter (λxs.
    case xs of Inl b # xs ⇒ if a = b then Some xs else None | _ ⇒ None) (prod N)"
private fun 𝔡r where
  "𝔡r [] a = []"
| "𝔡r (Inl b # xs) a = (if a = b then [xs] else [])"
| "𝔡r (Inr N # xs) a = map (λys. ys @ xs) (𝔡n N a) @ (if 𝔬n N then 𝔡r xs a else [])"
abbreviation "𝔡P P a ≡ fold (op @) (map (λr. 𝔡r r a) P) []"

corec subst :: "('t + 'n) list list ⇒ 't language" where
  "subst P = Lang (𝔬P P) (λx. subst (𝔡P P x))"

end

abbreviation (in cfg) lang where
  "lang ≡ subst prod (prod init)"


section {* Word-theoretic Semantics of Languages *}

text {*
We show our @{type language} codatatype being isomorphic to the standard
language representation as a set of lists.
*}

primrec in_language :: "'a language ⇒ 'a list ⇒ bool" where
  "in_language L [] = 𝔬 L"
| "in_language L (x # xs) = in_language (𝔡 L x) xs"

primcorec to_language :: "'a list set ⇒ 'a language" where
  "𝔬 (to_language L) = ([] ∈ L)"
| "𝔡 (to_language L) = (λa. to_language {w. a # w ∈ L})"

lemma in_language_to_language[simp]: "Collect (in_language (to_language L)) = L"
proof (rule set_eqI, unfold mem_Collect_eq)
  fix w show "in_language (to_language L) w = (w ∈ L)" by (induct w arbitrary: L) auto
qed

lemma to_language_in_language[simp]: "to_language (Collect (in_language L)) = L"
  by (coinduction arbitrary: L) auto

lemma in_language_bij: "bij (Collect o in_language)"
proof (rule bijI', unfold o_apply, safe)
  fix L R :: "'a language" assume "Collect (in_language L) = Collect (in_language R)"
  then show "L = R" unfolding set_eq_iff mem_Collect_eq
    by (coinduction arbitrary: L R) (metis in_language.simps)
next
  fix L :: "'a list set"
  have "L = Collect (in_language (to_language L))" by simp
  then show "∃K. L = Collect (in_language K)" by blast
qed

lemma to_language_bij: "bij to_language"
  by (rule o_bij[of "Collect o in_language"]) (simp_all add: fun_eq_iff)

section ‹Code generation›

export_code Zero One Atom Plus Times Star Inter Not Full Shuffle PLUS TIMES in_language
  in Haskell module_name Coinductive_Language file code

(*<*)
end
(*>*)