(* Title: X86 instruction semantics and basic block symbolic execution Authors:FreekVerbeek,AbhijithBharadwaj,JoshuaBockenek,IanRoessle,TimmyWeerwag,BinoyRavindran Year:2020 Maintainer:FreekVerbeek(freek@vt.edu)
*)
section"Concrete state and instructions"
theory State imports Main Memory begin
text‹A state consists of registers, memory, flags and a rip. Some design considerations here:
begin{itemize}
item All register values are 256 bits. We could also distinguish 64 bits registers, 128 registers etc.
That would increase complexity in proofs and datastructures. The cost of using 256 everywhere is
that a goal typically will have some casted 64 bits values.
item The instruction pointer RIP is a special 64-bit register outside of the normal register set.
item Strings are used for registers and flags. We would prefer an enumerative datatype, however, that would be
extremely slow since there are roughly 100 register names.
end{itemize} ›
record state =
regs :: "string ==> 256word"
mem :: "64 word ==> 8 word"
flags :: "string ==> bool"
rip :: "64 word"
text‹x86 has register aliassing. For example, register EAX is the lower 32 bits of register RAX.
This function map register aliasses to the ``real'' register. For example:
This means that register AH is the second byte (bits 8 to 16) of register RAX.
The bool @{const False} indicates that writing to AH does not overwrite the remainder of RAX.
Register EAX is the lower 4 bytes of RAX. Writing to EAX means overwritting the remainder of RAX
with zeroes. ›
definition reg_size :: "string ==> nat" ― ‹in bytes› where"reg_size reg ≡ let (_,_,l,h) = real_reg reg in (h - l) div 8"
text‹ We now define functions for reading and writing from state.›
definition reg_read :: "state ==> string ==> 256 word" where"reg_read σ reg ≡ if reg = ''rip'' then ucast (rip σ) else if reg = '''' then 0 else ― ‹happens if no base register is used in an address› let (_,r,l,h) = real_reg reg in ⟨l,h⟩(regs σ r)"
primrec fromBool :: "bool ==> 'a :: len word" where "fromBool True = 1"
| "fromBool False = 0"
definition flag_read :: "state ==> string ==> 256 word" where"flag_read σ flag ≡ fromBool (flags σ flag)"
definition mem_read :: "state ==> 64 word ==> nat ==> 256 word" where"mem_read σ a si ≡ word_rcat (read_bytes (mem σ) a si)"
text‹Doing state-updates occur through a tiny deeply embedded language of state updates. This allows us
to reason over state updates through theorems.›
datatype StateUpdate =
RegUpdate string "256 word" ― ‹Write value to register›
| FlagUpdate "string ==> bool" ― ‹Update all flags at once›
| RipUpdate "64 word" ― ‹Update instruction pointer with address›
| MemUpdate "64 word" nat "256 word" ― ‹Write a number of bytes of a value to the address›
primrec state_update where "state_update (RegUpdate reg val) = (λ σ . σ(regs := (regs σ)(reg := val)))"
| "state_update (FlagUpdate val) = (λ σ . σ(flags := val))"
| "state_update (RipUpdate a) = (λ σ . σ(rip := a))"
| "state_update (MemUpdate a si val) = (λ σ . let new = (λ a' . take_byte (unat (a' - a)) val) in σ(mem := override_on (mem σ) new (region_addresses a si)))"
abbreviation RegUpdateSyntax (‹_ :=r _›30) where"RegUpdateSyntax reg val ≡ RegUpdate reg val" abbreviation MemUpdateSyntax (‹[_,_] :=m _›30) where"MemUpdateSyntax a si val ≡ MemUpdate a si val" abbreviation FlagUpdateSyntax (‹setFlags›) where"FlagUpdateSyntax val ≡ FlagUpdate val" abbreviation RipUpdateSyntax (‹setRip›) where"RipUpdateSyntax val ≡ RipUpdate val"
text‹Executes a write to a register in terms of the tiny deeply embedded language above.› definition reg_write where"reg_write reg val σ ≡ let (b,r,l,h) = real_reg reg; curr_val = reg_read σ r; new_val = if b then val else overwrite l h curr_val val in state_update (RegUpdate r new_val) σ"
fun resolve_address :: "state ==> 64 word ==> char list ==> char list ==> nat ==> 64 word" where"resolve_address σ offset base index scale = (let i = ucast (reg_read σ index); b = ucast (reg_read σ base) in offset + b + of_nat scale*i)"
primrec operand_read :: "state ==> Operand ==> 256 word" where "operand_read σ (Imm i) = i"
| "operand_read σ (Reg r) = reg_read σ r"
| "operand_read σ (Flag f) = flag_read σ f"
| "operand_read σ (Mem si offset base index scale) = (let a = resolve_address σ offset base index scale in mem_read σ a si )"
primrec state_with_updates :: "state ==> StateUpdate list ==> state" (infixl‹with›66) where "σ with [] = σ"
| "(σ with (f#fs)) = state_update f (σ with fs)"
primrec (nonexhaustive) operand_write :: "Operand ==> 256word ==> state ==> state" where "operand_write (Reg r) v σ = reg_write r v σ"
| "operand_write (Mem si offset base index scale) v σ = (let i = ucast (reg_read σ index); b = ucast (reg_read σ base); a = offset + b + of_nat scale*i in σ with [[a,si] :=m v] )"
text‹ The following theorems simplify reading from state parts after doing updates to other state parts.›
lemma regs_reg_write: shows"regs (σ with ((r :=r w)#updates)) r' = (if r=r' then w else regs (σ with updates) r')" by (induct updates arbitrary: σ, auto simp add: case_prod_unfold Let_def)
lemma regs_mem_write: shows"regs (σ with (([a,si] :=m v)#updates)) r = regs (σ with updates) r" by (induct updates arbitrary: σ, auto)
lemma regs_flag_write: shows"regs (σ with ((setFlags v)#updates)) r = regs (σ with updates) r" by (induct updates arbitrary: σ, auto)
lemma regs_rip_write: shows"regs (σ with ((setRip a)#updates)) f = regs (σ with updates) f" by (auto)
lemma flag_read_reg_write: shows"flag_read (σ with ((r :=r w)#updates)) f = flag_read (σ with updates) f" by (induct updates arbitrary: σ, auto simp add: flag_read_def)
lemma flag_read_mem_write: shows"flag_read (σ with (([a,si] :=m v)#updates)) f = flag_read (σ with updates) f" by (induct updates arbitrary: σ, auto simp add: flag_read_def)
lemma flag_read_flag_write: shows"flag_read (σ with ((setFlags v)#updates)) = fromBool o v" by (induct updates arbitrary: σ, auto simp add: flag_read_def)
lemma flag_read_rip_write: shows"flag_read (σ with ((setRip a)#updates)) f = flag_read (σ with updates) f" by (auto simp add: flag_read_def)
lemma mem_read_reg_write: shows"mem_read (σ with ((r :=r w)#updates)) a si = mem_read (σ with updates) a si" by (auto simp add: mem_read_def read_bytes_def)
lemma mem_read_flag_write: shows"mem_read (σ with ((setFlags v)#updates)) a si = mem_read (σ with updates) a si" by (auto simp add: mem_read_def read_bytes_def)
lemma mem_read_rip_write: shows"mem_read (σ with ((setRip a')#updates)) a si = mem_read (σ with updates) a si" by (auto simp add: mem_read_def read_bytes_def)
lemma mem_read_mem_write_alias: assumes"si' ≤ si" and"si ≤ 2^64" shows"mem_read (σ with (([a,si] :=m v)#updates)) a si' = ⟨0,si'*8⟩ v" using assms by (auto simp add: mem_read_def word_rcat_read_bytes read_bytes_override_on_enclosed[where offset=0and offset'=0,simplified])
lemma mem_read_mem_write_separate: assumes"separate a si a' si'" shows"mem_read (σ with (([a,si] :=m v)#updates)) a' si' = mem_read (σ with updates) a' si'" using assms by (auto simp add: mem_read_def read_bytes_override_on_separate)
lemma mem_read_mem_write_enclosed_numeral[simp]: assumes"unat (numeral a' - numeral a::64 word) + (numeral si'::nat) ≤ numeral si" and"numeral a' ≥ (numeral a::64 word)" and"numeral si < (2 ^ 64::nat)" shows"mem_read (σ with (([numeral a,numeral si] :=m v)#updates)) (numeral a') (numeral si') = ⟨unat (numeral a' - (numeral a::64 word)) * 8,(unat (numeral a' - (numeral a::64 word)) + (numeral si')) * 8⟩v" proof- have1: "numeral a + (numeral a' - numeral a) = (numeral a'::64 word)" using assms(2) by (metis add.commute diff_add_cancel) thus ?thesis using mem_read_mem_write_enclosed_plus2[of "numeral a' - numeral a""numeral si'""numeral si" σ "numeral a" v updates,OF assms(1,3)] by auto qed
lemma mem_read_mem_write_enclosed_numeral1_[simp]: assumes"unat (numeral a' - numeral a::64 word) + (numeral si'::nat) ≤ Suc 0" and"numeral a' ≥ (numeral a::64 word)" shows"mem_read (σ with (([numeral a,Suc 0] :=m v)#updates)) (numeral a') (numeral si') = ⟨unat (numeral a' - (numeral a::64 word)) * 8,(unat (numeral a' - (numeral a::64 word)) + (numeral si')) * 8⟩v" proof- have1: "numeral a + (numeral a' - numeral a) = (numeral a'::64 word)" using assms(2) by (metis add.commute diff_add_cancel) thus ?thesis using mem_read_mem_write_enclosed_plus2[of "numeral a' - numeral a""numeral si'""Suc 0" σ "numeral a" v updates,OF assms(1)] by auto qed
lemma mem_read_mem_write_enclosed_numeral_1[simp]: assumes"unat (numeral a' - numeral a::64 word) + (Suc 0) ≤ numeral si" and"numeral a' ≥ (numeral a::64 word)" and"numeral si < (2 ^ 64::nat)" shows"mem_read (σ with (([numeral a,numeral si] :=m v)#updates)) (numeral a') (Suc 0) = ⟨unat (numeral a' - (numeral a::64 word)) * 8,(unat (numeral a' - (numeral a::64 word)) + (Suc 0)) * 8⟩v" proof- have1: "numeral a + (numeral a' - numeral a) = (numeral a'::64 word)" using assms(2) by (metis add.commute diff_add_cancel) thus ?thesis using mem_read_mem_write_enclosed_plus2[of "numeral a' - numeral a""Suc 0""numeral si" σ "numeral a" v updates,OF assms(1,3)] by auto qed
lemma mem_read_mem_write_enclosed_numeral11[simp]: assumes"unat (numeral a' - numeral a::64 word) + (Suc 0) ≤ Suc 0" and"numeral a' ≥ (numeral a::64 word)" shows"mem_read (σ with (([numeral a,Suc 0] :=m v)#updates)) (numeral a') (Suc 0) = ⟨unat (numeral a' - (numeral a::64 word)) * 8,(unat (numeral a' - (numeral a::64 word)) + (Suc 0)) * 8⟩v" proof- have1: "numeral a + (numeral a' - numeral a) = (numeral a'::64 word)" using assms(2) by (metis add.commute diff_add_cancel) thus ?thesis using mem_read_mem_write_enclosed_plus2[of "numeral a' - numeral a""Suc 0""Suc 0" σ "numeral a" v updates,OF assms(1)] by auto qed
lemma rip_reg_write[simp]: shows"rip (σ with ((r :=r v)#updates)) = rip (σ with updates)" by (auto simp add: case_prod_unfold Let_def)
lemma rip_flag_write[simp]: shows"rip (σ with ((setFlags v)#updates)) = rip (σ with updates)" by (auto)
lemma rip_mem_write[simp]: shows"rip (σ with (([a,si] :=m v)#updates)) = rip (σ with updates)" by (auto)
lemma rip_rip_write[simp]: shows"rip (σ with ((setRip a)#updates)) = a" by (auto)
lemma with_with: shows"(σ with updates) with updates' = σ with (updates' @ updates)" by (induct updates' arbitrary: σ,auto)
lemma add_state_update_to_list: shows"state_update upd (σ with updates) = σ with (upd#updates)" by auto
text‹The updates performed to a state are ordered: memoery, registers, flags, rip.
This function is basically insertion sort. Moreover, consecutive updates to the same register
are removed.›
fun insert_state_update where "insert_state_update (setRip a) (setRip a'#updates) = insert_state_update (setRip a) updates"
| "insert_state_update (setRip a) (setFlags v#updates) = setFlags v # (insert_state_update (setRip a) updates)"
| "insert_state_update (setRip a) ((r :=r v)#updates) = (r :=r v) # (insert_state_update (setRip a) updates)"
| "insert_state_update (setRip a) (([a',si] :=m v)#updates) = ([a',si] :=m v) # (insert_state_update (setRip a) updates)"
fun clean where "clean [] = []"
| "clean [upd] = [upd]"
| "clean (upd#upd'#updates) = insert_state_update upd (clean (upd'#updates))"
lemma insert_state_update: shows"σ with (insert_state_update upd updates) = σ with (upd # updates)" by (induct updates rule: insert_state_update.induct,auto simp add: fun_upd_twist)
lemma clean_state_updates: shows"σ with (clean updates) = σ with updates" by (induct updates rule: clean.induct,auto simp add: insert_state_update)
text‹The set of simplification rules used during symbolic execution.› lemmas state_simps =
qword_ptr_def dword_ptr_def word_ptr_def byte_ptr_def reg_size_def
reg_write_def real_reg_def reg_read_def
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.