Merge branch 'experiments/dsp' into experiments/minif2f
This commit is contained in:
commit
0ab29e11cd
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"problem": "For any natural number n, n + 0 = n.",
|
||||
"level": "SF foundations level 1",
|
||||
"type": "Algebra",
|
||||
"solution": [
|
||||
"Consider some natural number n. The proof will be by induction. Consider the base case n=0. We have 0 + 0 = 0 which holds by the definition of addion.",
|
||||
"Now the inductive case we want to show n'+1 + 0 = n'+1. Assume it holds for n i.e., n' + 0 = n. So now by the we know the LHS is equal to (n'+0) + 1.",
|
||||
"Then by the induction hypothesis we know the LHS simplifies to n'+1 completing the proof. Qed."
|
||||
]
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
{
|
||||
"problem": "For any natural number n, 0 + n = n.",
|
||||
"level": "SF foundations level 1",
|
||||
"type": "Algebra",
|
||||
"solution": "Consider some natural number n. We now want to show 0 + n = n. Using addition both sides are equal."
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
{
|
||||
"problem": "For any natural number n, 0 + n = n.",
|
||||
"level": "SF foundations level 1",
|
||||
"type": "Algebra",
|
||||
"solution": "Consider some natural number n. We now want to show 0 + n = n. For that we use the definition of addition to get n = n and conclude both sides are equal (by reflexity)."
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
[
|
||||
{
|
||||
"problem": "For any natural number n, 0 + n = n.",
|
||||
"level": "SF foundations level 1",
|
||||
"type": "Logical Foundations",
|
||||
"solution": [
|
||||
"Consider some natural number n. We want to show 0 + n = n. ",
|
||||
"By using definition of addition on both sides, LHS and RHS are now equal, done."
|
||||
]
|
||||
},
|
||||
{
|
||||
"problem": "For any natural number n, n + 0 = n.",
|
||||
"level": "SF foundations level 1",
|
||||
"type": "Logical Foundations",
|
||||
"solution": [
|
||||
"Consider some natural number n. The proof will be by induction. ",
|
||||
"The base case n=0, so we have 0 + 0 = 0, which holds by the definition of addion. ",
|
||||
"Consider the inductive case, so we want to show (k + 1) + 0 = (k + 1) for any k < n assuming the IH holds for such k (IH: k + 0 = k). ",
|
||||
"By the IH we have (k + 1) + 0 = (k + 1). ",
|
||||
"By def of addition we have (k + 0) + 1 = (k + 1). ",
|
||||
"By the induction hypothesis (IH) we have k + 0 = k. ",
|
||||
"LHS and RHS are equal so proof is complete."
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,368 +0,0 @@
|
|||
[
|
||||
{
|
||||
"nl_problem": "For any natural number n, n + 0 = n.",
|
||||
"nl_solution": [
|
||||
"Consider some natural number n. We want to show n + 0 = n. ",
|
||||
"By using fats of addition on both sides, LHS and RHS are now equal, done."
|
||||
],
|
||||
"fl_problem": "theorem n_plus_zero : ∀ n : ℕ, n + 0 = n := by",
|
||||
"fl_solution": [
|
||||
"-- Prove that n + 0 = n.\n",
|
||||
"theorem n_plus_zero : ∀ n : ℕ, n + 0 = n := by\n",
|
||||
" -- Consider some n in Nats.\n",
|
||||
" intro n",
|
||||
"-- Using facts of addition simplify n + 0 to n.\n",
|
||||
" rw [Nat.add_zero]"
|
||||
],
|
||||
"src_header_fl_solution": [
|
||||
"import Mathlib.Data.Nat.Basic"
|
||||
],
|
||||
|
||||
"nl_problem_proved_sketch": "For any natural number n, n + 0 = n.",
|
||||
"nl_solution_proved_sketch": [
|
||||
"We want to show n + 0 = n. ",
|
||||
"We have the fact of addition that, n + 0 = n. ",
|
||||
"Thus, the left-hand side and right-hand side are equal, which completes the proof."
|
||||
],
|
||||
"fl_problem_proved_sketch": "theorem n_plus_zero_proved_formal_sketch : ∀ n : ℕ, n + 0 = n := by",
|
||||
"fl_solution_proved_sketch": [
|
||||
"-- Prove that n + 0 = n via a formal proof sketch",
|
||||
"theorem n_plus_zero_proved_formal_sketch : ∀ n : ℕ, n + 0 = n := by",
|
||||
" -- We have the fact of addition n + 0 = n, use it to show left and right are equal.",
|
||||
" have h_nat_add_zero: ∀ n : ℕ, n + 0 = n := Nat.add_zero",
|
||||
" exact h_nat_add_zero"
|
||||
],
|
||||
"src_header_fl_solution_proved_sketch": ["import Mathlib.Data.Nat.Basic"],
|
||||
|
||||
"nl_problem_proved_sketch_aesop": "For any natural number n, n + 0 = n.",
|
||||
"nl_solution_proved_sketch_aesop": [
|
||||
"We want to show n + 0 = n. ",
|
||||
"We have the fact of addition that, n + 0 = n. ",
|
||||
"Thus, the left-hand side and right-hand side are equal, which completes the proof."
|
||||
],
|
||||
"fl_problem_proved_sketch_aesop": "theorem n_plus_zero_proved_formal_sketch' : ∀ n : ℕ, n + 0 = n := by",
|
||||
"fl_solution_proved_sketch_aesop": [
|
||||
"-- Prove that n + 0 = n via a formal proof sketch with aesop. ",
|
||||
"theorem n_plus_zero_proved_formal_sketch' : ∀ n : ℕ, n + 0 = n := by",
|
||||
" -- We have the fact of addition n + 0 = n, use it to show left and right are equal. ",
|
||||
" have h_nat_add_zero: ∀ n : ℕ, n + 0 = n := by aesop",
|
||||
" exact h_nat_add_zero"
|
||||
],
|
||||
"src_header_fl_solution_proved_sketch_aesop": [
|
||||
"import Mathlib.Data.Nat.Basic",
|
||||
"import Aesop"
|
||||
]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
"nl_problem": "For any natural number n, 0 + n = n.",
|
||||
"nl_solution": [
|
||||
"Consider some natural number n. We want to show 0 + n = n.",
|
||||
"By using facts of addition and induction on n, we can prove the statement for both the base case and the inductive step."
|
||||
],
|
||||
"fl_problem": "theorem zero_plus_n : ∀ n : ℕ, 0 + n = n := by",
|
||||
"fl_solution": [
|
||||
"-- Prove that 0 + n = n by induction",
|
||||
"theorem zero_plus_n : ∀ n : ℕ, 0 + n = n := by",
|
||||
"-- Consider some n in Nats.",
|
||||
"intro n",
|
||||
"-- Perform induction on n.",
|
||||
"induction n with",
|
||||
"| zero =>",
|
||||
"-- Base case: 0 + 0 = 0",
|
||||
"rw [Nat.add_zero]",
|
||||
"| succ n ih =>",
|
||||
"-- Inductive step: assume 0 + n = n, prove 0 + succ n = succ n",
|
||||
"rw [Nat.add_succ]",
|
||||
"rw [ih]"
|
||||
],
|
||||
"src_header_fl_solution": [
|
||||
"import Mathlib.Data.Nat.Basic"
|
||||
],
|
||||
|
||||
"nl_problem_proved_sketch": "For any natural number n, 0 + n = n.",
|
||||
"nl_solution_proved_sketch": [
|
||||
"We want to show 0 + n = n.",
|
||||
"By using the fact of addition and performing induction on n, we can prove the statement for both the base case and the inductive step."
|
||||
],
|
||||
"fl_problem_proved_sketch": "theorem zero_plus_n_proved_formal_sketch : ∀ n : ℕ, 0 + n = n := by",
|
||||
"fl_solution_proved_sketch": [
|
||||
"-- Prove that 0 + n = n by induction via a formal proof sketch",
|
||||
"theorem zero_plus_n_proved_formal_sketch : ∀ n : ℕ, 0 + n = n := by",
|
||||
"-- Consider some n in Nats.",
|
||||
"intro n",
|
||||
"-- Perform induction on n.",
|
||||
"induction n with",
|
||||
"| zero =>",
|
||||
"-- Base case: 0 + 0 = 0",
|
||||
"have h_base: 0 + 0 = 0 := by rw [Nat.add_zero]",
|
||||
"exact h_base",
|
||||
"| succ n ih =>",
|
||||
"-- Inductive step: assume 0 + n = n, prove 0 + succ n = succ n",
|
||||
"have h_inductive: 0 + Nat.succ n = Nat.succ n := by",
|
||||
"rw [Nat.add_succ]",
|
||||
"rw [ih]",
|
||||
"exact h_inductive"
|
||||
],
|
||||
"src_header_fl_solution_proved_sketch": [
|
||||
"import Mathlib.Data.Nat.Basic"
|
||||
],
|
||||
|
||||
"nl_problem_proved_sketch_aesop": "For any natural number n, 0 + n = n.",
|
||||
"nl_solution_proved_sketch_aesop": [
|
||||
"We want to show 0 + n = n.",
|
||||
"By using the fact of addition and performing induction on n, we can prove the statement for both the base case and the inductive step using aesop."
|
||||
],
|
||||
|
||||
"fl_problem_proved_sketch_aesop": "theorem zero_plus_n_proved_formal_sketch' : ∀ n : ℕ, 0 + n = n := by",
|
||||
"fl_solution_proved_sketch_aesop": [
|
||||
"-- Prove that 0 + n = n by induction via a formal proof sketch with aesop.",
|
||||
"theorem zero_plus_n_proved_formal_sketch' : ∀ n : ℕ, 0 + n = n := by",
|
||||
"-- Consider some n in Nats.",
|
||||
"intro n",
|
||||
"-- Perform induction on n.",
|
||||
"induction n with",
|
||||
"| zero =>",
|
||||
"-- Base case: 0 + 0 = 0",
|
||||
"have h_base: 0 + 0 = 0 := by aesop",
|
||||
"exact h_base",
|
||||
"| succ n ih =>",
|
||||
"-- Inductive step: assume 0 + n = n, prove 0 + succ n = succ n",
|
||||
"have h_inductive: 0 + Nat.succ n = Nat.succ n := by aesop",
|
||||
"exact h_inductive"
|
||||
],
|
||||
"src_header_fl_solution_proved_sketch_aesop": [
|
||||
"import Mathlib.Data.Nat.Basic",
|
||||
"import Aesop"
|
||||
]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
"nl_problem": "For any natural numbers n and m we have commutativity, n + m = m + n.",
|
||||
"nl_solution": [
|
||||
"Consider some natural numbers n and m. We want to show n + m = m + n.",
|
||||
"By using facts of addition and induction on n, we can prove the statement for both the base case and the inductive step."
|
||||
],
|
||||
"fl_problem": "theorem add_comm_normal : ∀ n m : ℕ, n + m = m + n := by",
|
||||
"fl_solution": [
|
||||
"-- Prove that n + m = m + n",
|
||||
"theorem add_comm_normal : ∀ n m : ℕ, n + m = m + n := by",
|
||||
"-- Consider some n and m in Nats.",
|
||||
"intros n m",
|
||||
"-- Perform induction on n.",
|
||||
"induction n with",
|
||||
"| zero =>",
|
||||
"-- Base case: When n = 0, we need to show 0 + m = m + 0.",
|
||||
"-- Using the definition of addition, 0 + m = m and m + 0 = m.",
|
||||
"rw [Nat.zero_add, Nat.add_zero]",
|
||||
"| succ n ih =>",
|
||||
"-- Inductive step: Assume n + m = m + n, we need to show succ n + m = m + succ n.",
|
||||
"-- We use the fact n + (m + 1) = (n + m) + 1.",
|
||||
"have plus_n_Sm_normal: ∀ n m : ℕ, n + (m + 1) = (n + m) + 1 := by",
|
||||
" intros n m",
|
||||
" rw [Nat.add_succ]",
|
||||
"-- Apply the fact to rewrite succ n + m = (n + m) + 1.",
|
||||
"rw [Nat.add_succ, Nat.add_zero]",
|
||||
"rw [← ih]",
|
||||
"rw [Nat.succ_add]"
|
||||
],
|
||||
"src_header_fl_solution": [
|
||||
"import Mathlib.Data.Nat.Basic"
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"nl_problem": "For any natural numbers n and m, n + m = m + n.",
|
||||
"nl_solution": [
|
||||
"Consider some natural numbers n and m. We want to show n + m = m + n.",
|
||||
"By using the fact of addition and performing induction on n, we can prove the statement for both the base case and the inductive step."
|
||||
],
|
||||
"fl_problem": "theorem add_comm_proved_formal_sketch : ∀ n m : ℕ, n + m = m + n := by",
|
||||
"fl_solution": [
|
||||
"-- Prove that n + m = m + n via a formal proof sketch",
|
||||
"theorem add_comm_proved_formal_sketch : ∀ n m : ℕ, n + m = m + n := by",
|
||||
"-- Consider some n and m in Nats.",
|
||||
"intros n m",
|
||||
"-- Perform induction on n.",
|
||||
"induction n with",
|
||||
"| zero =>",
|
||||
"-- Base case: When n = 0, we need to show 0 + m = m + 0.",
|
||||
"-- We have the fact 0 + m = m by the definition of addition.",
|
||||
"have h_base: 0 + m = m := Nat.zero_add m",
|
||||
"-- We also have the fact m + 0 = m by the definition of addition.",
|
||||
"have h_symm: m + 0 = m := Nat.add_zero m",
|
||||
"-- Combining these, we get 0 + m = m + 0.",
|
||||
"rw [h_base, h_symm]",
|
||||
"| succ n ih =>",
|
||||
"-- Inductive step: Assume n + m = m + n, we need to show succ n + m = m + succ n.",
|
||||
"-- By the inductive hypothesis, we have n + m = m + n.",
|
||||
"have h_inductive: n + m = m + n := ih",
|
||||
"-- proof is:",
|
||||
"-- We eventually want to flip n + m and simplify to make both sides the same. Thus,",
|
||||
"-- 1. Note we start with: Nat.succ n + m = m + Nat.succ n, so, pull the succ out from m + Nat.succ n on the right side from the addition using addition facts Nat.add_succ.",
|
||||
"have h_pull_succ_out_from_right: m + Nat.succ n = Nat.succ (m + n) := by rw [Nat.add_succ]",
|
||||
"-- 2. then to flip m + S n to something like S (n + m) we need to use the IH.",
|
||||
"have h_flip_n_plus_m: Nat.succ (n + m) = Nat.succ (m + n) := by rw [h_inductive]",
|
||||
"-- 3. Now the n & m are on the correct sides Nat.succ n + m = Nat.succ (n + m), so let's use the def of addition to pull out the succ from the addition on the left using Nat.succ_add.",
|
||||
"have h_pull_succ_out_from_left: Nat.succ n + m = Nat.succ (n + m) := by rw [Nat.succ_add]",
|
||||
"-- Combining these, we get succ n + m = m + succ n.",
|
||||
"rw [h_pull_succ_out_from_right, ←h_flip_n_plus_m, h_pull_succ_out_from_left]"
|
||||
],
|
||||
"src_header_fl_solution": [
|
||||
"import Mathlib.Data.Nat.Basic"
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"nl_problem": "For any natural numbers n and m, n + m = m + n.",
|
||||
"nl_solution": [
|
||||
"Consider some natural numbers n and m. We want to show n + m = m + n.",
|
||||
"By using the fact of addition and performing induction on n, we can prove the statement for both the base case and the inductive step."
|
||||
],
|
||||
"fl_problem": "theorem add_comm_proved_formal_sketch_aesop : ∀ n m : ℕ, n + m = m + n := by",
|
||||
"fl_solution": [
|
||||
"-- Prove that n + m = m + n via a formal proof sketch with aesop.",
|
||||
"theorem add_comm_proved_formal_sketch_aesop : ∀ n m : ℕ, n + m = m + n := by",
|
||||
"-- Consider some n and m in Nats.",
|
||||
"intros n m",
|
||||
"-- Perform induction on n.",
|
||||
"induction n with",
|
||||
"| zero =>",
|
||||
"-- Base case: When n = 0, we need to show 0 + m = m + 0.",
|
||||
"-- We have the fact 0 + m = m by the definition of addition.",
|
||||
"have h_base: 0 + m = m := by aesop",
|
||||
"-- We also have the fact m + 0 = m by the definition of addition.",
|
||||
"have h_symm: m + 0 = m := by aesop",
|
||||
"-- Combining these, we get 0 + m = m + 0.",
|
||||
"rw [h_base, h_symm]",
|
||||
"| succ n ih =>",
|
||||
"-- Inductive step: Assume n + m = m + n, we need to show succ n + m = m + succ n.",
|
||||
"-- By the inductive hypothesis, we have n + m = m + n.",
|
||||
"have h_inductive: n + m = m + n := by aesop",
|
||||
"-- proof is:",
|
||||
"-- We eventually want to flip n + m and simplify to make both sides the same. Thus,",
|
||||
"-- 1. Note we start with: Nat.succ n + m = m + Nat.succ n, so, pull the succ out from m + Nat.succ n on the right side from the addition using addition facts Nat.add_succ.",
|
||||
"have h_pull_succ_out_from_right: m + Nat.succ n = Nat.succ (m + n) := by aesop",
|
||||
"-- 2. then to flip m + S n to something like S (n + m) we need to use the IH.",
|
||||
"have h_flip_n_plus_m: Nat.succ (n + m) = Nat.succ (m + n) := by aesop",
|
||||
"-- 3. Now the n & m are on the correct sides Nat.succ n + m = Nat.succ (n + m), so let's use the def of addition to pull out the succ from the addition on the left using Nat.succ_add.",
|
||||
"have h_pull_succ_out_from_left: Nat.succ n + m = Nat.succ (n + m) := by rw [Nat.succ_add]",
|
||||
"-- Combining these, we get succ n + m = m + succ n.",
|
||||
"rw [h_pull_succ_out_from_right, ←h_flip_n_plus_m, h_pull_succ_out_from_left]"
|
||||
],
|
||||
"src_header_fl_solution": [
|
||||
"import Mathlib.Data.Nat.Basic",
|
||||
"import Aesop"
|
||||
]
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
"nl_problem": "Prove that for any natural numbers n, m, and p, n + (m + p) = (n + m) + p.",
|
||||
"nl_solution": [
|
||||
"Consider some natural numbers n, m, and p. We want to show n + (m + p) = (n + m) + p.",
|
||||
"By using facts of addition and induction on n, we can prove the statement for both the base case and the inductive step."
|
||||
],
|
||||
"fl_problem": "theorem add_assoc_normal : ∀ n m p : ℕ, n + (m + p) = (n + m) + p := by",
|
||||
"fl_solution": [
|
||||
"-- Prove that n + (m + p) = (n + m) + p",
|
||||
"theorem add_assoc_normal : ∀ n m p : ℕ, n + (m + p) = (n + m) + p := by",
|
||||
"-- Consider some n, m, and p in Nats.",
|
||||
"intros n m p",
|
||||
"-- Perform induction on n.",
|
||||
"induction n with",
|
||||
"| zero =>",
|
||||
"-- Base case: When n = 0, we need to show 0 + (m + p) = (0 + m) + p.",
|
||||
"-- Using the definition of addition, 0 + (m + p) = m + p and (0 + m) + p = m + p.",
|
||||
"rw [Nat.zero_add, Nat.zero_add]",
|
||||
"| succ n ih =>",
|
||||
"-- Inductive step: Assume n + (m + p) = (n + m) + p, we need to show succ n + (m + p) = (succ n + m) + p.",
|
||||
"-- proof strategy is, we move succ n out (or in) enough times then use the IH until both sides are the same.",
|
||||
"-- 1. let's start by pulling out the succ from the left side and have the entire addition inside the succ.",
|
||||
"rw [Nat.succ_add]",
|
||||
"-- 2. Now that we have the IH hypothesis appearing inside the left, let's apply it so we have n + (m + p) = (n + m) + p.",
|
||||
"rw [ih]",
|
||||
"-- 3. Now that the parentheses (apps of plus) are in the right place for both sides, push the succ on the left twice so both terms are the same.",
|
||||
"rw [← Nat.succ_add, ← Nat.succ_add]"
|
||||
],
|
||||
"src_header_fl_solution": [
|
||||
"import Mathlib.Data.Nat.Basic"
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"nl_problem": "Prove that for any natural numbers n, m, and p, n + (m + p) = (n + m) + p.",
|
||||
"nl_solution": [
|
||||
"Consider some natural numbers n, m, and p. We want to show n + (m + p) = (n + m) + p.",
|
||||
"By using facts of addition and induction on n, we can prove the statement for both the base case and the inductive step."
|
||||
],
|
||||
"fl_problem": "theorem add_assoc_proved_formal_sketch : ∀ n m p : ℕ, n + (m + p) = (n + m) + p := by",
|
||||
"fl_solution": [
|
||||
"-- Prove that n + (m + p) = (n + m) + p",
|
||||
"theorem add_assoc_proved_formal_sketch : ∀ n m p : ℕ, n + (m + p) = (n + m) + p := by",
|
||||
"-- Consider some n, m, and p in Nats.",
|
||||
"intros n m p",
|
||||
"-- Perform induction on n.",
|
||||
"induction n with",
|
||||
"| zero =>",
|
||||
"-- Base case: When n = 0, we need to show 0 + (m + p) = (0 + m) + p.",
|
||||
"-- Using the definition of addition, 0 + (m + p) = m + p and (0 + m) + p = m + p.",
|
||||
"rw [Nat.zero_add, Nat.zero_add]",
|
||||
"| succ n ih =>",
|
||||
"-- Inductive step: Assume n + (m + p) = (n + m) + p, we need to show succ n + (m + p) = (succ n + m) + p.",
|
||||
"-- proof strategy is, we move succ n out (or in) enough times then use the IH until both sides are the same.",
|
||||
"-- 1. let's start by pulling out the succ from the left side and have the entire addition inside the succ.",
|
||||
"have h_pull_add_succ_out_from_left: Nat.succ n + (m + p) = Nat.succ (n + (m + p)) := by rw [Nat.succ_add]",
|
||||
"-- 2. Now that we have the IH hypothesis appearing inside the left, let's apply it so we have n + (m + p) = (n + m) + p.",
|
||||
"have h_inside_left_associates: Nat.succ (n + (m + p)) = Nat.succ ((n + m) + p) := by rw [ih]",
|
||||
"-- 3. Now that the parentheses (apps of plus) are in the right place for both sides, push the succ on the left twice so both terms are the same.",
|
||||
"have h_push_succ_in_left_twice: Nat.succ ((n + m) + p) = ((Nat.succ n) + m) + p := by rw [← Nat.succ_add, ← Nat.succ_add]",
|
||||
"-- Combining these, we get succ n + (m + p) = (succ n + m) + p.",
|
||||
"rw [h_pull_add_succ_out_from_left, h_inside_left_associates, h_push_succ_in_left_twice]"
|
||||
],
|
||||
"src_header_fl_solution": [
|
||||
"import Mathlib.Data.Nat.Basic"
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"nl_problem": "Prove that for any natural numbers n, m, and p, n + (m + p) = (n + m) + p.",
|
||||
"nl_solution": [
|
||||
"Consider some natural numbers n, m, and p. We want to show n + (m + p) = (n + m) + p.",
|
||||
"By using facts of addition and induction on n, we can prove the statement for both the base case and the inductive step."
|
||||
],
|
||||
"fl_problem": "theorem add_assoc_proved_formal_sketch_aesop : ∀ n m p : ℕ, n + (m + p) = (n + m) + p := by",
|
||||
"fl_solution": [
|
||||
"-- Prove that n + (m + p) = (n + m) + p via a formal proof sketch with aesop",
|
||||
"theorem add_assoc_proved_formal_sketch_aesop : ∀ n m p : ℕ, n + (m + p) = (n + m) + p := by",
|
||||
"-- Consider some n, m, and p in Nats.",
|
||||
"intros n m p",
|
||||
"-- Perform induction on n.",
|
||||
"induction n with",
|
||||
"| zero =>",
|
||||
"-- Base case: When n = 0, we need to show 0 + (m + p) = (0 + m) + p.",
|
||||
"-- Using the definition of addition, 0 + (m + p) = m + p and (0 + m) + p = m + p.",
|
||||
"rw [Nat.zero_add, Nat.zero_add]",
|
||||
"| succ n ih =>",
|
||||
"-- Inductive step: Assume n + (m + p) = (n + m) + p, we need to show succ n + (m + p) = (succ n + m) + p.",
|
||||
"-- proof strategy is, we move succ n out (or in) enough times then use the IH until both sides are the same.",
|
||||
"-- 1. let's start by pulling out the succ from the left side and have the entire addition inside the succ.",
|
||||
"have h_pull_add_succ_out_from_left: Nat.succ n + (m + p) = Nat.succ (n + (m + p)) := by rw [Nat.succ_add]",
|
||||
"-- 2. Now that we have the IH hypothesis appearing inside the left, let's apply it so we have n + (m + p) = (n + m) + p.",
|
||||
"have h_inside_left_associates: Nat.succ (n + (m + p)) = Nat.succ ((n + m) + p) := by aesop",
|
||||
"-- 3. Now that the parentheses (apps of plus) are in the right place for both sides, push the succ on the left twice so both terms are the same.",
|
||||
"have h_push_succ_in_left_twice: Nat.succ ((n + m) + p) = ((Nat.succ n) + m) + p := by rw [← Nat.succ_add, ← Nat.succ_add]",
|
||||
"-- Combining these, we get succ n + (m + p) = (succ n + m) + p.",
|
||||
"rw [h_pull_add_succ_out_from_left, h_inside_left_associates, h_push_succ_in_left_twice]"
|
||||
],
|
||||
"src_header_fl_solution": [
|
||||
"import Mathlib.Data.Nat.Basic",
|
||||
"import Aesop"
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,98 +0,0 @@
|
|||
[
|
||||
{
|
||||
"nl_problem": ["Prove that for any natural number n, 0 + n = n."],
|
||||
"nl_solution": [
|
||||
"Consider any natural number n. We will prove the statement by induction on n.",
|
||||
"Base case: When n = 0, we need to show that 0 + 0 = 0. This is true by the definition of addition.",
|
||||
"Inductive step: Assume that for some natural number n, 0 + n = n. We need to show that 0 + (n + 1) = (n + 1). By the definition of addition and the inductive hypothesis, we have 0 + (n + 1) = (0 + n) + 1 = n + 1. Therefore, the statement holds for n + 1.",
|
||||
"Thus, by induction, we have proved that for any natural number n, 0 + n = n."
|
||||
],
|
||||
"nl_solution_sketch": [
|
||||
"Consider any natural number n, and do induction on n.",
|
||||
"Base case: 0 + 0 = 0 by properties of addition.",
|
||||
"Inductive step we have 0 + n = n. Then 0 + (n + 1) = (0 + n) + 1 = n + 1.",
|
||||
"Where, 0 + n = n by assumption,qed."
|
||||
],
|
||||
"fl_problem": ["theorem zero_plus_n_proved_formal_sketch : ∀ n : ℕ, 0 + n = n := "],
|
||||
"fl_partial_sketch": [
|
||||
"by\n",
|
||||
" -- Consider some n in Nats.\n",
|
||||
" intro n\n",
|
||||
" -- Perform induction on n.\n",
|
||||
" induction n with\n",
|
||||
" | zero =>\n",
|
||||
" -- Base case: 0 + 0 = 0\n",
|
||||
" have h_base: 0 + 0 = 0 := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- Combine facts to close goal\n",
|
||||
" <TODO_PROOF_OR_HAMMER>\n",
|
||||
" | succ n ih =>\n",
|
||||
" -- Inductive step: assume 0 + n = n, prove 0 + succ n = succ n\n",
|
||||
" have h_inductive: 0 + Nat.succ n = Nat.succ n := <TODO_PROOF_OR_HAMMER>\\n",
|
||||
" -- Combine facts to close goal\n",
|
||||
" <TODO_PROOF_OR_HAMMER>\n"
|
||||
],
|
||||
"src_header_fl_problem": ["import Mathlib.Data.Nat.Basic"],
|
||||
"fl_header_sketch": [
|
||||
"import Mathlib.Data.Nat.Basic",
|
||||
"import Aesop"
|
||||
],
|
||||
"path_2_file": "~/gold-ai-olympiad/lean_src_proj/lean_basics/basic_nats_using_mathlib_nats2_simp_no_rw.lean",
|
||||
"fl_statement_idx": "1"
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
"nl_problem": ["Prove that for any natural number n, m, and p, n + (m + p) = (n + m) + p."],
|
||||
"nl_solution": [
|
||||
"Consider any natural numbers n, m, and p. We will prove the statement by induction on n.",
|
||||
"Base case: When n = 0, we need to show that 0 + (m + p) = (0 + m) + p. By the definition of addition, we have 0 + (m + p) = m + p and (0 + m) + p = m + p. Therefore, 0 + (m + p) = (0 + m) + p.",
|
||||
"Inductive step: Assume that for some natural number n, n + (m + p) = (n + m) + p. We need to show that (n + 1) + (m + p) = ((n + 1) + m) + p.",
|
||||
"1. First, pull out the successor from the left side to have the entire addition inside the successor: (n + 1) + (m + p) = (n + (m + p)) + 1.",
|
||||
"2. By the inductive hypothesis, we know that n + (m + p) = (n + m) + p. So we can replace n + (m + p) with (n + m) + p inside the successor: (n + (m + p)) + 1 = ((n + m) + p) + 1.",
|
||||
"3. Finally, push the successor on the left twice to align both sides: ((n + m) + p) + 1 = (n + 1) + (m + p) = ((n + 1) + m) + p.",
|
||||
"Thus, by induction, we have proved that for any natural numbers n, m, and p, n + (m + p) = (n + m) + p."
|
||||
],
|
||||
"nl_solution_sketch": [
|
||||
"Consider any natural numbers n, m, and p. We will do induction on n.",
|
||||
"Base case: 0 + (m + p) = (0 + m) + p by properties of addition.",
|
||||
"Inductive step, we have n + (m + p) = (n + m) + p. Then (n + 1) + (m + p) = (n + (m + p)) + 1 = ((n + m) + p) + 1.",
|
||||
"Thus, (n + 1) + (m + p) = ((n + 1) + m) + p, qed."
|
||||
],
|
||||
"fl_problem": ["theorem add_assoc_proved_formal_sketch : ∀ n m p : ℕ, n + (m + p) = (n + m) + p := "],
|
||||
"fl_partial_sketch": [
|
||||
"by\n",
|
||||
" -- Consider some n, m, and p in Nats.\n",
|
||||
" intros n m p\n",
|
||||
" -- Perform induction on n.\n",
|
||||
" induction n with\n",
|
||||
" | zero =>\n",
|
||||
" -- Base case: When n = 0, we need to show 0 + (m + p) = (0 + m) + p.\n",
|
||||
" -- We have the fact 0 + (m + p) = m + p by the definition of addition.\n",
|
||||
" have h_base: 0 + (m + p) = m + p := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- We also have the fact (0 + m) + p = m + p by the definition of addition.\n",
|
||||
" have h_symm: (0 + m) + p = m + p := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- Combine facts to close goal\n",
|
||||
" <TODO_PROOF_OR_HAMMER>\n",
|
||||
" | succ n ih =>\n",
|
||||
" -- Inductive step: Assume n + (m + p) = (n + m) + p, we need to show succ n + (m + p) = (succ n + m) + p.\n",
|
||||
" -- By the inductive hypothesis, we have n + (m + p) = (n + m) + p.\n",
|
||||
" have h_inductive: n + (m + p) = (n + m) + p := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- 1. Let's start by pulling out the succ from left side and have the entire addition inside the succ.\n",
|
||||
" have h_pull_succ_out_from_left: Nat.succ n + (m + p) = Nat.succ (n + (m + p)) := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- 2. Now that we have the IH hypothesis appearing inside the left, let's apply it so we have n + (m + p) = (n + m) + p.\n",
|
||||
" have h_inside_left_associates: Nat.succ (n + (m + p)) = Nat.succ ((n + m) + p) := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- 3. Now that the parentheses (apps of plus) are in the right place for both sides, push the succ on the left twice so both terms are the same.\n",
|
||||
" have h_push_succ_in_left_twice: Nat.succ ((n + m) + p) = ((Nat.succ n) + m) + p := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- Combine facts to close goal\n",
|
||||
" <TODO_PROOF_OR_HAMMER>\n"
|
||||
],
|
||||
"src_header_fl_problem": ["import Mathlib.Data.Nat.Basic"],
|
||||
"fl_header_sketch": [
|
||||
"import Mathlib.Data.Nat.Basic",
|
||||
"import Aesop"
|
||||
],
|
||||
"path_2_file": "~/gold-ai-olympiad/lean_src_proj/lean_basics/basic_nats_using_mathlib_nats2_simp_no_rw.lean",
|
||||
"fl_statement_idx": "4"
|
||||
}
|
||||
]
|
|
@ -1,117 +0,0 @@
|
|||
[
|
||||
{
|
||||
"nl_problem": ["Prove that for any natural number n, n + 0 = n."],
|
||||
"nl_solution": [
|
||||
"Consider any natural number n.",
|
||||
"Using the properties of addition, we know that adding zero to any number does not change the value of that number.",
|
||||
"Therefore, we can conclude that n + 0 = n."
|
||||
],
|
||||
"nl_solution_sketch": [
|
||||
"Consider any natural number n.",
|
||||
"From properties of addition, adding zero does not change its values.",
|
||||
"Thus, n + 0 = n."
|
||||
],
|
||||
"fl_problem": ["theorem n_plus_zero_normal : ∀ n : ℕ, n + 0 = n := "],
|
||||
"fl_partial_sketch": [
|
||||
"by\n",
|
||||
" -- We have the fact of addition n + 0 = n, use it to show left and right are equal.\n",
|
||||
" have h_nat_add_zero: ∀ n : ℕ, n + 0 = n := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- Combine facts with to close goal\n",
|
||||
" <TODO_PROOF_OR_HAMMER>\n"
|
||||
],
|
||||
"src_header_fl_problem": ["import Mathlib.Data.Nat.Basic"],
|
||||
"fl_header_sketch": [
|
||||
"import Mathlib.Data.Nat.Basic",
|
||||
"import Aesop"
|
||||
],
|
||||
"path_2_file": "~/gold-ai-olympiad/lean_src_proj/lean_basics/basic_nats_using_mathlib_nats2_simp_no_rw.lean",
|
||||
"fl_statement_idx": "0"
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
"nl_problem": ["Prove that for any natural number n, n + (m + 1) = (n + m) + 1."],
|
||||
"nl_solution": [
|
||||
"Consider any natural numbers n and m. We want to show that n + (m + 1) = (n + m) + 1.",
|
||||
"Using the properties of addition, we know that adding 1 to the sum of n and m is the same as first adding m to n and then adding 1.",
|
||||
"Therefore, we can conclude that n + (m + 1) = (n + m) + 1."
|
||||
],
|
||||
"nl_solution_sketch": [
|
||||
"Consider any natural numbers n and m.",
|
||||
"From properties of addition, adding 1 to the sum of n and m is the same as first adding m to n and then adding 1.",
|
||||
"Thus, n + (m + 1) = (n + m) + 1."
|
||||
],
|
||||
"fl_problem": ["theorem plus_n_Sm_proved_formal_sketch : ∀ n m : ℕ, n + (m + 1) = (n + m) + 1 := "],
|
||||
"fl_partial_sketch": [
|
||||
"by\n",
|
||||
" -- We have the fact of addition n + (m + 1) = (n + m) + 1, use it to show left and right are equal.\n",
|
||||
" have h_nat_add_succ: ∀ n m : ℕ, n + (m + 1) = (n + m) + 1 := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- Combine facts to close goal\n",
|
||||
" <TODO_PROOF_OR_HAMMER>\n"
|
||||
],
|
||||
"src_header_fl_problem": ["import Mathlib.Data.Nat.Basic"],
|
||||
"fl_header_sketch": [
|
||||
"import Mathlib.Data.Nat.Basic",
|
||||
"import Aesop"
|
||||
],
|
||||
"path_2_file": "~/gold-ai-olympiad/lean_src_proj/lean_basics/basic_nats_using_mathlib_nats2_simp_no_rw.lean",
|
||||
"fl_statement_idx": "2"
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
"nl_problem": ["Prove that for any natural number n and m, n + m = m + n."],
|
||||
"nl_solution": [
|
||||
"Consider any natural numbers n and m. We will prove the statement by induction on n.",
|
||||
"Base case: When n = 0, we need to show that 0 + m = m + 0. By the definition of addition, we have 0 + m = m and m + 0 = m. Therefore, 0 + m = m + 0.",
|
||||
"Inductive step: Assume that for some natural number n, n + m = m + n. We need to show that (n + 1) + m = m + (n + 1).",
|
||||
"1. Start by using the fact that (n + 1) + m = n + (m + 1) and m + (n + 1) = (m + n) + 1.",
|
||||
"2. By the inductive hypothesis, we have n + m = m + n. So we can replace n + (m + 1) with (m + n) + 1.",
|
||||
"3. Now, both sides have the same structure, showing that (n + 1) + m = m + (n + 1).",
|
||||
"Thus, by induction, we have proved that for any natural numbers n and m, n + m = m + n."
|
||||
],
|
||||
"nl_solution_sketch": [
|
||||
"Consider any natural numbers n and m. We will do induction on n.",
|
||||
"Base case: 0 + m = m + 0 by properties of addition.",
|
||||
"Inductive step, we have n + m = m + n. Then (n + 1) + m = (n + m) + 1 = (m + n) + 1 = m + (n + 1).",
|
||||
"Thus, by induction, n + m = m + n, qed."
|
||||
],
|
||||
"fl_problem": ["theorem add_comm_proved_formal_sketch : ∀ n m : ℕ, n + m = m + n := "],
|
||||
"fl_partial_sketch": [
|
||||
"by\n",
|
||||
" -- Consider some n and m in Nats.\n",
|
||||
" intros n m\n",
|
||||
" -- Perform induction on n.\n",
|
||||
" induction n with\n",
|
||||
" | zero =>\n",
|
||||
" -- Base case: When n = 0, we need to show 0 + m = m + 0.\n",
|
||||
" -- We have the fact 0 + m = m by the definition of addition.\n",
|
||||
" have h_base: 0 + m = m := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- We also have the fact m + 0 = m by the definition of addition.\n",
|
||||
" have h_symm: m + 0 = m := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- Combine facts to close goal\n",
|
||||
" <TODO_PROOF_OR_HAMMER>\n",
|
||||
" | succ n ih =>\n",
|
||||
" -- Inductive step: Assume n + m = m + n, we need to show succ n + m = m + succ n.\n",
|
||||
" -- By the inductive hypothesis, we have n + m = m + n.\n",
|
||||
" have h_inductive: n + m = m + n := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- 1. Note we start with: Nat.succ n + m = m + Nat.succ n, so, pull the succ out from m + Nat.succ n on the right side from the addition using addition facts Nat.add_succ.\n",
|
||||
" have h_pull_succ_out_from_right: m + Nat.succ n = Nat.succ (m + n) := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- 2. then to flip m + S n to something like S (n + m) we need to use the IH.\n",
|
||||
" have h_flip_n_plus_m: Nat.succ (n + m) = Nat.succ (m + n) := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- 3. Now the n & m are on the correct sides Nat.succ n + m = Nat.succ (n + m), so let's use the def of addition to pull out the succ from the addition on the left using Nat.succ_add.\n",
|
||||
" have h_pull_succ_out_from_left: Nat.succ n + m = Nat.succ (n + m) := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- Combine facts to close goal\n",
|
||||
" <TODO_PROOF_OR_HAMMER>\n"
|
||||
],
|
||||
"src_header_fl_problem": ["import Mathlib.Data.Nat.Basic"],
|
||||
"fl_header_sketch": [
|
||||
"import Mathlib.Data.Nat.Basic",
|
||||
"import Aesop"
|
||||
],
|
||||
"path_2_file": "~/gold-ai-olympiad/lean_src_proj/lean_basics/basic_nats_using_mathlib_nats2_simp_no_rw.lean",
|
||||
"fl_statement_idx": "3"
|
||||
}
|
||||
]
|
|
@ -1,106 +0,0 @@
|
|||
https://www.evernote.com/shard/s410/nl/75276202/2170cbbd-24a1-2d25-da32-bd8f3270d190?title=prompt%20for%20creating%20toy%20example
|
||||
https://chatgpt.com/c/0ad32608-cbc9-4627-a705-786ed7421826
|
||||
I want all final responses in this format:
|
||||
```json
|
||||
{
|
||||
"nl_problem": ["Prove that for any natural number n, n + 0 = n."],
|
||||
"nl_solution": [
|
||||
"Consider any natural number n.",
|
||||
"Using the properties of addition, we know that adding zero to any number does not change the value of that number.",
|
||||
"Therefore, we can conclude that n + 0 = n."
|
||||
],
|
||||
"nl_solution_sketch": [
|
||||
"Consider any natural number n.",
|
||||
"From properties of addition, adding zero does not change its values.",
|
||||
"Thus, n + 0 = n."
|
||||
],
|
||||
"fl_problem": ["theorem n_plus_zero_normal : ∀ n : ℕ, n + 0 = n :="],
|
||||
"fl_partial_sketch": [
|
||||
"-- Prove that n + 0 = n via a formal proof sketch with holes to be filled\n",
|
||||
"theorem n_plus_zero_proved_formal_sketch'' : ∀ n : ℕ, n + 0 = n := by\n",
|
||||
" -- We have the fact of addition n + 0 = n, use it to show left and right are equal.\n",
|
||||
" have h_nat_add_zero: ∀ n : ℕ, n + 0 = n := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- Combine facts with to close goal\n",
|
||||
" <TODO_PROOF_OR_HAMMER>\n"
|
||||
],
|
||||
"src_header_fl_problem": ["import Mathlib.Data.Nat.Basic"],
|
||||
"fl_header_sketch": [
|
||||
"import Mathlib.Data.Nat.Basic",
|
||||
"import Aesop"
|
||||
],
|
||||
"path_2_file": "~/gold-ai-olympiad/lean_src_proj/lean_basics/basic_nats_using_mathlib_nats2_simp_no_rw.lean",
|
||||
"fl_statement_idx": "0"
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
"nl_problem": ["Prove that for any natural number n, 0 + n = n."],
|
||||
"nl_solution": [
|
||||
"Consider any natural number n. We will prove the statement by induction on n.",
|
||||
"Base case: When n = 0, we need to show that 0 + 0 = 0. This is true by the definition of addition.",
|
||||
"Inductive step: Assume that for some natural number n, 0 + n = n. We need to show that 0 + (n + 1) = (n + 1). By the definition of addition and the inductive hypothesis, we have 0 + (n + 1) = (0 + n) + 1 = n + 1. Therefore, the statement holds for n + 1.",
|
||||
"Thus, by induction, we have proved that for any natural number n, 0 + n = n."
|
||||
],
|
||||
"nl_solution_sketch": [
|
||||
"Consider any natural number n, and do induction on n.",
|
||||
"Base case: 0 + 0 = 0 by properties of addition.",
|
||||
"Inductive step we have 0 + n = n. Then 0 + (n + 1) = (0 + n) + 1 = n + 1.",
|
||||
"Where, 0 + n = n by assumption,qed."
|
||||
],
|
||||
"fl_problem": ["theorem zero_plus_n_proved_formal_sketch : ∀ n : ℕ, 0 + n = n :="],
|
||||
"fl_partial_sketch": [
|
||||
"-- Prove that 0 + n = n by induction via a formal proof sketch with holes to be filled\n",
|
||||
"theorem zero_plus_n_proved_formal_sketch'' : ∀ n : ℕ, 0 + n = n := by\n",
|
||||
" -- Consider some n in Nats.\n",
|
||||
" intro n\n",
|
||||
" -- Perform induction on n.\n",
|
||||
" induction n with\n",
|
||||
" | zero =>\n",
|
||||
" -- Base case: 0 + 0 = 0\n",
|
||||
" have h_base: 0 + 0 = 0 := <TODO_PROOF_OR_HAMMER>\n",
|
||||
" -- Combine facts to close goal\n",
|
||||
" <TODO_PROOF_OR_HAMMER>\n",
|
||||
" | succ n ih =>\n",
|
||||
" -- Inductive step: assume 0 + n = n, prove 0 + succ n = succ n\n",
|
||||
" have h_inductive: 0 + Nat.succ n = Nat.succ n := <TODO_PROOF_OR_HAMMER>\\n",
|
||||
" -- Combine facts to close goal\n",
|
||||
" <TODO_PROOF_OR_HAMMER>\n"
|
||||
],
|
||||
"src_header_fl_problem": ["import Mathlib.Data.Nat.Basic"],
|
||||
"fl_header_sketch": [
|
||||
"import Mathlib.Data.Nat.Basic",
|
||||
"import Aesop"
|
||||
]
|
||||
}
|
||||
```
|
||||
I want to translate the following formal proof (solution) in lean 4 to a natural language proof (solution) that a human would write (without lean code in it) and eventually make it into a concise nl_solution_sketch, like the following one:
|
||||
```human_problem_solution_proof.json
|
||||
"nl_problem": ["Let \\[f(x) = \\left\\{\n\\begin{array}{cl} ax+3, &\\text{ if }x>2, \\\\\nx-5 &\\text{ if } -2 \\le x \\le 2, \\\\\n2x-b &\\text{ if } x <-2.\n\\end{array}\n\\right.\\]Find $a+b$ if the piecewise function is continuous (which means that its graph can be drawn without lifting your pencil from the paper)."],
|
||||
"nl_solution_sketch": ["For the piecewise function to be continuous, the cases must \"meet\" at $2$ and $-2$. For example, $ax+3$ and $x-5$ must be equal when $x=2$. This implies $a(2)+3=2-5$, which we solve to get $2a=-6 \\Rightarrow a=-3$. Similarly, $x-5$ and $2x-b$ must be equal when $x=-2$. Substituting, we get $-2-5=2(-2)-b$, which implies $b=3$. So $a+b=-3+3=\\boxed{0}$."]
|
||||
```
|
||||
This is my lean 4 fl theorem (fl problem) and fl proof (fl solution):
|
||||
```
|
||||
-- Prove that n + (m + p) = (n + m) + p
|
||||
theorem add_assoc_proved_formal_sketch : ∀ n m p : ℕ, n + (m + p) = (n + m) + p := by
|
||||
-- Consider some n, m, and p in Nats.
|
||||
intros n m p
|
||||
-- Perform induction on n.
|
||||
induction n with
|
||||
| zero =>
|
||||
-- Base case: When n = 0, we need to show 0 + (m + p) = (0 + m) + p.
|
||||
-- Using the definition of addition, 0 + (m + p) = m + p and (0 + m) + p = m + p.
|
||||
simp [Nat.zero_add, Nat.zero_add]
|
||||
| succ n ih =>
|
||||
-- Inductive step: Assume n + (m + p) = (n + m) + p, we need to show succ n + (m + p) = (succ n + m) + p.
|
||||
-- proof strategy is, we move succ n out (or in) enough times then use the IH until both sides are the same.
|
||||
-- 1. let's start by pulling out the scc from left side and have the entire addition inside the succ.
|
||||
have h_pull_add_succ_out_from_left: Nat.succ n + (m + p) = Nat.succ (n + (m + p)) := by simp [Nat.succ_add]
|
||||
-- 2. Now that we have the IH hypothesis appearing inside the left, let's apply it so we have n + (m + p) = (n + m) + p.
|
||||
have h_inside_left_associates: Nat.succ (n + (m + p)) = Nat.succ ((n + m) + p) := by simp [ih]
|
||||
-- 3. Now that the parenthesis (apps of plus) are on the right place for both side, push the succ on the left twice so both terms are the same.
|
||||
have h_push_succ_in_left_twice: Nat.succ ((n + m) + p) = ((Nat.succ n) + m) + p := by simp [Nat.succ_add, Nat.succ_add]
|
||||
-- Combining these, we get succ n + (m + p) = (succ n + m) + p.
|
||||
simp [h_pull_add_succ_out_from_left, h_inside_left_associates, h_push_succ_in_left_twice]
|
||||
```
|
||||
use the comments to translate the fl proof (solution) to natural language solution then use that to output a natural language concise sketch. Make the natural language proof (solution) sketch concise with the core elements of the solution proof. Do this by first outputting the natural language solution, distill it into a very concise proof sketch in natural language with only the core components. Output everything in a json code block please:
|
|
@ -4,8 +4,24 @@ based on Sean Welleck's DSP for Isabelle: https://github.com/wellecks/ntptutoria
|
|||
|
||||
## Execution
|
||||
|
||||
First of all, build the experiment repo.
|
||||
|
||||
``` sh
|
||||
python3 experiments/dsp/main.py eval
|
||||
# experiments/dsp
|
||||
cd lean_src_proj
|
||||
lake build
|
||||
```
|
||||
Then run `main.py`
|
||||
``` sh
|
||||
python3 experiments/dsp/main.py -h
|
||||
```
|
||||
|
||||
The main command for running DSP is `eval`. Due to the multitude of data format
|
||||
out there, use the `--format` flag to specify the data format. For example,
|
||||
running DSP on minif2f is:
|
||||
|
||||
``` sh
|
||||
python3 experiments/dsp/main.py eval --dataset ../minif2f/valid.jsonl --format minif2f
|
||||
```
|
||||
|
||||
## Related work
|
||||
|
|
|
@ -1,68 +1,75 @@
|
|||
{"version": 7,
|
||||
{"version": "1.1.0",
|
||||
"packagesDir": ".lake/packages",
|
||||
"packages":
|
||||
[{"url": "https://github.com/leanprover/std4",
|
||||
[{"url": "https://github.com/leanprover-community/batteries",
|
||||
"type": "git",
|
||||
"subDir": null,
|
||||
"rev": "cbc437aed076ea3aeb83f318d572f8b6de38265d",
|
||||
"name": "std",
|
||||
"scope": "",
|
||||
"rev": "2ead90d24b4fac3a05c9c4294daa39bd8686fb98",
|
||||
"name": "batteries",
|
||||
"manifestFile": "lake-manifest.json",
|
||||
"inputRev": "main",
|
||||
"inherited": true,
|
||||
"configFile": "lakefile.lean"},
|
||||
{"url": "https://github.com/leanprover-community/aesop.git",
|
||||
"type": "git",
|
||||
"subDir": null,
|
||||
"scope": "",
|
||||
"rev": "a64fe24aa94e21404940e9217363a9a1ed9a33a6",
|
||||
"name": "aesop",
|
||||
"manifestFile": "lake-manifest.json",
|
||||
"inputRev": "v4.10.0-rc1",
|
||||
"inherited": false,
|
||||
"configFile": "lakefile.toml"},
|
||||
{"url": "https://github.com/leanprover-community/quote4",
|
||||
"type": "git",
|
||||
"subDir": null,
|
||||
"rev": "fd760831487e6835944e7eeed505522c9dd47563",
|
||||
"scope": "leanprover-community",
|
||||
"rev": "a7bfa63f5dddbcab2d4e0569c4cac74b2585e2c6",
|
||||
"name": "Qq",
|
||||
"manifestFile": "lake-manifest.json",
|
||||
"inputRev": "master",
|
||||
"inherited": true,
|
||||
"configFile": "lakefile.lean"},
|
||||
{"url": "https://github.com/leanprover-community/aesop",
|
||||
"type": "git",
|
||||
"subDir": null,
|
||||
"rev": "82ac8cce559c3da0ade17cee3e275111fb7f1920",
|
||||
"name": "aesop",
|
||||
"manifestFile": "lake-manifest.json",
|
||||
"inputRev": "master",
|
||||
"inherited": true,
|
||||
"configFile": "lakefile.lean"},
|
||||
{"url": "https://github.com/leanprover-community/ProofWidgets4",
|
||||
"type": "git",
|
||||
"subDir": null,
|
||||
"rev": "f5b2b6ff817890d85ffd8ed47637e36fcac544a6",
|
||||
"scope": "leanprover-community",
|
||||
"rev": "d1b33202c3a29a079f292de65ea438648123b635",
|
||||
"name": "proofwidgets",
|
||||
"manifestFile": "lake-manifest.json",
|
||||
"inputRev": "v0.0.28",
|
||||
"inputRev": "v0.0.39",
|
||||
"inherited": true,
|
||||
"configFile": "lakefile.lean"},
|
||||
{"url": "https://github.com/leanprover/lean4-cli",
|
||||
"type": "git",
|
||||
"subDir": null,
|
||||
"rev": "a751d21d4b68c999accb6fc5d960538af26ad5ec",
|
||||
"scope": "",
|
||||
"rev": "a11566029bd9ec4f68a65394e8c3ff1af74c1a29",
|
||||
"name": "Cli",
|
||||
"manifestFile": "lake-manifest.json",
|
||||
"inputRev": "main",
|
||||
"inherited": true,
|
||||
"configFile": "lakefile.lean"},
|
||||
{"url": "https://github.com/leanprover-community/import-graph.git",
|
||||
{"url": "https://github.com/leanprover-community/import-graph",
|
||||
"type": "git",
|
||||
"subDir": null,
|
||||
"rev": "64d082eeaad1a8e6bbb7c23b7a16b85a1715a02f",
|
||||
"scope": "leanprover-community",
|
||||
"rev": "d366a602cc4a325a6f9db3a3991dfa6d6cf409c5",
|
||||
"name": "importGraph",
|
||||
"manifestFile": "lake-manifest.json",
|
||||
"inputRev": "main",
|
||||
"inherited": true,
|
||||
"configFile": "lakefile.lean"},
|
||||
"configFile": "lakefile.toml"},
|
||||
{"url": "https://github.com/leanprover-community/mathlib4.git",
|
||||
"type": "git",
|
||||
"subDir": null,
|
||||
"rev": "a6f770740f4c07b236c336115f4de99c28cd8910",
|
||||
"scope": "",
|
||||
"rev": "f5c3f06aa7f6d6c221786d2890c345a00e6341f8",
|
||||
"name": "mathlib",
|
||||
"manifestFile": "lake-manifest.json",
|
||||
"inputRev": null,
|
||||
"inputRev": "v4.10.0-rc1",
|
||||
"inherited": false,
|
||||
"configFile": "lakefile.lean"}],
|
||||
"name": "lean_src_proj",
|
||||
"name": "LeanSrcProj",
|
||||
"lakeDir": ".lake"}
|
||||
|
|
|
@ -1,20 +1,12 @@
|
|||
import Lake
|
||||
open Lake DSL
|
||||
|
||||
package «lean_src_proj» {
|
||||
-- add any package configuration options here
|
||||
}
|
||||
|
||||
require mathlib from git
|
||||
"https://github.com/leanprover-community/mathlib4.git"
|
||||
|
||||
-- ref: https://github.com/leanprover-community/aesop?tab=readme-ov-file#building
|
||||
-- ref2: https://chatgpt.com/c/fbb6dde3-46e7-4117-9c02-78e5df1e0df5
|
||||
-- add aesop pkg as a depedency
|
||||
require aesop from git
|
||||
"https://github.com/JLimperg/aesop"
|
||||
"https://github.com/leanprover-community/aesop.git" @ "v4.10.0-rc1"
|
||||
require mathlib from git
|
||||
"https://github.com/leanprover-community/mathlib4.git" @ "v4.10.0-rc1"
|
||||
|
||||
package LeanSrcProj
|
||||
@[default_target]
|
||||
lean_lib «LeanSrcProj» {
|
||||
-- add any library configuration options here
|
||||
}
|
||||
lean_lib «LeanSrcProj»
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
leanprover/lean4:v4.6.0-rc1
|
|
@ -0,0 +1 @@
|
|||
../../../src/lean-toolchain
|
|
@ -1,13 +1,13 @@
|
|||
import sys, os, json
|
||||
import sys, os, json, subprocess
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Union, Any
|
||||
from collections import namedtuple
|
||||
from typing import Union, Any, Tuple
|
||||
from tqdm import tqdm
|
||||
from openai import OpenAI
|
||||
import wandb
|
||||
from tenacity import retry, stop_after_attempt, wait_exponential
|
||||
from pantograph import Server
|
||||
from termcolor import colored
|
||||
|
||||
from solve.prompts import (
|
||||
extract_lean_code,
|
||||
|
@ -19,6 +19,8 @@ from solve.prompts import (
|
|||
STOP_TOKENS_SKETCH_V0,
|
||||
get_prompt_sketch_template_4_lean_v0,
|
||||
)
|
||||
from solve.prove import HammerAgent
|
||||
from solve.data import Datum
|
||||
|
||||
# prompt_draft_template_lean4_v0 = "Draft an informal solution similar to the one below. The informal solution will be used to sketch a formal proof in the Lean 4 Proof Assistant. Here are some examples of informal problem solutions pairs:\n\nInformal:\n(*### Problem\n\nProve that for any natural number n, n + 0 = n.\n\n### Solution\n\nConsider any natural number n. From properties of addition, adding zero does not change its values. Thus, n + 0 = n.*)\n\nInformal:\n(*### Problem\n\nProve that for any natural number n, n + (m + 1) = (n + m) + 1.\n\n### Solution\n\nConsider any natural numbers n and m. From properties of addition, adding 1 to the sum of n and m is the same as first adding m to n and then adding 1. Thus, n + (m + 1) = (n + m) + 1.*)\n\nInformal:\n(*### Problem\n\nProve that for any natural number n and m, n + m = m + n.\n\n### Solution\n\nConsider any natural numbers n and m. We will do induction on n. Base case: 0 + m = m + 0 by properties of addition. Inductive step, we have n + m = m + n. Then (n + 1) + m = (n + m) + 1 = (m + n) + 1 = m + (n + 1). Thus, by induction, n + m = m + n, qed.*)\n\nInformal: \n(*### Problem\n\n{nl_problem}\n\n### Solution\n"
|
||||
|
||||
|
@ -39,25 +41,25 @@ class Engine:
|
|||
|
||||
class OpenAI_DSP_Engine(Engine):
|
||||
def __init__(
|
||||
self,
|
||||
model: str,
|
||||
api_key: str = None,
|
||||
base_url: str = None, # e.g., Mistral-7B-Instrcut-v0.2 on http://120.77.8.29:12345
|
||||
# Draft Params
|
||||
draft_system_prompt: str = SYSTEM_PROMPT_DRAFT_V0, # 'You are an expert mathematician and an expert in the Lean 4 Proof Assistant.' (goal do draft)
|
||||
draft_prompt_template: str = prompt_draft_template_lean4_v0,
|
||||
draft_sampling_params = None,
|
||||
draft_stop_tokens: list[str] = STOP_TOKENS_DRAFT_V0,
|
||||
# Sketch Params
|
||||
sketch_system_prompt: str = SYSTEM_PROMPT_SKETCH_V0,
|
||||
sketch_prompt_template: str = prompt_sketch_template_lean4_v0,
|
||||
sketch_sampling_params = None,
|
||||
sketch_stop_tokens: list[str] = STOP_TOKENS_SKETCH_V0,
|
||||
# Prove Params
|
||||
# ...TODO not sure if needed right now...
|
||||
# Misc
|
||||
verbose_init: bool = True,
|
||||
):
|
||||
self,
|
||||
model: str,
|
||||
api_key: str = None,
|
||||
base_url: str = None, # e.g., Mistral-7B-Instrcut-v0.2 on http://120.77.8.29:12345
|
||||
# Draft Params
|
||||
draft_system_prompt: str = SYSTEM_PROMPT_DRAFT_V0, # 'You are an expert mathematician and an expert in the Lean 4 Proof Assistant.' (goal do draft)
|
||||
draft_prompt_template: str = prompt_draft_template_lean4_v0,
|
||||
draft_sampling_params = None,
|
||||
draft_stop_tokens: list[str] = STOP_TOKENS_DRAFT_V0,
|
||||
# Sketch Params
|
||||
sketch_system_prompt: str = SYSTEM_PROMPT_SKETCH_V0,
|
||||
sketch_prompt_template: str = prompt_sketch_template_lean4_v0,
|
||||
sketch_sampling_params = None,
|
||||
sketch_stop_tokens: list[str] = STOP_TOKENS_SKETCH_V0,
|
||||
# Prove Params
|
||||
# ...TODO not sure if needed right now...
|
||||
# Misc
|
||||
verbose_init: bool = True,
|
||||
):
|
||||
super().__init__()
|
||||
print(f'{base_url=}') if verbose_init else None
|
||||
|
||||
|
@ -82,25 +84,25 @@ class OpenAI_DSP_Engine(Engine):
|
|||
|
||||
@retry(stop=stop_after_attempt(15), wait=wait_exponential(multiplier=2, max=128))
|
||||
def autoformalize_prob(
|
||||
eng,
|
||||
data_pt: dict,
|
||||
verbose: bool = False,
|
||||
):
|
||||
eng: Engine,
|
||||
datum: Datum,
|
||||
verbose: bool = False,
|
||||
):
|
||||
""" Autoformalize natural language problem to formal language problem. """
|
||||
...
|
||||
pass
|
||||
|
||||
@retry(stop=stop_after_attempt(15), wait=wait_exponential(multiplier=2, max=128))
|
||||
def draft(
|
||||
eng,
|
||||
data_pt: dict,
|
||||
verbose: bool = False,
|
||||
eng: Engine,
|
||||
datum: Datum,
|
||||
verbose: bool = False,
|
||||
) -> list:
|
||||
"""
|
||||
Creates (informal nl) draft (nl soln, nl proof sketch) for latter use in a formal proof sketch.
|
||||
y_pred_nl ~ draft(eng, x_nl_prob, P_draft)
|
||||
"""
|
||||
# Make prompt from template
|
||||
nl_problem: str = data_pt['nl_problem'][0]
|
||||
nl_problem: str = datum.nl_problem_str
|
||||
prompt = eng.draft_prompt_template.replace('{nl_problem}', nl_problem)
|
||||
# Get all **completions** to single prompt, one (in) -> many (out)
|
||||
# ref: https://platform.openai.com/docs/api-reference/chat/object
|
||||
|
@ -116,32 +118,35 @@ def draft(
|
|||
stop=eng.draft_sampling_params.stop[:3],
|
||||
)
|
||||
# Get all completions for single prompt
|
||||
completions: list[str] = [completion.message.content for completion in response.choices] # response.choices[i].message
|
||||
completions: list[str] = [
|
||||
completion.message.content
|
||||
for completion in response.choices
|
||||
] # response.choices[i].message
|
||||
drafts: list[str] = completions
|
||||
return drafts
|
||||
|
||||
@retry(stop=stop_after_attempt(15), wait=wait_exponential(multiplier=2, max=128))
|
||||
def sketch(
|
||||
eng,
|
||||
data_pt: dict,
|
||||
drafts: list,
|
||||
autoformalize_prob_in_prompt: bool = False,
|
||||
verbose: bool = False,
|
||||
) -> list:
|
||||
eng: Engine,
|
||||
datum: Datum,
|
||||
drafts: list[str],
|
||||
autoformalize_prob_in_prompt: bool = False,
|
||||
verbose: bool = False,
|
||||
) -> Tuple[list[str], str]:
|
||||
"""
|
||||
Creates (formal fl) sketch (fl proof sketch) for latter use in a formal proof sketch.
|
||||
z_pred_fl ~ sketch(eng, x_nl_prob, y_pred_nl, x_fl_prob, P_sketch)
|
||||
"""
|
||||
assert len(drafts) == 1, f"For now only 1 draft."
|
||||
# Make prompt from template
|
||||
x_nl_problem: str = data_pt['nl_problem'][0]
|
||||
x_nl_problem: str = datum.nl_problem_str
|
||||
y_nl_solution: str = drafts[0]
|
||||
x_fl_problem = None
|
||||
if autoformalize_prob_in_prompt:
|
||||
# prompt = eng.sketch_prompt_template.replace('{nl_problem}', x_nl_problem).replace('{nl_solution}', y_nl_solution)
|
||||
not NotImplemented
|
||||
else:
|
||||
x_fl_problem = data_pt['fl_problem'][0] if 'fl_problem' in data_pt else autoformalize_prob(eng, data_pt)
|
||||
x_fl_problem = datum.fl_problem if datum.fl_problem else autoformalize_prob(eng, datum)
|
||||
prompt = eng.sketch_prompt_template.replace('{fl_problem}', x_nl_problem).replace('{fl_problem}', y_nl_solution)
|
||||
# Get all **completions** to single prompt, one (in) -> many (out), ref: https://platform.openai.com/docs/api-reference/chat/object
|
||||
response: Any = eng.llm.chat.completions.create(
|
||||
|
@ -162,11 +167,11 @@ def sketch(
|
|||
return sketches, x_fl_problem
|
||||
|
||||
def prove(
|
||||
eng: Engine,
|
||||
server: Server,
|
||||
fl_prob: str,
|
||||
fl_sketch: list[str],
|
||||
):
|
||||
eng: Engine,
|
||||
server: Server,
|
||||
fl_prob: str,
|
||||
fl_sketch: list[str],
|
||||
):
|
||||
"""
|
||||
|
||||
Complete formal sketch and check if it proves the theorem.
|
||||
|
@ -176,61 +181,89 @@ def prove(
|
|||
|
||||
"""
|
||||
# If this throws index out of bound errors it means the source doesn't contain walled off Lean sections.
|
||||
print(colored("Sketch:", "yellow"), fl_sketch)
|
||||
lean_code, = [extract_lean_code(sketch)[0] for sketch in fl_sketch]
|
||||
state, = server.load_sorry(lean_code)
|
||||
agent = HammerAgent()
|
||||
result = agent.search(server, state)
|
||||
print(colored(f"Result: {result}", "blue"))
|
||||
|
||||
print(state)
|
||||
raise RuntimeError("Not implemented")
|
||||
# -- Prove
|
||||
correct: bool = False
|
||||
# -- Return
|
||||
return correct
|
||||
return result
|
||||
|
||||
# -- DSP for Lean
|
||||
|
||||
def single_proof_search_dsp_lean(
|
||||
eng: Engine,
|
||||
server: Server,
|
||||
data_pt: dict,
|
||||
datum: Datum,
|
||||
) -> bool:
|
||||
# -- Draft: [y_nl_pred_draft]_n ~ draft(eng, x_nl_prob, P_draft)
|
||||
y_nl_pred_drafts = draft(eng, data_pt)
|
||||
y_nl_pred_drafts = draft(eng, datum)
|
||||
|
||||
# -- Sketch: z_fl_pred_sketch ~ sketch(eng, x_nl_prob, [y_nl_pred_draft]_n, x_fl_prob, P_sketch)
|
||||
z_fl_pred_sketches, x_fl_prob = sketch(eng, data_pt, y_nl_pred_drafts)
|
||||
z_fl_pred_sketches, x_fl_prob = sketch(eng, datum, y_nl_pred_drafts)
|
||||
|
||||
# -- Prove: y_fl = prove(eng, x_fl_prob, z_fl_pred_sketches)
|
||||
correct: bool = prove(eng, server, x_fl_prob, z_fl_pred_sketches)
|
||||
result: bool = prove(eng, server, x_fl_prob, z_fl_pred_sketches)
|
||||
|
||||
# -- Return
|
||||
return correct
|
||||
return result
|
||||
|
||||
def full_proof_search_dsp_lean(
|
||||
eng: Engine,
|
||||
server: Server,
|
||||
path_2_eval_dataset: Path,
|
||||
):
|
||||
# -- Get eval data
|
||||
eval_dataset: list[dict] = json.load(open(path_2_eval_dataset, 'r'))
|
||||
print(f'{len(eval_dataset)=}')
|
||||
eng: Engine,
|
||||
server: Server,
|
||||
data: list[Datum],
|
||||
):
|
||||
print(colored(f"DSP on {len(data)} points", "blue", attrs=["bold", "underline"]))
|
||||
# -- Proof search by DSP over all eval data
|
||||
for data_pt in tqdm(eval_dataset, total=len(eval_dataset), desc='DSP proof loop per data point in benchmark.'):
|
||||
print(f'{data_pt=}')
|
||||
flag = single_proof_search_dsp_lean(eng, server, data_pt)
|
||||
server.gc()
|
||||
for datum in tqdm(data, total=len(data), desc='DSP proof loop per data point in benchmark.'):
|
||||
print("Problem:", colored(datum.id, "cyan"))
|
||||
result = single_proof_search_dsp_lean(eng, server, datum)
|
||||
print(result)
|
||||
#server.gc()
|
||||
return
|
||||
|
||||
|
||||
experiment_dir = Path(__file__).resolve().parent
|
||||
|
||||
def get_project_and_lean_path():
|
||||
cwd = experiment_dir / 'lean_src_proj'
|
||||
p = subprocess.check_output(['lake', 'env', 'printenv', 'LEAN_PATH'], cwd=cwd)
|
||||
return cwd, p
|
||||
|
||||
def load_data(args) -> list[Datum]:
|
||||
p = Path(args.dataset).expanduser()
|
||||
data = None
|
||||
if p.suffix == ".json":
|
||||
data = [
|
||||
Datum.load(obj, data_format=args.format)
|
||||
for obj in json.load(open(p, 'r'))
|
||||
]
|
||||
elif p.suffix == ".jsonl":
|
||||
with open(p, 'r') as f:
|
||||
data = [
|
||||
Datum.load(json.loads(line), data_format=args.format)
|
||||
for line in list(f)
|
||||
]
|
||||
else:
|
||||
raise ValueError(f"Unknown data suffix: {p.suffix}")
|
||||
data = [datum for datum in data if datum]
|
||||
return data
|
||||
|
||||
# -- Main
|
||||
|
||||
def main(args):
|
||||
import time
|
||||
start_time = time.time()
|
||||
path_2_eval_dataset = Path(args.eval_dataset).expanduser()
|
||||
print(f'{path_2_eval_dataset=}')
|
||||
data_eval = load_data(args)
|
||||
|
||||
server = Server()
|
||||
# Start server
|
||||
project_path, lean_path = get_project_and_lean_path()
|
||||
server = Server(
|
||||
imports=["Mathlib", "Aesop"],
|
||||
project_path=project_path,
|
||||
lean_path=lean_path,
|
||||
)
|
||||
|
||||
# - Start wandb run
|
||||
# print(f'\n\n-- Setup params')
|
||||
|
@ -269,9 +302,9 @@ def main(args):
|
|||
)
|
||||
|
||||
# - Full proof search with DSP
|
||||
print(f'\n\n-- Full proof search with DSP')
|
||||
full_proof_search_dsp_lean(eng, server, path_2_eval_dataset)
|
||||
print(f"Time taken: {time.time() - start_time:.2f} seconds, or {(time.time() - start_time) / 60:.2f} minutes, or {(time.time() - start_time) / 3600:.2f} hours.\a")
|
||||
full_proof_search_dsp_lean(eng, server, data_eval)
|
||||
msg = f"Time taken: {time.time() - start_time:.2f} seconds, or {(time.time() - start_time) / 60:.2f} minutes, or {(time.time() - start_time) / 3600:.2f} hours.\a"
|
||||
print(colored(msg, "magenta"))
|
||||
|
||||
# - End run
|
||||
# wandb.config.update(config)
|
||||
|
@ -292,11 +325,22 @@ if __name__ == "__main__":
|
|||
choices=['eval', 'prompts'],
|
||||
)
|
||||
parser.add_argument(
|
||||
"--eval-dataset",
|
||||
"--dataset",
|
||||
help="Evaluation dataset path",
|
||||
default=experiment_dir / 'debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json',
|
||||
)
|
||||
parser.add_argument("--model", help="Model", default="gpt-4o", choices=["gpt2", "gpt-3.5-turbo", "gpt-4o", "deepseek-ai/deepseek-math-7b-instruct"])
|
||||
parser.add_argument(
|
||||
"--model",
|
||||
help="Model",
|
||||
default="gpt-4o",
|
||||
choices=["gpt2", "gpt-3.5-turbo", "gpt-4o", "deepseek-ai/deepseek-math-7b-instruct"],
|
||||
)
|
||||
parser.add_argument(
|
||||
"--format",
|
||||
help="Data format",
|
||||
default="default",
|
||||
choices=["default", "minif2f"],
|
||||
)
|
||||
parser.add_argument("--start", default=0)
|
||||
parser.add_argument("--end", default=sys.maxsize)
|
||||
parser.add_argument("--batchsize", default=10, help="putnam has 348")
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
import json
|
||||
from typing import Union, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass
|
||||
class Datum:
|
||||
"""
|
||||
Represents one theorem proving datapoint.
|
||||
"""
|
||||
|
||||
id: Optional[str] = None
|
||||
|
||||
# Problem and solution in natural language
|
||||
nl_problem: Optional[Union[str, list[str]]] = None
|
||||
nl_solution: Optional[Union[str, list[str]]] = None
|
||||
|
||||
# Problem in formal language
|
||||
fl_problem: Optional[str] = None
|
||||
|
||||
def __str__(self):
|
||||
if self.id:
|
||||
return self.id
|
||||
return str(self.nl_problem)
|
||||
|
||||
@property
|
||||
def nl_problem_str(self) -> Optional[str]:
|
||||
if not self.nl_problem:
|
||||
return None
|
||||
if isinstance(self.nl_problem, list):
|
||||
return "\n".join(self.nl_problem)
|
||||
return self.nl_problem
|
||||
|
||||
@staticmethod
|
||||
def load_default(obj: dict):
|
||||
"""
|
||||
Loads data in the "default" format
|
||||
"""
|
||||
fl_problem = obj.get("fl_problem")
|
||||
if isinstance(fl_problem, list):
|
||||
fl_problem = "\n".join(fl_problem)
|
||||
return Datum(
|
||||
nl_problem=obj.get("nl_problem"),
|
||||
nl_solution=obj.get("nl_solution"),
|
||||
fl_problem=fl_problem,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def load_minif2f(obj: dict):
|
||||
"""
|
||||
Loads minif2f data
|
||||
"""
|
||||
fl_problem = obj["formal_statement"].strip()
|
||||
if fl_problem.startswith("--"):
|
||||
return None
|
||||
return Datum(
|
||||
id=obj["id"],
|
||||
fl_problem=fl_problem,
|
||||
#header=obj["header"],
|
||||
nl_problem=obj["informal_stmt"],
|
||||
nl_solution=obj["informal_proof"],
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def load(obj: dict, data_format: str):
|
||||
if data_format == "default":
|
||||
return Datum.load_default(obj)
|
||||
elif data_format == "minif2f":
|
||||
return Datum.load_minif2f(obj)
|
||||
else:
|
||||
raise ValueError(f"Invalid data format {data_format}")
|
|
@ -166,9 +166,11 @@ def extract_lean_code(
|
|||
|
||||
if line.rstrip() == WALL:
|
||||
if is_walled_lean:
|
||||
# found wall
|
||||
code = "\n".join(curr) + "\n"
|
||||
code = code.replace("ℕ", "Nat").replace(placeholder, "sorry")
|
||||
lean_codes.append(code)
|
||||
curr = []
|
||||
is_walled = False
|
||||
is_walled_lean = False
|
||||
continue
|
||||
|
@ -186,8 +188,13 @@ class TestPrompts(unittest.TestCase):
|
|||
sketch = "```lean\nimport Mathlib.Data.Nat.Basic\nimport Aesop\n\ntheorem n_plus_zero : ∀ n : ℕ, n + 0 = n := by\n -- Consider any natural number n. We need to show that n + 0 = n.\n -- Use the fact that adding zero to any natural number does not change its value.\n have h_nat_add_zero: ∀ n : ℕ, n + 0 = n := <TODO_PROOF_OR_HAMMER>\n -- Combine facts to close goal\n <TODO_PROOF_OR_HAMMER>\n```"
|
||||
codes = extract_lean_code(sketch)
|
||||
self.assertEqual(codes, [
|
||||
"import Mathlib.Data.Nat.Basic\nimport Aesop\n\ntheorem n_plus_zero : ∀ n : Nat, n + 0 = n := by\n -- Consider any natural number n. We need to show that n + 0 = n.\n -- Use the fact that adding zero to any natural number does not change its value.\n have h_nat_add_zero: ∀ n : Nat, n + 0 = n := sorry\n -- Combine facts to close goal\n sorry\n"
|
||||
"\ntheorem n_plus_zero : ∀ n : Nat, n + 0 = n := by\n -- Consider any natural number n. We need to show that n + 0 = n.\n -- Use the fact that adding zero to any natural number does not change its value.\n have h_nat_add_zero: ∀ n : Nat, n + 0 = n := sorry\n -- Combine facts to close goal\n sorry\n"
|
||||
])
|
||||
|
||||
def test_extract_sketch_minif2f(self):
|
||||
sketch = "To solve the problem formally in Lean 4, we will sketch the proof by breaking down the steps of the informal solution, leveraging Lean 4 tactics and possibly automated theorem proving. Here's how the formal sketch might look:\n\n```lean\nimport Mathlib.Data.Complex.Basic\nimport Aesop\n\n-- Define the complex number z\ndef z : ℂ := (1 + Complex.i) / Real.sqrt 2\n\ntheorem complex_problem_solution : \n (∑ i in finset.range 12, z ^ (i + 1) ^ 2) * \n (∑ i in finset.range 12, z ^ -((i + 1) ^ 2)) = 36 := by\n -- We first compute z as a complex number with a modulus of 1.\n -- Thus, powers of z represent rotations in the complex plane.\n have h_mod_z : Complex.abs z = 1 := <TODO_PROOF_OR_HAMMER>\n -- Recognize that each term z^(k^2) and its reciprocal are conjugates due to modulus 1.\n have h_conjugates : ∀ k : ℕ, z^(k^2) * (z^(-k^2)) = 1 := <TODO_PROOF_OR_HAMMER>\n -- The product (z^(1^2) + z^(2^2) + ... + z^(12^2)) * (z^(-1^2) + z^(-2^2) + ... + z^(-12^2))\n -- simplifies as a result of this conjugate pair property.\n have h_sum_conjugates : ∑ i in finset.range 12, z ^ (i + 1) ^ 2 = 0 := <TODO_PROOF_OR_HAMMER>\n have h_sum_reciprocals : ∑ i in finset.range 12, z ^ -((i + 1) ^ 2) = 0 := <TODO_PROOF_OR_HAMMER>\n -- Combine all the contributions, where non-zero terms contribute to the total sum\n -- to ensure the final product is 36 based on the angle and distribution of terms.\n have h_final_sum_product : (∑ i in finset.range 12, z ^ (i + 1) ^ 2) *\n (∑ i in finset.range 12, z ^ -((i + 1) ^ 2)) = 36 := <TODO_PROOF_OR_HAMMER>\n -- Conclude the proof with the calculated product.\n exact h_final_sum_product\n```\n\nIn this sketch:\n- We define \\( z \\) as the complex number \\(\\frac{1+i}{\\sqrt{2}}\\).\n- We use properties of complex numbers with modulus 1, recognizing rotational symmetry and conjugate pair relations.\n- We use automated tactics (`<TODO_PROOF_OR_HAMMER>`) to handle calculations involving sums, products, and properties of the complex exponentials.\n- Finally, we show that the structure of these complex exponentials simplifies the product to yield the desired result of 36."
|
||||
codes = extract_lean_code(sketch)
|
||||
self.assertEqual(len(codes), 1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
import collections
|
||||
from typing import Optional
|
||||
from pantograph.search import Agent
|
||||
from pantograph.expr import GoalState, Tactic
|
||||
|
||||
class HammerAgent(Agent):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
self.goal_tactic_id_map = collections.defaultdict(lambda : 0)
|
||||
self.tactics = [
|
||||
"aesop",
|
||||
]
|
||||
|
||||
def next_tactic(
|
||||
self,
|
||||
state: GoalState,
|
||||
goal_id: int,
|
||||
) -> Optional[Tactic]:
|
||||
key = (state.state_id, goal_id)
|
||||
i = self.goal_tactic_id_map[key]
|
||||
|
||||
if i >= len(self.tactics):
|
||||
return None
|
||||
|
||||
self.goal_tactic_id_map[key] = i + 1
|
||||
return self.tactics[i]
|
|
@ -793,13 +793,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio
|
|||
|
||||
[[package]]
|
||||
name = "ipython"
|
||||
version = "8.27.0"
|
||||
version = "8.28.0"
|
||||
description = "IPython: Productive Interactive Computing"
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
files = [
|
||||
{file = "ipython-8.27.0-py3-none-any.whl", hash = "sha256:f68b3cb8bde357a5d7adc9598d57e22a45dfbea19eb6b98286fa3b288c9cd55c"},
|
||||
{file = "ipython-8.27.0.tar.gz", hash = "sha256:0b99a2dc9f15fd68692e898e5568725c6d49c527d36a9fb5960ffbdeaa82ff7e"},
|
||||
{file = "ipython-8.28.0-py3-none-any.whl", hash = "sha256:530ef1e7bb693724d3cdc37287c80b07ad9b25986c007a53aa1857272dac3f35"},
|
||||
{file = "ipython-8.28.0.tar.gz", hash = "sha256:0d0d15ca1e01faeb868ef56bc7ee5a0de5bd66885735682e8a322ae289a13d1a"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
@ -2047,25 +2047,29 @@ files = [
|
|||
|
||||
[[package]]
|
||||
name = "pywin32"
|
||||
version = "306"
|
||||
version = "307"
|
||||
description = "Python for Window Extensions"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"},
|
||||
{file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"},
|
||||
{file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"},
|
||||
{file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"},
|
||||
{file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"},
|
||||
{file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"},
|
||||
{file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"},
|
||||
{file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"},
|
||||
{file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"},
|
||||
{file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"},
|
||||
{file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"},
|
||||
{file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"},
|
||||
{file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"},
|
||||
{file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"},
|
||||
{file = "pywin32-307-cp310-cp310-win32.whl", hash = "sha256:f8f25d893c1e1ce2d685ef6d0a481e87c6f510d0f3f117932781f412e0eba31b"},
|
||||
{file = "pywin32-307-cp310-cp310-win_amd64.whl", hash = "sha256:36e650c5e5e6b29b5d317385b02d20803ddbac5d1031e1f88d20d76676dd103d"},
|
||||
{file = "pywin32-307-cp310-cp310-win_arm64.whl", hash = "sha256:0c12d61e0274e0c62acee79e3e503c312426ddd0e8d4899c626cddc1cafe0ff4"},
|
||||
{file = "pywin32-307-cp311-cp311-win32.whl", hash = "sha256:fec5d27cc893178fab299de911b8e4d12c5954e1baf83e8a664311e56a272b75"},
|
||||
{file = "pywin32-307-cp311-cp311-win_amd64.whl", hash = "sha256:987a86971753ed7fdd52a7fb5747aba955b2c7fbbc3d8b76ec850358c1cc28c3"},
|
||||
{file = "pywin32-307-cp311-cp311-win_arm64.whl", hash = "sha256:fd436897c186a2e693cd0437386ed79f989f4d13d6f353f8787ecbb0ae719398"},
|
||||
{file = "pywin32-307-cp312-cp312-win32.whl", hash = "sha256:07649ec6b01712f36debf39fc94f3d696a46579e852f60157a729ac039df0815"},
|
||||
{file = "pywin32-307-cp312-cp312-win_amd64.whl", hash = "sha256:00d047992bb5dcf79f8b9b7c81f72e0130f9fe4b22df613f755ab1cc021d8347"},
|
||||
{file = "pywin32-307-cp312-cp312-win_arm64.whl", hash = "sha256:b53658acbfc6a8241d72cc09e9d1d666be4e6c99376bc59e26cdb6223c4554d2"},
|
||||
{file = "pywin32-307-cp313-cp313-win32.whl", hash = "sha256:ea4d56e48dc1ab2aa0a5e3c0741ad6e926529510516db7a3b6981a1ae74405e5"},
|
||||
{file = "pywin32-307-cp313-cp313-win_amd64.whl", hash = "sha256:576d09813eaf4c8168d0bfd66fb7cb3b15a61041cf41598c2db4a4583bf832d2"},
|
||||
{file = "pywin32-307-cp313-cp313-win_arm64.whl", hash = "sha256:b30c9bdbffda6a260beb2919f918daced23d32c79109412c2085cbc513338a0a"},
|
||||
{file = "pywin32-307-cp37-cp37m-win32.whl", hash = "sha256:5101472f5180c647d4525a0ed289ec723a26231550dbfd369ec19d5faf60e511"},
|
||||
{file = "pywin32-307-cp37-cp37m-win_amd64.whl", hash = "sha256:05de55a7c110478dc4b202230e98af5e0720855360d2b31a44bb4e296d795fba"},
|
||||
{file = "pywin32-307-cp38-cp38-win32.whl", hash = "sha256:13d059fb7f10792542082f5731d5d3d9645320fc38814759313e5ee97c3fac01"},
|
||||
{file = "pywin32-307-cp38-cp38-win_amd64.whl", hash = "sha256:7e0b2f93769d450a98ac7a31a087e07b126b6d571e8b4386a5762eb85325270b"},
|
||||
{file = "pywin32-307-cp39-cp39-win32.whl", hash = "sha256:55ee87f2f8c294e72ad9d4261ca423022310a6e79fb314a8ca76ab3f493854c6"},
|
||||
{file = "pywin32-307-cp39-cp39-win_amd64.whl", hash = "sha256:e9d5202922e74985b037c9ef46778335c102b74b95cec70f629453dbe7235d87"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2954,13 +2958,13 @@ test = ["pytest", "ruff"]
|
|||
|
||||
[[package]]
|
||||
name = "tomli"
|
||||
version = "2.0.1"
|
||||
version = "2.0.2"
|
||||
description = "A lil' TOML parser"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
|
||||
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
|
||||
{file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"},
|
||||
{file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -3101,13 +3105,13 @@ tutorials = ["matplotlib", "pandas", "tabulate", "torch"]
|
|||
|
||||
[[package]]
|
||||
name = "types-python-dateutil"
|
||||
version = "2.9.0.20240906"
|
||||
version = "2.9.0.20241003"
|
||||
description = "Typing stubs for python-dateutil"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "types-python-dateutil-2.9.0.20240906.tar.gz", hash = "sha256:9706c3b68284c25adffc47319ecc7947e5bb86b3773f843c73906fd598bc176e"},
|
||||
{file = "types_python_dateutil-2.9.0.20240906-py3-none-any.whl", hash = "sha256:27c8cc2d058ccb14946eebcaaa503088f4f6dbc4fb6093d3d456a49aef2753f6"},
|
||||
{file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"},
|
||||
{file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
@ -25,8 +25,8 @@ tenacity = "8.3.0"
|
|||
tiktoken = "^0.7.0"
|
||||
torch = "2.2.1"
|
||||
wandb = "0.17.0"
|
||||
# vllm = "0.4.1"
|
||||
termcolor = "^2.4.0"
|
||||
# vllm = "0.4.1"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
|
|
Loading…
Reference in New Issue