2024-12-11 16:52:20 -08:00
14 changed files with 402 additions and 129 deletions
--- a/Pantograph/Frontend.lean
+++ b/Pantograph/Frontend.lean
@ -1,4 +1,4 @@
-/- Adapted from lean-training-data by semorrison -/
 import Pantograph.Frontend.Basic
 import Pantograph.Frontend.Elab
+import Pantograph.Frontend.InfoTree
 import Pantograph.Frontend.MetaTranslate
--- a/Pantograph/Frontend/Basic.lean
+++ b/Pantograph/Frontend/Basic.lean
@ -30,6 +30,13 @@ end Lean.PersistentArray

 namespace Pantograph.Frontend

+@[export pantograph_frontend_stx_byte_range]
+def stxByteRange (stx : Syntax) : String.Pos × String.Pos :=
+  let pos := stx.getPos?.getD 0
+  let endPos := stx.getTailPos?.getD 0
+  (pos, endPos)
+
+
 abbrev FrontendM := Elab.Frontend.FrontendM

 structure CompilationStep where
--- a/Pantograph/Frontend/Elab.lean
+++ b/Pantograph/Frontend/Elab.lean
@ -1,87 +1,21 @@
-/- Adapted from https://github.com/semorrison/lean-training-data -/
 import Lean.Elab.Import
 import Lean.Elab.Command
 import Lean.Elab.InfoTree
+import Lean.DeclarationRange

 import Pantograph.Frontend.Basic
 import Pantograph.Frontend.MetaTranslate
 import Pantograph.Goal
 import Pantograph.Protocol
+import Pantograph.Frontend.InfoTree

 open Lean

-namespace Lean.Elab.Info
-/-- The `Syntax` for a `Lean.Elab.Info`, if there is one. -/
-protected def stx? : Info → Option Syntax
-  | .ofTacticInfo         info => info.stx
-  | .ofTermInfo           info => info.stx
-  | .ofCommandInfo        info => info.stx
-  | .ofMacroExpansionInfo info => info.stx
-  | .ofOptionInfo         info => info.stx
-  | .ofFieldInfo          info => info.stx
-  | .ofCompletionInfo     info => info.stx
-  | .ofUserWidgetInfo     info => info.stx
-  | .ofCustomInfo         info => info.stx
-  | .ofFVarAliasInfo      _    => none
-  | .ofFieldRedeclInfo    info => info.stx
-  | .ofOmissionInfo       info => info.stx
-/-- Is the `Syntax` for this `Lean.Elab.Info` original, or synthetic? -/
-protected def isOriginal (i : Info) : Bool :=
-  match i.stx? with
-  | none => true   -- Somewhat unclear what to do with `FVarAliasInfo`, so be conservative.
-  | some stx => match stx.getHeadInfo with
-    | .original .. => true
-    | _ => false
-end Lean.Elab.Info
-
-namespace Lean.Elab.TacticInfo
-
-/-- Find the name for the outermost `Syntax` in this `TacticInfo`. -/
-def name? (t : TacticInfo) : Option Name :=
-  match t.stx with
-  | Syntax.node _ n _ => some n
-  | _ => none
-/-- Decide whether a tactic is "substantive",
-or is merely a tactic combinator (e.g. `by`, `;`, multiline tactics, parenthesized tactics). -/
-def isSubstantive (t : TacticInfo) : Bool :=
-  match t.name? with
-  | none => false
-  | some `null => false
-  | some ``cdot => false
-  | some ``cdotTk => false
-  | some ``Lean.Parser.Term.byTactic => false
-  | some ``Lean.Parser.Tactic.tacticSeq => false
-  | some ``Lean.Parser.Tactic.tacticSeq1Indented => false
-  | some ``Lean.Parser.Tactic.«tactic_<;>_» => false
-  | some ``Lean.Parser.Tactic.paren => false
-  | _ => true
-
-end Lean.Elab.TacticInfo
-
-namespace Lean.Elab.InfoTree
-
-/--
-Keep `.node` nodes and `.hole` nodes satisfying predicates.
-
-Returns a `List InfoTree`, although in most situations this will be a singleton.
-/
-partial def filter (p : Info → Bool) (m : MVarId → Bool := fun _ => false) :
-    InfoTree → List InfoTree
-  | .context ctx tree => tree.filter p m |>.map (.context ctx)
-  | .node info children =>
-    if p info then
-      [.node info (children.toList.map (filter p m)).join.toPArray']
-    else
-      (children.toList.map (filter p m)).join
-  | .hole mvar => if m mvar then [.hole mvar] else []
-
-end Lean.Elab.InfoTree
-
-
 namespace Pantograph.Frontend

 -- Info tree filtering functions

+/- Adapted from lean-training-data -/
 structure TacticInvocation where
  info : Elab.TacticInfo
  ctx : Elab.ContextInfo
@ -131,19 +65,10 @@ protected def usedConstants (t: TacticInvocation) : NameSet :=

 end TacticInvocation

-/-- Analogue of `Lean.Elab.InfoTree.findInfo?`, but that returns a list of all results. -/
-partial def findAllInfo (t : Elab.InfoTree) (context?: Option Elab.ContextInfo) (pred : Elab.Info → Bool) :
-    List (Elab.Info × Option Elab.ContextInfo × PersistentArray Elab.InfoTree) :=
-  match t with
-  | .context inner t => findAllInfo t (inner.mergeIntoOuter? context?) pred
-  | .node i children  =>
-      (if pred i then [(i, context?, children)] else []) ++ children.toList.bind (fun t => findAllInfo t context? pred)
-  | _ => []
-
 /-- Return all `TacticInfo` nodes in an `InfoTree` corresponding to tactics,
 each equipped with its relevant `ContextInfo`, and any children info trees. -/
 private def collectTacticNodes (t : Elab.InfoTree) : List TacticInvocation :=
-  let infos := findAllInfo t none fun i => match i with
+  let infos := t.findAllInfo none false fun i => match i with
    | .ofTacticInfo _ => true
    | _ => false
  infos.filterMap fun p => match p with
@ -162,9 +87,11 @@ def collectTacticsFromCompilationStep (step : CompilationStep) : IO (List Protoc
  tactics.mapM λ invocation => do
    let goalBefore := (Format.joinSep (← invocation.goalState) "\n").pretty
    let goalAfter := (Format.joinSep (← invocation.goalStateAfter) "\n").pretty
-    let tactic ← invocation.ctx.runMetaM {} do
-      let t ← PrettyPrinter.ppTactic ⟨invocation.info.stx⟩
-      return t.pretty
+    let tactic ← invocation.ctx.runMetaM {} <| Meta.withMCtx invocation.info.mctxBefore do
+      return (← invocation.ctx.ppSyntax {} invocation.info.stx).pretty
+      -- FIXME: Why does this not work? There are problems with `term.pseudo.antiquot`
+      --PrettyPrinter.ppTactic ⟨invocation.info.stx⟩
+      --return t.pretty
    let usedConstants := invocation.usedConstants.toArray.map λ n => n.toString
    return {
      goalBefore,
@ -177,47 +104,79 @@ structure InfoWithContext where
  info: Elab.Info
  context?: Option Elab.ContextInfo := .none

-private def collectSorrysInTree (t : Elab.InfoTree) : List InfoWithContext :=
-  let infos := findAllInfo t none fun i => match i with
-    | .ofTermInfo { expectedType?, expr, stx, .. } =>
-      expr.isSorry ∧ expectedType?.isSome ∧ stx.isOfKind `Lean.Parser.Term.sorry
+private def collectSorrysInTree (t : Elab.InfoTree) : IO (List InfoWithContext) := do
+  let infos ← t.findAllInfoM none fun i ctx? => match i with
+    | .ofTermInfo { expectedType?, expr, stx, lctx, .. } => do
+      let .some ctx := ctx? | return (false, true)
+      if expr.isSorry ∧ stx.isOfKind `Lean.Parser.Term.sorry then
+        if expectedType?.isNone then
+          throw $ .userError "Sorry of indeterminant type is not allowed"
+        return (true, false)
+      let .some expectedType := expectedType? | return (false, true)
+      let typeMatch ← ctx.runMetaM lctx do
+        let type ← Meta.inferType expr
+        Meta.isExprDefEqGuarded type expectedType
+      return match typeMatch, expr.hasSorry with
+      | false, true => (true, false) -- Types mismatch but has sorry -> collect, halt
+      | false, false => (true, false) -- Types mistmatch but no sorry -> collect, halt
+      | true, true => (false, true) -- Types match but has sorry -> continue
+      | true, false => (false, false) -- Types match but no sorries -> halt
    | .ofTacticInfo { stx, goalsBefore, .. } =>
      -- The `sorry` term is distinct from the `sorry` tactic
      let isSorry := stx.isOfKind `Lean.Parser.Tactic.tacticSorry
-      isSorry ∧ !goalsBefore.isEmpty
-    | _ => false
-  infos.map fun (info, context?, _) => { info, context? }
+      return (isSorry ∧ !goalsBefore.isEmpty, ¬ isSorry)
+    | _ => return (false, true)
+  return infos.map fun (info, context?, _) => { info, context? }

 -- NOTE: Plural deliberately not spelled "sorries"
@[export pantograph_frontend_collect_sorrys_m]
-def collectSorrys (step: CompilationStep) : List InfoWithContext :=
-  step.trees.bind collectSorrysInTree
-
+def collectSorrys (step: CompilationStep) : IO (List InfoWithContext) := do
+  return (← step.trees.mapM collectSorrysInTree).join

+structure AnnotatedGoalState where
+  state : GoalState
+  srcBoundaries : List (String.Pos × String.Pos)

 /--
 Since we cannot directly merge `MetavarContext`s, we have to get creative. This
 function duplicates frozen mvars in term and tactic info nodes, and add them to
 the current `MetavarContext`.
 -/
-@[export pantograph_frontend_sorrys_to_goal_state]
-def sorrysToGoalState (sorrys : List InfoWithContext) : MetaM GoalState := do
+@[export pantograph_frontend_sorrys_to_goal_state_m]
+def sorrysToGoalState (sorrys : List InfoWithContext) : MetaM AnnotatedGoalState := do
  assert! !sorrys.isEmpty
  let goalsM := sorrys.mapM λ i => do
    match i.info with
    | .ofTermInfo termInfo  => do
      let mvarId ← MetaTranslate.translateMVarFromTermInfo termInfo i.context?
-      return [mvarId]
+      return [(mvarId, stxByteRange termInfo.stx)]
    | .ofTacticInfo tacticInfo => do
-      MetaTranslate.translateMVarFromTacticInfoBefore tacticInfo i.context?
+      let mvarIds ← MetaTranslate.translateMVarFromTacticInfoBefore tacticInfo i.context?
+      let range := stxByteRange tacticInfo.stx
+      return mvarIds.map (·, range)
    | _ => panic! "Invalid info"
-  let goals := List.join (← goalsM.run {} |>.run' {})
+  let annotatedGoals := List.join (← goalsM.run {} |>.run' {})
+  let goals := annotatedGoals.map Prod.fst
+  let srcBoundaries := annotatedGoals.map Prod.snd
  let root := match goals with
    | [] => panic! "No MVars generated"
    | [g] => g
    | _ => { name := .anonymous }
-  GoalState.createFromMVars goals root
+  let state ← GoalState.createFromMVars goals root
+  return { state, srcBoundaries }


+@[export pantograph_frontend_collect_new_defined_constants_m]
+def collectNewDefinedConstants (step : CompilationStep) : IO (List Name) := do
+  step.after.constants.map₂.foldlM (λ acc name _ => do
+    if step.before.contains name then
+      return acc
+    let coreM : CoreM Bool := Option.isSome <$> findDeclarationRanges? name
+    let hasRange ← coreM.run' { fileName := step.fileName, fileMap := step.fileMap } { env := step.after } |>.toBaseIO
+    match hasRange with
+    | .ok true => return name :: acc
+    | .ok false => return acc
+    | .error e => throw $ IO.userError (← e.toMessageData.toString)
+    ) []

 end Pantograph.Frontend
--- a/Pantograph/Frontend/InfoTree.lean
+++ b/Pantograph/Frontend/InfoTree.lean
@ -0,0 +1,153 @@
+/- Adapted from lean-training-data -/
+import Lean.Elab.InfoTree
+import Lean.Parser.Term
+import Lean.PrettyPrinter
+
+open Lean
+
+namespace Lean.Elab
+
+private def elaboratorToString : Name → String
+  | .anonymous => ""
+  | n => s!"⟨{n}⟩ "
+private def indent (s : String) : String := "\n".intercalate $ s.splitOn "\n" |>.map ("\t" ++ .)
+
+/-- The `Syntax` for a `Lean.Elab.Info`, if there is one. -/
+protected def Info.stx? : Info → Option Syntax
+  | .ofTacticInfo         info => info.stx
+  | .ofTermInfo           info => info.stx
+  | .ofCommandInfo        info => info.stx
+  | .ofMacroExpansionInfo info => info.stx
+  | .ofOptionInfo         info => info.stx
+  | .ofFieldInfo          info => info.stx
+  | .ofCompletionInfo     info => info.stx
+  | .ofUserWidgetInfo     info => info.stx
+  | .ofCustomInfo         info => info.stx
+  | .ofFVarAliasInfo      _    => none
+  | .ofFieldRedeclInfo    info => info.stx
+  | .ofOmissionInfo       info => info.stx
+/-- Is the `Syntax` for this `Lean.Elab.Info` original, or synthetic? -/
+protected def Info.isOriginal (i : Info) : Bool :=
+  match i.stx? with
+  | none => true   -- Somewhat unclear what to do with `FVarAliasInfo`, so be conservative.
+  | some stx => match stx.getHeadInfo with
+    | .original .. => true
+    | _ => false
+
+def ContextInfo.ppExpr (ctx : ContextInfo) (lctx : LocalContext) (e : Expr) : IO Format :=
+  ctx.runMetaM lctx (do Meta.ppExpr (← instantiateMVars e))
+
+def CommandInfo.toString (info : CommandInfo) (ctx : ContextInfo) : IO String := do
+  let stx := (← ctx.ppSyntax {} info.stx).pretty
+  return s!"{elaboratorToString info.elaborator}\n{stx}"
+
+def TermInfo.toString (info : TermInfo) (ctx : ContextInfo) : IO String := do
+  let stx := (← ctx.ppSyntax info.lctx info.stx).pretty
+  let expectedType := (← info.expectedType?.mapM fun ty => do
+    pure s!": {(← ctx.ppExpr info.lctx ty).pretty}").getD ""
+  let expr := (← ctx.ppExpr info.lctx info.expr).pretty
+  return s!"{elaboratorToString info.elaborator}{expr}{expectedType}\n{stx}"
+
+/-- Find the name for the outermost `Syntax` in this `TacticInfo`. -/
+def TacticInfo.name? (t : TacticInfo) : Option Name :=
+  match t.stx with
+  | Syntax.node _ n _ => some n
+  | _ => none
+/-- Decide whether a tactic is "substantive",
+or is merely a tactic combinator (e.g. `by`, `;`, multiline tactics, parenthesized tactics). -/
+def TacticInfo.isSubstantive (t : TacticInfo) : Bool :=
+  match t.name? with
+  | none => false
+  | some `null => false
+  | some ``cdot => false
+  | some ``cdotTk => false
+  | some ``Lean.Parser.Term.byTactic => false
+  | some ``Lean.Parser.Tactic.tacticSeq => false
+  | some ``Lean.Parser.Tactic.tacticSeq1Indented => false
+  | some ``Lean.Parser.Tactic.«tactic_<;>_» => false
+  | some ``Lean.Parser.Tactic.paren => false
+  | _ => true
+def TacticInfo.pp (info : TacticInfo) (ctx : ContextInfo) : IO Format :=
+  ctx.runMetaM {} try
+    Lean.PrettyPrinter.ppTactic ⟨info.stx⟩
+  catch _ =>
+    pure "<failed to pretty print>"
+def TacticInfo.toString (i : TacticInfo) (ctx : ContextInfo) : IO String := do
+  let name := i.name?
+  let stx := Format.pretty (← i.pp ctx)
+  return s!"{name}\n{stx}"
+
+/--
+Keep `.node` nodes and `.hole` nodes satisfying predicates.
+
+Returns a `List InfoTree`, although in most situations this will be a singleton.
+-/
+partial def InfoTree.filter (p : Info → Bool) (m : MVarId → Bool := fun _ => false) :
+    InfoTree → List InfoTree
+  | .context ctx tree => tree.filter p m |>.map (.context ctx)
+  | .node info children =>
+    if p info then
+      [.node info (children.toList.map (filter p m)).join.toPArray']
+    else
+      (children.toList.map (filter p m)).join
+  | .hole mvar => if m mvar then [.hole mvar] else []
+
+/-- Analogue of `Lean.Elab.InfoTree.findInfo?`, but that returns a list of all results. -/
+partial def InfoTree.findAllInfo
+    (t : InfoTree)
+    (context?: Option Elab.ContextInfo)
+    (haltOnMatch : Bool := false)
+    (pred : Elab.Info → Bool)
+    : List (Elab.Info × Option Elab.ContextInfo × PersistentArray Elab.InfoTree) :=
+  match t with
+  | .context inner t => findAllInfo t (inner.mergeIntoOuter? context?) haltOnMatch pred
+  | .node i children  =>
+    let head := if pred i then [(i, context?, children)] else []
+    let tail := if haltOnMatch ∧ !head.isEmpty then [] else children.toList.bind (fun t => findAllInfo t context? haltOnMatch pred)
+    head ++ tail
+  | _ => []
+
+/-- Monadic analogue of `findAllInfo`, but predicate controls whether to recurse. -/
+partial def InfoTree.findAllInfoM [Monad m]
+    (t : InfoTree)
+    (context?: Option Elab.ContextInfo)
+    (pred : Elab.Info → Option Elab.ContextInfo → m (Bool × Bool))
+    : m (List (Elab.Info × Option Elab.ContextInfo × PersistentArray Elab.InfoTree)) := do
+  match t with
+  | .context inner t => t.findAllInfoM (inner.mergeIntoOuter? context?) pred
+  | .node i children  =>
+    let (flagCollect, flagRecurse) ← pred i context?
+    let head := if flagCollect then [(i, context?, children)] else []
+    let tail := if ¬ flagRecurse then pure [] else children.toList.mapM (fun t => t.findAllInfoM context? pred)
+    return head ++ (← tail).join
+  | _ => return []
+
+@[export pantograph_infotree_to_string_m]
+partial def InfoTree.toString (t : InfoTree) (ctx?: Option Elab.ContextInfo := .none) : IO String := do
+  match t with
+  | .context ctx t => t.toString (ctx.mergeIntoOuter? ctx?)
+  | .node info children =>
+    if let some ctx := ctx? then
+      let node : String ← match info with
+      | .ofTermInfo    info => pure s!"[term] {(← info.toString ctx)}"
+      | .ofCommandInfo info => pure s!"[command] {(← info.toString ctx)}"
+      | .ofTacticInfo  info => pure s!"[tactic] {(← info.toString ctx)}"
+      | .ofMacroExpansionInfo _ => pure "[macro_exp]"
+      | .ofOptionInfo _ => pure "[option]"
+      | .ofFieldInfo _ => pure "[field]"
+      | .ofCompletionInfo _ => pure "[completion]"
+      | .ofUserWidgetInfo _ => pure "[user_widget]"
+      | .ofCustomInfo _ => pure "[custom]"
+      | .ofFVarAliasInfo _ => pure "[fvar]"
+      | .ofFieldRedeclInfo _ => pure "[field_redecl]"
+      | .ofOmissionInfo _ => pure "[omission]"
+      let children := "\n".intercalate (← children.toList.mapM λ t' => do pure $ indent $ ← t'.toString ctx)
+      return s!"{node}\n{children}"
+    else throw <| IO.userError "No `ContextInfo` available."
+  | .hole mvarId =>
+    if let some ctx := ctx? then
+      let payload := (← ctx.runMetaM {} (do Meta.ppGoal mvarId)).pretty
+      return s!"[hole] {payload}"
+    else throw <| IO.userError "No `ContextInfo` available."
+
+end Lean.Elab
--- a/Pantograph/Goal.lean
+++ b/Pantograph/Goal.lean
@ -177,16 +177,51 @@ protected def GoalState.getMVarEAssignment (goalState: GoalState) (mvarId: MVarI

 --- Tactic execution functions ---

-protected def GoalState.step (state: GoalState) (goal: MVarId) (tacticM: Elab.Tactic.TacticM Unit)
+-- Mimics `Elab.Term.logUnassignedUsingErrorInfos`
+private def collectAllErroredMVars (src : MVarId) : Elab.TermElabM (List MVarId) := do
+  -- These descendants serve as "seed" mvars. If a MVarError's mvar is related
+  -- to one of these seed mvars, it means an error has occurred when a tactic
+  -- was executing on `src`. `evalTactic`, will not capture these mvars, so we
+  -- need to manually find them and save them into the goal list.
+  let descendants ←  Meta.getMVars $ ← instantiateMVars (.mvar src)
+  --let _ ← Elab.Term.logUnassignedUsingErrorInfos descendants
+  let mut alreadyVisited : MVarIdSet := {}
+  let mut result : MVarIdSet := {}
+  for { mvarId, .. } in (← get).mvarErrorInfos do
+    unless alreadyVisited.contains mvarId do
+      alreadyVisited := alreadyVisited.insert mvarId
+      /- The metavariable `mvarErrorInfo.mvarId` may have been assigned or
+         delayed assigned to another metavariable that is unassigned. -/
+      let mvarDeps ← Meta.getMVars (.mvar mvarId)
+      if mvarDeps.any descendants.contains then do
+        result := mvarDeps.foldl (·.insert ·) result
+  return result.toList
+
+private def mergeMVarLists (li1 li2 : List MVarId) : List MVarId :=
+  let li2' := li2.filter (¬ li1.contains ·)
+  li1 ++ li2'
+
+/--
+Set `guardMVarErrors` to true to capture mvar errors. Lean will not
+automatically collect mvars from text tactics (vide
+`test_tactic_failure_synthesize_placeholder`)
+-/
+protected def GoalState.step (state: GoalState) (goal: MVarId) (tacticM: Elab.Tactic.TacticM Unit) (guardMVarErrors : Bool := false)
  : Elab.TermElabM GoalState := do
  unless (← getMCtx).decls.contains goal do
    throwError s!"Goal is not in context: {goal.name}"
  goal.checkNotAssigned `GoalState.step
-  let (_, newGoals) ← tacticM { elaborator := .anonymous } |>.run { goals := [goal] }
+  let (_, { goals }) ← tacticM { elaborator := .anonymous } |>.run { goals := [goal] }
  let nextElabState ← MonadBacktrack.saveState
+  Elab.Term.synthesizeSyntheticMVarsNoPostponing
+
+  let goals ← if guardMVarErrors then
+      pure $ mergeMVarLists goals (← collectAllErroredMVars goal)
+    else
+      pure goals
  return {
    state with
-    savedState := { term := nextElabState, tactic := newGoals },
+    savedState := { term := nextElabState, tactic := { goals }, },
    parentMVar? := .some goal,
    calcPrevRhs? := .none,
  }
@ -203,10 +238,10 @@ inductive TacticResult where
  | invalidAction (message: String)

 /-- Executes a `TacticM` monad on this `GoalState`, collecting the errors as necessary -/
-protected def GoalState.tryTacticM (state: GoalState) (goal: MVarId) (tacticM: Elab.Tactic.TacticM Unit):
+protected def GoalState.tryTacticM (state: GoalState) (goal: MVarId) (tacticM: Elab.Tactic.TacticM Unit) (guardMVarErrors : Bool := false):
      Elab.TermElabM TacticResult := do
  try
-    let nextState ← state.step goal tacticM
+    let nextState ← state.step goal tacticM guardMVarErrors

    -- Check if error messages have been generated in the core.
    let newMessages ← (← Core.getMessageLog).toList.drop state.coreState.messages.toList.length
@ -215,6 +250,7 @@ protected def GoalState.tryTacticM (state: GoalState) (goal: MVarId) (tacticM: E
          return .some $ ← m.toString
        else
          return .none
+    Core.resetMessageLog
    if ¬ newMessages.isEmpty then
      return .failure newMessages.toArray
    return .success nextState
@ -233,7 +269,7 @@ protected def GoalState.tryTactic (state: GoalState) (goal: MVarId) (tactic: Str
    (fileName := ← getFileName) with
    | .ok stx => pure $ stx
    | .error error => return .parseError error
-  state.tryTacticM goal $ Elab.Tactic.evalTactic tactic
+  state.tryTacticM goal (Elab.Tactic.evalTactic tactic) true

 protected def GoalState.tryAssign (state: GoalState) (goal: MVarId) (expr: String):
      Elab.TermElabM TacticResult := do
--- a/Pantograph/Protocol.lean
+++ b/Pantograph/Protocol.lean
@ -316,6 +316,8 @@ structure FrontendProcess where
  invocations: Bool := false
  -- If set to true, collect `sorry`s
  sorrys: Bool := false
+  -- If set to true, extract new constants
+  newConstants: Bool := false
  deriving Lean.FromJson
 structure InvokedTactic where
  goalBefore: String
@ -329,11 +331,16 @@ structure InvokedTactic where
 structure CompilationUnit where
  -- String boundaries of compilation units
  boundary: (Nat × Nat)
+  messages: Array String := #[]
  -- Tactic invocations
  invocations?: Option (List InvokedTactic) := .none
  goalStateId?: Option Nat := .none
-  goals: Array Goal := #[]
-  messages: Array String := #[]
+  goals?: Option (Array Goal) := .none
+  -- Code segments which generated the goals
+  goalSrcBoundaries?: Option (Array (Nat × Nat)) := .none
+
+  -- New constants defined in compilation unit
+  newConstants?: Option (Array String) := .none
  deriving Lean.ToJson
 structure FrontendProcessResult where
  units: List CompilationUnit
--- a/README.md
+++ b/README.md
@ -7,7 +7,7 @@ A Machine-to-Machine interaction system for Lean 4.
 Pantograph provides interfaces to execute proofs, construct expressions, and
 examine the symbol list of a Lean project for machine learning.

-See [documentations](doc/) for design rationale and references.
+See [documentations](doc/rationale.md) for design rationale and references.

 ## Installation

--- a/Repl.lean
+++ b/Repl.lean
@ -79,6 +79,7 @@ def execute (command: Protocol.Command): MainM Lean.Json := do
    let state ← get
    let nGoals := state.goalStates.size
    set { state with nextId := 0, goalStates := .empty }
+    Lean.Core.resetMessageLog
    return .ok { nGoals }
  stat (_: Protocol.Stat): MainM (CR Protocol.StatResult) := do
    let state ← get
@ -258,27 +259,38 @@ def execute (command: Protocol.Command): MainM Lean.Json := do
            pure $ .some invocations
          else
            pure .none
-        let sorrys := if args.sorrys then
+        let sorrys ← if args.sorrys then
            Frontend.collectSorrys step
          else
-            []
+            pure []
        let messages ← step.messageStrings
-        return (step.before, boundary, invocations?, sorrys, messages)
+        let newConstants ← if args.newConstants then
+            Frontend.collectNewDefinedConstants step
+          else
+            pure []
+        return (step.before, boundary, invocations?, sorrys, messages, newConstants)
      let li ← frontendM.run context |>.run' state
-      let units ← li.mapM λ (env, boundary, invocations?, sorrys, messages) => Lean.withEnv env do
-        let (goalStateId?, goals) ← if sorrys.isEmpty then do
-            pure (.none, #[])
+      let units ← li.mapM λ (env, boundary, invocations?, sorrys, messages, newConstants) => Lean.withEnv env do
+        let newConstants? := if args.newConstants then
+            .some $ newConstants.toArray.map λ name => name.toString
+          else
+            .none
+        let (goalStateId?, goals?, goalSrcBoundaries?) ← if sorrys.isEmpty then do
+            pure (.none, .none, .none)
          else do
-            let goalState ← runMetaInMainM $ Frontend.sorrysToGoalState sorrys
-            let stateId ← newGoalState goalState
-            let goals ← goalSerialize goalState options
-            pure (.some stateId, goals)
+            let { state, srcBoundaries } ← runMetaInMainM $ Frontend.sorrysToGoalState sorrys
+            let stateId ← newGoalState state
+            let goals ← goalSerialize state options
+            let srcBoundaries := srcBoundaries.toArray.map (λ (b, e) => (b.byteIdx, e.byteIdx))
+            pure (.some stateId, .some goals, .some srcBoundaries)
        return {
          boundary,
+          messages,
          invocations?,
          goalStateId?,
-          goals,
-          messages,
+          goals?,
+          goalSrcBoundaries?,
+          newConstants?,
        }
      return .ok { units }
    catch e =>
--- a/Test/Frontend.lean
+++ b/Test/Frontend.lean
@ -10,13 +10,13 @@ def collectSorrysFromSource (source: String) : MetaM (List GoalState) := do
  let filename := "<anonymous>"
  let (context, state) ← do Frontend.createContextStateFromFile source filename (← getEnv) {}
  let m := Frontend.mapCompilationSteps λ step => do
-    return (step.before, Frontend.collectSorrys step)
+    return (step.before, ← Frontend.collectSorrys step)
  let li ← m.run context |>.run' state
  let goalStates ← li.filterMapM λ (env, sorrys) => withEnv env do
    if sorrys.isEmpty then
      return .none
-    let goalState ← Frontend.sorrysToGoalState sorrys
-    return .some goalState
+    let { state, .. } ← Frontend.sorrysToGoalState sorrys
+    return .some state
  return goalStates

 def test_multiple_sorrys_in_proof : TestT MetaM Unit := do
@ -177,6 +177,47 @@ example (n: Nat) : mystery n + 1 = n + 2 := sorry
    }
  ])

+def test_capture_type_mismatch : TestT MetaM Unit := do
+  let input := "
+def mystery (k: Nat) : Nat := true
+  "
+  let goalStates ← (collectSorrysFromSource input).run' {}
+  let [goalState] := goalStates | panic! s!"Incorrect number of states: {goalStates.length}"
+  checkEq "goals" ((← goalState.serializeGoals (options := {})).map (·.devolatilize)) #[
+    {
+      target := { pp? := "Nat" },
+      vars := #[{
+         userName := "k",
+         type? := .some { pp? := "Nat" },
+      }],
+    }
+  ]
+
+def collectNewConstants (source: String) : MetaM (List (List Name)) := do
+  let filename := "<anonymous>"
+  let (context, state) ← do Frontend.createContextStateFromFile source filename (← getEnv) {}
+  let m := Frontend.mapCompilationSteps λ step => do
+    Frontend.collectNewDefinedConstants step
+  m.run context |>.run' state
+
+def test_collect_one_constant : TestT MetaM Unit := do
+  let input := "
+def mystery : Nat := 123
+  "
+  let names ← collectNewConstants input
+  checkEq "constants" names [[`mystery]]
+def test_collect_one_theorem : TestT MetaM Unit := do
+  let input := "
+theorem mystery [SizeOf α] (as : List α) (i : Fin as.length) : sizeOf (as.get i) < sizeOf as := by
+  match as, i with
+  | a::as, ⟨0, _⟩  => simp_arith [get]
+  | a::as, ⟨i+1, h⟩ =>
+    have ih := sizeOf_get as ⟨i, Nat.le_of_succ_le_succ h⟩
+    apply Nat.lt_trans ih
+    simp_arith
+  "
+  let names ← collectNewConstants input
+  checkEq "constants" names [[`mystery]]

 def suite (env : Environment): List (String × IO LSpec.TestSeq) :=
  let tests := [
@ -185,6 +226,9 @@ def suite (env : Environment): List (String × IO LSpec.TestSeq) :=
    ("sorry_in_induction", test_sorry_in_induction),
    ("sorry_in_coupled", test_sorry_in_coupled),
    ("environment_capture", test_environment_capture),
+    ("capture_type_mismatch", test_capture_type_mismatch),
+    ("collect_one_constant", test_collect_one_constant),
+    ("collect_one_theorem", test_collect_one_theorem),
  ]
  tests.map (fun (name, test) => (name, runMetaMSeq env $ runTest test))

--- a/Test/Integration.lean
+++ b/Test/Integration.lean
@ -174,6 +174,7 @@ def test_frontend_process : Test :=
        ("file", .str file),
        ("invocations", .bool true),
        ("sorrys", .bool false),
+        ("newConstants", .bool false),
      ]
     ({
       units := [{
@ -214,6 +215,7 @@ def test_frontend_process_sorry : Test :=
        ("file", .str file),
        ("invocations", .bool false),
        ("sorrys", .bool true),
+        ("newConstants", .bool false),
      ]
     ({
       units := [{
@ -221,7 +223,8 @@ def test_frontend_process_sorry : Test :=
       }, {
         boundary := (solved.utf8ByteSize, solved.utf8ByteSize + withSorry.utf8ByteSize),
         goalStateId? := .some 0,
-         goals := #[goal1],
+         goals? := .some #[goal1],
+         goalSrcBoundaries? := .some #[(57, 62)],
         messages := #["<anonymous>:2:0: warning: declaration uses 'sorry'\n"],
       }],
    }: Protocol.FrontendProcessResult),
--- a/Test/Proofs.lean
+++ b/Test/Proofs.lean
@ -701,7 +701,7 @@ def test_nat_zero_add_alt: TestM Unit := do
      }
    ])

-def test_composite_tactic_failure: TestM Unit := do
+def test_tactic_failure_unresolved_goals : TestM Unit := do
  let state? ← startProof (.expr "∀ (p : Nat → Prop), ∃ (x : Nat), p (0 + x + 0)")
  let state0 ← match state? with
    | .some state => pure state
@ -720,6 +720,37 @@ def test_composite_tactic_failure: TestM Unit := do
  let .failure messages ← state1.tacticOn 0 tactic | addTest $ assertUnreachable s!"{tactic} should fail"
  checkEq s!"{tactic} fails" messages #[s!"{← getFileName}:0:12: error: unsolved goals\np : Nat → Prop\n⊢ p 0\n"]

+
+def test_tactic_failure_synthesize_placeholder : TestM Unit := do
+  let state? ← startProof (.expr "∀ (p q r : Prop) (h : p → q), q ∧ r")
+  let state0 ← match state? with
+    | .some state => pure state
+    | .none => do
+      addTest $ assertUnreachable "Goal could not parse"
+      return ()
+
+  let tactic := "intro p q r h"
+  let state1 ← match ← state0.tacticOn 0 tactic with
+    | .success state => pure state
+    | other => do
+      addTest $ assertUnreachable $ other.toString
+      return ()
+
+  let tactic := "simpa [h] using And.imp_left h _"
+  let state2 ← match ← state1.tacticOn 0 tactic with
+    | .success state => pure state
+    | other => do
+      addTest $ assertUnreachable $ other.toString
+      return ()
+
+  checkEq tactic ((← state2.serializeGoals).map (·.devolatilize))  #[
+    buildGoal [("p", "Prop"), ("q", "Prop"), ("r", "Prop"), ("h", "p → q")] "p ∧ r"
+  ]
+
+  --let .failure messages ← state1.tacticOn 0 tactic | addTest $ assertUnreachable s!"{tactic} should fail"
+  --let message := s!"<Pantograph>:0:31: error: don't know how to synthesize placeholder\ncontext:\np q r : Prop\nh : p → q\n⊢ p ∧ r\n"
+  --checkEq s!"{tactic} fails" messages #[message]
+
 def suite (env: Environment): List (String × IO LSpec.TestSeq) :=
  let tests := [
    ("identity", test_identity),
@ -732,7 +763,8 @@ def suite (env: Environment): List (String × IO LSpec.TestSeq) :=
    ("calc", test_calc),
    ("Nat.zero_add", test_nat_zero_add),
    ("Nat.zero_add alt", test_nat_zero_add_alt),
-    ("composite tactic failure", test_composite_tactic_failure),
+    ("tactic failure with unresolved goals", test_tactic_failure_unresolved_goals),
+    ("tactic failure with synthesize placeholder", test_tactic_failure_synthesize_placeholder),
  ]
  tests.map (fun (name, test) => (name, proofRunner env test))

--- a/doc/rationale.md
+++ b/doc/rationale.md
@ -24,6 +24,22 @@ The name Pantograph is a pun. It means two things
  a locomotive. In comparison the (relatively) simple Pantograph software powers
  theorem proving projects.

+## Caveats
+
+Pantograph does not exactly mimic Lean LSP's behaviour. That would not grant the
+flexibility it offers.  To support tree search means Pantograph has to act
+differently from Lean in some times, but never at the sacrifice of soundness.
+
+- When Lean LSP says "don't know how to synthesize placeholder", this indicates
+  the human operator needs to manually move the cursor to the placeholder and
+  type in the correct expression. This error therefore should not halt the proof
+  process, and the placeholder should be turned into a goal.
+- When Lean LSP says "unresolved goals", that means a proof cannot finish where
+  it is supposed to finish at the end of a `by` block. Pantograph will raise the
+  error in this case, since it indicates the termination of a proof search branch.
+- `pick_goal` or `swap` will not work since they run contrary to tree search
+  paradigms.
+
 ## References

 * [Pantograph Paper](https://arxiv.org/abs/2410.16429)
--- a/doc/repl.md
+++ b/doc/repl.md
@ -44,9 +44,11 @@ See `Pantograph/Protocol.lean` for a description of the parameters and return va
  state. The user is responsible to ensure the sender/receiver instances share
  the same environment.
 * `frontend.process { ["fileName": <fileName>,] ["file": <str>], invocations:
-  <bool>, sorrys: <bool> }`: Executes the Lean frontend on a file, collecting
-  either the tactic invocations (`"invocations": true`) or the sorrys into goal
-  states (`"sorrys": true`)
+  <bool>, sorrys: <bool>, newConstants: <bool> }`: Executes the Lean frontend on
+  a file, collecting the tactic invocations (`"invocations": true`), the
+  sorrys and type errors into goal states (`"sorrys": true`), and new constants
+  (`"newConstants": true`). In the case of `sorrys`, this command additionally
+  outputs the position of each captured `sorry`.

 ## Errors

--- a/flake.nix
+++ b/flake.nix
@ -22,6 +22,8 @@
    flake = {
    };
    systems = [
+      "aarch64-linux"
+      "aarch64-darwin"
      "x86_64-linux"
      "x86_64-darwin"
    ];