2024-12-11 16:52:20 -08:00
14 changed files with 402 additions and 129 deletions
--- a/Pantograph/Frontend.lean
+++ b/Pantograph/Frontend.lean
@ -1,4 +1,4 @@
 /- Adapted from lean-training-data by semorrison -/
 import Pantograph.Frontend.Basic
 import Pantograph.Frontend.Elab
 import Pantograph.Frontend.InfoTree
 import Pantograph.Frontend.MetaTranslate
--- a/Pantograph/Frontend/Basic.lean
+++ b/Pantograph/Frontend/Basic.lean
@ -30,6 +30,13 @@ end Lean.PersistentArray
 namespace Pantograph.Frontend
/-- Start and tail positions of `stx` as a pair; a position that `stx` does not carry is reported as `0`. -/
@[export pantograph_frontend_stx_byte_range]
 def stxByteRange (stx : Syntax) : String.Pos × String.Pos :=
  (stx.getPos?.getD 0, stx.getTailPos?.getD 0)
 /-- Local shorthand for `Elab.Frontend.FrontendM`. -/
 abbrev FrontendM := Elab.Frontend.FrontendM
 structure CompilationStep where
--- a/Pantograph/Frontend/Elab.lean
+++ b/Pantograph/Frontend/Elab.lean
@ -1,87 +1,21 @@
 /- Adapted from https://github.com/semorrison/lean-training-data -/
 import Lean.Elab.Import
 import Lean.Elab.Command
 import Lean.Elab.InfoTree
 import Lean.DeclarationRange
 import Pantograph.Frontend.Basic
 import Pantograph.Frontend.MetaTranslate
 import Pantograph.Goal
 import Pantograph.Protocol
 import Pantograph.Frontend.InfoTree
 open Lean
 namespace Lean.Elab.Info
 /-- Extract the `Syntax` stored in an `Info` node; `FVarAliasInfo` is the only variant without one. -/
 protected def stx? : Info → Option Syntax
  | .ofFVarAliasInfo      _ => none
  | .ofTacticInfo         i => i.stx
  | .ofTermInfo           i => i.stx
  | .ofCommandInfo        i => i.stx
  | .ofMacroExpansionInfo i => i.stx
  | .ofOptionInfo         i => i.stx
  | .ofFieldInfo          i => i.stx
  | .ofCompletionInfo     i => i.stx
  | .ofUserWidgetInfo     i => i.stx
  | .ofCustomInfo         i => i.stx
  | .ofFieldRedeclInfo    i => i.stx
  | .ofOmissionInfo       i => i.stx
 /-- `true` when the head source info of the node's syntax is `.original`.
 Nodes without syntax (`FVarAliasInfo`) are conservatively reported as original too. -/
 protected def isOriginal (i : Info) : Bool :=
  match i.stx?.map (·.getHeadInfo) with
  | none | some (.original ..) => true
  | some _ => false
 end Lean.Elab.Info
 namespace Lean.Elab.TacticInfo
 /-- Kind of the outermost syntax node of this `TacticInfo`, if that syntax is a `Syntax.node`. -/
 def name? (t : TacticInfo) : Option Name :=
  if let .node _ kind _ := t.stx then some kind else none
 /-- A tactic is "substantive" when it has a syntax kind and that kind is not one of the
 listed tactic combinators (e.g. `by`, `;`, multiline tactics, parenthesized tactics). -/
 def isSubstantive (t : TacticInfo) : Bool :=
  let combinators : List Name := [
    `null,
    ``cdot,
    ``cdotTk,
    ``Lean.Parser.Term.byTactic,
    ``Lean.Parser.Tactic.tacticSeq,
    ``Lean.Parser.Tactic.tacticSeq1Indented,
    ``Lean.Parser.Tactic.«tactic_<;>_»,
    ``Lean.Parser.Tactic.paren
  ]
  match t.name? with
  | some kind => !combinators.contains kind
  | none => false
 end Lean.Elab.TacticInfo
 namespace Lean.Elab.InfoTree
 /--
 Prune an `InfoTree`: a `.node` is kept when `p` accepts its info and a `.hole` is kept when
 `m` accepts its mvar. The children of a rejected node are spliced in where the node was,
 which is why the result is a `List InfoTree` (usually a singleton).
 -/
 partial def filter (p : Info → Bool) (m : MVarId → Bool := fun _ => false) :
    InfoTree → List InfoTree
  | .hole mvar => if m mvar then [.hole mvar] else []
  | .context ctx tree => (tree.filter p m).map (.context ctx)
  | .node info children =>
    let survivors := children.toList.bind (filter p m)
    if p info then [.node info survivors.toPArray'] else survivors
 end Lean.Elab.InfoTree
 namespace Pantograph.Frontend
 -- Info tree filtering functions
 /- Adapted from lean-training-data -/
 structure TacticInvocation where
  info : Elab.TacticInfo
  ctx : Elab.ContextInfo
@ -131,19 +65,10 @@ protected def usedConstants (t: TacticInvocation) : NameSet :=
 end TacticInvocation
 /-- Like `Lean.Elab.InfoTree.findInfo?`, except that every node accepted by `pred` is returned,
 each with the context in effect at that node and the node's children. -/
 partial def findAllInfo (t : Elab.InfoTree) (context?: Option Elab.ContextInfo) (pred : Elab.Info → Bool) :
    List (Elab.Info × Option Elab.ContextInfo × PersistentArray Elab.InfoTree) :=
  match t with
  | .context inner t => findAllInfo t (inner.mergeIntoOuter? context?) pred
  | .node i children  =>
    let below := children.toList.bind (findAllInfo · context? pred)
    if pred i then (i, context?, children) :: below else below
  | _ => []
 /-- Return all `TacticInfo` nodes in an `InfoTree` corresponding to tactics,
 each equipped with its relevant `ContextInfo`, and any children info trees. -/
 private def collectTacticNodes (t : Elab.InfoTree) : List TacticInvocation :=
-  let infos := findAllInfo t none fun i => match i with
+  let infos := t.findAllInfo none false fun i => match i with
    | .ofTacticInfo _ => true
    | _ => false
  infos.filterMap fun p => match p with
@ -162,9 +87,11 @@ def collectTacticsFromCompilationStep (step : CompilationStep) : IO (List Protoc
  tactics.mapM λ invocation => do
    let goalBefore := (Format.joinSep (← invocation.goalState) "\n").pretty
    let goalAfter := (Format.joinSep (← invocation.goalStateAfter) "\n").pretty
-    let tactic ← invocation.ctx.runMetaM {} do
+    let tactic ← invocation.ctx.runMetaM {} <| Meta.withMCtx invocation.info.mctxBefore do
-      let t ← PrettyPrinter.ppTactic ⟨invocation.info.stx⟩
+      return (← invocation.ctx.ppSyntax {} invocation.info.stx).pretty
-      return t.pretty
+      -- FIXME: Why does this not work? There are problems with `term.pseudo.antiquot`
      --PrettyPrinter.ppTactic ⟨invocation.info.stx⟩
      --return t.pretty
    let usedConstants := invocation.usedConstants.toArray.map λ n => n.toString
    return {
      goalBefore,
@ -177,47 +104,79 @@ structure InfoWithContext where
  info: Elab.Info
  context?: Option Elab.ContextInfo := .none
/--
Walks `t` and collects the info nodes that should become goals. In the monadic version the
predicate returns a pair `(collect, recurse)`: the first flag keeps the node, the second
decides whether the node's children are visited.
-/
-private def collectSorrysInTree (t : Elab.InfoTree) : List InfoWithContext :=
+private def collectSorrysInTree (t : Elab.InfoTree) : IO (List InfoWithContext) := do
-  let infos := findAllInfo t none fun i => match i with
+  let infos ← t.findAllInfoM none fun i ctx? => match i with
-    | .ofTermInfo { expectedType?, expr, stx, .. } =>
+    | .ofTermInfo { expectedType?, expr, stx, lctx, .. } => do
-      expr.isSorry ∧ expectedType?.isSome ∧ stx.isOfKind `Lean.Parser.Term.sorry
+      let .some ctx := ctx? | return (false, true)
      if expr.isSorry ∧ stx.isOfKind `Lean.Parser.Term.sorry then
        if expectedType?.isNone then
          throw $ .userError "Sorry of indeterminant type is not allowed"
        return (true, false)
      let .some expectedType := expectedType? | return (false, true)
      -- Compare the inferred type of the elaborated term with the expected type
      let typeMatch ← ctx.runMetaM lctx do
        let type ← Meta.inferType expr
        Meta.isExprDefEqGuarded type expectedType
      return match typeMatch, expr.hasSorry with
      | false, true => (true, false) -- Types mismatch but has sorry -> collect, halt
      | false, false => (true, false) -- Types mismatch but no sorry -> collect, halt
      | true, true => (false, true) -- Types match but has sorry -> continue
      | true, false => (false, false) -- Types match but no sorries -> halt
    | .ofTacticInfo { stx, goalsBefore, .. } =>
      -- The `sorry` term is distinct from the `sorry` tactic
      let isSorry := stx.isOfKind `Lean.Parser.Tactic.tacticSorry
-      isSorry ∧ !goalsBefore.isEmpty
+      return (isSorry ∧ !goalsBefore.isEmpty, ¬ isSorry)
-    | _ => false
+    | _ => return (false, true)
-  infos.map fun (info, context?, _) => { info, context? }
+  return infos.map fun (info, context?, _) => { info, context? }
 -- NOTE: Plural deliberately not spelled "sorries"
/-- Applies `collectSorrysInTree` to every info tree of `step` and concatenates the results. -/
@[export pantograph_frontend_collect_sorrys_m]
-def collectSorrys (step: CompilationStep) : List InfoWithContext :=
+def collectSorrys (step: CompilationStep) : IO (List InfoWithContext) := do
-  step.trees.bind collectSorrysInTree
+  return (← step.trees.mapM collectSorrysInTree).join
 /-- A `GoalState` together with the source ranges of the syntax that produced its goals. -/
 structure AnnotatedGoalState where
  -- Goal state created from the translated mvars
  state : GoalState
  -- One `stxByteRange` per goal, listed in the same order as the goals given to `GoalState.createFromMVars`
  srcBoundaries : List (String.Pos × String.Pos)
 /--
 Since we cannot directly merge `MetavarContext`s, we have to get creative. This
 function duplicates frozen mvars in term and tactic info nodes, and adds them to
 the current `MetavarContext`. Each resulting goal is paired with the byte range
 (`stxByteRange`) of the syntax it came from.
 -/
-@[export pantograph_frontend_sorrys_to_goal_state]
+@[export pantograph_frontend_sorrys_to_goal_state_m]
-def sorrysToGoalState (sorrys : List InfoWithContext) : MetaM GoalState := do
+def sorrysToGoalState (sorrys : List InfoWithContext) : MetaM AnnotatedGoalState := do
  assert! !sorrys.isEmpty
  let goalsM := sorrys.mapM λ i => do
    match i.info with
    | .ofTermInfo termInfo  => do
      let mvarId ← MetaTranslate.translateMVarFromTermInfo termInfo i.context?
-      return [mvarId]
+      return [(mvarId, stxByteRange termInfo.stx)]
    | .ofTacticInfo tacticInfo => do
-      MetaTranslate.translateMVarFromTacticInfoBefore tacticInfo i.context?
+      let mvarIds ← MetaTranslate.translateMVarFromTacticInfoBefore tacticInfo i.context?
      -- All goals of one tactic node share that tactic's source range
      let range := stxByteRange tacticInfo.stx
      return mvarIds.map (·, range)
    | _ => panic! "Invalid info"
-  let goals := List.join (← goalsM.run {} |>.run' {})
+  let annotatedGoals := List.join (← goalsM.run {} |>.run' {})
  let goals := annotatedGoals.map Prod.fst
  let srcBoundaries := annotatedGoals.map Prod.snd
  -- A single goal is its own root; several goals get an anonymous root mvar id
  let root := match goals with
    | [] => panic! "No MVars generated"
    | [g] => g
    | _ => { name := .anonymous }
-  GoalState.createFromMVars goals root
+  let state ← GoalState.createFromMVars goals root
  return { state, srcBoundaries }
/--
Folds over `step.after.constants.map₂` and gathers every name that `step.before` does not
contain and for which `findDeclarationRanges?` returns a result. A failure of the `CoreM`
lookup is rethrown as an `IO.userError`.
-/
@[export pantograph_frontend_collect_new_defined_constants_m]
 def collectNewDefinedConstants (step : CompilationStep) : IO (List Name) := do
  let coreContext : Core.Context := { fileName := step.fileName, fileMap := step.fileMap }
  let coreState : Core.State := { env := step.after }
  step.after.constants.map₂.foldlM (λ found name _ => do
    if step.before.contains name then
      return found
    let lookup : CoreM Bool := do return (← findDeclarationRanges? name).isSome
    match ← (lookup.run' coreContext coreState).toBaseIO with
    | .ok true => return name :: found
    | .ok false => return found
    | .error e => throw $ IO.userError (← e.toMessageData.toString)
    ) []
 end Pantograph.Frontend
--- a/Pantograph/Frontend/InfoTree.lean
+++ b/Pantograph/Frontend/InfoTree.lean
@ -0,0 +1,153 @@
 /- Adapted from lean-training-data -/
 import Lean.Elab.InfoTree
 import Lean.Parser.Term
 import Lean.PrettyPrinter
 open Lean
 namespace Lean.Elab
 /-- Tag shown in front of a node: empty for the anonymous name, otherwise `⟨name⟩` plus a trailing space. -/
 private def elaboratorToString : Name → String := fun name =>
  if name.isAnonymous then "" else s!"⟨{name}⟩ "
 /-- Prefixes every line of `s` (split on `"\n"`) with a tab character. -/
 private def indent (s : String) : String :=
  "\n".intercalate ((s.splitOn "\n").map fun line => "\t" ++ line)
 /-- Extract the `Syntax` stored in an `Info` node; `FVarAliasInfo` is the only variant without one. -/
 protected def Info.stx? : Info → Option Syntax
  | .ofFVarAliasInfo      _ => none
  | .ofTacticInfo         i => i.stx
  | .ofTermInfo           i => i.stx
  | .ofCommandInfo        i => i.stx
  | .ofMacroExpansionInfo i => i.stx
  | .ofOptionInfo         i => i.stx
  | .ofFieldInfo          i => i.stx
  | .ofCompletionInfo     i => i.stx
  | .ofUserWidgetInfo     i => i.stx
  | .ofCustomInfo         i => i.stx
  | .ofFieldRedeclInfo    i => i.stx
  | .ofOmissionInfo       i => i.stx
 /-- `true` when the head source info of the node's syntax is `.original`.
 Nodes without syntax (`FVarAliasInfo`) are conservatively reported as original too. -/
 protected def Info.isOriginal (i : Info) : Bool :=
  match i.stx?.map (·.getHeadInfo) with
  | none | some (.original ..) => true
  | some _ => false
 /-- Pretty-prints `e` in local context `lctx`, after running `instantiateMVars` on it. -/
 def ContextInfo.ppExpr (ctx : ContextInfo) (lctx : LocalContext) (e : Expr) : IO Format :=
  ctx.runMetaM lctx do
    let instantiated ← instantiateMVars e
    Meta.ppExpr instantiated
 /-- Renders a command node: the elaborator tag, a newline, then the pretty-printed syntax. -/
 def CommandInfo.toString (info : CommandInfo) (ctx : ContextInfo) : IO String := do
  let rendered ← ctx.ppSyntax {} info.stx
  return elaboratorToString info.elaborator ++ "\n" ++ rendered.pretty
 /-- Renders a term node as `<elaborator tag><expr>[: <expected type>]`, a newline, then the
 pretty-printed syntax. The `: type` part is omitted when no expected type is recorded. -/
 def TermInfo.toString (info : TermInfo) (ctx : ContextInfo) : IO String := do
  let stx ← ctx.ppSyntax info.lctx info.stx
  let typeSuffix : String ← match info.expectedType? with
    | some ty => pure s!": {(← ctx.ppExpr info.lctx ty).pretty}"
    | none => pure ""
  let expr ← ctx.ppExpr info.lctx info.expr
  return elaboratorToString info.elaborator ++ expr.pretty ++ typeSuffix ++ "\n" ++ stx.pretty
 /-- Kind of the outermost syntax node of this `TacticInfo`, if that syntax is a `Syntax.node`. -/
 def TacticInfo.name? (t : TacticInfo) : Option Name :=
  if let .node _ kind _ := t.stx then some kind else none
 /-- A tactic is "substantive" when it has a syntax kind and that kind is not one of the
 listed tactic combinators (e.g. `by`, `;`, multiline tactics, parenthesized tactics). -/
 def TacticInfo.isSubstantive (t : TacticInfo) : Bool :=
  let combinators : List Name := [
    `null,
    ``cdot,
    ``cdotTk,
    ``Lean.Parser.Term.byTactic,
    ``Lean.Parser.Tactic.tacticSeq,
    ``Lean.Parser.Tactic.tacticSeq1Indented,
    ``Lean.Parser.Tactic.«tactic_<;>_»,
    ``Lean.Parser.Tactic.paren
  ]
  match t.name? with
  | some kind => !combinators.contains kind
  | none => false
 /-- Pretty-prints the tactic syntax; if the pretty printer throws, a placeholder text is returned instead. -/
 def TacticInfo.pp (info : TacticInfo) (ctx : ContextInfo) : IO Format :=
  ctx.runMetaM {} do
    try
      Lean.PrettyPrinter.ppTactic ⟨info.stx⟩
    catch _ =>
      pure "<failed to pretty print>"
 /-- Renders a tactic node: the result of `name?`, a newline, then the pretty-printed tactic. -/
 def TacticInfo.toString (i : TacticInfo) (ctx : ContextInfo) : IO String := do
  let rendered ← i.pp ctx
  return s!"{i.name?}\n{rendered.pretty}"
 /--
 Prune an `InfoTree`: a `.node` is kept when `p` accepts its info and a `.hole` is kept when
 `m` accepts its mvar. The children of a rejected node are spliced in where the node was,
 which is why the result is a `List InfoTree` (usually a singleton).
 -/
 partial def InfoTree.filter (p : Info → Bool) (m : MVarId → Bool := fun _ => false) :
    InfoTree → List InfoTree
  | .hole mvar => if m mvar then [.hole mvar] else []
  | .context ctx tree => (tree.filter p m).map (.context ctx)
  | .node info children =>
    let survivors := children.toList.bind (filter p m)
    if p info then [.node info survivors.toPArray'] else survivors
 /-- Like `Lean.Elab.InfoTree.findInfo?`, except that every node accepted by `pred` is returned,
 each with the context in effect at that node and the node's children.
 With `haltOnMatch`, the children of an accepted node are not searched. -/
 partial def InfoTree.findAllInfo
    (t : InfoTree)
    (context?: Option Elab.ContextInfo)
    (haltOnMatch : Bool := false)
    (pred : Elab.Info → Bool)
    : List (Elab.Info × Option Elab.ContextInfo × PersistentArray Elab.InfoTree) :=
  match t with
  | .context inner t => findAllInfo t (inner.mergeIntoOuter? context?) haltOnMatch pred
  | .node i children  =>
    let self := (i, context?, children)
    let descend (_ : Unit) := children.toList.bind (findAllInfo · context? haltOnMatch pred)
    match pred i, haltOnMatch with
    | true, true => [self]
    | true, false => self :: descend ()
    | false, _ => descend ()
  | _ => []
 /-- Monadic variant of `findAllInfo`. For each node `pred` returns `(collect, recurse)`:
 `collect` puts the node into the result, `recurse` decides whether its children are visited. -/
 partial def InfoTree.findAllInfoM [Monad m]
    (t : InfoTree)
    (context?: Option Elab.ContextInfo)
    (pred : Elab.Info → Option Elab.ContextInfo → m (Bool × Bool))
    : m (List (Elab.Info × Option Elab.ContextInfo × PersistentArray Elab.InfoTree)) := do
  match t with
  | .context inner t => t.findAllInfoM (inner.mergeIntoOuter? context?) pred
  | .node i children  =>
    let (collect, recurse) ← pred i context?
    let mut found := if collect then [(i, context?, children)] else []
    if recurse then
      for child in children.toList do
        found := found ++ (← child.findAllInfoM context? pred)
    return found
  | _ => return []
/-- Renders `t` as a multi-line outline: one `[kind]` header per node (with details for term,
command and tactic nodes), followed by its tab-indented children; holes print their goal.
Throws an `IO.userError` when a node or hole is reached while no `ContextInfo` is available. -/
@[export pantograph_infotree_to_string_m]
 partial def InfoTree.toString (t : InfoTree) (ctx?: Option Elab.ContextInfo := .none) : IO String := do
  match t with
  | .context ctx t => t.toString (ctx.mergeIntoOuter? ctx?)
  | .node info children =>
    let some ctx := ctx? | throw <| IO.userError "No `ContextInfo` available."
    let header : String ← match info with
      | .ofTermInfo    ti => pure s!"[term] {(← ti.toString ctx)}"
      | .ofCommandInfo ci => pure s!"[command] {(← ci.toString ctx)}"
      | .ofTacticInfo  ti => pure s!"[tactic] {(← ti.toString ctx)}"
      | .ofMacroExpansionInfo _ => pure "[macro_exp]"
      | .ofOptionInfo _ => pure "[option]"
      | .ofFieldInfo _ => pure "[field]"
      | .ofCompletionInfo _ => pure "[completion]"
      | .ofUserWidgetInfo _ => pure "[user_widget]"
      | .ofCustomInfo _ => pure "[custom]"
      | .ofFVarAliasInfo _ => pure "[fvar]"
      | .ofFieldRedeclInfo _ => pure "[field_redecl]"
      | .ofOmissionInfo _ => pure "[omission]"
    let rendered ← children.toList.mapM fun child => do pure (indent (← child.toString ctx))
    return header ++ "\n" ++ "\n".intercalate rendered
  | .hole mvarId =>
    let some ctx := ctx? | throw <| IO.userError "No `ContextInfo` available."
    let payload ← ctx.runMetaM {} (Meta.ppGoal mvarId)
    return "[hole] " ++ payload.pretty
 end Lean.Elab
--- a/Pantograph/Goal.lean
+++ b/Pantograph/Goal.lean
@ -177,16 +177,51 @@ protected def GoalState.getMVarEAssignment (goalState: GoalState) (mvarId: MVarI
 --- Tactic execution functions ---
-protected def GoalState.step (state: GoalState) (goal: MVarId) (tacticM: Elab.Tactic.TacticM Unit)
+-- Mimics `Elab.Term.logUnassignedUsingErrorInfos`
 /-- Scans the `mvarErrorInfos` of the current state and returns the mvars found (via
 `Meta.getMVars`) under those entries that share at least one mvar with the ones found under `src`. -/
 private def collectAllErroredMVars (src : MVarId) : Elab.TermElabM (List MVarId) := do
  -- These descendants serve as "seed" mvars. If a MVarError's mvar is related
  -- to one of these seed mvars, it means an error has occurred when a tactic
  -- was executing on `src`. `evalTactic` will not capture these mvars, so we
  -- need to manually find them and save them into the goal list.
  let descendants ←  Meta.getMVars $ ← instantiateMVars (.mvar src)
  --let _ ← Elab.Term.logUnassignedUsingErrorInfos descendants
  -- `alreadyVisited` makes sure each error-info mvar is examined only once
  let mut alreadyVisited : MVarIdSet := {}
  let mut result : MVarIdSet := {}
  for { mvarId, .. } in (← get).mvarErrorInfos do
    unless alreadyVisited.contains mvarId do
      alreadyVisited := alreadyVisited.insert mvarId
      /- The metavariable `mvarErrorInfo.mvarId` may have been assigned or
         delayed assigned to another metavariable that is unassigned. -/
      let mvarDeps ← Meta.getMVars (.mvar mvarId)
      -- Keep all dependencies of this entry as soon as one of them is a seed mvar
      if mvarDeps.any descendants.contains then do
        result := mvarDeps.foldl (·.insert ·) result
  return result.toList
 /-- Appends to `li1` the elements of `li2` that `li1` does not already contain; relative order is kept. -/
 private def mergeMVarLists (li1 li2 : List MVarId) : List MVarId :=
  li1 ++ li2.filter fun mvarId => !li1.contains mvarId
 /--
 Runs `tacticM` on `goal` and returns the resulting `GoalState`, with `goal`
 recorded as its parent mvar.

 Set `guardMVarErrors` to true to capture mvar errors. Lean will not
 automatically collect mvars from text tactics (vide
 `test_tactic_failure_synthesize_placeholder`)
 -/
 protected def GoalState.step (state: GoalState) (goal: MVarId) (tacticM: Elab.Tactic.TacticM Unit) (guardMVarErrors : Bool := false)
  : Elab.TermElabM GoalState := do
  -- The goal must be declared in the current metavariable context and still be open
  unless (← getMCtx).decls.contains goal do
    throwError s!"Goal is not in context: {goal.name}"
  goal.checkNotAssigned `GoalState.step
-  let (_, newGoals) ← tacticM { elaborator := .anonymous } |>.run { goals := [goal] }
+  let (_, { goals }) ← tacticM { elaborator := .anonymous } |>.run { goals := [goal] }
  let nextElabState ← MonadBacktrack.saveState
  Elab.Term.synthesizeSyntheticMVarsNoPostponing
  -- With `guardMVarErrors`, mvars found by `collectAllErroredMVars` are appended to the goals
  let goals ← if guardMVarErrors then
      pure $ mergeMVarLists goals (← collectAllErroredMVars goal)
    else
      pure goals
  return {
    state with
-    savedState := { term := nextElabState, tactic := newGoals },
+    savedState := { term := nextElabState, tactic := { goals }, },
    parentMVar? := .some goal,
    calcPrevRhs? := .none,
  }
@ -203,10 +238,10 @@ inductive TacticResult where
  | invalidAction (message: String)
 /-- Executes a `TacticM` monad on this `GoalState`, collecting the errors as necessary -/
-protected def GoalState.tryTacticM (state: GoalState) (goal: MVarId) (tacticM: Elab.Tactic.TacticM Unit):
+protected def GoalState.tryTacticM (state: GoalState) (goal: MVarId) (tacticM: Elab.Tactic.TacticM Unit) (guardMVarErrors : Bool := false):
      Elab.TermElabM TacticResult := do
  try
-    let nextState ← state.step goal tacticM
+    let nextState ← state.step goal tacticM guardMVarErrors
    -- Check if error messages have been generated in the core.
    let newMessages ← (← Core.getMessageLog).toList.drop state.coreState.messages.toList.length
@ -215,6 +250,7 @@ protected def GoalState.tryTacticM (state: GoalState) (goal: MVarId) (tacticM: E
          return .some $ ← m.toString
        else
          return .none
    Core.resetMessageLog
    if ¬ newMessages.isEmpty then
      return .failure newMessages.toArray
    return .success nextState
@ -233,7 +269,7 @@ protected def GoalState.tryTactic (state: GoalState) (goal: MVarId) (tactic: Str
    (fileName := ← getFileName) with
    | .ok stx => pure $ stx
    | .error error => return .parseError error
-  state.tryTacticM goal $ Elab.Tactic.evalTactic tactic
+  state.tryTacticM goal (Elab.Tactic.evalTactic tactic) true
 protected def GoalState.tryAssign (state: GoalState) (goal: MVarId) (expr: String):
      Elab.TermElabM TacticResult := do
--- a/Pantograph/Protocol.lean
+++ b/Pantograph/Protocol.lean
@ -316,6 +316,8 @@ structure FrontendProcess where
  invocations: Bool := false
  -- If set to true, collect `sorry`s
  sorrys: Bool := false
  -- If set to true, extract new constants
  newConstants: Bool := false
  deriving Lean.FromJson
 structure InvokedTactic where
  goalBefore: String
@ -329,11 +331,16 @@ structure InvokedTactic where
 structure CompilationUnit where
  -- String boundaries of compilation units
  boundary: (Nat × Nat)
  -- Message strings reported for this unit
  messages: Array String := #[]
  -- Tactic invocations
  invocations?: Option (List InvokedTactic) := .none
  -- Id of the goal state generated from this unit, if one was created
  goalStateId?: Option Nat := .none
-  goals: Array Goal := #[]
+  goals?: Option (Array Goal) := .none
-  messages: Array String := #[]
+  -- Code segments which generated the goals
  goalSrcBoundaries?: Option (Array (Nat × Nat)) := .none
  -- New constants defined in compilation unit
  newConstants?: Option (Array String) := .none
  deriving Lean.ToJson
 structure FrontendProcessResult where
  units: List CompilationUnit
--- a/README.md
+++ b/README.md
@ -7,7 +7,7 @@ A Machine-to-Machine interaction system for Lean 4.
 Pantograph provides interfaces to execute proofs, construct expressions, and
 examine the symbol list of a Lean project for machine learning.
-See [documentations](doc/) for design rationale and references.
+See the [documentation](doc/rationale.md) for the design rationale and references.
 ## Installation
--- a/Repl.lean
+++ b/Repl.lean
@ -79,6 +79,7 @@ def execute (command: Protocol.Command): MainM Lean.Json := do
    let state ← get
    let nGoals := state.goalStates.size
    set { state with nextId := 0, goalStates := .empty }
    Lean.Core.resetMessageLog
    return .ok { nGoals }
  stat (_: Protocol.Stat): MainM (CR Protocol.StatResult) := do
    let state ← get
@ -258,27 +259,38 @@ def execute (command: Protocol.Command): MainM Lean.Json := do
            pure $ .some invocations
          else
            pure .none
-        let sorrys := if args.sorrys then
+        let sorrys ← if args.sorrys then
            Frontend.collectSorrys step
          else
-            []
+            pure []
        let messages ← step.messageStrings
-        return (step.before, boundary, invocations?, sorrys, messages)
+        let newConstants ← if args.newConstants then
            Frontend.collectNewDefinedConstants step
          else
            pure []
        return (step.before, boundary, invocations?, sorrys, messages, newConstants)
      let li ← frontendM.run context |>.run' state
-      let units ← li.mapM λ (env, boundary, invocations?, sorrys, messages) => Lean.withEnv env do
+      let units ← li.mapM λ (env, boundary, invocations?, sorrys, messages, newConstants) => Lean.withEnv env do
-        let (goalStateId?, goals) ← if sorrys.isEmpty then do
+        let newConstants? := if args.newConstants then
-            pure (.none, #[])
+            .some $ newConstants.toArray.map λ name => name.toString
          else
            .none
        let (goalStateId?, goals?, goalSrcBoundaries?) ← if sorrys.isEmpty then do
            pure (.none, .none, .none)
          else do
-            let goalState ← runMetaInMainM $ Frontend.sorrysToGoalState sorrys
+            let { state, srcBoundaries } ← runMetaInMainM $ Frontend.sorrysToGoalState sorrys
-            let stateId ← newGoalState goalState
+            let stateId ← newGoalState state
-            let goals ← goalSerialize goalState options
+            let goals ← goalSerialize state options
-            pure (.some stateId, goals)
+            let srcBoundaries := srcBoundaries.toArray.map (λ (b, e) => (b.byteIdx, e.byteIdx))
            pure (.some stateId, .some goals, .some srcBoundaries)
        return {
          boundary,
          messages,
          invocations?,
          goalStateId?,
-          goals,
+          goals?,
-          messages,
+          goalSrcBoundaries?,
          newConstants?,
        }
      return .ok { units }
    catch e =>
--- a/Test/Frontend.lean
+++ b/Test/Frontend.lean
@ -10,13 +10,13 @@ def collectSorrysFromSource (source: String) : MetaM (List GoalState) := do
  let filename := "<anonymous>"
  let (context, state) ← do Frontend.createContextStateFromFile source filename (← getEnv) {}
  let m := Frontend.mapCompilationSteps λ step => do
-    return (step.before, Frontend.collectSorrys step)
+    return (step.before, ← Frontend.collectSorrys step)
  let li ← m.run context |>.run' state
  let goalStates ← li.filterMapM λ (env, sorrys) => withEnv env do
    if sorrys.isEmpty then
      return .none
-    let goalState ← Frontend.sorrysToGoalState sorrys
+    let { state, .. } ← Frontend.sorrysToGoalState sorrys
-    return .some goalState
+    return .some state
  return goalStates
 def test_multiple_sorrys_in_proof : TestT MetaM Unit := do
@ -177,6 +177,47 @@ example (n: Nat) : mystery n + 1 = n + 2 := sorry
    }
  ])
 /-- A definition whose body has the wrong type (`true` where `Nat` is expected) must yield
 exactly one goal state, whose single goal asks for a `Nat` with `k : Nat` in context. -/
 def test_capture_type_mismatch : TestT MetaM Unit := do
  let input := "
 def mystery (k: Nat) : Nat := true
  "
  let goalStates ← (collectSorrysFromSource input).run' {}
  -- Exactly one goal state is expected; anything else aborts the test
  let [goalState] := goalStates | panic! s!"Incorrect number of states: {goalStates.length}"
  checkEq "goals" ((← goalState.serializeGoals (options := {})).map (·.devolatilize)) #[
    {
      target := { pp? := "Nat" },
      vars := #[{
         userName := "k",
         type? := .some { pp? := "Nat" },
      }],
    }
  ]
 /-- Runs the frontend over `source` and applies `Frontend.collectNewDefinedConstants` to each
 compilation step via `Frontend.mapCompilationSteps`, returning the collected lists. -/
 def collectNewConstants (source: String) : MetaM (List (List Name)) := do
  let env ← getEnv
  let (context, state) ← do Frontend.createContextStateFromFile source "<anonymous>" env {}
  let perStep := Frontend.mapCompilationSteps λ step => do
    Frontend.collectNewDefinedConstants step
  perStep.run context |>.run' state
 /-- A single `def` must be reported as the only new constant. -/
 def test_collect_one_constant : TestT MetaM Unit := do
  let input := "
 def mystery : Nat := 123
  "
  checkEq "constants" (← collectNewConstants input) [[`mystery]]
 /-- A theorem proved by a multi-step tactic block must be reported as the only new constant. -/
 def test_collect_one_theorem : TestT MetaM Unit := do
  let input := "
 theorem mystery [SizeOf α] (as : List α) (i : Fin as.length) : sizeOf (as.get i) < sizeOf as := by
  match as, i with
  | a::as, ⟨0, _⟩  => simp_arith [get]
  | a::as, ⟨i+1, h⟩ =>
    have ih := sizeOf_get as ⟨i, Nat.le_of_succ_le_succ h⟩
    apply Nat.lt_trans ih
    simp_arith
  "
  checkEq "constants" (← collectNewConstants input) [[`mystery]]
 def suite (env : Environment): List (String × IO LSpec.TestSeq) :=
  let tests := [
@ -185,6 +226,9 @@ def suite (env : Environment): List (String × IO LSpec.TestSeq) :=
    ("sorry_in_induction", test_sorry_in_induction),
    ("sorry_in_coupled", test_sorry_in_coupled),
    ("environment_capture", test_environment_capture),
    ("capture_type_mismatch", test_capture_type_mismatch),
    ("collect_one_constant", test_collect_one_constant),
    ("collect_one_theorem", test_collect_one_theorem),
  ]
  tests.map (fun (name, test) => (name, runMetaMSeq env $ runTest test))
--- a/Test/Integration.lean
+++ b/Test/Integration.lean
@ -174,6 +174,7 @@ def test_frontend_process : Test :=
        ("file", .str file),
        ("invocations", .bool true),
        ("sorrys", .bool false),
        ("newConstants", .bool false),
      ]
     ({
       units := [{
@ -214,6 +215,7 @@ def test_frontend_process_sorry : Test :=
        ("file", .str file),
        ("invocations", .bool false),
        ("sorrys", .bool true),
        ("newConstants", .bool false),
      ]
     ({
       units := [{
@ -221,7 +223,8 @@ def test_frontend_process_sorry : Test :=
       }, {
         boundary := (solved.utf8ByteSize, solved.utf8ByteSize + withSorry.utf8ByteSize),
         goalStateId? := .some 0,
-         goals := #[goal1],
+         goals? := .some #[goal1],
         goalSrcBoundaries? := .some #[(57, 62)],
         messages := #["<anonymous>:2:0: warning: declaration uses 'sorry'\n"],
       }],
    }: Protocol.FrontendProcessResult),
--- a/Test/Proofs.lean
+++ b/Test/Proofs.lean
@ -701,7 +701,7 @@ def test_nat_zero_add_alt: TestM Unit := do
      }
    ])
-def test_composite_tactic_failure: TestM Unit := do
+def test_tactic_failure_unresolved_goals : TestM Unit := do
  let state? ← startProof (.expr "∀ (p : Nat → Prop), ∃ (x : Nat), p (0 + x + 0)")
  let state0 ← match state? with
    | .some state => pure state
@ -720,6 +720,37 @@ def test_composite_tactic_failure: TestM Unit := do
  let .failure messages ← state1.tacticOn 0 tactic | addTest $ assertUnreachable s!"{tactic} should fail"
  checkEq s!"{tactic} fails" messages #[s!"{← getFileName}:0:12: error: unsolved goals\np : Nat → Prop\n⊢ p 0\n"]
 /-- A tactic that leaves a placeholder (`_`) unsynthesized must succeed and expose the
 placeholder as a goal, rather than fail with "don't know how to synthesize placeholder". -/
 def test_tactic_failure_synthesize_placeholder : TestM Unit := do
  let state? ← startProof (.expr "∀ (p q r : Prop) (h : p → q), q ∧ r")
  let state0 ← match state? with
    | .some state => pure state
    | .none => do
      addTest $ assertUnreachable "Goal could not parse"
      return ()
  let tactic := "intro p q r h"
  let state1 ← match ← state0.tacticOn 0 tactic with
    | .success state => pure state
    | other => do
      addTest $ assertUnreachable $ other.toString
      return ()
  -- The trailing `_` is the placeholder that is expected to become the goal `p ∧ r`
  let tactic := "simpa [h] using And.imp_left h _"
  let state2 ← match ← state1.tacticOn 0 tactic with
    | .success state => pure state
    | other => do
      addTest $ assertUnreachable $ other.toString
      return ()
  checkEq tactic ((← state2.serializeGoals).map (·.devolatilize))  #[
    buildGoal [("p", "Prop"), ("q", "Prop"), ("r", "Prop"), ("h", "p → q")] "p ∧ r"
  ]
  -- Earlier expectation, kept for reference: the tactic failing with an error message
  --let .failure messages ← state1.tacticOn 0 tactic | addTest $ assertUnreachable s!"{tactic} should fail"
  --let message := s!"<Pantograph>:0:31: error: don't know how to synthesize placeholder\ncontext:\np q r : Prop\nh : p → q\n⊢ p ∧ r\n"
  --checkEq s!"{tactic} fails" messages #[message]
 def suite (env: Environment): List (String × IO LSpec.TestSeq) :=
  let tests := [
    ("identity", test_identity),
@ -732,7 +763,8 @@ def suite (env: Environment): List (String × IO LSpec.TestSeq) :=
    ("calc", test_calc),
    ("Nat.zero_add", test_nat_zero_add),
    ("Nat.zero_add alt", test_nat_zero_add_alt),
-    ("composite tactic failure", test_composite_tactic_failure),
+    ("tactic failure with unresolved goals", test_tactic_failure_unresolved_goals),
    ("tactic failure with synthesize placeholder", test_tactic_failure_synthesize_placeholder),
  ]
  tests.map (fun (name, test) => (name, proofRunner env test))
--- a/doc/rationale.md
+++ b/doc/rationale.md
@ -24,6 +24,22 @@ The name Pantograph is a pun. It means two things
  a locomotive. In comparison the (relatively) simple Pantograph software powers
  theorem proving projects.
 ## Caveats
 Pantograph does not exactly mimic Lean LSP's behaviour; doing so would take away
 the flexibility it offers. Supporting tree search means Pantograph sometimes has
 to act differently from Lean, but never at the expense of soundness.
 - When Lean LSP says "don't know how to synthesize placeholder", this indicates
  the human operator needs to manually move the cursor to the placeholder and
  type in the correct expression. This error therefore should not halt the proof
  process, and the placeholder should be turned into a goal.
 - When Lean LSP says "unsolved goals", that means a proof cannot finish where
  it is supposed to finish at the end of a `by` block. Pantograph will raise the
  error in this case, since it indicates the termination of a proof search branch.
 - `pick_goal` or `swap` will not work since they run contrary to tree search
  paradigms.
 ## References
 * [Pantograph Paper](https://arxiv.org/abs/2410.16429)
--- a/doc/repl.md
+++ b/doc/repl.md
@ -44,9 +44,11 @@ See `Pantograph/Protocol.lean` for a description of the parameters and return va
  state. The user is responsible to ensure the sender/receiver instances share
  the same environment.
 * `frontend.process { ["fileName": <fileName>,] ["file": <str>], invocations:
-  <bool>, sorrys: <bool> }`: Executes the Lean frontend on a file, collecting
+  <bool>, sorrys: <bool>, newConstants: <bool> }`: Executes the Lean frontend on
-  either the tactic invocations (`"invocations": true`) or the sorrys into goal
+  a file, collecting the tactic invocations (`"invocations": true`), the
-  states (`"sorrys": true`)
+  sorrys and type errors into goal states (`"sorrys": true`), and new constants
  (`"newConstants": true`). In the case of `sorrys`, this command additionally
  outputs the position of each captured `sorry`.
 ## Errors
--- a/flake.nix
+++ b/flake.nix
@ -22,6 +22,8 @@
    flake = {
    };
    systems = [
      "aarch64-linux"
      "aarch64-darwin"
      "x86_64-linux"
      "x86_64-darwin"
    ];