Merge pull request 'feat: Pickling goal states' (#129) from serial/pickle into dev

Reviewed-on: #129
2024-12-05 16:02:25 -08:00 · 2024-12-05 16:02:25 -08:00 · ebf9ab24f7
parent 49b0101862 bfdc7dd39e
commit ebf9ab24f7
8 changed files with 315 additions and 75 deletions
--- a/Pantograph/Protocol.lean
+++ b/Pantograph/Protocol.lean
@ -289,6 +289,19 @@ structure GoalDiag where
  instantiate: Bool := true
  printSexp: Bool := false
 /-- Parameters of the `goal.save` command: which stored goal state to write and where to write it. -/
 structure GoalSave where
  -- Identifier of the goal state to save; `goal_save` looks it up in the REPL's `goalStates`
  id: Nat
  -- File that receives the pickled goal state
  path: System.FilePath
  deriving Lean.FromJson
 /-- Result of the `goal.save` command. It has no fields; `goal_save` answers with `{}` on success. -/
 structure GoalSaveResult where
  deriving Lean.ToJson
 /-- Parameters of the `goal.load` command. -/
 structure GoalLoad where
  -- File from which the pickled goal state is read
  path: System.FilePath
  deriving Lean.FromJson
 /-- Result of the `goal.load` command. -/
 structure GoalLoadResult where
  -- Identifier under which the loaded goal state was registered in the REPL
  id: Nat
  deriving Lean.ToJson
 /-- Executes the Lean compiler on a single file -/
 structure FrontendProcess where
--- a/Pantograph/Serial.lean
+++ b/Pantograph/Serial.lean
@ -2,6 +2,7 @@ import Lean.Environment
 import Lean.Replay
 import Init.System.IOError
 import Std.Data.HashMap
 import Pantograph.Goal
 /-!
 Input/Output functions
@ -55,7 +56,7 @@ and when unpickling, we build a fresh `Environment` from the imports,
 and then add the new constants.
 -/
@[export pantograph_env_pickle_m]
-def env_pickle (env : Environment) (path : System.FilePath) : IO Unit :=
+def environmentPickle (env : Environment) (path : System.FilePath) : IO Unit :=
  Pantograph.pickle path (env.header.imports, env.constants.map₂)
 /--
@ -65,9 +66,97 @@ We construct a fresh `Environment` with the relevant imports,
 and then replace the new constants.
 -/
@[export pantograph_env_unpickle_m]
-def env_unpickle (path : System.FilePath) : IO (Environment × CompactedRegion) := unsafe do
+def environmentUnpickle (path : System.FilePath) : IO (Environment × CompactedRegion) := unsafe do
  let ((imports, map₂), region) ← Pantograph.unpickle (Array Import × PHashMap Name ConstantInfo) path
  let env ← importModules imports {} 0
  return (← env.replay (Std.HashMap.ofList map₂.toList), region)
 open Lean.Core in
 /--
 The portion of the core state that is written to disk with a goal state.

 `goalStatePickle` builds a value of this type from the goal state's core state
 via `{ core with }`, and `goalStateUnpickle` expands it back into a full core
 state, supplying `env` from its argument and setting `passedHeartbeats := 0`.
 Only `nextMacroScope` and `ngen` are stored. The commented-out fields below are
 left out on purpose; after loading they presumably take their default values
 (TODO confirm against `Lean.Core.State`).
 -/
 structure CompactCoreState where
  -- env             : Environment
  nextMacroScope  : MacroScope     := firstFrontendMacroScope + 1
  ngen            : NameGenerator  := {}
  -- traceState      : TraceState     := {}
  -- cache           : Cache     := {}
  -- messages        : MessageLog     := {}
  -- infoState       : Elab.InfoState := {}
/--
Pickle a goal state to the file at `path`.

The goal state is taken apart into its core, meta, elab and tactic states
together with `root`, `parentMVar?`, `convMVar?` and `calcPrevRhs?`, and the
resulting tuple is handed to `Pantograph.pickle`. The core state is narrowed to
`CompactCoreState` first, so the environment is not part of what is written.
-/
@[export pantograph_goal_state_pickle_m]
 def goalStatePickle (goalState : GoalState) (path : System.FilePath) : IO Unit :=
  -- Destructure the goal state so that each component can be pickled on its own
  let {
    savedState := {
      term := {
        meta := {
          core,
          meta,
        }
        «elab»,
      },
      tactic
    }
    root,
    parentMVar?,
    convMVar?,
    calcPrevRhs?,
  } := goalState
  --let env := core.env
  -- The order of the components below is the order in which
  -- `goalStateUnpickle` destructures them again.
  Pantograph.pickle path (
    -- Keep only the fields that `CompactCoreState` declares
    ({ core with } : CompactCoreState),
    meta,
    «elab»,
    tactic,
    root,
    parentMVar?,
    convMVar?,
    calcPrevRhs?,
  )
/--
Read a goal state back from the file at `path`.

The file is unpickled as the tuple written by `goalStatePickle`. The stored
`CompactCoreState` is expanded into a full core state by adding the supplied
`env` and `passedHeartbeats := 0`. The environment itself is not stored in the
file, so the caller must pass one.

Returns the rebuilt goal state together with the `CompactedRegion` produced by
`Pantograph.unpickle`.

NOTE(review): this is `unsafe`; presumably the file must have been produced by
`goalStatePickle`, and `env` must match the environment the state was created
in — confirm. Whether the returned region has to outlive the goal state is not
visible here — confirm before discarding it.
-/
@[export pantograph_goal_state_unpickle_m]
 def goalStateUnpickle (path : System.FilePath) (env : Environment)
    : IO (GoalState × CompactedRegion) := unsafe do
  let ((
    compactCore,
    meta,
    «elab»,
    tactic,
    root,
    parentMVar?,
    convMVar?,
    calcPrevRhs?,
  ), region) ← Pantograph.unpickle (
    -- Expected layout of the pickled tuple, one line per component above
    CompactCoreState ×
    Meta.State ×
    Elab.Term.State ×
    Elab.Tactic.State ×
    MVarId ×
    Option MVarId ×
    Option (MVarId × MVarId × List MVarId) ×
    Option (MVarId × Expr)
  ) path
  let goalState := {
    savedState := {
      term := {
        meta := {
          core := {
            compactCore with
            -- The heartbeat counter starts from zero in the loaded state
            passedHeartbeats := 0,
            -- The environment comes from the caller, not from the file
            env,
          },
          meta,
        },
        «elab»,
      },
      tactic,
    },
    root,
    parentMVar?,
    convMVar?,
    calcPrevRhs?,
  }
  return (goalState, region)
 end Pantograph
--- a/README.md
+++ b/README.md
@ -64,62 +64,7 @@ stat
 ```
 where the application of `assumption` should lead to a failure.
-### Commands
+For a list of commands, see [REPL Documentation](doc/repl.md).
 See `Pantograph/Protocol.lean` for a description of the parameters and return values in JSON.
 * `reset`: Delete all cached expressions and proof trees
 * `stat`: Display resource usage
 * `expr.echo {"expr": <expr>, "type": <optional expected type>, ["levels": [<levels>]]}`: Determine the
  type of an expression and format it.
 * `env.catalog`: Display a list of all safe Lean symbols in the current environment
 * `env.inspect {"name": <name>, "value": <bool>}`: Show the type and package of a
  given symbol; If value flag is set, the value is printed or hidden. By default
  only the values of definitions are printed.
 * `options.set { key: value, ... }`: Set one or more options (not Lean options; those
  have to be set via command line arguments.), for options, see `Pantograph/Protocol.lean`
  One particular option for interest for machine learning researchers is the
  automatic mode (flag: `"automaticMode"`).  By default it is turned on, with
  all goals automatically resuming. This makes Pantograph act like a gym,
  with no resumption necessary to manage your goals.
 * `options.print`: Display the current set of options
 * `goal.start {["name": <name>], ["expr": <expr>], ["levels": [<levels>]], ["copyFrom": <symbol>]}`:
  Start a new proof from a given expression or symbol
 * `goal.tactic {"stateId": <id>, "goalId": <id>, ...}`: Execute a tactic string on a
  given goal. The tactic is supplied as additional key-value pairs in one of the following formats:
  - `{ "tactic": <tactic> }`: Execute an ordinary tactic
  - `{ "expr": <expr> }`: Assign the given proof term to the current goal
  - `{ "have": <expr>, "binderName": <name> }`: Execute `have` and creates a branch goal
  - `{ "calc": <expr> }`: Execute one step of a `calc` tactic. Each step must
    be of the form `lhs op rhs`. An `lhs` of `_` indicates that it should be set
    to the previous `rhs`.
  - `{ "conv": <bool> }`: Enter or exit conversion tactic mode. In the case of
    exit, the goal id is ignored.
 * `goal.continue {"stateId": <id>, ["branch": <id>], ["goals": <names>]}`:
  Execute continuation/resumption
  - `{ "branch": <id> }`: Continue on branch state. The current state must have no goals.
  - `{ "goals": <names> }`: Resume the given goals
 * `goal.remove {"stateIds": [<id>]}"`: Drop the goal states specified in the list
 * `goal.print {"stateId": <id>}"`: Print a goal state
 * `frontend.process { ["fileName": <fileName>",] ["file": <str>], invocations:
  <bool>, sorrys: <bool> }`: Executes the Lean frontend on a file, collecting
  either the tactic invocations (`"invocations": true`) or the sorrys into goal
  states (`"sorrys": true`)
 ### Errors
 When an error pertaining to the execution of a command happens, the returning JSON structure is
 ``` json
 { "error": "type", "desc": "description" }
 ```
 Common error forms:
 * `command`: Indicates malformed command structure which results from either
  invalid command or a malformed JSON structure that cannot be fed to an
  individual command.
 * `index`: Indicates an invariant maintained by the output of one command and
  input of another is broken. For example, attempting to query a symbol not
  existing in the library or indexing into a non-existent proof state.
 ### Project Environment
--- a/Repl.lean
+++ b/Repl.lean
@ -15,6 +15,16 @@ structure State where
 /-- Main state monad for executing commands -/
 abbrev MainM := ReaderT Context (StateT State Lean.CoreM)
 /--
 Register `goalState` in the REPL state and return the id it was stored under.
 The id is the current `nextId`; `nextId` is then advanced by one.
 -/
 def newGoalState (goalState: GoalState) : MainM Nat := do
  let state ← get
  -- The id handed out is the value of the counter before it is incremented
  let stateId := state.nextId
  set { state with
    goalStates := state.goalStates.insert stateId goalState,
    nextId := state.nextId + 1
  }
  return stateId
 -- HACK: For some reason writing `CommandM α := MainM (Except ... α)` disables
 -- certain monadic features in `MainM`
 abbrev CR α := Except Protocol.InteractionError α
@ -50,6 +60,8 @@ def execute (command: Protocol.Command): MainM Lean.Json := do
    | "goal.continue" => run goal_continue
    | "goal.delete"   => run goal_delete
    | "goal.print"    => run goal_print
    | "goal.save"     => run goal_save
    | "goal.load"     => run goal_load
    | "frontend.process"  => run frontend_process
    | cmd =>
      let error: Protocol.InteractionError :=
@ -62,14 +74,6 @@ def execute (command: Protocol.Command): MainM Lean.Json := do
  errorCommand := errorI "command"
  errorIndex := errorI "index"
  errorIO := errorI "io"
-  newGoalState (goalState: GoalState) : MainM Nat := do
-    let state ← get
-    let stateId := state.nextId
-    set { state with
-      goalStates := state.goalStates.insert stateId goalState,
-      nextId := state.nextId + 1
-    }
-    return stateId
  -- Command Functions
  reset (_: Protocol.Reset): MainM (CR Protocol.StatResult) := do
    let state ← get
@ -90,10 +94,10 @@ def execute (command: Protocol.Command): MainM Lean.Json := do
    Environment.addDecl args
  env_save (args: Protocol.EnvSaveLoad): MainM (CR Protocol.EnvSaveLoadResult) := do
    let env ← Lean.MonadEnv.getEnv
-    env_pickle env args.path
+    environmentPickle env args.path
    return .ok {}
  env_load (args: Protocol.EnvSaveLoad): MainM (CR Protocol.EnvSaveLoadResult) := do
-    let (env, _) ← env_unpickle args.path
+    let (env, _) ← environmentUnpickle args.path
    Lean.setEnv env
    return .ok {}
  expr_echo (args: Protocol.ExprEcho): MainM (CR Protocol.ExprEchoResult) := do
@ -203,11 +207,7 @@ def execute (command: Protocol.Command): MainM Lean.Json := do
    match nextState? with
    | .error error => return .error <| errorI "structure" error
    | .ok nextGoalState =>
-      let nextStateId := state.nextId
+      let nextStateId ← newGoalState nextGoalState
-      set { state with
-        goalStates := state.goalStates.insert nextStateId nextGoalState,
-        nextId := state.nextId + 1
-      }
      let goals ← goalSerialize nextGoalState (options := state.options)
      return .ok {
        nextStateId,
@ -224,6 +224,16 @@ def execute (command: Protocol.Command): MainM Lean.Json := do
      return .error $ errorIndex s!"Invalid state index {args.stateId}"
    let result ← runMetaInMainM <| goalPrint goalState state.options
    return .ok result
  -- `goal.save`: pickle the goal state stored under `args.id` to `args.path`.
  -- Answers with an `index` error when no goal state has that id.
  goal_save (args: Protocol.GoalSave): MainM (CR Protocol.GoalSaveResult) := do
    let state ← get
    let .some goalState := state.goalStates[args.id]? |
      return .error $ errorIndex s!"Invalid state index {args.id}"
    goalStatePickle goalState args.path
    return .ok {}
  -- `goal.load`: unpickle a goal state from `args.path`, pair it with the
  -- current environment, register it as a new goal state and return its id.
  -- NOTE(review): there is no `try`/`catch` here, so a failure while reading
  -- the file presumably propagates as an exception instead of an `io` error
  -- response — confirm.
  goal_load (args: Protocol.GoalLoad): MainM (CR Protocol.GoalLoadResult) := do
    -- The compacted region returned next to the goal state is discarded
    let (goalState, _) ← goalStateUnpickle args.path (← Lean.MonadEnv.getEnv)
    let id ← newGoalState goalState
    return .ok { id }
  frontend_process (args: Protocol.FrontendProcess): MainM (CR Protocol.FrontendProcessResult) := do
    let options := (← get).options
    try
--- a/Test/Common.lean
+++ b/Test/Common.lean
@ -125,11 +125,20 @@ def mvarUserNameAndType (mvarId: MVarId): MetaM (Name × String) := do
 abbrev TestT := StateT LSpec.TestSeq
-def addTest [Monad m] (test: LSpec.TestSeq): TestT m Unit := do
+def addTest [Monad m] (test: LSpec.TestSeq) : TestT m Unit := do
  set $ (← get) ++ test
 /-- Append an `LSpec.check` labelled `desc` on the condition `lhs == rhs` to the collected tests. -/
 def checkEq [Monad m] [DecidableEq α] (desc : String) (lhs rhs : α) : TestT m Unit := do
  addTest $ LSpec.check desc (lhs == rhs)
 /-- Append an `LSpec.check` labelled `desc` on the boolean `flag` to the collected tests. -/
 def checkTrue [Monad m] (desc : String) (flag : Bool) : TestT m Unit := do
  addTest $ LSpec.check desc flag
 /-- Append an `LSpec.check` labelled `desc` on the constant `false`, i.e. a check that cannot hold. -/
 def fail [Monad m] (desc : String) : TestT m Unit := do
  addTest $ LSpec.check desc false
 /-- Run `t` starting from the empty sequence `LSpec.TestSeq.done` and return only the collected tests. -/
 def runTest [Monad m] (t: TestT m Unit): m LSpec.TestSeq :=
  Prod.snd <$> t.run LSpec.TestSeq.done
 /-- Run `t` starting from the empty sequence `LSpec.TestSeq.done` and return both its result and the collected tests. -/
 def runTestWithResult { α } [Monad m] (t: TestT m α): m (α × LSpec.TestSeq) :=
  t.run LSpec.TestSeq.done
 def runTestTermElabM (env: Environment) (t: TestT Elab.TermElabM Unit):
  IO LSpec.TestSeq :=
--- a/Test/Main.lean
+++ b/Test/Main.lean
@ -1,11 +1,12 @@
 import LSpec
 import Test.Delate
 import Test.Environment
 import Test.Frontend
 import Test.Integration
 import Test.Library
 import Test.Metavar
 import Test.Proofs
-import Test.Delate
+import Test.Serial
 import Test.Tactic
 -- Test running infrastructure
@ -51,6 +52,7 @@ def main (args: List String) := do
    ("Metavar", Metavar.suite env_default),
    ("Proofs", Proofs.suite env_default),
    ("Delate", Delate.suite env_default),
    ("Serial", Serial.suite env_default),
    ("Tactic/Congruence", Tactic.Congruence.suite env_default),
    ("Tactic/Motivated Apply", Tactic.MotivatedApply.suite env_default),
    ("Tactic/No Confuse", Tactic.NoConfuse.suite env_default),
--- a/Test/Serial.lean
+++ b/Test/Serial.lean
@ -0,0 +1,109 @@
 import LSpec
 import Test.Common
 import Lean
 import Pantograph.Library
 open Lean
 namespace Pantograph.Test.Serial
 /--
 Create a temporary file with `IO.FS.createTempFile` and return its path.
 Only the second component of the result is kept; the first component
 (presumably the open file handle — confirm) is dropped.
 -/
 def tempPath : IO System.FilePath := do
  Prod.snd <$> IO.FS.createTempFile
 /-- State shared by the serialization tests: the context used to run `CoreM` actions and the current environment. -/
 structure MultiState where
  -- Context passed to every `CoreM` run started by `runCoreM`
  coreContext : Core.Context
  -- Environment exposed through the `MonadEnv TestM` instance
  env: Environment
 /-- Monad of the serialization tests: test collection (`TestT`) over a `MultiState` reference over `IO`. -/
 abbrev TestM := TestT $ StateRefT MultiState $ IO
 /-- Environment access for `TestM`, backed by the `env` field of `MultiState`. -/
 instance : MonadEnv TestM where
  getEnv      := return (← getThe MultiState).env
  modifyEnv f := do modifyThe MultiState fun s => { s with env := f s.env }
 /--
 Run a `TestT CoreM α` action inside `TestM`.

 The action is executed with the `coreContext` from `MultiState` and the given
 core `state`. On success, the tests it collected are appended to the outer test
 sequence and the result is returned together with the final core state. On
 failure, the exception's message is rendered to a string and rethrown as a
 `userError`.
 -/
 def runCoreM { α } (state : Core.State) (testCoreM : TestT CoreM α) : TestM (α × Core.State) := do
  let multiState ← getThe MultiState
  let coreM := runTestWithResult testCoreM
  match ← (coreM.run multiState.coreContext state).toBaseIO with
  | .error e => do
    throw $ .userError $ ← e.toMessageData.toString
  | .ok ((a, tests), state') => do
    -- Merge the inner run's tests into the outer sequence
    set $ (← getThe LSpec.TestSeq) ++ tests
    return (a, state')
 /--
 Round-trip test for `environmentPickle` / `environmentUnpickle`.

 In a first `CoreM` run a definition named `mystery` is added to the environment
 and the extended environment is pickled to a temporary file. In a second,
 independent `CoreM` run the file is unpickled and the test checks that
 `mystery` is present while `mystery2` is not. The temporary file is removed at
 the end; if an earlier step throws, this removal is not reached.
 -/
 def test_environment_pickling : TestM Unit := do
  -- Source and destination runs start from separate core states over the same environment
  let coreSrc : Core.State := { env := ← getEnv }
  let coreDst : Core.State := { env := ← getEnv }
  let name := `mystery
  let envPicklePath ← tempPath
  let ((), _) ← runCoreM coreSrc do
    -- type: `∀ (p : Prop), p → p`; value: `fun p h => h`
    let type: Expr := .forallE `p (.sort 0) (.forallE `h (.bvar 0) (.bvar 1) .default) .default
    let value: Expr := .lam `p (.sort 0) (.lam `h (.bvar 0) (.bvar 0) .default) .default
    let c := Lean.Declaration.defnDecl <| Lean.mkDefinitionValEx
      (name := name)
      (levelParams := [])
      (type := type)
      (value := value)
      (hints := Lean.mkReducibilityHintsRegularEx 1)
      (safety := Lean.DefinitionSafety.safe)
      (all := [])
    -- Only the local `env'` is extended; it is what gets pickled
    let env' ← match (← getEnv).addDecl (← getOptions) c with
      | .error e => do
        let error ← (e.toMessageData (← getOptions)).toString
        throwError error
      | .ok env' => pure env'
    environmentPickle env' envPicklePath
  let _ ← runCoreM coreDst do
    let (env', _) ← environmentUnpickle envPicklePath
    checkTrue s!"Has symbol {name}" (env'.find? name).isSome
    let anotherName := `mystery2
    checkTrue s!"Doesn't have symbol {anotherName}" (env'.find? anotherName).isNone
  IO.FS.removeFile envPicklePath
 /--
 Round-trip test for `goalStatePickle` / `goalStateUnpickle`.

 A goal state is created for a fixed type and pickled in one `CoreM` run. In a
 second run it is unpickled with the current environment, and the type of its
 first goal is compared with the original type. The temporary file is removed at
 the end; if an earlier step throws, this removal is not reached.
 -/
 def test_goal_state_pickling_simple : TestM Unit := do
  let coreSrc : Core.State := { env := ← getEnv }
  let coreDst : Core.State := { env := ← getEnv }
  let statePath ← tempPath
  -- `∀ (p : Prop), p → p`
  let type: Expr := .forallE `p (.sort 0) (.forallE `h (.bvar 0) (.bvar 1) .default) .default
  let stateGenerate : MetaM GoalState := runTermElabMInMeta do
    GoalState.create type
  let ((), _) ← runCoreM coreSrc do
    let state ← stateGenerate.run'
    goalStatePickle state statePath
  let ((), _) ← runCoreM coreDst do
    let (goalState, _) ← goalStateUnpickle statePath (← getEnv)
    -- Collect the type of every goal, each read in that goal's own context
    let metaM : MetaM (List Expr) := do
      goalState.goals.mapM λ goal => goalState.withContext goal goal.getType
    let types ← metaM.run'
    -- `types[0]!` assumes the loaded state has at least one goal
    checkTrue "Goals" $ types[0]!.equal type
  IO.FS.removeFile statePath
 /-- A named test routine of this suite. -/
 structure Test where
  -- Name under which the test is listed by `suite`
  name : String
  -- The test body
  routine: TestM Unit
 /--
 Run a single test against `env` and return the tests it collected.
 If the routine throws, the exception is turned into one check whose
 description contains the exception text.
 -/
 protected def Test.run (test: Test) (env: Lean.Environment) : IO LSpec.TestSeq := do
  -- Create the state
  let state : MultiState := {
    coreContext := ← createCoreContext #[],
    env,
  }
  match ← ((runTest $ test.routine).run' state).toBaseIO with
  | .ok e => return e
  | .error e =>
    -- The condition holds only for an empty exception text, so a thrown
    -- exception is presumably meant to show up as a failed check — confirm
    return LSpec.check s!"Emitted exception: {e.toString}" (e.toString == "")
 /-- All serialization tests, each paired with its name and an `IO` action that runs it against `env`. -/
 def suite (env : Lean.Environment): List (String × IO LSpec.TestSeq) :=
  let tests: List Test := [
    { name := "environment_pickling", routine := test_environment_pickling, },
    { name := "goal_state_pickling_simple", routine := test_goal_state_pickling_simple, },
  ]
  tests.map (fun test => (test.name, test.run env))
 end Pantograph.Test.Serial
--- a/doc/repl.md
+++ b/doc/repl.md
@ -0,0 +1,63 @@
 # REPL
 ## Commands
 See `Pantograph/Protocol.lean` for a description of the parameters and return values in JSON.
 * `reset`: Delete all cached expressions and proof trees
 * `stat`: Display resource usage
 * `expr.echo {"expr": <expr>, "type": <optional expected type>, ["levels": [<levels>]]}`: Determine the
  type of an expression and format it.
 * `env.catalog`: Display a list of all safe Lean symbols in the current environment
 * `env.inspect {"name": <name>, "value": <bool>}`: Show the type and package of a
  given symbol; If value flag is set, the value is printed or hidden. By default
  only the values of definitions are printed.
 * `env.save { path }`, `env.load { path }`: Save/Load the current environment
  to/from a file
 * `options.set { key: value, ... }`: Set one or more options (not Lean options; those
  have to be set via command line arguments.), for options, see `Pantograph/Protocol.lean`
  One particular option of interest to machine learning researchers is the
  automatic mode (flag: `"automaticMode"`).  By default it is turned on, with
  all goals automatically resuming. This makes Pantograph act like a gym,
  with no resumption necessary to manage your goals.
 * `options.print`: Display the current set of options
 * `goal.start {["name": <name>], ["expr": <expr>], ["levels": [<levels>]], ["copyFrom": <symbol>]}`:
  Start a new proof from a given expression or symbol
 * `goal.tactic {"stateId": <id>, "goalId": <id>, ...}`: Execute a tactic string on a
  given goal. The tactic is supplied as additional key-value pairs in one of the following formats:
  - `{ "tactic": <tactic> }`: Execute an ordinary tactic
  - `{ "expr": <expr> }`: Assign the given proof term to the current goal
  - `{ "have": <expr>, "binderName": <name> }`: Execute `have` and create a branch goal
  - `{ "calc": <expr> }`: Execute one step of a `calc` tactic. Each step must
    be of the form `lhs op rhs`. An `lhs` of `_` indicates that it should be set
    to the previous `rhs`.
  - `{ "conv": <bool> }`: Enter or exit conversion tactic mode. In the case of
    exit, the goal id is ignored.
 * `goal.continue {"stateId": <id>, ["branch": <id>], ["goals": <names>]}`:
  Execute continuation/resumption
  - `{ "branch": <id> }`: Continue on branch state. The current state must have no goals.
  - `{ "goals": <names> }`: Resume the given goals
 * `goal.delete {"stateIds": [<id>]}`: Drop the goal states specified in the list
 * `goal.print {"stateId": <id>}`: Print a goal state
 * `goal.save { id, path }`, `goal.load { path }`: Save/Load a goal state to/from a
  file. The environment is not carried with the state. The user is responsible
  for ensuring that the sender and receiver instances share the same environment.
 * `frontend.process { ["fileName": <fileName>,] ["file": <str>], invocations:
  <bool>, sorrys: <bool> }`: Executes the Lean frontend on a file, collecting
  either the tactic invocations (`"invocations": true`) or the sorrys into goal
  states (`"sorrys": true`)
 ## Errors
 When an error pertaining to the execution of a command happens, the returning JSON structure is
 ``` json
 { "error": "type", "desc": "description" }
 ```
 Common error forms:
 * `command`: Indicates malformed command structure which results from either
  invalid command or a malformed JSON structure that cannot be fed to an
  individual command.
 * `index`: Indicates an invariant maintained by the output of one command and
  input of another is broken. For example, attempting to query a symbol not
  existing in the library or indexing into a non-existent proof state.