diff --git a/experiments/minif2f/.gitignore b/experiments/minif2f/.gitignore new file mode 100644 index 0000000..55ac6ac --- /dev/null +++ b/experiments/minif2f/.gitignore @@ -0,0 +1 @@ +/output* diff --git a/experiments/minif2f/README.md b/experiments/minif2f/README.md index ad72c20..560eb4d 100644 --- a/experiments/minif2f/README.md +++ b/experiments/minif2f/README.md @@ -1,11 +1,14 @@ # MiniF2F -This is an experiment on running a LLM prover on miniF2F data. Run with +This is an experiment on running a LLM prover on miniF2F data. Build the project +`MiniF2F` with `lake build`, and run with ```sh -python3 experiments/minif2f/main.py [--dry-run] +python3 experiments/minif2f/main.py [--dry-run] [--use-llm] ``` +Read the help message carefully. + ## Developing Run unit tests with diff --git a/experiments/minif2f/main.py b/experiments/minif2f/main.py index 4186348..ea1fd50 100755 --- a/experiments/minif2f/main.py +++ b/experiments/minif2f/main.py @@ -72,9 +72,10 @@ def run_eval(args): if file_name.is_file(): print(f"Skipping {datum['id']}") continue - server = Server(imports=["MiniF2F"], project_path=project_path, lean_path=lean_path, core_options=["maxHeartbeats=0"]) + server = Server(imports=["MiniF2F"], project_path=project_path, lean_path=lean_path) agent = LLMAgent(server, use_hammer=args.use_hammer, use_llm=args.use_llm) result = try_test_data(server, agent, datum, max_steps=args.max_steps, max_trials_per_goal=args.max_trials_per_goal) + #server.gc() if result is None: with open(placeholder_file_name, 'w') as f: json.dump({ 'id': datum['id'] }, f) diff --git a/pantograph/server.py b/pantograph/server.py index e0f6870..a94b593 100644 --- a/pantograph/server.py +++ b/pantograph/server.py @@ -28,8 +28,8 @@ class Server: # Options for executing the REPL. # Set `{ "automaticMode" : False }` to handle resumption by yourself. options={}, - core_options=[], - timeout=20, + core_options=["maxHeartbeats=0"], + timeout=60, maxread=1000000): """ timeout: Amount of time to wait for execution @@ -86,7 +86,10 @@ class Server: self.proc.sendline(f"{cmd} {s}") try: line = self.proc.readline() - return json.loads(line) + try: + return json.loads(line) + except Exception as e: + raise ServerError(f"Cannot decode: {line}") from e except pexpect.exceptions.TIMEOUT as exc: raise exc @@ -96,9 +99,12 @@ class Server: Must be called periodically. """ - if self.to_remove_goal_states: - self.run('goal.delete', {'stateIds': self.to_remove_goal_states}) - self.to_remove_goal_states.clear() + if not self.to_remove_goal_states: + return + result = self.run('goal.delete', {'stateIds': self.to_remove_goal_states}) + self.to_remove_goal_states.clear() + if "error" in result: + raise ServerError(result["desc"]) def expr_type(self, expr: Expr) -> Expr: """