diff --git a/experiments/minif2f/MiniF2F/.gitignore b/experiments/minif2f/MiniF2F/.gitignore new file mode 100644 index 0000000..1824d0c --- /dev/null +++ b/experiments/minif2f/MiniF2F/.gitignore @@ -0,0 +1,3 @@ +/build +/lakefile.olean +/lake-packages/* diff --git a/experiments/minif2f/MiniF2F/MiniF2F.lean b/experiments/minif2f/MiniF2F/MiniF2F.lean new file mode 100644 index 0000000..46005ca --- /dev/null +++ b/experiments/minif2f/MiniF2F/MiniF2F.lean @@ -0,0 +1,2 @@ +import Aesop +import Mathlib diff --git a/experiments/minif2f/MiniF2F/lake-manifest.json b/experiments/minif2f/MiniF2F/lake-manifest.json new file mode 100644 index 0000000..b0a5048 --- /dev/null +++ b/experiments/minif2f/MiniF2F/lake-manifest.json @@ -0,0 +1,75 @@ +{"version": "1.1.0", + "packagesDir": ".lake/packages", + "packages": + [{"url": "https://github.com/leanprover-community/batteries", + "type": "git", + "subDir": null, + "scope": "", + "rev": "2ead90d24b4fac3a05c9c4294daa39bd8686fb98", + "name": "batteries", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/aesop.git", + "type": "git", + "subDir": null, + "scope": "", + "rev": "a64fe24aa94e21404940e9217363a9a1ed9a33a6", + "name": "aesop", + "manifestFile": "lake-manifest.json", + "inputRev": "v4.10.0-rc1", + "inherited": false, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/quote4", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "a7bfa63f5dddbcab2d4e0569c4cac74b2585e2c6", + "name": "Qq", + "manifestFile": "lake-manifest.json", + "inputRev": "master", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/ProofWidgets4", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "d1b33202c3a29a079f292de65ea438648123b635", + "name": "proofwidgets", + "manifestFile": "lake-manifest.json", + "inputRev": "v0.0.39", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover/lean4-cli", + "type": "git", + "subDir": null, + "scope": "", + "rev": "a11566029bd9ec4f68a65394e8c3ff1af74c1a29", + "name": "Cli", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/import-graph", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "d366a602cc4a325a6f9db3a3991dfa6d6cf409c5", + "name": "importGraph", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/mathlib4.git", + "type": "git", + "subDir": null, + "scope": "", + "rev": "f5c3f06aa7f6d6c221786d2890c345a00e6341f8", + "name": "mathlib", + "manifestFile": "lake-manifest.json", + "inputRev": "v4.10.0-rc1", + "inherited": false, + "configFile": "lakefile.lean"}], + "name": "MiniF2F", + "lakeDir": ".lake"} diff --git a/experiments/minif2f/MiniF2F/lakefile.lean b/experiments/minif2f/MiniF2F/lakefile.lean new file mode 100644 index 0000000..f5ec5eb --- /dev/null +++ b/experiments/minif2f/MiniF2F/lakefile.lean @@ -0,0 +1,12 @@ +import Lake +open Lake DSL + +require aesop from git + "https://github.com/leanprover-community/aesop.git" @ "v4.10.0-rc1" +require mathlib from git + "https://github.com/leanprover-community/mathlib4.git" @ "v4.10.0-rc1" + +package MiniF2F + +@[default_target] +lean_lib MiniF2F diff --git a/experiments/minif2f/MiniF2F/lean-toolchain b/experiments/minif2f/MiniF2F/lean-toolchain new file mode 120000 index 0000000..b494e97 --- /dev/null +++ b/experiments/minif2f/MiniF2F/lean-toolchain @@ -0,0 +1 @@ +../../../src/lean-toolchain \ No newline at end of file diff --git a/experiments/minif2f/README.md b/experiments/minif2f/README.md index 0d93665..ad72c20 100644 --- a/experiments/minif2f/README.md +++ b/experiments/minif2f/README.md @@ -5,3 +5,12 @@ This is an experiment on running a LLM prover on miniF2F data. Run with ```sh python3 experiments/minif2f/main.py [--dry-run] ``` + +## Developing + +Run unit tests with + +``` sh +python3 -m model.{llm_agent,gen_tactic} +``` + diff --git a/experiments/minif2f/main.py b/experiments/minif2f/main.py index b4c41d4..16e56f1 100755 --- a/experiments/minif2f/main.py +++ b/experiments/minif2f/main.py @@ -5,15 +5,17 @@ from typing import Optional from pathlib import Path from pantograph.server import Server, ServerError from pantograph.search import SearchResult -from pantograph.search_llm import LLMAgent +from model.llm_agent import LLMAgent + +PATH_EXPERIMENT = Path(__file__).parent.resolve() def get_project_and_lean_path(): - cwd = Path(__file__).parent.resolve() / 'Example' + cwd = PATH_EXPERIMENT / 'MiniF2F' p = subprocess.check_output(['lake', 'env', 'printenv', 'LEAN_PATH'], cwd=cwd) return cwd, p def read_test_data(use_valid: bool): - jsonl_path = Path(__file__).parent / ('valid.jsonl' if use_valid else 'test.jsonl') + jsonl_path = PATH_EXPERIMENT / ('valid.jsonl' if use_valid else 'test.jsonl') with open(jsonl_path, 'r') as f: return [json.loads(l) for l in list(f)] @@ -44,7 +46,7 @@ def output_file_name(datum, use_hammer: bool, use_llm: bool): folder += '-hammer' if use_llm: folder += '-llm' - folder = Path(__file__).parent / folder + folder = PATH_EXPERIMENT / folder folder.mkdir(exist_ok=True, parents=True) return folder / f"{name}.json" @@ -65,7 +67,7 @@ def run_eval(args): if file_name.is_file(): print(f"Skipping {datum['id']}") continue - server = Server(imports=["Example"], project_path=project_path, lean_path=lean_path, options=["maxHeartbeats=0"]) + server = Server(imports=["MiniF2F"], project_path=project_path, lean_path=lean_path, options=["maxHeartbeats=0"]) agent = LLMAgent(server, use_hammer=args.use_hammer, use_llm=args.use_llm) result = try_test_data(server, agent, datum, max_steps=args.max_steps, max_trials_per_goal=args.max_trials_per_goal) if result is None: diff --git a/experiments/minif2f/model/__init__.py b/experiments/minif2f/model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pantograph/gen_tactic.py b/experiments/minif2f/model/gen_tactic.py similarity index 100% rename from pantograph/gen_tactic.py rename to experiments/minif2f/model/gen_tactic.py diff --git a/pantograph/search_llm.py b/experiments/minif2f/model/llm_agent.py similarity index 98% rename from pantograph/search_llm.py rename to experiments/minif2f/model/llm_agent.py index c98ad23..3105d69 100644 --- a/pantograph/search_llm.py +++ b/experiments/minif2f/model/llm_agent.py @@ -3,7 +3,7 @@ import collections, unittest from pantograph.search import Agent from pantograph.server import Server, TacticFailure, ServerError from pantograph.expr import Expr, Tactic, GoalState -from pantograph.gen_tactic import LEAN4_REWRITE, select_tactic +from .gen_tactic import LEAN4_REWRITE, select_tactic import sglang as sgl class LLMAgent(Agent): diff --git a/poetry.lock b/poetry.lock index 179e415..59bc761 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3250,4 +3250,4 @@ test = ["websockets"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "686e7f1af124ef2404bc6b46677850c581e8e74f3cab51992fac8e8578f88a3a" +content-hash = "1ce8e928cff885e8c66d9c353e982e91e84ae84c91e96860aa3ca5a885bb0d2e" diff --git a/pyproject.toml b/pyproject.toml index d674e24..9a6fc33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,12 +7,8 @@ license = "GPL-3" readme = "README.md" [tool.poetry.dependencies] -# vllm = "0.4.1" -numpy = "^1.26.4" pexpect = "^4.9.0" python = "^3.10" -sglang = "^0.1.16" -torch = "2.2.1" [tool.poetry.build] generate-setup-file = false @@ -22,10 +18,14 @@ script = "build.py" # Experiment related dependencies here to not clutter the main project dependencies. fire = "0.6.0" notebook = "^7.2.1" +numpy = "^1.26.4" openai = "^1.31.0" +sglang = "^0.1.16" tenacity = "8.3.0" tiktoken = "^0.7.0" +torch = "2.2.1" wandb = "0.17.0" +# vllm = "0.4.1" [build-system] requires = ["poetry-core"]