diff --git a/README.md b/README.md
index f63947c..96829fc 100644
--- a/README.md
+++ b/README.md
@@ -33,6 +33,10 @@ In `experiments/`, there are some experiments:
 1. `minif2f/` is an example of executing a SGLANG based prover on the miniF2F dataset
 2. `dsp` is an Lean implementation of Draft-Sketch-Prove

+If the experiments don't work, run them in `poetry shell`. The environment
+variable `OPENAI_API_KEY` must be set when running experiments that call the
+OpenAI API.
+
 ## Referencing

 ```bib
diff --git a/experiments/dsp/solve/readme.md b/experiments/dsp/README.md
similarity index 91%
rename from experiments/dsp/solve/readme.md
rename to experiments/dsp/README.md
index eefb2db..f605932 100644
--- a/experiments/dsp/solve/readme.md
+++ b/experiments/dsp/README.md
@@ -1,4 +1,6 @@
-#
+# Lean Draft Sketch Prove (DSP)
+
+Based on Sean Welleck's DSP for Isabelle: https://github.com/wellecks/ntptutorial/tree/main/partII_dsp

 ## Related work

@@ -29,4 +31,4 @@ Questions:
 - Q1: what is this: "we perform deduplication by problem statements" when does it matter? All MATH train are unique, so why would I care about this?

 Idea:
-- Idea1: use the formal ground truth solution string in MATH, implement Draft Sketch Proof (DSP) for Lean4 + use some symbolic/ntp solver (hammer/tidy/ReProver)
\ No newline at end of file
+- Idea1: use the formal ground truth solution string in MATH, implement Draft Sketch Proof (DSP) for Lean4 + use some symbolic/ntp solver (hammer/tidy/ReProver)
diff --git a/experiments/dsp/dsp_for_lean4.py b/experiments/dsp/main.py
similarity index 98%
rename from experiments/dsp/dsp_for_lean4.py
rename to experiments/dsp/main.py
index cfe4be2..93a70cf 100644
--- a/experiments/dsp/dsp_for_lean4.py
+++ b/experiments/dsp/main.py
@@ -1,11 +1,9 @@
-import sys
+import sys, os, json
+from pathlib import Path
+from typing import Union, Any
 from collections import namedtuple
 import fire
-from pathlib import Path
 from tqdm import tqdm
-from typing import Union, Any
-import json
-import os
 from openai import OpenAI
 import wandb
 from tenacity import retry, stop_after_attempt, wait_exponential
@@ -117,6 +115,7 @@ def sketch(
     # Make prompt from template
     x_nl_problem: str = data_pt['nl_problem'][0]
     y_nl_solution: str = drafts[0]
+    x_fl_problem = None
     if autoformalize_prob_in_prompt:
         # prompt = eng.sketch_prompt_template.replace('{nl_problem}', x_nl_problem).replace('{nl_solution}', y_nl_solution)
         not NotImplemented
@@ -228,7 +227,7 @@ def main(

     # - Run DSP for Lean
     if 'gpt-4-' in model or 'gpt-3.5-' in model or 'gpt-4o' in model:
-        api_key = open(Path('~/keys/openai_api_key_brandos_koyejolab.txt').expanduser(), 'r').read().strip()
+        api_key = os.environ['OPENAI_API_KEY']
         SamplingParams = namedtuple('SamplingParams', ['n', 'max_tokens', 'top_p', 'temperature', 'stop'])
         draft_sampling_params = SamplingParams(n=n, max_tokens=max_tokens, top_p=top_p, temperature=temperature, stop=STOP_TOKENS_DRAFT_V0)
         sketch_sampling_params = SamplingParams(n=n, max_tokens=max_tokens, top_p=top_p, temperature=temperature, stop=STOP_TOKENS_SKETCH_V0)
@@ -249,4 +248,4 @@ if __name__ == "__main__":
     import time
     start_time = time.time()
     fire.Fire(main)
-    print(f"Time taken: {time.time() - start_time:.2f} seconds, or {(time.time() - start_time) / 60:.2f} minutes, or {(time.time() - start_time) / 3600:.2f} hours.\a")
\ No newline at end of file
+    print(f"Time taken: {time.time() - start_time:.2f} seconds, or {(time.time() - start_time) / 60:.2f} minutes, or {(time.time() - start_time) / 3600:.2f} hours.\a")
diff --git a/experiments/dsp/solve/dsp_lean_prompts.py b/experiments/dsp/solve/dsp_lean_prompts.py
index c741da0..cc0e8f8 100644
--- a/experiments/dsp/solve/dsp_lean_prompts.py
+++ b/experiments/dsp/solve/dsp_lean_prompts.py
@@ -12,10 +12,12 @@ import sys
 from pathlib import Path
 from typing import Optional

+experiment_dir = Path(__file__).resolve().parent.parent
+
 # just an example of stop tokens from the MATH eval code
 # STOP_TOKENS: list[str] = ["Solution:", "Problem:", "Question:", "USER:", "USER:", "USER", "ASSISTANT:", "ASSISTANT", "Instruction:", "Instruction", "Response:", "Response"]

-default_path_2_examples = '~/gold-ai-olympiad/data/debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json'
+default_path_2_examples = 'debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json'

 # -- Prompt draft (P_draft) for Lean 4
 """
@@ -48,7 +50,7 @@ def get_prompt_draft_template_4_lean_v0(
     prompt_draft_template_4_lean: Optional[str] = prompt_draft_template_lean4_v0,
     verbose: bool = False,
 ):
-    path_2_examples = Path(path_2_examples).expanduser()
+    path_2_examples = experiment_dir / Path(path_2_examples)
     # load json file with list of dicts from file in one line
     with open(path_2_examples, 'r') as f:
         examples: list[dict] = json.load(f)
@@ -108,7 +110,7 @@ def get_prompt_sketch_template_4_lean_v0(
     autoformalize_prob_in_prompt: Optional[bool] = False,
     verbose: bool = False,
 ):
-    path_2_examples = Path(path_2_examples).expanduser()
+    path_2_examples = experiment_dir / Path(path_2_examples)
     # load json file with list of dicts from file in one line
     with open(path_2_examples, 'r') as f:
         examples: list[dict] = json.load(f)
@@ -159,4 +161,4 @@ if __name__ == '__main__':
     # fire.Fire()
     main()
     end = time.time()
-    print(f'Time elapsed: {end - start} seconds, or {(end - start) / 60} minutes, or {(end - start) / 3600} hours.')
\ No newline at end of file
+    print(f'Time elapsed: {end - start} seconds, or {(end - start) / 60} minutes, or {(end - start) / 3600} hours.')
diff --git a/experiments/minif2f/README.md b/experiments/minif2f/README.md
new file mode 100644
index 0000000..0d93665
--- /dev/null
+++ b/experiments/minif2f/README.md
@@ -0,0 +1,7 @@
+# MiniF2F
+
+This experiment runs an LLM prover on the miniF2F dataset. Run it with
+
+```sh
+python3 experiments/minif2f/main.py [--dry-run]
+```
diff --git a/poetry.lock b/poetry.lock
index 713fd3d..179e415 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -700,13 +700,13 @@ files = [

 [[package]]
 name = "httpcore"
-version = "1.0.5"
+version = "1.0.6"
 description = "A minimal low-level HTTP client."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"},
-    {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"},
+    {file = "httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f"},
+    {file = "httpcore-1.0.6.tar.gz", hash = "sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f"},
 ]

 [package.dependencies]
@@ -717,7 +717,7 @@ h11 = ">=0.13,<0.15"
 asyncio = ["anyio (>=4.0,<5.0)"]
 http2 = ["h2 (>=3,<5)"]
 socks = ["socksio (==1.*)"]
-trio = ["trio (>=0.22.0,<0.26.0)"]
+trio = ["trio (>=0.22.0,<1.0)"]

 [[package]]
 name = "httpx"
@@ -1646,14 +1646,14 @@ files = [

 [[package]]
 name = "nvidia-nvjitlink-cu12"
-version = "12.6.68"
+version = "12.6.77"
 description = "Nvidia JIT LTO Library"
 optional = false
 python-versions = ">=3"
 files = [
-    {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_aarch64.whl", hash = "sha256:b3fd0779845f68b92063ab1393abab1ed0a23412fc520df79a8190d098b5cd6b"},
-    {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_x86_64.whl", hash = "sha256:125a6c2a44e96386dda634e13d944e60b07a0402d391a070e8fb4104b34ea1ab"},
-    {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-win_amd64.whl", hash = "sha256:a55744c98d70317c5e23db14866a8cc2b733f7324509e941fc96276f9f37801d"},
+    {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:3bf10d85bb1801e9c894c6e197e44dd137d2a0a9e43f8450e9ad13f2df0dd52d"},
+    {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9ae346d16203ae4ea513be416495167a0101d33d2d14935aa9c1829a3fb45142"},
+    {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:410718cd44962bed862a31dd0318620f6f9a8b28a6291967bcfcb446a6516771"},
 ]

 [[package]]
@@ -1669,13 +1669,13 @@ files = [

 [[package]]
 name = "openai"
-version = "1.50.0"
+version = "1.51.0"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-1.50.0-py3-none-any.whl", hash = "sha256:8545b3e37aa28a39e5177adbb6142f3e2b2b9e2889ae002c0ba785d917e466e2"},
-    {file = "openai-1.50.0.tar.gz", hash = "sha256:fc774e36ad96839b9fc14f1097093527b8abd1348ed824e25818309820afa344"},
+    {file = "openai-1.51.0-py3-none-any.whl", hash = "sha256:d9affafb7e51e5a27dce78589d4964ce4d6f6d560307265933a94b2e3f3c5d2c"},
+    {file = "openai-1.51.0.tar.gz", hash = "sha256:8dc4f9d75ccdd5466fc8c99a952186eddceb9fd6ba694044773f3736a847149d"},
 ]

 [package.dependencies]
@@ -2560,13 +2560,13 @@ win32 = ["pywin32"]

 [[package]]
 name = "sentry-sdk"
-version = "2.14.0"
+version = "2.15.0"
 description = "Python client for Sentry (https://sentry.io)"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "sentry_sdk-2.14.0-py2.py3-none-any.whl", hash = "sha256:b8bc3dc51d06590df1291b7519b85c75e2ced4f28d9ea655b6d54033503b5bf4"},
-    {file = "sentry_sdk-2.14.0.tar.gz", hash = "sha256:1e0e2eaf6dad918c7d1e0edac868a7bf20017b177f242cefe2a6bcd47955961d"},
+    {file = "sentry_sdk-2.15.0-py2.py3-none-any.whl", hash = "sha256:8fb0d1a4e1a640172f31502e4503543765a1fe8a9209779134a4ac52d4677303"},
+    {file = "sentry_sdk-2.15.0.tar.gz", hash = "sha256:a599e7d3400787d6f43327b973e55a087b931ba2c592a7a7afa691f8eb5e75e2"},
 ]

 [package.dependencies]
@@ -3250,4 +3250,4 @@ test = ["websockets"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "4e7eaffa149ad2f623d61b4f0308a96142b52a01bf543954a378c4d775811ea4"
+content-hash = "686e7f1af124ef2404bc6b46677850c581e8e74f3cab51992fac8e8578f88a3a"
diff --git a/pyproject.toml b/pyproject.toml
index b52ba59..10a1ea6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,24 +7,25 @@ license = "GPL-3"
 readme = "README.md"

 [tool.poetry.dependencies]
-python = "^3.10"
-pexpect = "^4.9.0"
-torch = "2.2.1"
 # vllm = "0.4.1"
-fire = "0.6.0"
-wandb = "0.17.0"
-tenacity = "8.3.0"
-sglang = "^0.1.16"
 numpy = "^1.26.4"
-openai = "^1.31.0"
-tiktoken = "^0.7.0"
+pexpect = "^4.9.0"
+python = "^3.10"
+sglang = "^0.1.16"
+torch = "2.2.1"

 [tool.poetry.build]
 generate-setup-file = false
 script = "build.py"

 [tool.poetry.group.dev.dependencies]
+# Experiment-related dependencies go here so they don't clutter the main project dependencies.
+fire = "0.6.0"
 notebook = "^7.2.1"
+openai = "^1.31.0"
+tenacity = "8.3.0"
+tiktoken = "^0.7.0"
+wandb = "0.17.0"

 [build-system]
 requires = ["poetry-core"]