fix: Absolute directories in experiments

doc: Add documentation about API key
Leni Aniva 2024-10-01 11:34:30 -07:00
parent 95e90cc026
commit e942359666
Signed by: aniva
GPG Key ID: 4D9B1C8D10EA4C50
7 changed files with 52 additions and 37 deletions


@@ -33,6 +33,10 @@ In `experiments/`, there are some experiments:
1. `minif2f/` is an example of executing an SGLang-based prover on the miniF2F dataset
2. `dsp` is a Lean implementation of Draft-Sketch-Prove
If the experiments don't work, run them inside `poetry shell`. The environment
variable `OPENAI_API_KEY` must be set when running experiments that call the
OpenAI API.
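For example, a typical setup before launching one of these experiments could look like this (the key value is a placeholder; substitute your own):
```sh
# Placeholder key; use a real one from your OpenAI account.
export OPENAI_API_KEY="sk-..."
poetry shell
```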
## Referencing
```bib


@@ -1,4 +1,6 @@
#
# Lean Draft Sketch Prove (DSP)
Based on Sean Welleck's DSP for Isabelle: https://github.com/wellecks/ntptutorial/tree/main/partII_dsp
## Related work
@@ -29,4 +31,4 @@ Questions:
- Q1: what does "we perform deduplication by problem statements" mean, and when does it matter? All MATH train problems are unique, so why would this matter?
Idea:
- Idea1: use the formal ground truth solution string in MATH, implement Draft-Sketch-Prove (DSP) for Lean4 + use some symbolic/ntp solver (hammer/tidy/ReProver)


@@ -1,11 +1,9 @@
import sys
import sys, os, json
from pathlib import Path
from typing import Union, Any
from collections import namedtuple
import fire
from pathlib import Path
from tqdm import tqdm
from typing import Union, Any
import json
import os
from openai import OpenAI
import wandb
from tenacity import retry, stop_after_attempt, wait_exponential
@@ -117,6 +115,7 @@ def sketch(
# Make prompt from template
x_nl_problem: str = data_pt['nl_problem'][0]
y_nl_solution: str = drafts[0]
x_fl_problem = None
if autoformalize_prob_in_prompt:
# prompt = eng.sketch_prompt_template.replace('{nl_problem}', x_nl_problem).replace('{nl_solution}', y_nl_solution)
raise NotImplementedError
@@ -228,7 +227,7 @@ def main(
# - Run DSP for Lean
if 'gpt-4-' in model or 'gpt-3.5-' in model or 'gpt-4o' in model:
api_key = open(Path('~/keys/openai_api_key_brandos_koyejolab.txt').expanduser(), 'r').read().strip()
api_key = os.environ['OPENAI_API_KEY']
SamplingParams = namedtuple('SamplingParams', ['n', 'max_tokens', 'top_p', 'temperature', 'stop'])
draft_sampling_params = SamplingParams(n=n, max_tokens=max_tokens, top_p=top_p, temperature=temperature, stop=STOP_TOKENS_DRAFT_V0)
sketch_sampling_params = SamplingParams(n=n, max_tokens=max_tokens, top_p=top_p, temperature=temperature, stop=STOP_TOKENS_SKETCH_V0)
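Since the commit switches from reading a key file to `os.environ['OPENAI_API_KEY']`, a bare `KeyError` is raised when the variable is unset. A more defensive lookup, shown here only as a sketch and not part of this commit, would fail with a clearer message:
```python
import os

# Sketch: fail fast with an actionable message instead of a bare KeyError.
api_key = os.environ.get('OPENAI_API_KEY')
if api_key is None:
    raise RuntimeError('Set OPENAI_API_KEY before running experiments that call the OpenAI API.')
```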
@@ -249,4 +248,4 @@ if __name__ == "__main__":
import time
start_time = time.time()
fire.Fire(main)
print(f"Time taken: {time.time() - start_time:.2f} seconds, or {(time.time() - start_time) / 60:.2f} minutes, or {(time.time() - start_time) / 3600:.2f} hours.\a")
print(f"Time taken: {time.time() - start_time:.2f} seconds, or {(time.time() - start_time) / 60:.2f} minutes, or {(time.time() - start_time) / 3600:.2f} hours.\a")


@@ -12,10 +12,12 @@ import sys
from pathlib import Path
from typing import Optional
experiment_dir = Path(__file__).resolve().parent.parent
# just an example of stop tokens from the MATH eval code
# STOP_TOKENS: list[str] = ["Solution:", "Problem:", "Question:", "USER:", "USER:", "USER", "ASSISTANT:", "ASSISTANT", "Instruction:", "Instruction", "Response:", "Response"]
default_path_2_examples = '~/gold-ai-olympiad/data/debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json'
default_path_2_examples = 'debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json'
# -- Prompt draft (P_draft) for Lean 4
"""
@@ -48,7 +50,7 @@ def get_prompt_draft_template_4_lean_v0(
prompt_draft_template_4_lean: Optional[str] = prompt_draft_template_lean4_v0,
verbose: bool = False,
):
path_2_examples = Path(path_2_examples).expanduser()
path_2_examples = experiment_dir / Path(path_2_examples)
# load json file with list of dicts from file in one line
with open(path_2_examples, 'r') as f:
examples: list[dict] = json.load(f)
@@ -108,7 +110,7 @@ def get_prompt_sketch_template_4_lean_v0(
autoformalize_prob_in_prompt: Optional[bool] = False,
verbose: bool = False,
):
path_2_examples = Path(path_2_examples).expanduser()
path_2_examples = experiment_dir / Path(path_2_examples)
# load json file with list of dicts from file in one line
with open(path_2_examples, 'r') as f:
examples: list[dict] = json.load(f)
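Both hunks above replace `Path(...).expanduser()`, which resolved a hard-coded home-directory path, with a join against `experiment_dir`, so the example files are found relative to the repository checkout rather than one user's home directory. A minimal standalone sketch of the pattern (using the data path from this commit):
```python
from pathlib import Path

# Anchor data paths to the file that defines them, not to $HOME or the CWD.
experiment_dir = Path(__file__).resolve().parent.parent
path_2_examples = experiment_dir / 'debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json'
```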
@@ -159,4 +161,4 @@ if __name__ == '__main__':
# fire.Fire()
main()
end = time.time()
print(f'Time elapsed: {end - start} seconds, or {(end - start) / 60} minutes, or {(end - start) / 3600} hours.')


@@ -0,0 +1,7 @@
# MiniF2F
This is an experiment on running an LLM prover on miniF2F data. Run with
```sh
python3 experiments/minif2f/main.py [--dry-run]
```
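As with the other experiments, if the script fails to find its dependencies, running it from inside the Poetry environment should help:
```sh
poetry shell
python3 experiments/minif2f/main.py --dry-run
```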

poetry.lock (generated)

@@ -700,13 +700,13 @@ files = [
[[package]]
name = "httpcore"
version = "1.0.5"
version = "1.0.6"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"},
{file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"},
{file = "httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f"},
{file = "httpcore-1.0.6.tar.gz", hash = "sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f"},
]
[package.dependencies]
@@ -717,7 +717,7 @@ h11 = ">=0.13,<0.15"
asyncio = ["anyio (>=4.0,<5.0)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
trio = ["trio (>=0.22.0,<0.26.0)"]
trio = ["trio (>=0.22.0,<1.0)"]
[[package]]
name = "httpx"
@@ -1646,14 +1646,14 @@ files = [
[[package]]
name = "nvidia-nvjitlink-cu12"
version = "12.6.68"
version = "12.6.77"
description = "Nvidia JIT LTO Library"
optional = false
python-versions = ">=3"
files = [
{file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_aarch64.whl", hash = "sha256:b3fd0779845f68b92063ab1393abab1ed0a23412fc520df79a8190d098b5cd6b"},
{file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_x86_64.whl", hash = "sha256:125a6c2a44e96386dda634e13d944e60b07a0402d391a070e8fb4104b34ea1ab"},
{file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-win_amd64.whl", hash = "sha256:a55744c98d70317c5e23db14866a8cc2b733f7324509e941fc96276f9f37801d"},
{file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:3bf10d85bb1801e9c894c6e197e44dd137d2a0a9e43f8450e9ad13f2df0dd52d"},
{file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9ae346d16203ae4ea513be416495167a0101d33d2d14935aa9c1829a3fb45142"},
{file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:410718cd44962bed862a31dd0318620f6f9a8b28a6291967bcfcb446a6516771"},
]
[[package]]
@@ -1669,13 +1669,13 @@ files = [
[[package]]
name = "openai"
version = "1.50.0"
version = "1.51.0"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.7.1"
files = [
{file = "openai-1.50.0-py3-none-any.whl", hash = "sha256:8545b3e37aa28a39e5177adbb6142f3e2b2b9e2889ae002c0ba785d917e466e2"},
{file = "openai-1.50.0.tar.gz", hash = "sha256:fc774e36ad96839b9fc14f1097093527b8abd1348ed824e25818309820afa344"},
{file = "openai-1.51.0-py3-none-any.whl", hash = "sha256:d9affafb7e51e5a27dce78589d4964ce4d6f6d560307265933a94b2e3f3c5d2c"},
{file = "openai-1.51.0.tar.gz", hash = "sha256:8dc4f9d75ccdd5466fc8c99a952186eddceb9fd6ba694044773f3736a847149d"},
]
[package.dependencies]
@@ -2560,13 +2560,13 @@ win32 = ["pywin32"]
[[package]]
name = "sentry-sdk"
version = "2.14.0"
version = "2.15.0"
description = "Python client for Sentry (https://sentry.io)"
optional = false
python-versions = ">=3.6"
files = [
{file = "sentry_sdk-2.14.0-py2.py3-none-any.whl", hash = "sha256:b8bc3dc51d06590df1291b7519b85c75e2ced4f28d9ea655b6d54033503b5bf4"},
{file = "sentry_sdk-2.14.0.tar.gz", hash = "sha256:1e0e2eaf6dad918c7d1e0edac868a7bf20017b177f242cefe2a6bcd47955961d"},
{file = "sentry_sdk-2.15.0-py2.py3-none-any.whl", hash = "sha256:8fb0d1a4e1a640172f31502e4503543765a1fe8a9209779134a4ac52d4677303"},
{file = "sentry_sdk-2.15.0.tar.gz", hash = "sha256:a599e7d3400787d6f43327b973e55a087b931ba2c592a7a7afa691f8eb5e75e2"},
]
[package.dependencies]
@@ -3250,4 +3250,4 @@ test = ["websockets"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "4e7eaffa149ad2f623d61b4f0308a96142b52a01bf543954a378c4d775811ea4"
content-hash = "686e7f1af124ef2404bc6b46677850c581e8e74f3cab51992fac8e8578f88a3a"


@@ -7,24 +7,25 @@ license = "GPL-3"
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
pexpect = "^4.9.0"
torch = "2.2.1"
# vllm = "0.4.1"
fire = "0.6.0"
wandb = "0.17.0"
tenacity = "8.3.0"
sglang = "^0.1.16"
numpy = "^1.26.4"
openai = "^1.31.0"
tiktoken = "^0.7.0"
pexpect = "^4.9.0"
python = "^3.10"
sglang = "^0.1.16"
torch = "2.2.1"
[tool.poetry.build]
generate-setup-file = false
script = "build.py"
[tool.poetry.group.dev.dependencies]
# Experiment-related dependencies live here so they do not clutter the main project dependencies.
fire = "0.6.0"
notebook = "^7.2.1"
openai = "^1.31.0"
tenacity = "8.3.0"
tiktoken = "^0.7.0"
wandb = "0.17.0"
[build-system]
requires = ["poetry-core"]
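With the experiment dependencies moved into the `dev` group, a leaner install that skips them becomes possible. A sketch, assuming Poetry 1.2 or newer:
```sh
poetry install              # default: installs the main and dev groups
poetry install --only main  # skip the experiment-only dependencies
```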