fix: Absolute directories in experiments
doc: Add documentation about API key
This commit is contained in:
parent 95e90cc026
commit e942359666
@@ -33,6 +33,10 @@ In `experiments/`, there are some experiments:

 1. `minif2f/` is an example of executing an SGLang-based prover on the miniF2F dataset
 2. `dsp` is a Lean implementation of Draft-Sketch-Prove
+
+If the experiments don't work, run them in `poetry shell`. The environment
+variable `OPENAI_API_KEY` must be set when running experiments calling the
+OpenAI API.

 ## Referencing

 ```bib
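The note above pairs with the `api_key = os.environ['OPENAI_API_KEY']` change in `main()` further down. A minimal sketch of the intended pattern; the guard and error message are illustrative assumptions, not code from this commit:

```python
import os

# Fail fast with a clear message when the key is missing, since the
# experiments read OPENAI_API_KEY at startup rather than from a key file.
api_key = os.environ.get('OPENAI_API_KEY')
if api_key is None:
    raise RuntimeError(
        'OPENAI_API_KEY is not set; export it (e.g. inside `poetry shell`) '
        'before running experiments that call the OpenAI API.'
    )
```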
@@ -1,4 +1,6 @@
-#
+# Lean Draft Sketch Prove (DSP)
+
 based on Sean Welleck's DSP for Isabelle: https://github.com/wellecks/ntptutorial/tree/main/partII_dsp
+
 ## Related work

@@ -29,4 +31,4 @@ Questions:
 - Q1: what is this: "we perform deduplication by problem statements" when does it matter? All MATH train are unique, so why would I care about this?

 Idea:
-- Idea1: use the formal ground truth solution string in MATH, implement Draft Sketch Proof (DSP) for Lean4 + use some symbolic/ntp solver (hammer/tidy/ReProver)
+- Idea1: use the formal ground truth solution string in MATH, implement Draft Sketch Proof (DSP) for Lean4 + use some symbolic/ntp solver (hammer/tidy/ReProver)
@@ -1,11 +1,9 @@
-import sys
+import sys, os, json
-from pathlib import Path
-from typing import Union, Any
 from collections import namedtuple
 import fire
+from pathlib import Path
 from tqdm import tqdm
+from typing import Union, Any
-import json
-import os
 from openai import OpenAI
 import wandb
 from tenacity import retry, stop_after_attempt, wait_exponential
@@ -117,6 +115,7 @@ def sketch(
     # Make prompt from template
     x_nl_problem: str = data_pt['nl_problem'][0]
     y_nl_solution: str = drafts[0]
+    x_fl_problem = None
     if autoformalize_prob_in_prompt:
         # prompt = eng.sketch_prompt_template.replace('{nl_problem}', x_nl_problem).replace('{nl_solution}', y_nl_solution)
         not NotImplemented
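One caveat in the surrounding context: `not NotImplemented` is a bare expression that evaluates to `False` and is discarded, so the unimplemented branch silently falls through. If the intent is to reject that branch, the idiomatic guard would be a sketch like:

```python
if autoformalize_prob_in_prompt:
    # Reject the unimplemented autoformalization branch loudly instead of
    # silently falling through (`not NotImplemented` is a discarded expression).
    raise NotImplementedError('autoformalize_prob_in_prompt is not supported yet')
```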
@@ -228,7 +227,7 @@ def main(

     # - Run DSP for Lean
     if 'gpt-4-' in model or 'gpt-3.5-' in model or 'gpt-4o' in model:
-        api_key = open(Path('~/keys/openai_api_key_brandos_koyejolab.txt').expanduser(), 'r').read().strip()
+        api_key = os.environ['OPENAI_API_KEY']
         SamplingParams = namedtuple('SamplingParams', ['n', 'max_tokens', 'top_p', 'temperature', 'stop'])
         draft_sampling_params = SamplingParams(n=n, max_tokens=max_tokens, top_p=top_p, temperature=temperature, stop=STOP_TOKENS_DRAFT_V0)
         sketch_sampling_params = SamplingParams(n=n, max_tokens=max_tokens, top_p=top_p, temperature=temperature, stop=STOP_TOKENS_SKETCH_V0)
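With the key now read from the environment, and given that this file already imports `OpenAI` and `tenacity`, a retry-wrapped completion call might look like the sketch below. The function name, model choice, and retry policy are illustrative assumptions; only the `SamplingParams` fields come from the diff.

```python
import os
from collections import namedtuple

from openai import OpenAI
from tenacity import retry, stop_after_attempt, wait_exponential

SamplingParams = namedtuple('SamplingParams', ['n', 'max_tokens', 'top_p', 'temperature', 'stop'])
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=30))
def complete(prompt: str, params: SamplingParams) -> list[str]:
    # Retry transient API failures with exponential backoff, then return
    # the n sampled completions for this prompt.
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{'role': 'user', 'content': prompt}],
        n=params.n,
        max_tokens=params.max_tokens,
        top_p=params.top_p,
        temperature=params.temperature,
        stop=params.stop,
    )
    return [choice.message.content for choice in response.choices]
```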
@@ -249,4 +248,4 @@ if __name__ == "__main__":
     import time
     start_time = time.time()
     fire.Fire(main)
-    print(f"Time taken: {time.time() - start_time:.2f} seconds, or {(time.time() - start_time) / 60:.2f} minutes, or {(time.time() - start_time) / 3600:.2f} hours.\a")
+    print(f"Time taken: {time.time() - start_time:.2f} seconds, or {(time.time() - start_time) / 60:.2f} minutes, or {(time.time() - start_time) / 3600:.2f} hours.\a")
@@ -12,10 +12,12 @@ import sys
 from pathlib import Path
 from typing import Optional

+experiment_dir = Path(__file__).resolve().parent.parent
+
 # just an example of stop tokens from the MATH eval code
 # STOP_TOKENS: list[str] = ["Solution:", "Problem:", "Question:", "USER:", "USER:", "USER", "ASSISTANT:", "ASSISTANT", "Instruction:", "Instruction", "Response:", "Response"]

-default_path_2_examples = '~/gold-ai-olympiad/data/debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json'
+default_path_2_examples = 'debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json'

 # -- Prompt draft (P_draft) for Lean 4
 """
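This `experiment_dir` anchor is the heart of the "absolute directories" fix: defaults like `default_path_2_examples` become repo-relative instead of being pinned under one user's home directory. A minimal sketch of the behavior, assuming the module sits two levels below the directory the data lives in:

```python
from pathlib import Path

# __file__ is this module's own location, so resolving it and taking two
# .parent hops yields the experiments/ directory regardless of the current
# working directory or the user running the script.
experiment_dir = Path(__file__).resolve().parent.parent

# Relative defaults now resolve to the same files on every machine:
rel = 'debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json'
print(experiment_dir / rel)
```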
@@ -48,7 +50,7 @@ def get_prompt_draft_template_4_lean_v0(
     prompt_draft_template_4_lean: Optional[str] = prompt_draft_template_lean4_v0,
     verbose: bool = False,
 ):
-    path_2_examples = Path(path_2_examples).expanduser()
+    path_2_examples = experiment_dir / Path(path_2_examples)
     # load json file with list of dicts from file in one line
     with open(path_2_examples, 'r') as f:
         examples: list[dict] = json.load(f)
@@ -108,7 +110,7 @@ def get_prompt_sketch_template_4_lean_v0(
     autoformalize_prob_in_prompt: Optional[bool] = False,
     verbose: bool = False,
 ):
-    path_2_examples = Path(path_2_examples).expanduser()
+    path_2_examples = experiment_dir / Path(path_2_examples)
     # load json file with list of dicts from file in one line
     with open(path_2_examples, 'r') as f:
         examples: list[dict] = json.load(f)
@@ -159,4 +161,4 @@ if __name__ == '__main__':
     # fire.Fire()
     main()
     end = time.time()
-    print(f'Time elapsed: {end - start} seconds, or {(end - start) / 60} minutes, or {(end - start) / 3600} hours.')
+    print(f'Time elapsed: {end - start} seconds, or {(end - start) / 60} minutes, or {(end - start) / 3600} hours.')
@@ -0,0 +1,7 @@
+# MiniF2F
+
+This is an experiment on running an LLM prover on miniF2F data. Run it with
+
+```sh
+python3 experiments/minif2f/main.py [--dry-run]
+```
@@ -700,13 +700,13 @@ files = [

 [[package]]
 name = "httpcore"
-version = "1.0.5"
+version = "1.0.6"
 description = "A minimal low-level HTTP client."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"},
-    {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"},
+    {file = "httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f"},
+    {file = "httpcore-1.0.6.tar.gz", hash = "sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f"},
 ]

 [package.dependencies]
@@ -717,7 +717,7 @@ h11 = ">=0.13,<0.15"
 asyncio = ["anyio (>=4.0,<5.0)"]
 http2 = ["h2 (>=3,<5)"]
 socks = ["socksio (==1.*)"]
-trio = ["trio (>=0.22.0,<0.26.0)"]
+trio = ["trio (>=0.22.0,<1.0)"]

 [[package]]
 name = "httpx"
@@ -1646,14 +1646,14 @@ files = [

 [[package]]
 name = "nvidia-nvjitlink-cu12"
-version = "12.6.68"
+version = "12.6.77"
 description = "Nvidia JIT LTO Library"
 optional = false
 python-versions = ">=3"
 files = [
-    {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_aarch64.whl", hash = "sha256:b3fd0779845f68b92063ab1393abab1ed0a23412fc520df79a8190d098b5cd6b"},
-    {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_x86_64.whl", hash = "sha256:125a6c2a44e96386dda634e13d944e60b07a0402d391a070e8fb4104b34ea1ab"},
-    {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-win_amd64.whl", hash = "sha256:a55744c98d70317c5e23db14866a8cc2b733f7324509e941fc96276f9f37801d"},
+    {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:3bf10d85bb1801e9c894c6e197e44dd137d2a0a9e43f8450e9ad13f2df0dd52d"},
+    {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9ae346d16203ae4ea513be416495167a0101d33d2d14935aa9c1829a3fb45142"},
+    {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:410718cd44962bed862a31dd0318620f6f9a8b28a6291967bcfcb446a6516771"},
 ]

 [[package]]
@@ -1669,13 +1669,13 @@ files = [

 [[package]]
 name = "openai"
-version = "1.50.0"
+version = "1.51.0"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-1.50.0-py3-none-any.whl", hash = "sha256:8545b3e37aa28a39e5177adbb6142f3e2b2b9e2889ae002c0ba785d917e466e2"},
-    {file = "openai-1.50.0.tar.gz", hash = "sha256:fc774e36ad96839b9fc14f1097093527b8abd1348ed824e25818309820afa344"},
+    {file = "openai-1.51.0-py3-none-any.whl", hash = "sha256:d9affafb7e51e5a27dce78589d4964ce4d6f6d560307265933a94b2e3f3c5d2c"},
+    {file = "openai-1.51.0.tar.gz", hash = "sha256:8dc4f9d75ccdd5466fc8c99a952186eddceb9fd6ba694044773f3736a847149d"},
 ]

 [package.dependencies]
@@ -2560,13 +2560,13 @@ win32 = ["pywin32"]

 [[package]]
 name = "sentry-sdk"
-version = "2.14.0"
+version = "2.15.0"
 description = "Python client for Sentry (https://sentry.io)"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "sentry_sdk-2.14.0-py2.py3-none-any.whl", hash = "sha256:b8bc3dc51d06590df1291b7519b85c75e2ced4f28d9ea655b6d54033503b5bf4"},
-    {file = "sentry_sdk-2.14.0.tar.gz", hash = "sha256:1e0e2eaf6dad918c7d1e0edac868a7bf20017b177f242cefe2a6bcd47955961d"},
+    {file = "sentry_sdk-2.15.0-py2.py3-none-any.whl", hash = "sha256:8fb0d1a4e1a640172f31502e4503543765a1fe8a9209779134a4ac52d4677303"},
+    {file = "sentry_sdk-2.15.0.tar.gz", hash = "sha256:a599e7d3400787d6f43327b973e55a087b931ba2c592a7a7afa691f8eb5e75e2"},
 ]

 [package.dependencies]
@@ -3250,4 +3250,4 @@ test = ["websockets"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "4e7eaffa149ad2f623d61b4f0308a96142b52a01bf543954a378c4d775811ea4"
+content-hash = "686e7f1af124ef2404bc6b46677850c581e8e74f3cab51992fac8e8578f88a3a"
@@ -7,24 +7,25 @@ license = "GPL-3"
 readme = "README.md"

 [tool.poetry.dependencies]
-python = "^3.10"
-pexpect = "^4.9.0"
-torch = "2.2.1"
 # vllm = "0.4.1"
-fire = "0.6.0"
-wandb = "0.17.0"
-tenacity = "8.3.0"
-sglang = "^0.1.16"
 numpy = "^1.26.4"
-openai = "^1.31.0"
-tiktoken = "^0.7.0"
+pexpect = "^4.9.0"
+python = "^3.10"
+sglang = "^0.1.16"
+torch = "2.2.1"

 [tool.poetry.build]
 generate-setup-file = false
 script = "build.py"

+[tool.poetry.group.dev.dependencies]
+# Experiment related dependencies here to not clutter the main project dependencies.
+fire = "0.6.0"
+notebook = "^7.2.1"
+openai = "^1.31.0"
+tenacity = "8.3.0"
+tiktoken = "^0.7.0"
+wandb = "0.17.0"
+
 [build-system]
 requires = ["poetry-core"]
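Moving the experiment-only packages (`fire`, `notebook`, `openai`, `tenacity`, `tiktoken`, `wandb`) into `[tool.poetry.group.dev.dependencies]` keeps the runtime dependency set minimal while the diff stays behavior-preserving for contributors: Poetry installs non-optional groups by default, so `poetry install` still pulls them in, and consumers who only need the library can skip them with `poetry install --without dev`.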