From ce2d689b03b01a125892c0e77897ad404bf00641 Mon Sep 17 00:00:00 2001
From: Leni Aniva
Date: Wed, 2 Oct 2024 11:03:00 -0700
Subject: [PATCH] refactor: Clarify code in dsp

---
 experiments/dsp/main.py | 118 ++++++++++++++++++++++++----------------
 1 file changed, 70 insertions(+), 48 deletions(-)

diff --git a/experiments/dsp/main.py b/experiments/dsp/main.py
index 93a70cf..59bd6b5 100644
--- a/experiments/dsp/main.py
+++ b/experiments/dsp/main.py
@@ -1,4 +1,5 @@
 import sys, os, json
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Union, Any
 from collections import namedtuple
@@ -7,12 +8,21 @@
 from tqdm import tqdm
 from openai import OpenAI
 import wandb
 from tenacity import retry, stop_after_attempt, wait_exponential
+from pantograph import Server
 from solve.dsp_lean_prompts import SYSTEM_PROMPT_DRAFT_V0, prompt_draft_template_lean4_v0, STOP_TOKENS_DRAFT_V0
 from solve.dsp_lean_prompts import SYSTEM_PROMPT_SKETCH_V0, prompt_sketch_template_lean4_v0, STOP_TOKENS_SKETCH_V0

 # prompt_draft_template_lean4_v0 = "Draft an informal solution similar to the one below. The informal solution will be used to sketch a formal proof in the Lean 4 Proof Assistant. Here are some examples of informal problem solutions pairs:\n\nInformal:\n(*### Problem\n\nProve that for any natural number n, n + 0 = n.\n\n### Solution\n\nConsider any natural number n. From properties of addition, adding zero does not change its values. Thus, n + 0 = n.*)\n\nInformal:\n(*### Problem\n\nProve that for any natural number n, n + (m + 1) = (n + m) + 1.\n\n### Solution\n\nConsider any natural numbers n and m. From properties of addition, adding 1 to the sum of n and m is the same as first adding m to n and then adding 1. Thus, n + (m + 1) = (n + m) + 1.*)\n\nInformal:\n(*### Problem\n\nProve that for any natural number n and m, n + m = m + n.\n\n### Solution\n\nConsider any natural numbers n and m. We will do induction on n. Base case: 0 + m = m + 0 by properties of addition. Inductive step, we have n + m = m + n. Then (n + 1) + m = (n + m) + 1 = (m + n) + 1 = m + (n + 1). Thus, by induction, n + m = m + n, qed.*)\n\nInformal: \n(*### Problem\n\n{nl_problem}\n\n### Solution\n"

+@dataclass
+class SamplingParams:
+    n: int
+    max_tokens: int
+    top_p: float
+    temperature: float
+    stop: str
+
 class Engine:
     def __init__(self):
         pass
@@ -22,10 +32,10 @@ class Engine:

 class OpenAI_DSP_Engine(Engine):
     def __init__(
-        self, 
-        model: str, 
+        self,
+        model: str,
         api_key: str = None,
-        base_url: str = None, # e.g., Mistral-7B-Instrcut-v0.2 on http://120.77.8.29:12345
+        base_url: str = None, # e.g., Mistral-7B-Instruct-v0.2 on http://120.77.8.29:12345
         # Draft Params
         draft_system_prompt: str = SYSTEM_PROMPT_DRAFT_V0, # 'You are an expert mathematician and an expert in the Lean 4 Proof Assistant.' (goal do draft)
         draft_prompt_template: str = prompt_draft_template_lean4_v0,
@@ -43,9 +53,13 @@ class OpenAI_DSP_Engine(Engine):
     ):
         super().__init__()
         print(f'{api_key=}, {base_url=}') if verbose_init else None
+
+
+        if not ('gpt-4-' in model or 'gpt-3.5-' in model or 'gpt-4o' in model):
+            raise ValueError(f"Model {model=} not supported.")
         self.model = model
         self.api_key = api_key
-        self.llm = OpenAI(api_key=self.api_key, base_url=base_url) 
+        self.llm = OpenAI(api_key=self.api_key, base_url=base_url)
         # Draft params
         self.draft_system_prompt = draft_system_prompt
         self.draft_prompt_template = draft_prompt_template
@@ -61,7 +75,7 @@ class OpenAI_DSP_Engine(Engine):

 @retry(stop=stop_after_attempt(15), wait=wait_exponential(multiplier=2, max=128))
 def autoformalize_prob(
-    eng, 
+    eng,
     data_pt: dict,
     verbose: bool = False,
 ):
@@ -70,13 +84,13 @@ def autoformalize_prob(

 @retry(stop=stop_after_attempt(15), wait=wait_exponential(multiplier=2, max=128))
 def draft(
-    eng, 
-    data_pt: dict, 
+    eng,
+    data_pt: dict,
     verbose: bool = False,
 ) -> list:
-    """ 
-    Creates (informal nl) draft (nl soln, nl proof sketch) for latter use in a formal proof sketch. 
-    y_pred_nl ~ draft(eng, x_nl_prob, P_draft) 
+    """
+    Creates (informal nl) draft (nl soln, nl proof sketch) for later use in a formal proof sketch.
+    y_pred_nl ~ draft(eng, x_nl_prob, P_draft)
     """
     # Make prompt from template
     nl_problem: str = data_pt['nl_problem'][0]
@@ -98,7 +112,7 @@ def draft(
     completions: list[str] = [completion.message.content for completion in response.choices] # response.choices[i].message
     drafts: list[str] = completions
     return drafts
-    
+
 @retry(stop=stop_after_attempt(15), wait=wait_exponential(multiplier=2, max=128))
 def sketch(
     eng,
     data_pt: dict,
@@ -107,10 +121,10 @@ def sketch(
     autoformalize_prob_in_prompt: bool = False,
     verbose: bool = False,
 ) -> list:
-    """ 
-    Creates (formal fl) sketch (fl proof sketch) for latter use in a formal proof sketch. 
-    z_pred_fl ~ sketch(eng, x_nl_prob, y_pred_nl, x_fl_prob, P_sketch) 
-    """ 
+    """
+    Creates (formal fl) sketch (fl proof sketch) for later use in a formal proof sketch.
+    z_pred_fl ~ sketch(eng, x_nl_prob, y_pred_nl, x_fl_prob, P_sketch)
+    """
     assert len(drafts) == 1, f"For now only 1 draft."
     # Make prompt from template
     x_nl_problem: str = data_pt['nl_problem'][0]
@@ -133,86 +147,92 @@ def sketch(
         top_p=eng.sketch_sampling_params.top_p,
         n=eng.sketch_sampling_params.n,
         # stop=eng.sketch_sampling_params.stop[:3],
-    ) 
+    )
     # Get all completions for single prompt
     completions: list[str] = [completion.message.content for completion in response.choices] # response.choices[i].message
     sketches: list[str] = completions
-    # Return 
+    # Return
     return sketches, x_fl_problem

 def prove(
-    eng, 
-    fl_prob: str, 
+    eng,
+    fl_prob: str,
     fl_sketch: list[str],
 ):
     """
-    
-    Complete formal sketch and check if it proves the theorem. 
-    fl_prob --> Lean4 theorem (problem)
+    Complete formal sketch and check if it proves the theorem.
+
+    fl_prob --> Lean4 theorem (problem)
     fl_sketch --> Lean4 Form Sketch --> have x have ha
-    
+
     """
+    print(f"fl_prob={fl_prob}")
+    print(f"fl_sketch={fl_sketch}")
+    raise RuntimeError("Not implemented")
     # -- Prove
     correct: bool = False
     # -- Return
     return correct
-    
+
 # -- DSP for Lean
 def single_proof_search_dsp_lean(
-    eng: Engine, 
+    eng: Engine,
+    server: Server,
     data_pt: dict,
 ) -> bool:
     # -- Draft: [y_nl_pred_draft]_n ~ draft(eng, x_nl_prob, P_draft)
     y_nl_pred_drafts = draft(eng, data_pt)

-    # -- Sketch: z_fl_pred_sketch ~ sketch(eng, x_nl_prob, [y_nl_pred_draft]_n, x_fl_prob, P_sketch) 
+    # -- Sketch: z_fl_pred_sketch ~ sketch(eng, x_nl_prob, [y_nl_pred_draft]_n, x_fl_prob, P_sketch)
     z_fl_pred_sketches, x_fl_prob = sketch(eng, data_pt, y_nl_pred_drafts)

     # -- Prove: y_fl = prove(eng, x_fl_prob, z_fl_pred_sketches)
     correct: bool = prove(eng, x_fl_prob, z_fl_pred_sketches)

     # -- Return
-    return 
-
+    return correct

 def full_proof_search_dsp_lean(
     eng: Engine,
-    path_2_eval_dataset: Union[str, Path],
+    server: Server,
+    path_2_eval_dataset: Path,
 ):
     # -- Get eval data
-    path_2_eval_dataset = Path(path_2_eval_dataset).expanduser()
     eval_dataset: list[dict] = json.load(open(path_2_eval_dataset, 'r'))
     print(f'{len(eval_dataset)=}')

     # -- Proof search by DSP over all eval data
-    data_pt: dict
     for data_pt in tqdm(eval_dataset, total=len(eval_dataset), desc='DSP proof loop per data point in benchmark.'):
         print(f'{data_pt=}')
-        single_proof_search_dsp_lean(eng, data_pt)
+        flag = single_proof_search_dsp_lean(eng, server, data_pt)
     return

+experiment_dir = Path(__file__).resolve().parent
+
 # -- Main
 def main(
-    path_2_eval_dataset: str = '~/PyPantograph/examples/lean4_dsp/debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json',
+    path_2_eval_dataset: str = experiment_dir / 'debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json',
     # model: str = 'deepseek-ai/deepseek-math-7b-instruct',
     # model: str = 'gpt2',
     # model: str = 'gpt-3.5-turbo',
     model: str = 'gpt-4o',
-    start: int = 0, 
-    end: int = sys.maxsize, 
-    # end: int = 10, # do 10 so enough boxed qs are there 
-    batch_size: int = 10, # putnam has 348 
-    n: int = 1, # num seqs to return for given prompt
+    start: int = 0,
+    end: int = sys.maxsize,
+    # end: int = 10, # do 10 so enough boxed qs are there
+    batch_size: int = 10, # putnam has 348
+    n_samples: int = 1, # num seqs to return for given prompt
     max_tokens: int = 2048,
-    top_p: float = 0.95, 
+    top_p: float = 0.95,
     temperature: float = 0.8,
     mode: str = "dryrun",
 ):
     path_2_eval_dataset = Path(path_2_eval_dataset).expanduser()
     print(f'{path_2_eval_dataset=}')

+    server = Server()
+
     # - Start wandb run
     # print(f'\n\n-- Setup params')
     # CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES")
@@ -226,18 +246,20 @@ def main(
     # print(f'\n Config: \n{config=}')

     # - Run DSP for Lean
-    if 'gpt-4-' in model or 'gpt-3.5-' in model or 'gpt-4o' in model:
-        api_key = os.environ['OPENAI_API_KEY']
-        SamplingParams = namedtuple('SamplingParams', ['n', 'max_tokens', 'top_p', 'temperature', 'stop'])
-        draft_sampling_params = SamplingParams(n=n, max_tokens=max_tokens, top_p=top_p, temperature=temperature, stop=STOP_TOKENS_DRAFT_V0)
-        sketch_sampling_params = SamplingParams(n=n, max_tokens=max_tokens, top_p=top_p, temperature=temperature, stop=STOP_TOKENS_SKETCH_V0)
-        eng: OpenAI_DSP_Engine = OpenAI_DSP_Engine(model=model, api_key=api_key, verbose_init=True, draft_sampling_params=draft_sampling_params, sketch_sampling_params=sketch_sampling_params)
-    else:
-        raise ValueError(f"Model {model=} not supported.")
+    api_key = os.environ['OPENAI_API_KEY']
+    draft_sampling_params = SamplingParams(n=n_samples, max_tokens=max_tokens, top_p=top_p, temperature=temperature, stop=STOP_TOKENS_DRAFT_V0)
+    sketch_sampling_params = SamplingParams(n=n_samples, max_tokens=max_tokens, top_p=top_p, temperature=temperature, stop=STOP_TOKENS_SKETCH_V0)
+    eng: OpenAI_DSP_Engine = OpenAI_DSP_Engine(
+        model=model,
+        api_key=api_key,
+        verbose_init=True,
+        draft_sampling_params=draft_sampling_params,
+        sketch_sampling_params=sketch_sampling_params,
+    )

     # - Full proof search with DSP
     print(f'\n\n-- Full proof search with DSP')
-    full_proof_search_dsp_lean(eng, path_2_eval_dataset)
+    full_proof_search_dsp_lean(eng, server, path_2_eval_dataset)

     # - End run
     # wandb.config.update(config)
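
For anyone exercising the refactored module outside of main(), the snippet below is a minimal driver that mirrors what the patched main() now does with the SamplingParams dataclass, the OpenAI_DSP_Engine, and the Pantograph Server. It is a sketch, not part of the patch: importing experiments/dsp/main.py as `main`, the dataset path, and the environment (an OPENAI_API_KEY variable and a working PyPantograph install) are assumptions.

    import os
    from pathlib import Path
    from pantograph import Server

    # Assumption: experiments/dsp/main.py is importable as `main` from the working directory.
    from main import OpenAI_DSP_Engine, SamplingParams, full_proof_search_dsp_lean
    from solve.dsp_lean_prompts import STOP_TOKENS_DRAFT_V0, STOP_TOKENS_SKETCH_V0

    # Same defaults as main(): one sample per prompt, nucleus sampling at 0.95, temperature 0.8.
    draft_params = SamplingParams(n=1, max_tokens=2048, top_p=0.95, temperature=0.8, stop=STOP_TOKENS_DRAFT_V0)
    sketch_params = SamplingParams(n=1, max_tokens=2048, top_p=0.95, temperature=0.8, stop=STOP_TOKENS_SKETCH_V0)

    eng = OpenAI_DSP_Engine(
        model='gpt-4o',
        api_key=os.environ['OPENAI_API_KEY'],
        verbose_init=True,
        draft_sampling_params=draft_params,
        sketch_sampling_params=sketch_params,
    )
    server = Server()

    # Illustrative path; the real default now resolves relative to main.py via experiment_dir.
    dataset = Path('experiments/dsp/debug/toy_example1_dsp/dsp_debug5_sf/dsp_debug5_sf_train.json')
    full_proof_search_dsp_lean(eng, server, dataset)

Note that with this patch the run stops inside prove(), which prints the formal problem and sketch and then raises RuntimeError("Not implemented"), so a driver like this only exercises the draft and sketch stages.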
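
The patch also threads the Pantograph Server from main() down to the proof loop while leaving prove() itself as a stub. As a reminder of what the server offers for that future step, here is a tiny sketch of its goal-state API; the names and signatures (goal_start, goal_tactic, is_solved) follow the PyPantograph README and should be treated as assumptions, not as the planned prove() implementation.

    from pantograph import Server

    server = Server()

    # Start a goal from a Lean 4 proposition (example shape from the PyPantograph README).
    state = server.goal_start("forall (p q: Prop), Or p q -> Or q p")

    # Each tactic application returns a new goal state; goal_id selects which open goal to attack.
    state = server.goal_tactic(state, goal_id=0, tactic="intro p q h")

    # A sketch-completion step would keep applying tactics until no goals remain.
    print(state.is_solved)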