feat: Add stat function to show prove rate

2024-10-06 23:29:14 -07:00 · 2024-10-06 23:29:14 -07:00 · 789452f7b7
parent 402df63395
commit 789452f7b7
1 changed files with 26 additions and 3 deletions
--- a/experiments/dsp/main.py
+++ b/experiments/dsp/main.py
@ -276,6 +276,7 @@ def full_proof_search_dsp_lean(
    ):
    print(colored(f"DSP on {len(data)} points", "blue", attrs=["bold", "underline"]))
    n_success = 0
    n_tried = 0
    # -- Proof search by DSP over all eval data
    for i, datum in tqdm(enumerate(data), total=len(data), desc='DSP proof loop per data point in benchmark.'):
        file_name = path_output / f"{i:03}.json"
@ -285,11 +286,12 @@ def full_proof_search_dsp_lean(
            obj = json.load(open(file_name, "r"))
            if obj['name'] != key:
                print(colored(f"Existing datum name {obj['name']} does not match dataset {key}. The output directory may be wrong"))
-                break
+                return
            print(f"Skipped {i}:", colored(key, "green"))
            continue
        n_tried += 1
        print(f"Problem {i}:", colored(key, "cyan"))
        result = single_proof_search_dsp_lean(eng, server_func, datum)
@ -298,7 +300,7 @@ def full_proof_search_dsp_lean(
        if result.success:
            n_success += 1
        #server.gc()
-    print(f"Proved {n_success}/{len(data)} problems")
+    print(f"Proved {n_success}/{n_tried} problems")
 experiment_dir = Path(__file__).resolve().parent
@ -394,6 +396,25 @@ def main(args):
    # print(f"{wandb.config=}")
    # run.finish()
 def stat(args):
    path_output = Path(args.output)
    data = load_data(args)
    n_success = 0
    n_tried = 0
    for i, datum in tqdm(enumerate(data), total=len(data), desc='DSP proof loop per data point in benchmark.'):
        file_name = path_output / f"{i:03}.json"
        key = str(datum)
        # Detect if file exists
        obj = json.load(open(file_name, "r"))
        if obj['name'] != key:
            print(colored(f"Existing datum name {obj['name']} does not match dataset {key}. The output directory may be wrong"))
            return
        n_tried += 1
        if obj['success']:
            n_success += 1
    print(f"Proved {n_success}/{n_tried} problems")
 if __name__ == "__main__":
    import argparse
@ -405,7 +426,7 @@ if __name__ == "__main__":
    parser.add_argument(
        'mode',
        help="Function",
-        choices=['eval', 'prompts'],
+        choices=['eval', 'prompts', 'stat'],
    )
    parser.add_argument(
        "--dataset",
@ -441,6 +462,8 @@ if __name__ == "__main__":
    if args.mode == "eval":
        main(args)
    elif args.mode == 'stat':
        stat(args)
    elif args.mode == "prompts":
        prompt = get_prompt_sketch_template_4_lean_v0(verbose=args.verbose)
        print(prompt)