diff --git a/.gitmodules b/.gitmodules index e69de29..dfc1f1a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "src/proof_wala/data/proofs/lean/miniF2F-lean4"] + path = src/proof_wala/data/proofs/lean/miniF2F-lean4 + url = https://github.com/yangky11/miniF2F-lean4.git + ignore = dirty diff --git a/src/proof_wala/data/proofs/lean/miniF2F-lean4 b/src/proof_wala/data/proofs/lean/miniF2F-lean4 new file mode 160000 index 0000000..f1b5660 --- /dev/null +++ b/src/proof_wala/data/proofs/lean/miniF2F-lean4 @@ -0,0 +1 @@ +Subproject commit f1b56606f92bb53ac6ec26efce85a04563292ee8 diff --git a/src/proof_wala/main/config/benchmark/miniF2F_test.yaml b/src/proof_wala/main/config/benchmark/miniF2F_test.yaml new file mode 100644 index 0000000..1a47250 --- /dev/null +++ b/src/proof_wala/main/config/benchmark/miniF2F_test.yaml @@ -0,0 +1,94 @@ +name: miniF2F_test +num_files: 1 +language: LEAN4 +few_shot_data_path_for_retrieval: null +few_shot_metadata_filename_for_retrieval: null +dfs_data_path_for_retrieval: null +dfs_metadata_filename_for_retrieval: local.meta.json +theorem_cnt: 1 +timeout_per_theorem_in_secs: 720 +datasets: +- project: src/proof_wala/data/proofs/lean/miniF2F-lean4 + files: + - path: MiniF2F/Test.lean + theorems: "*" + # - aime_1989_p8 + # - aime_1990_p4 + # - algebra_sqineq_unitcircatbpamblt1 + # - amc12a_2002_p6 + # - amc12a_2013_p4 + # - amc12a_2021_p9 + # - amc12b_2002_p19 + # - amc12b_2002_p2 + # - amc12b_2002_p7 + # - amc12b_2020_p2 + # - amc12b_2020_p6 + # - mathd_algebra_107 + # - mathd_algebra_113 + # - mathd_algebra_125 + # - mathd_algebra_129 + # - mathd_algebra_139 + # - mathd_algebra_141 + # - mathd_algebra_142 + # - mathd_algebra_143 + # - mathd_algebra_148 + # - mathd_algebra_158 + # - mathd_algebra_160 + # - mathd_algebra_171 + # - mathd_algebra_176 + # - mathd_algebra_188 + # - mathd_algebra_209 + # - mathd_algebra_24 + # - mathd_algebra_246 + # - mathd_algebra_263 + # - mathd_algebra_270 + # - mathd_algebra_275 + # - mathd_algebra_296 + # - mathd_algebra_302 + # - mathd_algebra_304 + # - mathd_algebra_314 + # - mathd_algebra_329 + # - mathd_algebra_33 + # - mathd_algebra_346 + # - mathd_algebra_354 + # - mathd_algebra_359 + # - mathd_algebra_388 + # - mathd_algebra_398 + # - mathd_algebra_400 + # - mathd_algebra_412 + # - mathd_algebra_419 + # - mathd_algebra_427 + # - mathd_algebra_432 + # - mathd_algebra_44 + # - mathd_algebra_440 + # - mathd_algebra_441 + # - mathd_algebra_478 + # - mathd_algebra_513 + # - mathd_algebra_76 + # - mathd_numbertheory_100 + # - mathd_numbertheory_1124 + # - mathd_numbertheory_12 + # - mathd_numbertheory_127 + # - mathd_numbertheory_150 + # - mathd_numbertheory_175 + # - mathd_numbertheory_207 + # - mathd_numbertheory_212 + # - mathd_numbertheory_229 + # - mathd_numbertheory_235 + # - mathd_numbertheory_237 + # - mathd_numbertheory_239 + # - mathd_numbertheory_254 + # - mathd_numbertheory_293 + # - mathd_numbertheory_299 + # - mathd_numbertheory_3 + # - mathd_numbertheory_341 + # - mathd_numbertheory_342 + # - mathd_numbertheory_343 + # - mathd_numbertheory_345 + # - mathd_numbertheory_447 + # - mathd_numbertheory_517 + # - mathd_numbertheory_551 + # - mathd_numbertheory_66 + # - mathd_numbertheory_728 + # - mathd_numbertheory_769 + # - mathd_numbertheory_85 \ No newline at end of file diff --git a/src/proof_wala/main/config/eval_miniF2F_test_lean.yaml b/src/proof_wala/main/config/eval_miniF2F_test_lean.yaml new file mode 100644 index 0000000..49bf957 --- /dev/null +++ b/src/proof_wala/main/config/eval_miniF2F_test_lean.yaml @@ -0,0 +1,18 @@ +# @package _global_ +defaults: + - benchmark: miniF2F_test + - eval_settings: codet5_beam_64 + - env_settings: bm25_retrieval + - override hydra/job_logging: 'disabled' + +log_dir: /data/LeanDojo_Random/proof_logs/lean +eval_settings: + proof_dump_dir: /data/LeanDojo_Random/proof_dumps/lean + model_name: amitayusht/ProofWala-Lean + checkpoint_dir: .log/checkpoints/ProofWala-Lean + proof_retries: 5 + timeout_in_secs: 600 + temperature: 1.2 + +benchmark: + timeout_per_theorem_in_secs: 600 \ No newline at end of file diff --git a/src/proof_wala/main/config/eval_miniF2F_test_multilingual.yaml b/src/proof_wala/main/config/eval_miniF2F_test_multilingual.yaml new file mode 100644 index 0000000..d63f2a2 --- /dev/null +++ b/src/proof_wala/main/config/eval_miniF2F_test_multilingual.yaml @@ -0,0 +1,18 @@ +# @package _global_ +defaults: + - benchmark: miniF2F_test + - eval_settings: codet5_beam_64 + - env_settings: bm25_retrieval + - override hydra/job_logging: 'disabled' + +log_dir: /data/LeanDojo_Random/proof_logs/multilingual +eval_settings: + proof_dump_dir: /data/LeanDojo_Random/proof_dumps/multilingual/wt-ct + model_name: amitayusht/ProofWala-Multilingual + checkpoint_dir: .log/checkpoints/ProofWala-Multilingual + proof_retries: 5 + timeout_in_secs: 600 + temperature: 1.2 + +benchmark: + timeout_per_theorem_in_secs: 600 diff --git a/src/proof_wala/main/run_proof_search.py b/src/proof_wala/main/run_proof_search.py index 886640a..24cab5b 100644 --- a/src/proof_wala/main/run_proof_search.py +++ b/src/proof_wala/main/run_proof_search.py @@ -252,7 +252,8 @@ def eval_dataset_once( always_use_retrieval=eval_settings.always_use_useful_theorem_retrieval, logger=logger, setup_cmds=eval_benchmark.setup_cmds, - enable_search=False) # TODO: Make this configurable + enable_search=False, + enforce_qed=True) # TODO: Make this configurable if reshuffle: file_theorems = list(file.theorems) random.shuffle(file_theorems)