Skip to content

[CI]add rl to ci #10553

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits on May 13, 2025
Merged
Previous commit
Next commit
fix
  • Loading branch information
Liujie0926 committed May 10, 2025
commit f673a7b44578b485397bc37e46c971a30d03dc4e
119 changes: 0 additions & 119 deletions tests/fixtures/llm/grpo.yaml

This file was deleted.

119 changes: 0 additions & 119 deletions tests/fixtures/llm/reinforce_plus_plus.yaml

This file was deleted.

15 changes: 12 additions & 3 deletions tests/llm/test_grpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
[["qwen"]],
)
class FinetuneTest(LLMTest, unittest.TestCase):
config_path: str = "./tests/fixtures/llm/grpo.yaml"
config_path: str = None
model_dir: str = None

def setUp(self) -> None:
Expand Down Expand Up @@ -88,17 +88,26 @@ def test_finetune(self):
time.sleep(30)

# 运行主逻辑
repo_path = os.getcwd()
rl_dir = os.path.join(os.getcwd(), "./llm/alignment/rl")
os.chdir(rl_dir)
cmd = "python -u -m paddle.distributed.launch --devices \"$CUDA_VISIBLE_DEVICES\" run_rl.py ./tests/fixtures/llm/grpo.yaml"
cmd = "python -u -m paddle.distributed.launch \
--devices \"$CUDA_VISIBLE_DEVICES\" run_rl.py \
../../config/qwen/reinforce_plus_plus_argument.yaml \
--actor_model_name_or_path \"Qwen/Qwen2-1.5B\" \
--max_dec_len 128 \
--max_steps 3 \
--kl_coeff 0.000 \
--kl_loss_coeff 0.000 \
--use_fused_rms_norm true "
pro = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = pro.communicate()
print(out)
pro.wait()
pro.returncode == 0
assert str(out).find("Error") == -1
assert str(err).find("Error") == -1

os.chdir(repo_path)
finally:
# main 执行完毕,关闭 reward server
if reward_proc.poll() is None: # 确保进程还在
Expand Down
20 changes: 14 additions & 6 deletions tests/llm/test_reinforce_plus_plus.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import subprocess
import time
import signal
from unittest import skip

from parameterized import parameterized_class

Expand All @@ -35,7 +34,7 @@
[["qwen"]],
)
class FinetuneTest(LLMTest, unittest.TestCase):
config_path: str = "./tests/fixtures/llm/reinforce_plus_plus.yaml"
config_path: str = None
model_dir: str = None

def setUp(self) -> None:
Expand All @@ -59,7 +58,6 @@ def test_finetune(self):
"FLAGS_mla_use_tensorcore": "0",
"FLAGS_cascade_attention_max_partition_size": "2048",
}

case_env = os.environ.copy()
case_env.update(env_vars)

Expand All @@ -70,7 +68,7 @@ def test_finetune(self):
shell=True,
check=True
)

# 启动 reward server
reward_dir = os.path.join(os.getcwd(), "./llm/alignment/rl/reward")
reward_log = os.path.join(reward_dir, "reward_server.log")
Expand All @@ -90,17 +88,27 @@ def test_finetune(self):
time.sleep(30)

# 运行主逻辑
repo_path = os.getcwd()
rl_dir = os.path.join(os.getcwd(), "./llm/alignment/rl")
os.chdir(rl_dir)
cmd = "python -u -m paddle.distributed.launch --devices \"$CUDA_VISIBLE_DEVICES\" run_rl.py ./tests/fixtures/llm/reinforce_plus_plus.yaml"
cmd = "python -u -m paddle.distributed.launch \
--devices \"$CUDA_VISIBLE_DEVICES\" run_rl.py \
../../config/qwen/reinforce_plus_plus_argument.yaml \
--rl_algorithm \"reinforce_plus_plus\" \
--actor_model_name_or_path \"Qwen/Qwen2-1.5B\" \
--max_dec_len 128 \
--max_steps 3 \
--kl_coeff 0.000 \
--kl_loss_coeff 0.000 \
--use_fused_rms_norm true "
pro = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = pro.communicate()
print(out)
pro.wait()
pro.returncode == 0
assert str(out).find("Error") == -1
assert str(err).find("Error") == -1

os.chdir(repo_path)
finally:
# main 执行完毕,关闭 reward server
if reward_proc.poll() is None: # 确保进程还在
Expand Down
Loading