
Commit b7efffb

Jun Gong committed
Update notebook and saved runs.
1 parent 42f0963 commit b7efffb

File tree

5 files changed: +12666 -11 lines


ray-rllib/odsc_west_workshop_2022/anyscale_recsys_tutorial.ipynb

Lines changed: 20 additions & 11 deletions
@@ -279,7 +279,14 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# TODO (exercise): code here"
+"# Let's check out the reward space\n",
+"obs = env.reset()\n",
+"rewards = []\n",
+"done = False\n",
+"while not done:\n",
+"    action = # TODO (exercise): code here\n",
+"    obs, reward, done, info = env.step(action)\n",
+"    rewards.append(reward)"
 ]
 },
 {
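For reference, a minimal, self-contained sketch of what the completed exercise cell might look like, assuming a pre-0.26 Gym API (the 4-tuple step return used in the cell); CartPole and the random action are hypothetical stand-ins for the tutorial's recommender environment and the policy the exercise asks for:

import gym

# Stand-in environment; the notebook rolls out its own recommender env instead.
env = gym.make("CartPole-v1")

obs = env.reset()
rewards = []
done = False
while not done:
    action = env.action_space.sample()  # placeholder policy: random action
    obs, reward, done, info = env.step(action)
    rewards.append(reward)

print(f"episode length: {len(rewards)}, total reward: {sum(rewards)}")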
@@ -608,8 +615,8 @@
 "    DQN,\n",
 "    param_space=bandit_config_offline.to_dict(),\n",
 "    run_config=air.RunConfig(\n",
-"        local_dir=\"./results_notebook/offline_bandit/\",\n",
-"        stop={\"training_iteration\": 100},\n",
+"        local_dir=\"./results_notebook/offline_bandits/\",\n",
+"        stop={\"training_iteration\": 1000},\n",
 "    )\n",
 ")\n",
 "offline_bandit_results = bandit_tuner.fit()"
@@ -629,7 +636,7 @@
 "outputs": [],
 "source": [
 "print('Mean Bandit Episode reward:')\n",
-"offline_bandit_results[0].metrics['evaluation']['episode_reward_mean']"
+"offline_bandits_results[0].metrics['evaluation']['episode_reward_mean']"
 ]
 },
 {
@@ -683,10 +690,10 @@
 "    param_space=dqn_config_offline.to_dict(),\n",
 "    run_config=air.RunConfig(\n",
 "        local_dir=\"./results_notebook/offline_rl/\",\n",
-"        stop={\"training_iteration\": 100},\n",
+"        stop={\"training_iteration\": 30},\n",
 "    )\n",
 ")\n",
-"offline_dqn_results = dqn_tuner.fit()"
+"offline_rl_results = dqn_tuner.fit()"
 ]
 },
 {
@@ -696,7 +703,7 @@
 "outputs": [],
 "source": [
 "print('Mean DQN Episode reward:')\n",
-"offline_dqn_results[0].metrics['evaluation']['episode_reward_mean']"
+"offline_rl_results[0].metrics['evaluation']['episode_reward_mean']"
 ]
 },
 {
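The hunks above all follow the same Ray AIR pattern: build a tune.Tuner around an RLlib algorithm, point air.RunConfig at a local_dir and a stop criterion, call fit(), and read metrics back from the returned ResultGrid. A minimal, self-contained sketch of that pattern, using a CartPole DQN config and a short run as hypothetical stand-ins for the notebook's offline bandit/DQN configs:

from ray import air, tune
from ray.rllib.algorithms.dqn import DQN, DQNConfig

# Assumption: CartPole stands in for the notebook's offline recsys configs.
config = DQNConfig().environment(env="CartPole-v1")

tuner = tune.Tuner(
    DQN,
    param_space=config.to_dict(),
    run_config=air.RunConfig(
        local_dir="./results_notebook/example/",  # progress.csv etc. land here
        stop={"training_iteration": 5},           # short run for illustration
    ),
)
results = tuner.fit()

# ResultGrid supports indexing; metrics is the final reported result dict.
print(results[0].metrics["episode_reward_mean"])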
@@ -705,8 +712,11 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"import pandas as pd\n",
+"\n",
 "# plot the results and compare to baselines\n",
-"offline_dqn_df = pd.read_csv(\"saved_runs/dqn_offline/random_data/progress.csv\")"
+"offline_rl_df = pd.read_csv(\"saved_runs/offline_rl/progress.csv\")\n",
+"offline_bandits_df = pd.read_csv(\"saved_runs/offline_bandits/progress.csv\")"
 ]
 },
 {
@@ -716,9 +726,8 @@
 "outputs": [],
 "source": [
 "\n",
-"sns.lineplot(data=offline_dqn_df, x=\"training_iteration\", y=\"evaluation/episode_reward_mean\", label=\"Offline_DQN\")\n",
-"sns.lineplot(data=dqn_df, x=\"training_iteration\", y=\"episode_reward_mean\", label=\"Online_DQN\")\n",
-"sns.lineplot(data=bandit_df, x=\"training_iteration\", y=\"episode_reward_mean\", label=\"Bandits\")\n",
+"sns.lineplot(data=offline_rl_df, x=\"training_iteration\", y=\"evaluation/episode_reward_mean\", label=\"Offline_DQN\")\n",
+"sns.lineplot(data=offline_bandits_df, x=\"training_iteration\", y=\"evaluation/episode_reward_mean\", label=\"Offline_Bandits\")\n",
 "plt.axhline(random_baseline, color=\"red\", linestyle='--', label=\"random baseline\")\n",
 "plt.legend()\n",
 "plt.title('Offline RL vs. Baselines training performance')"
Binary file not shown.
