Skip to content

Atqy/refactor music recommendation #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
PR edits
  • Loading branch information
atqy committed May 11, 2022
commit e3d8d8fab05a2d0cf8429261a1a399fac039644a
2 changes: 1 addition & 1 deletion end_to_end/music_recommendation/01_data_exploration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@
"source": [
"ratings[[\"ratingEventId\", \"userId\"]].plot.hist(\n",
" by=\"userId\", bins=50, title=\"Distribution of # of Ratings by User\"\n",
");"
")"
]
},
{
Expand Down
20 changes: 5 additions & 15 deletions end_to_end/music_recommendation/02_export_feature_groups.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@
"import os\n",
"from awscli.customizations.s3.utils import split_s3_bucket_key\n",
"\n",
"# Sagemaker session\n",
"# SageMaker session\n",
"sess = sagemaker.Session()\n",
"# get session bucket name\n",
"bucket = sess.default_bucket()\n",
Expand All @@ -102,9 +102,7 @@
"# s3 client\n",
"s3_client = boto3.client(\"s3\")\n",
"\n",
"print(f\"this is your default SageMaker Studio bucket name: {bucket}\")\n",
"\n",
"# ps.add({'bucket': bucket, 'prefix': prefix}, namespace='music-rec')"
"print(f\"this is your default SageMaker Studio bucket name: {bucket}\")"
]
},
{
Expand Down Expand Up @@ -449,15 +447,7 @@
"# controls if online store is enabled. Enabling the online store allows quick access to\n",
"# the latest value for a Record via the GetRecord API.\n",
"enable_online_store = True\n",
"fg_name_tracks = feature_group_name\n",
"dw_ecrlist = {\n",
" \"region\": {\n",
" \"us-west-2\": \"174368400705\",\n",
" \"us-east-2\": \"415577184552\",\n",
" \"us-west-1\": \"926135532090\",\n",
" \"us-east-1\": \"663277389841\",\n",
" }\n",
"}"
"fg_name_tracks = feature_group_name"
]
},
{
Expand Down Expand Up @@ -799,7 +789,7 @@
"outputs": [],
"source": [
"# Data Wrangler Container URL.\n",
"container_uri = f\"{dw_ecrlist['region'][region]}.dkr.ecr.{region}.amazonaws.com/sagemaker-data-wrangler-container:1.x\"\n",
"container_uri = sagemaker.image_uris.retrieve(framework=\"data-wrangler\", region=region)\n",
"\n",
"# Processing Job Instance count and instance type.\n",
"instance_count = 2\n",
Expand Down Expand Up @@ -1047,7 +1037,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"check if the athena queres have been done and the data sets exist, then just do train test split or just proceed to training"
"Check if the Athena queries have been done and the data sets exist, then just do train test split or just proceed to training"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Sagemaker session\n",
"# SageMaker session\n",
"sess = sagemaker.Session()\n",
"# get session bucket name\n",
"bucket = sess.default_bucket()\n",
Expand Down Expand Up @@ -312,7 +312,6 @@
" instance_type=train_instance_type,\n",
" image_uri=image,\n",
" hyperparameters=hyperparameters,\n",
" # base_job_name=model_name,\n",
" output_path=estimator_output_path,\n",
" debugger_hook_config=DebuggerHookConfig(\n",
" s3_output_path=estimator_output_path + \"/debugger\",\n",
Expand Down
12 changes: 2 additions & 10 deletions end_to_end/music_recommendation/end_to_end_pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -461,15 +461,7 @@
"# Define feature group names we previously created in notebooks 02a-c\n",
"fg_name_tracks = \"track-features-music-rec\"\n",
"fg_name_ratings = \"ratings-features-music-rec\"\n",
"fg_name_user_preferences = \"user-5star-track-features-music-rec\"\n",
"dw_ecrlist = {\n",
" \"region\": {\n",
" \"us-west-2\": \"174368400705\",\n",
" \"us-east-2\": \"415577184552\",\n",
" \"us-west-1\": \"926135532090\",\n",
" \"us-east-1\": \"663277389841\",\n",
" }\n",
"}"
"fg_name_user_preferences = \"user-5star-track-features-music-rec\""
]
},
{
Expand Down Expand Up @@ -540,7 +532,7 @@
"# Data Wrangler Container URL\n",
"# You can also find the proper container uri by exporting your Data Wrangler flow to a pipeline notebook\n",
"\n",
"container_uri = f\"{dw_ecrlist['region'][region]}.dkr.ecr.{region}.amazonaws.com/sagemaker-data-wrangler-container:1.x\"\n",
"container_uri = sagemaker.image_uris.retrieve(framework=\"data-wrangler\", region=region)\n",
"\n",
"\n",
"flow_processor = sagemaker.processing.Processor(\n",
Expand Down