Skip to content

Fixed and Added unit tests for EnsureResourceAsync hanging issue #4943

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
fedd23c
Update ResourceManagerUtils.cs
mstfbl Mar 13, 2020
85f10af
Added TestDownloadFromLocal
mstfbl Mar 16, 2020
63b3f33
Added TestDownloadError
mstfbl Mar 16, 2020
e16b7d6
Revert "Added TestDownloadError"
mstfbl Mar 16, 2020
2caf810
Edit EnsureResourceAsync and its dependencies
mstfbl Mar 16, 2020
6e05a87
Edited TestDownloadFromLocal and re-added TestDownloadError()
mstfbl Mar 16, 2020
69b9827
Disabling TestDownloadFromLocal and TestDownloadError
mstfbl Mar 16, 2020
6e5b246
Edits
mstfbl Mar 16, 2020
cd56549
Re-activated TestDownloadError and TestDownloadFromLocal
mstfbl Mar 16, 2020
2c4d22e
Edits, added 5 min timeout, and debugging requested url
mstfbl Mar 16, 2020
2f67666
Removed timeouts, and re-added Resource download tests in separate un…
mstfbl Mar 17, 2020
8bf03c8
Edits
mstfbl Mar 17, 2020
fd3c7e6
Removed hardcode "microsoft.com" check for HTTP Status Code
mstfbl Mar 18, 2020
bc8b065
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
95514c4
Edits for reviews, removing hardcodings of status codes
mstfbl Mar 18, 2020
93b5454
Removing parentheses from one-liner if statement
mstfbl Mar 18, 2020
a54c7e0
Update TestResourceDownload.cs
mstfbl Mar 18, 2020
b8d5094
Update TestResourceDownload.cs
mstfbl Mar 18, 2020
38fc48f
Nit fix + test case fixes
mstfbl Mar 18, 2020
666d328
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
a9e1b5d
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
d460db7
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
ed3c6fc
Update ResourceManagerUtils.cs
mstfbl Mar 19, 2020
d7b43ed
Added checking for the host of the download absoluteURL equaling "aka…
mstfbl Mar 24, 2020
d9cdc07
Edit TestResourceDownload
mstfbl Mar 24, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 43 additions & 21 deletions src/Microsoft.ML.Core/Utilities/ResourceManagerUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,10 @@ private async Task<string> DownloadFromUrlWithRetryAsync(IHostEnvironment env, I
for (int i = 0; i < retryTimes; ++i)
{
var thisDownloadResult = await DownloadFromUrlAsync(env, ch, url, fileName, timeout, filePath);

if (string.IsNullOrEmpty(thisDownloadResult))
return thisDownloadResult;
else
downloadResult += thisDownloadResult + @"\n";

await Task.Delay(10 * 1000);
}

Expand Down Expand Up @@ -160,27 +158,8 @@ private async Task<string> DownloadFromUrlAsync(IHostEnvironment env, IChannel c
deleteNeeded = true;
return (await t).Message;
}

return CheckValidDownload(ch, filePath, url, ref deleteNeeded);
}
}

/// <summary>
/// Inspects a freshly downloaded file and decides whether it is really an HTML redirect page
/// instead of the requested resource.
/// </summary>
/// <param name="ch">Channel of the caller; not used by this check.</param>
/// <param name="filePath">Path of the downloaded file on disk.</param>
/// <param name="url">The url the file was requested from; only used in the error message.</param>
/// <param name="deleteNeeded">Set to true when the file is judged to be a redirect page, false when a
/// small file passes the check. Left untouched when the file is larger than 4096 bytes.</param>
/// <returns>null when the download looks valid, otherwise a message describing the problem.</returns>
private static string CheckValidDownload(IChannel ch, string filePath, string url, ref bool deleteNeeded)
{
    // If the relative url does not exist, aka.ms redirects to www.microsoft.com. Make sure this did not happen.
    // A big file is definitely not the redirect page, so it is accepted without being read.
    const long maxRedirectPageSize = 4096;
    if (new FileInfo(filePath).Length > maxRedirectPageSize)
        return null;

    string contents;
    using (var reader = new StreamReader(filePath))
        contents = reader.ReadToEnd();

    // The redirect page is recognized by its HTML skeleton plus a mention of microsoft.com.
    bool looksLikeRedirectPage = contents.Contains("<head>")
        && contents.Contains("<body>")
        && contents.Contains("microsoft.com");

    deleteNeeded = looksLikeRedirectPage;
    return looksLikeRedirectPage
        ? $"The url '{url}' does not exist. Url was redirected to www.microsoft.com."
        : null;
}

private static void TryDelete(IChannel ch, string filePath, bool warn = true)
Expand Down Expand Up @@ -274,6 +253,8 @@ private Exception DownloadResource(IHostEnvironment env, IChannel ch, WebClient
using (var ws = fh.CreateWriteStream())
{
var headers = webClient.ResponseHeaders.GetValues("Content-Length");
if (IsRedirectToDefaultPage(uri.AbsoluteUri))
return ch.Except($"The provided url ({uri}) redirects to the default url ({DefaultUrl})");
if (Utils.Size(headers) == 0 || !long.TryParse(headers[0], out var size))
size = 10000000;

Expand Down Expand Up @@ -311,6 +292,36 @@ private Exception DownloadResource(IHostEnvironment env, IChannel ch, WebClient
}
}

/// <summary>
/// Probes the provided url with automatic redirects disabled and reports whether the server
/// answers with an HTTP 302 (<see cref="HttpStatusCode.Redirect"/>), which is treated as a redirect
/// to the default url <see cref="ResourceManagerUtils.DefaultUrl"/>.
/// </summary>
/// <remarks>
/// Only the status code is inspected; the redirect target itself is not compared against the default url.
/// </remarks>
/// <param name="url"> The provided url to check </param>
/// <returns>
/// true when the server answers with a 302; false for non-HTTP requests (for example file urls),
/// for any other status code (including 301), and when no HTTP response could be obtained at all.
/// </returns>
public bool IsRedirectToDefaultPage(string url)
{
    // File requests (and any other non-HTTP request type) cannot be redirected to the default
    // aka.ms webpage. Using 'as' also avoids an InvalidCastException for schemes such as ftp.
    var httpWebRequest = WebRequest.Create(url) as HttpWebRequest;
    if (httpWebRequest == null)
        return false;

    httpWebRequest.AllowAutoRedirect = false;
    try
    {
        // With auto-redirect disabled a 3xx answer may be handed back as a normal response
        // rather than raised as a WebException, so the status is checked on this path as well.
        using (var response = httpWebRequest.GetResponse() as HttpWebResponse)
            return response != null && response.StatusCode == HttpStatusCode.Redirect;
    }
    catch (WebException e)
    {
        // e.Response is null when the failure happened before any HTTP response was received
        // (name resolution failure, connection refused, timeout, ...): that is not a redirect.
        using (var response = e.Response as HttpWebResponse)
            return response != null && response.StatusCode == HttpStatusCode.Redirect;
    }
}

public static ResourceDownloadResults GetErrorMessage(out string errorMessage, params ResourceDownloadResults[] result)
{
var errorResult = result.FirstOrDefault(res => !string.IsNullOrEmpty(res.ErrorMessage));
Expand All @@ -328,4 +339,15 @@ public static ResourceDownloadResults GetErrorMessage(out string errorMessage, p
private static extern int chmod(string pathname, int mode);
#pragma warning restore IDE1006
}

/// <summary>
/// A <see cref="WebClient"/> that remembers the most recent <see cref="WebResponse"/> it obtained,
/// so callers can inspect it after a request has been issued.
/// </summary>
public class WebClientResponse : WebClient
{
    /// <summary>
    /// The response returned by the latest call to <see cref="GetWebResponse(WebRequest)"/>;
    /// null until such a call has completed.
    /// </summary>
    public WebResponse Response { get; private set; }

    /// <summary>
    /// Delegates to the base implementation and records the response it returns before handing it back.
    /// </summary>
    /// <param name="request">The request whose response is being fetched.</param>
    /// <returns>The response produced by the base <see cref="WebClient"/>.</returns>
    protected override WebResponse GetWebResponse(WebRequest request)
        => Response = base.GetWebResponse(request);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ private string EnsureModelFile(IHostEnvironment env, out int linesToSkip, WordEm
{
string dir = kind == WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding ? Path.Combine("Text", "Sswe") : "WordVectors";
var url = $"{dir}/{modelFileName}";
var ensureModel = ResourceManagerUtils.Instance.EnsureResourceAsync(Host, ch, url, modelFileName, dir, Timeout);
var ensureModel = ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, url, modelFileName, dir, Timeout);
ensureModel.Wait();
var errorResult = ResourceManagerUtils.GetErrorMessage(out var errorMessage, ensureModel.Result);
if (errorResult != null)
Expand Down
276 changes: 276 additions & 0 deletions test/Microsoft.ML.Core.Tests/UnitTests/TestResourceDownload.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.RunTests;
using Microsoft.ML.Runtime;
using Xunit;
using Xunit.Abstractions;

[assembly: CollectionBehavior(DisableTestParallelization = true)]

namespace Microsoft.ML.Core.Tests.UnitTests
{
public class TestResourceDownload : BaseTestBaseline
{
/// <summary>
/// Creates the test class, forwarding the xUnit output helper to the base test class.
/// </summary>
/// <param name="helper">Output helper supplied by xUnit; passed unchanged to the base constructor.</param>
public TestResourceDownload(ITestOutputHelper helper)
: base(helper)
{
}
/// <summary>
/// Checks that <c>EnsureResourceAsync</c> can fetch a resource when the custom resources url
/// environment variable points at a local directory.
/// </summary>
/// <remarks>
/// The test copies breast-cancer.txt into the "resources/subdir" output folder, points the custom
/// resources url at the "resources" output folder, requests "subdir/breast-cancer.txt" into the
/// "copyto" output folder and fails unless the file shows up there without an error message.
/// Both environment variables touched by the test are restored afterwards, whatever the outcome.
/// </remarks>
[Fact]
[TestCategory("ResourceDownload")]
public void TestDownloadFromLocal()
{
    var envVarOld = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
    var resourcePathVarOld = Environment.GetEnvironmentVariable(Utils.CustomSearchDirEnvVariable);

    // Everything that mutates the process environment lives inside the try block, so that a failing
    // assertion or file copy during setup cannot leak the modified variables into other tests.
    try
    {
        Environment.SetEnvironmentVariable(Utils.CustomSearchDirEnvVariable, null);

        var baseDir = GetOutputPath("resources");
        Assert.True(Uri.TryCreate(baseDir, UriKind.Absolute, out var baseDirUri), "Uri.TryCreate failed");
        Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, baseDirUri.AbsoluteUri);
        var envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
        Assert.True(envVar == baseDirUri.AbsoluteUri);
        var path = DeleteOutputPath(Path.Combine("resources", "subdir"), "breast-cancer.txt");

        // Stage the file that will act as the "remote" resource.
        var bc = GetDataPath("breast-cancer.txt");
        File.Copy(bc, path);

        // Make sure the destination does not already contain the file from a previous run.
        var saveToDir = GetOutputPath("copyto");
        DeleteOutputPath("copyto", "breast-cancer.txt");
        var env = new ConsoleEnvironment(42);
        using (var ch = env.Start("Downloading"))
        {
            // Timeout argument is 1 * 60 * 1000 (presumably milliseconds, i.e. one minute - confirm against EnsureResourceAsync).
            var t = ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "subdir/breast-cancer.txt", "breast-cancer.txt", saveToDir, 1 * 60 * 1000);
            t.Wait();

            if (t.Result.ErrorMessage != null)
                Fail(String.Format("Expected zero length error string. Received error: {0}", t.Result.ErrorMessage));
            if (t.Status != TaskStatus.RanToCompletion)
                Fail("Download did not complete succesfully");
            if (!File.Exists(GetOutputPath("copyto", "breast-cancer.txt")))
            {
                Fail($"File '{GetOutputPath("copyto", "breast-cancer.txt")}' does not exist. " +
                    $"File was downloaded to '{t.Result.FileName}' instead." +
                    $"MICROSOFTML_RESOURCE_PATH is set to {Environment.GetEnvironmentVariable(Utils.CustomSearchDirEnvVariable)}");
            }
            Done();
        }
    }
    finally
    {
        // Set environment variables back to their old values.
        Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, envVarOld);
        Environment.SetEnvironmentVariable(Utils.CustomSearchDirEnvVariable, resourcePathVarOld);
    }
}

/// <summary>
/// Runs a sequence of failing download scenarios against <c>EnsureResourceAsync</c> and checks,
/// for each one, that the task still runs to completion and that no file is left behind in the
/// "copyto" output folder.
/// </summary>
/// <remarks>
/// The scenarios run in order and share state (the environment variables and the two string
/// builders that capture the environment's output), so they are not independent of each other:
/// bad local path, good local path with a bad file name, bad url, good url with a bad page, and
/// finally a download with a short timeout. The original values of the three environment
/// variables read at the top are restored in the finally block.
/// </remarks>
[Fact]
[TestCategory("ResourceDownload")]
public void TestDownloadError()
{
// Remember the current values so the finally block can restore them.
var envVarOld = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
var timeoutVarOld = Environment.GetEnvironmentVariable(ResourceManagerUtils.TimeoutEnvVariable);
var resourcePathVarOld = Environment.GetEnvironmentVariable(Utils.CustomSearchDirEnvVariable);
// NOTE(review): this variable is cleared before the try block begins, so it is the one
// mutation not covered by the try body itself (it is still restored in the finally below).
Environment.SetEnvironmentVariable(Utils.CustomSearchDirEnvVariable, null);

// Bad local path.
try
{
// A UNC path with a fresh GUID folder, so the location cannot already exist.
if (!Uri.TryCreate($@"\\ct01\public\{Guid.NewGuid()}\", UriKind.Absolute, out var badUri))
Fail("Uri could not be created");
Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, badUri.AbsoluteUri);
var envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
if (envVar != badUri.AbsoluteUri)
Fail("Environment variable not set properly");

var saveToDir = GetOutputPath("copyto");
DeleteOutputPath("copyto", "breast-cancer.txt");
// These builders capture the environment's out/err streams; they are cleared and reused by every scenario below.
var sbOut = new StringBuilder();
var sbErr = new StringBuilder();
using (var outWriter = new StringWriter(sbOut))
using (var errWriter = new StringWriter(sbErr))
{
var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter);
using (var ch = env.Start("Downloading"))
{
var t = ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "breast-cancer.txt", "breast-cancer.txt", saveToDir, 10 * 1000);
t.Wait();

Log("Bad path");
Log($"out: {sbOut.ToString()}");
Log($"error: {sbErr.ToString()}");
if (t.Status != TaskStatus.RanToCompletion)
Fail("Download did not complete succesfully");
if (File.Exists(Path.Combine(saveToDir, "breast-cancer.txt")))
Fail($"File '{GetOutputPath("copyto", "breast-cancer.txt")}' should have been deleted.");
}
}

// Good local path, bad file name.
// The base location is the path of an existing data file with "bad_addition" appended to it.
if (!Uri.TryCreate(GetDataPath("breast-cancer.txt") + "bad_addition", UriKind.Absolute, out var goodUri))
Fail("Uri could not be created");

Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, goodUri.AbsoluteUri);
envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
if (envVar != goodUri.AbsoluteUri)
Fail("Environment variable not set properly");

DeleteOutputPath("copyto", "breast-cancer.txt");
sbOut.Clear();
sbErr.Clear();
using (var outWriter = new StringWriter(sbOut))
using (var errWriter = new StringWriter(sbErr))
{
var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter);

using (var ch = env.Start("Downloading"))
{
// Requests "breast-cancer1.txt" but would save it as "breast-cancer.txt".
var t = ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "breast-cancer1.txt", "breast-cancer.txt", saveToDir, 10 * 1000);
t.Wait();

Log("Good path, bad file name");
Log($"out: {sbOut.ToString()}");
Log($"error: {sbErr.ToString()}");

if (t.Status != TaskStatus.RanToCompletion)
Fail("Download did not complete succesfully");
if (File.Exists(Path.Combine(saveToDir, "breast-cancer.txt")))
Fail($"File '{GetOutputPath("copyto", "breast-cancer.txt")}' should have been deleted.");
}
}

// Bad url.
if (!Uri.TryCreate("https://fake-website/fake-model.model/", UriKind.Absolute, out badUri))
Fail("Uri could not be created");

Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, badUri.AbsoluteUri);
envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
if (envVar != badUri.AbsoluteUri)
Fail("Environment variable not set properly");

// NOTE(review): this clears "ResNet_18_Updated.model", while the file checked below is
// "test_bad_url.model" - confirm which name was meant to be cleaned up.
DeleteOutputPath("copyto", "ResNet_18_Updated.model");
sbOut.Clear();
sbErr.Clear();
using (var outWriter = new StringWriter(sbOut))
using (var errWriter = new StringWriter(sbErr))
{
var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter);
using (var ch = env.Start("Downloading"))
{
var fileName = "test_bad_url.model";
var t = ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "Image/ResNet_18_Updated.model", fileName, saveToDir, 10 * 1000);
t.Wait();

Log("Bad url");
Log($"out: {sbOut.ToString()}");
Log($"error: {sbErr.ToString()}");

if (t.Status != TaskStatus.RanToCompletion)
Fail("Download did not complete succesfully");
if (File.Exists(Path.Combine(saveToDir, fileName)))
Fail($"File '{Path.Combine(saveToDir, fileName)}' should have been deleted.");
}
}

// Good url, bad page.
// NOTE(review): this scenario talks to a real external site, so it depends on network access.
if (!Uri.TryCreate("https://cnn.com/", UriKind.Absolute, out var cnn))
Fail("Uri could not be created");
Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, cnn.AbsoluteUri);
envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
if (envVar != cnn.AbsoluteUri)
Fail("Environment variable not set properly");

// NOTE(review): as above, the name cleared here differs from the fileName checked below.
DeleteOutputPath("copyto", "ResNet_18_Updated.model");
sbOut.Clear();
sbErr.Clear();
using (var outWriter = new StringWriter(sbOut))
using (var errWriter = new StringWriter(sbErr))
{
var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter);
using (var ch = env.Start("Downloading"))
{
var fileName = "test_cnn_page_does_not_exist.model";
var t = ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "Image/ResNet_18_Updated.model", fileName, saveToDir, 10 * 1000);
t.Wait();

Log("Good url, bad page");
Log($"out: {sbOut.ToString()}");
Log($"error: {sbErr.ToString()}");

if (t.Status != TaskStatus.RanToCompletion)
Fail("Download did not complete succesfully");
// The exact error text is only verified when CORECLR is not defined.
#if !CORECLR
if (!sbErr.ToString().Contains("(404) Not Found"))
Fail($"Error message should contain '(404) Not Found. Instead: {sbErr.ToString()}");
#endif
if (File.Exists(Path.Combine(saveToDir, fileName)))
Fail($"File '{Path.Combine(saveToDir, fileName)}' should have been deleted.");
}
}

// Download from local, short time out.
// NOTE(review): despite the heading above, the custom url set from 'path' is reset to null a few
// lines further down, before the download starts; the log line below says "Default url, short time out".
// The heading and the 'path' setup look stale - confirm.
#if CORECLR
var path = Path.Combine(Path.GetDirectoryName(typeof(TestImageAnalyticsTransforms).Assembly.Location), "..", "AutoLoad");
#else
var path = Path.GetDirectoryName(typeof(TestImageAnalyticsTransforms).Assembly.Location);
#endif

Assert.True(Uri.TryCreate(path, UriKind.Absolute, out var baseDirUri));
Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, baseDirUri.AbsoluteUri);
envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
if (envVar != baseDirUri.AbsoluteUri)
Fail("Environment variable not set properly");

// Clear the custom url again, so no custom resources url is in effect for the download below.
Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, null);
envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
if (envVar != null)
Fail("Environment variable not set properly");

// Override the timeout through the environment (value "10"; units are not visible here -
// presumably milliseconds, confirm against ResourceManagerUtils).
Environment.SetEnvironmentVariable(ResourceManagerUtils.TimeoutEnvVariable, "10");
envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.TimeoutEnvVariable);
if (envVar != "10")
Fail("Environment variable not set properly");

// NOTE(review): clears "ResNet_18_Updated.model" although this scenario requests
// "Image/AlexNet_Updated.model" and checks "test_short_timeout.model" - confirm.
DeleteOutputPath("copyto", "ResNet_18_Updated.model");
sbOut.Clear();
sbErr.Clear();
using (var outWriter = new StringWriter(sbOut))
using (var errWriter = new StringWriter(sbErr))
{
var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter);
using (var ch = env.Start("Downloading"))
{
var fileName = "test_short_timeout.model";
var t = ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "Image/AlexNet_Updated.model", fileName, saveToDir, 10 * 1000);
t.Wait();

Log("Default url, short time out");
Log($"out: {sbOut.ToString()}");
Log($"error: {sbErr.ToString()}");

// Unlike the earlier scenarios, the task status is not checked here; the timeout message
// itself is only verified when CORECLR is not defined.
#if !CORECLR
if (!sbErr.ToString().Contains("Download timed out"))
Fail($"Error message should contain the string 'Download timed out'. Instead: {sbErr.ToString()}");
#endif
if (File.Exists(Path.Combine(saveToDir, fileName)))
Fail($"File '{Path.Combine(saveToDir, fileName)}' should have been deleted.");
}
}
Done();
}
finally
{
// Set environment variable back to its old value.
Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, envVarOld);
Environment.SetEnvironmentVariable(ResourceManagerUtils.TimeoutEnvVariable, timeoutVarOld);
Environment.SetEnvironmentVariable(Utils.CustomSearchDirEnvVariable, resourcePathVarOld);
}
}
}
}