Skip to content

Add XmlReader Async methods #1910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Directory.Build.props
Original file line number Diff line number Diff line change
@@ -70,7 +70,7 @@
<LatestTargetFramework>net8.0</LatestTargetFramework>
<SamplesFrameworks>net8.0</SamplesFrameworks>
<SamplesFrameworks Condition=" '$(OS)' == 'Windows_NT' ">$(SamplesFrameworks);net472</SamplesFrameworks>
<DefineConstants Condition=" '$(TargetFramework)' != 'net35' And '$(TargetFramework)' != 'net40' And '$(TargetFramework)' != 'net46' And '$(TargetFramework)' != 'net472' ">$(DefineConstants);FEATURE_ASYNC_SAX_XML</DefineConstants>
<DefineConstants Condition=" '$(TargetFramework)' != 'net35' And '$(TargetFramework)' != 'net40' And '$(TargetFramework)' != 'net46' And '$(TargetFramework)' != 'net472' ">$(DefineConstants);TASKS_SUPPORTED</DefineConstants>
</PropertyGroup>
</Otherwise>
</Choose>
33 changes: 24 additions & 9 deletions src/DocumentFormat.OpenXml.Framework/OpenXmlPartReader.cs
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@ namespace DocumentFormat.OpenXml
/// <summary>
/// Represents the Open XML part reader class.
/// </summary>
public class OpenXmlPartReader : OpenXmlReader
public partial class OpenXmlPartReader : OpenXmlReader
{
private readonly IRootElementFeature _rootElements;
private readonly IOpenXmlNamespaceResolver _resolver;
@@ -100,7 +100,7 @@ public OpenXmlPartReader(Stream partStream, IFeatureCollection features, OpenXml

_resolver = features.GetRequired<IOpenXmlNamespaceResolver>();
_rootElements = features.GetRequired<IRootElementFeature>();
_xmlReader = CreateReader(partStream, options.CloseStream, options.MaxCharactersInPart, ignoreWhitespace: options.IgnoreWhitespace, out _standalone, out _encoding);
_xmlReader = CreateReader(partStream, options, out _standalone, out _encoding);
}

/// <summary>
@@ -402,11 +402,18 @@ public override void Skip()
/// <returns>true if the next element was read successfully; false if there are no more elements to read. </returns>
private bool MoveToNextElement()
{
switch (_elementState)
if (_elementState == ElementState.Null)
{
case ElementState.Null:
return ReadRoot();
return ReadRoot();
}

return MoveToNextElementHelper();
}

private bool MoveToNextElementHelper()
{
switch (_elementState)
{
case ElementState.EOF:
return false;

@@ -667,17 +674,20 @@ public override void Close()
_xmlReader.Close();
}

private XmlReader CreateReader(Stream partStream, bool closeInput, long maxCharactersInPart, bool ignoreWhitespace, out bool? standalone, out string? encoding)
private XmlReader CreateReader(Stream partStream, OpenXmlPartReaderOptions options, out bool? standalone, out string? encoding)
{
var settings = new XmlReaderSettings
{
MaxCharactersInDocument = maxCharactersInPart,
CloseInput = closeInput,
IgnoreWhitespace = ignoreWhitespace,
MaxCharactersInDocument = options.MaxCharactersInPart,
CloseInput = options.CloseStream,
IgnoreWhitespace = options.IgnoreWhitespace,
#if NET35
ProhibitDtd = true,
#else
DtdProcessing = DtdProcessing.Prohibit,
#endif
#if TASKS_SUPPORTED
Async = options.Async,
#endif
};

@@ -727,6 +737,11 @@ private bool ReadRoot()
_xmlReader.Skip();
}

return ReadRootHelper();
}

private bool ReadRootHelper()
{
if (_xmlReader.EOF || !_xmlReader.IsStartElement())
{
throw new InvalidDataException(ExceptionMessages.PartIsEmpty);
110 changes: 110 additions & 0 deletions src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderAsync.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Text;
#if TASKS_SUPPORTED
using System.Threading.Tasks;
#endif
using System.Xml;

namespace DocumentFormat.OpenXml;

/// <summary>
/// Represents the Open XML part reader class.
/// </summary>
public partial class OpenXmlPartReader : OpenXmlReader
{
/// <summary>
/// Gets the <see cref="XmlNodeType"/> of the node the underlying XML reader is currently positioned on.
/// </summary>
/// <remarks>
/// The value is forwarded unchanged from the wrapped <see cref="XmlReader"/>, so it can be any
/// node type that reader reports (for example <c>Element</c>, <c>Text</c>, <c>CDATA</c> or <c>Comment</c>).
/// </remarks>
public override XmlNodeType NodeType => _xmlReader.NodeType;

#if TASKS_SUPPORTED
/// <summary>
/// Asynchronously advances the reader to the next element of the Open XML document.
/// </summary>
/// <returns>
/// A task whose result is <c>true</c> when the reader moved to another element, or <c>false</c>
/// when there are no more elements to read.
/// </returns>
public override async Task<bool> ReadAsync()
{
    ThrowIfObjectDisposed();

    bool moved = await MoveToNextElementAsync().ConfigureAwait(false);

    // Nothing was read, or the caller asked for miscellaneous nodes to be reported as well:
    // in both cases the first move is the final answer.
    if (!moved || ReadMiscNodes)
    {
        return moved;
    }

    // Miscellaneous nodes are not reported, so keep advancing past them.
    while (moved && IsMiscNode)
    {
        moved = await MoveToNextElementAsync().ConfigureAwait(false);
    }

    return moved;
}

/// <summary>
/// Asynchronously moves the reader to the first child element of the current node.
/// </summary>
/// <returns>
/// A task whose result is the value returned by <see cref="ReadFirstChild"/>: <c>true</c> if the first
/// child element was read successfully; <c>false</c> if there are no child elements to read.
/// </returns>
/// <remarks>
/// No asynchronous child traversal is implemented yet, so the work is performed synchronously by
/// <see cref="ReadFirstChild"/> and returned as an already-completed task. Any exception raised by the
/// synchronous call is thrown directly to the caller rather than being stored in the returned task.
/// </remarks>
public override Task<bool> ReadFirstChildAsync()
{
    ThrowIfObjectDisposed();

    // Delegate to the synchronous implementation instead of reporting success without moving
    // the reader; this keeps the override's result in line with the base class contract.
    return Task.FromResult(ReadFirstChild());
}

/// <summary>
/// Moves to the next element. Only the initial read of the root element uses the asynchronous
/// path; every other state is handled by the synchronous <c>MoveToNextElementHelper</c>.
/// </summary>
/// <returns>A task whose result is <c>true</c> if an element was read; otherwise <c>false</c>.</returns>
private async Task<bool> MoveToNextElementAsync()
{
    if (_elementState != ElementState.Null)
    {
        return MoveToNextElementHelper();
    }

    // Nothing has been read yet: locate the root element first.
    return await ReadRootAsync().ConfigureAwait(false);
}

/// <summary>
/// Asynchronously positions the underlying XML reader on the first element node (or at the end
/// of the input) and then hands over to <c>ReadRootHelper</c> to finish reading the root.
/// </summary>
/// <returns>A task whose result is the value returned by <c>ReadRootHelper</c>.</returns>
private async Task<bool> ReadRootAsync()
{
    Debug.Assert(_elementState == ElementState.Null);
    Debug.Assert(_elementStack.Count == 0);

    // TODO: should we take care of entity? <!DOCTYPE page [ <!ENTITY company "Microsoft"> ]>
    // TODO: is it OK that we skip all prologue ( DOCTYPE, Comment, PT ) ?
    await _xmlReader.MoveToContentAsync().ConfigureAwait(false);

    // Skip whatever content precedes the first element; stop at end of input.
    while (!(_xmlReader.EOF || _xmlReader.NodeType == XmlNodeType.Element))
    {
        await _xmlReader.SkipAsync().ConfigureAwait(false);
    }

    return ReadRootHelper();
}
#endif
}
15 changes: 15 additions & 0 deletions src/DocumentFormat.OpenXml.Framework/OpenXmlPartReaderOptions.cs
Original file line number Diff line number Diff line change
@@ -31,11 +31,26 @@ public struct OpenXmlPartReaderOptions
/// </summary>
public bool CloseStream { get; set; }

#if TASKS_SUPPORTED
/// <summary>
/// Gets or sets a value indicating whether the part reader should operate asynchronously.
/// </summary>
/// <remarks>
/// The value is passed to <c>XmlReaderSettings.Async</c> when the part reader creates its
/// underlying XML reader, which is what allows that reader's asynchronous methods to be used.
/// This property is only compiled when the <c>TASKS_SUPPORTED</c> symbol is defined for the
/// build target.
/// </remarks>
public bool Async { get; set; }
#endif

/// <summary>
/// Creates a copy of these options adjusted for reading the given part.
/// </summary>
/// <param name="part">The part whose <c>MaxCharactersInPart</c> is used when this instance has a limit of zero.</param>
/// <returns>A new options value; <see cref="CloseStream"/> is always <c>true</c> on the result.</returns>
internal OpenXmlPartReaderOptions UpdateForPart(OpenXmlPart part)
{
    // A limit of zero on these options defers to the limit carried by the part.
    var characterLimit = MaxCharactersInPart != 0 ? MaxCharactersInPart : part.MaxCharactersInPart;

    return new OpenXmlPartReaderOptions
    {
        ReadMiscellaneousNodes = ReadMiscellaneousNodes,
        MaxCharactersInPart = characterLimit,
        IgnoreWhitespace = IgnoreWhitespace,
        CloseStream = true,
#if TASKS_SUPPORTED
        Async = Async,
#endif
    };
}
}
8 changes: 4 additions & 4 deletions src/DocumentFormat.OpenXml.Framework/OpenXmlPartWriter.cs
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
#if FEATURE_ASYNC_SAX_XML
#if TASKS_SUPPORTED
using DocumentFormat.OpenXml.Framework;
using System.Threading.Tasks;
#endif
@@ -82,7 +82,7 @@ public OpenXmlPartWriter(OpenXmlPart openXmlPart, OpenXmlPartWriterSettings sett
{
CloseOutput = true,
Encoding = settings.Encoding,
#if FEATURE_ASYNC_SAX_XML
#if TASKS_SUPPORTED
Async = settings.Async,
#endif
};
@@ -146,7 +146,7 @@ public OpenXmlPartWriter(Stream partStream, OpenXmlPartWriterSettings settings)
{
CloseOutput = settings.CloseOutput,
Encoding = settings.Encoding,
#if FEATURE_ASYNC_SAX_XML
#if TASKS_SUPPORTED
Async = settings.Async,
#endif
};
@@ -430,7 +430,7 @@ public override void Close()
#endregion

// Async Methods
#if FEATURE_ASYNC_SAX_XML
#if TASKS_SUPPORTED
/// <summary>
/// Asynchronously writes the XML declaration with the version "1.0".
/// </summary>
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@ namespace DocumentFormat.OpenXml;
/// </summary>
public class OpenXmlPartWriterSettings
{
#if FEATURE_ASYNC_SAX_XML
#if TASKS_SUPPORTED
/// <summary>
/// Gets or sets a value indicating whether asynchronous <see cref="OpenXmlPartWriter" /> methods can be used.
/// </summary>
50 changes: 50 additions & 0 deletions src/DocumentFormat.OpenXml.Framework/OpenXmlReader.cs
Original file line number Diff line number Diff line change
@@ -6,6 +6,9 @@
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.IO;
#if TASKS_SUPPORTED
using System.Threading.Tasks;
#endif
using System.Xml;

namespace DocumentFormat.OpenXml
@@ -183,6 +186,17 @@ public virtual bool HasAttributes
/// </summary>
public abstract string Prefix { get; }

/// <summary>
/// Gets the type of the current node in the XML document being read.
/// </summary>
/// <remarks>
/// The <see cref="XmlNodeType"/> indicates the type of the current node, such as
/// <c>Element</c>, <c>Attribute</c>, <c>Text</c>, <c>CDATA</c>, <c>Comment</c>, or others.
/// This property provides information about the structure of the XML document
/// and is useful for determining how to process the current node.
/// Derived readers are expected to override this property to report the node type of
/// the reader they wrap.
/// </remarks>
// NOTE(review): this base declaration is a getter-only auto-property, so unless a constructor
// assigns it the value stays at default(XmlNodeType) for readers that do not override it - confirm
// that this is the intended fallback.
public virtual XmlNodeType NodeType { get; }

/// <summary>
/// Gets an instance of <see cref="IXmlLineInfo"/> if available for the current reader.
/// </summary>
@@ -235,6 +249,42 @@ public virtual bool HasAttributes
/// </summary>
public abstract void Close();

#if TASKS_SUPPORTED
#region Async methods

/// <summary>
/// Asynchronously reads the next element in the Open XML document.
/// </summary>
/// <returns>
/// A task whose result is <c>true</c> if the next element was read successfully, or <c>false</c>
/// if there are no more elements to read.
/// </returns>
/// <remarks>
/// The default implementation calls <see cref="Read"/> synchronously and wraps its result in a
/// completed task; derived readers can override it to perform the read asynchronously.
/// This method is only compiled when the build target supports tasks.
/// </remarks>
public virtual Task<bool> ReadAsync() => Task.FromResult(Read());

/// <summary>
/// Asynchronously moves the reader to the first child element of the current node.
/// </summary>
/// <returns>
/// A task whose result is <c>true</c> if the first child element was read successfully, or
/// <c>false</c> if there are no child elements to read.
/// </returns>
/// <remarks>
/// This method can only be called when the reader is positioned on an element start. If no child elements exist,
/// the reader will move to the end tag of the current element.
/// The default implementation calls <see cref="ReadFirstChild"/> synchronously and wraps its
/// result in a completed task.
/// </remarks>
public virtual Task<bool> ReadFirstChildAsync() => Task.FromResult(ReadFirstChild());
#endregion
#endif

/// <summary>
/// Thrown if the object is disposed.
/// </summary>
4 changes: 2 additions & 2 deletions src/DocumentFormat.OpenXml.Framework/OpenXmlWriter.cs
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
#if FEATURE_ASYNC_SAX_XML
#if TASKS_SUPPORTED
using System.Threading.Tasks;
#endif

@@ -131,7 +131,7 @@ protected OpenXmlWriter()
/// </summary>
public abstract void Close();

#if FEATURE_ASYNC_SAX_XML
#if TASKS_SUPPORTED
/// <summary>
/// Asynchronously writes the XML declaration with the version "1.0".
/// </summary>
Original file line number Diff line number Diff line change
@@ -1009,3 +1009,6 @@
DocumentFormat.OpenXml.OpenXmlPartWriterSettings.OpenXmlPartWriterSettings() -> void
DocumentFormat.OpenXml.OpenXmlPartWriter.OpenXmlPartWriter(DocumentFormat.OpenXml.Packaging.OpenXmlPart! openXmlPart, DocumentFormat.OpenXml.OpenXmlPartWriterSettings! settings) -> void
DocumentFormat.OpenXml.OpenXmlPartWriter.OpenXmlPartWriter(System.IO.Stream! partStream, DocumentFormat.OpenXml.OpenXmlPartWriterSettings! settings) -> void
DocumentFormat.OpenXml.OpenXmlPartReader.NodeType.get -> System.Xml.XmlNodeType

Check failure on line 1012 in src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Shipped.txt

GitHub Actions / Run

Symbol 'DocumentFormat.OpenXml.OpenXmlPartReader.NodeType.get -> System.Xml.XmlNodeType' is part of the declared API, but is either not public or could not be found (https://github.com/dotnet/roslyn-analyzers/blob/main/src/PublicApiAnalyzers/PublicApiAnalyzers.Help.md)

Check warning on line 1012 in src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Shipped.txt

GitHub Actions / Analyze (csharp)

Symbol 'DocumentFormat.OpenXml.OpenXmlPartReader.NodeType.get -> System.Xml.XmlNodeType' is part of the declared API, but is either not public or could not be found (https://github.com/dotnet/roslyn-analyzers/blob/main/src/PublicApiAnalyzers/PublicApiAnalyzers.Help.md)
virtual DocumentFormat.OpenXml.OpenXmlReader.NodeType.get -> System.Xml.XmlNodeType
override DocumentFormat.OpenXml.OpenXmlPartReader.NodeType.get -> System.Xml.XmlNodeType
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
#nullable enable
DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.get -> bool

Check warning on line 2 in src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt

GitHub Actions / Analyze (csharp)

Symbol 'DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.get -> bool' is part of the declared API, but is either not public or could not be found (https://github.com/dotnet/roslyn-analyzers/blob/main/src/PublicApiAnalyzers/PublicApiAnalyzers.Help.md)
DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.set -> void

Check warning on line 3 in src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt

GitHub Actions / Analyze (csharp)

Symbol 'DocumentFormat.OpenXml.OpenXmlPartReaderOptions.Async.set -> void' is part of the declared API, but is either not public or could not be found (https://github.com/dotnet/roslyn-analyzers/blob/main/src/PublicApiAnalyzers/PublicApiAnalyzers.Help.md)
override DocumentFormat.OpenXml.OpenXmlPartReader.ReadAsync() -> System.Threading.Tasks.Task<bool>!

Check warning on line 4 in src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt

GitHub Actions / Analyze (csharp)

Symbol 'override DocumentFormat.OpenXml.OpenXmlPartReader.ReadAsync() -> System.Threading.Tasks.Task<bool>!' is part of the declared API, but is either not public or could not be found (https://github.com/dotnet/roslyn-analyzers/blob/main/src/PublicApiAnalyzers/PublicApiAnalyzers.Help.md)
virtual DocumentFormat.OpenXml.OpenXmlReader.ReadAsync() -> System.Threading.Tasks.Task<bool>!

Check warning on line 5 in src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt

GitHub Actions / Analyze (csharp)

Symbol 'virtual DocumentFormat.OpenXml.OpenXmlReader.ReadAsync() -> System.Threading.Tasks.Task<bool>!' is part of the declared API, but is either not public or could not be found (https://github.com/dotnet/roslyn-analyzers/blob/main/src/PublicApiAnalyzers/PublicApiAnalyzers.Help.md)
virtual DocumentFormat.OpenXml.OpenXmlReader.ReadFirstChildAsync() -> System.Threading.Tasks.Task<bool>!

Check warning on line 6 in src/DocumentFormat.OpenXml.Framework/PublicAPI/PublicAPI.Unshipped.txt

GitHub Actions / Analyze (csharp)

Symbol 'virtual DocumentFormat.OpenXml.OpenXmlReader.ReadFirstChildAsync() -> System.Threading.Tasks.Task<bool>!' is part of the declared API, but is either not public or could not be found (https://github.com/dotnet/roslyn-analyzers/blob/main/src/PublicApiAnalyzers/PublicApiAnalyzers.Help.md)
11 changes: 11 additions & 0 deletions src/DocumentFormat.OpenXml.Framework/XmlConvertingReader.cs
Original file line number Diff line number Diff line change
@@ -4,6 +4,9 @@
using DocumentFormat.OpenXml.Features;
using DocumentFormat.OpenXml.Framework;
using System;
#if TASKS_SUPPORTED
using System.Threading.Tasks;
#endif
using System.Xml;

namespace DocumentFormat.OpenXml
@@ -53,6 +56,14 @@ protected override void Dispose(bool disposing)
/// <inheritdoc/>
public override bool Read() => BaseReader.Read();

#if TASKS_SUPPORTED
/// <inheritdoc/>
public override Task<bool> ReadAsync() => BaseReader.ReadAsync();

/// <inheritdoc/>
public override Task SkipAsync() => BaseReader.SkipAsync();

/// <inheritdoc/>
public override Task<XmlNodeType> MoveToContentAsync() => BaseReader.MoveToContentAsync();
#endif

/// <inheritdoc/>
public override string GetAttribute(int index) => BaseReader.GetAttribute(index);

Original file line number Diff line number Diff line change
@@ -239,7 +239,7 @@ public void WriteStringExceptionTest7()
}
}

#if FEATURE_ASYNC_SAX_XML
#if TASKS_SUPPORTED
[Fact]
public async Task WriteStartDocumentAsync_ShouldWriteStartDocument()
{