Skip to content

Commit 4c4ae1e

Browse files
enable large initializer offset align for save external data in ORT (microsoft#21604)
### Description Address issue microsoft#21524 Enable offset align for model saved as external data format python data convertor fix here: onnx/onnx#6248 ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
1 parent 27a6890 commit 4c4ae1e

File tree

6 files changed

+135
-18
lines changed

6 files changed

+135
-18
lines changed

include/onnxruntime/core/graph/graph.h

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1139,16 +1139,48 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
11391139
const ONNX_NAMESPACE::GraphProto& ToGraphProto();
11401140
ONNX_NAMESPACE::GraphProto ToGraphProto() const;
11411141

1142+
// Options to align external initializer offset.
1143+
// For models running on CPU, ORT will try to use mmap to load external initializers.
1144+
// To use mmap, external initializers need to be offset aligned.
1145+
// ORT saves external initializers into a single data file; each initializer is accessed with
1146+
// offset(start position of initializer) and length(byte length of initializer) of the data file.
1147+
// To use mmap, each offset needs to be aligned, which means the offset needs to be divisible by the
1148+
// allocation granularity(64KB for windows and 4K for other OSes).
1149+
// With align_offset set to true, ORT will align the offset of large initializers when
1150+
// saving an ONNX model with an external data file.
1151+
struct OffsetAlignmentInfo {
1152+
// Offset will always be page aligned and allocation granularity aligned for mmap support.
1153+
// This is done by padding previous tensor data with zeros keeping same length.
1154+
bool align_offset = false;
1155+
// Alignment threshold for size of data.
1156+
// Having a low threshold will waste file space for small initializers.
1157+
// Only when the tensor's data size is > align_threshold will it be force aligned.
1158+
// Default to 1MB.
1159+
int64_t align_threshold = 1048576;
1160+
// The allocation Granularity for mmap() support.
1161+
// Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
1162+
int64_t allocation_granularity = 65536;
1163+
};
1164+
11421165
/** Gets the GraphProto representation of this Graph
11431166
@param external_file_path File path of the binary file to use for initializers.
11441167
@param model_file_path path of the model file.
11451168
@param initializer_size_threshold initializers larger or equal to this threshold (in bytes) are saved
11461169
in the external file. Initializer smaller than this threshold are included in the onnx file.
1170+
@param align_info offset alignment info.
11471171
@returns GraphProto serialization of the graph.
11481172
*/
11491173
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
11501174
const std::filesystem::path& model_file_path,
1151-
size_t initializer_size_threshold) const;
1175+
size_t initializer_size_threshold,
1176+
const OffsetAlignmentInfo& align_info) const;
1177+
1178+
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
1179+
const std::filesystem::path& model_file_path,
1180+
size_t initializer_size_threshold) const {
1181+
OffsetAlignmentInfo default_options;
1182+
return ToGraphProtoWithExternalInitializers(external_file_path, model_file_path, initializer_size_threshold, default_options);
1183+
}
11521184

11531185
/** Gets the ISchemaRegistry instances being used with this Graph. */
11541186
IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const;

onnxruntime/core/graph/graph.cc

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4021,7 +4021,8 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {
40214021

40224022
ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
40234023
const std::filesystem::path& model_file_path,
4024-
size_t initializer_size_threshold) const {
4024+
size_t initializer_size_threshold,
4025+
const OffsetAlignmentInfo& align_info) const {
40254026
GraphProto result;
40264027
ToGraphProtoInternal(result);
40274028
ORT_ENFORCE(external_file_path.is_relative());
@@ -4059,6 +4060,27 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
40594060
continue;
40604061
}
40614062

4063+
// update external_offset for alignment
4064+
// need to do padding before writing the actual tensor data, as we do offset alignment at the beginning of
4065+
// large tensors (offset needs to be page aligned and allocation granularity aligned) like below:
4066+
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
4067+
// |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->|
4068+
if (align_info.align_offset && static_cast<int64_t>(tensor_bytes_size) > align_info.align_threshold) {
4069+
// Align to the larger of the page size or the allocation granularity
4070+
int64_t alignment_factor = std::max(static_cast<int64_t>(4096), align_info.allocation_granularity);
4071+
// Align to the next page or alloc granularity boundary
4072+
int64_t new_external_offset = static_cast<int64_t>(
4073+
std::floor((external_offset + alignment_factor - 1) / alignment_factor)) *
4074+
alignment_factor;
4075+
4076+
// pad with '0' characters (ASCII 0x30, not NUL bytes) up to the aligned offset
4077+
for (int64_t index = external_offset; index != new_external_offset; ++index) {
4078+
external_stream << '0';
4079+
}
4080+
4081+
external_offset = new_external_offset;
4082+
}
4083+
40624084
for (size_t index = 0; index != tensor_bytes_size; ++index) {
40634085
external_stream << raw_data[index];
40644086
}

onnxruntime/core/graph/model.cc

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -383,12 +383,14 @@ ModelProto Model::ToProto() const {
383383

384384
ModelProto Model::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
385385
const std::filesystem::path& file_path,
386-
size_t initializer_size_threshold) const {
386+
size_t initializer_size_threshold,
387+
const Graph::OffsetAlignmentInfo& align_info) const {
387388
ModelProto result(model_proto_);
388389
const auto& graph = *graph_;
389390
*(result.mutable_graph()) = graph.ToGraphProtoWithExternalInitializers(external_file_name,
390391
file_path,
391-
initializer_size_threshold);
392+
initializer_size_threshold,
393+
align_info);
392394
return result;
393395
}
394396

@@ -605,14 +607,16 @@ template <typename T>
605607
static Status SaveModelWithExternalInitializers(Model& model,
606608
const T& file_path,
607609
const std::filesystem::path& external_file_name,
608-
size_t initializer_size_threshold) {
610+
size_t initializer_size_threshold,
611+
const Graph::OffsetAlignmentInfo& align_info) {
609612
int fd = 0;
610613
Status status = Env::Default().FileOpenWr(file_path, fd);
611614
ORT_RETURN_IF_ERROR(status);
612615

613616
ORT_TRY {
614617
status = Model::SaveWithExternalInitializers(model, fd, file_path, external_file_name,
615-
initializer_size_threshold);
618+
initializer_size_threshold,
619+
align_info);
616620
}
617621
ORT_CATCH(const std::exception& ex) {
618622
ORT_HANDLE_EXCEPTION([&]() {
@@ -642,8 +646,10 @@ Status Model::Load(const PathString& file_path, std::shared_ptr<Model>& p_model,
642646

643647
Status Model::SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path,
644648
const std::filesystem::path& external_file_name,
645-
size_t initializer_size_threshold) {
646-
return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold);
649+
size_t initializer_size_threshold,
650+
const Graph::OffsetAlignmentInfo& align_info) {
651+
return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold,
652+
align_info);
647653
}
648654

649655
Status Model::LoadFromBytes(int count, const void* p_bytes, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) {
@@ -759,15 +765,17 @@ Status Model::SaveWithExternalInitializers(Model& model,
759765
int fd,
760766
const std::filesystem::path& file_path,
761767
const std::filesystem::path& external_file_name,
762-
size_t initializer_size_threshold) {
768+
size_t initializer_size_threshold,
769+
const Graph::OffsetAlignmentInfo& align_info) {
763770
if (fd < 0) {
764771
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "<fd> is less than 0.");
765772
}
766773

767774
ORT_RETURN_IF_ERROR(model.MainGraph().Resolve());
768775

769776
auto model_proto = model.ToGraphProtoWithExternalInitializers(external_file_name, file_path,
770-
initializer_size_threshold);
777+
initializer_size_threshold,
778+
align_info);
771779
google::protobuf::io::FileOutputStream output(fd);
772780
const bool result = model_proto.SerializeToZeroCopyStream(&output) && output.Flush();
773781
if (result) {

onnxruntime/core/graph/model.h

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,25 +187,54 @@ class Model {
187187
// Get model's serialization proto data.
188188
// Save initializer larger than the given threshold (in bytes) into an external binary file
189189
// with the given name. This function is useful to avoid hitting the size limit of protobuf files.
190+
// initializer offset could be page aligned and allocation granularity aligned for mmap support.
190191
ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
191192
const std::filesystem::path& file_path,
192-
size_t initializer_size_threshold) const;
193+
size_t initializer_size_threshold,
194+
const Graph::OffsetAlignmentInfo& align_info) const;
195+
196+
ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
197+
const std::filesystem::path& file_path,
198+
size_t initializer_size_threshold) const {
199+
Graph::OffsetAlignmentInfo default_align_info;
200+
return ToGraphProtoWithExternalInitializers(external_file_name, file_path, initializer_size_threshold, default_align_info);
201+
}
193202

194203
static common::Status Save(Model& model, const PathString& file_path);
195204

196205
static common::Status Save(Model& model, int fd);
197206

198207
// Save the model to file using an external file for initializers larger than the given threshold (in bytes).
208+
// Initializer offset could be page aligned and allocation granularity aligned for mmap support.
209+
static common::Status SaveWithExternalInitializers(Model& model,
210+
const std::filesystem::path& file_path,
211+
const std::filesystem::path& external_file_path,
212+
size_t initializer_size_threshold,
213+
const Graph::OffsetAlignmentInfo& align_info);
214+
215+
static common::Status SaveWithExternalInitializers(Model& model,
216+
const std::filesystem::path& file_path,
217+
const std::filesystem::path& external_file_path,
218+
size_t initializer_size_threshold) {
219+
Graph::OffsetAlignmentInfo default_align_info;
220+
return SaveWithExternalInitializers(model, file_path, external_file_path, initializer_size_threshold, default_align_info);
221+
}
222+
199223
static common::Status SaveWithExternalInitializers(Model& model,
224+
int fd,
200225
const std::filesystem::path& file_path,
201226
const std::filesystem::path& external_file_path,
202-
size_t initializer_size_threshold);
227+
size_t initializer_size_threshold,
228+
const Graph::OffsetAlignmentInfo& align_info);
203229

204230
static common::Status SaveWithExternalInitializers(Model& model,
205231
int fd,
206232
const std::filesystem::path& file_path,
207233
const std::filesystem::path& external_file_path,
208-
size_t initializer_size_threshold);
234+
size_t initializer_size_threshold) {
235+
Graph::OffsetAlignmentInfo default_align_info;
236+
return SaveWithExternalInitializers(model, fd, file_path, external_file_path, initializer_size_threshold, default_align_info);
237+
}
209238

210239
static common::Status Load(std::istream& model_istream, ONNX_NAMESPACE::ModelProto* p_model_proto);
211240

onnxruntime/core/session/inference_session.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2054,10 +2054,13 @@ common::Status InferenceSession::Initialize() {
20542054
const size_t optimized_model_external_initializers_min_size_in_bytes =
20552055
ParseStringWithClassicLocale<size_t>(session_options_.config_options.GetConfigOrDefault(
20562056
kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes, "1024"));
2057+
Graph::OffsetAlignmentInfo align_info;
2058+
align_info.align_offset = true;
20572059
ORT_RETURN_IF_ERROR_SESSIONID_(Model::SaveWithExternalInitializers(*model_,
20582060
session_options_.optimized_model_filepath,
20592061
optimized_model_external_initializers_file_name,
2060-
optimized_model_external_initializers_min_size_in_bytes));
2062+
optimized_model_external_initializers_min_size_in_bytes,
2063+
align_info));
20612064
}
20622065
}
20632066
}

onnxruntime/test/framework/save_model_with_external_initializers.cc

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,
2323
const std::filesystem::path& input_external_init_file,
2424
const std::filesystem::path& output_onnx,
2525
const std::filesystem::path& output_external_init_file,
26-
size_t initializer_size_threshold) {
26+
size_t initializer_size_threshold,
27+
const Graph::OffsetAlignmentInfo& align_info) {
2728
auto logger = DefaultLoggingManager().CreateLogger("LoadSaveAndCompareModel");
2829
std::shared_ptr<Model> model;
2930
ORT_RETURN_IF_ERROR(Model::Load(input_onnx, model, nullptr, *logger));
3031
std::filesystem::remove(output_onnx);
3132
std::filesystem::remove(output_external_init_file);
32-
ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold));
33+
ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold,
34+
align_info));
3335

3436
std::shared_ptr<Model> model_from_external;
3537
ORT_RETURN_IF_ERROR(Model::Load(output_onnx.native(), model_from_external, nullptr, *logger));
@@ -75,6 +77,17 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,
7577

7678
ORT_RETURN_IF_NOT(tensor_proto_size == from_external_tensor_proto_size, "size mismatch");
7779
ORT_RETURN_IF_NOT(memcmp(tensor_proto_data.data(), from_external_tensor_proto_data.data(), tensor_proto_size) == 0, "data mismatch");
80+
81+
if (align_info.align_offset) {
82+
for (const StringStringEntryProto& entry : from_external_tensor_proto->external_data()) {
83+
if (entry.has_key() && entry.has_value() && entry.key() == "offset") {
84+
size_t tensor_offset;
85+
std::stringstream stream(entry.value());
86+
stream >> tensor_offset;
87+
ORT_RETURN_IF_NOT(tensor_offset % align_info.allocation_granularity == 0, "tensor offset not align");
88+
}
89+
}
90+
}
7891
}
7992
// Cleanup.
8093
ORT_RETURN_IF_NOT(std::filesystem::remove(output_onnx), "delete file failed");
@@ -84,12 +97,22 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,
8497

8598
// Original model does not have external initializers
8699
TEST(SaveWithExternalInitializers, Mnist) {
87-
ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/mnist.onnx"), ORT_TSTR(""), ORT_TSTR("testdata/mnist_with_external_initializers.onnx"), ORT_TSTR("mnist_external_initializers.bin"), 100));
100+
Graph::OffsetAlignmentInfo align_info;
101+
ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/mnist.onnx"), ORT_TSTR(""), ORT_TSTR("testdata/mnist_with_external_initializers.onnx"), ORT_TSTR("mnist_external_initializers.bin"), 100, align_info));
88102
}
89103

90104
// Original model has external initializers
91105
TEST(SaveWithExternalInitializers, ModelWithOriginalExternalData) {
92-
ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0));
106+
Graph::OffsetAlignmentInfo align_info;
107+
ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0, align_info));
108+
}
109+
110+
// Original model has external initializers, align offset
111+
TEST(SaveWithExternalInitializers, ModelWithOriginalExternalDataAlignOffset) {
112+
Graph::OffsetAlignmentInfo align_info;
113+
align_info.align_offset = true;
114+
align_info.align_threshold = 0;
115+
ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0, align_info));
93116
}
94117

95118
} // namespace test

0 commit comments

Comments
 (0)