texpine
diff --git a/RELEASE.md b/RELEASE.md
Lines changed: 2 additions & 0 deletions
diff --git a/libxsmm.BUILD b/libxsmm.BUILD
Lines changed: 37 additions & 16 deletions
diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc
Lines changed: 1 addition & 1 deletion
diff --git a/tensorflow/contrib/android/jni/jni_utils.cc b/tensorflow/contrib/android/jni/jni_utils.cc
Lines changed: 14 additions & 1 deletion
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
Lines changed: 1 addition & 0 deletions
@@ -57,6 +57,8 @@
 * tf.image.decode_jpeg by default uses the faster DCT method, sacrificing
   a little fidelity for improved speed. One can revert to the old
   behavior by specifying the attribute dct_method='INTEGER_ACCURATE'.
+* `tf.complex_abs` has been removed from the Python interface. `tf.abs`
+  supports complex tensors and should be used instead.
 
 # Release 0.12.0
 
 
@@ -7,12 +7,12 @@ exports_files(["LICENSE"])
 
 # Arguments to ./scripts/libxsmm_interface.py, see that file for detailed description.
 #  precision: SP & DP
-#  ilp64: no
 #  prefetch: 1 (auto)
-libxsmm_interface_arguments = "0 0 1"
+libxsmm_interface_arguments = "0 1"
 
 # Arguments to ./scripts/libxsmm_config.py, see that file for detailed description.
 #  ilp64: no
+#  big: no
 #  offload: no
 #  alignment [b]
 #  prefetch: 1 (auto)
@@ -22,7 +22,11 @@ libxsmm_interface_arguments = "0 0 1"
 #  flags
 #  alpha = 1
 #  beta = 1
-libxsmm_config_arguments = "0 0 64 1 0 1 1 0 1 1"
+libxsmm_config_arguments = "0 0 0 64 1 0 1 1 0 1 1"
+
+# Arguments to ./scripts/libxsmm_dispatch.py, see that file for detailed description.
+#  (dummy argument)
+libxsmm_dispatch_arguments = "0"
 
 genrule(
     name = "libxsmm_headers",
@@ -33,13 +37,20 @@ genrule(
     outs = [
         "include/libxsmm.h",
         "include/libxsmm_config.h",
+        "include/libxsmm_dispatch.h",
     ],
     cmd = "$(location :libxsmm_interface) $(location src/template/libxsmm.h) " + libxsmm_interface_arguments + " > $(location include/libxsmm.h);" +
-          "$(location :libxsmm_config) $(location src/template/libxsmm_config.h) " + libxsmm_config_arguments + " > $(location include/libxsmm_config.h)",
+          "$(location :libxsmm_config) $(location src/template/libxsmm_config.h) " + libxsmm_config_arguments + " > $(location include/libxsmm_config.h);" +
+          "$(location :libxsmm_dispatch) " + libxsmm_dispatch_arguments + " > $(location include/libxsmm_dispatch.h)",
     tools = [
         ":libxsmm_config",
+        ":libxsmm_dispatch",
         ":libxsmm_interface",
     ],
+    visibility = [
+        "//tensorflow/core/kernels:__pkg__",
+        "//third_party/eigen3:__pkg__",
+    ],
 )
 
 cc_library(
@@ -49,37 +60,40 @@ cc_library(
         "src/libxsmm_dump.c",
         "src/libxsmm_malloc.c",
         "src/libxsmm_gemm.c",
+        "src/libxsmm_gemm_diff.c",
+        "src/libxsmm_hash.c",
         "src/libxsmm_timer.c",
         "src/libxsmm_trace.c",
         "src/libxsmm_trans.c",
         "src/libxsmm_sync.c",
         "src/libxsmm_perf.c",
+        "src/libxsmm_spmdm.c",
         "src/libxsmm_dnn.c",
+        "src/libxsmm_dnn_handle.c",
         "src/libxsmm_dnn_convolution_forward.c",
+        "src/libxsmm_dnn_convolution_backward.c",
+        "src/libxsmm_dnn_convolution_weight_update.c",
         "src/libxsmm_cpuid_x86.c",
     ] + glob([
         "src/generator_*.c",
     ]),
     hdrs = [
+        "include/libxsmm_cpuid.h",
         "include/libxsmm_dnn.h",
         "include/libxsmm_frontend.h",
         "include/libxsmm_generator.h",
+        "include/libxsmm_intrinsics_x86.h",
         "include/libxsmm_macros.h",
         "include/libxsmm_malloc.h",
+        "include/libxsmm_spmdm.h",
         "include/libxsmm_sync.h",
         "include/libxsmm_timer.h",
         "include/libxsmm_typedefs.h",
-        "include/libxsmm_dispatch.h",
-        "src/libxsmm_gemm_diff.c",
-        "src/libxsmm_cpuid_x86.c",
-        "src/libxsmm_hash.c",
         # Generated:
         "include/libxsmm.h",
         "include/libxsmm_config.h",
-    ] + glob([
-        "src/*.h",
-        "src/template/*.c",
-    ]),
+        "include/libxsmm_dispatch.h",
+    ],
     copts = [
         "-mavx",  # JIT does not work without avx anyway, and this silences some CRC32 warnings.
         "-Wno-vla",  # Libxsmm convolutions heavily use VLA.
@@ -89,12 +103,13 @@ cc_library(
         "LIBXSMM_CPUID_X86_NOINLINE",
         "__BLAS=0",
     ],
-    includes = ["include"],
+    includes = [
+        "include",
+        "src",
+        "src/template",
+    ],
     linkopts = ["-ldl"],
     visibility = ["//visibility:public"],
-    deps = [
-        ":libxsmm_headers",
-    ],
 )
 
 py_library(
@@ -114,3 +129,9 @@ py_binary(
     srcs = ["scripts/libxsmm_config.py"],
     deps = [":libxsmm_scripts"],
 )
+
+py_binary(
+    name = "libxsmm_dispatch",
+    srcs = ["scripts/libxsmm_dispatch.py"],
+    deps = [":libxsmm_scripts"],
+)
@@ -186,7 +186,7 @@ std::unordered_set<string> Scope::GetColocationConstraints(
 void Scope::UpdateStatus(const Status s) const {
   status_->Update(s);
   if (exit_on_error_ && !status_->ok()) {
-    LOG(FATAL) << status_;
+    LOG(FATAL) << *status_;
   }
 }
 
 
@@ -38,7 +38,12 @@ namespace {
 class IfstreamInputStream : public ::google::protobuf::io::CopyingInputStream {
  public:
   explicit IfstreamInputStream(const std::string& file_name)
-      : ifs_(file_name.c_str(), std::ios::in | std::ios::binary) {}
+      : ifs_(file_name.c_str(), std::ios::in | std::ios::binary) {
+    CHECK(ifs_.good()) << "Failed to open file \"" << file_name
+                       << "\" or file is 0 length! Use prefix \""
+                       << ASSET_PREFIX
+                       << "\" if attempting to load proto from assets.";
+  }
   ~IfstreamInputStream() { ifs_.close(); }
 
   int Read(void* buffer, int size) {
@@ -59,6 +64,7 @@ bool PortableReadFileToProto(const std::string& file_name,
                              ::google::protobuf::MessageLite* proto) {
   ::google::protobuf::io::CopyingInputStreamAdaptor stream(
       new IfstreamInputStream(file_name));
+
   stream.SetOwnsCopyingStream(true);
   // TODO(jiayq): the following coded stream is for debugging purposes to allow
   // one to parse arbitrarily large messages for MessageLite. One most likely
@@ -119,6 +125,13 @@ void ReadFileToProtoOrDie(AAssetManager* const asset_manager,
     // it to memory first.
     VLOG(0) << "Opening asset " << asset_filename << " from disk with copy.";
     const off_t data_size = AAsset_getLength(asset);
+
+    // TODO(andrewharp): Add codepath for loading compressed protos as well.
+    if (data_size > 64 * 1024 * 1024) {
+      LOG(WARNING) << "Compressed proto is larger than 64mb; if problems occur "
+                   << " turn off compression for protocol buffer files in APK.";
+    }
+
     const void* const memory = AAsset_getBuffer(asset);
     CHECK(message->ParseFromArray(memory, data_size));
   }
 
@@ -162,6 +162,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       "${tensorflow_source_dir}/tensorflow/python/training/saver_large_variable_test.py"  # Overflow error.
       "${tensorflow_source_dir}/tensorflow/python/training/supervisor_test.py"  # Flaky I/O error on rename.
       "${tensorflow_source_dir}/tensorflow/python/training/sync_replicas_optimizer_test.py"  # Needs portpicker.
+      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/array_ops_test.py"  # depends on python/framework/test_ops
     )
   endif()
   list(REMOVE_ITEM tf_test_src_py ${tf_test_src_py_exclude})
Original file line number	Diff line number	Diff line change
`@@ -186,7 +186,7 @@ std::unordered_set<string> Scope::GetColocationConstraints(`
`186`	`186`	`void Scope::UpdateStatus(const Status s) const {`
`187`	`187`	`status_->Update(s);`
`188`	`188`	`if (exit_on_error_ && !status_->ok()) {`
`189`		`- LOG(FATAL) << status_;`
	`189`	`+ LOG(FATAL) << *status_;`
`190`	`190`	`}`
`191`	`191`	`}`
`192`	`192`
Original file line number	Diff line number	Diff line change
`@@ -162,6 +162,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)`
`162`	`162`	`"${tensorflow_source_dir}/tensorflow/python/training/saver_large_variable_test.py" # Overflow error.`
`163`	`163`	`"${tensorflow_source_dir}/tensorflow/python/training/supervisor_test.py" # Flaky I/O error on rename.`
`164`	`164`	`"${tensorflow_source_dir}/tensorflow/python/training/sync_replicas_optimizer_test.py" # Needs portpicker.`
	`165`	`+ "${tensorflow_source_dir}/tensorflow/python/kernel_tests/array_ops_test.py" # depends on python/framework/test_ops`
`165`	`166`	`)`
`166`	`167`	`endif()`
`167`	`168`	`list(REMOVE_ITEM tf_test_src_py ${tf_test_src_py_exclude})`