diff -ur a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
--- a/cmake/CMakeLists.txt	2021-12-14 22:34:21.754062247 +0100
+++ b/cmake/CMakeLists.txt	2021-12-14 22:36:18.094061909 +0100
@@ -240,7 +240,7 @@
   if (onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)
     add_compile_definitions(ORT_MINIMAL_BUILD_CUSTOM_OPS)
   endif()
-  
+
   set(onnxruntime_REDUCED_OPS_BUILD ON)
 
   if (NOT onnxruntime_ENABLE_PYTHON)
@@ -568,7 +568,7 @@
   endif()
 endif()
 
-if(NOT WIN32 AND NOT onnxruntime_PREFER_SYSTEM_LIB)
+if(NOT WIN32)
   add_subdirectory(${PROJECT_SOURCE_DIR}/external/nsync EXCLUDE_FROM_ALL)
 endif()
 # External dependencies
@@ -596,7 +596,7 @@
     if(NOT Protobuf_USE_STATIC_LIBS)
       #Indeed here should be a warning, not a fatal error. ONNX Runtime itself can work in such a
       #setting but it may cause compatibility issue when ONNX Runtime is integrated with the other ONNX ecosystem softwares.
-      message(FATAL_ERROR "Please enable Protobuf_USE_STATIC_LIBS")
+      message(WARNING "Please enable Protobuf_USE_STATIC_LIBS")
     endif()
   else()
     set(PROTOBUF_LIB protobuf::libprotobuf-lite)
diff -ur a/include/onnxruntime/core/platform/ort_mutex.h b/include/onnxruntime/core/platform/ort_mutex.h
--- a/include/onnxruntime/core/platform/ort_mutex.h	2021-12-14 22:34:21.784062247 +0100
+++ b/include/onnxruntime/core/platform/ort_mutex.h	2021-12-14 22:36:18.164061909 +0100
@@ -101,7 +101,7 @@
   return steady_clock::now() - steady_now < rel_time ? std::cv_status::no_timeout : std::cv_status::timeout;
 }
 }  // namespace onnxruntime
-#else
+#elif !defined(__aarch64__)
 #include "nsync.h"
 #include <mutex>               //for unique_lock
 #include <condition_variable>  //for cv_status
@@ -186,4 +186,11 @@
   return steady_clock::now() - steady_now < rel_time ? std::cv_status::no_timeout : std::cv_status::timeout;
 }
 };  // namespace onnxruntime
+#else
+#include <mutex>
+#include <condition_variable>
+namespace onnxruntime {
+using OrtMutex = std::mutex;
+using OrtCondVar = std::condition_variable;
+}  // namespace onnxruntime
 #endif
diff -ur a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h	2021-12-14 22:34:21.784062247 +0100
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h	2021-12-14 22:36:18.164061909 +0100
@@ -345,8 +345,8 @@
 
 struct Session : Base<OrtSession> {
   explicit Session(std::nullptr_t) {}
-  Session(Env& env, const ORTCHAR_T* model_path, const SessionOptions& options);
-  Session(Env& env, const void* model_data, size_t model_data_length, const SessionOptions& options);
+  Session(const Env& env, const ORTCHAR_T* model_path, const SessionOptions& options);
+  Session(const Env& env, const void* model_data, size_t model_data_length, const SessionOptions& options);
 
   // Run that will allocate the output values
   std::vector<Value> Run(const RunOptions& run_options, const char* const* input_names, const Value* input_values, size_t input_count,
diff -ur a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h	2021-12-14 22:34:21.784062247 +0100
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h	2021-12-14 22:36:18.164061909 +0100
@@ -500,11 +500,11 @@
   return *this;
 }
 
-inline Session::Session(Env& env, const ORTCHAR_T* model_path, const SessionOptions& options) {
+inline Session::Session(const Env& env, const ORTCHAR_T* model_path, const SessionOptions& options) {
   ThrowOnError(GetApi().CreateSession(env, model_path, options, &p_));
 }
 
-inline Session::Session(Env& env, const void* model_data, size_t model_data_length, const SessionOptions& options) {
+inline Session::Session(const Env& env, const void* model_data, size_t model_data_length, const SessionOptions& options) {
   ThrowOnError(GetApi().CreateSessionFromArray(env, model_data, model_data_length, options, &p_));
 }
 
diff -ur a/onnxruntime/core/mlas/lib/platform.cpp b/onnxruntime/core/mlas/lib/platform.cpp
--- a/onnxruntime/core/mlas/lib/platform.cpp	2021-12-14 22:34:21.864062247 +0100
+++ b/onnxruntime/core/mlas/lib/platform.cpp	2021-12-14 22:36:18.244061908 +0100
@@ -16,6 +16,7 @@
 --*/
 
 #include "mlasi.h"
+#include <string>
 
 //
 // Stores the platform information.
@@ -170,8 +171,11 @@
         //
 
         uint64_t xcr0 = MlasReadExtendedControlRegister(_XCR_XFEATURE_ENABLED_MASK);
+        const char *cpu_opt = std::getenv("MLAS_DYNAMIC_CPU_ARCH");
+        if (cpu_opt == nullptr) cpu_opt = "99";
+        auto opt = std::stoi(cpu_opt);
 
-        if ((xcr0 & 0x6) == 0x6) {
+        if (opt > 0 && (xcr0 & 0x6) == 0x6) {
 
             this->GemmFloatKernel = MlasGemmFloatKernelAvx;
 
@@ -204,7 +208,7 @@
             __cpuid_count(7, 0, Cpuid7[0], Cpuid7[1], Cpuid7[2], Cpuid7[3]);
 #endif
 
-            if (((Cpuid1[2] & 0x1000) != 0) && ((Cpuid7[1] & 0x20) != 0)) {
+            if (opt > 1 && ((Cpuid1[2] & 0x1000) != 0) && ((Cpuid7[1] & 0x20) != 0)) {
 
                 this->GemmU8S8Operation = MlasGemmU8X8Operation<MLAS_GEMM_U8S8_KERNEL_AVX2>;
                 this->GemmU8S8PackedOperation = MlasGemmU8X8PackedOperation<MLAS_GEMM_U8S8_KERNEL_AVX2>;
@@ -264,7 +268,7 @@
                 // operating system supports saving AVX512F state.
                 //
 
-                if (((Cpuid7[1] & 0x10000) != 0) && ((xcr0 & 0xE0) == 0xE0)) {
+                if (opt > 2 && ((Cpuid7[1] & 0x10000) != 0) && ((xcr0 & 0xE0) == 0xE0)) {
 
                     this->GemmFloatKernel = MlasGemmFloatKernelAvx512F;
                     this->GemmDoubleKernel = MlasGemmDoubleKernelAvx512F;
diff -ur a/onnxruntime/core/platform/posix/ort_mutex.cc b/onnxruntime/core/platform/posix/ort_mutex.cc
--- a/onnxruntime/core/platform/posix/ort_mutex.cc	2021-12-14 22:34:21.874062247 +0100
+++ b/onnxruntime/core/platform/posix/ort_mutex.cc	2021-12-14 22:36:18.254061908 +0100
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#if !defined(__aarch64__)
 #include "core/common/common.h"
 #include "core/platform/ort_mutex.h"
 #include <assert.h>
@@ -40,4 +41,5 @@
   nsync::nsync_cv_wait(&native_cv_object, lk.mutex()->native_handle());
 }
 
-}  // namespace onnxruntime
\ No newline at end of file
+}  // namespace onnxruntime
+#endif