diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index c4412e0934f17..c61d720d8321c 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -201,6 +201,7 @@ option(onnxruntime_WEBASSEMBLY_RUN_TESTS_IN_BROWSER "Enable this option to run t option(onnxruntime_ENABLE_WEBASSEMBLY_DEBUG_INFO "Enable this option to turn on DWARF format debug info" OFF) option(onnxruntime_ENABLE_WEBASSEMBLY_PROFILING "Enable this option to turn on WebAssembly profiling and preserve function names" OFF) option(onnxruntime_ENABLE_WEBASSEMBLY_OUTPUT_OPTIMIZED_MODEL "Enable this option to allow WebAssembly to output optimized model" OFF) +option(onnxruntime_ENABLE_WEBASSEMBLY_MEMORY64 "Enable this option to allow WebAssembly to use 64bit memory" OFF) # Enable bitcode for iOS option(onnxruntime_ENABLE_BITCODE "Enable bitcode for iOS only" OFF) diff --git a/cmake/adjust_global_compile_flags.cmake b/cmake/adjust_global_compile_flags.cmake index 6eb784a4063ed..11daeb9f6bc99 100644 --- a/cmake/adjust_global_compile_flags.cmake +++ b/cmake/adjust_global_compile_flags.cmake @@ -52,8 +52,10 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") endif() if (onnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_CATCHING) - string(APPEND CMAKE_C_FLAGS " -s DISABLE_EXCEPTION_CATCHING=0") - string(APPEND CMAKE_CXX_FLAGS " -s DISABLE_EXCEPTION_CATCHING=0") + string(APPEND CMAKE_C_FLAGS " -fwasm-exceptions") + string(APPEND CMAKE_CXX_FLAGS " -fwasm-exceptions") +# string(APPEND CMAKE_C_FLAGS " -s DISABLE_EXCEPTION_CATCHING=0") +# string(APPEND CMAKE_CXX_FLAGS " -s DISABLE_EXCEPTION_CATCHING=0") endif() # Build WebAssembly with multi-threads support. @@ -63,6 +65,12 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") endif() endif() +# Build WebAssembly with 64bit support. 
+if (onnxruntime_ENABLE_WEBASSEMBLY_MEMORY64) + string(APPEND CMAKE_C_FLAGS " -sMEMORY64 -Wno-experimental") + string(APPEND CMAKE_CXX_FLAGS " -sMEMORY64 -Wno-experimental") +endif() + if (onnxruntime_EXTERNAL_TRANSFORMER_SRC_PATH) add_definitions(-DORT_TRAINING_EXTERNAL_GRAPH_TRANSFORMERS=1) endif() diff --git a/cmake/onnxruntime_webassembly.cmake b/cmake/onnxruntime_webassembly.cmake index 7a49e90c00bce..d93eba122077a 100644 --- a/cmake/onnxruntime_webassembly.cmake +++ b/cmake/onnxruntime_webassembly.cmake @@ -168,9 +168,9 @@ else() "${ONNXRUNTIME_ROOT}/wasm/api.cc" "${ONNXRUNTIME_ROOT}/core/session/onnxruntime_c_api.cc" ) - set (WASM_API_EXCEPTION_CATCHING "-s DISABLE_EXCEPTION_CATCHING=0") - message(STATUS "onnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_CATCHING_ON_API set") - set_source_files_properties(${onnxruntime_webassembly_src_exc} PROPERTIES COMPILE_FLAGS ${WASM_API_EXCEPTION_CATCHING}) + # set (WASM_API_EXCEPTION_CATCHING "-s DISABLE_EXCEPTION_CATCHING=0") + # message(STATUS "onnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_CATCHING_ON_API set") + # set_source_files_properties(${onnxruntime_webassembly_src_exc} PROPERTIES COMPILE_FLAGS ${WASM_API_EXCEPTION_CATCHING}) endif() target_link_libraries(onnxruntime_webassembly PRIVATE @@ -193,7 +193,7 @@ else() re2::re2 ) - set(EXPORTED_RUNTIME_METHODS "'stackAlloc','stackRestore','stackSave','UTF8ToString','stringToUTF8','lengthBytesUTF8'") + set(EXPORTED_RUNTIME_METHODS "'stackAlloc','stackRestore','stackSave','UTF8ToString','stringToUTF8','lengthBytesUTF8','setValue','getValue','FS'") if (onnxruntime_USE_XNNPACK) target_link_libraries(onnxruntime_webassembly PRIVATE XNNPACK) @@ -215,18 +215,61 @@ else() set(EXPORTED_FUNCTIONS "_malloc,_free") endif() + if (onnxruntime_ENABLE_WEBASSEMBLY_MEMORY64) + set(MAXIMUM_MEMORY "17179869184") + target_link_options(onnxruntime_webassembly PRIVATE + "SHELL:-s MEMORY64=1" + ) + string(APPEND CMAKE_C_FLAGS " -sMEMORY64 -Wno-experimental") + string(APPEND CMAKE_CXX_FLAGS " -sMEMORY64 -Wno-experimental") + set(SMEMORY_FLAG "-sMEMORY64") + + target_compile_options(onnx PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(onnxruntime_common PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(onnxruntime_session PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(onnxruntime_framework PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(nsync_cpp PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(onnx_proto PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(protoc PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(libprotobuf-lite PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(onnxruntime_providers PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(onnxruntime_optimizer PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(onnxruntime_mlas PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(onnxruntime_graph PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(onnxruntime_flatbuffers PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(onnxruntime_util PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(re2 PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(absl_base PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(absl_hash PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(absl_raw_hash_set PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(absl_throw_delegate PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(absl_city PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + target_compile_options(absl_low_level_hash PRIVATE ${SMEMORY_FLAG} -Wno-experimental) + + 
target_link_options(onnxruntime_webassembly PRIVATE + --post-js "${ONNXRUNTIME_ROOT}/wasm/js_post_js_64.js" + ) + else () + set(MAXIMUM_MEMORY "4294967296") + target_link_options(onnxruntime_webassembly PRIVATE + --post-js "${ONNXRUNTIME_ROOT}/wasm/js_post_js.js" + ) + endif () + target_link_options(onnxruntime_webassembly PRIVATE "SHELL:-s EXPORTED_RUNTIME_METHODS=[${EXPORTED_RUNTIME_METHODS}]" "SHELL:-s EXPORTED_FUNCTIONS=${EXPORTED_FUNCTIONS}" - "SHELL:-s MAXIMUM_MEMORY=4294967296" + "SHELL:-s MAXIMUM_MEMORY=${MAXIMUM_MEMORY}" "SHELL:-s EXIT_RUNTIME=0" "SHELL:-s ALLOW_MEMORY_GROWTH=1" "SHELL:-s MODULARIZE=1" "SHELL:-s EXPORT_ALL=0" "SHELL:-s VERBOSE=0" - "SHELL:-s FILESYSTEM=0" + "SHELL:-s FILESYSTEM=1" "SHELL:-s INCOMING_MODULE_JS_API=[preRun,locateFile,arguments,onExit,wasmMemory,buffer,instantiateWasm,mainScriptUrlOrBlob]" "SHELL:-s WASM_BIGINT=1" + "SHELL:-s ERROR_ON_UNDEFINED_SYMBOLS=0" + "SHELL:-s SIGNATURE_CONVERSIONS=OrtRun:_pppppppp,OrtGetTensorData:_ppppp,OrtCreateTensor:p_pppp,OrtCreateSession:pppp,OrtReleaseSession:_p,OrtGetInputOutputCount:pppp,OrtCreateSessionOptions:pp__p_ppppp,OrtAddSessionConfigEntry:pppp,OrtReleaseSessionOptions:_p,OrtAppendExecutionProvider:ppp,OrtGetInputName:ppp,OrtGetOutputName:ppp,OrtCreateRunOptions:ppp_p,OrtReleaseRunOptions:pp,OrtReleaseTensor:_p,OrtFree:_p,OrtGetLastError:_pp,JsepOutput:pp_p" ${WASM_API_EXCEPTION_CATCHING} --no-entry "SHELL:--pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre.js\"" @@ -241,8 +286,11 @@ else() target_compile_definitions(onnxruntime_webassembly PRIVATE USE_JSEP=1) target_link_options(onnxruntime_webassembly PRIVATE "SHELL:--pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js\"" - "SHELL:-s ASYNCIFY=1" + "SHELL:-s ASYNCIFY=2" + "SHELL:-s JSPI" + "SHELL:-s ASYNCIFY_IGNORE_INDIRECT=1" "SHELL:-s ASYNCIFY_STACK_SIZE=65536" + "SHELL:-s ASYNCIFY_EXPORTS=['OrtRun']" ) set_target_properties(onnxruntime_webassembly PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js) endif() @@ 
-279,7 +327,8 @@ else() endif() # Set link flag to enable exceptions support, this will override default disabling exception throwing behavior when disable exceptions. - target_link_options(onnxruntime_webassembly PRIVATE "SHELL:-s DISABLE_EXCEPTION_THROWING=0") + # target_link_options(onnxruntime_webassembly PRIVATE "SHELL:-s DISABLE_EXCEPTION_THROWING=0") + # target_link_options(onnxruntime_webassembly PRIVATE "-fwasm-exceptions") if (onnxruntime_ENABLE_WEBASSEMBLY_PROFILING) target_link_options(onnxruntime_webassembly PRIVATE --profiling --profiling-funcs) diff --git a/js/web/lib/index.ts b/js/web/lib/index.ts index 86c05b9a2fa15..ee4cc0067727b 100644 --- a/js/web/lib/index.ts +++ b/js/web/lib/index.ts @@ -23,11 +23,11 @@ if (!BUILD_DEFS.DISABLE_WASM) { const wasmBackend = BUILD_DEFS.DISABLE_TRAINING ? require('./backend-wasm-inference').wasmBackend : require('./backend-wasm-training').wasmBackend; if (!BUILD_DEFS.DISABLE_JSEP) { - registerBackend('webgpu', wasmBackend, 5); - registerBackend('webnn', wasmBackend, 5); + registerBackend('webgpu', wasmBackend, 1); + registerBackend('webnn', wasmBackend, 1); } - registerBackend('cpu', wasmBackend, 10); - registerBackend('wasm', wasmBackend, 10); + registerBackend('cpu', wasmBackend, 1); + registerBackend('wasm', wasmBackend, 1); } Object.defineProperty(env.versions, 'web', {value: version, enumerable: true}); diff --git a/js/web/lib/wasm/jsep/init.ts b/js/web/lib/wasm/jsep/init.ts index 242f7e939cda0..a410c77890354 100644 --- a/js/web/lib/wasm/jsep/init.ts +++ b/js/web/lib/wasm/jsep/init.ts @@ -3,8 +3,8 @@ import {Env} from 'onnxruntime-common'; -import type {OrtWasmModule} from '../wasm-types'; import {DataType, getTensorElementSize} from '../wasm-common'; +import type {OrtWasmModule} from '../wasm-types'; import {WebGpuBackend} from './backend-webgpu'; import {LOG_DEBUG} from './log'; @@ -68,24 +68,24 @@ class ComputeContextImpl implements ComputeContext { private customDataSize = 0; constructor(private module: 
OrtWasmModule, private backend: WebGpuBackend, contextDataOffset: number) { this.adapterInfo = backend.adapterInfo; - const heapU32 = module.HEAPU32; + const heap = module.PTR_SIZE === 4 ? module.HEAPU32 : module.HEAPU64; // extract context data - let dataIndex = (contextDataOffset >>> 2); - this.opKernelContext = heapU32[dataIndex++]; - const inputCount = heapU32[dataIndex++]; - this.outputCount = heapU32[dataIndex++]; - this.customDataOffset = heapU32[dataIndex++]; - this.customDataSize = heapU32[dataIndex++]; + let dataIndex = module.PTR_SIZE === 8 ? (contextDataOffset / 2 ** 3) : (contextDataOffset >> 2); + this.opKernelContext = Number(heap[dataIndex++]); + const inputCount = Number(heap[dataIndex++]); + this.outputCount = Number(heap[dataIndex++]); + this.customDataOffset = Number(heap[dataIndex++]); + this.customDataSize = Number(heap[dataIndex++]); const inputs: TensorView[] = []; for (let i = 0; i < inputCount; i++) { - const dataType = heapU32[dataIndex++]; - const data = heapU32[dataIndex++]; - const dim = heapU32[dataIndex++]; + const dataType = Number(heap[dataIndex++]); + const data = Number(heap[dataIndex++]); + const dim = Number(heap[dataIndex++]); const dims: number[] = []; for (let d = 0; d < dim; d++) { - dims.push(heapU32[dataIndex++]); + dims.push(Number(heap[dataIndex++])); } inputs.push(new TensorViewImpl(module, dataType, data, dims)); } @@ -127,11 +127,11 @@ class ComputeContextImpl implements ComputeContext { output(index: number, dims: readonly number[]): number { const stack = this.module.stackSave(); try { - const data = this.module.stackAlloc((1 + dims.length) * 4 /* sizeof(size_t) */); - let offset = data >> 2; - this.module.HEAPU32[offset++] = dims.length; + const ptrSize = this.module.PTR_SIZE; + const data = this.module.stackAlloc((1 + dims.length) * ptrSize /* sizeof(size_t) */); + this.module.setValue(data, dims.length, '*'); for (let i = 0; i < dims.length; i++) { - this.module.HEAPU32[offset++] = dims[i]; + 
this.module.setValue(data + ptrSize * (i + 1), dims[i], '*'); } return this.module._JsepOutput!(this.opKernelContext, index, data); } catch (e) { @@ -193,10 +193,15 @@ export const init = // jsepCopy(src, dst, size, isSourceGpu) (src: number, dst: number, size: number, isSourceGpu = false) => { if (isSourceGpu) { - LOG_DEBUG('verbose', () => `[WebGPU] jsepCopyGpuToGpu: src=${src}, dst=${dst}, size=${size}`); + LOG_DEBUG( + 'verbose', + () => `[WebGPU] jsepCopyGpuToGpu: src=${Number(src)}, dst=${Number(dst)}, size=${Number(size)}`); backend.memcpy(src, dst); } else { - LOG_DEBUG('verbose', () => `[WebGPU] jsepCopyCpuToGpu: dataOffset=${src}, gpuDataId=${dst}, size=${size}`); + LOG_DEBUG( + 'verbose', + () => `[WebGPU] jsepCopyCpuToGpu: dataOffset=${Number(src)}, gpuDataId=${Number(dst)}, size=${ + Number(size)}`); const data = module.HEAPU8.subarray(src >>> 0, (src >>> 0) + size); backend.upload(dst, data); } @@ -226,7 +231,7 @@ export const init = 'verbose', () => `[WebGPU] jsepRun: sessionHandle=${sessionHandle}, kernel=${kernel}, contextDataOffset=${ contextDataOffset}`); - const context = new ComputeContextImpl(module, backend, contextDataOffset); + const context = new ComputeContextImpl(module, backend, Number(contextDataOffset)); return backend.computeKernel(kernel, context, errors); }, // jsepCaptureBegin diff --git a/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts b/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts index a5c0a088efa6e..aa731757651a9 100644 --- a/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts +++ b/js/web/lib/wasm/jsep/webgpu/gpu-data-manager.ts @@ -112,7 +112,7 @@ const bucketArr: number[] = []; /** * normalize the buffer size so that it fits the 128-bits (16 bytes) alignment. */ -const calcNormalizedBufferSize = (size: number) => Math.ceil(size / 16) * 16; +const calcNormalizedBufferSize = (size: number) => Math.ceil(Number(size) / 16) * 16; /** * calculate the buffer size so that it fits into buckets. 
@@ -342,7 +342,7 @@ class GpuDataManagerImpl implements GpuDataManager { } const gpuData = {id: createNewGpuDataId(), type: GpuDataType.default, buffer: gpuBuffer}; - this.storageCache.set(gpuData.id, {gpuData, originalSize: size}); + this.storageCache.set(gpuData.id, {gpuData, originalSize: Number(size)}); LOG_DEBUG('verbose', () => `[WebGPU] GpuDataManager.create(size=${size}) => id=${gpuData.id}`); return gpuData; @@ -352,7 +352,8 @@ class GpuDataManagerImpl implements GpuDataManager { return this.storageCache.get(id)?.gpuData; } - release(id: GpuDataId): number { + release(idInput: GpuDataId): number { + const id = typeof idInput === 'bigint' ? Number(idInput) : idInput; const cachedData = this.storageCache.get(id); if (!cachedData) { throw new Error('releasing data does not exist'); diff --git a/js/web/lib/wasm/wasm-types.ts b/js/web/lib/wasm/wasm-types.ts index 9ced89651e844..98e6b9f2bcec4 100644 --- a/js/web/lib/wasm/wasm-types.ts +++ b/js/web/lib/wasm/wasm-types.ts @@ -192,15 +192,26 @@ export interface OrtTrainingAPIs { _OrtTrainingReleaseSession(trainingHandle: number): void; } +export type FSNode = { + contents: Uint8Array; + usedBytes: number; +} + /** * The interface of the WebAssembly module for ONNX Runtime, compiled from C++ source code by Emscripten. 
*/ export interface OrtWasmModule extends EmscriptenModule, OrtInferenceAPIs, Partial, Partial { + HEAP64: BigInt64Array; + HEAPU64: BigUint64Array; + PTR_SIZE: number; + FS: {create(path: string): FSNode; chdir(path: string): void; unlink(path: string|FSNode): void}; // #region emscripten functions stackSave(): number; stackRestore(stack: number): void; stackAlloc(size: number): number; + getValue(ptr: number, type: string): number; + setValue(ptr: number, value: number, type: string): void; UTF8ToString(offset: number, maxBytesToRead?: number): string; lengthBytesUTF8(str: string): number; diff --git a/onnxruntime/core/providers/js/js_kernel.h b/onnxruntime/core/providers/js/js_kernel.h index 7324b0d69474c..714cae3cadf1e 100644 --- a/onnxruntime/core/providers/js/js_kernel.h +++ b/onnxruntime/core/providers/js/js_kernel.h @@ -110,16 +110,16 @@ class JsKernel : public OpKernel { temp_data_size += sizeof(size_t) * 3; } } - uint32_t* p_serialized_kernel_context = reinterpret_cast(alloc->Alloc(temp_data_size)); + uintptr_t* p_serialized_kernel_context = reinterpret_cast(alloc->Alloc(temp_data_size)); if (p_serialized_kernel_context == nullptr) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to allocate memory for serialized kernel context."); } - p_serialized_kernel_context[0] = reinterpret_cast(context); - p_serialized_kernel_context[1] = static_cast(context->InputCount()); - p_serialized_kernel_context[2] = static_cast(context->OutputCount()); - p_serialized_kernel_context[3] = reinterpret_cast(custom_data_ptr); - p_serialized_kernel_context[4] = static_cast(custom_data_size); + p_serialized_kernel_context[0] = reinterpret_cast(context); + p_serialized_kernel_context[1] = static_cast(context->InputCount()); + p_serialized_kernel_context[2] = static_cast(context->OutputCount()); + p_serialized_kernel_context[3] = reinterpret_cast(custom_data_ptr); + p_serialized_kernel_context[4] = static_cast(custom_data_size); size_t index = 5; for (int i = 0; i < 
context->InputCount(); i++) { const auto* input_ptr = context->Input(i); @@ -130,11 +130,11 @@ class JsKernel : public OpKernel { p_serialized_kernel_context[index++] = 0; continue; } - p_serialized_kernel_context[index++] = static_cast(input_ptr->GetElementType()); - p_serialized_kernel_context[index++] = reinterpret_cast(input_ptr->DataRaw()); - p_serialized_kernel_context[index++] = static_cast(input_ptr->Shape().NumDimensions()); + p_serialized_kernel_context[index++] = static_cast(input_ptr->GetElementType()); + p_serialized_kernel_context[index++] = reinterpret_cast(input_ptr->DataRaw()); + p_serialized_kernel_context[index++] = static_cast(input_ptr->Shape().NumDimensions()); for (size_t d = 0; d < input_ptr->Shape().NumDimensions(); d++) { - p_serialized_kernel_context[index++] = static_cast(input_ptr->Shape()[d]); + p_serialized_kernel_context[index++] = static_cast(input_ptr->Shape()[d]); } } @@ -201,7 +201,7 @@ class JsKernel : public OpKernel { int status_code = EM_ASM_INT( { return Module.jsepRunKernel($0, $1, Module.jsepSessionState.sessionHandle, Module.jsepSessionState.errors); }, - this, reinterpret_cast(p_serialized_kernel_context)); + this, reinterpret_cast(p_serialized_kernel_context)); LOGS_DEFAULT(VERBOSE) << "outputs = " << context->OutputCount() << ". Y.data=" << (size_t)(context->Output(0)->DataRaw()) << "."; diff --git a/onnxruntime/core/providers/js/operators/reduce.h b/onnxruntime/core/providers/js/operators/reduce.h index 937f1f990dc67..a69bc3c78fac5 100644 --- a/onnxruntime/core/providers/js/operators/reduce.h +++ b/onnxruntime/core/providers/js/operators/reduce.h @@ -24,7 +24,7 @@ namespace js { JSEP_INIT_KERNEL_ATTRIBUTE(ReduceKernel, ({ \ "keepDims" : !!$1, \ "noopWithEmptyAxes" : !!$2, \ - "axes" : $3 ? (Array.from(HEAP32.subarray($3, $4))) : [], \ + "axes" : $3 ? 
(Array.from(HEAP32.subarray($3, Number($4)))) : [], \ }), \ static_cast(keepdims_), \ static_cast(noop_with_empty_axes_), \ diff --git a/onnxruntime/core/providers/js/operators/resize.h b/onnxruntime/core/providers/js/operators/resize.h index 134eb4bf5a7f4..d32134589002c 100644 --- a/onnxruntime/core/providers/js/operators/resize.h +++ b/onnxruntime/core/providers/js/operators/resize.h @@ -23,7 +23,7 @@ class Resize : public JsKernel, public UpsampleBase { std::transform(axes_.begin(), axes_.end(), std::back_inserter(axes), [](auto& axis) { return gsl::narrow_cast(axis); }); JSEP_INIT_KERNEL_ATTRIBUTE(Resize, ({ "antialias" : $1, - "axes" : $2 ? Array.from(HEAP32.subarray($2, $3)) : [], + "axes" : $2 ? Array.from(HEAP32.subarray($2, Number($3))) : [], "coordinateTransformMode" : UTF8ToString($4), "cubicCoeffA" : $5, "excludeOutside" : $6, diff --git a/onnxruntime/core/providers/js/operators/slice.h b/onnxruntime/core/providers/js/operators/slice.h index daeffaa664741..510f48a7384f6 100644 --- a/onnxruntime/core/providers/js/operators/slice.h +++ b/onnxruntime/core/providers/js/operators/slice.h @@ -20,9 +20,9 @@ class Slice : public JsKernel, public SliceBase { std::vector starts(attr_starts.begin(), attr_starts.end()); std::vector ends(attr_ends.begin(), attr_ends.end()); - JSEP_INIT_KERNEL_ATTRIBUTE(Slice, ({"starts" : $1 ? Array.from(HEAP32.subarray($1, $2)) : [], - "ends" : $3 ? Array.from(HEAP32.subarray($3, $4)) : [], - "axes" : $5 ? Array.from(HEAP32.subarray($5, $6)) : []}), + JSEP_INIT_KERNEL_ATTRIBUTE(Slice, ({"starts" : $1 ? Array.from(HEAP32.subarray($1, Number($2))) : [], + "ends" : $3 ? Array.from(HEAP32.subarray($3, Number($4))) : [], + "axes" : $5 ? 
Array.from(HEAP32.subarray($5, Number($6))) : []}), JSEP_HEAP32_INDEX_START(starts), JSEP_HEAP32_INDEX_END(starts), JSEP_HEAP32_INDEX_START(ends), diff --git a/onnxruntime/core/providers/js/operators/split.h b/onnxruntime/core/providers/js/operators/split.h index 4fdbab00e739c..e9a0b99ee4ba8 100644 --- a/onnxruntime/core/providers/js/operators/split.h +++ b/onnxruntime/core/providers/js/operators/split.h @@ -49,7 +49,7 @@ class Split : public JsKernel, public SplitBase { JSEP_INIT_KERNEL_ATTRIBUTE(Split, ({"axis" : $1, "numOutputs" : $2, - "splitSizes" : $3 ? Array.from(HEAP32.subarray($3, $4)) : []}), + "splitSizes" : $3 ? Array.from(HEAP32.subarray($3, Number($4))) : []}), static_cast(axis_), static_cast(num_outputs_), JSEP_HEAP32_INDEX_START(split_sizes), diff --git a/onnxruntime/core/providers/js/operators/transpose.h b/onnxruntime/core/providers/js/operators/transpose.h index f43dd814aa959..3128dcdd85c68 100644 --- a/onnxruntime/core/providers/js/operators/transpose.h +++ b/onnxruntime/core/providers/js/operators/transpose.h @@ -21,7 +21,7 @@ class Transpose final : public JsKernel, public TransposeBase { } } JSEP_INIT_KERNEL_ATTRIBUTE(Transpose, ({ - "perm" : $1 ? Array.from(HEAP32.subarray($1, $2)) : [] + "perm" : $1 ? 
Array.from(HEAP32.subarray($1, Number($2))) : [] }), JSEP_HEAP32_INDEX_START(perm), JSEP_HEAP32_INDEX_END(perm)); diff --git a/onnxruntime/wasm/api.cc b/onnxruntime/wasm/api.cc index 0e58bb4f93f7f..51f3eafa255bc 100644 --- a/onnxruntime/wasm/api.cc +++ b/onnxruntime/wasm/api.cc @@ -27,8 +27,7 @@ enum DataLocation { }; static_assert(sizeof(const char*) == sizeof(size_t), "size of a pointer and a size_t value should be the same."); -static_assert(sizeof(size_t) == 4, "size of size_t should be 4 in this build (wasm32)."); - +//static_assert(sizeof(size_t) == 4, "size of size_t should be 4 in this build (wasm32)."); OrtErrorCode CheckStatus(OrtStatusPtr status) { if (status) { std::string error_message = Ort::GetApi().GetErrorMessage(status); diff --git a/onnxruntime/wasm/api.h b/onnxruntime/wasm/api.h index 2cd1515d191c8..755303d5d4802 100644 --- a/onnxruntime/wasm/api.h +++ b/onnxruntime/wasm/api.h @@ -183,7 +183,7 @@ ort_tensor_handle_t EMSCRIPTEN_KEEPALIVE OrtCreateTensor(int data_type, void* da * 'dims' (for all types of tensor), 'data' (only for string tensor) * @returns ORT error code. If not zero, call OrtGetLastError() to get detailed error message. */ -int EMSCRIPTEN_KEEPALIVE OrtGetTensorData(ort_tensor_handle_t tensor, int* data_type, void** data, size_t** dims, size_t* dims_length); +int EMSCRIPTEN_KEEPALIVE OrtGetTensorData(ort_tensor_handle_t tensor, size_t* data_type, void** data, size_t** dims, size_t* dims_length); /** * release the specified tensor. diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index b73a17db3ce13..293ce3fdc49d0 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -401,7 +401,7 @@ def convert_arg_line_to_args(self, arg_line): help="Build with a specific GDK edition. 
Defaults to the latest installed.", ) parser.add_argument("--gdk_platform", default="Scarlett", help="Sets the GDK target platform.") - + parser.add_argument("--enable_wasm_memory64", action="store_true", help="Enable WebAssembly 64bit support") platform_group = parser.add_mutually_exclusive_group() platform_group.add_argument("--ios", action="store_true", help="build for ios") platform_group.add_argument("--visionos", action="store_true", help="build for visionOS") @@ -1090,6 +1090,7 @@ def generate_build_tree( + ("ON" if args.enable_wasm_exception_throwing_override else "OFF"), "-Donnxruntime_WEBASSEMBLY_RUN_TESTS_IN_BROWSER=" + ("ON" if args.wasm_run_tests_in_browser else "OFF"), "-Donnxruntime_ENABLE_WEBASSEMBLY_THREADS=" + ("ON" if args.enable_wasm_threads else "OFF"), + "-Donnxruntime_ENABLE_WEBASSEMBLY_MEMORY64=" + ("ON" if args.enable_wasm_memory64 else "OFF"), "-Donnxruntime_ENABLE_WEBASSEMBLY_DEBUG_INFO=" + ("ON" if args.enable_wasm_debug_info else "OFF"), "-Donnxruntime_ENABLE_WEBASSEMBLY_PROFILING=" + ("ON" if args.enable_wasm_profiling else "OFF"), "-Donnxruntime_ENABLE_LAZY_TENSOR=" + ("ON" if args.enable_lazy_tensor else "OFF"),