Changes that should not cause a crash, but do. #525

Open · wants to merge 1 commit into master
k2/csrc/intersect_pruned.cu: 2 changes (1 addition, 1 deletion)

@@ -846,7 +846,7 @@ class MultiGraphDenseIntersectPruned {
         // Set the forward log-like of the dest state to the largest of any
         // of those of the incoming arcs.  Note: we initialized this in
         // lambda_init_loglike above.
-        AtomicMax(&(kept_states_data[state_idx01].forward_loglike),
+        atomicMax(&(kept_states_data[state_idx01].forward_loglike),
                   end_loglike_int);
       });
   }
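Context for this hunk: the utils.h hunk below sits right next to OrderedIntToFloat, and the value written here is named end_loglike_int, so forward_loglike is evidently stored as an "ordered int": a float whose bit pattern has been remapped so that signed-integer comparison agrees with float comparison, which lets the integer atomicMax act as an atomic max over log-likelihoods. A minimal sketch of that trick (the *Sketch names are hypothetical, not k2's code; compile as a .cu file):

#include <cstdint>
#include <cstring>

// Remap a float's bits so that signed-integer order matches float order.
// Non-negative floats already compare correctly as ints; negative floats
// get their 31 non-sign bits flipped, which reverses their inverted order.
__host__ __device__ __forceinline__ int32_t FloatToOrderedIntSketch(float f) {
  int32_t i;
  memcpy(&i, &f, sizeof(i));
  return i >= 0 ? i : i ^ 0x7FFFFFFF;
}

// Inverse of the remapping above.
__host__ __device__ __forceinline__ float OrderedIntToFloatSketch(int32_t i) {
  i = i >= 0 ? i : i ^ 0x7FFFFFFF;
  float f;
  memcpy(&f, &i, sizeof(f));
  return f;
}

// With the remapping applied, CUDA's built-in integer atomicMax computes an
// atomic max over floats, which is what the call site above relies on.
__global__ void MaxLoglikeSketch(int32_t *best, const float *vals, int n) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < n) atomicMax(best, FloatToOrderedIntSketch(vals[idx]));
}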
k2/csrc/utils.h: 6 changes (1 addition, 5 deletions)

@@ -601,14 +601,10 @@ __host__ __device__ __forceinline__ float OrderedIntToFloat(int32_t i) {
    host version of Cuda's atomicMax function, marked __host__ (the default) for
    clarity.  So we can use this in lambdas that run on both host and device.
  */
-__host__ __device__ __forceinline__ int32_t AtomicMax(int32_t *address, int32_t val) {
-#if defined(__CUDA_ARCH__)
-  return atomicMax(address, val);
-#else
+__host__ __forceinline__ int32_t atomicMax(int32_t *address, int32_t val) {
   int32_t old = *address;
   if (old < val) *address = val;
   return old;
-#endif
 }

 // have to figure out if there's a better place to put this
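Taken together, the two hunks drop the __CUDA_ARCH__-dispatching wrapper and instead give the host its own atomicMax overload, leaving device code to bind to CUDA's built-in __device__ atomicMax; per the title, this seemingly equivalent arrangement crashes. For reference, here is a self-contained version of the pattern being removed (AtomicMaxSketch and KernelSketch are hypothetical names; the host fallback is a plain read-modify-write, so it is only safe when the host path is single-threaded):

#include <cstdint>
#include <cstdio>

// The dispatch pattern the PR removes: one wrapper, compiled in both the
// host and device passes, that calls CUDA's built-in atomicMax on device
// and a plain (non-atomic, single-thread-only) fallback on the host.
__host__ __device__ __forceinline__ int32_t AtomicMaxSketch(int32_t *address,
                                                            int32_t val) {
#if defined(__CUDA_ARCH__)
  return atomicMax(address, val);
#else
  int32_t old = *address;
  if (old < val) *address = val;
  return old;
#endif
}

__global__ void KernelSketch(int32_t *m, int32_t v) { AtomicMaxSketch(m, v); }

int main() {
  int32_t m = 0;
  AtomicMaxSketch(&m, 7);  // host path: takes the plain fallback branch
  printf("%d\n", m);       // prints 7
  return 0;
}

This works because nvcc compiles a __host__ __device__ function once per execution space and defines __CUDA_ARCH__ only in the device passes, so each side gets the appropriate body.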