Skip to content

Commit 78c460b

Browse files
committed
[libc] Fix incorrect count when initializing slab
Summary: The lane that initializes a slab should share the resulting count with all of its neighboring lanes. Right now the neighbors are set to the sentinel value and the result is not shuffled to them correctly. Shuffle the count from the initializing lane after initialization so that every lane correctly reports that the allocation succeeded.
1 parent 9b4a44d commit 78c460b

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

libc/src/__support/GPU/allocator.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -460,11 +460,13 @@ struct GuardPtr {
460460
result->initialize(uniform);
461461
if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform)))
462462
finalize(result, cpp::popcount(uniform), count);
463+
count =
464+
gpu::shuffle(gpu::get_lane_mask(), cpp::countr_zero(uniform), count);
463465
}
464466

465467
if (!impl::is_sentinel(count))
466468
count = count - cpp::popcount(uniform) +
467-
impl::lane_count(uniform, gpu::get_lane_id()) + 1;
469+
impl::lane_count(uniform, gpu::get_lane_id());
468470

469471
return result;
470472
}
@@ -536,13 +538,13 @@ static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) {
536538
// If we find a slab with a matching chunk size then we store the result.
537539
// Otherwise, we need to free the claimed lock and continue. In the case
538540
// of out-of-memory we receive a sentinel value and return a failure.
539-
if (slab && reserved <= Slab::available_chunks(chunk_size) &&
541+
if (slab && reserved < Slab::available_chunks(chunk_size) &&
540542
slab->get_chunk_size() == chunk_size) {
541543
if (index != start)
542544
indices[chunk_id].store(index, cpp::MemoryOrder::RELAXED);
543545
uniform = uniform & gpu::get_lane_mask();
544546
return slab;
545-
} else if (slab && (reserved > Slab::available_chunks(chunk_size) ||
547+
} else if (slab && (reserved >= Slab::available_chunks(chunk_size) ||
546548
slab->get_chunk_size() != chunk_size)) {
547549
slots[index].unlock(gpu::get_lane_mask(),
548550
gpu::get_lane_mask() & uniform);

0 commit comments

Comments
 (0)