storage/allocator/ssd/ssd_slab_allocator.h
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cstring> | ||
| 5 | #include <memory> | ||
| 6 | #include <mutex> | ||
| 7 | #include <stdexcept> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "storage/allocator/ssd/ssd_block_allocator.h" | ||
| 11 | #include "storage/io_backend/io_backend.h" | ||
| 12 | |||
| 13 | class SsdSlabAllocator : public SsdBlockAllocator { | ||
| 14 | public: | ||
| 15 | 16 | SsdSlabAllocator(IOBackend* backend, | |
| 16 | const std::vector<int>& size_classes, | ||
| 17 | uint64_t total_capacity_bytes, | ||
| 18 | uint64_t base_byte_offset) | ||
| 19 | 16 | : backend_(backend) { | |
| 20 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (backend_ == nullptr) { |
| 21 | ✗ | throw std::invalid_argument("SsdSlabAllocator backend is null"); | |
| 22 | } | ||
| 23 | size_classes_ = | ||
| 24 |
3/10✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 8 taken 16 times.
✗ Branch 9 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 16 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
|
32 | size_classes.empty() ? std::vector<int>{128, 256, 512, 1024, 4096} |
| 25 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | : size_classes; |
| 26 |
1/2✓ Branch 3 taken 16 times.
✗ Branch 4 not taken.
|
16 | std::sort(size_classes_.begin(), size_classes_.end()); |
| 27 | |||
| 28 | 16 | const uint64_t total_blocks = total_capacity_bytes / kBlockSize; | |
| 29 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | global_free_blocks_.reserve(total_blocks); |
| 30 |
2/2✓ Branch 0 taken 82 times.
✓ Branch 1 taken 16 times.
|
98 | for (uint64_t i = 0; i < total_blocks; ++i) { |
| 31 |
1/2✓ Branch 1 taken 82 times.
✗ Branch 2 not taken.
|
82 | global_free_blocks_.push_back(base_byte_offset + i * kBlockSize); |
| 32 | } | ||
| 33 | |||
| 34 |
2/2✓ Branch 5 taken 28 times.
✓ Branch 6 taken 16 times.
|
44 | for (int cls : size_classes_) { |
| 35 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | auto pool = std::make_unique<SlabPool>(); |
| 36 | 28 | pool->slot_size = static_cast<uint64_t>(cls) + kHeaderSize; | |
| 37 |
3/6✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 28 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 28 times.
|
28 | if (pool->slot_size == 0 || kBlockSize < pool->slot_size) { |
| 38 | ✗ | throw std::invalid_argument( | |
| 39 | ✗ | "SsdSlabAllocator invalid slot size vs block"); | |
| 40 | } | ||
| 41 | 28 | pool->slots_per_block = kBlockSize / pool->slot_size; | |
| 42 |
1/2✓ Branch 2 taken 28 times.
✗ Branch 3 not taken.
|
28 | slabs_.push_back(std::move(pool)); |
| 43 | 28 | } | |
| 44 | 16 | } | |
| 45 | |||
| 46 | 1211254 | uint64_t Alloc(size_t data_size) override { | |
| 47 | 1211254 | const uint16_t slab_idx = SelectSlab(data_size); | |
| 48 | 1211254 | auto& slab = *slabs_.at(slab_idx); | |
| 49 | |||
| 50 | for (;;) { | ||
| 51 |
1/2✓ Branch 1 taken 1211254 times.
✗ Branch 2 not taken.
|
1211254 | std::unique_lock<std::mutex> slab_lock(slab.mu); |
| 52 |
2/2✓ Branch 1 taken 1211162 times.
✓ Branch 2 taken 92 times.
|
1211254 | while (!slab.partial_blocks.empty()) { |
| 53 | 1211162 | const uint64_t block_idx = slab.partial_blocks.back(); | |
| 54 |
1/2✓ Branch 1 taken 1211162 times.
✗ Branch 2 not taken.
|
1211162 | Block& b = slab.blocks.at(block_idx); |
| 55 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1211162 times.
|
1211162 | if (b.tombstone) { |
| 56 | ✗ | RemoveFromPartial(slab, block_idx); | |
| 57 | ✗ | continue; | |
| 58 | } | ||
| 59 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1211162 times.
|
1211162 | if (!HasSpace(b, slab)) { |
| 60 | ✗ | RemoveFromPartial(slab, block_idx); | |
| 61 | ✗ | continue; | |
| 62 | } | ||
| 63 | 1211162 | uint64_t slot_in_block = 0; | |
| 64 |
2/2✓ Branch 1 taken 2 times.
✓ Branch 2 taken 1211160 times.
|
1211162 | if (!b.free_in_block.empty()) { |
| 65 | 2 | slot_in_block = b.free_in_block.back(); | |
| 66 | 2 | b.free_in_block.pop_back(); | |
| 67 | } else { | ||
| 68 | 1211160 | slot_in_block = b.cursor; | |
| 69 | 1211160 | b.cursor++; | |
| 70 | } | ||
| 71 | 1211162 | b.used_count++; | |
| 72 |
1/2✓ Branch 1 taken 1211162 times.
✗ Branch 2 not taken.
|
1211162 | UpdatePartialAfterAlloc(slab, block_idx, b); |
| 73 | 1211162 | const uint64_t local_id = | |
| 74 | 1211162 | block_idx * slab.slots_per_block + slot_in_block; | |
| 75 | 1211162 | return Encode(slab_idx, local_id); | |
| 76 | } | ||
| 77 | |||
| 78 |
1/2✓ Branch 1 taken 92 times.
✗ Branch 2 not taken.
|
92 | slab_lock.unlock(); |
| 79 | |||
| 80 |
1/2✓ Branch 1 taken 92 times.
✗ Branch 2 not taken.
|
92 | std::unique_lock<std::mutex> g_lock(global_mu_); |
| 81 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 86 times.
|
92 | if (global_free_blocks_.empty()) { |
| 82 | 6 | return kInvalidHandle; | |
| 83 | } | ||
| 84 | 86 | const uint64_t byte_off = global_free_blocks_.back(); | |
| 85 | 86 | global_free_blocks_.pop_back(); | |
| 86 |
1/2✓ Branch 1 taken 86 times.
✗ Branch 2 not taken.
|
86 | g_lock.unlock(); |
| 87 | |||
| 88 |
1/2✓ Branch 1 taken 86 times.
✗ Branch 2 not taken.
|
86 | slab_lock.lock(); |
| 89 | 86 | uint64_t block_idx = 0; | |
| 90 | 86 | Block nb; | |
| 91 | 86 | nb.byte_offset = byte_off; | |
| 92 | 86 | nb.cursor = 0; | |
| 93 | 86 | nb.used_count = 0; | |
| 94 | 86 | nb.tombstone = false; | |
| 95 | |||
| 96 |
2/2✓ Branch 1 taken 4 times.
✓ Branch 2 taken 82 times.
|
86 | if (!slab.tombstone_indices.empty()) { |
| 97 | 4 | block_idx = slab.tombstone_indices.back(); | |
| 98 | 4 | slab.tombstone_indices.pop_back(); | |
| 99 | 4 | slab.blocks[block_idx] = std::move(nb); | |
| 100 | } else { | ||
| 101 | 82 | block_idx = slab.blocks.size(); | |
| 102 |
1/2✓ Branch 2 taken 82 times.
✗ Branch 3 not taken.
|
82 | slab.blocks.push_back(std::move(nb)); |
| 103 | } | ||
| 104 | |||
| 105 | 86 | Block& b = slab.blocks[block_idx]; | |
| 106 | 86 | const uint64_t slot_in_block = 0; | |
| 107 | 86 | b.cursor = 1; | |
| 108 | 86 | b.used_count = 1; | |
| 109 |
1/2✓ Branch 1 taken 86 times.
✗ Branch 2 not taken.
|
86 | UpdatePartialAfterAlloc(slab, block_idx, b); |
| 110 | 86 | const uint64_t local_id = | |
| 111 | 86 | block_idx * slab.slots_per_block + slot_in_block; | |
| 112 | 86 | return Encode(slab_idx, local_id); | |
| 113 | 1211254 | } | |
| 114 | } | ||
| 115 | |||
| 116 | 65476 | void Free(uint64_t handle) override { | |
| 117 | uint16_t slab_idx; | ||
| 118 | uint64_t local_slot_id; | ||
| 119 | 65476 | Decode(handle, slab_idx, local_slot_id); | |
| 120 |
1/2✓ Branch 1 taken 65476 times.
✗ Branch 2 not taken.
|
65476 | auto& slab = *slabs_.at(slab_idx); |
| 121 | 65476 | const uint64_t block_idx = local_slot_id / slab.slots_per_block; | |
| 122 | 65476 | const uint64_t slot_in_block = local_slot_id % slab.slots_per_block; | |
| 123 | |||
| 124 |
1/2✓ Branch 1 taken 65476 times.
✗ Branch 2 not taken.
|
65476 | std::unique_lock<std::mutex> slab_lock(slab.mu); |
| 125 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 65476 times.
|
65476 | if (block_idx >= slab.blocks.size()) { |
| 126 | ✗ | return; | |
| 127 | } | ||
| 128 | 65476 | Block& b = slab.blocks[block_idx]; | |
| 129 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 65476 times.
|
65476 | if (b.tombstone) { |
| 130 | ✗ | return; | |
| 131 | } | ||
| 132 | 65476 | b.used_count--; | |
| 133 |
1/2✓ Branch 1 taken 65476 times.
✗ Branch 2 not taken.
|
65476 | b.free_in_block.push_back(slot_in_block); |
| 134 |
1/2✓ Branch 1 taken 65476 times.
✗ Branch 2 not taken.
|
65476 | UpdatePartialAfterFree(slab, block_idx, b); |
| 135 | |||
| 136 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 65470 times.
|
65476 | if (b.used_count == 0) { |
| 137 | 6 | const uint64_t off = b.byte_offset; | |
| 138 | 6 | slab.blocks[block_idx] = Block{}; | |
| 139 | 6 | slab.blocks[block_idx].tombstone = true; | |
| 140 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | RemoveFromPartial(slab, block_idx); |
| 141 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | slab.tombstone_indices.push_back(block_idx); |
| 142 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | slab_lock.unlock(); |
| 143 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | std::lock_guard<std::mutex> g_lock(global_mu_); |
| 144 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | global_free_blocks_.push_back(off); |
| 145 | 6 | } | |
| 146 |
1/2✓ Branch 1 taken 65476 times.
✗ Branch 2 not taken.
|
65476 | } |
| 147 | |||
| 148 | 10 | void Write(uint64_t handle, const void* data, size_t data_size) override { | |
| 149 | uint16_t slab_idx; | ||
| 150 | uint64_t local_slot_id; | ||
| 151 | 10 | Decode(handle, slab_idx, local_slot_id); | |
| 152 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | const auto& slab = *slabs_.at(slab_idx); |
| 153 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
|
10 | if (data_size + kHeaderSize > slab.slot_size) { |
| 154 |
1/2✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
|
2 | throw std::invalid_argument("SsdSlabAllocator write exceeds slot size"); |
| 155 | } | ||
| 156 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const uint64_t byte_off = ResolveSlotByteOffset(slab_idx, local_slot_id); |
| 157 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | WriteBytes(byte_off, slab.slot_size, data, data_size); |
| 158 | 8 | } | |
| 159 | |||
| 160 | 8 | size_t Read(uint64_t handle, void* out_buf, size_t buf_size) override { | |
| 161 | uint16_t slab_idx; | ||
| 162 | uint64_t local_slot_id; | ||
| 163 | 8 | Decode(handle, slab_idx, local_slot_id); | |
| 164 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const auto& slab = *slabs_.at(slab_idx); |
| 165 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const uint64_t byte_off = ResolveSlotByteOffset(slab_idx, local_slot_id); |
| 166 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
16 | return ReadBytes(byte_off, slab.slot_size, out_buf, buf_size); |
| 167 | } | ||
| 168 | |||
| 169 | 2 | uint64_t AllocAndWrite(const void* data, size_t data_size) override { | |
| 170 | 2 | const uint64_t handle = Alloc(data_size); | |
| 171 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if (handle != kInvalidHandle) { |
| 172 | 2 | Write(handle, data, data_size); | |
| 173 | } | ||
| 174 | 2 | return handle; | |
| 175 | } | ||
| 176 | |||
| 177 | ✗ | void BatchRead(const std::vector<ReadEntry>& entries, | |
| 178 | std::vector<size_t>& out_sizes) override { | ||
| 179 | ✗ | out_sizes.assign(entries.size(), 0); | |
| 180 | struct PendingRead { | ||
| 181 | size_t index = 0; | ||
| 182 | uint64_t page_off = 0; | ||
| 183 | size_t buf_size = 0; | ||
| 184 | }; | ||
| 185 | ✗ | std::vector<IOBackend::IOEntry> io_entries; | |
| 186 | ✗ | std::vector<PendingRead> pending; | |
| 187 | ✗ | io_entries.reserve(entries.size()); | |
| 188 | ✗ | pending.reserve(entries.size()); | |
| 189 | ✗ | for (size_t i = 0; i < entries.size(); ++i) { | |
| 190 | uint16_t slab_idx; | ||
| 191 | uint64_t local_slot_id; | ||
| 192 | ✗ | Decode(entries[i].handle, slab_idx, local_slot_id); | |
| 193 | ✗ | const auto& slab = *slabs_.at(slab_idx); | |
| 194 | ✗ | const uint64_t byte_off = ResolveSlotByteOffset(slab_idx, local_slot_id); | |
| 195 | ✗ | const PageID_t start = byte_off / PAGE_SIZE; | |
| 196 | ✗ | const uint64_t page_off = byte_off % PAGE_SIZE; | |
| 197 | ✗ | const uint64_t total = page_off + slab.slot_size; | |
| 198 | ✗ | const uint64_t pages = (total + PAGE_SIZE - 1) / PAGE_SIZE; | |
| 199 | ✗ | char* buf = backend_->AllocateBuffer(pages); | |
| 200 | ✗ | io_entries.push_back({start, buf, pages}); | |
| 201 | ✗ | pending.push_back(PendingRead{ | |
| 202 | i, | ||
| 203 | page_off, | ||
| 204 | ✗ | entries[i].out_buf == nullptr ? 0 : slab.slot_size - kHeaderSize}); | |
| 205 | } | ||
| 206 | ✗ | backend_->BatchReadPages(io_entries); | |
| 207 | ✗ | for (size_t i = 0; i < pending.size(); ++i) { | |
| 208 | ✗ | const auto& item = pending[i]; | |
| 209 | ✗ | const auto& io = io_entries[i]; | |
| 210 | ✗ | uint32_t n = 0; | |
| 211 | ✗ | std::memcpy(&n, io.buffer + item.page_off, sizeof(n)); | |
| 212 | ✗ | const size_t actual = std::min(item.buf_size, static_cast<size_t>(n)); | |
| 213 | ✗ | if (actual > 0) { | |
| 214 | ✗ | std::memcpy(entries[item.index].out_buf, | |
| 215 | ✗ | io.buffer + item.page_off + kHeaderSize, | |
| 216 | actual); | ||
| 217 | } | ||
| 218 | ✗ | out_sizes[item.index] = actual; | |
| 219 | ✗ | backend_->FreeBuffer(io.buffer); | |
| 220 | } | ||
| 221 | ✗ | } | |
| 222 | |||
| 223 | std::vector<uint64_t> | ||
| 224 | ✗ | BatchAllocAndWrite(const std::vector<WriteEntry>& entries) override { | |
| 225 | ✗ | std::vector<uint64_t> handles; | |
| 226 | ✗ | handles.reserve(entries.size()); | |
| 227 | struct PendingWrite { | ||
| 228 | size_t index = 0; | ||
| 229 | uint64_t page_off = 0; | ||
| 230 | }; | ||
| 231 | struct PageGroup { | ||
| 232 | PageID_t start = 0; | ||
| 233 | uint64_t pages = 0; | ||
| 234 | char* buf = nullptr; | ||
| 235 | std::vector<PendingWrite> pending; | ||
| 236 | }; | ||
| 237 | ✗ | std::vector<PageGroup> groups; | |
| 238 | ✗ | groups.reserve(entries.size()); | |
| 239 | ✗ | for (size_t i = 0; i < entries.size(); ++i) { | |
| 240 | ✗ | const uint64_t handle = Alloc(entries[i].data_size); | |
| 241 | ✗ | handles.push_back(handle); | |
| 242 | ✗ | if (handle == kInvalidHandle) { | |
| 243 | ✗ | continue; | |
| 244 | } | ||
| 245 | uint16_t slab_idx; | ||
| 246 | uint64_t local_slot_id; | ||
| 247 | ✗ | Decode(handle, slab_idx, local_slot_id); | |
| 248 | ✗ | const auto& slab = *slabs_.at(slab_idx); | |
| 249 | ✗ | const uint64_t byte_off = ResolveSlotByteOffset(slab_idx, local_slot_id); | |
| 250 | ✗ | const PageID_t start = byte_off / PAGE_SIZE; | |
| 251 | ✗ | const uint64_t page_off = byte_off % PAGE_SIZE; | |
| 252 | ✗ | const uint64_t total = page_off + slab.slot_size; | |
| 253 | ✗ | const uint64_t pages = (total + PAGE_SIZE - 1) / PAGE_SIZE; | |
| 254 | ✗ | auto it = std::find_if( | |
| 255 | ✗ | groups.begin(), groups.end(), [&](const PageGroup& group) { | |
| 256 | ✗ | return group.start == start && group.pages == pages; | |
| 257 | }); | ||
| 258 | ✗ | if (it == groups.end()) { | |
| 259 | ✗ | PageGroup group; | |
| 260 | ✗ | group.start = start; | |
| 261 | ✗ | group.pages = pages; | |
| 262 | ✗ | group.buf = backend_->AllocateBuffer(pages); | |
| 263 | ✗ | groups.push_back(std::move(group)); | |
| 264 | ✗ | it = std::prev(groups.end()); | |
| 265 | ✗ | } | |
| 266 | ✗ | it->pending.push_back(PendingWrite{i, page_off}); | |
| 267 | } | ||
| 268 | ✗ | if (!groups.empty()) { | |
| 269 | ✗ | std::vector<IOBackend::IOEntry> io_entries; | |
| 270 | ✗ | io_entries.reserve(groups.size()); | |
| 271 | ✗ | for (auto& group : groups) { | |
| 272 | ✗ | io_entries.push_back({group.start, group.buf, group.pages}); | |
| 273 | } | ||
| 274 | ✗ | backend_->BatchReadPages(io_entries); | |
| 275 | ✗ | for (auto& group : groups) { | |
| 276 | ✗ | for (const auto& item : group.pending) { | |
| 277 | ✗ | const auto& spec = entries[item.index]; | |
| 278 | ✗ | const uint32_t n = static_cast<uint32_t>(spec.data_size); | |
| 279 | ✗ | std::memcpy(group.buf + item.page_off, &n, sizeof(n)); | |
| 280 | ✗ | std::memcpy(group.buf + item.page_off + kHeaderSize, | |
| 281 | ✗ | spec.data, | |
| 282 | ✗ | spec.data_size); | |
| 283 | } | ||
| 284 | } | ||
| 285 | ✗ | backend_->BatchWritePages(io_entries); | |
| 286 | ✗ | for (auto& group : groups) { | |
| 287 | ✗ | backend_->FreeBuffer(group.buf); | |
| 288 | } | ||
| 289 | ✗ | } | |
| 290 | ✗ | return handles; | |
| 291 | ✗ | } | |
| 292 | |||
| 293 | 2 | size_t SlotCapacity(uint64_t handle) const override { | |
| 294 | uint16_t slab_idx; | ||
| 295 | uint64_t local_slot_id; | ||
| 296 | 2 | Decode(handle, slab_idx, local_slot_id); | |
| 297 | (void)local_slot_id; | ||
| 298 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | return slabs_.at(slab_idx)->slot_size - kHeaderSize; |
| 299 | } | ||
| 300 | |||
| 301 | /// For unit tests: physical chunk size when carving the backing store. | ||
| 302 | static constexpr uint64_t kBlockSize = 1ULL << 26; | ||
| 303 | |||
| 304 | private: | ||
| 305 | struct ThreadLocalBuffer { | ||
| 306 | char* ptr = nullptr; | ||
| 307 | uint64_t pages = 0; | ||
| 308 | }; | ||
| 309 | |||
| 310 | 16 | char* AcquireThreadBuffer(uint64_t pages) { | |
| 311 | static thread_local ThreadLocalBuffer tls; | ||
| 312 |
4/4✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 12 times.
|
16 | if (tls.ptr == nullptr || tls.pages < pages) { |
| 313 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (tls.ptr != nullptr) { |
| 314 | 2 | backend_->FreeBuffer(tls.ptr); | |
| 315 | } | ||
| 316 | 4 | tls.ptr = backend_->AllocateBuffer(pages); | |
| 317 | 4 | tls.pages = pages; | |
| 318 | } | ||
| 319 | 16 | return tls.ptr; | |
| 320 | } | ||
| 321 | |||
| 322 | struct Block { | ||
| 323 | uint64_t byte_offset = 0; | ||
| 324 | uint64_t cursor = 0; | ||
| 325 | uint64_t used_count = 0; | ||
| 326 | std::vector<uint64_t> free_in_block; | ||
| 327 | bool tombstone = false; | ||
| 328 | }; | ||
| 329 | |||
| 330 | struct SlabPool { | ||
| 331 | uint64_t slot_size = 0; | ||
| 332 | uint64_t slots_per_block = 0; | ||
| 333 | std::vector<Block> blocks; | ||
| 334 | std::vector<uint64_t> tombstone_indices; | ||
| 335 | std::vector<uint64_t> partial_blocks; | ||
| 336 | mutable std::mutex mu; | ||
| 337 | }; | ||
| 338 | |||
| 339 | 3699058 | static bool HasSpace(const Block& b, const SlabPool& slab) { | |
| 340 |
4/4✓ Branch 1 taken 3633580 times.
✓ Branch 2 taken 65478 times.
✓ Branch 3 taken 3633504 times.
✓ Branch 4 taken 76 times.
|
3699058 | return !b.free_in_block.empty() || b.cursor < slab.slots_per_block; |
| 341 | } | ||
| 342 | |||
| 343 | 82 | static void RemoveFromPartial(SlabPool& slab, uint64_t block_idx) { | |
| 344 | 82 | auto& v = slab.partial_blocks; | |
| 345 |
2/4✓ Branch 5 taken 82 times.
✗ Branch 6 not taken.
✓ Branch 9 taken 82 times.
✗ Branch 10 not taken.
|
82 | v.erase(std::remove(v.begin(), v.end(), block_idx), v.end()); |
| 346 | 82 | } | |
| 347 | |||
| 348 | static void | ||
| 349 | 1276648 | AddToPartialIfSpace(SlabPool& slab, uint64_t block_idx, const Block& b) { | |
| 350 |
1/2✓ Branch 1 taken 1276648 times.
✗ Branch 2 not taken.
|
1276648 | if (HasSpace(b, slab)) { |
| 351 |
1/2✓ Branch 3 taken 1276648 times.
✗ Branch 4 not taken.
|
1276648 | if (std::find(slab.partial_blocks.begin(), |
| 352 | slab.partial_blocks.end(), | ||
| 353 |
2/2✓ Branch 2 taken 92 times.
✓ Branch 3 taken 1276556 times.
|
2553296 | block_idx) == slab.partial_blocks.end()) { |
| 354 | 92 | slab.partial_blocks.push_back(block_idx); | |
| 355 | } | ||
| 356 | } | ||
| 357 | 1276648 | } | |
| 358 | |||
| 359 | void | ||
| 360 | 1211248 | UpdatePartialAfterAlloc(SlabPool& slab, uint64_t block_idx, const Block& b) { | |
| 361 |
2/2✓ Branch 1 taken 1211172 times.
✓ Branch 2 taken 76 times.
|
1211248 | if (HasSpace(b, slab)) { |
| 362 | 1211172 | AddToPartialIfSpace(slab, block_idx, b); | |
| 363 | } else { | ||
| 364 | 76 | RemoveFromPartial(slab, block_idx); | |
| 365 | } | ||
| 366 | 1211248 | } | |
| 367 | |||
| 368 | void | ||
| 369 | 65476 | UpdatePartialAfterFree(SlabPool& slab, uint64_t block_idx, const Block& b) { | |
| 370 | 65476 | AddToPartialIfSpace(slab, block_idx, b); | |
| 371 | 65476 | } | |
| 372 | |||
| 373 | uint64_t | ||
| 374 | 16 | ResolveSlotByteOffset(uint16_t slab_idx, uint64_t local_slot_id) const { | |
| 375 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | auto& slab = *slabs_.at(slab_idx); |
| 376 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | std::lock_guard<std::mutex> lock(slab.mu); |
| 377 | 16 | const uint64_t block_idx = local_slot_id / slab.slots_per_block; | |
| 378 | 16 | const uint64_t slot_in_block = local_slot_id % slab.slots_per_block; | |
| 379 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
|
16 | if (block_idx >= slab.blocks.size()) { |
| 380 | ✗ | return 0; | |
| 381 | } | ||
| 382 | 16 | const Block& b = slab.blocks[block_idx]; | |
| 383 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (b.tombstone) { |
| 384 | ✗ | return 0; | |
| 385 | } | ||
| 386 | 16 | return b.byte_offset + slot_in_block * slab.slot_size; | |
| 387 | 16 | } | |
| 388 | |||
| 389 | 1211248 | static uint64_t Encode(uint16_t slab_idx, uint64_t local_slot_id) { | |
| 390 | 1211248 | return (static_cast<uint64_t>(slab_idx) << 48) | | |
| 391 | 1211248 | ((local_slot_id + 1) & 0x0000FFFFFFFFFFFFULL); | |
| 392 | } | ||
| 393 | |||
| 394 | static void | ||
| 395 | 65496 | Decode(uint64_t handle, uint16_t& slab_idx, uint64_t& local_slot_id) { | |
| 396 | 65496 | slab_idx = static_cast<uint16_t>(handle >> 48); | |
| 397 | 65496 | local_slot_id = (handle & 0x0000FFFFFFFFFFFFULL) - 1; | |
| 398 | 65496 | } | |
| 399 | |||
| 400 | 1211254 | uint16_t SelectSlab(size_t data_size) const { | |
| 401 |
1/2✓ Branch 1 taken 3306370 times.
✗ Branch 2 not taken.
|
3306370 | for (uint16_t i = 0; i < size_classes_.size(); ++i) { |
| 402 |
2/2✓ Branch 1 taken 1211254 times.
✓ Branch 2 taken 2095116 times.
|
3306370 | if (data_size <= static_cast<size_t>(size_classes_[i])) { |
| 403 | 1211254 | return i; | |
| 404 | } | ||
| 405 | } | ||
| 406 | ✗ | throw std::invalid_argument("SsdSlabAllocator value too large"); | |
| 407 | } | ||
| 408 | |||
| 409 | 8 | void WriteBytes(uint64_t byte_offset, | |
| 410 | uint64_t slot_size, | ||
| 411 | const void* data, | ||
| 412 | size_t data_size) { | ||
| 413 | 8 | const PageID_t start = byte_offset / PAGE_SIZE; | |
| 414 | 8 | const uint64_t page_off = byte_offset % PAGE_SIZE; | |
| 415 | 8 | const uint64_t total = page_off + slot_size; | |
| 416 | 8 | const uint64_t pages = (total + PAGE_SIZE - 1) / PAGE_SIZE; | |
| 417 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | char* buf = AcquireThreadBuffer(pages); |
| 418 |
2/4✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 8 times.
✗ Branch 6 not taken.
|
8 | backend_->BatchReadPages({{start, buf, pages}}); |
| 419 | 8 | const uint32_t n = static_cast<uint32_t>(data_size); | |
| 420 | 8 | std::memcpy(buf + page_off, &n, sizeof(n)); | |
| 421 | 8 | std::memcpy(buf + page_off + kHeaderSize, data, data_size); | |
| 422 |
2/4✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 8 times.
✗ Branch 6 not taken.
|
8 | backend_->BatchWritePages({{start, buf, pages}}); |
| 423 | 8 | } | |
| 424 | |||
| 425 | 8 | size_t ReadBytes(uint64_t byte_offset, | |
| 426 | uint64_t slot_size, | ||
| 427 | void* out_buf, | ||
| 428 | size_t buf_size) { | ||
| 429 | 8 | const PageID_t start = byte_offset / PAGE_SIZE; | |
| 430 | 8 | const uint64_t page_off = byte_offset % PAGE_SIZE; | |
| 431 | 8 | const uint64_t total = page_off + slot_size; | |
| 432 | 8 | const uint64_t pages = (total + PAGE_SIZE - 1) / PAGE_SIZE; | |
| 433 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | char* buf = AcquireThreadBuffer(pages); |
| 434 |
2/4✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 8 times.
✗ Branch 6 not taken.
|
8 | backend_->BatchReadPages({{start, buf, pages}}); |
| 435 | 8 | uint32_t n = 0; | |
| 436 | 8 | std::memcpy(&n, buf + page_off, sizeof(n)); | |
| 437 | 8 | const size_t actual = std::min(buf_size, static_cast<size_t>(n)); | |
| 438 | 8 | std::memcpy(out_buf, buf + page_off + kHeaderSize, actual); | |
| 439 | 8 | return actual; | |
| 440 | } | ||
| 441 | |||
| 442 | static constexpr uint64_t kHeaderSize = 4; | ||
| 443 | IOBackend* backend_; | ||
| 444 | std::vector<int> size_classes_; | ||
| 445 | std::vector<std::unique_ptr<SlabPool>> slabs_; | ||
| 446 | |||
| 447 | std::vector<uint64_t> global_free_blocks_; | ||
| 448 | std::mutex global_mu_; | ||
| 449 | }; | ||
| 450 | |||
| 451 | FACTORY_REGISTER(SsdBlockAllocator, | ||
| 452 | SSD_SLAB, | ||
| 453 | SsdSlabAllocator, | ||
| 454 | IOBackend*, | ||
| 455 | const std::vector<int>&, | ||
| 456 | uint64_t, | ||
| 457 | uint64_t); | ||
| 458 |