memory/allocators/benchmark_malloc.cc
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include <unistd.h> | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <atomic> | ||
| 5 | #include <chrono> | ||
| 6 | #include <cstdio> | ||
| 7 | #include <memory> | ||
| 8 | #include <string> | ||
| 9 | #include <thread> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "base/base.h" | ||
| 13 | #include "base/bind_core.h" | ||
| 14 | #include "base/factory.h" | ||
| 15 | #include "base/init.h" | ||
| 16 | #include "memory/allocators/concurrent_slab_memory_pool.h" | ||
| 17 | #include "memory/allocators/persist_loop_slab_allocator.h" | ||
| 18 | #include "memory/allocators/r2_slab_allocator.h" | ||
| 19 | #include "memory/malloc.h" | ||
| 20 | |||
| 21 | DEFINE_string( | ||
| 22 | allocators, | ||
| 23 | "CONCURRENT_SLAB_MEMORY_POOL,R2_SLAB,PERSIST_LOOP_SLAB,PERSIST_MEMORY_POOL", | ||
| 24 | "Comma-separated MallocApi factory names to benchmark"); | ||
| 25 | DEFINE_int32(thread_num, 16, "Number of concurrent worker threads"); | ||
| 26 | DEFINE_int32(alloc_size, 128, "Allocation size in bytes for each New/Free pair"); | ||
| 27 | DEFINE_int32(warmup_seconds, 1, "Warmup duration in seconds"); | ||
| 28 | DEFINE_int32(running_seconds, 5, "Measured benchmark duration in seconds"); | ||
| 29 | DEFINE_int64(pool_bytes, | ||
| 30 | 256LL * 1024 * 1024, | ||
| 31 | "Backing memory pool size in bytes"); | ||
| 32 | DEFINE_string(medium, "DRAM", "Shm medium passed to MallocApi factory"); | ||
| 33 | |||
| 34 | namespace { | ||
| 35 | |||
| 36 | using MallocFactory = | ||
| 37 | base::Factory<base::MallocApi, const std::string&, int64, const std::string&>; | ||
| 38 | |||
// Outcome of benchmarking a single allocator; one row of the summary table.
struct BenchResult {
  std::string allocator;         // Factory name the run was requested for.
  int threads{0};                // Worker thread count used for the run.
  int alloc_size{0};             // Bytes requested by every New() call.
  double elapsed_sec{0.0};       // Wall-clock length of the measured phase.
  uint64_t total_ops{0};         // Sum of per-thread operation counters.
  uint64_t failures{0};          // Sum of per-thread failure counters.
  double ops_per_sec{0.0};       // total_ops / elapsed_sec.
  double alloc_mb_per_sec{0.0};  // total_ops * alloc_size per second, in MiB.
  bool ok{false};                // True only when the run completed.
  std::string error;             // Reason for failure when !ok.
};
| 51 | |||
// Splits a comma-separated flag value into its non-empty items.
//
// Whitespace (space, tab, CR, LF) is dropped wherever it occurs, so "A, B"
// and "A,\tB\n" both yield {"A", "B"}. Items left empty by leading, trailing
// or doubled commas are skipped, so the result never contains "".
std::vector<std::string> SplitCommaList(const std::string& value) {
  std::vector<std::string> items;
  std::string current;

  // Emits the item collected so far (if any) and starts a new one.
  const auto flush = [&items, &current]() {
    if (!current.empty()) {
      items.push_back(current);
      current.clear();
    }
  };

  for (const char c : value) {
    switch (c) {
      case ',':
        flush();
        break;
      case ' ':
      case '\t':
      case '\r':
      case '\n':
        // Whitespace is never part of a factory name; ignore it.
        break;
      default:
        current.push_back(c);
        break;
    }
  }
  flush();  // The last item has no trailing comma.
  return items;
}
| 72 | |||
// Returns the backing-file path for |allocator|:
// "/tmp/recstore_malloc_bench_<allocator>_<pid>", where <pid> is this
// process's id as reported by getpid().
std::string BenchPath(const std::string& allocator) {
  std::string path("/tmp/recstore_malloc_bench_");
  path += allocator;
  path += "_";
  path += std::to_string(getpid());
  return path;
}
| 77 | |||
| 78 | ✗ | void RunTimedWorkers(base::MallocApi* allocator, | |
| 79 | int alloc_size, | ||
| 80 | int duration_seconds, | ||
| 81 | std::vector<uint64_t>* per_thread_ops, | ||
| 82 | std::vector<uint64_t>* per_thread_failures) { | ||
| 83 | ✗ | std::atomic<bool> stop{false}; | |
| 84 | ✗ | const int thread_num = static_cast<int>(per_thread_ops->size()); | |
| 85 | ✗ | std::fill(per_thread_ops->begin(), per_thread_ops->end(), 0); | |
| 86 | ✗ | std::fill(per_thread_failures->begin(), per_thread_failures->end(), 0); | |
| 87 | |||
| 88 | ✗ | std::vector<std::thread> threads; | |
| 89 | ✗ | threads.reserve(static_cast<size_t>(thread_num)); | |
| 90 | ✗ | for (int tid = 0; tid < thread_num; ++tid) { | |
| 91 | ✗ | threads.emplace_back([allocator, alloc_size, &stop, tid, per_thread_ops, | |
| 92 | ✗ | per_thread_failures]() { | |
| 93 | ✗ | base::auto_bind_core(); | |
| 94 | ✗ | uint64_t ops = 0; | |
| 95 | ✗ | uint64_t failures = 0; | |
| 96 | ✗ | while (!stop.load(std::memory_order_acquire)) { | |
| 97 | ✗ | char* ptr = allocator->New(alloc_size); | |
| 98 | ✗ | if (ptr == nullptr) { | |
| 99 | ✗ | ++failures; | |
| 100 | ✗ | continue; | |
| 101 | } | ||
| 102 | ✗ | if (!allocator->Free(ptr)) { | |
| 103 | ✗ | ++failures; | |
| 104 | } | ||
| 105 | ✗ | ++ops; | |
| 106 | } | ||
| 107 | ✗ | (*per_thread_ops)[static_cast<size_t>(tid)] = ops; | |
| 108 | ✗ | (*per_thread_failures)[static_cast<size_t>(tid)] = failures; | |
| 109 | ✗ | }); | |
| 110 | } | ||
| 111 | |||
| 112 | ✗ | std::this_thread::sleep_for(std::chrono::seconds(duration_seconds)); | |
| 113 | ✗ | stop.store(true, std::memory_order_release); | |
| 114 | ✗ | for (auto& thread : threads) { | |
| 115 | ✗ | thread.join(); | |
| 116 | } | ||
| 117 | ✗ | } | |
| 118 | |||
| 119 | ✗ | BenchResult BenchmarkAllocator(const std::string& allocator_name) { | |
| 120 | ✗ | BenchResult result; | |
| 121 | ✗ | result.allocator = allocator_name; | |
| 122 | ✗ | result.threads = FLAGS_thread_num; | |
| 123 | ✗ | result.alloc_size = FLAGS_alloc_size; | |
| 124 | |||
| 125 | ✗ | const std::string path = BenchPath(allocator_name); | |
| 126 | ✗ | base::file_util::Delete(path, false); | |
| 127 | |||
| 128 | std::unique_ptr<base::MallocApi> allocator(MallocFactory::NewInstance( | ||
| 129 | ✗ | allocator_name, path, FLAGS_pool_bytes, FLAGS_medium)); | |
| 130 | ✗ | if (!allocator) { | |
| 131 | ✗ | result.error = "factory returned nullptr"; | |
| 132 | ✗ | return result; | |
| 133 | } | ||
| 134 | |||
| 135 | std::vector<uint64_t> per_thread_ops(static_cast<size_t>(FLAGS_thread_num), | ||
| 136 | ✗ | 0); | |
| 137 | std::vector<uint64_t> per_thread_failures( | ||
| 138 | ✗ | static_cast<size_t>(FLAGS_thread_num), 0); | |
| 139 | |||
| 140 | ✗ | if (FLAGS_warmup_seconds > 0) { | |
| 141 | ✗ | RunTimedWorkers(allocator.get(), | |
| 142 | FLAGS_alloc_size, | ||
| 143 | FLAGS_warmup_seconds, | ||
| 144 | &per_thread_ops, | ||
| 145 | &per_thread_failures); | ||
| 146 | } | ||
| 147 | |||
| 148 | ✗ | const auto start = std::chrono::steady_clock::now(); | |
| 149 | ✗ | RunTimedWorkers(allocator.get(), | |
| 150 | FLAGS_alloc_size, | ||
| 151 | FLAGS_running_seconds, | ||
| 152 | &per_thread_ops, | ||
| 153 | &per_thread_failures); | ||
| 154 | ✗ | const auto end = std::chrono::steady_clock::now(); | |
| 155 | |||
| 156 | ✗ | result.elapsed_sec = std::chrono::duration<double>(end - start).count(); | |
| 157 | ✗ | for (size_t tid = 0; tid < per_thread_ops.size(); ++tid) { | |
| 158 | ✗ | result.total_ops += per_thread_ops[tid]; | |
| 159 | ✗ | result.failures += per_thread_failures[tid]; | |
| 160 | } | ||
| 161 | |||
| 162 | ✗ | if (result.elapsed_sec > 0.0) { | |
| 163 | ✗ | result.ops_per_sec = static_cast<double>(result.total_ops) / result.elapsed_sec; | |
| 164 | ✗ | result.alloc_mb_per_sec = | |
| 165 | ✗ | (static_cast<double>(result.total_ops) * FLAGS_alloc_size) / | |
| 166 | ✗ | result.elapsed_sec / (1024.0 * 1024.0); | |
| 167 | } | ||
| 168 | |||
| 169 | ✗ | result.ok = true; | |
| 170 | ✗ | allocator.reset(); | |
| 171 | ✗ | base::file_util::Delete(path, false); | |
| 172 | ✗ | return result; | |
| 173 | ✗ | } | |
| 174 | |||
| 175 | ✗ | void PrintSummaryTable(const std::vector<BenchResult>& results) { | |
| 176 | ✗ | printf("\nMallocApi concurrent New/Free benchmark summary\n"); | |
| 177 | ✗ | printf("threads=%d alloc_size=%d warmup=%ds running=%ds pool_bytes=%ld\n\n", | |
| 178 | FLAGS_thread_num, | ||
| 179 | FLAGS_alloc_size, | ||
| 180 | FLAGS_warmup_seconds, | ||
| 181 | FLAGS_running_seconds, | ||
| 182 | static_cast<long>(FLAGS_pool_bytes)); | ||
| 183 | |||
| 184 | ✗ | printf("%-28s %7s %7s %9s %12s %9s %14s %12s\n", | |
| 185 | "Allocator", | ||
| 186 | "Threads", | ||
| 187 | "Size(B)", | ||
| 188 | "Seconds", | ||
| 189 | "TotalOps", | ||
| 190 | "Failures", | ||
| 191 | "Ops/s", | ||
| 192 | "AllocMB/s"); | ||
| 193 | ✗ | printf("%-28s %7s %7s %9s %12s %9s %14s %12s\n", | |
| 194 | "----------------------------", | ||
| 195 | "-------", | ||
| 196 | "-------", | ||
| 197 | "---------", | ||
| 198 | "------------", | ||
| 199 | "---------", | ||
| 200 | "--------------", | ||
| 201 | "------------"); | ||
| 202 | |||
| 203 | ✗ | for (const BenchResult& result : results) { | |
| 204 | ✗ | if (!result.ok) { | |
| 205 | ✗ | printf("%-28s %7d %7d %9s %12s %9s %14s %12s (%s)\n", | |
| 206 | result.allocator.c_str(), | ||
| 207 | ✗ | result.threads, | |
| 208 | ✗ | result.alloc_size, | |
| 209 | "-", | ||
| 210 | "-", | ||
| 211 | "-", | ||
| 212 | "-", | ||
| 213 | "-", | ||
| 214 | result.error.c_str()); | ||
| 215 | ✗ | continue; | |
| 216 | } | ||
| 217 | |||
| 218 | ✗ | printf("%-28s %7d %7d %9.3f %12lu %9lu %14.0f %12.2f\n", | |
| 219 | result.allocator.c_str(), | ||
| 220 | ✗ | result.threads, | |
| 221 | ✗ | result.alloc_size, | |
| 222 | ✗ | result.elapsed_sec, | |
| 223 | ✗ | static_cast<unsigned long>(result.total_ops), | |
| 224 | ✗ | static_cast<unsigned long>(result.failures), | |
| 225 | ✗ | result.ops_per_sec, | |
| 226 | ✗ | result.alloc_mb_per_sec); | |
| 227 | } | ||
| 228 | ✗ | printf("\n"); | |
| 229 | ✗ | } | |
| 230 | |||
| 231 | } // namespace | ||
| 232 | |||
| 233 | ✗ | int main(int argc, char** argv) { | |
| 234 | ✗ | base::Init(&argc, &argv); | |
| 235 | |||
| 236 | const std::vector<std::string> allocator_names = | ||
| 237 | ✗ | SplitCommaList(FLAGS_allocators); | |
| 238 | ✗ | if (allocator_names.empty()) { | |
| 239 | ✗ | fprintf(stderr, "No allocators specified via --allocators\n"); | |
| 240 | ✗ | return 1; | |
| 241 | } | ||
| 242 | |||
| 243 | ✗ | std::vector<BenchResult> results; | |
| 244 | ✗ | results.reserve(allocator_names.size()); | |
| 245 | ✗ | for (const std::string& allocator_name : allocator_names) { | |
| 246 | ✗ | results.push_back(BenchmarkAllocator(allocator_name)); | |
| 247 | } | ||
| 248 | |||
| 249 | ✗ | PrintSummaryTable(results); | |
| 250 | ✗ | return 0; | |
| 251 | ✗ | } | |
| 252 |