GCC Code Coverage Report


Directory: src/
Coverage: low: ≥ 0% medium: ≥ 75.0% high: ≥ 90.0%
Coverage Exec / Excl / Total
Lines: 0.0% 0 / 0 / 113
Functions: 0.0% 0 / 0 / 7
Branches: 0.0% 0 / 0 / 102

memory/allocators/benchmark_malloc.cc
Line Branch Exec Source
1 #include <unistd.h>
2
3 #include <algorithm>
4 #include <atomic>
5 #include <chrono>
6 #include <cstdio>
7 #include <memory>
8 #include <string>
9 #include <thread>
10 #include <vector>
11
12 #include "base/base.h"
13 #include "base/bind_core.h"
14 #include "base/factory.h"
15 #include "base/init.h"
16 #include "memory/allocators/concurrent_slab_memory_pool.h"
17 #include "memory/allocators/persist_loop_slab_allocator.h"
18 #include "memory/allocators/r2_slab_allocator.h"
19 #include "memory/malloc.h"
20
21 DEFINE_string(
22 allocators,
23 "CONCURRENT_SLAB_MEMORY_POOL,R2_SLAB,PERSIST_LOOP_SLAB,PERSIST_MEMORY_POOL",
24 "Comma-separated MallocApi factory names to benchmark");
25 DEFINE_int32(thread_num, 16, "Number of concurrent worker threads");
26 DEFINE_int32(alloc_size, 128, "Allocation size in bytes for each New/Free pair");
27 DEFINE_int32(warmup_seconds, 1, "Warmup duration in seconds");
28 DEFINE_int32(running_seconds, 5, "Measured benchmark duration in seconds");
29 DEFINE_int64(pool_bytes,
30 256LL * 1024 * 1024,
31 "Backing memory pool size in bytes");
32 DEFINE_string(medium, "DRAM", "Shm medium passed to MallocApi factory");
33
34 namespace {
35
36 using MallocFactory =
37 base::Factory<base::MallocApi, const std::string&, int64, const std::string&>;
38
// Outcome of benchmarking one allocator; one row of the summary table.
struct BenchResult {
  std::string allocator;         // Factory name of the allocator under test.
  int threads{0};                // Worker thread count used for the run.
  int alloc_size{0};             // Bytes requested per New() call.
  double elapsed_sec{0.0};       // Wall-clock length of the measured phase.
  uint64_t total_ops{0};         // Sum of per-thread operation counts.
  uint64_t failures{0};          // Sum of per-thread failed New()/Free() calls.
  double ops_per_sec{0.0};       // total_ops / elapsed_sec.
  double alloc_mb_per_sec{0.0};  // total_ops * alloc_size / elapsed_sec, in MiB/s.
  bool ok{false};                // True once the benchmark ran to completion.
  std::string error;             // Reason for failure when ok is false.
};
51
// Splits `value` on ',' into its non-empty fields.
//
// Every ' ' character is dropped from each field (including spaces inside a
// field, so "a b" becomes "ab"); fields that end up empty are skipped.
// Other whitespace characters are kept as-is.
std::vector<std::string> SplitCommaList(const std::string& value) {
  std::vector<std::string> fields;
  std::string::size_type field_begin = 0;
  while (field_begin <= value.size()) {
    // The field ends at the next comma, or at the end of the input.
    std::string::size_type field_end = value.find(',', field_begin);
    if (field_end == std::string::npos) {
      field_end = value.size();
    }

    std::string field = value.substr(field_begin, field_end - field_begin);
    field.erase(std::remove(field.begin(), field.end(), ' '), field.end());
    if (!field.empty()) {
      fields.push_back(field);
    }

    field_begin = field_end + 1;
  }
  return fields;
}
72
// Builds the backing path for `allocator`:
// "/tmp/recstore_malloc_bench_<allocator>_<pid>", where <pid> is the id of
// the current process.
std::string BenchPath(const std::string& allocator) {
  std::string path = "/tmp/recstore_malloc_bench_";
  path += allocator;
  path += "_";
  path += std::to_string(getpid());
  return path;
}
77
78 void RunTimedWorkers(base::MallocApi* allocator,
79 int alloc_size,
80 int duration_seconds,
81 std::vector<uint64_t>* per_thread_ops,
82 std::vector<uint64_t>* per_thread_failures) {
83 std::atomic<bool> stop{false};
84 const int thread_num = static_cast<int>(per_thread_ops->size());
85 std::fill(per_thread_ops->begin(), per_thread_ops->end(), 0);
86 std::fill(per_thread_failures->begin(), per_thread_failures->end(), 0);
87
88 std::vector<std::thread> threads;
89 threads.reserve(static_cast<size_t>(thread_num));
90 for (int tid = 0; tid < thread_num; ++tid) {
91 threads.emplace_back([allocator, alloc_size, &stop, tid, per_thread_ops,
92 per_thread_failures]() {
93 base::auto_bind_core();
94 uint64_t ops = 0;
95 uint64_t failures = 0;
96 while (!stop.load(std::memory_order_acquire)) {
97 char* ptr = allocator->New(alloc_size);
98 if (ptr == nullptr) {
99 ++failures;
100 continue;
101 }
102 if (!allocator->Free(ptr)) {
103 ++failures;
104 }
105 ++ops;
106 }
107 (*per_thread_ops)[static_cast<size_t>(tid)] = ops;
108 (*per_thread_failures)[static_cast<size_t>(tid)] = failures;
109 });
110 }
111
112 std::this_thread::sleep_for(std::chrono::seconds(duration_seconds));
113 stop.store(true, std::memory_order_release);
114 for (auto& thread : threads) {
115 thread.join();
116 }
117 }
118
119 BenchResult BenchmarkAllocator(const std::string& allocator_name) {
120 BenchResult result;
121 result.allocator = allocator_name;
122 result.threads = FLAGS_thread_num;
123 result.alloc_size = FLAGS_alloc_size;
124
125 const std::string path = BenchPath(allocator_name);
126 base::file_util::Delete(path, false);
127
128 std::unique_ptr<base::MallocApi> allocator(MallocFactory::NewInstance(
129 allocator_name, path, FLAGS_pool_bytes, FLAGS_medium));
130 if (!allocator) {
131 result.error = "factory returned nullptr";
132 return result;
133 }
134
135 std::vector<uint64_t> per_thread_ops(static_cast<size_t>(FLAGS_thread_num),
136 0);
137 std::vector<uint64_t> per_thread_failures(
138 static_cast<size_t>(FLAGS_thread_num), 0);
139
140 if (FLAGS_warmup_seconds > 0) {
141 RunTimedWorkers(allocator.get(),
142 FLAGS_alloc_size,
143 FLAGS_warmup_seconds,
144 &per_thread_ops,
145 &per_thread_failures);
146 }
147
148 const auto start = std::chrono::steady_clock::now();
149 RunTimedWorkers(allocator.get(),
150 FLAGS_alloc_size,
151 FLAGS_running_seconds,
152 &per_thread_ops,
153 &per_thread_failures);
154 const auto end = std::chrono::steady_clock::now();
155
156 result.elapsed_sec = std::chrono::duration<double>(end - start).count();
157 for (size_t tid = 0; tid < per_thread_ops.size(); ++tid) {
158 result.total_ops += per_thread_ops[tid];
159 result.failures += per_thread_failures[tid];
160 }
161
162 if (result.elapsed_sec > 0.0) {
163 result.ops_per_sec = static_cast<double>(result.total_ops) / result.elapsed_sec;
164 result.alloc_mb_per_sec =
165 (static_cast<double>(result.total_ops) * FLAGS_alloc_size) /
166 result.elapsed_sec / (1024.0 * 1024.0);
167 }
168
169 result.ok = true;
170 allocator.reset();
171 base::file_util::Delete(path, false);
172 return result;
173 }
174
175 void PrintSummaryTable(const std::vector<BenchResult>& results) {
176 printf("\nMallocApi concurrent New/Free benchmark summary\n");
177 printf("threads=%d alloc_size=%d warmup=%ds running=%ds pool_bytes=%ld\n\n",
178 FLAGS_thread_num,
179 FLAGS_alloc_size,
180 FLAGS_warmup_seconds,
181 FLAGS_running_seconds,
182 static_cast<long>(FLAGS_pool_bytes));
183
184 printf("%-28s %7s %7s %9s %12s %9s %14s %12s\n",
185 "Allocator",
186 "Threads",
187 "Size(B)",
188 "Seconds",
189 "TotalOps",
190 "Failures",
191 "Ops/s",
192 "AllocMB/s");
193 printf("%-28s %7s %7s %9s %12s %9s %14s %12s\n",
194 "----------------------------",
195 "-------",
196 "-------",
197 "---------",
198 "------------",
199 "---------",
200 "--------------",
201 "------------");
202
203 for (const BenchResult& result : results) {
204 if (!result.ok) {
205 printf("%-28s %7d %7d %9s %12s %9s %14s %12s (%s)\n",
206 result.allocator.c_str(),
207 result.threads,
208 result.alloc_size,
209 "-",
210 "-",
211 "-",
212 "-",
213 "-",
214 result.error.c_str());
215 continue;
216 }
217
218 printf("%-28s %7d %7d %9.3f %12lu %9lu %14.0f %12.2f\n",
219 result.allocator.c_str(),
220 result.threads,
221 result.alloc_size,
222 result.elapsed_sec,
223 static_cast<unsigned long>(result.total_ops),
224 static_cast<unsigned long>(result.failures),
225 result.ops_per_sec,
226 result.alloc_mb_per_sec);
227 }
228 printf("\n");
229 }
230
231 } // namespace
232
233 int main(int argc, char** argv) {
234 base::Init(&argc, &argv);
235
236 const std::vector<std::string> allocator_names =
237 SplitCommaList(FLAGS_allocators);
238 if (allocator_names.empty()) {
239 fprintf(stderr, "No allocators specified via --allocators\n");
240 return 1;
241 }
242
243 std::vector<BenchResult> results;
244 results.reserve(allocator_names.size());
245 for (const std::string& allocator_name : allocator_names) {
246 results.push_back(BenchmarkAllocator(allocator_name));
247 }
248
249 PrintSummaryTable(results);
250 return 0;
251 }
252