CIRCT 22.0.0git
Loading...
Searching...
No Matches
esitester.cpp
Go to the documentation of this file.
1//===- esitester.cpp - ESI accelerator test/example tool ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DO NOT EDIT!
10// This file is distributed as part of an ESI runtime package. The source for
11// this file should always be modified within CIRCT
12// (lib/dialect/ESI/runtime/cpp/tools/esitester.cpp).
13//
14//===----------------------------------------------------------------------===//
15//
16// This application isn't a utility so much as a test driver for an ESI system.
17// It is also useful as an example of how to use the ESI C++ API. esiquery.cpp
18// is also useful as an example.
19//
20//===----------------------------------------------------------------------===//
21
22#include "esi/Accelerator.h"
23#include "esi/CLI.h"
24#include "esi/Manifest.h"
25#include "esi/Services.h"
26
27#include <atomic>
28#include <chrono>
29#include <future>
30#include <iostream>
31#include <map>
32#include <random>
33#include <sstream>
34#include <stdexcept>
35#include <vector>
36
37using namespace esi;
38
39// Forward declarations of test functions.
41 uint32_t iterations);
43 const std::vector<uint32_t> &widths, bool write,
44 bool read);
46 uint32_t xferCount,
47 const std::vector<uint32_t> &widths, bool read,
48 bool write);
50 const std::vector<uint32_t> &widths, bool read, bool write);
52 const std::vector<uint32_t> &widths,
53 uint32_t xferCount, bool read, bool write);
55 uint32_t iterations, bool pipeline);
57 Accelerator *, uint32_t width,
58 uint32_t xferCount, bool read,
59 bool write);
60
// Default transfer widths (in bits) shared by the width-taking subcommands,
// plus a helper that renders them for CLI help text.
constexpr std::array<uint32_t, 5> defaultWidths = {32, 64, 128, 256, 512};

/// Render `defaultWidths` as a comma-separated list with no spaces and no
/// trailing separator (e.g. "32,64,128,256,512").
static std::string defaultWidthsStr() {
  std::string joined;
  // Empty before the first element, "," before every later one, so no
  // trailing-comma special case is needed.
  const char *separator = "";
  for (uint32_t width : defaultWidths) {
    joined += separator;
    joined += std::to_string(width);
    separator = ",";
  }
  return joined;
}
72
/// Format a transfer rate given in bytes per second as a string with two
/// fixed decimals and a unit suffix ("B/s", "KB/s", "MB/s" or "GB/s").
///
/// Units are decimal (powers of 1000), unlike `humanBytes`, which scales by
/// 1024. Values below 1e3 (including zero and negative values) are printed
/// unscaled in "B/s".
static std::string formatBandwidth(double bytesPerSec) {
  struct Scale {
    double threshold;
    const char *unit;
  };
  // Largest first: the first threshold that is met selects the unit.
  static constexpr Scale scales[] = {
      {1e9, "GB/s"}, {1e6, "MB/s"}, {1e3, "KB/s"}};

  double value = bytesPerSec;
  const char *unit = "B/s";
  for (const Scale &scale : scales) {
    if (bytesPerSec >= scale.threshold) {
      value = bytesPerSec / scale.threshold;
      unit = scale.unit;
      break;
    }
  }

  std::ostringstream oss;
  oss.setf(std::ios::fixed);
  oss.precision(2);
  oss << value << " " << unit;
  return oss.str();
}
93
/// Format a byte count as a human-readable size.
///
/// The value is divided by 1024 until it drops below 1024 or the largest
/// unit ("TB") is reached, so very large inputs are reported as >= 1024 TB.
/// Plain bytes are printed without decimals; every scaled unit is printed
/// with two fixed decimals.
static std::string humanBytes(uint64_t bytes) {
  static constexpr const char *units[] = {"B", "KB", "MB", "GB", "TB"};
  constexpr size_t lastUnit = sizeof(units) / sizeof(units[0]) - 1;

  double scaled = static_cast<double>(bytes);
  size_t unitIdx = 0;
  while (scaled >= 1024.0 && unitIdx < lastUnit) {
    scaled /= 1024.0;
    ++unitIdx;
  }

  std::ostringstream oss;
  oss.setf(std::ios::fixed);
  // Whole bytes never need a fractional part.
  oss.precision(unitIdx == 0 ? 0 : 2);
  oss << scaled << " " << units[unitIdx];
  return oss.str();
}
109
/// Format a duration given in microseconds as a human-readable string.
///
/// - Below 1000 us: integral microseconds, e.g. "999 us".
/// - Below 1000 ms: milliseconds with 2, 1 or 0 decimals depending on whether
///   the value is below 10, below 100, or larger.
/// - Otherwise: seconds with 3 decimals below 10 s and 2 decimals above.
static std::string humanTimeUS(uint64_t us) {
  if (us < 1000)
    return std::to_string(us) + " us";

  // Shared fixed-point formatter for the millisecond and second branches.
  auto formatFixed = [](double value, int digits, const char *suffix) {
    std::ostringstream oss;
    oss.setf(std::ios::fixed);
    oss.precision(digits);
    oss << value << suffix;
    return oss.str();
  };

  const double ms = us / 1000.0;
  if (ms < 1000.0)
    return formatFixed(ms, ms < 10.0 ? 2 : (ms < 100.0 ? 1 : 0), " ms");

  const double sec = ms / 1000.0;
  return formatFixed(sec, sec < 10.0 ? 3 : 2, " s");
}
129
130int main(int argc, const char *argv[]) {
131 CliParser cli("esitester");
132 cli.description("Test an ESI system running the ESI tester image.");
133 cli.require_subcommand(1);
134
135 CLI::App *callback_test =
136 cli.add_subcommand("callback", "initiate callback test");
137 uint32_t cb_iters = 1;
138 callback_test->add_option("-i,--iters", cb_iters,
139 "Number of iterations to run");
140
141 CLI::App *hostmemtestSub =
142 cli.add_subcommand("hostmem", "Run the host memory test");
143 bool hmRead = false;
144 bool hmWrite = false;
145 std::vector<uint32_t> hostmemWidths(defaultWidths.begin(),
146 defaultWidths.end());
147 hostmemtestSub->add_flag("-w,--write", hmWrite,
148 "Enable host memory write test");
149 hostmemtestSub->add_flag("-r,--read", hmRead, "Enable host memory read test");
150 hostmemtestSub->add_option(
151 "--widths", hostmemWidths,
152 "Hostmem test widths (default: " + defaultWidthsStr() + ")");
153
154 CLI::App *dmatestSub = cli.add_subcommand("dma", "Run the DMA test");
155 bool dmaRead = false;
156 bool dmaWrite = false;
157 std::vector<uint32_t> dmaWidths(defaultWidths.begin(), defaultWidths.end());
158 dmatestSub->add_flag("-w,--write", dmaWrite, "Enable dma write test");
159 dmatestSub->add_flag("-r,--read", dmaRead, "Enable dma read test");
160 dmatestSub->add_option("--widths", dmaWidths,
161 "DMA test widths (default: " + defaultWidthsStr() +
162 ")");
163
164 CLI::App *bandwidthSub =
165 cli.add_subcommand("bandwidth", "Run the bandwidth test");
166 uint32_t xferCount = 1000;
167 bandwidthSub->add_option("-c,--count", xferCount,
168 "Number of transfers to perform");
169 bool bandwidthRead = false;
170 bool bandwidthWrite = false;
171 std::vector<uint32_t> bandwidthWidths(defaultWidths.begin(),
172 defaultWidths.end());
173 bandwidthSub->add_option("--widths", bandwidthWidths,
174 "Width of the transfers to perform (default: " +
175 defaultWidthsStr() + ")");
176 bandwidthSub->add_flag("-w,--write", bandwidthWrite,
177 "Enable bandwidth write");
178 bandwidthSub->add_flag("-r,--read", bandwidthRead, "Enable bandwidth read");
179
180 CLI::App *hostmembwSub =
181 cli.add_subcommand("hostmembw", "Run the host memory bandwidth test");
182 uint32_t hmBwCount = 1000;
183 bool hmBwRead = false;
184 bool hmBwWrite = false;
185 std::vector<uint32_t> hmBwWidths(defaultWidths.begin(), defaultWidths.end());
186 hostmembwSub->add_option("-c,--count", hmBwCount,
187 "Number of hostmem transfers");
188 hostmembwSub->add_option(
189 "--widths", hmBwWidths,
190 "Hostmem bandwidth widths (default: " + defaultWidthsStr() + ")");
191 hostmembwSub->add_flag("-w,--write", hmBwWrite,
192 "Measure hostmem write bandwidth");
193 hostmembwSub->add_flag("-r,--read", hmBwRead,
194 "Measure hostmem read bandwidth");
195
196 CLI::App *loopbackSub =
197 cli.add_subcommand("loopback", "Test LoopbackInOutAdd function service");
198 uint32_t loopbackIters = 10;
199 bool loopbackPipeline = false;
200 loopbackSub->add_option("-i,--iters", loopbackIters,
201 "Number of function invocations (default 10)");
202 loopbackSub->add_flag("-p,--pipeline", loopbackPipeline,
203 "Pipeline all calls then collect results");
204
205 CLI::App *aggBwSub = cli.add_subcommand(
206 "aggbandwidth",
207 "Aggregate hostmem bandwidth across four units (readmem*, writemem*)");
208 uint32_t aggWidth = 512;
209 uint32_t aggCount = 1000;
210 bool aggRead = false;
211 bool aggWrite = false;
212 aggBwSub->add_option(
213 "--width", aggWidth,
214 "Bit width (default 512; other widths ignored if absent)");
215 aggBwSub->add_option("-c,--count", aggCount, "Flits per unit (default 1000)");
216 aggBwSub->add_flag("-r,--read", aggRead, "Include read units");
217 aggBwSub->add_flag("-w,--write", aggWrite, "Include write units");
218
219 if (int rc = cli.esiParse(argc, argv))
220 return rc;
221 if (!cli.get_help_ptr()->empty())
222 return 0;
223
224 Context &ctxt = cli.getContext();
225 AcceleratorConnection *acc = cli.connect();
226 try {
227 const auto &info = *acc->getService<services::SysInfo>();
228 ctxt.getLogger().info("esitester", "Connected to accelerator.");
229 Manifest manifest(ctxt, info.getJsonManifest());
230 Accelerator *accel = manifest.buildAccelerator(*acc);
231 ctxt.getLogger().info("esitester", "Built accelerator.");
232 acc->getServiceThread()->addPoll(*accel);
233
234 if (*callback_test) {
235 callbackTest(acc, accel, cb_iters);
236 } else if (*hostmemtestSub) {
237 hostmemTest(acc, accel, hostmemWidths, hmWrite, hmRead);
238 } else if (*loopbackSub) {
239 loopbackAddTest(acc, accel, loopbackIters, loopbackPipeline);
240 } else if (*dmatestSub) {
241 dmaTest(acc, accel, dmaWidths, dmaRead, dmaWrite);
242 } else if (*bandwidthSub) {
243 bandwidthTest(acc, accel, bandwidthWidths, xferCount, bandwidthRead,
244 bandwidthWrite);
245 } else if (*hostmembwSub) {
246 hostmemBandwidthTest(acc, accel, hmBwCount, hmBwWidths, hmBwRead,
247 hmBwWrite);
248 } else if (*aggBwSub) {
249 aggregateHostmemBandwidthTest(acc, accel, aggWidth, aggCount, aggRead,
250 aggWrite);
251 }
252
253 acc->disconnect();
254 } catch (std::exception &e) {
255 ctxt.getLogger().error("esitester", e.what());
256 acc->disconnect();
257 return -1;
258 }
259 std::cout << "Exiting successfully\n";
260 return 0;
261}
262
264 uint32_t iterations) {
265 auto cb_test = accel->getChildren().find(AppID("cb_test"));
266 if (cb_test == accel->getChildren().end())
267 throw std::runtime_error("No cb_test child found in accelerator");
268 auto &ports = cb_test->second->getPorts();
269 auto cmd_port = ports.find(AppID("cmd"));
270 if (cmd_port == ports.end())
271 throw std::runtime_error("No cmd port found in cb_test child");
272 auto *cmdMMIO = cmd_port->second.getAs<services::MMIO::MMIORegion>();
273 if (!cmdMMIO)
274 throw std::runtime_error("cb_test cmd port is not MMIO");
275
276 auto f = ports.find(AppID("cb"));
277 if (f == ports.end())
278 throw std::runtime_error("No cb port found in accelerator");
279
280 auto *callPort = f->second.getAs<services::CallService::Callback>();
281 if (!callPort)
282 throw std::runtime_error("cb port is not a CallService::Callback");
283
284 std::atomic<uint32_t> callbackCount = 0;
285 callPort->connect(
286 [conn, &callbackCount](const MessageData &data) mutable -> MessageData {
287 callbackCount.fetch_add(1);
288 conn->getLogger().debug(
289 [&](std::string &subsystem, std::string &msg,
290 std::unique_ptr<std::map<std::string, std::any>> &details) {
291 subsystem = "ESITESTER";
292 msg = "Received callback";
293 details = std::make_unique<std::map<std::string, std::any>>();
294 details->emplace("data", data);
295 });
296 std::cout << "callback: " << *data.as<uint64_t>() << std::endl;
297 return MessageData();
298 },
299 true);
300
301 for (uint32_t i = 0; i < iterations; ++i) {
302 conn->getLogger().info("esitester", "Issuing callback command iteration " +
303 std::to_string(i) + "/" +
304 std::to_string(iterations));
305 cmdMMIO->write(0x10, i); // Command the callback
306 // Wait up to 1 second for the callback to be invoked.
307 for (uint32_t wait = 0; wait < 1000; ++wait) {
308 if (callbackCount.load() > i)
309 break;
310 std::this_thread::sleep_for(std::chrono::milliseconds(1));
311 }
312 if (callbackCount.load() <= i)
313 throw std::runtime_error("Callback test failed. No callback received");
314 }
315}
316
317/// Test the hostmem write functionality.
320 uint32_t width) {
321 std::cout << "Running hostmem WRITE test with width " << width << std::endl;
322 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
323 auto check = [&](bool print) {
324 bool ret = true;
325 for (size_t i = 0; i < 9; ++i) {
326 if (print)
327 printf("[write] dataPtr[%zu] = 0x%016lx\n", i, dataPtr[i]);
328 if (i < (width + 63) / 64 && dataPtr[i] == 0xFFFFFFFFFFFFFFFFull)
329 ret = false;
330 }
331 return ret;
332 };
333
334 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
335 if (writeMemChildIter == acc->getChildren().end())
336 throw std::runtime_error(
337 "hostmem write test failed. No writemem child found");
338 auto &writeMemPorts = writeMemChildIter->second->getPorts();
339
340 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
341 if (cmdPortIter == writeMemPorts.end())
342 throw std::runtime_error(
343 "hostmem write test failed. No (cmd,width) MMIO port");
344 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
345 if (!cmdMMIO)
346 throw std::runtime_error(
347 "hostmem write test failed. (cmd,width) port not MMIO");
348
349 auto issuedPortIter = writeMemPorts.find(AppID("addrCmdIssued"));
350 if (issuedPortIter == writeMemPorts.end())
351 throw std::runtime_error(
352 "hostmem write test failed. addrCmdIssued missing");
353 auto *addrCmdIssuedPort =
354 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
355 if (!addrCmdIssuedPort)
356 throw std::runtime_error(
357 "hostmem write test failed. addrCmdIssued not telemetry");
358 addrCmdIssuedPort->connect();
359
360 auto responsesPortIter = writeMemPorts.find(AppID("addrCmdResponses"));
361 if (responsesPortIter == writeMemPorts.end())
362 throw std::runtime_error(
363 "hostmem write test failed. addrCmdResponses missing");
364 auto *addrCmdResponsesPort =
365 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
366 if (!addrCmdResponsesPort)
367 throw std::runtime_error(
368 "hostmem write test failed. addrCmdResponses not telemetry");
369 addrCmdResponsesPort->connect();
370
371 for (size_t i = 0, e = 9; i < e; ++i)
372 dataPtr[i] = 0xFFFFFFFFFFFFFFFFull;
373 region.flush();
374 cmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
375 cmdMMIO->write(0x18, 1);
376 cmdMMIO->write(0x20, 1);
377 bool done = false;
378 for (int i = 0; i < 100; ++i) {
379 auto issued = addrCmdIssuedPort->readInt();
380 auto responses = addrCmdResponsesPort->readInt();
381 if (issued == 1 && responses == 1) {
382 done = true;
383 break;
384 }
385 std::this_thread::sleep_for(std::chrono::microseconds(100));
386 }
387 if (!done) {
388 check(true);
389 throw std::runtime_error("hostmem write test (" + std::to_string(width) +
390 " bits) timeout waiting for completion");
391 }
392 if (!check(true))
393 throw std::runtime_error("hostmem write test failed (" +
394 std::to_string(width) + " bits)");
395}
396
399 uint32_t width) {
400 std::cout << "Running hostmem READ test with width " << width << std::endl;
401 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
402 if (readMemChildIter == acc->getChildren().end())
403 throw std::runtime_error(
404 "hostmem read test failed. No readmem child found");
405
406 auto &readMemPorts = readMemChildIter->second->getPorts();
407 auto addrCmdPortIter = readMemPorts.find(AppID("cmd", width));
408 if (addrCmdPortIter == readMemPorts.end())
409 throw std::runtime_error(
410 "hostmem read test failed. No AddressCommand MMIO port");
411 auto *addrCmdMMIO =
412 addrCmdPortIter->second.getAs<services::MMIO::MMIORegion>();
413 if (!addrCmdMMIO)
414 throw std::runtime_error(
415 "hostmem read test failed. AddressCommand port not MMIO");
416
417 auto lastReadPortIter = readMemPorts.find(AppID("lastReadLSB"));
418 if (lastReadPortIter == readMemPorts.end())
419 throw std::runtime_error("hostmem read test failed. lastReadLSB missing");
420 auto *lastReadPort =
421 lastReadPortIter->second.getAs<services::TelemetryService::Metric>();
422 if (!lastReadPort)
423 throw std::runtime_error(
424 "hostmem read test failed. lastReadLSB not telemetry");
425 lastReadPort->connect();
426
427 auto issuedPortIter = readMemPorts.find(AppID("addrCmdIssued"));
428 if (issuedPortIter == readMemPorts.end())
429 throw std::runtime_error("hostmem read test failed. addrCmdIssued missing");
430 auto *addrCmdIssuedPort =
431 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
432 if (!addrCmdIssuedPort)
433 throw std::runtime_error(
434 "hostmem read test failed. addrCmdIssued not telemetry");
435 addrCmdIssuedPort->connect();
436
437 auto responsesPortIter = readMemPorts.find(AppID("addrCmdResponses"));
438 if (responsesPortIter == readMemPorts.end())
439 throw std::runtime_error(
440 "hostmem read test failed. addrCmdResponses missing");
441 auto *addrCmdResponsesPort =
442 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
443 if (!addrCmdResponsesPort)
444 throw std::runtime_error(
445 "hostmem read test failed. addrCmdResponses not telemetry");
446 addrCmdResponsesPort->connect();
447
448 for (size_t i = 0; i < 8; ++i) {
449 auto *dataPtr = static_cast<uint64_t *>(region.getPtr());
450 dataPtr[0] = 0x12345678ull << i;
451 dataPtr[1] = 0xDEADBEEFull << i;
452 region.flush();
453 addrCmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
454 addrCmdMMIO->write(0x18, 1);
455 addrCmdMMIO->write(0x20, 1);
456 bool done = false;
457 for (int waitLoop = 0; waitLoop < 100; ++waitLoop) {
458 auto issued = addrCmdIssuedPort->readInt();
459 auto responses = addrCmdResponsesPort->readInt();
460 if (issued == 1 && responses == 1) {
461 done = true;
462 break;
463 }
464 std::this_thread::sleep_for(std::chrono::milliseconds(10));
465 }
466 if (!done)
467 throw std::runtime_error("hostmem read (" + std::to_string(width) +
468 " bits) timeout waiting for completion");
469 uint64_t captured = lastReadPort->readInt();
470 uint64_t expected = dataPtr[0];
471 if (width < 64)
472 expected &= ((1ull << width) - 1);
473 if (captured != expected)
474 throw std::runtime_error("hostmem read test (" + std::to_string(width) +
475 " bits) failed. Expected " +
476 esi::toHex(expected) + ", got " +
477 esi::toHex(captured));
478 }
479}
480
482 const std::vector<uint32_t> &widths, bool write,
483 bool read) {
484 // Enable the host memory service.
485 auto hostmem = conn->getService<services::HostMem>();
486 hostmem->start();
487 auto scratchRegion = hostmem->allocate(/*size(bytes)=*/1024 * 1024,
488 /*memOpts=*/{.writeable = true});
489 uint64_t *dataPtr = static_cast<uint64_t *>(scratchRegion->getPtr());
490 conn->getLogger().info("esitester",
491 "Running host memory test with region size " +
492 std::to_string(scratchRegion->getSize()) +
493 " bytes at 0x" + toHex(dataPtr));
494 for (size_t i = 0; i < scratchRegion->getSize() / 8; ++i)
495 dataPtr[i] = 0;
496 scratchRegion->flush();
497
498 bool passed = true;
499 for (size_t width : widths) {
500 try {
501 if (write)
502 hostmemWriteTest(acc, *scratchRegion, width);
503 if (read)
504 hostmemReadTest(acc, *scratchRegion, width);
505 } catch (std::exception &e) {
506 conn->getLogger().error("esitester", "Hostmem test failed for width " +
507 std::to_string(width) + ": " +
508 e.what());
509 passed = false;
510 }
511 }
512 if (!passed)
513 throw std::runtime_error("Hostmem test failed");
514 std::cout << "Hostmem test passed" << std::endl;
515}
516
518 size_t width) {
519 Logger &logger = conn->getLogger();
520 logger.info("esitester",
521 "== Running DMA read test with width " + std::to_string(width));
522 AppIDPath lastPath;
523 BundlePort *toHostMMIOPort =
524 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
525 if (!toHostMMIOPort)
526 throw std::runtime_error("dma read test failed. No tohostdma[" +
527 std::to_string(width) + "] found");
528 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
529 if (!toHostMMIO)
530 throw std::runtime_error("dma read test failed. MMIO port is not MMIO");
531 lastPath.clear();
532 BundlePort *outPortBundle =
533 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
534 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
535 outPort.connect();
536
537 size_t xferCount = 24;
538 uint64_t last = 0;
539 MessageData data;
540 toHostMMIO->write(0, xferCount);
541 for (size_t i = 0; i < xferCount; ++i) {
542 outPort.read(data);
543 if (width == 64) {
544 uint64_t val = *data.as<uint64_t>();
545 if (val < last)
546 throw std::runtime_error("dma read test failed. Out of order data");
547 last = val;
548 }
549 logger.debug("esitester",
550 "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex());
551 }
552 outPort.disconnect();
553 std::cout << " DMA read test for " << width << " bits passed" << std::endl;
554}
555
557 size_t width) {
558 Logger &logger = conn->getLogger();
559 logger.info("esitester",
560 "Running DMA write test with width " + std::to_string(width));
561 AppIDPath lastPath;
562 BundlePort *fromHostMMIOPort =
563 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
564 if (!fromHostMMIOPort)
565 throw std::runtime_error("dma read test for " + toString(width) +
566 " bits failed. No fromhostdma[" +
567 std::to_string(width) + "] found");
568 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
569 if (!fromHostMMIO)
570 throw std::runtime_error("dma write test for " + toString(width) +
571 " bits failed. MMIO port is not MMIO");
572 lastPath.clear();
573 BundlePort *outPortBundle =
574 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
575 if (!outPortBundle)
576 throw std::runtime_error("dma write test for " + toString(width) +
577 " bits failed. No out port found");
578 WriteChannelPort &writePort = outPortBundle->getRawWrite("data");
580
581 size_t xferCount = 24;
582 uint8_t *data = new uint8_t[width];
583 for (size_t i = 0; i < width / 8; ++i)
584 data[i] = 0;
585 fromHostMMIO->read(8);
586 fromHostMMIO->write(0, xferCount);
587 for (size_t i = 1; i < xferCount + 1; ++i) {
588 data[0] = i;
589 bool successWrite;
590 size_t attempts = 0;
591 do {
592 successWrite = writePort.tryWrite(MessageData(data, width / 8));
593 if (!successWrite) {
594 std::this_thread::sleep_for(std::chrono::milliseconds(10));
595 }
596 } while (!successWrite && ++attempts < 100);
597 if (!successWrite)
598 throw std::runtime_error("dma write test for " + toString(width) +
599 " bits failed. Write failed");
600 uint64_t lastReadMMIO;
601 for (size_t a = 0; a < 20; ++a) {
602 lastReadMMIO = fromHostMMIO->read(8);
603 if (lastReadMMIO == i)
604 break;
605 std::this_thread::sleep_for(std::chrono::milliseconds(10));
606 if (a >= 19)
607 throw std::runtime_error("dma write for " + toString(width) +
608 " bits test failed. Read from MMIO failed");
609 }
610 }
611 writePort.disconnect();
612 delete[] data;
613 std::cout << " DMA write test for " << width << " bits passed" << std::endl;
614}
615
617 const std::vector<uint32_t> &widths, bool read,
618 bool write) {
619 bool success = true;
620 if (write)
621 for (size_t width : widths)
622 try {
623 dmaWriteTest(conn, acc, width);
624 } catch (std::exception &e) {
625 success = false;
626 std::cerr << "DMA write test for " << width
627 << " bits failed: " << e.what() << std::endl;
628 }
629 if (read)
630 for (size_t width : widths)
631 dmaReadTest(conn, acc, width);
632 if (!success)
633 throw std::runtime_error("DMA test failed");
634 std::cout << "DMA test passed" << std::endl;
635}
636
637//
638// DMA bandwidth test
639//
640
642 size_t width, size_t xferCount) {
643
644 AppIDPath lastPath;
645 BundlePort *toHostMMIOPort =
646 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
647 if (!toHostMMIOPort)
648 throw std::runtime_error("bandwidth test failed. No tohostdma[" +
649 std::to_string(width) + "] found");
650 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
651 if (!toHostMMIO)
652 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
653 lastPath.clear();
654 BundlePort *outPortBundle =
655 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
656 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
657 outPort.connect();
658
659 Logger &logger = conn->getLogger();
660 logger.info("esitester", "Starting read bandwidth test with " +
661 std::to_string(xferCount) + " x " +
662 std::to_string(width) + " bit transfers");
663 MessageData data;
664 auto start = std::chrono::high_resolution_clock::now();
665 toHostMMIO->write(0, xferCount);
666 for (size_t i = 0; i < xferCount; ++i) {
667 outPort.read(data);
668 logger.debug(
669 [i, &data](std::string &subsystem, std::string &msg,
670 std::unique_ptr<std::map<std::string, std::any>> &details) {
671 subsystem = "esitester";
672 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
673 });
674 }
675 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
676 std::chrono::high_resolution_clock::now() - start);
677 double bytesPerSec =
678 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
679 logger.info("esitester",
680 " Bandwidth test: " + std::to_string(xferCount) + " x " +
681 std::to_string(width) + " bit transfers in " +
682 std::to_string(duration.count()) + " microseconds");
683 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
684}
685
687 size_t width, size_t xferCount) {
688
689 AppIDPath lastPath;
690 BundlePort *fromHostMMIOPort =
691 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
692 if (!fromHostMMIOPort)
693 throw std::runtime_error("bandwidth test failed. No fromhostdma[" +
694 std::to_string(width) + "] found");
695 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
696 if (!fromHostMMIO)
697 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
698 lastPath.clear();
699 BundlePort *inPortBundle =
700 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
701 WriteChannelPort &outPort = inPortBundle->getRawWrite("data");
702 outPort.connect();
703
704 Logger &logger = conn->getLogger();
705 logger.info("esitester", "Starting write bandwidth test with " +
706 std::to_string(xferCount) + " x " +
707 std::to_string(width) + " bit transfers");
708 std::vector<uint8_t> dataVec(width / 8);
709 for (size_t i = 0; i < width / 8; ++i)
710 dataVec[i] = i;
711 MessageData data(dataVec);
712 auto start = std::chrono::high_resolution_clock::now();
713 fromHostMMIO->write(0, xferCount);
714 for (size_t i = 0; i < xferCount; ++i) {
715 outPort.write(data);
716 logger.debug(
717 [i, &data](std::string &subsystem, std::string &msg,
718 std::unique_ptr<std::map<std::string, std::any>> &details) {
719 subsystem = "esitester";
720 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
721 });
722 }
723 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
724 std::chrono::high_resolution_clock::now() - start);
725 double bytesPerSec =
726 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
727 logger.info("esitester",
728 " Bandwidth test: " + std::to_string(xferCount) + " x " +
729 std::to_string(width) + " bit transfers in " +
730 std::to_string(duration.count()) + " microseconds");
731 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
732}
733
735 const std::vector<uint32_t> &widths,
736 uint32_t xferCount, bool read, bool write) {
737 if (read)
738 for (uint32_t w : widths)
739 bandwidthReadTest(conn, acc, w, xferCount);
740 if (write)
741 for (uint32_t w : widths)
742 bandwidthWriteTest(conn, acc, w, xferCount);
743}
744
745//
746// Hostmem bandwidth test
747//
748
749static void
752 uint32_t width, uint32_t xferCount) {
753 Logger &logger = conn->getLogger();
754 logger.info("esitester", "Starting hostmem WRITE bandwidth test: " +
755 std::to_string(xferCount) + " x " +
756 std::to_string(width) + " bits");
757
758 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
759 if (writeMemChildIter == acc->getChildren().end())
760 throw std::runtime_error("hostmem write bandwidth: writemem child missing");
761 auto &writeMemPorts = writeMemChildIter->second->getPorts();
762
763 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
764 if (cmdPortIter == writeMemPorts.end())
765 throw std::runtime_error("hostmem write bandwidth: cmd MMIO missing");
766 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
767 if (!cmdMMIO)
768 throw std::runtime_error("hostmem write bandwidth: cmd not MMIO");
769
770 auto issuedIter = writeMemPorts.find(AppID("addrCmdIssued"));
771 auto respIter = writeMemPorts.find(AppID("addrCmdResponses"));
772 auto cycleCount = writeMemPorts.find(AppID("addrCmdCycles"));
773 if (issuedIter == writeMemPorts.end() || respIter == writeMemPorts.end() ||
774 cycleCount == writeMemPorts.end())
775 throw std::runtime_error("hostmem write bandwidth: telemetry missing");
776 auto *issuedPort =
777 issuedIter->second.getAs<services::TelemetryService::Metric>();
778 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
779 auto *cyclePort =
780 cycleCount->second.getAs<services::TelemetryService::Metric>();
781 if (!issuedPort || !respPort || !cyclePort)
782 throw std::runtime_error(
783 "hostmem write bandwidth: telemetry type mismatch");
784
785 issuedPort->connect();
786 respPort->connect();
787 cyclePort->connect();
788
789 // Initialize pattern (optional).
790 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
791 size_t words = region.getSize() / 8;
792 for (size_t i = 0; i < words; ++i)
793 dataPtr[i] = i + 0xA5A50000;
794 region.flush();
795
796 auto start = std::chrono::high_resolution_clock::now();
797 // Fire off xferCount write commands (one flit each).
798 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
799 cmdMMIO->write(0x10, devPtr); // address
800 cmdMMIO->write(0x18, xferCount); // flits
801 cmdMMIO->write(0x20, 1); // start
802
803 // Wait for responses counter to reach target.
804 bool completed = false;
805 for (int wait = 0; wait < 100000; ++wait) {
806 uint64_t respNow = respPort->readInt();
807 if (respNow == xferCount) {
808 completed = true;
809 break;
810 }
811 std::this_thread::sleep_for(std::chrono::microseconds(50));
812 }
813 if (!completed)
814 throw std::runtime_error("hostmem write bandwidth timeout");
815 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
816 std::chrono::high_resolution_clock::now() - start);
817 double bytesPerSec =
818 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
819 uint64_t cycles = cyclePort->readInt();
820 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
821 std::cout << "[WRITE] Hostmem bandwidth (" << std::to_string(width)
822 << "): " << formatBandwidth(bytesPerSec) << " "
823 << std::to_string(xferCount) << " flits in "
824 << std::to_string(duration.count()) << " us, "
825 << std::to_string(cycles) << " cycles, " << bytesPerCycle
826 << " bytes/cycle" << std::endl;
827}
828
829static void
832 uint32_t width, uint32_t xferCount) {
833 Logger &logger = conn->getLogger();
834 logger.info("esitester", "Starting hostmem READ bandwidth test: " +
835 std::to_string(xferCount) + " x " +
836 std::to_string(width) + " bits");
837
838 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
839 if (readMemChildIter == acc->getChildren().end())
840 throw std::runtime_error("hostmem read bandwidth: readmem child missing");
841 auto &readMemPorts = readMemChildIter->second->getPorts();
842
843 auto cmdPortIter = readMemPorts.find(AppID("cmd", width));
844 if (cmdPortIter == readMemPorts.end())
845 throw std::runtime_error("hostmem read bandwidth: cmd MMIO missing");
846 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
847 if (!cmdMMIO)
848 throw std::runtime_error("hostmem read bandwidth: cmd not MMIO");
849
850 auto issuedIter = readMemPorts.find(AppID("addrCmdIssued"));
851 auto respIter = readMemPorts.find(AppID("addrCmdResponses"));
852 auto cyclePort = readMemPorts.find(AppID("addrCmdCycles"));
853 if (issuedIter == readMemPorts.end() || respIter == readMemPorts.end() ||
854 cyclePort == readMemPorts.end())
855 throw std::runtime_error("hostmem read bandwidth: telemetry missing");
856 auto *issuedPort =
857 issuedIter->second.getAs<services::TelemetryService::Metric>();
858 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
859 auto *cycleCntPort =
860 cyclePort->second.getAs<services::TelemetryService::Metric>();
861 if (!issuedPort || !respPort || !cycleCntPort)
862 throw std::runtime_error("hostmem read bandwidth: telemetry type mismatch");
863 issuedPort->connect();
864 respPort->connect();
865 cycleCntPort->connect();
866
867 // Prepare memory pattern (optional).
868 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
869 size_t words64 = region.getSize() / 8;
870 for (size_t i = 0; i < words64; ++i)
871 dataPtr[i] = 0xCAFEBABE0000ull + i;
872 region.flush();
873 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
874 auto start = std::chrono::high_resolution_clock::now();
875
876 cmdMMIO->write(0x10, devPtr);
877 cmdMMIO->write(0x18, xferCount);
878 cmdMMIO->write(0x20, 1);
879
880 bool timeout = true;
881 for (int wait = 0; wait < 100000; ++wait) {
882 uint64_t respNow = respPort->readInt();
883 if (respNow == xferCount) {
884 timeout = false;
885 break;
886 }
887 std::this_thread::sleep_for(std::chrono::microseconds(50));
888 }
889 if (timeout)
890 throw std::runtime_error("hostmem read bandwidth timeout");
891 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
892 std::chrono::high_resolution_clock::now() - start);
893 double bytesPerSec =
894 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
895 uint64_t cycles = cycleCntPort->readInt();
896 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
897 std::cout << "[ READ] Hostmem bandwidth (" << width
898 << "): " << formatBandwidth(bytesPerSec) << ", " << xferCount
899 << " flits in " << duration.count() << " us, " << cycles
900 << " cycles, " << bytesPerCycle << " bytes/cycle" << std::endl;
901}
902
904 uint32_t xferCount,
905 const std::vector<uint32_t> &widths, bool read,
906 bool write) {
907 auto hostmemSvc = conn->getService<services::HostMem>();
908 hostmemSvc->start();
909 auto region = hostmemSvc->allocate(/*size(bytes)=*/1024 * 1024 * 1024,
910 /*memOpts=*/{.writeable = true});
911 for (uint32_t w : widths) {
912 if (write)
913 hostmemWriteBandwidthTest(conn, acc, *region, w, xferCount);
914 if (read)
915 hostmemReadBandwidthTest(conn, acc, *region, w, xferCount);
916 }
917}
918
920 uint32_t iterations, bool pipeline) {
921 Logger &logger = conn->getLogger();
922 auto loopbackChild = accel->getChildren().find(AppID("loopback"));
923 if (loopbackChild == accel->getChildren().end())
924 throw std::runtime_error("Loopback test: no 'loopback' child");
925 auto &ports = loopbackChild->second->getPorts();
926 auto addIter = ports.find(AppID("add"));
927 if (addIter == ports.end())
928 throw std::runtime_error("Loopback test: no 'add' port");
929
930 // Use FuncService::Func instead of raw channels.
931 auto *funcPort = addIter->second.getAs<services::FuncService::Function>();
932 if (!funcPort)
933 throw std::runtime_error(
934 "Loopback test: 'add' port not a FuncService::Function");
935 funcPort->connect();
936 if (iterations == 0) {
937 logger.info("esitester", "Loopback add test: 0 iterations (skipped)");
938 return;
939 }
940 std::mt19937_64 rng(0xC0FFEE);
941 std::uniform_int_distribution<uint32_t> dist(0, (1u << 24) - 1);
942
943 if (!pipeline) {
944 auto start = std::chrono::high_resolution_clock::now();
945 for (uint32_t i = 0; i < iterations; ++i) {
946 uint32_t argVal = dist(rng);
947 uint32_t expected = (argVal + 11) & 0xFFFF;
948 uint8_t argBytes[3] = {
949 static_cast<uint8_t>(argVal & 0xFF),
950 static_cast<uint8_t>((argVal >> 8) & 0xFF),
951 static_cast<uint8_t>((argVal >> 16) & 0xFF),
952 };
953 MessageData argMsg(argBytes, 3);
954 MessageData resMsg = funcPort->call(argMsg).get();
955 uint16_t got = *resMsg.as<uint16_t>();
956 std::cout << "[loopback] i=" << i << " arg=0x" << esi::toHex(argVal)
957 << " got=0x" << esi::toHex(got) << " exp=0x"
958 << esi::toHex(expected) << std::endl;
959 if (got != expected)
960 throw std::runtime_error("Loopback mismatch (non-pipelined)");
961 }
962 auto end = std::chrono::high_resolution_clock::now();
963 auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start)
964 .count();
965 double callsPerSec = (double)iterations * 1e6 / (double)us;
966 logger.info("esitester", "Loopback add test passed (non-pipelined, " +
967 std::to_string(iterations) + " calls, " +
968 std::to_string(us) + " us, " +
969 std::to_string(callsPerSec) + " calls/s)");
970 } else {
971 // Pipelined mode: launch all calls first, then collect.
972 std::vector<std::future<MessageData>> futures;
973 futures.reserve(iterations);
974 std::vector<uint32_t> expectedVals;
975 expectedVals.reserve(iterations);
976
977 auto issueStart = std::chrono::high_resolution_clock::now();
978 for (uint32_t i = 0; i < iterations; ++i) {
979 uint32_t argVal = dist(rng);
980 uint32_t expected = (argVal + 11) & 0xFFFF;
981 uint8_t argBytes[3] = {
982 static_cast<uint8_t>(argVal & 0xFF),
983 static_cast<uint8_t>((argVal >> 8) & 0xFF),
984 static_cast<uint8_t>((argVal >> 16) & 0xFF),
985 };
986 futures.emplace_back(funcPort->call(MessageData(argBytes, 3)));
987 expectedVals.emplace_back(expected);
988 }
989 auto issueEnd = std::chrono::high_resolution_clock::now();
990
991 for (uint32_t i = 0; i < iterations; ++i) {
992 MessageData resMsg = futures[i].get();
993 uint16_t got = *resMsg.as<uint16_t>();
994 uint16_t exp = (uint16_t)expectedVals[i];
995 std::cout << "[loopback-pipelined] i=" << i << " got=0x"
996 << esi::toHex(got) << " exp=0x" << esi::toHex(exp) << std::endl;
997 if (got != exp)
998 throw std::runtime_error("Loopback mismatch (pipelined) idx=" +
999 std::to_string(i));
1000 }
1001 auto collectEnd = std::chrono::high_resolution_clock::now();
1002
1003 auto issueUs = std::chrono::duration_cast<std::chrono::microseconds>(
1004 issueEnd - issueStart)
1005 .count();
1006 auto totalUs = std::chrono::duration_cast<std::chrono::microseconds>(
1007 collectEnd - issueStart)
1008 .count();
1009
1010 double issueRate = (double)iterations * 1e6 / (double)issueUs;
1011 double completionRate = (double)iterations * 1e6 / (double)totalUs;
1012
1013 logger.info("esitester", "Loopback add test passed (pipelined). Issued " +
1014 std::to_string(iterations) + " in " +
1015 std::to_string(issueUs) + " us (" +
1016 std::to_string(issueRate) +
1017 " calls/s), total " + std::to_string(totalUs) +
1018 " us (" + std::to_string(completionRate) +
1019 " calls/s effective)");
1020 }
1021}
1022
1024 Accelerator *acc, uint32_t width,
1025 uint32_t xferCount, bool read,
1026 bool write) {
1027 Logger &logger = conn->getLogger();
1028 if (!read && !write) {
1029 std::cout << "aggbandwidth: nothing to do (enable --read and/or --write)\n";
1030 return;
1031 }
1032 logger.info(
1033 "esitester",
1034 "Aggregate hostmem bandwidth start width=" + std::to_string(width) +
1035 " count=" + std::to_string(xferCount) +
1036 " read=" + (read ? "Y" : "N") + " write=" + (write ? "Y" : "N"));
1037
1038 auto hostmemSvc = conn->getService<services::HostMem>();
1039 hostmemSvc->start();
1040
1041 struct Unit {
1042 std::string prefix;
1043 bool isRead = false;
1044 bool isWrite = false;
1045 std::unique_ptr<esi::services::HostMem::HostMemRegion> region;
1046 services::TelemetryService::Metric *resp = nullptr;
1047 services::TelemetryService::Metric *cycles = nullptr;
1048 services::MMIO::MMIORegion *cmd = nullptr;
1049 bool launched = false;
1050 bool done = false;
1051 uint64_t bytes = 0;
1052 uint64_t duration_us = 0;
1053 uint64_t cycleCount = 0;
1054 std::chrono::high_resolution_clock::time_point start;
1055 };
1056 std::vector<Unit> units;
1057 const std::vector<std::string> readPrefixes = {"readmem", "readmem_0",
1058 "readmem_1", "readmem_2"};
1059 const std::vector<std::string> writePrefixes = {"writemem", "writemem_0",
1060 "writemem_1", "writemem_2"};
1061
1062 auto addUnits = [&](const std::vector<std::string> &pref, bool doRead,
1063 bool doWrite) {
1064 for (auto &p : pref) {
1065 AppID id(p, width);
1066 auto childIt = acc->getChildren().find(id);
1067 if (childIt == acc->getChildren().end())
1068 continue; // silently skip missing variants
1069 auto &ports = childIt->second->getPorts();
1070 auto cmdIt = ports.find(AppID("cmd", width));
1071 auto respIt = ports.find(AppID("addrCmdResponses"));
1072 auto cycIt = ports.find(AppID("addrCmdCycles"));
1073 if (cmdIt == ports.end() || respIt == ports.end() || cycIt == ports.end())
1074 continue;
1075 auto *cmd = cmdIt->second.getAs<services::MMIO::MMIORegion>();
1076 auto *resp = respIt->second.getAs<services::TelemetryService::Metric>();
1077 auto *cyc = cycIt->second.getAs<services::TelemetryService::Metric>();
1078 if (!cmd || !resp || !cyc)
1079 continue;
1080 resp->connect();
1081 cyc->connect();
1082 Unit u;
1083 u.prefix = p;
1084 u.isRead = doRead;
1085 u.isWrite = doWrite;
1086 u.region = hostmemSvc->allocate(1024 * 1024 * 1024, {.writeable = true});
1087 // Init pattern.
1088 uint64_t *ptr = static_cast<uint64_t *>(u.region->getPtr());
1089 size_t words = u.region->getSize() / 8;
1090 for (size_t i = 0; i < words; ++i)
1091 ptr[i] =
1092 (p[0] == 'w' ? (0xA5A500000000ull + i) : (0xCAFEBABE0000ull + i));
1093 u.region->flush();
1094 u.cmd = cmd;
1095 u.resp = resp;
1096 u.cycles = cyc;
1097 u.bytes = uint64_t(xferCount) * (width / 8);
1098 units.emplace_back(std::move(u));
1099 }
1100 };
1101 if (read)
1102 addUnits(readPrefixes, true, false);
1103 if (write)
1104 addUnits(writePrefixes, false, true);
1105 if (units.empty()) {
1106 std::cout << "aggbandwidth: no matching units present for width " << width
1107 << "\n";
1108 return;
1109 }
1110
1111 auto wallStart = std::chrono::high_resolution_clock::now();
1112 // Launch sequentially.
1113 for (auto &u : units) {
1114 uint64_t devPtr = reinterpret_cast<uint64_t>(u.region->getDevicePtr());
1115 u.cmd->write(0x10, devPtr);
1116 u.cmd->write(0x18, xferCount);
1117 u.cmd->write(0x20, 1);
1118 u.start = std::chrono::high_resolution_clock::now();
1119 u.launched = true;
1120 }
1121
1122 // Poll all until complete.
1123 const uint64_t timeoutLoops = 200000; // ~10s at 50us sleep
1124 uint64_t loops = 0;
1125 while (true) {
1126 bool allDone = true;
1127 for (auto &u : units) {
1128 if (u.done)
1129 continue;
1130 if (u.resp->readInt() == xferCount) {
1131 auto end = std::chrono::high_resolution_clock::now();
1132 u.duration_us =
1133 std::chrono::duration_cast<std::chrono::microseconds>(end - u.start)
1134 .count();
1135 u.cycleCount = u.cycles->readInt();
1136 u.done = true;
1137 } else {
1138 allDone = false;
1139 }
1140 }
1141 if (allDone)
1142 break;
1143 if (++loops >= timeoutLoops)
1144 throw std::runtime_error("aggbandwidth: timeout");
1145 std::this_thread::sleep_for(std::chrono::microseconds(50));
1146 }
1147 auto wallUs = std::chrono::duration_cast<std::chrono::microseconds>(
1148 std::chrono::high_resolution_clock::now() - wallStart)
1149 .count();
1150
1151 uint64_t totalBytes = 0;
1152 uint64_t totalReadBytes = 0;
1153 uint64_t totalWriteBytes = 0;
1154 for (auto &u : units) {
1155 totalBytes += u.bytes;
1156 if (u.isRead)
1157 totalReadBytes += u.bytes;
1158 if (u.isWrite)
1159 totalWriteBytes += u.bytes;
1160 double unitBps = (double)u.bytes * 1e6 / (double)u.duration_us;
1161 std::cout << "[agg-unit] " << u.prefix << "[" << width << "] "
1162 << (u.isRead ? "READ" : (u.isWrite ? "WRITE" : "UNK"))
1163 << " bytes=" << humanBytes(u.bytes) << " (" << u.bytes << " B)"
1164 << " time=" << humanTimeUS(u.duration_us) << " (" << u.duration_us
1165 << " us) cycles=" << u.cycleCount
1166 << " throughput=" << formatBandwidth(unitBps) << std::endl;
1167 }
1168 // Compute aggregate bandwidths as total size / total wall time (not sum of
1169 // unit throughputs).
1170 double aggReadBps =
1171 totalReadBytes ? (double)totalReadBytes * 1e6 / (double)wallUs : 0.0;
1172 double aggWriteBps =
1173 totalWriteBytes ? (double)totalWriteBytes * 1e6 / (double)wallUs : 0.0;
1174 double aggCombinedBps =
1175 totalBytes ? (double)totalBytes * 1e6 / (double)wallUs : 0.0;
1176
1177 std::cout << "[agg-total] units=" << units.size()
1178 << " read_bytes=" << humanBytes(totalReadBytes) << " ("
1179 << totalReadBytes << " B)"
1180 << " read_bw=" << formatBandwidth(aggReadBps)
1181 << " write_bytes=" << humanBytes(totalWriteBytes) << " ("
1182 << totalWriteBytes << " B)"
1183 << " write_bw=" << formatBandwidth(aggWriteBps)
1184 << " combined_bytes=" << humanBytes(totalBytes) << " ("
1185 << totalBytes << " B)"
1186 << " combined_bw=" << formatBandwidth(aggCombinedBps)
1187 << " wall_time=" << humanTimeUS(wallUs) << " (" << wallUs << " us)"
1188 << std::endl;
1189 logger.info("esitester", "Aggregate hostmem bandwidth test complete");
1190}
static void print(TypedAttr val, llvm::raw_ostream &os)
static void writePort(uint16_t port)
Write the port number to a file.
Definition RpcServer.cpp:37
Abstract class representing a connection to an accelerator.
Definition Accelerator.h:79
ServiceClass * getService(AppIDPath id={}, std::string implName={}, ServiceImplDetails details={}, HWClientDetails clients={})
Get a typed reference to a particular service type.
virtual void disconnect()
Disconnect from the accelerator cleanly.
Logger & getLogger() const
Definition Accelerator.h:84
AcceleratorServiceThread * getServiceThread()
Return a pointer to the accelerator 'service' thread (or threads).
void addPoll(HWModule &module)
Poll this module.
Top level accelerator class.
Definition Accelerator.h:60
Services provide connections to 'bundles' – collections of named, unidirectional communication channe...
Definition Ports.h:226
T * getAs() const
Cast this Bundle port to a subclass which is actually useful.
Definition Ports.h:254
ReadChannelPort & getRawRead(const std::string &name) const
Definition Ports.cpp:35
WriteChannelPort & getRawWrite(const std::string &name) const
Get access to the raw byte streams of a channel.
Definition Ports.cpp:25
Common options and code for ESI runtime tools.
Definition CLI.h:29
Context & getContext()
Get the context.
Definition CLI.h:63
AcceleratorConnection * connect()
Connect to the accelerator using the specified backend and connection.
Definition CLI.h:60
int esiParse(int argc, const char **argv)
Run the parser.
Definition CLI.h:46
AcceleratorConnections, Accelerators, and Manifests must all share a context.
Definition Context.h:34
Logger & getLogger()
Definition Context.h:69
BundlePort * resolvePort(const AppIDPath &path, AppIDPath &lastLookup) const
Attempt to resolve a path to a port.
Definition Design.cpp:72
const std::map< AppID, Instance * > & getChildren() const
Access the module's children by ID.
Definition Design.h:67
virtual void error(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an error.
Definition Logging.h:64
virtual void info(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an informational message.
Definition Logging.h:75
void debug(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report a debug message.
Definition Logging.h:83
Class to parse a manifest.
Definition Manifest.h:39
Accelerator * buildAccelerator(AcceleratorConnection &acc) const
A logical chunk of data representing serialized data.
Definition Common.h:113
const T * as() const
Cast to a type.
Definition Common.h:148
A ChannelPort which reads data from the accelerator.
Definition Ports.h:124
virtual void connect(std::function< bool(MessageData)> callback, std::optional< unsigned > bufferSize=std::nullopt)
Definition Ports.cpp:44
virtual void disconnect() override
Definition Ports.h:129
virtual void read(MessageData &outData)
Specify a buffer to read into.
Definition Ports.h:165
A ChannelPort which sends data to the accelerator.
Definition Ports.h:77
virtual void write(const MessageData &)=0
A very basic blocking write API.
virtual void connect(std::optional< unsigned > bufferSize=std::nullopt) override
Set up a connection to the accelerator.
Definition Ports.h:82
A function call which gets attached to a service port.
Definition Services.h:324
A function call which gets attached to a service port.
Definition Services.h:274
virtual void start()
In cases where necessary, enable host memory services.
Definition Services.h:244
A "slice" of some parent MMIO space.
Definition Services.h:171
Information about the Accelerator system.
Definition Services.h:111
A telemetry port which gets attached to a service port.
Definition Services.h:386
void connect()
Connect to a particular telemetry port. Offset should be non-nullopt.
Definition Services.cpp:378
static void hostmemWriteTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Test the hostmem write functionality.
static void aggregateHostmemBandwidthTest(AcceleratorConnection *, Accelerator *, uint32_t width, uint32_t xferCount, bool read, bool write)
static void dmaTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool read, bool write)
static void hostmemBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, uint32_t xferCount, const std::vector< uint32_t > &widths, bool read, bool write)
static void callbackTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static void bandwidthTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, uint32_t xferCount, bool read, bool write)
constexpr std::array< uint32_t, 5 > defaultWidths
Definition esitester.cpp:62
static void hostmemReadBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void bandwidthReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string formatBandwidth(double bytesPerSec)
Definition esitester.cpp:74
static void hostmemWriteBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void dmaWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void bandwidthWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string humanBytes(uint64_t bytes)
Definition esitester.cpp:95
static void loopbackAddTest(AcceleratorConnection *, Accelerator *, uint32_t iterations, bool pipeline)
static void dmaReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void hostmemTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool write, bool read)
static std::string humanTimeUS(uint64_t us)
int main(int argc, const char *argv[])
static std::string defaultWidthsStr()
Definition esitester.cpp:63
static void hostmemReadTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Definition debug.py:1
Definition esi.py:1
std::string toString(const std::any &a)
'Stringify' a std::any. This is used to log std::any values by some loggers.
Definition Logging.cpp:132
std::string toHex(void *val)
Definition Common.cpp:37
RAII memory region for host memory.
Definition Services.h:220
virtual void * getDevicePtr() const
Sometimes the pointer the device sees is different from the pointer the host sees.
Definition Services.h:226
virtual void * getPtr() const =0
Get a pointer to the host memory.
virtual void flush()
Flush the memory region to ensure that the device sees the latest contents.
Definition Services.h:234
virtual std::size_t getSize() const =0