Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 31 additions & 5 deletions cpp/examples/performance_timers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
*/
#include "memilio/timer/auto_timer.h"
#include "memilio/timer/table_printer.h"
#include "memilio/timer/timer_registrar.h"
#include "memilio/utils/logging.h"
#include "memilio/utils/miompi.h"

#include <thread> // This is only used for the example load function.

Expand All @@ -40,8 +43,15 @@ void load()

int main()
{
// Specify the namespace of AutoTimer, so we don't have to repeat it. Do not do this with entire namespaces.
mio::set_log_level(mio::LogLevel::info);
mio::mpi::init();

const int comm_rank = mio::mpi::rank(mio::mpi::get_world());
const int comm_size = mio::mpi::size(mio::mpi::get_world());
// Specify the namespace of AutoTimer and TimerRegistrar, so we don't have to repeat it.
// Avoid "using" statements with entire namespaces.
using mio::timing::AutoTimer;
using mio::timing::TimerRegistrar;

// Start immediately timing the main function. An AutoTimer starts timing upon its creation, and ends timing when
// it is destroyed. This usually happens when a function returns, or a scope indicated by {curly braces} ends.
Expand All @@ -54,15 +64,21 @@ int main()
// the end of the programm. This can be disabled by calling `TimerRegistrar::disable_final_timer_summary()`.
auto printer = std::make_unique<mio::timing::TablePrinter>();
printer->set_time_format("{:e}");
mio::timing::TimerRegistrar::get_instance().set_printer(std::move(printer));
TimerRegistrar::get_instance().set_printer(std::move(printer));

// To manually print all timers, use `TimerRegistrar::print_timers()`, but make sure that no timers are running.
// To manually print all timers, use `TimerRegistrar::print_timers()` (or `TimerRegistrar::print_gathered_timers()`
// when using MPI), but make sure that no timers are running.
// The "main" timer in this example would make that difficult, but you can simply add another scope around it,
// similar to the "compute loops" timer below.

const int N = 1000; // Number of iterations. Be wary of increasing this number when using the sleep_for load!
// Number of iterations. Be wary of increasing this number when using the sleep_for load!
const int total_iterations = 1000;
const int N = total_iterations / comm_size + (comm_rank < (total_iterations % comm_size));

std::cout << "Num threads: " << mio::omp::get_max_threads() << "\n";
if (mio::mpi::is_root()) {
mio::log_info("Num ranks: {} - Num threads: {} - Work size per rank: {}", comm_size,
mio::omp::get_max_threads(), N);
}

// Open a new scope to time computations.
{
Expand Down Expand Up @@ -100,5 +116,15 @@ int main()
}
}

// For MPI parallel execution, we replace the final timer summary with a manual print. That way, we can gather
// timers from all ranks into a single output, instead of each rank printing a separate output. You can also use a
// custom printer for computing additional statistics.
if (comm_size > 1) {
TimerRegistrar::get_instance().disable_final_timer_summary();
TimerRegistrar::get_instance().print_gathered_timers(mio::mpi::get_world());
}

mio::mpi::finalize();

return 0;
}
33 changes: 33 additions & 0 deletions cpp/memilio/timer/basic_timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@
#ifndef MIO_TIMER_BASIC_TIMER_H
#define MIO_TIMER_BASIC_TIMER_H

#include "memilio/io/io.h"
#include "memilio/timer/definitions.h"

#include <string_view>
#include <utility>

namespace mio
{
Expand Down Expand Up @@ -69,6 +71,37 @@ class BasicTimer
should_be_running(false, "~BasicTimer");
}

/**
* serialize this.
* @see mio::serialize
*/
template <class IOContext>
void serialize(IOContext& io) const
{
auto obj = io.create_object("BasicTimer");
obj.add_element("elapsed_time", details::convert_to_ticks(m_elapsed_time));
}

/**
* deserialize an object of this class.
* @see mio::deserialize
*/
template <class IOContext>
static IOResult<BasicTimer> deserialize(IOContext& io)
{
using Tick = decltype(details::convert_to_ticks(std::declval<DurationType>()));
auto obj = io.expect_object("BasicTimer");
auto et = obj.expect_element("elapsed_time", Tag<Tick>{});
return apply(
io,
[](auto&& et_) {
BasicTimer b;
b.m_elapsed_time = DurationType{et_};
return b;
},
et);
}

private:
TimeType m_start_time; ///< The last time point at which start() was called
DurationType m_elapsed_time{0}; ///< The total time spent between starts and stops.
Expand Down
17 changes: 16 additions & 1 deletion cpp/memilio/timer/definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include "memilio/utils/mioomp.h"

#include <chrono>
#include <cstddef>
#include <ctime>

namespace mio
{
Expand All @@ -42,6 +42,21 @@ using DurationType = std::chrono::steady_clock::duration;

#endif

namespace details
{

/// @brief Convert a duration to integer ticks. Useful for serialization.
inline decltype(auto) convert_to_ticks(DurationType duration)
{
#ifdef MEMILIO_ENABLE_OPENMP
return duration;
#else
return duration.count();
#endif
}

} // namespace details

/**
* @brief Convert a duration to a (floating point) number of seconds.
* @param[in] duration Any DurationType value, mainly `BasicTimer::get_elapsed_time()`.
Expand Down
9 changes: 5 additions & 4 deletions cpp/memilio/timer/list_printer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define MIO_TIMER_LIST_PRINTER_H

#include "memilio/timer/registration.h"
#include "memilio/utils/compiler_diagnostics.h"

#include <ostream>
#include <list>
Expand All @@ -45,16 +46,16 @@ struct ListPrinter : public Printer {
const auto indent = " ";
// Write out all timers
out << "All Timers: " << timer_register.size() << "\n";
for (const auto& [name, scope, timer, thread] : timer_register) {
for (const auto& [name, scope, timer, thread, rank] : timer_register) {
out << indent << qualified_name(name, scope) << ": " << std::scientific
<< time_in_seconds(timer.get_elapsed_time()) << " (" << thread << ")\n";
is_multithreaded |= thread > 0;
<< time_in_seconds(timer.get_elapsed_time()) << " (" << rank << ", " << thread << ")\n";
is_multithreaded |= thread > 0 || rank > 0;
}
// Write out timers accumulated over threads by name
if (is_multithreaded) {
// dedupe list entries from parallel execution
std::map<std::string, DurationType> deduper;
for (const auto& [name, scope, timer, _] : timer_register) {
for (const auto& [name, scope, timer, thread, rank] : timer_register) {
deduper[qualified_name(name, scope)] += timer.get_elapsed_time();
}
out << "Unique Timers (accumulated): " << deduper.size() << "\n";
Expand Down
2 changes: 1 addition & 1 deletion cpp/memilio/timer/named_timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class NamedTimer : public BasicTimer
*/
NamedTimer()
{
TimerRegistrar::get_instance().add_timer({name(), scope(), *this, mio::omp::get_thread_id()});
TimerRegistrar::get_instance().add_timer({name(), scope(), *this, mio::omp::get_thread_id(), 0});
}
};

Expand Down
3 changes: 3 additions & 0 deletions cpp/memilio/timer/registration.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#ifndef MIO_TIMER_REGISTRATION_H
#define MIO_TIMER_REGISTRATION_H

#include "memilio/io/default_serialize.h"
#include "memilio/timer/basic_timer.h"

#include <list>
Expand All @@ -30,11 +31,13 @@ namespace mio
namespace timing
{

/// @brief Struct used to register ( @see TimerRegistrar ) and print timers ( @see Printer ).
struct TimerRegistration {
std::string name;
std::string scope;
BasicTimer& timer;
int thread_id;
int rank;
};

/**
Expand Down
10 changes: 5 additions & 5 deletions cpp/memilio/timer/table_printer.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ class TablePrinter : public Printer
std::map<std::string, size_t> row_to_index; // map from name to index, used to fill table
bool is_multithreaded = false; // keep track of whether a thread id > 0 exists
// map rows from thread 0 first, so the order of timers (mostly) corresponds to their call order
for (const auto& [name, scope, _, thread] : timer_register) {
if (thread == 0) {
for (const auto& [name, scope, timer, thread, rank] : timer_register) {
if (thread == 0 && rank == 0) {
const std::string qn = qualified_name(name, scope);
if (row_to_index.emplace(qn, rows.size()).second) {
rows.push_back(qn);
Expand All @@ -220,8 +220,8 @@ class TablePrinter : public Printer
// make a second pass to add timers from other threads
// this does nothing, if all timers are used on thread 0 at least once
if (is_multithreaded) {
for (auto& [name, scope, _, thread] : timer_register) {
if (thread != 0) {
for (auto& [name, scope, timer, thread, rank] : timer_register) {
if (thread != 0 || rank != 0) {
const std::string qn = qualified_name(name, scope);
if (row_to_index.emplace(qn, rows.size()).second) {
rows.push_back(qn);
Expand All @@ -245,7 +245,7 @@ class TablePrinter : public Printer
}
// accumulate elapsed time and gather statistics in the table
// averages are calculated later, using finished values from elapsed and num
for (auto& [name, scope, timer, thread] : timer_register) {
for (auto& [name, scope, timer, thread, rank] : timer_register) {
const auto row = row_to_index[qualified_name(name, scope)];
const auto time = time_in_seconds(timer.get_elapsed_time());
table(row, elapsed) += time;
Expand Down
90 changes: 86 additions & 4 deletions cpp/memilio/timer/timer_registrar.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,19 @@
#ifndef MIO_TIMER_TIMER_REGISTRAR_H
#define MIO_TIMER_TIMER_REGISTRAR_H

#include "memilio/io/binary_serializer.h"
#include "memilio/io/default_serialize.h"
#include "memilio/timer/basic_timer.h"
#include "memilio/timer/registration.h"
#include "memilio/timer/table_printer.h"
#include "memilio/utils/compiler_diagnostics.h"
#include "memilio/utils/miompi.h"

#include <cassert>
#include <iostream>
#include <list>
#include <memory>
#include <mutex>
#include <ostream>

namespace mio
{
Expand Down Expand Up @@ -88,14 +93,35 @@ class TimerRegistrar
{
PRAGMA_OMP(single nowait)
{
get_instance().m_printer->print(m_register, out);
m_printer->print(m_register, out);
}
}

/**
* @brief Print all timers gathered on the given MPI Comm using a Printer.
*
* By default, uses TablePrinter to write to stdout. The printer can be changed using the set_printer member.
*
* @param comm An MPI communicator. Make sure this method is called by all ranks on comm!
* @param out Any output stream, defaults to std::cout.
*/
void print_gathered_timers(mpi::Comm comm, std::ostream& out = std::cout)
{
PRAGMA_OMP(single nowait)
{
std::list<BasicTimer> external_timers; // temporary storage for gathered registrations
std::list<TimerRegistration> gathered_register; // combined registrations of all ranks
gather_timers(comm, external_timers, gathered_register);
if (mpi::is_root()) {
m_printer->print(gathered_register, out);
}
}
}

/// @brief Prevent the TimerRegistrar from calling print_timers on exit from main.
void disable_final_timer_summary() const
void disable_final_timer_summary()
{
get_instance().m_print_on_death = false;
m_print_on_death = false;
}

/**
Expand Down Expand Up @@ -143,6 +169,62 @@ class TimerRegistrar
}
}

/// @brief Gather timers from all ranks, using external_timers as temporary timer storage for gathered_register.
void gather_timers(mpi::Comm comm, std::list<BasicTimer>& external_timers,
std::list<TimerRegistration>& gathered_register) const
{
#ifndef MEMILIO_ENABLE_MPI
mio::unused(comm, external_timers);
#endif
if (mpi::is_root()) {
std::ranges::transform(m_register, std::back_inserter(gathered_register), [](const TimerRegistration& r) {
return TimerRegistration{r.name, r.scope, r.timer, r.thread_id, 0};
});
}
if (comm == nullptr) {
if (mpi::is_root()) {
log_error("Got nullptr as MPI Comm. Only timers on root rank are gathered.");
}
return;
}
#ifdef MEMILIO_ENABLE_MPI
// name, scope, timer, thread_id
using GatherableRegistration = std::tuple<std::string, std::string, BasicTimer, int>;
int comm_size;
MPI_Comm_size(comm, &comm_size);

if (mpi::is_root()) {
for (int snd_rank = 1; snd_rank < comm_size; snd_rank++) { // skip root rank!
int bytes_size;
MPI_Recv(&bytes_size, 1, MPI_INT, snd_rank, 0, comm, MPI_STATUS_IGNORE);
ByteStream bytes(bytes_size);
MPI_Recv(bytes.data(), bytes.data_size(), MPI_BYTE, snd_rank, 0, mpi::get_world(), MPI_STATUS_IGNORE);

auto rec_register = deserialize_binary(bytes, Tag<std::vector<GatherableRegistration>>{});
if (!rec_register) {
log_error("Error receiving ensemble results from rank {}.", snd_rank);
}
std::ranges::transform(
rec_register.value(), std::back_inserter(gathered_register), [&](const GatherableRegistration& r) {
const auto& [name, scope, timer, thread_id] = r;
external_timers.push_back(timer);
return TimerRegistration{name, scope, external_timers.back(), thread_id, snd_rank};
});
}
}
else {
std::vector<GatherableRegistration> snd_register;
std::ranges::transform(m_register, std::back_inserter(snd_register), [](const TimerRegistration& r) {
return GatherableRegistration{r.name, r.scope, r.timer, r.thread_id};
});
ByteStream bytes = serialize_binary(snd_register);
int bytes_size = int(bytes.data_size());
MPI_Send(&bytes_size, 1, MPI_INT, 0, 0, comm);
MPI_Send(bytes.data(), bytes.data_size(), MPI_BYTE, 0, 0, comm);
}
#endif
}

std::unique_ptr<Printer> m_printer = std::make_unique<TablePrinter>(); ///< A printer to visualize all timers.
bool m_print_on_death = true; ///< Whether to call m_printer during the destructor.
std::list<TimerRegistration> m_register; ///< List that allows access to timers without having their name.
Expand Down
Loading