#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <exception>
#include <string>

// Element type of the matrices. NOTE(review): FT is not defined anywhere in
// this file, so it is presumably injected by the build (e.g. -DFT=float).
// Fall back to double so the file also compiles on its own; an externally
// supplied definition still takes precedence.
#ifndef FT
#define FT double
#endif

void multiply_tr(FT* m1, FT* m2, size_t size, FT* result);

/// Micro-benchmark driver: multiplies a fixed 4x4 matrix with itself
/// `times` times (first command-line argument, default 100), prints the
/// elapsed wall-clock time in nanoseconds and then the resulting matrix.
int main(int argc, char** argv)
{
    constexpr size_t dim {4};
    constexpr size_t size {dim * dim};

    int times = 100;
    if (argc > 1) {
        // std::stoi throws std::invalid_argument / std::out_of_range on bad
        // input; report it instead of terminating on an uncaught exception.
        try {
            times = std::stoi(argv[1]);
        } catch (const std::exception&) {
            std::fprintf(stderr, "Usage: %s [iterations]\n", argv[0]);
            return 1;
        }
    }

    FT m1[size] = {1.5, 2.3, 1.1, 4,
                   1.2, 2.2, 3.3, 4.4,
                   3.1, .1,  2.2, .87,
                   5.5, 0.8, 1.7, 2.2};

    // Zero-initialised so the printout is well defined even when the loop
    // below runs zero times (times <= 0).
    FT res[size] = {};

    auto s = std::chrono::high_resolution_clock::now();
    for (int i {}; i < times; ++i)
        multiply_tr(m1, m1, dim, res);
    auto e = std::chrono::high_resolution_clock::now();

    // The representation type of nanoseconds is implementation-defined
    // (long or long long); cast explicitly so the format specifier matches.
    const auto elapsed_ns =
        std::chrono::duration_cast<std::chrono::nanoseconds>(e - s).count();
    std::printf("Time to complete: %lldns.\n", static_cast<long long>(elapsed_ns));

    for (size_t i {}; i < dim; ++i) {
        for (size_t j {}; j < dim; ++j)
            std::printf("%g, ", res[i*dim + j]);
        std::puts("");
    }

    return 0;
}

/// Multiplies two square row-major matrices, reading the second operand
/// row-wise: result[i][j] is the dot product of row i of m1 and row j of m2,
/// i.e. result = m1 * transpose(m2).
///
/// @param m1     left operand, size*size elements, row-major
/// @param m2     right operand, size*size elements, row-major
/// @param size   number of rows (and columns) of each matrix, NOT the
///               element count
/// @param result output buffer with room for size*size elements; every
///               element is overwritten, so repeated calls do not accumulate.
///               Must not overlap m1 or m2, since inputs are still being read
///               while results are stored.
void multiply_tr(FT* m1, FT* m2, size_t size, FT* result)
{
    for (size_t i {}; i < size; ++i) {
        const FT* row1 = m1 + i * size;
        for (size_t j {}; j < size; ++j) {
            const FT* row2 = m2 + j * size;
            // Sum into a local so the store happens once per output element
            // and never touches neighbouring elements of result.
            FT sum = 0;
            for (size_t k {}; k < size; ++k)
                sum += row1[k] * row2[k];
            result[i * size + j] = sum;
        }
    }
}