File size: 1,870 Bytes
29547e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#pragma once
#include "gpu_types.h"
#include "gpu_libs.h"
#include <memory>
#include <vector>

/// Pointer to a function that can be registered on a KernelTimer through
/// KernelTimer::set_callback().
///
/// The parameter names mirror the values a KernelTimer holds:
///   - elapsed_time: measured duration (units are not stated in this header;
///     presumably milliseconds as reported by the GPU event API — confirm).
///   - calc_ops:     operation count associated with the timed work.
///   - time_ptr:     destination for the time result.
///   - gflops_ptr:   destination for the throughput result.
///   - user_data:    opaque pointer supplied when the callback was registered.
using TimerCompletionCallback = void (*)(float elapsed_time, size_t calc_ops, float *time_ptr, float *gflops_ptr,
                                         void *user_data);

/// Holds a start/stop event pair together with an operation count, two float
/// output pointers and an optional completion callback.
///
/// Only the declarations and the trivial inline accessors are visible in this
/// header; the constructor, destructor, start/stop, synchronize and
/// execute_callback are defined elsewhere, so the notes on them below are
/// based on their names and signatures and should be confirmed against the
/// implementation.
///
/// NOTE(review): the class declares a destructor and stores two event handles
/// but leaves the copy operations implicit. If the destructor releases the
/// events, a copied KernelTimer would release them twice — confirm, and
/// consider deleting copy construction/assignment.
class KernelTimer {
  private:
    // Operation count for the timed work (exposed via get_calc_ops()).
    size_t calc_ops;
    // Event handles exposed via get_start_event() / get_stop_event();
    // presumably recorded by start_timer() and stop_timer() — confirm.
    HOST_TYPE(Event_t) start, stop;
    // Output locations exposed via get_time_ptr() / get_gflops_ptr().
    // Ownership and lifetime are not visible here; presumably caller-owned.
    float *time_ptr;
    float *gflops_ptr;
    // Opaque pointer and function registered through set_callback().
    void *user_data;
    TimerCompletionCallback callback;
    // Flag read/written by is_callback_executed() / set_callback_executed().
    bool callback_executed;

  public:
    /// @param calc_ops operation count for the work being timed
    /// @param time     float output location (presumably receives the elapsed time)
    /// @param gflops   float output location (presumably receives the GFLOPS figure)
    KernelTimer(size_t calc_ops, float *time, float *gflops);

    /// Start / stop the measurement on the given stream (defaults to stream 0).
    void start_timer(hipStream_t stream = 0);
    void stop_timer(hipStream_t stream = 0);
    /// Register a completion callback and the opaque pointer to hand back to it.
    void set_callback(TimerCompletionCallback cb, void *data = nullptr);

    // Wait for the timer to complete and execute the callback if set
    void synchronize();

    // Read-only accessors for the timer's state, e.g. for use by callback code.
    // The event getters return the handles by value.
    HOST_TYPE(Event_t) get_start_event() const { return start; }
    HOST_TYPE(Event_t) get_stop_event() const { return stop; }
    size_t get_calc_ops() const { return calc_ops; }
    float *get_time_ptr() const { return time_ptr; }
    float *get_gflops_ptr() const { return gflops_ptr; }
    /// Run the completion step for the given elapsed time (defined elsewhere;
    /// presumably invokes the registered callback — confirm).
    void execute_callback(float elapsed_time);
    // Direct access to the callback_executed flag.
    void set_callback_executed(bool executed) { callback_executed = executed; }
    bool is_callback_executed() const { return callback_executed; }

    ~KernelTimer();
};

/// Scope guard that brackets the enclosing scope with a KernelTimer.
///
/// Construction creates a shared KernelTimer, appends it to the caller's
/// `timers` list and calls start_timer() on `stream`. Destruction calls
/// stop_timer() on the same stream. Because `timers` holds a shared_ptr to the
/// timer, the timer outlives this guard and can be queried later through that
/// list.
///
/// The guard is non-copyable (and therefore non-movable): a copy would share
/// the same KernelTimer and call stop_timer() a second time when destroyed.
class KernelTimerScoped {
  private:
    // Timer shared with the caller's `timers` list.
    std::shared_ptr<KernelTimer> timer;
    // Stream used for both the start and the stop call.
    hipStream_t stream;

  public:
    /// @param timers   list that receives a shared_ptr to the new timer
    /// @param calc_ops operation count forwarded to the KernelTimer constructor
    /// @param time     float output pointer forwarded to the KernelTimer constructor
    /// @param gflops   float output pointer forwarded to the KernelTimer constructor
    /// @param stream   stream passed to start_timer() and stop_timer() (default 0)
    KernelTimerScoped(std::vector<std::shared_ptr<KernelTimer>> &timers, size_t calc_ops, float *time, float *gflops,
                      hipStream_t stream = 0)
        : timer(std::make_shared<KernelTimer>(calc_ops, time, gflops)), stream(stream) {
        // Register before starting so the caller's list owns the timer even
        // if this guard is the only other reference.
        timers.push_back(timer);
        timer->start_timer(stream);
    }

    // Exactly one stop_timer() call per start_timer(): forbid copies, which
    // would otherwise stop the same timer once per copy.
    KernelTimerScoped(const KernelTimerScoped &) = delete;
    KernelTimerScoped &operator=(const KernelTimerScoped &) = delete;

    /// Stops the timer on the stream given at construction.
    ~KernelTimerScoped() { timer->stop_timer(stream); }
};