-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbalar.h
163 lines (138 loc) · 4.97 KB
/
balar.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
// Copyright 2009-2020 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2020, NTESS
// All rights reserved.
//
// Portions are copyright of other developers:
// See the file CONTRIBUTORS.TXT in the top level directory
// of the distribution for more information.
//
// This file is part of the SST software package. For license
// information, see the LICENSE file in the top level directory of the
// distribution.
// SST includes
#include <sst/core/sst_types.h>
#include <sst/core/link.h>
#include <sst/core/event.h>
#include <sst/core/output.h>
#include <sst/core/timeConverter.h>
#include <sst/core/interfaces/simpleMem.h>
#include <sst/core/component.h>
#include <../ariel/ariel_shmem.h>
// Other Includes
#include "mempool.h"
#include "host_defines.h"
#include "builtin_types.h"
#include "driver_types.h"
#include "cuda_runtime_api.h"
#include "balar_event.h"
// Standard library and system includes
#include <cstring>
#include <fstream>
#include <map>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
using namespace std;
using namespace SST;
using namespace SST::Interfaces;
using namespace SST::BalarComponent;
namespace SST {
namespace BalarComponent {
class Balar : public SST::Component {
public:
Balar( SST::ComponentId_t id, SST::Params& params);
void init(unsigned int phase);
void setup() {};
void finish() {};
// NEW HANDLERS
void cpuHandler( SST::Event* e );
void memoryHandler(SimpleMem::Request* event);
void gpuCacheHandler(SimpleMem::Request* event);
void handleCPUWriteRequest(uint64_t txSize, uint64_t pAddr);
void handleCPUReadRequest(uint64_t txSize, uint64_t pAddr);
// TODO Need separate GPU memHierarchy first
void handleReadRequest();
void commitReadRequest();
// TODO Need separate GPU memHierarchy first
void handleWriteRequest();
void commitWriteRequest();
bool is_SST_buffer_full(unsigned core_id);
void send_read_request_SST(unsigned core_id, uint64_t address, uint64_t size, void* mem_req);
void send_write_request_SST(unsigned core_id, uint64_t address, uint64_t size, void* mem_req);
void SST_callback_memcpy_H2D_done();
void SST_callback_memcpy_D2H_done();
bool tick(SST::Cycle_t x);
cudaMemcpyKind memcpyKind;
bool is_stalled = false;
unsigned int transferNumber;
std::vector< uint64_t > physicalAddresses;
uint64_t totalTransfer;
uint64_t ackTransfer;
uint64_t remainingTransfer;
uint64_t baseAddress;
uint64_t currentAddress;
std::vector< uint8_t > dataAddress;
uint32_t pending_transactions_count = 0;
uint32_t maxPendingTransCore;
~Balar() { };
SST_ELI_REGISTER_COMPONENT(
Balar,
"balar",
"balar",
SST_ELI_ELEMENT_VERSION(3,2,0),
"GPGPU simulator based on GPGPU-Sim",
COMPONENT_CATEGORY_PROCESSOR
)
SST_ELI_DOCUMENT_PARAMS(
{"verbose", "Verbosity for debugging. Increased numbers for increased verbosity.", "0"},
{"clock", "Internal Controller Clock Rate.", "1.0 Ghz"},
{"latency", "The time to be spent to service a memory request", "1000"},
{"num_nodes", "number of disaggregated nodes in the system", "1"},
{"num_cores", "Number of GPUs", "1"},
{"maxtranscore", "Maximum number of pending transactions", "16"},
{"maxcachetrans", "Maximum number of pending cache transactions", "512"},
)
// Optional since there is nothing to document
SST_ELI_DOCUMENT_STATISTICS(
)
SST_ELI_DOCUMENT_PORTS(
{"requestLink%(num_cores)d", "Handle CUDA API calls", { "BalarComponent.BalarEvent", "" } },
{"requestMemLink%(num_cores)d", "Link to CPU memH (cache)", {} },
{"requestGPUCacheLink%(num_cores)d", "Link to GPU memH (cache)", {} }
)
// Optional since there is nothing to document
SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS(
)
private:
struct cache_req_params {
cache_req_params( unsigned m_core_id, void* mem_fetch, SimpleMem::Request* req) {
core_id = m_core_id;
mem_fetch_pointer = mem_fetch;
original_sst_req = req;
}
void* mem_fetch_pointer;
unsigned core_id;
SimpleMem::Request* original_sst_req;
};
Balar(); // for serialization only
Balar(const Balar&); // do not implement
void operator=(const Balar&); // do not implement
uint32_t cpu_core_count;
uint32_t gpu_core_count;
uint32_t pending_transaction_count = 0;
std::unordered_map<SimpleMem::Request::id_t, SimpleMem::Request*>* pendingTransactions;
SimpleMem** gpu_to_cpu_cache_links;
Link** gpu_to_core_links;
uint32_t latency; // The page fault latency/ the time spent by Balar to service a memory allocation request
SimpleMem** gpu_to_cache_links;
uint32_t maxPendingCacheTrans;
std::unordered_map<SimpleMem::Request::id_t, struct cache_req_params>* gpuCachePendingTransactions;
uint32_t* numPendingCacheTransPerCore;
Output* output;
}; //END class Balar
} //END namespace BalarComponent
} //END namespace SST