Commit | Line | Data |
---|---|---|
920dae64 AT |
1 | /* |
2 | * ========== Copyright Header Begin ========================================== | |
3 | * | |
4 | * OpenSPARC T2 Processor File: MemorySync.h | |
5 | * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. | |
6 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. | |
7 | * | |
8 | * The above named program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public | |
10 | * License version 2 as published by the Free Software Foundation. | |
11 | * | |
12 | * The above named program is distributed in the hope that it will be | |
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public | |
18 | * License along with this work; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | * | |
21 | * ========== Copyright Header End ============================================ | |
22 | */ | |
23 | #ifndef _MEMORYMODEL_H | |
24 | #define _MEMORYMODEL_H | |
25 | /************************************************************************ | |
26 | ** | |
27 | ** Copyright (C) 2002, Sun Microsystems, Inc. | |
28 | ** | |
29 | ** Sun considers its source code as an unpublished, proprietary | |
30 | ** trade secret and it is available only under strict license provisions. | |
31 | ** This copyright notice is placed here only to protect Sun in the event | |
32 | ** the source is deemed a published work. Disassembly, decompilation, | |
33 | ** or other means of reducing the object code to human readable form | |
34 | ** is prohibited by the license agreement under which this code is | |
35 | ** provided to the user or company in possession of this copy." | |
36 | ** | |
37 | *************************************************************************/ | |
38 | #include <iostream> | |
39 | #include <vector> | |
40 | ||
41 | #include "MemoryTransaction.h" | |
42 | #include "MemorySyncDefs.h" | |
43 | #include "LoadStoreCmd.h" | |
44 | #include "MemoryAccessBuffer.h" | |
45 | #include "LoadStoreBuffer.h" | |
46 | #include "RieslingInterface.h" | |
47 | #include "MemorySyncMessage.h" | |
48 | #include "TsoChecker.h" | |
49 | #include "SS_PliSocket.h" | |
50 | ||
51 | // using namespace Tso; | |
52 | ||
53 | /** | |
54 | * MemorySync class extends Riesling's memory to provide Riesling same | |
55 | * memory images as RTL-TestBench | |
56 | * <p> | |
57 | * When RTL and Riesling perform lock-step simulation, RTL and Riesling | |
58 | * may see different data, especially for multi-thread microprocessors. | |
59 | * For example, considering the following STEP execution sequence | |
60 | * <UL> | |
61 | * <LI> (1) TO stores to address A (where T0 means thread 0) | |
62 | * <LI> (2) TO reads address A | |
63 | * <LI> (3) T1 reads address A (where T1 means thread 1) | |
64 | * </UL> | |
65 | * Assume all loads/stores are normal loads/stores, and not to IO address | |
66 | * space. Then, it is pretty sure that instruction (2) will read data | |
67 | * written by (1). However, this is not necessary true for (3). If the store | |
68 | * data is still in the store buffer of thread 0, then instruction (3) will | |
69 | * not see this data. Without any other help, Riesling will always read | |
70 | * data written by (1), which is wrong in this scenario. | |
71 | * <p> | |
72 | * Suppose in the above sequence the T0 and T1 are replaced with C0 (core 0) | |
73 | * and C1 (core 1):<br> | |
74 | * (Assume every core has its own D$) | |
75 | * <UL> | |
76 | * <LI> (1) CO stores to address A | |
77 | * <LI> (2) CO reads address A | |
78 | * <LI> (3) C1 reads address A | |
79 | * </UL> | |
80 | * Similarly, it cannot tell which data (3) will get based on just the sequence. | |
81 | * Riesling, without any help, will again always get data from (1). However, | |
82 | * it is possible (3) gets data from its own D$ which still contains data | |
83 | * before (1). (The execution of (1) will send an invalidation to core 1, | |
84 | * but it may not yet reach to core 1's D$ before (3) is executed.) | |
85 | * <p> | |
86 | * This MemorySync model is designed to provide Riesling the same memory image | |
87 | * as the RTL. To do so, three things are needed | |
88 | * - data structures to maintain transient data, | |
89 | * - mechanism to map STEP (instruction execution by Riesling) to right data, | |
90 | * - APIs to RTL to guide data flow | |
91 | * Riesling can see this consistent memory view by not accessing memory directly, | |
92 | * instead it should access memory through this model. | |
93 | * <p> | |
94 | * This module models 4 essential data structures: fetch buffer, load buffer, | |
95 | * store buffer, and memory access buffer. The fetch buffer, load buffer, and | |
96 | * and store buffer are created for each thread; while the memory access buffer | |
97 | * is shared among all threads. | |
98 | * <p> | |
99 | * The memory access buffer (MAB) maintains the global data access order, as | |
100 | * well as the memory image at the moment the related command is issued. The | |
101 | * memory images maintained include per core L1$ content and global L2$ and | |
102 | * Memory content. The L2$ and physical memory images are considered the same | |
103 | * because (1) the model assumes L2$ is the first memory component shared by | |
104 | * all of the cores, and (2) a write committed to L2$ is observable immediately | |
105 | * by all of them. | |
106 | * <p> | |
107 | * The first condition indicates that all (cacheable) accesses have to access | |
108 | * L2$ first, which guarantees consistency between L2 and memory. (Strictly | |
109 | * speaking, we should also assume that cacheable data cannot be changed to | |
110 | * non-cacheable data to avoid different image between L2 and memory.) | |
111 | * The second condition indicates a read from L2$ after a write to L2$ must | |
112 | * get the data written by the previous write. This is to guarantee there | |
113 | * exists only one L2 image. If any of these conditions is violated, then | |
114 | * the model may need to be revised. | |
115 | * <p> | |
116 | * Data is stored in the <data> field of an MAB entry, and the type indicates | |
117 | * which image it maintains. The StoreCommit entry contains data for L2/Memory, | |
118 | * the LoadFill/StoreUpdate entry contains data for the D$, and the FetchFill | |
119 | * type contains data for the I$. L1$ image needs to be presented as unit of | |
120 | * a cache line. Therefore, each LoadFill/FetchFill command create multiple | |
121 | * entries in the MAB since the data size of a MAB entry is limited to 8 bytes. | |
122 | * L2$ image does not have this need since it is actually the same as memory. | |
123 | * If an access supposed to get data from StoreCommit entry and cannot find | |
124 | * any match in the MAB, then it can just get data from the (Riesling) memory. | |
125 | * <p> | |
126 | * The fetch buffer has an entry associated with every instruction fetch. The | |
127 | * order of these entries is the same as the instruction fetch order. A link | |
128 | * pointing to the corresponding FetchData in MAB is set when the FetchData | |
129 | * command is received and executed. | |
130 | * <p> | |
131 | * When an instruction is about to be executed, Riesling calls MemorySync:: | |
132 | * postMemoryAccess() in order to obtain the instruction word. This callback | |
133 | * method maps the instruction with one of the entries in the fetch buffer. | |
134 | * Then, via the link of that entry, the method gets the instruction word | |
135 | * from the FetchData in the MAB, and assigns that back to the Riesling. | |
136 | * In matching the instruction with the fetch buffer entry, the model makes | |
137 | * the assumption that instructions are executed in the same order as the | |
138 | * instruction fetch. However, the model allows speculative fetches. Hence, | |
139 | * the matching is done by looking at the oldest, non-executed, and same PC | |
140 | * entry in the fetch buffer. All non-executed FetchIssue entries before this | |
141 | * matched one are considered as speculative fetch operations, and are thus | |
142 | * removed. | |
143 | * <p> | |
144 | * Similar to the fetch buffer, the load buffer has an entry associated with | |
145 | * every load instruction. But, unlike the fetch buffer, entries in the load | |
146 | * buffer must have a one-to-one mapping with load instructions. In addition, | |
147 | * the order in the load buffer must be in the program order as the LoadIssue | |
148 | * commands are required to be sent in the program order. Also similar to the | |
149 | * fetch buffer entry, a link pointing to the corresponding LoadData is set | |
150 | * when the LoadData command is received and executed. | |
151 | * <p> | |
152 | * When a load is executed, Riesling again calls MemorySync::postMemoryAccess() | |
153 | * to obtain its load data. This callback method maps the instruction with one | |
154 | * of the entries in the load buffer. Then, via the link of that entry, it gets | |
155 | * the data from the LoadData in the MAB, and assigns that back to the Riesling. | |
156 | * In matching the instruction with the fetch buffer entry, the model makes the | |
157 | * assumption that the Riesling is requested (by the RTL) to execute the load | |
158 | * instructions in the program order, except for I/O access, which is allowed | |
159 | * to be executed out-of-order relative to other loads. As a result, the method | |
160 | * maps the load instruction to the first non-executed entry in the load buffer | |
161 | * unless it is an I/O access, in which address is also considered. | |
162 | * <p> | |
163 | * Note that the condition that requires load instruction be executed in program | |
164 | * order in Riesling can be relaxed to the condition that requires only loads | |
165 | * accessing to the same 8-byte block be executed in the program order with minor | |
166 | * program modification. (Refer to the algorithm that matches the LoadData with | |
167 | * LoadIssue in MemorySync::handleLoadData().) | |
168 | * <p> | |
169 | * The store buffer also has an entry associated with every store. Difference | |
170 | * from fetch buffer and load buffer entry is that the store buffer entry also | |
171 | * has <data> field and a <byte_mask> indicating which byte is valid. When a | |
172 | * StoreCommit command is received by the module, it merges the data of the | |
173 | * matched store buffer entry with the current L2/Memory image of that 8-byte | |
174 | * block. The merged data is then saved in the StoreCommit entry in the MAB. | |
175 | * <p> | |
176 | * When a store is executed, Riesling calls MemorySync::preMemoryAccess(), | |
177 | * and pass the data to be written by it. The method compares the data with | |
178 | * the data in the model's store buffer, which represents RTL's data, and | |
179 | * reports error if happens. | |
180 | * The assumption made to map the store to the store buffer entry is that | |
181 | * the stores are executed in program order. So, the match simply finds | |
182 | * the first non-executed StoreIssue entry. | |
183 | * <p> | |
184 | * To simplify design and to handle some special cases, the store buffer is | |
185 | * actually modeled by three different data structures: stb_, rmoStb_ (rmo | |
186 | * store buffer), and retStb_ (retired store buffer). All StoreIssue commands | |
187 | * first create entries in the stb_. Upon receiving its StoreAck command, the | |
188 | * entry is moved to either rmoStb_ (if RMO stores) or retStb_ (otherwise). | |
189 | * As a result, the stb_ is supposed to contain the same store instructions as | |
190 | * RTL's store buffer so that the store buffer bypassing search can just search | |
191 | * the stb_. However, in some cases, such as the current Niagara 1 set-up, due | |
192 | * to the difficulty of getting the exact time, the RTL may still allow bypassing | |
193 | * even after its StoreAck is issued. To tolerate such situation, normal stores | |
194 | * are moved to retStb_ on StoreAck. Store buffer bypassing search then also | |
195 | * searches this buffer if none is found matched in the stb_. | |
196 | * <p> | |
197 | * The rmoStb_ is introduced to solve another issue. By definition, RMO stores | |
198 | * are not required to perform dependence check, and are only required to obey | |
199 | * the RMO memory consistency protocol. Hence, a RMO store can be issued (from | |
200 | * store buffer) to L2 without waiting for the commit of a prior store. It is | |
201 | * removed from the store buffer when it is issued. Therefore, its StoreAck is | |
202 | * sent before StoreCommit; while normal stores have StoreAck after StoreCommit. | |
203 | * The rmoStb_ is designed to hold the RMO StoreIssue entries when they are | |
204 | * removed from stb_ so that the instruction-data mapping is possible. | |
205 | * RMO Stores include block stores and stores using *_BLOCK_INIT_ST_QUAD_LDD_*. | |
206 | * <p> | |
207 | * Except for the assumptions described above, the model also has some other | |
208 | * assumption due to the implementation and testbench issue. This paragraph | |
209 | * summarizes all assumptions here: | |
210 | * <OL> | |
211 | * <LI> L2$ is the first memory component shared by all cores | |
212 | * <LI> a write committed to L2$ is observable immediately by all of the cores. | |
213 | * <LI> cacheable data cannot be changed to noncacheable data during the test | |
214 | * <LI> instructions are executed in the same order as the instruction fetch | |
215 | * (but allows speculative fetches) | |
216 | * <LI> load instructions are executed in the program order, except for I/O | |
217 | * <LI> store instructions are executed in the program order | |
218 | * <LI> LoadData to the same 8-byte block is in the program order | |
219 | * <LI> StoreCommit to the same 8-byte block is in the program order | |
220 | * <LI> no STB bypassing of the rmo store data is possible | |
221 | * </OL> | |
222 | * <p> | |
223 | * The design attempts to be microarchitecture-independent. The microarchitecture | |
224 | * dependent part is parameterized. These microarchitecture dependent parameters | |
225 | * are either defined in MemorySyncDefs.h or passed as arguments when creating | |
226 | * the MemorySync object. These parameters include: | |
227 | * - system type (N1, N2, ...) | |
228 | * - number of cores per chip, | |
229 | * - number of threads per core, | |
230 | * - D$ line size, | |
231 | * - I$ line size | |
232 | * <p> | |
233 | * @see LoadStoreCmd.h | |
234 | * @see LoadStoreBuffer.h | |
235 | * @see MemoryAccessBuffer.h | |
236 | * @see MemorySyncDefs.h | |
237 | */ | |
238 | ||
239 | class MemorySync{ | |
240 | ||
241 | public: | |
242 | static MemorySync* msyncObj; | |
243 | static std::string dumpBuffers(); | |
244 | ||
245 | public: | |
246 | /** | |
247 | * Default constructor | |
248 | */ | |
249 | MemorySync(); | |
250 | ||
251 | /** | |
252 | * Constructor | |
253 | * @memDebug debug level | |
254 | */ | |
255 | MemorySync(int max_strands, int strands_per_core, int cores_per_cpu, int memDebug, int tsoChecker=1, int callback=1); | |
256 | ||
257 | /** | |
258 | * Copy constructor | |
259 | * | |
260 | * @param orig The MemorySync object to copy. | |
261 | */ | |
262 | MemorySync( const MemorySync &orig ); | |
263 | ||
264 | /** | |
265 | * Destructor | |
266 | */ | |
267 | virtual ~MemorySync(); | |
268 | ||
269 | /** | |
270 | * Equality operator | |
271 | * | |
272 | * @param rhs The right hand side of the equality operator | |
273 | * @return Return true if this objec and rhs are equal, | |
274 | * otherwise return false | |
275 | */ | |
276 | bool operator==( const MemorySync &rhs ) const; | |
277 | ||
278 | /** | |
279 | * Assignment operator | |
280 | * | |
281 | * @param rhs The right hand side of the assignment operator. | |
282 | * @return The lvalue of the assignment. | |
283 | */ | |
284 | const MemorySync & operator=( const MemorySync &rhs ); | |
285 | ||
286 | /** | |
287 | * Return a string representation of this MemorySync object. | |
288 | */ | |
289 | std::string toString() const; | |
290 | ||
291 | /** | |
292 | * return a pointer to the (global) memory access buffer | |
293 | */ | |
294 | MemoryAccessBuffer* getMABPtr () { return &mab_; } | |
295 | ||
296 | /** | |
297 | * return a pointer to a specified strand's (thread's) load buffer | |
298 | * @param tid thread id | |
299 | * @return pointer to the load buffer of the specified thread, ldb_[tid] | |
300 | */ | |
301 | LoadStoreBuffer* getLDBPtr (int tid) { return &ldb_[tid]; }; | |
302 | ||
303 | /** | |
304 | * return a pointer to a specified thread's store buffer | |
305 | * @param tid thread id | |
306 | * @return pointer to the store buffer of specified tread, stb_[tid] | |
307 | */ | |
308 | LoadStoreBuffer* getSTBPtr (int tid) { return &stb_[tid]; }; | |
309 | ||
310 | /** | |
311 | * return a pointer to a specified thread's store buffer | |
312 | * @param tid thread id | |
313 | * @return pointer to the store buffer of specified tread, stb_[tid] | |
314 | */ | |
315 | LoadStoreBuffer* getRSTBPtr (int tid) { return &retStb_[tid]; } | |
316 | ||
317 | /** | |
318 | * assign data to StoreIssue entry | |
319 | * @param tid thread id | |
320 | * @param addr address | |
321 | * @param data data to be written | |
322 | * @param rmo indicates if this is for a rmo store | |
323 | */ | |
324 | void setTestBenchData(uint32_t tid, uint64_t addr, uint64_t data, bool rmo); | |
325 | ||
326 | /** | |
327 | * remove a StoreIssue entry from the specified thread's retired store buffer | |
328 | * @param tid thread id | |
329 | * @param addr address | |
330 | */ | |
331 | void removeRetiredStore (uint64_t addr, uint32_t tid); | |
332 | ||
333 | ||
334 | /** | |
335 | * Executes LoadIssue MemorySync API command | |
336 | * <p> | |
337 | * Depending on the instruction type, this method pushes 1-8 entries into the | |
338 | * load buffer of that thread: <br> | |
339 | * - Block-Load, which accesses 64 bytes, results in 8 entries; <br> | |
340 | * - Quad-Load, which accesses 16 bytes, results in 2 entries; <br> | |
341 | * - normal load, which accesses 1-8 bytes, results in 1 entry. | |
342 | * <p> | |
343 | * In N2, this method has to figure out whether the data source is store buffer | |
344 | * or not. If it is, then the result will be used for LoadData sent later. | |
345 | * <p> | |
346 | * @param cmd a LoadStoreCmd object reference that contains the info of the LoadIssue | |
347 | */ | |
348 | void handleLoadIssue(LoadStoreCmd& cmd); | |
349 | ||
350 | /** | |
351 | * Executes LoadData MemorySync API command | |
352 | * <p> | |
353 | * Depending on the instruction type, this method pushes 1 or 2 (only for N2 so far) | |
354 | * entries into the global memory access buffer. In addition, it accesses data for | |
355 | * each entry according to the data source specified in the command. Note that the | |
356 | * MemorySync model for N2 actually has to figure out if the data source is store | |
357 | * buffer or not by itself. | |
358 | * <p> | |
359 | * If the data source is store buffer, then data should get from the store buffer | |
360 | * of that thread <br> | |
361 | * If the data source is L1, then data comes from LoadFill/StoreUpdate of the MAB, | |
362 | * or, if not found, from Riesling memory <br> | |
363 | * If the data source is L2/Memory, then data comes from StoreCommit of the MAB, | |
364 | * or, if not found, from Riesling memory <br> | |
365 | * <p> | |
366 | * @param cmd a LoadStoreCmd object reference that contains the info of the LoadData | |
367 | */ | |
368 | void handleLoadData(LoadStoreCmd& cmd); | |
369 | ||
370 | /** | |
371 | * Executes LoadFill MemorySync API command | |
372 | * <p> | |
373 | * This method pushes DCACHE_LINE_SIZE/8 entries into the global memory access buffer. | |
374 | * Each entry is filled with data at the moment the corresponding LoadData is issued. | |
375 | * <p> | |
376 | * Note that while searching for the matched LoadData entry, the matched one should | |
377 | * also have cacheL1 set. However, N2 does not specify cacheL1 flag in its LoadData | |
378 | * command. It requires the MemorySync model follow the assumption that every thread | |
379 | * can have only one outstanding load, i.e., the LoadData that results in the LoadFill | |
380 | * must be the only non-executed LoadData entry of that thread. | |
381 | * <p> | |
382 | * @param cmd a LoadStoreCmd object reference that contains the info of the LoadFill | |
383 | */ | |
384 | void handleLoadFill(LoadStoreCmd& cmd); | |
385 | ||
386 | /** | |
387 | * Executes StoreIssue MemorySync API command | |
388 | * This method pushes one entry for each of this command into its store buffer. | |
389 | * Although <data> is expected, some environment may not have data ready when | |
390 | * this command is issued. In this case, another method setTestBenchData() can | |
391 | * be used to set the data in the store buffer. | |
392 | * <p> | |
393 | * @param cmd a LoadStoreCmd object reference that contains the info of the StoreIssue | |
394 | */ | |
395 | void handleStoreIssue(LoadStoreCmd& cmd); | |
396 | ||
397 | /** | |
398 | * Executes StoreCommit MemorySync API command | |
399 | * This method merges the corresponding StoreIssue data with the L2/momory data | |
400 | * at the time when the StoreCommit is issued. The merged data is stored in the | |
401 | * StoreCommit entry in the MAB, which represents the L2/Memory image at that | |
402 | * time. Usually, a StoreCommit has one matched entry in the store buffer and | |
403 | * results in one entry in MAB. One exception of this is the BLK_INIT store. | |
404 | * If satisfying certain conditions, a BLK_INIT store can zero a L2 cache line. | |
405 | * In this case, the model inserts L2CACHE_LINE_SIZE/8 entries into MAB with 0 | |
406 | * in their <data> fields, except for the one that has data written by this | |
407 | * instruction. | |
408 | * <p> | |
409 | * @param cmd a LoadStoreCmd object reference that contains the info of the StoreCommit | |
410 | */ | |
411 | void handleStoreCommit (LoadStoreCmd& cmd) ; | |
412 | ||
413 | /** | |
414 | * Executes StoreAck MemorySync API command | |
415 | * This method move the matched StoreIssue in the stb_ into either retStb_ (for | |
416 | * normal stores) or rmoStb_ (for RMO stores). | |
417 | * <p> | |
418 | * @param cmd a LoadStoreCmd object reference that contains the info of the StoreAck | |
419 | */ | |
420 | void handleStoreAck (LoadStoreCmd& cmd); | |
421 | ||
422 | /** | |
423 | * Executes StoreInv MemorySync API command | |
424 | * This method notifies the StoreCommit that cuases the StoreInv that a StoreInv | |
425 | * is performed. | |
426 | * <p> | |
427 | * The function of StoreInv command is to prevent the StoreCommit that initiates | |
428 | * the StoreInv from retiring from the Memory Aceess Buffer (MAB) too early. A | |
429 | * StoreCommit cannot be removed from the MAB until it reaches to the head of the | |
430 | * MAB and all corresponding StoreInv and StoreUpdate are done. If the latter | |
431 | * condition is violate, then correct data may not be obtained. Considering the | |
432 | * following sequence: | |
433 | * - C0T0 StoreCommit A, inv=8'b00000010 | |
434 | * - C0T1 LoadData A (L1 hit) | |
435 | * - C1Tx StoreInv A | |
436 | * If the first StoreCommit is retired from the MAB without waiting for the | |
437 | * StoreInv completes, then since the MAB StoreCommit retirement also results in | |
438 | * writing its data to the Riesling memory, the LoadData would read StoreCommit's | |
439 | * data, which is wrong. The LoadData should get data before the StoreCommit. | |
440 | * | |
441 | * The implementation also creates an entry for the StoreInv in the MAB after it | |
442 | * notifies the StoreCommit it is done. This is actually not necessary, doing so | |
443 | * allows us examine the command sequence in MAB, which helps debug. | |
444 | * <p> | |
445 | * @param cmd a LoadStoreCmd object reference that contains the info of the StoreInv | |
446 | */ | |
447 | void handleStoreInv (LoadStoreCmd& cmd); | |
448 | ||
449 | /** | |
450 | * Executes StoreUpdate MemorySync API command | |
451 | * This method notifies the StoreCommit that cuases the StoreUpdate that a | |
452 | * StoreUpdate is performed. In addition, it also creates a StoreUpdate | |
453 | * entry in the MAB, and copy the data in the StoreCommit to the <data> field | |
454 | * of the StoreUpdate. Recall that the StoreUpdate keeps the D$ data image. | |
455 | * <p> | |
456 | * The other function of StoreUpdate command is the same as StoreInv, that is | |
457 | * is to prevent the StoreCommit that initiates the StoreUpdate from retiring | |
458 | * from the Memory Aceess Buffer (MAB) too early. Reference the example in the | |
459 | * handleStoreInv() for more details. | |
460 | * <p> | |
461 | * @param cmd a LoadStoreCmd object reference that contains the info of the StoreUpdate | |
462 | */ | |
463 | void handleStoreUpdate (LoadStoreCmd& cmd); | |
464 | ||
465 | /** | |
466 | * Executes Evict MemorySync API command | |
467 | * This method creates DCACHE_LINE_SIZE/8 Evict entries in the MAB to indicate | |
468 | * a L2 cache line is evicted. Each D$ line needs to have one Evict since its | |
469 | * <inv> may be different. | |
470 | * <p> | |
471 | * The function of Evict and EvictInv is to mimic L2 cache line eviction. Let's | |
472 | * use an example to explain the necessity of implementing them. Considering the | |
473 | * same sequence used in handleStoreInv(): | |
474 | * - C0T0 StoreCommit A, inv=8'b00000010 | |
475 | * - C0T1 LoadData A (L1 hit) | |
476 | * - C1Tx StoreInv A | |
477 | * From the value of <inv>, we know that the same line exists in core1's L1. | |
478 | * Suppose that before the StoreCommit, the line in L2 is evicted. Then, the | |
479 | * the StoreCommit would have <inv>=8'b00000000. Consequently, similar error will | |
480 | * happen: StoreCommit is removed too early so that the LoadData (L1 hit) gets | |
481 | * wrong data. Evict and EvictInv can solve this problem. Including these two | |
482 | * command, the above situation will have the following sequence: | |
483 | * - Evict A, inv=8'b00000010 | |
484 | * - C0T0 StoreCommit A, inv=8'b00000010 | |
485 | * - C0T1 LoadData A (L1 hit) | |
486 | * - C1Tx EvictInv A | |
487 | * | |
488 | * Again, EvictInv prevents Evict from being removed too early. | |
489 | * The implementation also creates an entry for the EvictInv in the MAB after it | |
490 | * notifies the StoreCommit it is done. This is actually not necessary, doing so | |
491 | * allows us examine the command sequence in MAB, which helps debug. | |
492 | * <p> | |
493 | * @param cmd a LoadStoreCmd object reference that contains the info of the Evict | |
494 | */ | |
495 | void handleEvict (LoadStoreCmd& cmd); | |
496 | ||
497 | /** | |
498 | * Executes EvictInv MemorySync API command | |
499 | * This method creates L2CACHE_LINE_SIZE/8 EvictInv entries in the MAB, and | |
500 | * notifies the corresponding Evict entries in the MAB that the EvictInv is | |
501 | * performed. Reference to handleEvict() for more discussion. | |
502 | * <p> | |
503 | * @param cmd a LoadStoreCmd object reference that contains the info of the EvictInv | |
504 | */ | |
505 | void handleEvictInv (LoadStoreCmd& cmd); | |
506 | ||
507 | /** | |
508 | * To allow DMA accesses from I/O. It occurres when a SIU Store (from ENET | |
509 | * or PCI) is seen in the L2 or when Bench back-door stores to memory. | |
510 | * | |
511 | * Riesling will update memory immediately when MemorySlam message is | |
512 | * received. | |
513 | * | |
514 | * No SSTEP will be sent with this message. | |
515 | * No checking is performed by Reisling on this message. | |
516 | * No checking against any load/store in-flight. | |
517 | */ | |
518 | void handleStoreSlam(StoreIssueCmd& cmd); | |
519 | /** | |
520 | * DMA_STORE, similar to MEM_SLAM, but allow inv_vec to handle conflict | |
521 | * with L1 data. | |
522 | */ | |
523 | void handleDmaStoreStart(LoadStoreCmd& cmd); | |
524 | void handleDmaStore (LoadStoreCmd& cmd) ; | |
525 | ||
526 | void handleFetchIssue(LoadStoreCmd& cmd); | |
527 | void handleFetchData(LoadStoreCmd& cmd); | |
528 | void handleFetchFill(LoadStoreCmd& cmd); | |
529 | ||
530 | void preMemoryAccess( MemoryTransaction &xact ); | |
531 | void postMemoryAccess( MemoryTransaction& xact ); | |
532 | ||
533 | static void pre_memory_access( void* msync, MemoryTransaction& xact ); | |
534 | static void post_memory_access( void* msync, MemoryTransaction& xact ); | |
535 | ||
536 | uint64_t getL1Instr(LoadStoreCmd& cmd); | |
537 | uint64_t getL1Data(LoadStoreCmd& cmd); | |
538 | uint64_t getL2Data(std::list<MemoryAccessEntry>::iterator from, LoadStoreCmd& cmd); | |
539 | uint64_t align2addr (uint64_t data, uint8_t size, uint32_t addr_offset); | |
540 | uint64_t align8byte (uint64_t data, uint8_t size, uint32_t addr_offset); | |
541 | uint64_t merge (uint64_t todata, uint64_t fromdata, uint8_t fromszv); | |
542 | void bufWriteMemory (std::vector<MemoryAccessEntry>& wdbuf); | |
543 | uint64_t byteMask(const uint8_t vbyte); | |
544 | ||
545 | uint64_t iSeq (uint32_t tid) { return (iseq_[tid]++); } | |
546 | uint64_t getIseq (uint32_t tid) { return iseq_[tid]; } | |
547 | ||
548 | Tso::TsoChecker* getTsoCheckerPtr () { return tsoChecker_; } | |
549 | ||
550 | /** | |
551 | * turn off msync model related callbacks | |
552 | */ | |
553 | void nullMsyncCallback(); | |
554 | /* | |
555 | * empty the buffers of a specified strand-id | |
556 | */ | |
557 | void flushMsyncCallback(int tid); | |
558 | ||
559 | /* | |
560 | * Riesling: | |
561 | * if LoadPop, pop and discard the Load at the top of your queue. | |
562 | * You will have received a LDISSUE and LDDATA for this Load before | |
563 | * receiving the LDPOP. | |
564 | * | |
565 | * Bench: | |
566 | * It is not possible for bench to send the <pa> on LDPOP. | |
567 | * So, Riesling can't check that correct Load is popped. | |
568 | * However, if wrong Load is discarded, the next Load will likely fail. | |
569 | */ | |
570 | void handleLoadPop(int tid); | |
571 | ||
572 | /* | |
573 | * Riesling: | |
574 | * if StorePop, pop and discard the Store at the top of your queue. | |
575 | * You will have received a STISSUE for this Store before | |
576 | * receiving the STPOP. | |
577 | */ | |
578 | void handleStorePop(int tid); | |
579 | ||
580 | /* | |
581 | * compare a N-byte data block in memory with the data provided by | |
582 | * testbench, throw error if miscompare. | |
583 | */ | |
584 | void handleMemoryCheck(uint64_t paddr, uint64_t* data, int dataSize); | |
585 | ||
586 | /* | |
587 | * flush out all entries, invoked when encounter INTP=0 system-wide | |
588 | * warm-reset. | |
589 | */ | |
590 | void flushAll(); | |
591 | ||
592 | /** | |
593 | * a hook for address translation. For example, dma_loopback that | |
594 | * translates a I/O address to RAM address. | |
595 | */ | |
596 | typedef uint64_t (* AddrTransCB)(uint64_t); | |
597 | void setAddrTransCB(AddrTransCB cb) { addrTrans = cb; } | |
598 | ||
599 | // handle sniper related load/store | |
600 | void handleSniper(int tid, uint64_t addr, INSTR_TYPE itype, uint64_t data); | |
601 | void setCoreEnable(int node_index, uint64_t data); | |
602 | ||
603 | // socket file descriptor to RTL testbench, used to report error | |
604 | SS_PliSocket* socket; | |
605 | ||
606 | protected: | |
607 | void completeStoreInvStoreUpdate (LoadStoreCmd& cmd); | |
608 | ||
609 | private: | |
610 | MemoryAccessBuffer mab_; // global memory access buffer | |
611 | LoadStoreBuffer* ldb_; // load buffer | |
612 | LoadStoreBuffer* stb_; // store buffer | |
613 | LoadStoreBuffer* retStb_; | |
614 | LoadStoreBuffer* rmoStb_; | |
615 | ||
616 | LoadStoreBuffer* ifb_; // I-Fetch Buffer | |
617 | ||
618 | uint64_t* iseq_; // load/store instruction sequence number | |
619 | ||
620 | /* The following buffer, retStb_, is introduced to store entries removed | |
621 | from the stb_ whose data may be need for the following load. This is to | |
622 | solve the Ni signal's timing issue. May not need for N2. */ | |
623 | /* LoadStoreBuffer retStb_[MAX_STRANDS]; */ | |
624 | /* This buffer is for storing RMO-store instructions. In N2, this type of | |
625 | stores leaves STB when it is issued to L2, hence StoreAck will be before | |
626 | StoreL2Commit. In addition, they may have out-of-order invalidation | |
627 | behavior. */ | |
628 | /* LoadStoreBuffer rmoStb_[MAX_STRANDS]; */ | |
629 | RieslingInterface rif_; | |
630 | int memDebug_; | |
631 | Tso::TsoChecker* tsoChecker_; | |
632 | // hook to provide optional (I/O) address translation | |
633 | AddrTransCB addrTrans; | |
634 | // inv_vec mask, 1 bit per core, | |
635 | // inv_vec in st_commit is arranged from left to right as: core_n_1 | |
636 | // core_n_2 ... core_1 core_0. | |
637 | // inv_vec_mask is arranged differently, inv_vec_mask[0] keeps core_7 ... | |
638 | // core_0, from left to right. inv_vec_mask[1] keeps core_15 ... core_8, | |
639 | // from left to right, and so on. | |
640 | uint8_t inv_vec_mask[INV_VEC_SIZE]; | |
641 | }; | |
642 | ||
643 | #endif /* _MEMORYMODEL_H */ |