@@ -4,6 +4,8 @@
  */
 
 #include <assert.h>
+#include <sched.h>
+#include <stdatomic.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
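The new includes serve the hunks below: <stdatomic.h> for the C11 atomics on the block's T2C fields, and <sched.h> for the sched_yield() retry in rv_step(). For the atomic_store_explicit() calls in block_alloc() to compile, block_t must declare those fields as atomics. A minimal sketch of the assumed declaration (names match the diff; this is not the verbatim upstream definition, and the surrounding fields are elided):

/* Sketch only: the fields block_alloc() initializes below, assuming
 * block_t declares them as C11 atomics. */
#include <stdatomic.h>
#include <stdbool.h>

typedef struct block {
    /* ... pc_start, pc_end, n_insn, ir_head, n_invoke, list, ... */
    _Atomic bool compiled;  /* true while queued for / undergoing T2C */
    _Atomic(void *) func;   /* T2C-generated code; NULL until published */
} block_t;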
@@ -269,6 +271,7 @@ void rv_debug(riscv_t *rv)
 HASH_FUNC_IMPL(map_hash, BLOCK_MAP_CAPACITY_BITS, 1 << BLOCK_MAP_CAPACITY_BITS)
 #endif
 
+
 /* allocate a basic block */
 static block_t *block_alloc(riscv_t *rv)
 {
@@ -278,12 +281,13 @@ static block_t *block_alloc(riscv_t *rv)
 #if RV32_HAS(JIT)
     block->translatable = true;
     block->hot = false;
-    block->hot2 = false;
     block->has_loops = false;
     block->n_invoke = 0;
     INIT_LIST_HEAD(&block->list);
 #if RV32_HAS(T2C)
-    block->compiled = false;
+    /* Initialize the T2C compilation flag and code pointer */
+    atomic_store_explicit(&block->compiled, false, memory_order_relaxed);
+    atomic_store_explicit(&block->func, NULL, memory_order_relaxed);
 #endif
 #endif
     return block;
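Relaxed ordering suffices here because a freshly allocated block is not yet visible to any other thread. Once the block is published, the patch pairs a release store (when the block is queued for T2C) with acquire loads (when deciding whether it may be evicted). A condensed sketch of that protocol, using hypothetical helper names that do not appear in the patch:

/* Hypothetical helpers illustrating the ordering the patch relies on. */
static inline void t2c_mark_queued(block_t *blk)
{
    /* release: writes to the block happen-before any thread that
     * observes compiled == true via the acquire load below */
    atomic_store_explicit(&blk->compiled, true, memory_order_release);
}

static inline bool t2c_in_flight(block_t *blk)
{
    /* acquire: pairs with the release store above */
    return atomic_load_explicit(&blk->compiled, memory_order_acquire);
}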
@@ -918,6 +922,29 @@ static block_t *block_find_or_translate(riscv_t *rv)
         return next_blk;
     }
 
+#if RV32_HAS(T2C)
+    /* Avoid evicting a block that is being compiled */
+    if (atomic_load_explicit(&replaced_blk->compiled, memory_order_acquire)) {
+        /* This block is being compiled; do not evict it.
+         * Return NULL to signal that the cache is full.
+         */
+        pthread_mutex_unlock(&rv->cache_lock);
+
+        /* Free the newly translated block */
+        for (rv_insn_t *ir = next_blk->ir_head, *next_ir; ir; ir = next_ir) {
+            next_ir = ir->next;
+            free(ir->fuse);
+            mpool_free(rv->block_ir_mp, ir);
+        }
+        list_del_init(&next_blk->list);
+        atomic_store_explicit(&next_blk->func, NULL, memory_order_relaxed);
+        mpool_free(rv->block_mp, next_blk);
+
+        return NULL;
+    }
+    /* Blocks that are not yet queued for compilation may be evicted */
+#endif
+
     if (prev == replaced_blk)
         prev = NULL;
 
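The cleanup of the just-translated next_blk above reappears verbatim in the double-check path of the next hunk. A hypothetical helper (not part of the patch) could factor it out; it mirrors the inline cleanup exactly:

/* Hypothetical helper: free a block that was translated but never
 * published in the cache. */
static void block_discard(riscv_t *rv, block_t *blk)
{
    for (rv_insn_t *ir = blk->ir_head, *next_ir; ir; ir = next_ir) {
        next_ir = ir->next;
        free(ir->fuse);
        mpool_free(rv->block_ir_mp, ir);
    }
    list_del_init(&blk->list);
#if RV32_HAS(T2C)
    atomic_store_explicit(&blk->func, NULL, memory_order_relaxed);
#endif
    mpool_free(rv->block_mp, blk);
}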
@@ -930,43 +957,59 @@ static block_t *block_find_or_translate(riscv_t *rv)
         rv_insn_t *taken = entry->ir_tail->branch_taken,
                   *untaken = entry->ir_tail->branch_untaken;
 
-        if (taken == replaced_blk_entry) {
+        if (taken == replaced_blk_entry)
             entry->ir_tail->branch_taken = NULL;
-        }
-        if (untaken == replaced_blk_entry) {
+        if (untaken == replaced_blk_entry)
             entry->ir_tail->branch_untaken = NULL;
-        }
 
         /* upadte JALR LUT */
-        if (!entry->ir_tail->branch_table) {
+        if (!entry->ir_tail->branch_table)
             continue;
-        }
 
-        /**
-         * TODO: upadate all JALR instructions which references to this
-         * basic block as the destination.
+        /* TODO: update all JALR instructions that reference this basic
+         * block as the destination.
          */
     }
 
-    /* free IRs in replaced block */
-    for (rv_insn_t *ir = replaced_blk->ir_head, *next_ir; ir != NULL;
-         ir = next_ir) {
-        next_ir = ir->next;
+#if RV32_HAS(T2C)
+    /* Double-check: do not evict if the block is being compiled */
+    if (atomic_load_explicit(&replaced_blk->compiled, memory_order_acquire)) {
+        /* The block is being compiled and cannot be evicted */
+        pthread_mutex_unlock(&rv->cache_lock);
 
-        if (ir->fuse)
+        /* Free the newly translated block */
+        for (rv_insn_t *ir = next_blk->ir_head, *next_ir; ir; ir = next_ir) {
+            next_ir = ir->next;
             free(ir->fuse);
+            mpool_free(rv->block_ir_mp, ir);
+        }
+        list_del_init(&next_blk->list);
+        atomic_store_explicit(&next_blk->func, NULL, memory_order_relaxed);
+        mpool_free(rv->block_mp, next_blk);
 
+        return NULL;
+    }
+#endif
+    /* At this point the block is not being compiled; it is safe to evict */
+    /* Free the replaced block */
+    for (rv_insn_t *ir = replaced_blk->ir_head, *next_ir; ir; ir = next_ir) {
+        next_ir = ir->next;
+        free(ir->fuse);
         mpool_free(rv->block_ir_mp, ir);
     }
 
     list_del_init(&replaced_blk->list);
+#if RV32_HAS(T2C)
+    /* Clear the atomic fields before returning the block to the pool */
+    atomic_store_explicit(&replaced_blk->func, NULL, memory_order_relaxed);
+    atomic_store_explicit(&replaced_blk->compiled, false, memory_order_relaxed);
+#endif
     mpool_free(rv->block_mp, replaced_blk);
 #if RV32_HAS(T2C)
     pthread_mutex_unlock(&rv->cache_lock);
 #endif
 #endif
 
-    assert(next_blk);
     return next_blk;
 }
 
@@ -1077,6 +1120,15 @@ void rv_step(void *arg)
          * and move onto the next block.
          */
         block_t *block = block_find_or_translate(rv);
+#if RV32_HAS(T2C)
+        if (!block) {
+            /* The cache is full of blocks queued for compilation;
+             * yield so the T2C thread can make progress, then retry.
+             */
+            sched_yield();
+            continue;
+        }
+#endif
         /* by now, a block should be available */
         assert(block);
 
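An unbounded yield-and-retry loop can spin if the T2C thread is starved. A possible hardening, not part of this patch, bounds the retries before sleeping; this sketch assumes the same NULL-on-full contract as above and needs <time.h> for nanosleep():

/* Hypothetical hardening: back off after repeated failures instead of
 * yielding forever. */
block_t *block;
int retries = 0;
while (!(block = block_find_or_translate(rv))) {
    if (++retries >= 1024) {
        const struct timespec ts = {.tv_sec = 0, .tv_nsec = 1000000};
        nanosleep(&ts, NULL); /* 1 ms: give the T2C worker real time */
        retries = 0;
    } else {
        sched_yield();
    }
}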
@@ -1120,20 +1172,87 @@ void rv_step(void *arg)
         last_pc = rv->PC;
 #if RV32_HAS(JIT)
 #if RV32_HAS(T2C)
-        /* executed through the tier-2 JIT compiler */
-        if (block->hot2) {
-            ((exec_t2c_func_t) block->func)(rv);
+        /* Check whether T2C-compiled code exists for this block */
+        t2c_entry_t *t2c_entry =
+            cache_get(rv->t2c_cache, block->pc_start, false);
+        if (t2c_entry && t2c_entry->func) {
+            /* Clear the compiled flag now that T2C code is available;
+             * this allows the block to be evicted if needed. */
+            atomic_store_explicit(&block->compiled, false,
+                                  memory_order_relaxed);
+
+            /* Execute the compiled function; no synchronization is
+             * needed because T2C entries are immutable. */
+            exec_t2c_func_t func = (exec_t2c_func_t) t2c_entry->func;
+            func(rv);
             prev = NULL;
             continue;
-        } /* check if invoking times of t1 generated code exceed threshold */
-        else if (!block->compiled && block->n_invoke >= THRESHOLD) {
-            block->compiled = true;
+        }
+        /* Check whether the tier-1 code's invocation count exceeds THRESHOLD */
+        if (!atomic_load_explicit(&block->compiled, memory_order_acquire) &&
+            block->n_invoke >= THRESHOLD) {
+            /* Mark the block as queued for compilation to avoid re-queueing */
+            atomic_store_explicit(&block->compiled, true, memory_order_release);
+
             queue_entry_t *entry = malloc(sizeof(queue_entry_t));
-            entry->block = block;
-            pthread_mutex_lock(&rv->wait_queue_lock);
-            list_add(&entry->list, &rv->wait_queue);
-            pthread_mutex_unlock(&rv->wait_queue_lock);
+            if (entry) {
+                /* Copy block metadata */
+                entry->pc_start = block->pc_start;
+                entry->pc_end = block->pc_end;
+                entry->n_insn = block->n_insn;
+#if RV32_HAS(SYSTEM)
+                entry->satp = block->satp;
+#endif
+                /* Deep-copy the IR chain */
+                entry->ir_head_copy = NULL;
+                rv_insn_t **copy_ptr = &entry->ir_head_copy;
+                for (rv_insn_t *ir = block->ir_head; ir; ir = ir->next) {
+                    rv_insn_t *ir_copy = malloc(sizeof(rv_insn_t));
+                    if (!ir_copy) {
+                        /* Clean up on allocation failure */
+                        for (rv_insn_t *tmp = entry->ir_head_copy, *next; tmp;
+                             tmp = next) {
+                            next = tmp->next;
+                            free(tmp->fuse);
+                            free(tmp);
+                        }
+                        free(entry);
+                        atomic_store_explicit(&block->compiled, false,
+                                              memory_order_release);
+                        goto skip_t2c;
+                    }
+                    memcpy(ir_copy, ir, sizeof(rv_insn_t));
+                    /* Copy fuse data if present */
+                    if (ir->fuse) {
+                        size_t fuse_size = ir->imm2 * sizeof(opcode_fuse_t);
+                        ir_copy->fuse = malloc(fuse_size);
+                        if (ir_copy->fuse) {
+                            memcpy(ir_copy->fuse, ir->fuse, fuse_size);
+                        }
+                    }
+                    /* Clear branch pointers; they are not needed for
+                     * compilation */
+                    ir_copy->branch_taken = NULL;
+                    ir_copy->branch_untaken = NULL;
+                    ir_copy->branch_table = NULL;
+                    /* Link the copy */
+                    ir_copy->next = NULL;
+                    *copy_ptr = ir_copy;
+                    copy_ptr = &ir_copy->next;
+                }
+
+                pthread_mutex_lock(&rv->wait_queue_lock);
+                list_add(&entry->list, &rv->wait_queue);
+                pthread_cond_signal(&rv->wait_queue_cond);
+                pthread_mutex_unlock(&rv->wait_queue_lock);
+                /* Keep the compiled flag set to prevent re-queueing */
+            } else {
+                /* Allocation failed; clear the compiled flag */
+                atomic_store_explicit(&block->compiled, false,
+                                      memory_order_release);
+            }
         }
+    skip_t2c:; /* null statement: a label cannot directly precede a declaration */
 #endif
         /* executed through the tier-1 JIT compiler */
         struct jit_state *state = rv->jit_state;
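The deep copy above implies that queue_entry_t now carries block metadata plus an owned IR chain, and that the T2C thread sleeps on the new wait_queue_cond rather than polling. Neither definition appears in this diff; a sketch of what they could look like, with Linux-style list helpers assumed:

/* Hypothetical layout implied by the field copies in rv_step(). */
typedef struct {
    uint32_t pc_start, pc_end, n_insn;
#if RV32_HAS(SYSTEM)
    uint32_t satp;
#endif
    rv_insn_t *ir_head_copy; /* deep-copied IR chain owned by the entry */
    struct list_head list;
} queue_entry_t;

/* Hypothetical consumer in the T2C thread: block on wait_queue_cond,
 * then compile from the entry's private IR copy. */
static queue_entry_t *t2c_pop_work(riscv_t *rv)
{
    pthread_mutex_lock(&rv->wait_queue_lock);
    while (list_empty(&rv->wait_queue))
        pthread_cond_wait(&rv->wait_queue_cond, &rv->wait_queue_lock);
    queue_entry_t *entry =
        list_first_entry(&rv->wait_queue, queue_entry_t, list);
    list_del_init(&entry->list);
    pthread_mutex_unlock(&rv->wait_queue_lock);
    return entry; /* caller compiles, then frees ir_head_copy and entry */
}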