Commit b03cce8e08237bb84735aa86aea5d7d7c4633e2d

Authored by bellard
1 parent 4d7a0880

fixed global variable handling with qemu load/stores - initial global prologue/epilogue implementation

git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4406 c046a42c-6fe2-441c-8c8c-71466251a162
tcg/i386/tcg-target.c
... ... @@ -46,6 +46,8 @@ int tcg_target_reg_alloc_order[] = {
46 46 const int tcg_target_call_iarg_regs[3] = { TCG_REG_EAX, TCG_REG_EDX, TCG_REG_ECX };
47 47 const int tcg_target_call_oarg_regs[2] = { TCG_REG_EAX, TCG_REG_EDX };
48 48  
  49 +static uint8_t *tb_ret_addr;
  50 +
49 51 static void patch_reloc(uint8_t *code_ptr, int type,
50 52 tcg_target_long value, tcg_target_long addend)
51 53 {
... ... @@ -879,7 +881,8 @@ static inline void tcg_out_op(TCGContext *s, int opc,
879 881 switch(opc) {
880 882 case INDEX_op_exit_tb:
881 883 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EAX, args[0]);
882   - tcg_out8(s, 0xc3); /* ret */
  884 + tcg_out8(s, 0xe9); /* jmp tb_ret_addr */
  885 + tcg_out32(s, tb_ret_addr - s->code_ptr - 4);
883 886 break;
884 887 case INDEX_op_goto_tb:
885 888 if (s->tb_jmp_offset) {
... ... @@ -1145,6 +1148,53 @@ static const TCGTargetOpDef x86_op_defs[] = {
1145 1148 { -1 },
1146 1149 };
1147 1150  
  1151 +static int tcg_target_callee_save_regs[] = {
  1152 + /* TCG_REG_EBP, */ /* currently used for the global env, so no
  1153 + need to save */
  1154 + TCG_REG_EBX,
  1155 + TCG_REG_ESI,
  1156 + TCG_REG_EDI,
  1157 +};
  1158 +
  1159 +static inline void tcg_out_push(TCGContext *s, int reg)
  1160 +{
  1161 + tcg_out_opc(s, 0x50 + reg);
  1162 +}
  1163 +
  1164 +static inline void tcg_out_pop(TCGContext *s, int reg)
  1165 +{
  1166 + tcg_out_opc(s, 0x58 + reg);
  1167 +}
  1168 +
  1169 +/* Generate global QEMU prologue and epilogue code */
  1170 +void tcg_target_qemu_prologue(TCGContext *s)
  1171 +{
  1172 + int i, frame_size, push_size, stack_addend;
  1173 +
  1174 + /* TB prologue */
  1175 + /* save all callee saved registers */
  1176 + for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
  1177 + tcg_out_push(s, tcg_target_callee_save_regs[i]);
  1178 + }
  1179 + /* reserve some stack space */
  1180 + push_size = 4 + ARRAY_SIZE(tcg_target_callee_save_regs) * 4;
  1181 + frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
  1182 + frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
  1183 + ~(TCG_TARGET_STACK_ALIGN - 1);
  1184 + stack_addend = frame_size - push_size;
  1185 + tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
  1186 +
  1187 + tcg_out_modrm(s, 0xff, 4, TCG_REG_EAX); /* jmp *%eax */
  1188 +
  1189 + /* TB epilogue */
  1190 + tb_ret_addr = s->code_ptr;
  1191 + tcg_out_addi(s, TCG_REG_ESP, stack_addend);
  1192 + for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
  1193 + tcg_out_pop(s, tcg_target_callee_save_regs[i]);
  1194 + }
  1195 + tcg_out8(s, 0xc3); /* ret */
  1196 +}
  1197 +
1148 1198 void tcg_target_init(TCGContext *s)
1149 1199 {
1150 1200 /* fail safe */
... ...
tcg/tcg.c
... ... @@ -242,6 +242,13 @@ void tcg_context_init(TCGContext *s)
242 242 }
243 243  
244 244 tcg_target_init(s);
  245 +
  246 + /* init global prologue and epilogue */
  247 + s->code_buf = code_gen_prologue;
  248 + s->code_ptr = s->code_buf;
  249 + tcg_target_qemu_prologue(s);
  250 + flush_icache_range((unsigned long)s->code_buf,
  251 + (unsigned long)s->code_ptr);
245 252 }
246 253  
247 254 void tcg_set_frame(TCGContext *s, int reg,
... ... @@ -680,36 +687,57 @@ void tcg_dump_ops(TCGContext *s, FILE *outfile)
680 687 nb_oargs = arg >> 16;
681 688 nb_iargs = arg & 0xffff;
682 689 nb_cargs = def->nb_cargs;
683   - } else if (c == INDEX_op_nopn) {
684   - /* variable number of arguments */
685   - nb_cargs = *args;
686   - nb_oargs = 0;
687   - nb_iargs = 0;
688   - } else {
689   - nb_oargs = def->nb_oargs;
690   - nb_iargs = def->nb_iargs;
691   - nb_cargs = def->nb_cargs;
692   - }
693 690  
694   - k = 0;
695   - for(i = 0; i < nb_oargs; i++) {
696   - if (k != 0)
697   - fprintf(outfile, ",");
698   - fprintf(outfile, "%s",
699   - tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
700   - }
701   - for(i = 0; i < nb_iargs; i++) {
702   - if (k != 0)
703   - fprintf(outfile, ",");
  691 + /* function name */
704 692 /* XXX: dump helper name for call */
705 693 fprintf(outfile, "%s",
706   - tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
707   - }
708   - for(i = 0; i < nb_cargs; i++) {
709   - if (k != 0)
  694 + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[nb_oargs + nb_iargs - 1]));
  695 + /* flags */
  696 + fprintf(outfile, ",$0x%" TCG_PRIlx,
  697 + args[nb_oargs + nb_iargs]);
  698 + /* nb out args */
  699 + fprintf(outfile, ",$%d", nb_oargs);
  700 + for(i = 0; i < nb_oargs; i++) {
  701 + fprintf(outfile, ",");
  702 + fprintf(outfile, "%s",
  703 + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[i]));
  704 + }
  705 + for(i = 0; i < (nb_iargs - 1); i++) {
710 706 fprintf(outfile, ",");
711   - arg = args[k++];
712   - fprintf(outfile, "$0x%" TCG_PRIlx, arg);
  707 + fprintf(outfile, "%s",
  708 + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[nb_oargs + i]));
  709 + }
  710 + } else {
  711 + if (c == INDEX_op_nopn) {
  712 + /* variable number of arguments */
  713 + nb_cargs = *args;
  714 + nb_oargs = 0;
  715 + nb_iargs = 0;
  716 + } else {
  717 + nb_oargs = def->nb_oargs;
  718 + nb_iargs = def->nb_iargs;
  719 + nb_cargs = def->nb_cargs;
  720 + }
  721 +
  722 + k = 0;
  723 + for(i = 0; i < nb_oargs; i++) {
  724 + if (k != 0)
  725 + fprintf(outfile, ",");
  726 + fprintf(outfile, "%s",
  727 + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
  728 + }
  729 + for(i = 0; i < nb_iargs; i++) {
  730 + if (k != 0)
  731 + fprintf(outfile, ",");
  732 + fprintf(outfile, "%s",
  733 + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
  734 + }
  735 + for(i = 0; i < nb_cargs; i++) {
  736 + if (k != 0)
  737 + fprintf(outfile, ",");
  738 + arg = args[k++];
  739 + fprintf(outfile, "$0x%" TCG_PRIlx, arg);
  740 + }
713 741 }
714 742 fprintf(outfile, "\n");
715 743 args += nb_iargs + nb_oargs + nb_cargs;
... ... @@ -1027,6 +1055,9 @@ void tcg_liveness_analysis(TCGContext *s)
1027 1055 /* if end of basic block, update */
1028 1056 if (def->flags & TCG_OPF_BB_END) {
1029 1057 tcg_la_bb_end(s, dead_temps);
  1058 + } else if (def->flags & TCG_OPF_CALL_CLOBBER) {
  1059 + /* globals are live */
  1060 + memset(dead_temps, 0, s->nb_globals);
1030 1061 }
1031 1062  
1032 1063 /* input args are live */
... ... @@ -1119,9 +1150,7 @@ static void check_regs(TCGContext *s)
1119 1150 ts->reg != reg) {
1120 1151 printf("Inconsistency for register %s:\n",
1121 1152 tcg_target_reg_names[reg]);
1122   - printf("reg state:\n");
1123   - dump_regs(s);
1124   - tcg_abort();
  1153 + goto fail;
1125 1154 }
1126 1155 }
1127 1156 }
... ... @@ -1132,10 +1161,16 @@ static void check_regs(TCGContext *s)
1132 1161 s->reg_to_temp[ts->reg] != k) {
1133 1162 printf("Inconsistency for temp %s:\n",
1134 1163 tcg_get_arg_str_idx(s, buf, sizeof(buf), k));
  1164 + fail:
1135 1165 printf("reg state:\n");
1136 1166 dump_regs(s);
1137 1167 tcg_abort();
1138 1168 }
  1169 + if (ts->val_type == TEMP_VAL_CONST && k < s->nb_globals) {
  1170 + printf("constant forbidden in global %s\n",
  1171 + tcg_get_arg_str_idx(s, buf, sizeof(buf), k));
  1172 + goto fail;
  1173 + }
1139 1174 }
1140 1175 }
1141 1176 #endif
... ... @@ -1376,13 +1411,26 @@ static void tcg_reg_alloc_op(TCGContext *s,
1376 1411 }
1377 1412 }
1378 1413  
1379   - /* XXX: permit generic clobber register list ? */
1380 1414 if (def->flags & TCG_OPF_CALL_CLOBBER) {
  1415 + /* XXX: permit generic clobber register list ? */
1381 1416 for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
1382 1417 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
1383 1418 tcg_reg_free(s, reg);
1384 1419 }
1385 1420 }
  1421 + /* XXX: for load/store we could do that only for the slow path
  1422 + (i.e. when a memory callback is called) */
  1423 +
  1424 + /* store globals and free associated registers (we assume the insn
  1425 + can modify any global. */
  1426 + for(i = 0; i < s->nb_globals; i++) {
  1427 + ts = &s->temps[i];
  1428 + if (!ts->fixed_reg) {
  1429 + if (ts->val_type == TEMP_VAL_REG) {
  1430 + tcg_reg_free(s, ts->reg);
  1431 + }
  1432 + }
  1433 + }
1386 1434 }
1387 1435  
1388 1436 /* satisfy the output constraints */
... ... @@ -1435,6 +1483,12 @@ static void tcg_reg_alloc_op(TCGContext *s,
1435 1483 }
1436 1484 }
1437 1485  
  1486 +#ifdef TCG_TARGET_STACK_GROWSUP
  1487 +#define STACK_DIR(x) (-(x))
  1488 +#else
  1489 +#define STACK_DIR(x) (x)
  1490 +#endif
  1491 +
1438 1492 static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
1439 1493 int opc, const TCGArg *args,
1440 1494 unsigned int dead_iargs)
... ... @@ -1443,7 +1497,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
1443 1497 TCGArg arg, func_arg;
1444 1498 TCGTemp *ts;
1445 1499 tcg_target_long stack_offset, call_stack_size, func_addr;
1446   - int const_func_arg;
  1500 + int const_func_arg, allocate_args;
1447 1501 TCGRegSet allocated_regs;
1448 1502 const TCGArgConstraint *arg_ct;
1449 1503  
... ... @@ -1464,12 +1518,11 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
1464 1518 call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
1465 1519 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
1466 1520 ~(TCG_TARGET_STACK_ALIGN - 1);
1467   -#ifdef TCG_TARGET_STACK_GROWSUP
1468   - tcg_out_addi(s, TCG_REG_CALL_STACK, call_stack_size);
1469   -#else
1470   - tcg_out_addi(s, TCG_REG_CALL_STACK, -call_stack_size);
1471   -#endif
1472   -
  1521 + allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
  1522 + if (allocate_args) {
  1523 + tcg_out_addi(s, TCG_REG_CALL_STACK, -STACK_DIR(call_stack_size));
  1524 + }
  1525 + /* XXX: on some architectures it does not start at zero */
1473 1526 stack_offset = 0;
1474 1527 for(i = nb_regs; i < nb_params; i++) {
1475 1528 arg = args[nb_oargs + i];
... ... @@ -1491,11 +1544,8 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
1491 1544 } else {
1492 1545 tcg_abort();
1493 1546 }
1494   -#ifdef TCG_TARGET_STACK_GROWSUP
1495   - stack_offset -= sizeof(tcg_target_long);
1496   -#else
1497   - stack_offset += sizeof(tcg_target_long);
1498   -#endif
  1547 + /* XXX: not necessarily in the same order */
  1548 + stack_offset += STACK_DIR(sizeof(tcg_target_long));
1499 1549 }
1500 1550  
1501 1551 /* assign input registers */
... ... @@ -1525,9 +1575,6 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
1525 1575 arg_ct = &def->args_ct[0];
1526 1576 ts = &s->temps[func_arg];
1527 1577 func_addr = ts->val;
1528   -#ifdef HOST_HPPA
1529   - func_addr = (tcg_target_long)__canonicalize_funcptr_for_compare((void *)func_addr);
1530   -#endif
1531 1578 const_func_arg = 0;
1532 1579 if (ts->val_type == TEMP_VAL_MEM) {
1533 1580 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
... ... @@ -1586,11 +1633,9 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
1586 1633  
1587 1634 tcg_out_op(s, opc, &func_arg, &const_func_arg);
1588 1635  
1589   -#ifdef TCG_TARGET_STACK_GROWSUP
1590   - tcg_out_addi(s, TCG_REG_CALL_STACK, -call_stack_size);
1591   -#else
1592   - tcg_out_addi(s, TCG_REG_CALL_STACK, call_stack_size);
1593   -#endif
  1636 + if (allocate_args) {
  1637 + tcg_out_addi(s, TCG_REG_CALL_STACK, STACK_DIR(call_stack_size));
  1638 + }
1594 1639  
1595 1640 /* assign output registers and emit moves if needed */
1596 1641 for(i = 0; i < nb_oargs; i++) {
... ... @@ -1672,10 +1717,6 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
1672 1717 args = gen_opparam_buf;
1673 1718 op_index = 0;
1674 1719  
1675   -#ifdef TCG_TARGET_NEEDS_PROLOGUE
1676   - tcg_target_prologue(s);
1677   -#endif
1678   -
1679 1720 for(;;) {
1680 1721 opc = gen_opc_buf[op_index];
1681 1722 #ifdef CONFIG_PROFILER
... ...
tcg/tcg.h
... ... @@ -90,6 +90,10 @@ typedef struct TCGPool {
90 90  
91 91 #define TCG_MAX_TEMPS 512
92 92  
  93 +/* when the size of the arguments of a called function is smaller than
  94 + this value, they are statically allocated in the TB stack frame */
  95 +#define TCG_STATIC_CALL_ARGS_SIZE 128
  96 +
93 97 typedef int TCGType;
94 98  
95 99 #define TCG_TYPE_I32 0
... ... @@ -285,8 +289,11 @@ typedef struct TCGArgConstraint {
285 289  
286 290 #define TCG_OPF_BB_END 0x01 /* instruction defines the end of a basic
287 291 block */
288   -#define TCG_OPF_CALL_CLOBBER 0x02 /* instruction clobbers call registers */
289   -#define TCG_OPF_SIDE_EFFECTS 0x04 /* instruction has side effects */
  292 +#define TCG_OPF_CALL_CLOBBER 0x02 /* instruction clobbers call registers
  293 + and potentially update globals. */
  294 +#define TCG_OPF_SIDE_EFFECTS 0x04 /* instruction has side effects : it
  295 + cannot be removed if its output
  296 + are not used */
290 297  
291 298 typedef struct TCGOpDef {
292 299 const char *name;
... ... @@ -305,6 +312,7 @@ typedef struct TCGTargetOpDef {
305 312 extern TCGOpDef tcg_op_defs[];
306 313  
307 314 void tcg_target_init(TCGContext *s);
  315 +void tcg_target_qemu_prologue(TCGContext *s);
308 316  
309 317 #define tcg_abort() \
310 318 do {\
... ... @@ -358,3 +366,6 @@ int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2);
358 366 int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2);
359 367 uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2);
360 368 uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2);
  369 +
  370 +extern uint8_t code_gen_prologue[];
  371 +#define tcg_qemu_tb_exec(tb_ptr) ((long REGPARM (*)(void *))code_gen_prologue)(tb_ptr)
... ...
tcg/x86_64/tcg-target.c
... ... @@ -73,6 +73,8 @@ const int tcg_target_call_oarg_regs[2] = {
73 73 TCG_REG_RDX
74 74 };
75 75  
  76 +static uint8_t *tb_ret_addr;
  77 +
76 78 static void patch_reloc(uint8_t *code_ptr, int type,
77 79 tcg_target_long value, tcg_target_long addend)
78 80 {
... ... @@ -841,7 +843,8 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
841 843 switch(opc) {
842 844 case INDEX_op_exit_tb:
843 845 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, args[0]);
844   - tcg_out8(s, 0xc3); /* ret */
  846 + tcg_out8(s, 0xe9); /* jmp tb_ret_addr */
  847 + tcg_out32(s, tb_ret_addr - s->code_ptr - 4);
845 848 break;
846 849 case INDEX_op_goto_tb:
847 850 if (s->tb_jmp_offset) {
... ... @@ -1129,6 +1132,58 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
1129 1132 }
1130 1133 }
1131 1134  
  1135 +static int tcg_target_callee_save_regs[] = {
  1136 + TCG_REG_R10,
  1137 + TCG_REG_R11,
  1138 + TCG_REG_RBP,
  1139 + TCG_REG_RBX,
  1140 + TCG_REG_R12,
  1141 + TCG_REG_R13,
  1142 + /* TCG_REG_R14, */ /* currently used for the global env, so no
  1143 + need to save */
  1144 + TCG_REG_R15,
  1145 +};
  1146 +
  1147 +static inline void tcg_out_push(TCGContext *s, int reg)
  1148 +{
  1149 + tcg_out_opc(s, (0x50 + (reg & 7)), 0, reg, 0);
  1150 +}
  1151 +
  1152 +static inline void tcg_out_pop(TCGContext *s, int reg)
  1153 +{
  1154 + tcg_out_opc(s, (0x58 + (reg & 7)), 0, reg, 0);
  1155 +}
  1156 +
  1157 +/* Generate global QEMU prologue and epilogue code */
  1158 +void tcg_target_qemu_prologue(TCGContext *s)
  1159 +{
  1160 + int i, frame_size, push_size, stack_addend;
  1161 +
  1162 + /* TB prologue */
  1163 + /* save all callee saved registers */
  1164 + for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
  1165 + tcg_out_push(s, tcg_target_callee_save_regs[i]);
  1166 +
  1167 + }
  1168 + /* reserve some stack space */
  1169 + push_size = 8 + ARRAY_SIZE(tcg_target_callee_save_regs) * 8;
  1170 + frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
  1171 + frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
  1172 + ~(TCG_TARGET_STACK_ALIGN - 1);
  1173 + stack_addend = frame_size - push_size;
  1174 + tcg_out_addi(s, TCG_REG_RSP, -stack_addend);
  1175 +
  1176 + tcg_out_modrm(s, 0xff, 4, TCG_REG_RDI); /* jmp *%rdi */
  1177 +
  1178 + /* TB epilogue */
  1179 + tb_ret_addr = s->code_ptr;
  1180 + tcg_out_addi(s, TCG_REG_RSP, stack_addend);
  1181 + for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
  1182 + tcg_out_pop(s, tcg_target_callee_save_regs[i]);
  1183 + }
  1184 + tcg_out8(s, 0xc3); /* ret */
  1185 +}
  1186 +
1132 1187 static const TCGTargetOpDef x86_64_op_defs[] = {
1133 1188 { INDEX_op_exit_tb, { } },
1134 1189 { INDEX_op_goto_tb, { } },
... ... @@ -1212,6 +1267,10 @@ static const TCGTargetOpDef x86_64_op_defs[] = {
1212 1267  
1213 1268 void tcg_target_init(TCGContext *s)
1214 1269 {
  1270 + /* fail safe */
  1271 + if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
  1272 + tcg_abort();
  1273 +
1215 1274 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
1216 1275 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
1217 1276 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
... ... @@ -1227,10 +1286,6 @@ void tcg_target_init(TCGContext *s)
1227 1286  
1228 1287 tcg_regset_clear(s->reserved_regs);
1229 1288 tcg_regset_set_reg(s->reserved_regs, TCG_REG_RSP);
1230   - /* XXX: will be suppresed when proper global TB entry code will be
1231   - generated */
1232   - tcg_regset_set_reg(s->reserved_regs, TCG_REG_RBX);
1233   - tcg_regset_set_reg(s->reserved_regs, TCG_REG_RBP);
1234 1289  
1235 1290 tcg_add_target_add_op_defs(x86_64_op_defs);
1236 1291 }
... ...