Commit cf1d97f07480b6197aebc489938b4e1fed78d3e7

Authored by edgar_igl
1 parent 3bd8c5e4

CRIS: Improve TLB management and handle delayslots at page boundaries.

* Don't flush the entire QEMU TLB when the $pid changes. Instead we go through
  the guest's TLB and choose entries that need to be flushed.
* Add env->dslot and handle delay slots at page boundaries.
* Remove some unused code.


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4450 c046a42c-6fe2-441c-8c8c-71466251a162
cpu-exec.c
... ... @@ -261,6 +261,7 @@ static inline TranslationBlock *tb_find_fast(void)
261 261 pc = env->pc;
262 262 #elif defined(TARGET_CRIS)
263 263 flags = env->pregs[PR_CCS] & U_FLAG;
  264 + flags |= env->dslot;
264 265 cs_base = 0;
265 266 pc = env->pc;
266 267 #else
... ...
target-cris/cpu.h
... ... @@ -107,11 +107,10 @@ typedef struct CPUCRISState {
107 107 /* Pseudo register for the kernel stack. */
108 108 uint32_t ksp;
109 109  
110   - /* These are setup up by the guest code just before transfering the
111   - control back to the host. */
112   - int jmp;
113   - uint32_t btarget;
  110 + /* Branch. */
  111 + int dslot;
114 112 int btaken;
  113 + uint32_t btarget;
115 114  
116 115 /* Condition flag tracking. */
117 116 uint32_t cc_op;
... ... @@ -119,10 +118,8 @@ typedef struct CPUCRISState {
119 118 uint32_t cc_dest;
120 119 uint32_t cc_src;
121 120 uint32_t cc_result;
122   -
123 121 /* size of the operation, 1 = byte, 2 = word, 4 = dword. */
124 122 int cc_size;
125   -
126 123 /* Extended arithmetics. */
127 124 int cc_x_live;
128 125 int cc_x;
... ... @@ -137,13 +134,6 @@ typedef struct CPUCRISState {
137 134 uint32_t debug2;
138 135 uint32_t debug3;
139 136  
140   - struct
141   - {
142   - int exec_insns;
143   - int exec_loads;
144   - int exec_stores;
145   - } stats;
146   -
147 137 /* FIXME: add a check in the translator to avoid writing to support
148 138 register sets beyond the 4th. The ISA allows up to 256! but in
149 139 practice there is no core that implements more than 4.
... ...
target-cris/helper.c
... ... @@ -97,9 +97,10 @@ int cpu_cris_handle_mmu_fault (CPUState *env, target_ulong address, int rw,
97 97 r = tlb_set_page(env, address, phy, prot, mmu_idx, is_softmmu);
98 98 }
99 99 if (r > 0)
100   - D(fprintf(logfile, "%s returns %d irqreq=%x addr=%x ismmu=%d vec=%x\n",
101   - __func__, r, env->interrupt_request,
102   - address, is_softmmu, res.bf_vec));
  100 + D(fprintf(logfile, "%s returns %d irqreq=%x addr=%x"
  101 + " phy=%x ismmu=%d vec=%x pc=%x\n",
  102 + __func__, r, env->interrupt_request,
  103 + address, res.phy, is_softmmu, res.bf_vec, env->pc));
103 104 return r;
104 105 }
105 106  
... ... @@ -138,13 +139,19 @@ void do_interrupt(CPUState *env)
138 139 break;
139 140 }
140 141  
141   - if ((env->pregs[PR_CCS] & U_FLAG)) {
142   - D(fprintf(logfile, "excp isr=%x PC=%x SP=%x ERP=%x pid=%x ccs=%x cc=%d %x\n",
143   - ex_vec, env->pc,
  142 + if (env->dslot) {
  143 + D(fprintf(logfile, "excp isr=%x PC=%x ds=%d SP=%x"
  144 + " ERP=%x pid=%x ccs=%x cc=%d %x\n",
  145 + ex_vec, env->pc, env->dslot,
144 146 env->regs[R_SP],
145 147 env->pregs[PR_ERP], env->pregs[PR_PID],
146 148 env->pregs[PR_CCS],
147 149 env->cc_op, env->cc_mask));
  150 + /* We lose the btarget, btaken state here so re-execute the
  151 + branch. */
  152 + env->pregs[PR_ERP] -= env->dslot;
  153 + /* Exception starts with dslot cleared. */
  154 + env->dslot = 0;
148 155 }
149 156  
150 157 env->pc = ldl_code(env->pregs[PR_EBP] + ex_vec * 4);
... ...
target-cris/helper.h
1 1 #define TCG_HELPER_PROTO
2 2  
3 3 void TCG_HELPER_PROTO helper_raise_exception(uint32_t index);
  4 +void TCG_HELPER_PROTO helper_tlb_flush_pid(uint32_t pid);
4 5 void TCG_HELPER_PROTO helper_tlb_flush(void);
5 6 void TCG_HELPER_PROTO helper_dump(uint32_t a0, uint32_t a1, uint32_t a2);
6 7 void TCG_HELPER_PROTO helper_dummy(void);
... ...
target-cris/mmu.c
... ... @@ -174,8 +174,9 @@ static int cris_mmu_translate_page(struct cris_mmu_result_t *res,
174 174 tlb_pfn = EXTRACT_FIELD(lo, 13, 31);
175 175 tlb_g = EXTRACT_FIELD(lo, 4, 4);
176 176  
177   - D(printf("TLB[%d][%d] v=%x vpage=%x -> pfn=%x lo=%x hi=%x\n",
178   - i, idx, tlb_vpn, vpage, tlb_pfn, lo, hi));
  177 + D(fprintf(logfile,
  178 + "TLB[%d][%d][%d] v=%x vpage=%x->pfn=%x lo=%x hi=%x\n",
  179 + mmu, set, idx, tlb_vpn, vpage, tlb_pfn, lo, hi));
179 180 if ((tlb_g || (tlb_pid == (env->pregs[PR_PID] & 0xff)))
180 181 && tlb_vpn == vpage) {
181 182 match = 1;
... ... @@ -224,7 +225,6 @@ static int cris_mmu_translate_page(struct cris_mmu_result_t *res,
224 225 res->bf_vec = vect_base + 3;
225 226 } else if (cfg_v && !tlb_v) {
226 227 D(printf ("tlb: invalid %x\n", vaddr));
227   - set_field(&r_cause, rwcause, 8, 9);
228 228 match = 0;
229 229 res->bf_vec = vect_base + 1;
230 230 }
... ... @@ -287,21 +287,42 @@ static int cris_mmu_translate_page(struct cris_mmu_result_t *res,
287 287 return !match;
288 288 }
289 289  
290   -/* Give us the vaddr corresponding to the latest TLB update. */
291   -target_ulong cris_mmu_tlb_latest_update(CPUState *env)
  290 +void cris_mmu_flush_pid(CPUState *env, uint32_t pid)
292 291 {
293   - uint32_t sel = env->sregs[SFR_RW_MM_TLB_SEL];
294   - uint32_t vaddr;
295   - uint32_t hi;
296   - int set;
297   - int idx;
298   -
299   - idx = EXTRACT_FIELD(sel, 0, 4);
300   - set = EXTRACT_FIELD(sel, 4, 5);
301   -
302   - hi = env->tlbsets[1][set][idx].hi;
303   - vaddr = EXTRACT_FIELD(hi, 13, 31);
304   - return vaddr << TARGET_PAGE_BITS;
  292 + target_ulong vaddr;
  293 + unsigned int idx;
  294 + uint32_t lo, hi;
  295 + uint32_t tlb_vpn;
  296 + int tlb_pid, tlb_g, tlb_v, tlb_k;
  297 + unsigned int set;
  298 + unsigned int mmu;
  299 +
  300 + pid &= 0xff;
  301 + for (mmu = 0; mmu < 2; mmu++) {
  302 + for (set = 0; set < 4; set++)
  303 + {
  304 + for (idx = 0; idx < 16; idx++) {
  305 + lo = env->tlbsets[mmu][set][idx].lo;
  306 + hi = env->tlbsets[mmu][set][idx].hi;
  307 +
  308 + tlb_vpn = EXTRACT_FIELD(hi, 13, 31);
  309 + tlb_pid = EXTRACT_FIELD(hi, 0, 7);
  310 + tlb_g = EXTRACT_FIELD(lo, 4, 4);
  311 + tlb_v = EXTRACT_FIELD(lo, 3, 3);
  312 + tlb_k = EXTRACT_FIELD(lo, 2, 2);
  313 +
  314 + /* Kernel protected areas need to be flushed
  315 + as well. */
  316 + if (tlb_v && !tlb_g) {
  317 + vaddr = tlb_vpn << TARGET_PAGE_BITS;
  318 + D(fprintf(logfile,
  319 + "flush pid=%x vaddr=%x\n",
  320 + pid, vaddr));
  321 + tlb_flush_page(env, vaddr);
  322 + }
  323 + }
  324 + }
  325 + }
305 326 }
306 327  
307 328 int cris_mmu_translate(struct cris_mmu_result_t *res,
... ...
target-cris/mmu.h
... ... @@ -11,7 +11,7 @@ struct cris_mmu_result_t
11 11 int bf_vec;
12 12 };
13 13  
14   -target_ulong cris_mmu_tlb_latest_update(CPUState *env);
  14 +void cris_mmu_flush_pid(CPUState *env, uint32_t pid);
15 15 int cris_mmu_translate(struct cris_mmu_result_t *res,
16 16 CPUState *env, uint32_t vaddr,
17 17 int rw, int mmu_idx);
... ...
target-cris/op_helper.c
... ... @@ -85,6 +85,13 @@ void helper_raise_exception(uint32_t index)
85 85 cpu_loop_exit();
86 86 }
87 87  
  88 +void helper_tlb_flush_pid(uint32_t pid)
  89 +{
  90 +#if !defined(CONFIG_USER_ONLY)
  91 + cris_mmu_flush_pid(env, pid);
  92 +#endif
  93 +}
  94 +
88 95 void helper_tlb_flush(void)
89 96 {
90 97 tlb_flush(env, 1);
... ... @@ -100,6 +107,10 @@ void helper_dummy(void)
100 107  
101 108 }
102 109  
  110 +/* Used by the tlb decoder. */
  111 +#define EXTRACT_FIELD(src, start, end) \
  112 + (((src) >> start) & ((1 << (end - start + 1)) - 1))
  113 +
103 114 void helper_movl_sreg_reg (uint32_t sreg, uint32_t reg)
104 115 {
105 116 uint32_t srs;
... ... @@ -120,10 +131,7 @@ void helper_movl_sreg_reg (uint32_t sreg, uint32_t reg)
120 131 uint32_t idx;
121 132 uint32_t lo, hi;
122 133 uint32_t vaddr;
123   -
124   - vaddr = cris_mmu_tlb_latest_update(env);
125   - D(fprintf(logfile, "tlb flush vaddr=%x\n", vaddr));
126   - tlb_flush_page(env, vaddr);
  134 + int tlb_v;
127 135  
128 136 idx = set = env->sregs[SFR_RW_MM_TLB_SEL];
129 137 set >>= 4;
... ... @@ -134,8 +142,19 @@ void helper_movl_sreg_reg (uint32_t sreg, uint32_t reg)
134 142 lo = env->sregs[SFR_RW_MM_TLB_LO];
135 143 /* Writes are done via r_mm_cause. */
136 144 hi = env->sregs[SFR_R_MM_CAUSE];
  145 +
  146 + vaddr = EXTRACT_FIELD(env->tlbsets[srs-1][set][idx].hi,
  147 + 13, 31);
  148 + vaddr <<= TARGET_PAGE_BITS;
  149 + tlb_v = EXTRACT_FIELD(env->tlbsets[srs-1][set][idx].lo,
  150 + 3, 3);
137 151 env->tlbsets[srs - 1][set][idx].lo = lo;
138 152 env->tlbsets[srs - 1][set][idx].hi = hi;
  153 +
  154 + D(fprintf(logfile,
  155 + "tlb flush vaddr=%x v=%d pc=%x\n",
  156 + vaddr, tlb_v, env->pc));
  157 + tlb_flush_page(env, vaddr);
139 158 }
140 159 }
141 160 #endif
... ...
target-cris/translate.c
... ... @@ -21,8 +21,7 @@
21 21  
22 22 /*
23 23 * FIXME:
24   - * The condition code translation is in desperate need of attention. It's slow
25   - * and for system simulation it seems buggy. It sucks.
  24 + * The condition code translation is in need of attention.
26 25 */
27 26  
28 27 #include <stdarg.h>
... ... @@ -40,13 +39,6 @@
40 39 #include "crisv32-decode.h"
41 40 #include "qemu-common.h"
42 41  
43   -#define CRIS_STATS 0
44   -#if CRIS_STATS
45   -#define STATS(x) x
46   -#else
47   -#define STATS(x)
48   -#endif
49   -
50 42 #define DISAS_CRIS 0
51 43 #if DISAS_CRIS
52 44 #define DIS(x) x
... ... @@ -109,25 +101,18 @@ typedef struct DisasContext {
109 101  
110 102 int user; /* user or kernel mode. */
111 103 int is_jmp;
112   - int dyn_jmp;
113 104  
114   - uint32_t delayed_pc;
115 105 int delayed_branch;
116   - int bcc;
117   - uint32_t condlabel;
118 106  
119 107 struct TranslationBlock *tb;
120 108 int singlestep_enabled;
121 109 } DisasContext;
122 110  
123   -void cris_prepare_jmp (DisasContext *dc, uint32_t dst);
124 111 static void gen_BUG(DisasContext *dc, char *file, int line)
125 112 {
126 113 printf ("BUG: pc=%x %s %d\n", dc->pc, file, line);
127 114 fprintf (logfile, "BUG: pc=%x %s %d\n", dc->pc, file, line);
128   - cpu_dump_state (dc->env, stdout, fprintf, 0);
129   - fflush(NULL);
130   - cris_prepare_jmp (dc, 0x70000000 + line);
  115 + cpu_abort(dc->env, "%s:%d\n", file, line);
131 116 }
132 117  
133 118 const char *regnames[] =
... ... @@ -207,7 +192,7 @@ static inline void t_gen_mov_TN_preg(TCGv tn, int r)
207 192 else
208 193 tcg_gen_mov_tl(tn, cpu_PR[r]);
209 194 }
210   -static inline void t_gen_mov_preg_TN(int r, TCGv tn)
  195 +static inline void t_gen_mov_preg_TN(DisasContext *dc, int r, TCGv tn)
211 196 {
212 197 if (r < 0 || r > 15)
213 198 fprintf(stderr, "wrong register write $p%d\n", r);
... ... @@ -216,10 +201,9 @@ static inline void t_gen_mov_preg_TN(int r, TCGv tn)
216 201 else if (r == PR_SRS)
217 202 tcg_gen_andi_tl(cpu_PR[r], tn, 3);
218 203 else {
219   - if (r == PR_PID) {
220   - tcg_gen_helper_0_0(helper_tlb_flush);
221   - }
222 204 tcg_gen_mov_tl(cpu_PR[r], tn);
  205 + if (r == PR_PID)
  206 + tcg_gen_helper_0_1(helper_tlb_flush_pid, tn);
223 207 }
224 208 }
225 209  
... ... @@ -596,7 +580,7 @@ static inline void t_gen_swapr(TCGv d, TCGv s)
596 580 tcg_gen_discard_tl(org_s);
597 581 }
598 582  
599   -static void t_gen_cc_jmp(target_ulong pc_true, target_ulong pc_false)
  583 +static void t_gen_cc_jmp(TCGv pc_true, TCGv pc_false)
600 584 {
601 585 TCGv btaken;
602 586 int l1;
... ... @@ -606,9 +590,9 @@ static void t_gen_cc_jmp(target_ulong pc_true, target_ulong pc_false)
606 590  
607 591 /* Conditional jmp. */
608 592 t_gen_mov_TN_env(btaken, btaken);
609   - tcg_gen_movi_tl(env_pc, pc_false);
  593 + tcg_gen_mov_tl(env_pc, pc_false);
610 594 tcg_gen_brcond_tl(TCG_COND_EQ, btaken, tcg_const_tl(0), l1);
611   - tcg_gen_movi_tl(env_pc, pc_true);
  595 + tcg_gen_mov_tl(env_pc, pc_true);
612 596 gen_set_label(l1);
613 597  
614 598 tcg_gen_discard_tl(btaken);
... ... @@ -740,10 +724,11 @@ static void crisv32_alu_op(DisasContext *dc, int op, int rd, int size)
740 724 int writeback = 1;
741 725 if (dc->update_cc) {
742 726 cris_update_cc_op(dc, op, size);
743   - tcg_gen_mov_tl(cc_dest, cpu_T[0]);
  727 + if (op != CC_OP_MOVE)
  728 + tcg_gen_mov_tl(cc_dest, cpu_T[0]);
744 729  
745 730 /* FIXME: This shouldn't be needed. But we don't pass the
746   - tests without it. Investigate. */
  731 + tests without it. Investigate. */
747 732 t_gen_mov_env_TN(cc_x_live, tcg_const_tl(dc->flagx_live));
748 733 t_gen_mov_env_TN(cc_x, tcg_const_tl(dc->flags_x));
749 734 }
... ... @@ -812,7 +797,7 @@ static void crisv32_alu_op(DisasContext *dc, int op, int rd, int size)
812 797 TCGv mof;
813 798 mof = tcg_temp_new(TCG_TYPE_TL);
814 799 t_gen_muls(cpu_T[0], mof, cpu_T[0], cpu_T[1]);
815   - t_gen_mov_preg_TN(PR_MOF, mof);
  800 + t_gen_mov_preg_TN(dc, PR_MOF, mof);
816 801 tcg_gen_discard_tl(mof);
817 802 }
818 803 break;
... ... @@ -821,7 +806,7 @@ static void crisv32_alu_op(DisasContext *dc, int op, int rd, int size)
821 806 TCGv mof;
822 807 mof = tcg_temp_new(TCG_TYPE_TL);
823 808 t_gen_mulu(cpu_T[0], mof, cpu_T[0], cpu_T[1]);
824   - t_gen_mov_preg_TN(PR_MOF, mof);
  809 + t_gen_mov_preg_TN(dc, PR_MOF, mof);
825 810 tcg_gen_discard_tl(mof);
826 811 }
827 812 break;
... ... @@ -875,12 +860,6 @@ static void crisv32_alu_op(DisasContext *dc, int op, int rd, int size)
875 860 }
876 861 if (dc->update_cc)
877 862 tcg_gen_mov_tl(cc_result, cpu_T[0]);
878   -
879   - {
880   - /* TODO: Optimize this. */
881   - if (!dc->flagx_live)
882   - cris_evaluate_flags(dc);
883   - }
884 863 }
885 864  
886 865 static int arith_cc(DisasContext *dc)
... ... @@ -1073,7 +1052,6 @@ static void gen_tst_cc (DisasContext *dc, int cond)
1073 1052 tcg_gen_andi_tl(cpu_T[0], cpu_PR[PR_CCS], P_FLAG);
1074 1053 break;
1075 1054 case CC_A:
1076   - cris_evaluate_flags(dc);
1077 1055 tcg_gen_movi_tl(cpu_T[0], 1);
1078 1056 break;
1079 1057 default:
... ... @@ -1087,14 +1065,13 @@ static void cris_prepare_cc_branch (DisasContext *dc, int offset, int cond)
1087 1065 /* This helps us re-schedule the micro-code to insns in delay-slots
1088 1066 before the actual jump. */
1089 1067 dc->delayed_branch = 2;
1090   - dc->delayed_pc = dc->pc + offset;
1091   - dc->bcc = cond;
1092 1068 if (cond != CC_A)
1093 1069 {
1094 1070 gen_tst_cc (dc, cond);
1095 1071 t_gen_mov_env_TN(btaken, cpu_T[0]);
1096   - }
1097   - tcg_gen_movi_tl(env_btarget, dc->delayed_pc);
  1072 + } else
  1073 + t_gen_mov_env_TN(btaken, tcg_const_tl(1));
  1074 + tcg_gen_movi_tl(env_btarget, dc->pc + offset);
1098 1075 }
1099 1076  
1100 1077  
... ... @@ -1104,18 +1081,7 @@ void cris_prepare_dyn_jmp (DisasContext *dc)
1104 1081 /* This helps us re-schedule the micro-code to insns in delay-slots
1105 1082 before the actual jump. */
1106 1083 dc->delayed_branch = 2;
1107   - dc->dyn_jmp = 1;
1108   - dc->bcc = CC_A;
1109   -}
1110   -
1111   -void cris_prepare_jmp (DisasContext *dc, uint32_t dst)
1112   -{
1113   - /* This helps us re-schedule the micro-code to insns in delay-slots
1114   - before the actual jump. */
1115   - dc->delayed_branch = 2;
1116   - dc->delayed_pc = dst;
1117   - dc->dyn_jmp = 0;
1118   - dc->bcc = CC_A;
  1084 + t_gen_mov_env_TN(btaken, tcg_const_tl(1));
1119 1085 }
1120 1086  
1121 1087 void gen_load(DisasContext *dc, TCGv dst, TCGv addr,
... ... @@ -1123,6 +1089,7 @@ void gen_load(DisasContext *dc, TCGv dst, TCGv addr,
1123 1089 {
1124 1090 int mem_index = cpu_mmu_index(dc->env);
1125 1091  
  1092 + cris_evaluate_flags(dc);
1126 1093 if (size == 1) {
1127 1094 if (sign)
1128 1095 tcg_gen_qemu_ld8s(dst, addr, mem_index);
... ... @@ -1236,10 +1203,7 @@ static void dec_prep_alu_r(DisasContext *dc, int rs, int rd,
1236 1203 t_gen_zext(cpu_T[0], cpu_R[rd], size);
1237 1204 }
1238 1205  
1239   -/* Prepare T0 and T1 for a memory + alu operation.
1240   - s_ext decides if the operand1 should be sign-extended or zero-extended when
1241   - needed. */
1242   -static int dec_prep_alu_m(DisasContext *dc, int s_ext, int memsize)
  1206 +static int dec_prep_move_m(DisasContext *dc, int s_ext, int memsize)
1243 1207 {
1244 1208 unsigned int rs, rd;
1245 1209 uint32_t imm;
... ... @@ -1272,7 +1236,7 @@ static int dec_prep_alu_m(DisasContext *dc, int s_ext, int memsize)
1272 1236 imm = ldl_code(dc->pc + 2);
1273 1237  
1274 1238 DIS(fprintf (logfile, "imm=%x rd=%d sext=%d ms=%d\n",
1275   - imm, rd, s_ext, memsize));
  1239 + imm, rd, s_ext, memsize));
1276 1240 tcg_gen_movi_tl(cpu_T[1], imm);
1277 1241 dc->postinc = 0;
1278 1242 } else {
... ... @@ -1282,9 +1246,20 @@ static int dec_prep_alu_m(DisasContext *dc, int s_ext, int memsize)
1282 1246 else
1283 1247 t_gen_zext(cpu_T[1], cpu_T[1], memsize);
1284 1248 }
  1249 + return insn_len;
  1250 +}
  1251 +
  1252 +/* Prepare T0 and T1 for a memory + alu operation.
  1253 + s_ext decides if the operand1 should be sign-extended or zero-extended when
  1254 + needed. */
  1255 +static int dec_prep_alu_m(DisasContext *dc, int s_ext, int memsize)
  1256 +{
  1257 + int insn_len;
  1258 +
  1259 + insn_len = dec_prep_move_m(dc, s_ext, memsize);
1285 1260  
1286 1261 /* put dest in T0. */
1287   - t_gen_mov_TN_reg(cpu_T[0], rd);
  1262 + tcg_gen_mov_tl(cpu_T[0], cpu_R[dc->op2]);
1288 1263 return insn_len;
1289 1264 }
1290 1265  
... ... @@ -1421,7 +1396,7 @@ static unsigned int dec_btstq(DisasContext *dc)
1421 1396 crisv32_alu_op(dc, CC_OP_BTST, dc->op2, 4);
1422 1397  
1423 1398 cris_update_cc_op(dc, CC_OP_FLAGS, 4);
1424   - t_gen_mov_preg_TN(PR_CCS, cpu_T[0]);
  1399 + t_gen_mov_preg_TN(dc, PR_CCS, cpu_T[0]);
1425 1400 dc->flags_live = 1;
1426 1401 return 2;
1427 1402 }
... ... @@ -1702,7 +1677,9 @@ static char * swapmode_name(int mode, char *modename) {
1702 1677  
1703 1678 static unsigned int dec_swap_r(DisasContext *dc)
1704 1679 {
1705   - DIS(char modename[4]);
  1680 +#if DISAS_CRIS
  1681 + char modename[4];
  1682 +#endif
1706 1683 DIS(fprintf (logfile, "swap%s $r%u\n",
1707 1684 swapmode_name(dc->op2, modename), dc->op1));
1708 1685  
... ... @@ -1777,7 +1754,7 @@ static unsigned int dec_btst_r(DisasContext *dc)
1777 1754 crisv32_alu_op(dc, CC_OP_BTST, dc->op2, 4);
1778 1755  
1779 1756 cris_update_cc_op(dc, CC_OP_FLAGS, 4);
1780   - t_gen_mov_preg_TN(PR_CCS, cpu_T[0]);
  1757 + t_gen_mov_preg_TN(dc, PR_CCS, cpu_T[0]);
1781 1758 dc->flags_live = 1;
1782 1759 return 2;
1783 1760 }
... ... @@ -1900,14 +1877,16 @@ static unsigned int dec_setclrf(DisasContext *dc)
1900 1877 flags = (EXTRACT_FIELD(dc->ir, 12, 15) << 4)
1901 1878 | EXTRACT_FIELD(dc->ir, 0, 3);
1902 1879 DIS(fprintf (logfile, "set=%d flags=%x\n", set, flags));
1903   - if (set && flags == 0)
  1880 + if (set && flags == 0) {
1904 1881 DIS(fprintf (logfile, "nop\n"));
1905   - else if (!set && (flags & 0x20))
  1882 + } else if (!set && (flags & 0x20)) {
1906 1883 DIS(fprintf (logfile, "di\n"));
1907   - else
  1884 + }
  1885 + else {
1908 1886 DIS(fprintf (logfile, "%sf %x\n",
1909   - set ? "set" : "clr",
  1887 + set ? "set" : "clr",
1910 1888 flags));
  1889 + }
1911 1890  
1912 1891 if (set && (flags & X_FLAG)) {
1913 1892 dc->flagx_live = 1;
... ... @@ -1924,7 +1903,7 @@ static unsigned int dec_setclrf(DisasContext *dc)
1924 1903 /* Enter user mode. */
1925 1904 t_gen_mov_env_TN(ksp, cpu_R[R_SP]);
1926 1905 tcg_gen_mov_tl(cpu_R[R_SP], cpu_PR[PR_USP]);
1927   - dc->is_jmp = DISAS_UPDATE;
  1906 + dc->is_jmp = DISAS_NEXT;
1928 1907 }
1929 1908 tcg_gen_ori_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], flags);
1930 1909 }
... ... @@ -1971,7 +1950,7 @@ static unsigned int dec_move_rp(DisasContext *dc)
1971 1950 else
1972 1951 t_gen_mov_TN_reg(cpu_T[0], dc->op1);
1973 1952  
1974   - t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
  1953 + t_gen_mov_preg_TN(dc, dc->op2, cpu_T[0]);
1975 1954 if (dc->op2 == PR_CCS) {
1976 1955 cris_update_cc_op(dc, CC_OP_FLAGS, 4);
1977 1956 dc->flags_live = 1;
... ... @@ -2004,7 +1983,7 @@ static unsigned int dec_move_mr(DisasContext *dc)
2004 1983 dc->op1, dc->postinc ? "+]" : "]",
2005 1984 dc->op2));
2006 1985  
2007   - insn_len = dec_prep_alu_m(dc, 0, memsize);
  1986 + insn_len = dec_prep_move_m(dc, 0, memsize);
2008 1987 cris_cc_mask(dc, CC_MASK_NZ);
2009 1988 crisv32_alu_op(dc, CC_OP_MOVE, dc->op2, memsize);
2010 1989 do_postinc(dc, memsize);
... ... @@ -2317,7 +2296,7 @@ static unsigned int dec_move_mp(DisasContext *dc)
2317 2296 }
2318 2297 }
2319 2298  
2320   - t_gen_mov_preg_TN(dc->op2, cpu_T[1]);
  2299 + t_gen_mov_preg_TN(dc, dc->op2, cpu_T[1]);
2321 2300  
2322 2301 do_postinc(dc, memsize);
2323 2302 return insn_len;
... ... @@ -2337,7 +2316,6 @@ static unsigned int dec_move_pm(DisasContext *dc)
2337 2316 if (dc->op2 == PR_CCS)
2338 2317 cris_evaluate_flags(dc);
2339 2318 t_gen_mov_TN_preg(cpu_T[1], dc->op2);
2340   -
2341 2319 gen_store(dc, cpu_R[dc->op1], cpu_T[1], memsize);
2342 2320  
2343 2321 cris_cc_mask(dc, 0);
... ... @@ -2482,7 +2460,7 @@ static unsigned int dec_jas_im(DisasContext *dc)
2482 2460 cris_cc_mask(dc, 0);
2483 2461 /* Store the return address in Pd. */
2484 2462 tcg_gen_movi_tl(env_btarget, imm);
2485   - t_gen_mov_preg_TN(dc->op2, tcg_const_tl(dc->pc + 8));
  2463 + t_gen_mov_preg_TN(dc, dc->op2, tcg_const_tl(dc->pc + 8));
2486 2464 cris_prepare_dyn_jmp(dc);
2487 2465 return 6;
2488 2466 }
... ... @@ -2499,7 +2477,7 @@ static unsigned int dec_jasc_im(DisasContext *dc)
2499 2477 tcg_gen_movi_tl(cpu_T[0], imm);
2500 2478 tcg_gen_mov_tl(env_btarget, cpu_T[0]);
2501 2479 tcg_gen_movi_tl(cpu_T[0], dc->pc + 8 + 4);
2502   - t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
  2480 + t_gen_mov_preg_TN(dc, dc->op2, cpu_T[0]);
2503 2481 cris_prepare_dyn_jmp(dc);
2504 2482 return 6;
2505 2483 }
... ... @@ -2512,7 +2490,7 @@ static unsigned int dec_jasc_r(DisasContext *dc)
2512 2490 t_gen_mov_TN_reg(cpu_T[0], dc->op1);
2513 2491 tcg_gen_mov_tl(env_btarget, cpu_T[0]);
2514 2492 tcg_gen_movi_tl(cpu_T[0], dc->pc + 4 + 4);
2515   - t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
  2493 + t_gen_mov_preg_TN(dc, dc->op2, cpu_T[0]);
2516 2494 cris_prepare_dyn_jmp(dc);
2517 2495 return 2;
2518 2496 }
... ... @@ -2547,7 +2525,7 @@ static unsigned int dec_bas_im(DisasContext *dc)
2547 2525 tcg_gen_movi_tl(cpu_T[0], dc->pc + simm);
2548 2526 tcg_gen_mov_tl(env_btarget, cpu_T[0]);
2549 2527 tcg_gen_movi_tl(cpu_T[0], dc->pc + 8);
2550   - t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
  2528 + t_gen_mov_preg_TN(dc, dc->op2, cpu_T[0]);
2551 2529 cris_prepare_dyn_jmp(dc);
2552 2530 return 6;
2553 2531 }
... ... @@ -2563,7 +2541,7 @@ static unsigned int dec_basc_im(DisasContext *dc)
2563 2541 tcg_gen_movi_tl(cpu_T[0], dc->pc + simm);
2564 2542 tcg_gen_mov_tl(env_btarget, cpu_T[0]);
2565 2543 tcg_gen_movi_tl(cpu_T[0], dc->pc + 12);
2566   - t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
  2544 + t_gen_mov_preg_TN(dc, dc->op2, cpu_T[0]);
2567 2545 cris_prepare_dyn_jmp(dc);
2568 2546 return 6;
2569 2547 }
... ... @@ -2785,8 +2763,42 @@ static void check_breakpoint(CPUState *env, DisasContext *dc)
2785 2763 }
2786 2764 }
2787 2765  
  2766 +
  2767 +/*
  2768 + * Delay slots on QEMU/CRIS.
  2769 + *
  2770 + * If an exception hits on a delay slot, the core will let ERP (the Exception
  2771 + * Return Pointer) point to the branch (the previous) insn and set the lsb
  2772 + * to give SW a hint that the exception actually hit on the dslot.
  2773 + *
  2774 + * CRIS expects all PC addresses to be 16-bit aligned. The lsb is ignored by
  2775 + * the core and any jmp to an odd address will mask off that lsb. It is
  2776 + * simply there to let SW know there was an exception on a dslot.
  2777 + *
  2778 + * When the software returns from an exception, the branch will re-execute.
  2779 + * On QEMU, care needs to be taken when a branch+delayslot sequence is broken
  2780 + * and the branch and delayslot don't share pages.
  2781 + *
  2782 + * The TB containing the branch insn will set up env->btarget and evaluate
  2783 + * env->btaken. When the translation loop exits we will note that the branch
  2784 + * sequence is broken and let env->dslot be the size of the branch insn (those
  2785 + * vary in length).
  2786 + *
  2787 + * The TB containing the delayslot will have the PC of its real insn (i.e. no
  2788 + * lsb set). It will also expect to have env->dslot set up with the size of the
  2789 + * branch insn so that env->pc - env->dslot points to the branch. This TB
  2790 + * will execute the dslot and take the branch, either to btarget or just one
  2791 + * insn ahead.
  2792 + *
  2793 + * When exceptions occur, we check for env->dslot in do_interrupt to detect
  2794 + * broken branch sequences and set up $erp accordingly (i.e. let it point to
  2795 + * the branch and set lsb). Then env->dslot gets cleared so that the exception
  2796 + * handler can enter. When returning from exceptions (jump $erp) the lsb gets
  2797 + * masked off and we will re-execute the branch insn.
  2798 + *
  2799 + */
  2800 +
2788 2801 /* generate intermediate code for basic block 'tb'. */
2789   -struct DisasContext ctx;
2790 2802 static int
2791 2803 gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
2792 2804 int search_pc)
... ... @@ -2795,6 +2807,7 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
2795 2807 uint32_t pc_start;
2796 2808 unsigned int insn_len;
2797 2809 int j, lj;
  2810 + struct DisasContext ctx;
2798 2811 struct DisasContext *dc = &ctx;
2799 2812 uint32_t next_page_start;
2800 2813  
... ... @@ -2803,8 +2816,6 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
2803 2816  
2804 2817 /* Odd PC indicates that branch is re-executing due to exception in the
2805 2818 * delayslot, like in real hw.
2806   - * FIXME: we need to handle the case were the branch and the insn in
2807   - * the delayslot do not share pages.
2808 2819 */
2809 2820 pc_start = tb->pc & ~1;
2810 2821 dc->env = env;
... ... @@ -2820,19 +2831,24 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
2820 2831 dc->flagx_live = 0;
2821 2832 dc->flags_x = 0;
2822 2833 dc->cc_mask = 0;
  2834 + dc->update_cc = 0;
2823 2835 cris_update_cc_op(dc, CC_OP_FLAGS, 4);
2824 2836  
2825   - dc->user = env->pregs[PR_CCS] & U_FLAG;
2826   - dc->delayed_branch = 0;
  2837 + /* Decode TB flags. */
  2838 + dc->user = tb->flags & U_FLAG;
  2839 + dc->delayed_branch = !!(tb->flags & 7);
2827 2840  
2828 2841 if (loglevel & CPU_LOG_TB_IN_ASM) {
2829 2842 fprintf(logfile,
2830   - "search=%d pc=%x ccs=%x pid=%x usp=%x dbg=%x %x %x\n"
  2843 + "srch=%d pc=%x %x bt=%x ds=%lld ccs=%x\n"
  2844 + "pid=%x usp=%x dbg=%x %x %x\n"
2831 2845 "%x.%x.%x.%x\n"
2832 2846 "%x.%x.%x.%x\n"
2833 2847 "%x.%x.%x.%x\n"
2834 2848 "%x.%x.%x.%x\n",
2835   - search_pc, env->pc, env->pregs[PR_CCS],
  2849 + search_pc, dc->pc, dc->ppc,
  2850 + env->btarget, tb->flags & 7,
  2851 + env->pregs[PR_CCS],
2836 2852 env->pregs[PR_PID], env->pregs[PR_USP],
2837 2853 env->debug1, env->debug2, env->debug3,
2838 2854 env->regs[0], env->regs[1], env->regs[2], env->regs[3],
... ... @@ -2860,14 +2876,17 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
2860 2876 while (lj < j)
2861 2877 gen_opc_instr_start[lj++] = 0;
2862 2878 }
2863   - if (dc->delayed_branch == 1) {
  2879 + if (dc->delayed_branch == 1)
2864 2880 gen_opc_pc[lj] = dc->ppc | 1;
2865   - gen_opc_instr_start[lj] = 0;
2866   - }
2867   - else {
  2881 + else
2868 2882 gen_opc_pc[lj] = dc->pc;
2869   - gen_opc_instr_start[lj] = 1;
2870   - }
  2883 + gen_opc_instr_start[lj] = 1;
  2884 + }
  2885 +
  2886 + /* Pretty disas. */
  2887 + DIS(fprintf(logfile, "%x ", dc->pc));
  2888 + if (search_pc) {
  2889 + DIS(fprintf(logfile, "%x ", dc->pc));
2871 2890 }
2872 2891  
2873 2892 dc->clear_x = 1;
... ... @@ -2881,17 +2900,13 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
2881 2900 actually genereating any host code, the simulator will just
2882 2901 loop doing nothing for on this program location. */
2883 2902 if (dc->delayed_branch) {
  2903 + t_gen_mov_env_TN(dslot, tcg_const_tl(0));
2884 2904 dc->delayed_branch--;
2885 2905 if (dc->delayed_branch == 0)
2886 2906 {
2887   - if (dc->bcc == CC_A) {
2888   - tcg_gen_mov_tl(env_pc, env_btarget);
2889   - dc->is_jmp = DISAS_JUMP;
2890   - }
2891   - else {
2892   - t_gen_cc_jmp(dc->delayed_pc, dc->pc);
2893   - dc->is_jmp = DISAS_JUMP;
2894   - }
  2907 + t_gen_cc_jmp(env_btarget,
  2908 + tcg_const_tl(dc->pc));
  2909 + dc->is_jmp = DISAS_JUMP;
2895 2910 }
2896 2911 }
2897 2912  
... ... @@ -2900,15 +2915,16 @@ gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
2900 2915 if (!(tb->pc & 1) && env->singlestep_enabled)
2901 2916 break;
2902 2917 } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end
2903   - && ((dc->pc < next_page_start) || dc->delayed_branch));
  2918 + && (dc->pc < next_page_start));
2904 2919  
  2920 + /* Broken branch+delayslot sequence. */
2905 2921 if (dc->delayed_branch == 1) {
2906   - /* Reexecute the last insn. */
2907   - dc->pc = dc->ppc | 1;
  2922 + /* Set env->dslot to the size of the branch insn. */
  2923 + t_gen_mov_env_TN(dslot, tcg_const_tl(dc->pc - dc->ppc));
2908 2924 }
2909 2925  
2910 2926 if (!dc->is_jmp) {
2911   - D(printf("!jmp pc=%x jmp=%d db=%d\n", dc->pc,
  2927 + D(fprintf(logfile, "!jmp pc=%x jmp=%d db=%d\n", dc->pc,
2912 2928 dc->is_jmp, dc->delayed_branch));
2913 2929 /* T0 and env_pc should hold the new pc. */
2914 2930 tcg_gen_movi_tl(cpu_T[0], dc->pc);
... ... @@ -3079,6 +3095,7 @@ CPUCRISState *cpu_cris_init (const char *cpu_model)
3079 3095 TCG_HELPER(helper_dummy);
3080 3096  
3081 3097 TCG_HELPER(helper_tlb_flush);
  3098 + TCG_HELPER(helper_tlb_flush_pid);
3082 3099 TCG_HELPER(helper_movl_sreg_reg);
3083 3100 TCG_HELPER(helper_movl_reg_sreg);
3084 3101 TCG_HELPER(helper_rfe);
... ...