Commit 852d481faf7070ac6e46653b77f6c9ecbdfb9efc

Authored by edgar_igl
1 parent 714fa308

SH: Improve movca.l/ocbi emulation.

Author: Vladimir Prus <vladimir@codesourcery.com>

    Fix movca.l/ocbi emulation.

        * target-sh4/cpu.h (memory_content): New.
        (CPUSH4State): New fields movcal_backup and movcal_backup_tail.
        * target-sh4/helper.h (helper_movcal)
        (helper_discard_movcal_backup, helper_ocbi): New.
        * target-sh4/op_helper.c (helper_movcal)
        (helper_discard_movcal_backup, helper_ocbi): New.
        * target-sh4/translate.c (DisasContext): New field has_movcal.
        (sh4_defs): Update CVS for SH7785.
        (cpu_sh4_init): Initialize env->movcal_backup_tail.
        (_decode_opc): Discard movca.l-backup.
        Make use of helper_movcal and helper_ocbi.
        (gen_intermediate_code_internal): Initialize has_movcal from the TB flags.

Thanks to Shin-ichiro KAWASAKI and Paul Mundt for valuable feedback.

Acked-by: Edgar E. Iglesias <edgar.iglesias@gmail.com>


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@6966 c046a42c-6fe2-441c-8c8c-71466251a162
target-sh4/cpu.h
@@ -100,6 +100,12 @@ enum sh_features { @@ -100,6 +100,12 @@ enum sh_features {
100 SH_FEATURE_BCR3_AND_BCR4 = 2, 100 SH_FEATURE_BCR3_AND_BCR4 = 2,
101 }; 101 };
102 102
  103 +typedef struct memory_content { /* One saved memory word; records are chained into a singly linked list. */
  104 + uint32_t address; /* address the saved word belongs to */
  105 + uint32_t value; /* content to write back to that address on restore */
  106 + struct memory_content *next; /* following record, or NULL for the last one */
  107 +} memory_content;
  108 +
103 typedef struct CPUSH4State { 109 typedef struct CPUSH4State {
104 int id; /* CPU model */ 110 int id; /* CPU model */
105 111
@@ -148,6 +154,8 @@ typedef struct CPUSH4State { @@ -148,6 +154,8 @@ typedef struct CPUSH4State {
148 tlb_t itlb[ITLB_SIZE]; /* instruction translation table */ 154 tlb_t itlb[ITLB_SIZE]; /* instruction translation table */
149 void *intc_handle; 155 void *intc_handle;
150 int intr_at_halt; /* SR_BL ignored during sleep */ 156 int intr_at_halt; /* SR_BL ignored during sleep */
  157 + memory_content *movcal_backup;
  158 + memory_content **movcal_backup_tail;
151 } CPUSH4State; 159 } CPUSH4State;
152 160
153 CPUSH4State *cpu_sh4_init(const char *cpu_model); 161 CPUSH4State *cpu_sh4_init(const char *cpu_model);
@@ -162,6 +170,8 @@ void sh4_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); @@ -162,6 +170,8 @@ void sh4_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
162 void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, 170 void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr,
163 uint32_t mem_value); 171 uint32_t mem_value);
164 172
  173 +int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr);
  174 +
165 static inline void cpu_set_tls(CPUSH4State *env, target_ulong newtls) 175 static inline void cpu_set_tls(CPUSH4State *env, target_ulong newtls)
166 { 176 {
167 env->gbr = newtls; 177 env->gbr = newtls;
@@ -293,6 +303,8 @@ static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb) @@ -293,6 +303,8 @@ static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
293 env->flags = tb->flags; 303 env->flags = tb->flags;
294 } 304 }
295 305
  306 +#define TB_FLAG_PENDING_MOVCA (1 << 4) /* TB flag bit 4: set by cpu_get_tb_cpu_state() while env->movcal_backup is non-empty */
  307 +
296 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, 308 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
297 target_ulong *cs_base, int *flags) 309 target_ulong *cs_base, int *flags)
298 { 310 {
@@ -302,7 +314,8 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, @@ -302,7 +314,8 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
302 | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */ 314 | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */
303 | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ 315 | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */
304 | (env->sr & (SR_MD | SR_RB)) /* Bits 29-30 */ 316 | (env->sr & (SR_MD | SR_RB)) /* Bits 29-30 */
305 - | (env->sr & SR_FD); /* Bit 15 */ 317 + | (env->sr & SR_FD) /* Bit 15 */
  318 + | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */
306 } 319 }
307 320
308 #endif /* _CPU_SH4_H */ 321 #endif /* _CPU_SH4_H */
target-sh4/helper.c
@@ -644,4 +644,48 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr, @@ -644,4 +644,48 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, target_phys_addr_t addr,
644 } 644 }
645 } 645 }
646 646
  647 +int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr) /* Returns nonzero when ADDR is treated as cacheable for the current CPU state, 0 otherwise. */
  648 +{
  649 + int n;
  650 + int use_asid = (env->mmucr & MMUCR_SV) == 0 || (env->sr & SR_MD) == 0; /* forwarded to find_tlb_entry() below */
  651 +
  652 + /* check area */
  653 + if (env->sr & SR_MD) {
  654 + /* For privileged mode, the P2 and P4 areas are not cacheable. */
  655 + if ((0xA0000000 <= addr && addr < 0xC0000000) || 0xE0000000 <= addr)
  656 + return 0;
  657 + } else {
  658 + /* For user mode, only the U0 area is cacheable. */
  659 + if (0x80000000 <= addr)
  660 + return 0;
  661 + }
  662 +
  663 + /*
  664 + * TODO : Evaluate CCR and check if the cache is on or off.
  665 + * Now CCR is not in CPUSH4State, but in SH7750State.
  666 + * When you move the ccr into CPUSH4State, the code will be
  667 + * as follows.
  668 + */
  669 +#if 0
  670 + /* check if operand cache is enabled or not. */
  671 + if (!(env->ccr & 1))
  672 + return 0;
  673 +#endif
  674 +
  675 + /* if MMU is off, no check for TLB. NOTE(review): the test below returns when MMUCR_AT is SET, which reads as the opposite of "MMU is off" -- confirm the intended polarity. */
  676 + if (env->mmucr & MMUCR_AT)
  677 + return 1;
  678 +
  679 + /* check TLB: ITLB first, then UTLB; a hit returns that entry's c field */
  680 + n = find_tlb_entry(env, addr, env->itlb, ITLB_SIZE, use_asid);
  681 + if (n >= 0)
  682 + return env->itlb[n].c;
  683 +
  684 + n = find_tlb_entry(env, addr, env->utlb, UTLB_SIZE, use_asid);
  685 + if (n >= 0)
  686 + return env->utlb[n].c;
  687 +
  688 + return 0; /* neither TLB lookup returned an index >= 0 */
  689 +}
  690 +
647 #endif 691 #endif
target-sh4/helper.h
@@ -9,6 +9,10 @@ DEF_HELPER_0(debug, void) @@ -9,6 +9,10 @@ DEF_HELPER_0(debug, void)
9 DEF_HELPER_1(sleep, void, i32) 9 DEF_HELPER_1(sleep, void, i32)
10 DEF_HELPER_1(trapa, void, i32) 10 DEF_HELPER_1(trapa, void, i32)
11 11
  12 +DEF_HELPER_2(movcal, void, i32, i32)
  13 +DEF_HELPER_0(discard_movcal_backup, void)
  14 +DEF_HELPER_1(ocbi, void, i32)
  15 +
12 DEF_HELPER_2(addv, i32, i32, i32) 16 DEF_HELPER_2(addv, i32, i32, i32)
13 DEF_HELPER_2(addc, i32, i32, i32) 17 DEF_HELPER_2(addc, i32, i32, i32)
14 DEF_HELPER_2(subv, i32, i32, i32) 18 DEF_HELPER_2(subv, i32, i32, i32)
target-sh4/op_helper.c
@@ -18,6 +18,7 @@ @@ -18,6 +18,7 @@
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
19 */ 19 */
20 #include <assert.h> 20 #include <assert.h>
  21 +#include <stdlib.h>
21 #include "exec.h" 22 #include "exec.h"
22 #include "helper.h" 23 #include "helper.h"
23 24
@@ -122,6 +123,57 @@ void helper_trapa(uint32_t tra) @@ -122,6 +123,57 @@ void helper_trapa(uint32_t tra)
122 cpu_loop_exit(); 123 cpu_loop_exit();
123 } 124 }
124 125
  126 +void helper_movcal(uint32_t address, uint32_t value)
  127 +{
  128 + if (cpu_sh4_is_cached (env, address))
  129 + {
  130 + memory_content *r = malloc (sizeof(memory_content));
  131 + r->address = address;
  132 + r->value = value;
  133 + r->next = NULL;
  134 +
  135 + *(env->movcal_backup_tail) = r;
  136 + env->movcal_backup_tail = &(r->next);
  137 + }
  138 +}
  139 +
  140 +void helper_discard_movcal_backup(void)
  141 +{
  142 + memory_content *current = env->movcal_backup;
  143 +
  144 + while(current)
  145 + {
  146 + memory_content *next = current->next;
  147 + free (current);
  148 + env->movcal_backup = current = next;
  149 + if (current == 0)
  150 + env->movcal_backup_tail = &(env->movcal_backup);
  151 + }
  152 +}
  153 +
  154 +void helper_ocbi(uint32_t address)
  155 +{
  156 + memory_content **current = &(env->movcal_backup);
  157 + while (*current)
  158 + {
  159 + uint32_t a = (*current)->address;
  160 + if ((a & ~0x1F) == (address & ~0x1F))
  161 + {
  162 + memory_content *next = (*current)->next;
  163 + stl(a, (*current)->value);
  164 +
  165 + if (next == 0)
  166 + {
  167 + env->movcal_backup_tail = current;
  168 + }
  169 +
  170 + free (*current);
  171 + *current = next;
  172 + break;
  173 + }
  174 + }
  175 +}
  176 +
125 uint32_t helper_addc(uint32_t arg0, uint32_t arg1) 177 uint32_t helper_addc(uint32_t arg0, uint32_t arg1)
126 { 178 {
127 uint32_t tmp0, tmp1; 179 uint32_t tmp0, tmp1;
target-sh4/translate.c
@@ -50,6 +50,7 @@ typedef struct DisasContext { @@ -50,6 +50,7 @@ typedef struct DisasContext {
50 uint32_t delayed_pc; 50 uint32_t delayed_pc;
51 int singlestep_enabled; 51 int singlestep_enabled;
52 uint32_t features; 52 uint32_t features;
  53 + int has_movcal;
53 } DisasContext; 54 } DisasContext;
54 55
55 #if defined(CONFIG_USER_ONLY) 56 #if defined(CONFIG_USER_ONLY)
@@ -283,6 +284,7 @@ CPUSH4State *cpu_sh4_init(const char *cpu_model) @@ -283,6 +284,7 @@ CPUSH4State *cpu_sh4_init(const char *cpu_model)
283 env = qemu_mallocz(sizeof(CPUSH4State)); 284 env = qemu_mallocz(sizeof(CPUSH4State));
284 env->features = def->features; 285 env->features = def->features;
285 cpu_exec_init(env); 286 cpu_exec_init(env);
  287 + env->movcal_backup_tail = &(env->movcal_backup);
286 sh4_translate_init(); 288 sh4_translate_init();
287 env->cpu_model_str = cpu_model; 289 env->cpu_model_str = cpu_model;
288 cpu_sh4_reset(env); 290 cpu_sh4_reset(env);
@@ -495,6 +497,37 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) @@ -495,6 +497,37 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
495 497
496 static void _decode_opc(DisasContext * ctx) 498 static void _decode_opc(DisasContext * ctx)
497 { 499 {
  500 + /* This code tries to make movca.l emulation sufficiently
  501 + accurate for Linux purposes. This instruction writes
  502 + memory, and prior to that, always allocates a cache line.
  503 + It is used in two contexts:
  504 + - in memcpy, where data is copied in blocks, the first write
  505 + to a block uses movca.l for performance.
  506 + - in arch/sh/mm/cache-sh4.c, a movca.l + ocbi combination is used
  507 + to flush the cache. Here, the data written by movca.l is never
  508 + written to memory, and the data written is just bogus.
  509 +
  510 + To simulate this, when emulating movca.l we store the value to memory,
  511 + but we also remember the previous content. If we see ocbi, we check
  512 + if movca.l for that address was done previously. If so, the write should
  513 + not have hit the memory, so we restore the previous content.
  514 + When we see an instruction that is neither movca.l
  515 + nor ocbi, the previous content is discarded.
  516 +
  517 + To optimize, we only try to flush stores when we're at the start of
  518 + TB, or if we already saw movca.l in this TB and did not flush stores
  519 + yet. */
  520 + if (ctx->has_movcal)
  521 + {
  522 + int opcode = ctx->opcode & 0xf0ff;
  523 + if (opcode != 0x0093 /* ocbi */
  524 + && opcode != 0x00c3 /* movca.l */)
  525 + {
  526 + gen_helper_discard_movcal_backup ();
  527 + ctx->has_movcal = 0;
  528 + }
  529 + }
  530 +
498 #if 0 531 #if 0
499 fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode); 532 fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode);
500 #endif 533 #endif
@@ -1545,7 +1578,13 @@ static void _decode_opc(DisasContext * ctx) @@ -1545,7 +1578,13 @@ static void _decode_opc(DisasContext * ctx)
1545 } 1578 }
1546 return; 1579 return;
1547 case 0x00c3: /* movca.l R0,@Rm */ 1580 case 0x00c3: /* movca.l R0,@Rm */
1548 - tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx); 1581 + {
  1582 + TCGv val = tcg_temp_new();
  1583 + tcg_gen_qemu_ld32u(val, REG(B11_8), ctx->memidx);
  1584 + gen_helper_movcal (REG(B11_8), val);
  1585 + tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
  1586 + }
  1587 + ctx->has_movcal = 1;
1549 return; 1588 return;
1550 case 0x40a9: 1589 case 0x40a9:
1551 /* MOVUA.L @Rm,R0 (Rm) -> R0 1590 /* MOVUA.L @Rm,R0 (Rm) -> R0
@@ -1594,9 +1633,7 @@ static void _decode_opc(DisasContext * ctx) @@ -1594,9 +1633,7 @@ static void _decode_opc(DisasContext * ctx)
1594 break; 1633 break;
1595 case 0x0093: /* ocbi @Rn */ 1634 case 0x0093: /* ocbi @Rn */
1596 { 1635 {
1597 - TCGv dummy = tcg_temp_new();  
1598 - tcg_gen_qemu_ld32s(dummy, REG(B11_8), ctx->memidx);  
1599 - tcg_temp_free(dummy); 1636 + gen_helper_ocbi (REG(B11_8));
1600 } 1637 }
1601 return; 1638 return;
1602 case 0x00a3: /* ocbp @Rn */ 1639 case 0x00a3: /* ocbp @Rn */
@@ -1876,6 +1913,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb, @@ -1876,6 +1913,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb,
1876 ctx.tb = tb; 1913 ctx.tb = tb;
1877 ctx.singlestep_enabled = env->singlestep_enabled; 1914 ctx.singlestep_enabled = env->singlestep_enabled;
1878 ctx.features = env->features; 1915 ctx.features = env->features;
  1916 + ctx.has_movcal = (tb->flags & TB_FLAG_PENDING_MOVCA);
1879 1917
1880 #ifdef DEBUG_DISAS 1918 #ifdef DEBUG_DISAS
1881 qemu_log_mask(CPU_LOG_TB_CPU, 1919 qemu_log_mask(CPU_LOG_TB_CPU,