Commit 5df3a524fc0b923cf2e5e1883ff550d055d36eb5

Authored by Filip Navara
1 parent 430c1ad1

TCG constant propagation.

Add support for constant propagation to TCG. This has to be paired with the liveness
analysis to remove the dead code. Not all possible operations are covered, but the
most common ones are. This improves the code generation for several ARM instructions,
like MVN (immediate), and it may help other targets as well.

v1 -> v2:
Added profiling code and (hopefully) fixed support for 64-bit TCG targets.
v2 -> v3:
Another attempt at fixing the support for 64-bit TCG targets.

Signed-off-by: Filip Navara <filip.navara@gmail.com>
Showing 2 changed files with 187 additions and 0 deletions
tcg/tcg.c
@@ -1025,7 +1025,183 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs) @@ -1025,7 +1025,183 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
1025 #endif 1025 #endif
1026 tdefs++; 1026 tdefs++;
1027 } 1027 }
  1028 +}
  1029 +
  1030 +static void tcg_const_analysis(TCGContext *s)
  1031 +{
  1032 + int nb_cargs, nb_iargs, nb_oargs, dest, src, src2, del_args, i;
  1033 + TCGArg *args;
  1034 + uint16_t op;
  1035 + uint16_t *opc_ptr;
  1036 + const TCGOpDef *def;
  1037 + uint8_t *const_temps;
  1038 + tcg_target_ulong *temp_values;
  1039 + tcg_target_ulong val;
  1040 + tcg_target_ulong dest_val, src_val, src2_val;
  1041 +
  1042 + const_temps = tcg_malloc(s->nb_temps);
  1043 + memset(const_temps, 0, s->nb_temps);
  1044 + temp_values = tcg_malloc(s->nb_temps * sizeof(uint32_t));
  1045 +
  1046 + opc_ptr = gen_opc_buf;
  1047 + args = gen_opparam_buf;
  1048 + while (opc_ptr < gen_opc_ptr) {
  1049 + op = *opc_ptr;
  1050 + def = &tcg_op_defs[op];
  1051 + nb_oargs = def->nb_oargs;
  1052 + nb_iargs = def->nb_iargs;
  1053 + nb_cargs = def->nb_cargs;
  1054 + del_args = 0;
  1055 +
  1056 + switch(op) {
  1057 + case INDEX_op_movi_i32:
  1058 +#if TCG_TARGET_REG_BITS == 64
  1059 + case INDEX_op_movi_i64:
  1060 +#endif
  1061 + dest = args[0];
  1062 + val = args[1];
  1063 + const_temps[dest] = 1;
  1064 + temp_values[dest] = val;
  1065 + break;
  1066 + case INDEX_op_mov_i32:
  1067 +#if TCG_TARGET_REG_BITS == 64
  1068 + case INDEX_op_mov_i64:
  1069 +#endif
  1070 + dest = args[0];
  1071 + src = args[1];
  1072 + const_temps[dest] = const_temps[src];
  1073 + temp_values[dest] = temp_values[src];
  1074 + break;
  1075 +#ifdef TCG_TARGET_HAS_not_i32
  1076 + case INDEX_op_not_i32:
  1077 + dest = args[0];
  1078 + src = args[1];
  1079 + if (const_temps[src]) {
  1080 + const_temps[dest] = 1;
  1081 + *opc_ptr = INDEX_op_movi_i32;
  1082 + args[1] = temp_values[dest] = ~temp_values[src] & 0xffffffff;
  1083 + } else {
  1084 + const_temps[dest] = 0;
  1085 + }
  1086 + break;
  1087 +#endif
  1088 +#ifdef TCG_TARGET_HAS_not_i64
  1089 + case INDEX_op_not_i64:
  1090 + dest = args[0];
  1091 + src = args[1];
  1092 + if (const_temps[src]) {
  1093 + const_temps[dest] = 1;
  1094 + *opc_ptr = INDEX_op_movi_i64;
  1095 + args[1] = temp_values[dest] = ~temp_values[src];
  1096 + } else {
  1097 + const_temps[dest] = 0;
  1098 + }
  1099 + break;
  1100 +#endif
  1101 + case INDEX_op_add_i32:
  1102 + case INDEX_op_sub_i32:
  1103 + case INDEX_op_mul_i32:
  1104 + case INDEX_op_and_i32:
  1105 + case INDEX_op_or_i32:
  1106 + case INDEX_op_xor_i32:
  1107 + case INDEX_op_shl_i32:
  1108 + case INDEX_op_shr_i32:
  1109 + dest = args[0];
  1110 + src = args[1];
  1111 + src2 = args[2];
  1112 + if (const_temps[src] && const_temps[src2]) {
  1113 + src_val = temp_values[src];
  1114 + src2_val = temp_values[src2];
  1115 + const_temps[dest] = 1;
  1116 + switch (op) {
  1117 + case INDEX_op_add_i32: dest_val = src_val + src2_val; break;
  1118 + case INDEX_op_sub_i32: dest_val = src_val - src2_val; break;
  1119 + case INDEX_op_mul_i32: dest_val = src_val * src2_val; break;
  1120 + case INDEX_op_and_i32: dest_val = src_val & src2_val; break;
  1121 + case INDEX_op_or_i32: dest_val = src_val | src2_val; break;
  1122 + case INDEX_op_xor_i32: dest_val = src_val ^ src2_val; break;
  1123 + case INDEX_op_shl_i32: dest_val = src_val << src2_val; break;
  1124 + case INDEX_op_shr_i32: dest_val = src_val >> src2_val; break;
  1125 + default: tcg_abort(); return;
  1126 + }
  1127 + *opc_ptr = INDEX_op_movi_i32;
  1128 + args[1] = temp_values[dest] = dest_val & 0xffffffff;
  1129 + del_args = 1;
  1130 + } else {
  1131 + const_temps[dest] = 0;
  1132 + }
  1133 + break;
  1134 +#if TCG_TARGET_REG_BITS == 64
  1135 + case INDEX_op_add_i64:
  1136 + case INDEX_op_sub_i64:
  1137 + case INDEX_op_mul_i64:
  1138 + case INDEX_op_and_i64:
  1139 + case INDEX_op_or_i64:
  1140 + case INDEX_op_xor_i64:
  1141 + case INDEX_op_shl_i64:
  1142 + case INDEX_op_shr_i64:
  1143 + dest = args[0];
  1144 + src = args[1];
  1145 + src2 = args[2];
  1146 + if (const_temps[src] && const_temps[src2]) {
  1147 + src_val = temp_values[src];
  1148 + src2_val = temp_values[src2];
  1149 + const_temps[dest] = 1;
  1150 + switch (op) {
  1151 + case INDEX_op_add_i64: dest_val = src_val + src2_val; break;
  1152 + case INDEX_op_sub_i64: dest_val = src_val - src2_val; break;
  1153 + case INDEX_op_mul_i64: dest_val = src_val * src2_val; break;
  1154 + case INDEX_op_and_i64: dest_val = src_val & src2_val; break;
  1155 + case INDEX_op_or_i64: dest_val = src_val | src2_val; break;
  1156 + case INDEX_op_xor_i64: dest_val = src_val ^ src2_val; break;
  1157 + case INDEX_op_shl_i64: dest_val = src_val << src2_val; break;
  1158 + case INDEX_op_shr_i64: dest_val = src_val >> src2_val; break;
  1159 + default: tcg_abort(); return;
  1160 + }
  1161 + *opc_ptr = INDEX_op_movi_i64;
  1162 + args[1] = temp_values[dest] = dest_val;
  1163 + del_args = 1;
  1164 + } else {
  1165 + const_temps[dest] = 0;
  1166 + }
  1167 + break;
  1168 +#endif
  1169 + case INDEX_op_call:
  1170 + nb_oargs = args[0] >> 16;
  1171 + nb_iargs = args[0] & 0xffff;
  1172 + nb_cargs = def->nb_cargs;
  1173 + args++;
  1174 + for (i = 0; i < nb_oargs; i++) {
  1175 + const_temps[args[i]] = 0;
  1176 + }
  1177 + break;
  1178 + case INDEX_op_nopn:
  1179 + /* variable number of arguments */
  1180 + nb_cargs = args[0];
  1181 + break;
  1182 + case INDEX_op_set_label:
  1183 + memset(const_temps, 0, s->nb_temps);
  1184 + break;
  1185 + default:
  1186 + if (def->flags & TCG_OPF_BB_END) {
  1187 + memset(const_temps, 0, s->nb_temps);
  1188 + } else {
  1189 + for (i = 0; i < nb_oargs; i++) {
  1190 + const_temps[args[i]] = 0;
  1191 + }
  1192 + }
  1193 + break;
  1194 + }
  1195 + opc_ptr++;
  1196 + args += nb_iargs + nb_oargs + nb_cargs - del_args;
  1197 + if (del_args > 0) {
  1198 + gen_opparam_ptr -= del_args;
  1199 + memmove(args, args + del_args, (gen_opparam_ptr - args) * sizeof(*args));
  1200 + }
  1201 + }
1028 1202
  1203 + if (args != gen_opparam_ptr)
  1204 + tcg_abort();
1029 } 1205 }
1030 1206
1031 #ifdef USE_LIVENESS_ANALYSIS 1207 #ifdef USE_LIVENESS_ANALYSIS
@@ -1896,6 +2072,14 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, @@ -1896,6 +2072,14 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
1896 #endif 2072 #endif
1897 2073
1898 #ifdef CONFIG_PROFILER 2074 #ifdef CONFIG_PROFILER
  2075 + s->const_time -= profile_getclock();
  2076 +#endif
  2077 + tcg_const_analysis(s);
  2078 +#ifdef CONFIG_PROFILER
  2079 + s->const_time += profile_getclock();
  2080 +#endif
  2081 +
  2082 +#ifdef CONFIG_PROFILER
1899 s->la_time -= profile_getclock(); 2083 s->la_time -= profile_getclock();
1900 #endif 2084 #endif
1901 tcg_liveness_analysis(s); 2085 tcg_liveness_analysis(s);
@@ -2068,6 +2252,8 @@ void tcg_dump_info(FILE *f, @@ -2068,6 +2252,8 @@ void tcg_dump_info(FILE *f,
2068 (double)s->interm_time / tot * 100.0); 2252 (double)s->interm_time / tot * 100.0);
2069 cpu_fprintf(f, " gen_code time %0.1f%%\n", 2253 cpu_fprintf(f, " gen_code time %0.1f%%\n",
2070 (double)s->code_time / tot * 100.0); 2254 (double)s->code_time / tot * 100.0);
  2255 + cpu_fprintf(f, "const/code time %0.1f%%\n",
  2256 + (double)s->const_time / (s->code_time ? s->code_time : 1) * 100.0);
2071 cpu_fprintf(f, "liveness/code time %0.1f%%\n", 2257 cpu_fprintf(f, "liveness/code time %0.1f%%\n",
2072 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 2258 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
2073 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", 2259 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
tcg/tcg.h
@@ -288,6 +288,7 @@ struct TCGContext { @@ -288,6 +288,7 @@ struct TCGContext {
288 int64_t code_out_len; 288 int64_t code_out_len;
289 int64_t interm_time; 289 int64_t interm_time;
290 int64_t code_time; 290 int64_t code_time;
  291 + int64_t const_time;
291 int64_t la_time; 292 int64_t la_time;
292 int64_t restore_count; 293 int64_t restore_count;
293 int64_t restore_time; 294 int64_t restore_time;