Commit 5df3a524fc0b923cf2e5e1883ff550d055d36eb5

Authored by Filip Navara
1 parent 430c1ad1

TCG constant propagation.

Add support for constant propagation to TCG. The pass only rewrites operations whose
inputs are known constants; it has to be paired with the liveness analysis, which then
removes the code that became dead. Not all possible operations are covered, but the
most common ones are. This improves code generation for several ARM instructions,
such as MVN (immediate), and it may help other targets as well.

v1 -> v2:
Added profiling code and, hopefully, fixed support for 64-bit TCG targets.
v2 -> v3:
Another attempt at fixing the support for 64-bit TCG targets.

Signed-off-by: Filip Navara <filip.navara@gmail.com>
Showing 2 changed files with 187 additions and 0 deletions
tcg/tcg.c
... ... @@ -1025,7 +1025,183 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
1025 1025 #endif
1026 1026 tdefs++;
1027 1027 }
  1028 +}
  1029 +
  1030 +static void tcg_const_analysis(TCGContext *s)
  1031 +{
  1032 + int nb_cargs, nb_iargs, nb_oargs, dest, src, src2, del_args, i;
  1033 + TCGArg *args;
  1034 + uint16_t op;
  1035 + uint16_t *opc_ptr;
  1036 + const TCGOpDef *def;
  1037 + uint8_t *const_temps;
  1038 + tcg_target_ulong *temp_values;
  1039 + tcg_target_ulong val;
  1040 + tcg_target_ulong dest_val, src_val, src2_val;
  1041 +
  1042 + const_temps = tcg_malloc(s->nb_temps);
  1043 + memset(const_temps, 0, s->nb_temps);
  1044 + temp_values = tcg_malloc(s->nb_temps * sizeof(uint32_t));
  1045 +
  1046 + opc_ptr = gen_opc_buf;
  1047 + args = gen_opparam_buf;
  1048 + while (opc_ptr < gen_opc_ptr) {
  1049 + op = *opc_ptr;
  1050 + def = &tcg_op_defs[op];
  1051 + nb_oargs = def->nb_oargs;
  1052 + nb_iargs = def->nb_iargs;
  1053 + nb_cargs = def->nb_cargs;
  1054 + del_args = 0;
  1055 +
  1056 + switch(op) {
  1057 + case INDEX_op_movi_i32:
  1058 +#if TCG_TARGET_REG_BITS == 64
  1059 + case INDEX_op_movi_i64:
  1060 +#endif
  1061 + dest = args[0];
  1062 + val = args[1];
  1063 + const_temps[dest] = 1;
  1064 + temp_values[dest] = val;
  1065 + break;
  1066 + case INDEX_op_mov_i32:
  1067 +#if TCG_TARGET_REG_BITS == 64
  1068 + case INDEX_op_mov_i64:
  1069 +#endif
  1070 + dest = args[0];
  1071 + src = args[1];
  1072 + const_temps[dest] = const_temps[src];
  1073 + temp_values[dest] = temp_values[src];
  1074 + break;
  1075 +#ifdef TCG_TARGET_HAS_not_i32
  1076 + case INDEX_op_not_i32:
  1077 + dest = args[0];
  1078 + src = args[1];
  1079 + if (const_temps[src]) {
  1080 + const_temps[dest] = 1;
  1081 + *opc_ptr = INDEX_op_movi_i32;
  1082 + args[1] = temp_values[dest] = ~temp_values[src] & 0xffffffff;
  1083 + } else {
  1084 + const_temps[dest] = 0;
  1085 + }
  1086 + break;
  1087 +#endif
  1088 +#ifdef TCG_TARGET_HAS_not_i64
  1089 + case INDEX_op_not_i64:
  1090 + dest = args[0];
  1091 + src = args[1];
  1092 + if (const_temps[src]) {
  1093 + const_temps[dest] = 1;
  1094 + *opc_ptr = INDEX_op_movi_i64;
  1095 + args[1] = temp_values[dest] = ~temp_values[src];
  1096 + } else {
  1097 + const_temps[dest] = 0;
  1098 + }
  1099 + break;
  1100 +#endif
  1101 + case INDEX_op_add_i32:
  1102 + case INDEX_op_sub_i32:
  1103 + case INDEX_op_mul_i32:
  1104 + case INDEX_op_and_i32:
  1105 + case INDEX_op_or_i32:
  1106 + case INDEX_op_xor_i32:
  1107 + case INDEX_op_shl_i32:
  1108 + case INDEX_op_shr_i32:
  1109 + dest = args[0];
  1110 + src = args[1];
  1111 + src2 = args[2];
  1112 + if (const_temps[src] && const_temps[src2]) {
  1113 + src_val = temp_values[src];
  1114 + src2_val = temp_values[src2];
  1115 + const_temps[dest] = 1;
  1116 + switch (op) {
  1117 + case INDEX_op_add_i32: dest_val = src_val + src2_val; break;
  1118 + case INDEX_op_sub_i32: dest_val = src_val - src2_val; break;
  1119 + case INDEX_op_mul_i32: dest_val = src_val * src2_val; break;
  1120 + case INDEX_op_and_i32: dest_val = src_val & src2_val; break;
  1121 + case INDEX_op_or_i32: dest_val = src_val | src2_val; break;
  1122 + case INDEX_op_xor_i32: dest_val = src_val ^ src2_val; break;
  1123 + case INDEX_op_shl_i32: dest_val = src_val << src2_val; break;
  1124 + case INDEX_op_shr_i32: dest_val = src_val >> src2_val; break;
  1125 + default: tcg_abort(); return;
  1126 + }
  1127 + *opc_ptr = INDEX_op_movi_i32;
  1128 + args[1] = temp_values[dest] = dest_val & 0xffffffff;
  1129 + del_args = 1;
  1130 + } else {
  1131 + const_temps[dest] = 0;
  1132 + }
  1133 + break;
  1134 +#if TCG_TARGET_REG_BITS == 64
  1135 + case INDEX_op_add_i64:
  1136 + case INDEX_op_sub_i64:
  1137 + case INDEX_op_mul_i64:
  1138 + case INDEX_op_and_i64:
  1139 + case INDEX_op_or_i64:
  1140 + case INDEX_op_xor_i64:
  1141 + case INDEX_op_shl_i64:
  1142 + case INDEX_op_shr_i64:
  1143 + dest = args[0];
  1144 + src = args[1];
  1145 + src2 = args[2];
  1146 + if (const_temps[src] && const_temps[src2]) {
  1147 + src_val = temp_values[src];
  1148 + src2_val = temp_values[src2];
  1149 + const_temps[dest] = 1;
  1150 + switch (op) {
  1151 + case INDEX_op_add_i64: dest_val = src_val + src2_val; break;
  1152 + case INDEX_op_sub_i64: dest_val = src_val - src2_val; break;
  1153 + case INDEX_op_mul_i64: dest_val = src_val * src2_val; break;
  1154 + case INDEX_op_and_i64: dest_val = src_val & src2_val; break;
  1155 + case INDEX_op_or_i64: dest_val = src_val | src2_val; break;
  1156 + case INDEX_op_xor_i64: dest_val = src_val ^ src2_val; break;
  1157 + case INDEX_op_shl_i64: dest_val = src_val << src2_val; break;
  1158 + case INDEX_op_shr_i64: dest_val = src_val >> src2_val; break;
  1159 + default: tcg_abort(); return;
  1160 + }
  1161 + *opc_ptr = INDEX_op_movi_i64;
  1162 + args[1] = temp_values[dest] = dest_val;
  1163 + del_args = 1;
  1164 + } else {
  1165 + const_temps[dest] = 0;
  1166 + }
  1167 + break;
  1168 +#endif
  1169 + case INDEX_op_call:
  1170 + nb_oargs = args[0] >> 16;
  1171 + nb_iargs = args[0] & 0xffff;
  1172 + nb_cargs = def->nb_cargs;
  1173 + args++;
  1174 + for (i = 0; i < nb_oargs; i++) {
  1175 + const_temps[args[i]] = 0;
  1176 + }
  1177 + break;
  1178 + case INDEX_op_nopn:
  1179 + /* variable number of arguments */
  1180 + nb_cargs = args[0];
  1181 + break;
  1182 + case INDEX_op_set_label:
  1183 + memset(const_temps, 0, s->nb_temps);
  1184 + break;
  1185 + default:
  1186 + if (def->flags & TCG_OPF_BB_END) {
  1187 + memset(const_temps, 0, s->nb_temps);
  1188 + } else {
  1189 + for (i = 0; i < nb_oargs; i++) {
  1190 + const_temps[args[i]] = 0;
  1191 + }
  1192 + }
  1193 + break;
  1194 + }
  1195 + opc_ptr++;
  1196 + args += nb_iargs + nb_oargs + nb_cargs - del_args;
  1197 + if (del_args > 0) {
  1198 + gen_opparam_ptr -= del_args;
  1199 + memmove(args, args + del_args, (gen_opparam_ptr - args) * sizeof(*args));
  1200 + }
  1201 + }
1028 1202  
  1203 + if (args != gen_opparam_ptr)
  1204 + tcg_abort();
1029 1205 }
1030 1206  
1031 1207 #ifdef USE_LIVENESS_ANALYSIS
... ... @@ -1896,6 +2072,14 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
1896 2072 #endif
1897 2073  
1898 2074 #ifdef CONFIG_PROFILER
  2075 + s->const_time -= profile_getclock();
  2076 +#endif
  2077 + tcg_const_analysis(s);
  2078 +#ifdef CONFIG_PROFILER
  2079 + s->const_time += profile_getclock();
  2080 +#endif
  2081 +
  2082 +#ifdef CONFIG_PROFILER
1899 2083 s->la_time -= profile_getclock();
1900 2084 #endif
1901 2085 tcg_liveness_analysis(s);
... ... @@ -2068,6 +2252,8 @@ void tcg_dump_info(FILE *f,
2068 2252 (double)s->interm_time / tot * 100.0);
2069 2253 cpu_fprintf(f, " gen_code time %0.1f%%\n",
2070 2254 (double)s->code_time / tot * 100.0);
  2255 + cpu_fprintf(f, "const/code time %0.1f%%\n",
  2256 + (double)s->const_time / (s->code_time ? s->code_time : 1) * 100.0);
2071 2257 cpu_fprintf(f, "liveness/code time %0.1f%%\n",
2072 2258 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
2073 2259 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
... ...
tcg/tcg.h
... ... @@ -288,6 +288,7 @@ struct TCGContext {
288 288 int64_t code_out_len;
289 289 int64_t interm_time;
290 290 int64_t code_time;
  291 + int64_t const_time;
291 292 int64_t la_time;
292 293 int64_t restore_count;
293 294 int64_t restore_time;
... ...