Hexagon (target/hexagon) Use QEMU decodetree (32-bit instructions)
authorTaylor Simpson <ltaylorsimpson@gmail.com>
Mon, 15 Jan 2024 22:14:41 +0000 (15:14 -0700)
committerBrian Cain <bcain@quicinc.com>
Mon, 22 Jan 2024 06:02:33 +0000 (22:02 -0800)
The Decodetree Specification can be found here
https://www.qemu.org/docs/master/devel/decodetree.html

Covers all 32-bit instructions, including HVX

We generate separate decoders for each instruction class.  The reason
will be more apparent in the next patch in this series.

We add 2 new scripts
    gen_decodetree.py        Generate the input to decodetree.py
    gen_trans_funcs.py       Generate the trans_* functions used by the
                             output of decodetree.py

Since the functions generated by decodetree.py take DisasContext * as an
argument, we add the argument to a couple of functions that didn't need
it previously.  We also set the insn field in DisasContext during decode
because it is used by the trans_* functions.

There is a g_assert_not_reached() in decode_insns() in decode.c to
verify we never try to use the old decoder on 32-bit instructions

Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
Reviewed-by: Brian Cain <bcain@quicinc.com>
Message-Id: <20240115221443.365287-2-ltaylorsimpson@gmail.com>
Signed-off-by: Brian Cain <bcain@quicinc.com>
target/hexagon/README
target/hexagon/decode.c
target/hexagon/decode.h
target/hexagon/gen_decodetree.py [new file with mode: 0755]
target/hexagon/gen_trans_funcs.py [new file with mode: 0755]
target/hexagon/meson.build
target/hexagon/translate.c

index 69b2ffe9bb2bdfb6b48b589584d3013e49b4b15b..1b2a4d0eaca9a9574c00ed8dc829109179d82a3e 100644 (file)
@@ -189,11 +189,16 @@ the packet, and we mark the implicit writes.  After the analysis is performed,
 we initialize the result register for each of the predicated assignments.
 
 In addition to instruction semantics, we use a generator to create the decode
-tree.  This generation is also a two step process.  The first step is to run
-target/hexagon/gen_dectree_import.c to produce
+tree.  This generation is a four step process.
+Step 1 is to run target/hexagon/gen_dectree_import.c to produce
     <BUILD_DIR>/target/hexagon/iset.py
-This file is imported by target/hexagon/dectree.py to produce
-    <BUILD_DIR>/target/hexagon/dectree_generated.h.inc
+Step 2 is to import iset.py into target/hexagon/gen_decodetree.py to produce
+    <BUILD_DIR>/target/hexagon/normal_decode_generated
+    <BUILD_DIR>/target/hexagon/hvx_decode_generated
+Step 3 is to process the above files with QEMU's decodetree.py to produce
+    <BUILD_DIR>/target/hexagon/decode_*_generated.c.inc
+Step 4 is to import iset.py into target/hexagon/gen_trans_funcs.py to produce
+    <BUILD_DIR>/target/hexagon/decodetree_trans_funcs_generated.c.inc
 
 *** Key Files ***
 
index 946c55cc71da2ea2b80a9e5f8c2a5aa1b32a71d7..bddad1f75e865df986f77af2baf764b952732f99 100644 (file)
@@ -52,6 +52,34 @@ DEF_REGMAP(R_8,   8,  0, 1, 2, 3, 4, 5, 6, 7)
 #define DECODE_MAPPED_REG(OPNUM, NAME) \
     insn->regno[OPNUM] = DECODE_REGISTER_##NAME[insn->regno[OPNUM]];
 
+/* Helper functions for decode_*_generated.c.inc */
+#define DECODE_MAPPED(NAME) \
+static int decode_mapped_reg_##NAME(DisasContext *ctx, int x) \
+{ \
+    return DECODE_REGISTER_##NAME[x]; \
+}
+DECODE_MAPPED(R_16)
+DECODE_MAPPED(R_8)
+
+/* Helper function for decodetree_trans_funcs_generated.c.inc */
+static int shift_left(DisasContext *ctx, int x, int n, int immno)
+{
+    int ret = x;
+    Insn *insn = ctx->insn;
+    if (!insn->extension_valid ||
+        insn->which_extended != immno) {
+        ret <<= n;
+    }
+    return ret;
+}
+
+/* Include the generated decoder for 32 bit insn */
+#include "decode_normal_generated.c.inc"
+#include "decode_hvx_generated.c.inc"
+
+/* Include the generated helpers for the decoder */
+#include "decodetree_trans_funcs_generated.c.inc"
+
 typedef struct {
     const struct DectreeTable *table_link;
     const struct DectreeTable *table_link_b;
@@ -550,7 +578,8 @@ apply_extender(Packet *pkt, int i, uint32_t extender)
     int immed_num;
     uint32_t base_immed;
 
-    immed_num = opcode_which_immediate_is_extended(pkt->insn[i].opcode);
+    immed_num = pkt->insn[i].which_extended;
+    g_assert(immed_num == opcode_which_immediate_is_extended(pkt->insn[i].opcode));
     base_immed = pkt->insn[i].immed[immed_num];
 
     pkt->insn[i].immed[immed_num] = extender | fZXTN(6, 32, base_immed);
@@ -762,12 +791,19 @@ decode_insns_tablewalk(Insn *insn, const DectreeTable *table,
 }
 
 static unsigned int
-decode_insns(Insn *insn, uint32_t encoding)
+decode_insns(DisasContext *ctx, Insn *insn, uint32_t encoding)
 {
     const DectreeTable *table;
     if (parse_bits(encoding) != 0) {
+        if (decode_normal(ctx, encoding) ||
+            decode_hvx(ctx, encoding)) {
+            insn->generate = opcode_genptr[insn->opcode];
+            insn->iclass = iclass_bits(encoding);
+            return 1;
+        }
         /* Start with PP table - 32 bit instructions */
         table = &dectree_table_DECODE_ROOT_32;
+        g_assert_not_reached();
     } else {
         /* start with EE table - duplex instructions */
         table = &dectree_table_DECODE_ROOT_EE;
@@ -916,8 +952,8 @@ decode_set_slot_number(Packet *pkt)
  * or number of words used on success
  */
 
-int decode_packet(int max_words, const uint32_t *words, Packet *pkt,
-                  bool disas_only)
+int decode_packet(DisasContext *ctx, int max_words, const uint32_t *words,
+                  Packet *pkt, bool disas_only)
 {
     int num_insns = 0;
     int words_read = 0;
@@ -930,9 +966,11 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt,
     memset(pkt, 0, sizeof(*pkt));
     /* Try to build packet */
     while (!end_of_packet && (words_read < max_words)) {
+        Insn *insn = &pkt->insn[num_insns];
+        ctx->insn = insn;
         encoding32 = words[words_read];
         end_of_packet = is_packet_end(encoding32);
-        new_insns = decode_insns(&pkt->insn[num_insns], encoding32);
+        new_insns = decode_insns(ctx, insn, encoding32);
         g_assert(new_insns > 0);
         /*
          * If we saw an extender, mark next word extended so immediate
@@ -1006,9 +1044,13 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt,
 int disassemble_hexagon(uint32_t *words, int nwords, bfd_vma pc,
                         GString *buf)
 {
+    DisasContext ctx;
     Packet pkt;
 
-    if (decode_packet(nwords, words, &pkt, true) > 0) {
+    memset(&ctx, 0, sizeof(DisasContext));
+    ctx.pkt = &pkt;
+
+    if (decode_packet(&ctx, nwords, words, &pkt, true) > 0) {
         snprint_a_pkt_disas(buf, &pkt, words, pc);
         return pkt.encod_pkt_size_in_bytes;
     } else {
index c66f5ea64d76bf6cc112978953fd33d191a5661b..3f3012b978d84f555c75e08054b2b7043df1fa36 100644 (file)
 #include "cpu.h"
 #include "opcodes.h"
 #include "insn.h"
+#include "translate.h"
 
 void decode_init(void);
 
 void decode_send_insn_to(Packet *packet, int start, int newloc);
 
-int decode_packet(int max_words, const uint32_t *words, Packet *pkt,
-                  bool disas_only);
+int decode_packet(DisasContext *ctx, int max_words, const uint32_t *words,
+                  Packet *pkt, bool disas_only);
 
 #endif
diff --git a/target/hexagon/gen_decodetree.py b/target/hexagon/gen_decodetree.py
new file mode 100755 (executable)
index 0000000..9634554
--- /dev/null
@@ -0,0 +1,190 @@
+#!/usr/bin/env python3
+
+##
+##  Copyright (c) 2024 Taylor Simpson <ltaylorsimpson@gmail.com>
+##
+##  This program is free software; you can redistribute it and/or modify
+##  it under the terms of the GNU General Public License as published by
+##  the Free Software Foundation; either version 2 of the License, or
+##  (at your option) any later version.
+##
+##  This program is distributed in the hope that it will be useful,
+##  but WITHOUT ANY WARRANTY; without even the implied warranty of
+##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##  GNU General Public License for more details.
+##
+##  You should have received a copy of the GNU General Public License
+##  along with this program; if not, see <http://www.gnu.org/licenses/>.
+##
+
+import io
+import re
+
+import sys
+import textwrap
+import iset
+import hex_common
+
+encs = {
+    tag: "".join(reversed(iset.iset[tag]["enc"].replace(" ", "")))
+    for tag in iset.tags
+    if iset.iset[tag]["enc"] != "MISSING ENCODING"
+}
+
+
+regre = re.compile(r"((?<!DUP)[MNORCPQXSGVZA])([stuvwxyzdefg]+)([.]?[LlHh]?)(\d+S?)")
+immre = re.compile(r"[#]([rRsSuUm])(\d+)(?:[:](\d+))?")
+
+
+def ordered_unique(l):
+    return sorted(set(l), key=l.index)
+
+num_registers = {"R": 32, "V": 32}
+
+operand_letters = {
+    "P",
+    "i",
+    "I",
+    "r",
+    "s",
+    "t",
+    "u",
+    "v",
+    "w",
+    "x",
+    "y",
+    "z",
+    "d",
+    "e",
+    "f",
+    "g",
+}
+
+#
+# These instructions have unused operand letters in their encoding
+# They don't correspond to actual operands in the instruction semantics
+# We will mark them as ignored in QEMU decodetree
+#
+tags_with_unused_d_encoding = {
+    "R6_release_at_vi",
+    "R6_release_st_vi",
+    "S4_stored_rl_at_vi",
+    "S4_stored_rl_st_vi",
+    "S2_storew_rl_at_vi",
+    "S2_stored_rl_at_vi",
+    "S2_storew_rl_st_vi",
+}
+
+tags_with_unused_t_encoding = {
+    "R6_release_at_vi",
+    "R6_release_st_vi",
+}
+
+def skip_tag(tag, class_to_decode):
+    enc_class = iset.iset[tag]["enc_class"]
+    return enc_class != class_to_decode
+
+
+##
+## Generate the QEMU decodetree file for each instruction in class_to_decode
+##     For A2_add: Rd32=add(Rs32,Rt32)
+##     We produce:
+##     %A2_add_Rd   0:5
+##     %A2_add_Rs   16:5
+##     %A2_add_Rt   8:5
+##     @A2_add  11110011000.......-.....---..... Rd=%A2_add_Rd Rs=%A2_add_Rs Rt=%A2_add_Rt %PP
+##     A2_add   ..................-.....---..... @A2_add
+##
+def gen_decodetree_file(f, class_to_decode):
+    f.write(f"## DO NOT MODIFY - This file is generated by {sys.argv[0]}\n\n")
+    f.write("%PP\t14:2\n\n")
+    for tag in sorted(encs.keys(), key=iset.tags.index):
+        if skip_tag(tag, class_to_decode):
+            continue
+
+        enc = encs[tag]
+        enc_str = "".join(reversed(encs[tag]))
+        f.write(("#" * 80) + "\n"
+                f"## {tag}:\t{enc_str}\n"
+                "##\n")
+
+
+        regs = ordered_unique(regre.findall(iset.iset[tag]["syntax"]))
+        imms = ordered_unique(immre.findall(iset.iset[tag]["syntax"]))
+
+        # Write the field definitions for the registers
+        for regno, reg in enumerate(regs):
+            reg_type, reg_id, _, reg_enc_size = reg
+            reg_letter = reg_id[0]
+            reg_num_choices = int(reg_enc_size.rstrip("S"))
+            reg_mapping = reg_type + "".join("_" for letter in reg_id) + \
+                          reg_enc_size
+            reg_enc_fields = re.findall(reg_letter + "+", enc)
+
+            # Check for some errors
+            if len(reg_enc_fields) == 0:
+                raise Exception(f"{tag} missing register field!")
+            if len(reg_enc_fields) > 1:
+                raise Exception(f"{tag} has split register field!")
+            reg_enc_field = reg_enc_fields[0]
+            if 2 ** len(reg_enc_field) != reg_num_choices:
+                raise Exception(f"{tag} has incorrect register field width!")
+
+            f.write(f"%{tag}_{reg_type}{reg_id}\t"
+                    f"{enc.index(reg_enc_field)}:{len(reg_enc_field)}")
+
+            if (reg_type in num_registers and
+                reg_num_choices != num_registers[reg_type]):
+                f.write(f"\t!function=decode_mapped_reg_{reg_mapping}")
+            f.write("\n")
+
+        # Write the field definitions for the immediates
+        for imm in imms:
+            immno = 1 if imm[0].isupper() else 0
+            imm_type = imm[0]
+            imm_width = int(imm[1])
+            imm_letter = "i" if imm_type.islower() else "I"
+            fields = []
+            sign_mark = "s" if imm_type.lower() in "sr" else ""
+            for m in reversed(list(re.finditer(imm_letter + "+", enc))):
+                fields.append(f"{m.start()}:{sign_mark}{m.end() - m.start()}")
+                sign_mark = ""
+            field_str = " ".join(fields)
+            f.write(f"%{tag}_{imm_type}{imm_letter}\t{field_str}\n")
+
+        ## Handle instructions with unused encoding letters
+        ## Change the unused letters to ignored
+        if tag in tags_with_unused_d_encoding:
+            enc_str = enc_str.replace("d", "-")
+        if tag in tags_with_unused_t_encoding:
+            enc_str = enc_str.replace("t", "-")
+
+        # Replace the operand letters with .
+        for x in operand_letters:
+            enc_str = enc_str.replace(x, ".")
+
+        # Write the instruction format
+        f.write(f"@{tag}\t{enc_str}")
+        for reg in regs:
+            reg_type = reg[0]
+            reg_id = reg[1]
+            f.write(f" {reg_type}{reg_id}=%{tag}_{reg_type}{reg_id}")
+        for imm in imms:
+            imm_type = imm[0]
+            imm_letter = "i" if imm_type.islower() else "I"
+            f.write(f" {imm_type}{imm_letter}=%{tag}_{imm_type}{imm_letter}")
+
+        f.write(" %PP\n")
+
+         # Replace the 0s and 1s with .
+        enc_str = enc_str.replace("0", ".").replace("1", ".")
+
+        # Write the instruction pattern
+        f.write(f"{tag}\t{enc_str} @{tag}\n")
+
+
+if __name__ == "__main__":
+    hex_common.read_semantics_file(sys.argv[1])
+    class_to_decode = sys.argv[2]
+    with open(sys.argv[3], "w") as f:
+        gen_decodetree_file(f, class_to_decode)
diff --git a/target/hexagon/gen_trans_funcs.py b/target/hexagon/gen_trans_funcs.py
new file mode 100755 (executable)
index 0000000..c907131
--- /dev/null
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+
+##
+##  Copyright (c) 2024 Taylor Simpson <ltaylorsimpson@gmail.com>
+##
+##  This program is free software; you can redistribute it and/or modify
+##  it under the terms of the GNU General Public License as published by
+##  the Free Software Foundation; either version 2 of the License, or
+##  (at your option) any later version.
+##
+##  This program is distributed in the hope that it will be useful,
+##  but WITHOUT ANY WARRANTY; without even the implied warranty of
+##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##  GNU General Public License for more details.
+##
+##  You should have received a copy of the GNU General Public License
+##  along with this program; if not, see <http://www.gnu.org/licenses/>.
+##
+
+import io
+import re
+
+import sys
+import textwrap
+import iset
+import hex_common
+
+encs = {
+    tag: "".join(reversed(iset.iset[tag]["enc"].replace(" ", "")))
+    for tag in iset.tags
+    if iset.iset[tag]["enc"] != "MISSING ENCODING"
+}
+
+
+regre = re.compile(r"((?<!DUP)[MNORCPQXSGVZA])([stuvwxyzdefg]+)([.]?[LlHh]?)(\d+S?)")
+immre = re.compile(r"[#]([rRsSuUm])(\d+)(?:[:](\d+))?")
+
+
+def ordered_unique(l):
+    return sorted(set(l), key=l.index)
+
+
+def skip_tag(tag, classes):
+    enc_class = iset.iset[tag]["enc_class"]
+    return enc_class not in classes
+
+
+def code_fmt(txt):
+    return textwrap.indent(textwrap.dedent(txt), "    ")
+
+open_curly = "{"
+close_curly = "}"
+
+def mark_which_imm_extended(f, tag):
+    immre = re.compile(r"IMMEXT\([rRsSuUm]")
+    imm = immre.findall(hex_common.semdict[tag])
+    if len(imm) == 0:
+        # No extended operand found
+        return
+    letter = re.split("\\(", imm[0])[1]
+    f.write(code_fmt(f"""\
+        insn->which_extended = {0 if letter.islower() else 1};
+    """))
+
+##
+## Generate the QEMU decodetree trans_<tag> function for each instruction
+##     For A2_add: Rd32=add(Rs32,Rt32)
+##     We produce:
+##     static bool trans_A2_add(DisasContext *ctx, arg_A2_add *args)
+##     {
+##         Insn *insn = ctx->insn;
+##         insn->opcode = A2_add;
+##         insn->regno[0] = args->Rd;
+##         insn->regno[1] = args->Rs;
+##         insn->regno[2] = args->Rt;
+##         return true;
+##     }
+##
+def gen_trans_funcs(f, classes):
+    f.write(f"/* DO NOT MODIFY - This file is generated by {sys.argv[0]} */\n\n")
+    for tag in sorted(encs.keys(), key=iset.tags.index):
+        if skip_tag(tag, classes):
+            continue
+
+        regs = ordered_unique(regre.findall(iset.iset[tag]["syntax"]))
+        imms = ordered_unique(immre.findall(iset.iset[tag]["syntax"]))
+
+        f.write(textwrap.dedent(f"""\
+            static bool trans_{tag}(DisasContext *ctx, arg_{tag} *args)
+            {open_curly}
+                Insn *insn = ctx->insn;
+                insn->opcode = {tag};
+        """))
+
+        regno = 0
+        for reg in regs:
+            reg_type = reg[0]
+            reg_id = reg[1]
+            f.write(code_fmt(f"""\
+                insn->regno[{regno}] = args->{reg_type}{reg_id};
+            """))
+            regno += 1
+
+        if len(imms) != 0:
+            mark_which_imm_extended(f, tag)
+
+        for imm in imms:
+            imm_type = imm[0]
+            imm_letter = "i" if imm_type.islower() else "I"
+            immno = 0 if imm_type.islower() else 1
+            imm_shift = int(imm[2]) if imm[2] else 0
+            if imm_shift:
+                f.write(code_fmt(f"""\
+                    insn->immed[{immno}] =
+                        shift_left(ctx, args->{imm_type}{imm_letter},
+                                   {imm_shift}, {immno});
+                """))
+            else:
+                f.write(code_fmt(f"""\
+                    insn->immed[{immno}] = args->{imm_type}{imm_letter};
+                """))
+
+        f.write(textwrap.dedent(f"""\
+                return true;
+            {close_curly}
+        """))
+
+
+if __name__ == "__main__":
+    hex_common.read_semantics_file(sys.argv[1])
+    with open(sys.argv[2], "w") as f:
+        gen_trans_funcs(f, { "NORMAL", "EXT_mmvec" })
index da8e608d0065defad0f18d437e57783286c313cd..831bd5716ab5cfdc7a95a5aa36b48778e658b2ca 100644 (file)
@@ -133,6 +133,61 @@ dectree_generated = custom_target(
 )
 hexagon_ss.add(dectree_generated)
 
+#
+# Generate the input to the QEMU decodetree.py script
+#
+normal_decode_generated = custom_target(
+    'normal_decode_generated',
+    output: 'normal_decode_generated',
+    depends: [iset_py, semantics_generated],
+    env: {'PYTHONPATH': meson.current_build_dir()},
+    command: [python, files('gen_decodetree.py'), semantics_generated, 'NORMAL', '@OUTPUT@'],
+)
+hexagon_ss.add(normal_decode_generated)
+
+hvx_decode_generated = custom_target(
+    'hvx_decode_generated',
+    output: 'hvx_decode_generated',
+    depends: [iset_py, semantics_generated],
+    env: {'PYTHONPATH': meson.current_build_dir()},
+    command: [python, files('gen_decodetree.py'), semantics_generated, 'EXT_mmvec', '@OUTPUT@'],
+)
+hexagon_ss.add(hvx_decode_generated)
+
+#
+# Run the QEMU decodetree.py script to produce the instruction decoder
+#
+decodetree_py = meson.current_source_dir() / '../../scripts/decodetree.py'
+decode_normal_generated = custom_target(
+    'decode_normal_generated.c.inc',
+    output: 'decode_normal_generated.c.inc',
+    input: normal_decode_generated,
+    env: {'PYTHONPATH': meson.current_build_dir()},
+    command: [python, files(decodetree_py), normal_decode_generated, '--static-decode=decode_normal', '-o', '@OUTPUT@'],
+)
+hexagon_ss.add(decode_normal_generated)
+
+decode_hvx_generated = custom_target(
+    'decode_hvx_generated.c.inc',
+    output: 'decode_hvx_generated.c.inc',
+    input: hvx_decode_generated,
+    env: {'PYTHONPATH': meson.current_build_dir()},
+    command: [python, files(decodetree_py), hvx_decode_generated, '--static-decode=decode_hvx', '-o', '@OUTPUT@'],
+)
+hexagon_ss.add(decode_hvx_generated)
+
+#
+# Generate the trans_* functions that the decoder will use
+#
+decodetree_trans_funcs_generated = custom_target(
+    'decodetree_trans_funcs_generated.c.inc',
+    output: 'decodetree_trans_funcs_generated.c.inc',
+    depends: [iset_py, semantics_generated],
+    env: {'PYTHONPATH': meson.current_build_dir()},
+    command: [python, files('gen_trans_funcs.py'), semantics_generated, '@OUTPUT@'],
+)
+hexagon_ss.add(decodetree_trans_funcs_generated)
+
 hexagon_ss.add(files(
     'cpu.c',
     'translate.c',
index 666c06118022b7ef0477a49b1ee60bf9a1d70bf9..95579ae2432f0949c4df2e8591809b56fbec120c 100644 (file)
@@ -1033,10 +1033,10 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
         return;
     }
 
-    if (decode_packet(nwords, words, &pkt, false) > 0) {
+    ctx->pkt = &pkt;
+    if (decode_packet(ctx, nwords, words, &pkt, false) > 0) {
         pkt.pc = ctx->base.pc_next;
         HEX_DEBUG_PRINT_PKT(&pkt);
-        ctx->pkt = &pkt;
         gen_start_packet(ctx);
         for (i = 0; i < pkt.num_insns; i++) {
             ctx->insn = &pkt.insn[i];