drm/amdkfd: Use SQC when TCP would fail in gfx10.1 context save
author Laurent Morichetti <laurent.morichetti@amd.com>
Sat, 17 Feb 2024 04:16:41 +0000 (20:16 -0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 6 Mar 2024 20:24:50 +0000 (15:24 -0500)
Similarly to gfx9, gfx10.1 drops vector stores when an xnack error is
raised. To work around this issue, use scalar stores instead of vector
stores when trapsts.xnack_error == 1.

Signed-off-by: Laurent Morichetti <laurent.morichetti@amd.com>
Reviewed-by: Jay Cornwall <jay.cornwall@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm

index 2e9b64edb8d2e0d8c6ec6f54dcd2b248007f2be4..5a0308d26b53c68551128bea941783ef2b066650 100644 (file)
@@ -678,7 +678,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 };
 
 static const uint32_t cwsr_trap_nv1x_hex[] = {
-       0xbf820001, 0xbf8201f5,
+       0xbf820001, 0xbf820394,
        0xb0804004, 0xb978f802,
        0x8a78ff78, 0x00020006,
        0xb97bf803, 0x876eff78,
@@ -769,13 +769,90 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0x877c817c, 0xbf06817c,
        0xbf850002, 0xbeff0380,
        0xbf820002, 0xbeff03c1,
-       0xbf82000b, 0xbef603ff,
-       0x01000000, 0xe0704000,
-       0x705d0000, 0xe0704080,
-       0x705d0100, 0xe0704100,
-       0x705d0200, 0xe0704180,
-       0x705d0300, 0xbf82000a,
-       0xbef603ff, 0x01000000,
+       0xbf820058, 0xbef603ff,
+       0x01000000, 0xb97af803,
+       0x8a7a7aff, 0x10000000,
+       0xbf850049, 0xbe840380,
+       0xd7600000, 0x00000900,
+       0x80048104, 0xd7600001,
+       0x00000900, 0x80048104,
+       0xd7600002, 0x00000900,
+       0x80048104, 0xd7600003,
+       0x00000900, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06a004,
+       0xbf84ffef, 0xbe840380,
+       0xd7600000, 0x00000901,
+       0x80048104, 0xd7600001,
+       0x00000901, 0x80048104,
+       0xd7600002, 0x00000901,
+       0x80048104, 0xd7600003,
+       0x00000901, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06a004,
+       0xbf84ffef, 0xbe840380,
+       0xd7600000, 0x00000902,
+       0x80048104, 0xd7600001,
+       0x00000902, 0x80048104,
+       0xd7600002, 0x00000902,
+       0x80048104, 0xd7600003,
+       0x00000902, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06a004,
+       0xbf84ffef, 0xbe840380,
+       0xd7600000, 0x00000903,
+       0x80048104, 0xd7600001,
+       0x00000903, 0x80048104,
+       0xd7600002, 0x00000903,
+       0x80048104, 0xd7600003,
+       0x00000903, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06a004,
+       0xbf84ffef, 0xbf820060,
+       0xe0704000, 0x705d0000,
+       0xe0704080, 0x705d0100,
+       0xe0704100, 0x705d0200,
+       0xe0704180, 0x705d0300,
+       0xbf820057, 0xbef603ff,
+       0x01000000, 0xb97af803,
+       0x8a7a7aff, 0x10000000,
+       0xbf850049, 0xbe840380,
+       0xd7600000, 0x00000900,
+       0x80048104, 0xd7600001,
+       0x00000900, 0x80048104,
+       0xd7600002, 0x00000900,
+       0x80048104, 0xd7600003,
+       0x00000900, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06c004,
+       0xbf84ffef, 0xbe840380,
+       0xd7600000, 0x00000901,
+       0x80048104, 0xd7600001,
+       0x00000901, 0x80048104,
+       0xd7600002, 0x00000901,
+       0x80048104, 0xd7600003,
+       0x00000901, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06c004,
+       0xbf84ffef, 0xbe840380,
+       0xd7600000, 0x00000902,
+       0x80048104, 0xd7600001,
+       0x00000902, 0x80048104,
+       0xd7600002, 0x00000902,
+       0x80048104, 0xd7600003,
+       0x00000902, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06c004,
+       0xbf84ffef, 0xbe840380,
+       0xd7600000, 0x00000903,
+       0x80048104, 0xd7600001,
+       0x00000903, 0x80048104,
+       0xd7600002, 0x00000903,
+       0x80048104, 0xd7600003,
+       0x00000903, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06c004,
+       0xbf84ffef, 0xbf820008,
        0xe0704000, 0x705d0000,
        0xe0704100, 0x705d0100,
        0xe0704200, 0x705d0200,
@@ -855,9 +932,9 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0xbf850002, 0xbeff0380,
        0xbf820001, 0xbeff03c1,
        0xb97b4306, 0x877bc17b,
-       0xbf840044, 0xbf8a0000,
+       0xbf840086, 0xbf8a0000,
        0x877aff6d, 0x80000000,
-       0xbf840040, 0x8f7b867b,
+       0xbf840082, 0x8f7b867b,
        0x8f7b827b, 0xbef6037b,
        0xb9703a05, 0x80708170,
        0xbf0d9973, 0xbf850002,
@@ -871,16 +948,49 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0xd7660000, 0x000200c1,
        0x16000084, 0x907c9973,
        0x877c817c, 0xbf06817c,
-       0xbefc0380, 0xbf850012,
-       0xbe8303ff, 0x00000080,
+       0xbefc0380, 0xbf850033,
+       0xb97af803, 0x8a7a7aff,
+       0x10000000, 0xbf85001d,
+       0xd8d80000, 0x01000000,
+       0xbf8c0000, 0xbe840380,
+       0xd7600000, 0x00000901,
+       0x80048104, 0xd7600001,
+       0x00000901, 0x80048104,
+       0xd7600002, 0x00000901,
+       0x80048104, 0xd7600003,
+       0x00000901, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06a004,
+       0xbf84ffef, 0x807cff7c,
+       0x00000080, 0xd5250000,
+       0x0001ff00, 0x00000080,
+       0xbf0a7b7c, 0xbf85ffe4,
+       0xbf820044, 0xbe8303ff,
+       0x00000080, 0xbf800000,
        0xbf800000, 0xbf800000,
-       0xbf800000, 0xd8d80000,
+       0xd8d80000, 0x01000000,
+       0xbf8c0000, 0xe0704000,
+       0x705d0100, 0x807c037c,
+       0x80700370, 0xd5250000,
+       0x0001ff00, 0x00000080,
+       0xbf0a7b7c, 0xbf85fff4,
+       0xbf820032, 0xb97af803,
+       0x8a7a7aff, 0x10000000,
+       0xbf85001d, 0xd8d80000,
        0x01000000, 0xbf8c0000,
-       0xe0704000, 0x705d0100,
-       0x807c037c, 0x80700370,
+       0xbe840380, 0xd7600000,
+       0x00000901, 0x80048104,
+       0xd7600001, 0x00000901,
+       0x80048104, 0xd7600002,
+       0x00000901, 0x80048104,
+       0xd7600003, 0x00000901,
+       0x80048104, 0xf469003a,
+       0xe0000000, 0x80709070,
+       0xbf06c004, 0xbf84ffef,
+       0x807cff7c, 0x00000100,
        0xd5250000, 0x0001ff00,
-       0x00000080, 0xbf0a7b7c,
-       0xbf85fff4, 0xbf820011,
+       0x00000100, 0xbf0a7b7c,
+       0xbf85ffe4, 0xbf820011,
        0xbe8303ff, 0x00000100,
        0xbf800000, 0xbf800000,
        0xbf800000, 0xd8d80000,
@@ -898,10 +1008,52 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0xbeff03c1, 0xb97b3a05,
        0x807b817b, 0x8f7b827b,
        0x907c9973, 0x877c817c,
-       0xbf06817c, 0xbf850017,
+       0xbf06817c, 0xbf85006b,
        0xbef603ff, 0x01000000,
        0xbefc0384, 0xbf0a7b7c,
-       0xbf840037, 0x7e008700,
+       0xbf8400fa, 0xb97af803,
+       0x8a7a7aff, 0x10000000,
+       0xbf850050, 0x7e008700,
+       0x7e028701, 0x7e048702,
+       0x7e068703, 0xbe840380,
+       0xd7600000, 0x00000900,
+       0x80048104, 0xd7600001,
+       0x00000900, 0x80048104,
+       0xd7600002, 0x00000900,
+       0x80048104, 0xd7600003,
+       0x00000900, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06a004,
+       0xbf84ffef, 0xbe840380,
+       0xd7600000, 0x00000901,
+       0x80048104, 0xd7600001,
+       0x00000901, 0x80048104,
+       0xd7600002, 0x00000901,
+       0x80048104, 0xd7600003,
+       0x00000901, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06a004,
+       0xbf84ffef, 0xbe840380,
+       0xd7600000, 0x00000902,
+       0x80048104, 0xd7600001,
+       0x00000902, 0x80048104,
+       0xd7600002, 0x00000902,
+       0x80048104, 0xd7600003,
+       0x00000902, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06a004,
+       0xbf84ffef, 0xbe840380,
+       0xd7600000, 0x00000903,
+       0x80048104, 0xd7600001,
+       0x00000903, 0x80048104,
+       0xd7600002, 0x00000903,
+       0x80048104, 0xd7600003,
+       0x00000903, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06a004,
+       0xbf84ffef, 0x807c847c,
+       0xbf0a7b7c, 0xbf85ffb1,
+       0xbf8200a6, 0x7e008700,
        0x7e028701, 0x7e048702,
        0x7e068703, 0xe0704000,
        0x705d0000, 0xe0704080,
@@ -910,9 +1062,51 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0x705d0300, 0x807c847c,
        0x8070ff70, 0x00000200,
        0xbf0a7b7c, 0xbf85ffef,
-       0xbf820025, 0xbef603ff,
+       0xbf820094, 0xbef603ff,
        0x01000000, 0xbefc0384,
-       0xbf0a7b7c, 0xbf840011,
+       0xbf0a7b7c, 0xbf840065,
+       0xb97af803, 0x8a7a7aff,
+       0x10000000, 0xbf850050,
+       0x7e008700, 0x7e028701,
+       0x7e048702, 0x7e068703,
+       0xbe840380, 0xd7600000,
+       0x00000900, 0x80048104,
+       0xd7600001, 0x00000900,
+       0x80048104, 0xd7600002,
+       0x00000900, 0x80048104,
+       0xd7600003, 0x00000900,
+       0x80048104, 0xf469003a,
+       0xe0000000, 0x80709070,
+       0xbf06c004, 0xbf84ffef,
+       0xbe840380, 0xd7600000,
+       0x00000901, 0x80048104,
+       0xd7600001, 0x00000901,
+       0x80048104, 0xd7600002,
+       0x00000901, 0x80048104,
+       0xd7600003, 0x00000901,
+       0x80048104, 0xf469003a,
+       0xe0000000, 0x80709070,
+       0xbf06c004, 0xbf84ffef,
+       0xbe840380, 0xd7600000,
+       0x00000902, 0x80048104,
+       0xd7600001, 0x00000902,
+       0x80048104, 0xd7600002,
+       0x00000902, 0x80048104,
+       0xd7600003, 0x00000902,
+       0x80048104, 0xf469003a,
+       0xe0000000, 0x80709070,
+       0xbf06c004, 0xbf84ffef,
+       0xbe840380, 0xd7600000,
+       0x00000903, 0x80048104,
+       0xd7600001, 0x00000903,
+       0x80048104, 0xd7600002,
+       0x00000903, 0x80048104,
+       0xd7600003, 0x00000903,
+       0x80048104, 0xf469003a,
+       0xe0000000, 0x80709070,
+       0xbf06c004, 0xbf84ffef,
+       0x807c847c, 0xbf0a7b7c,
+       0xbf85ffb1, 0xbf82003b,
        0x7e008700, 0x7e028701,
        0x7e048702, 0x7e068703,
        0xe0704000, 0x705d0000,
@@ -922,179 +1116,192 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
        0x807c847c, 0x8070ff70,
        0x00000400, 0xbf0a7b7c,
        0xbf85ffef, 0xb97b1e06,
-       0x877bc17b, 0xbf84000c,
+       0x877bc17b, 0xbf840027,
        0x8f7b837b, 0x807b7c7b,
        0xbefe03c1, 0xbeff0380,
-       0x7e008700, 0xe0704000,
-       0x705d0000, 0x807c817c,
-       0x8070ff70, 0x00000080,
-       0xbf0a7b7c, 0xbf85fff8,
-       0xbf820144, 0xbef4037e,
-       0x8775ff7f, 0x0000ffff,
-       0x8875ff75, 0x00040000,
-       0xbef60380, 0xbef703ff,
-       0x10807fac, 0xb97202dc,
-       0x8f729972, 0x876eff7f,
-       0x04000000, 0xbf840034,
+       0xb97af803, 0x8a7a7aff,
+       0x10000000, 0xbf850017,
+       0x7e008700, 0xbe840380,
+       0xd7600000, 0x00000900,
+       0x80048104, 0xd7600001,
+       0x00000900, 0x80048104,
+       0xd7600002, 0x00000900,
+       0x80048104, 0xd7600003,
+       0x00000900, 0x80048104,
+       0xf469003a, 0xe0000000,
+       0x80709070, 0xbf06c004,
+       0xbf84ffef, 0x807c817c,
+       0xbf0a7b7c, 0xbf85ffea,
+       0xbf820008, 0x7e008700,
+       0xe0704000, 0x705d0000,
+       0x807c817c, 0x8070ff70,
+       0x00000080, 0xbf0a7b7c,
+       0xbf85fff8, 0xbf820144,
+       0xbef4037e, 0x8775ff7f,
+       0x0000ffff, 0x8875ff75,
+       0x00040000, 0xbef60380,
+       0xbef703ff, 0x10807fac,
+       0xb97202dc, 0x8f729972,
+       0x876eff7f, 0x04000000,
+       0xbf840034, 0xbefe03c1,
+       0x907c9972, 0x877c817c,
+       0xbf06817c, 0xbf850002,
+       0xbeff0380, 0xbf820001,
+       0xbeff03c1, 0xb96f4306,
+       0x876fc16f, 0xbf840029,
+       0x8f6f866f, 0x8f6f826f,
+       0xbef6036f, 0xb9783a05,
+       0x80788178, 0xbf0d9972,
+       0xbf850002, 0x8f788978,
+       0xbf820001, 0x8f788a78,
+       0xb96e1e06, 0x8f6e8a6e,
+       0x80786e78, 0x8078ff78,
+       0x00000200, 0x8078ff78,
+       0x00000080, 0xbef603ff,
+       0x01000000, 0x907c9972,
+       0x877c817c, 0xbf06817c,
+       0xbefc0380, 0xbf850009,
+       0xe0310000, 0x781d0000,
+       0x807cff7c, 0x00000080,
+       0x8078ff78, 0x00000080,
+       0xbf0a6f7c, 0xbf85fff8,
+       0xbf820008, 0xe0310000,
+       0x781d0000, 0x807cff7c,
+       0x00000100, 0x8078ff78,
+       0x00000100, 0xbf0a6f7c,
+       0xbf85fff8, 0xbef80380,
        0xbefe03c1, 0x907c9972,
        0x877c817c, 0xbf06817c,
        0xbf850002, 0xbeff0380,
        0xbf820001, 0xbeff03c1,
-       0xb96f4306, 0x876fc16f,
-       0xbf840029, 0x8f6f866f,
-       0x8f6f826f, 0xbef6036f,
-       0xb9783a05, 0x80788178,
-       0xbf0d9972, 0xbf850002,
-       0x8f788978, 0xbf820001,
-       0x8f788a78, 0xb96e1e06,
-       0x8f6e8a6e, 0x80786e78,
+       0xb96f3a05, 0x806f816f,
+       0x8f6f826f, 0x907c9972,
+       0x877c817c, 0xbf06817c,
+       0xbf850024, 0xbef603ff,
+       0x01000000, 0xbeee0378,
        0x8078ff78, 0x00000200,
-       0x8078ff78, 0x00000080,
-       0xbef603ff, 0x01000000,
-       0x907c9972, 0x877c817c,
-       0xbf06817c, 0xbefc0380,
-       0xbf850009, 0xe0310000,
-       0x781d0000, 0x807cff7c,
-       0x00000080, 0x8078ff78,
-       0x00000080, 0xbf0a6f7c,
-       0xbf85fff8, 0xbf820008,
-       0xe0310000, 0x781d0000,
-       0x807cff7c, 0x00000100,
-       0x8078ff78, 0x00000100,
-       0xbf0a6f7c, 0xbf85fff8,
-       0xbef80380, 0xbefe03c1,
-       0x907c9972, 0x877c817c,
-       0xbf06817c, 0xbf850002,
-       0xbeff0380, 0xbf820001,
-       0xbeff03c1, 0xb96f3a05,
-       0x806f816f, 0x8f6f826f,
-       0x907c9972, 0x877c817c,
-       0xbf06817c, 0xbf850024,
-       0xbef603ff, 0x01000000,
-       0xbeee0378, 0x8078ff78,
-       0x00000200, 0xbefc0384,
-       0xbf0a6f7c, 0xbf840050,
+       0xbefc0384, 0xbf0a6f7c,
+       0xbf840050, 0xe0304000,
+       0x785d0000, 0xe0304080,
+       0x785d0100, 0xe0304100,
+       0x785d0200, 0xe0304180,
+       0x785d0300, 0xbf8c3f70,
+       0x7e008500, 0x7e028501,
+       0x7e048502, 0x7e068503,
+       0x807c847c, 0x8078ff78,
+       0x00000200, 0xbf0a6f7c,
+       0xbf85ffee, 0xe0304000,
+       0x6e5d0000, 0xe0304080,
+       0x6e5d0100, 0xe0304100,
+       0x6e5d0200, 0xe0304180,
+       0x6e5d0300, 0xbf8c3f70,
+       0xbf820034, 0xbef603ff,
+       0x01000000, 0xbeee0378,
+       0x8078ff78, 0x00000400,
+       0xbefc0384, 0xbf0a6f7c,
+       0xbf840012, 0xe0304000,
+       0x785d0000, 0xe0304100,
+       0x785d0100, 0xe0304200,
+       0x785d0200, 0xe0304300,
+       0x785d0300, 0xbf8c3f70,
+       0x7e008500, 0x7e028501,
+       0x7e048502, 0x7e068503,
+       0x807c847c, 0x8078ff78,
+       0x00000400, 0xbf0a6f7c,
+       0xbf85ffee, 0xb96f1e06,
+       0x876fc16f, 0xbf84000e,
+       0x8f6f836f, 0x806f7c6f,
+       0xbefe03c1, 0xbeff0380,
        0xe0304000, 0x785d0000,
-       0xe0304080, 0x785d0100,
-       0xe0304100, 0x785d0200,
-       0xe0304180, 0x785d0300,
        0xbf8c3f70, 0x7e008500,
-       0x7e028501, 0x7e048502,
-       0x7e068503, 0x807c847c,
-       0x8078ff78, 0x00000200,
-       0xbf0a6f7c, 0xbf85ffee,
+       0x807c817c, 0x8078ff78,
+       0x00000080, 0xbf0a6f7c,
+       0xbf85fff7, 0xbeff03c1,
        0xe0304000, 0x6e5d0000,
-       0xe0304080, 0x6e5d0100,
-       0xe0304100, 0x6e5d0200,
-       0xe0304180, 0x6e5d0300,
-       0xbf8c3f70, 0xbf820034,
-       0xbef603ff, 0x01000000,
-       0xbeee0378, 0x8078ff78,
-       0x00000400, 0xbefc0384,
-       0xbf0a6f7c, 0xbf840012,
-       0xe0304000, 0x785d0000,
-       0xe0304100, 0x785d0100,
-       0xe0304200, 0x785d0200,
-       0xe0304300, 0x785d0300,
-       0xbf8c3f70, 0x7e008500,
-       0x7e028501, 0x7e048502,
-       0x7e068503, 0x807c847c,
-       0x8078ff78, 0x00000400,
-       0xbf0a6f7c, 0xbf85ffee,
-       0xb96f1e06, 0x876fc16f,
-       0xbf84000e, 0x8f6f836f,
-       0x806f7c6f, 0xbefe03c1,
-       0xbeff0380, 0xe0304000,
-       0x785d0000, 0xbf8c3f70,
-       0x7e008500, 0x807c817c,
-       0x8078ff78, 0x00000080,
-       0xbf0a6f7c, 0xbf85fff7,
-       0xbeff03c1, 0xe0304000,
-       0x6e5d0000, 0xe0304100,
-       0x6e5d0100, 0xe0304200,
-       0x6e5d0200, 0xe0304300,
-       0x6e5d0300, 0xbf8c3f70,
+       0xe0304100, 0x6e5d0100,
+       0xe0304200, 0x6e5d0200,
+       0xe0304300, 0x6e5d0300,
+       0xbf8c3f70, 0xb9783a05,
+       0x80788178, 0xbf0d9972,
+       0xbf850002, 0x8f788978,
+       0xbf820001, 0x8f788a78,
+       0xb96e1e06, 0x8f6e8a6e,
+       0x80786e78, 0x8078ff78,
+       0x00000200, 0x80f8ff78,
+       0x00000050, 0xbef603ff,
+       0x01000000, 0xbefc03ff,
+       0x0000006c, 0x80f89078,
+       0xf429003a, 0xf0000000,
+       0xbf8cc07f, 0x80fc847c,
+       0xbf800000, 0xbe803100,
+       0xbe823102, 0x80f8a078,
+       0xf42d003a, 0xf0000000,
+       0xbf8cc07f, 0x80fc887c,
+       0xbf800000, 0xbe803100,
+       0xbe823102, 0xbe843104,
+       0xbe863106, 0x80f8c078,
+       0xf431003a, 0xf0000000,
+       0xbf8cc07f, 0x80fc907c,
+       0xbf800000, 0xbe803100,
+       0xbe823102, 0xbe843104,
+       0xbe863106, 0xbe883108,
+       0xbe8a310a, 0xbe8c310c,
+       0xbe8e310e, 0xbf06807c,
+       0xbf84fff0, 0xba80f801,
+       0x00000000, 0xbf8a0000,
        0xb9783a05, 0x80788178,
        0xbf0d9972, 0xbf850002,
        0x8f788978, 0xbf820001,
        0x8f788a78, 0xb96e1e06,
        0x8f6e8a6e, 0x80786e78,
        0x8078ff78, 0x00000200,
-       0x80f8ff78, 0x00000050,
        0xbef603ff, 0x01000000,
-       0xbefc03ff, 0x0000006c,
-       0x80f89078, 0xf429003a,
-       0xf0000000, 0xbf8cc07f,
-       0x80fc847c, 0xbf800000,
-       0xbe803100, 0xbe823102,
-       0x80f8a078, 0xf42d003a,
-       0xf0000000, 0xbf8cc07f,
-       0x80fc887c, 0xbf800000,
-       0xbe803100, 0xbe823102,
-       0xbe843104, 0xbe863106,
-       0x80f8c078, 0xf431003a,
-       0xf0000000, 0xbf8cc07f,
-       0x80fc907c, 0xbf800000,
-       0xbe803100, 0xbe823102,
-       0xbe843104, 0xbe863106,
-       0xbe883108, 0xbe8a310a,
-       0xbe8c310c, 0xbe8e310e,
-       0xbf06807c, 0xbf84fff0,
-       0xba80f801, 0x00000000,
-       0xbf8a0000, 0xb9783a05,
-       0x80788178, 0xbf0d9972,
-       0xbf850002, 0x8f788978,
-       0xbf820001, 0x8f788a78,
-       0xb96e1e06, 0x8f6e8a6e,
-       0x80786e78, 0x8078ff78,
-       0x00000200, 0xbef603ff,
-       0x01000000, 0xf4211bfa,
+       0xf4211bfa, 0xf0000000,
+       0x80788478, 0xf4211b3a,
        0xf0000000, 0x80788478,
-       0xf4211b3a, 0xf0000000,
-       0x80788478, 0xf4211b7a,
+       0xf4211b7a, 0xf0000000,
+       0x80788478, 0xf4211c3a,
        0xf0000000, 0x80788478,
-       0xf4211c3a, 0xf0000000,
-       0x80788478, 0xf4211c7a,
+       0xf4211c7a, 0xf0000000,
+       0x80788478, 0xf4211eba,
        0xf0000000, 0x80788478,
-       0xf4211eba, 0xf0000000,
-       0x80788478, 0xf4211efa,
+       0xf4211efa, 0xf0000000,
+       0x80788478, 0xf4211e7a,
        0xf0000000, 0x80788478,
-       0xf4211e7a, 0xf0000000,
-       0x80788478, 0xf4211cfa,
+       0xf4211cfa, 0xf0000000,
+       0x80788478, 0xf4211bba,
        0xf0000000, 0x80788478,
+       0xbf8cc07f, 0xb9eef814,
        0xf4211bba, 0xf0000000,
        0x80788478, 0xbf8cc07f,
-       0xb9eef814, 0xf4211bba,
-       0xf0000000, 0x80788478,
-       0xbf8cc07f, 0xb9eef815,
-       0xbefc036f, 0xbefe0370,
-       0xbeff0371, 0x876f7bff,
-       0x000003ff, 0xb9ef4803,
-       0xb9f9f816, 0x876f7bff,
-       0xfffff800, 0x906f8b6f,
-       0xb9efa2c3, 0xb9f3f801,
-       0xb96e3a05, 0x806e816e,
-       0xbf0d9972, 0xbf850002,
-       0x8f6e896e, 0xbf820001,
-       0x8f6e8a6e, 0xb96f1e06,
-       0x8f6f8a6f, 0x806e6f6e,
-       0x806eff6e, 0x00000200,
-       0x806e746e, 0x826f8075,
-       0x876fff6f, 0x0000ffff,
-       0xf4091c37, 0xfa000050,
-       0xf4091d37, 0xfa000060,
-       0xf4011e77, 0xfa000074,
-       0xbf8cc07f, 0x906e8977,
-       0x876fff6e, 0x003f8000,
-       0x906e8677, 0x876eff6e,
-       0x02000000, 0x886e6f6e,
-       0xb9eef807, 0x876dff6d,
-       0x0000ffff, 0x87fe7e7e,
-       0x87ea6a6a, 0xb9faf802,
-       0xbe80226c, 0xbf9b0000,
+       0xb9eef815, 0xbefc036f,
+       0xbefe0370, 0xbeff0371,
+       0x876f7bff, 0x000003ff,
+       0xb9ef4803, 0xb9f9f816,
+       0x876f7bff, 0xfffff800,
+       0x906f8b6f, 0xb9efa2c3,
+       0xb9f3f801, 0xb96e3a05,
+       0x806e816e, 0xbf0d9972,
+       0xbf850002, 0x8f6e896e,
+       0xbf820001, 0x8f6e8a6e,
+       0xb96f1e06, 0x8f6f8a6f,
+       0x806e6f6e, 0x806eff6e,
+       0x00000200, 0x806e746e,
+       0x826f8075, 0x876fff6f,
+       0x0000ffff, 0xf4091c37,
+       0xfa000050, 0xf4091d37,
+       0xfa000060, 0xf4011e77,
+       0xfa000074, 0xbf8cc07f,
+       0x906e8977, 0x876fff6e,
+       0x003f8000, 0x906e8677,
+       0x876eff6e, 0x02000000,
+       0x886e6f6e, 0xb9eef807,
+       0x876dff6d, 0x0000ffff,
+       0x87fe7e7e, 0x87ea6a6a,
+       0xb9faf802, 0xbe80226c,
+       0xbf9b0000, 0xbf9f0000,
        0xbf9f0000, 0xbf9f0000,
        0xbf9f0000, 0xbf9f0000,
-       0xbf9f0000, 0x00000000,
 };
 
 static const uint32_t cwsr_trap_arcturus_hex[] = {
index 7568ff3af9786f22349dbf017826e9cc92e29d10..e1aaa5ce0784e41917d7389dea792d721b86396c 100644 (file)
@@ -44,6 +44,7 @@
 #define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
 #define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
 #define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO)
+#define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
 
 var SINGLE_STEP_MISSED_WORKAROUND              = 1     //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
 
@@ -86,6 +87,7 @@ var SQ_WAVE_TRAPSTS_WAVE_START_MASK           = 0x20000
 var SQ_WAVE_TRAPSTS_WAVE_END_MASK              = 0x40000
 var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK       = 0x100000
 #endif
+var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK           = 0x10000000
 
 var SQ_WAVE_MODE_EXCP_EN_SHIFT                 = 12
 var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT      = 19
@@ -475,6 +477,16 @@ L_SAVE_4VGPR_WAVE32:
 
        // VGPR Allocated in 4-GPR granularity
 
+#if SAVE_AFTER_XNACK_ERROR
+       check_if_tcp_store_ok()                                                 //scc=1 when TRAPSTS.XNACK_ERROR is clear
+       s_cbranch_scc1 L_SAVE_FIRST_VGPRS32_WITH_TCP                            //no XNACK error: keep the vector (TCP) store path
+
+       write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)     //XNACK error: save 4 VGPRs starting at v0 with scalar (SQC) stores, 32 lanes each
+       s_branch L_SAVE_HWREG                                                   //skip the vector stores that follow
+
+L_SAVE_FIRST_VGPRS32_WITH_TCP:
+#endif
+
 #if !NO_SQC_STORE
        buffer_store_dword      v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
 #endif
@@ -488,6 +500,16 @@ L_SAVE_4VGPR_WAVE64:
 
        // VGPR Allocated in 4-GPR granularity
 
+#if  SAVE_AFTER_XNACK_ERROR
+       check_if_tcp_store_ok()                                                 //scc=1 when TRAPSTS.XNACK_ERROR is clear
+       s_cbranch_scc1 L_SAVE_FIRST_VGPRS64_WITH_TCP                            //no XNACK error: keep the vector (TCP) store path
+
+       write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)     //XNACK error: save 4 VGPRs starting at v0 with scalar (SQC) stores, 64 lanes each
+       s_branch L_SAVE_HWREG                                                   //skip the vector stores that follow
+
+L_SAVE_FIRST_VGPRS64_WITH_TCP:
+#endif
+
 #if !NO_SQC_STORE
        buffer_store_dword      v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
 #endif
@@ -660,6 +682,26 @@ L_SAVE_LDS_NORMAL:
        s_cbranch_scc1  L_SAVE_LDS_W64
 
 L_SAVE_LDS_W32:
+#if SAVE_AFTER_XNACK_ERROR
+       check_if_tcp_store_ok()                                                 //scc=1 when TRAPSTS.XNACK_ERROR is clear
+       s_cbranch_scc1 L_SAVE_LDS_WITH_TCP_W32                                  //no XNACK error: use the vector (TCP) store loop
+
+L_SAVE_LDS_LOOP_SQC_W32:
+       ds_read_b32     v1, v0                                                  //v1 = LDS dword at the per-lane address in v0
+       s_waitcnt       0                                                       //wait for the LDS read before reading v1 lanes
+
+       write_vgprs_to_mem_with_sqc_w32(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)     //scalar-store 32 lanes of v1; also advances s_save_mem_offset
+
+       s_add_u32       m0, m0, 128                                             //each iteration saves 128 bytes of LDS (32 lanes x 4 bytes)
+       v_add_nc_u32    v0, v0, 128                                             //LDS read address increased by 128 bytes
+       s_cmp_lt_u32    m0, s_save_alloc_size                                   //scc=(m0 < s_save_alloc_size) ? 1 : 0
+       s_cbranch_scc1  L_SAVE_LDS_LOOP_SQC_W32                                 //loop while LDS remains to be saved
+
+       s_branch        L_SAVE_LDS_DONE                                         //skip the TCP store loop
+
+L_SAVE_LDS_WITH_TCP_W32:
+#endif
+
        s_mov_b32       s3, 128
        s_nop           0
        s_nop           0
@@ -669,7 +711,7 @@ L_SAVE_LDS_LOOP_W32:
        s_waitcnt       0
        buffer_store_dword      v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
 
-       s_add_u32       m0, m0, s3                                              //every buffer_store_lds does 256 bytes
+       s_add_u32       m0, m0, s3                                              //every buffer_store_lds does 128 bytes
        s_add_u32       s_save_mem_offset, s_save_mem_offset, s3
        v_add_nc_u32    v0, v0, 128                                             //mem offset increased by 128 bytes
        s_cmp_lt_u32    m0, s_save_alloc_size                                   //scc=(m0 < s_save_alloc_size) ? 1 : 0
@@ -678,6 +720,26 @@ L_SAVE_LDS_LOOP_W32:
        s_branch        L_SAVE_LDS_DONE
 
 L_SAVE_LDS_W64:
+#if  SAVE_AFTER_XNACK_ERROR
+       check_if_tcp_store_ok()                                                 //scc=1 when TRAPSTS.XNACK_ERROR is clear
+       s_cbranch_scc1 L_SAVE_LDS_WITH_TCP_W64                                  //no XNACK error: use the vector (TCP) store loop
+
+L_SAVE_LDS_LOOP_SQC_W64:
+       ds_read_b32     v1, v0                                                  //v1 = LDS dword at the per-lane address in v0
+       s_waitcnt       0                                                       //wait for the LDS read before reading v1 lanes
+
+       write_vgprs_to_mem_with_sqc_w64(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)     //scalar-store 64 lanes of v1; also advances s_save_mem_offset
+
+       s_add_u32       m0, m0, 256                                             //each iteration saves 256 bytes of LDS (64 lanes x 4 bytes)
+       v_add_nc_u32    v0, v0, 256                                             //LDS read address increased by 256 bytes
+       s_cmp_lt_u32    m0, s_save_alloc_size                                   //scc=(m0 < s_save_alloc_size) ? 1 : 0
+       s_cbranch_scc1  L_SAVE_LDS_LOOP_SQC_W64                                 //loop while LDS remains to be saved
+
+       s_branch        L_SAVE_LDS_DONE                                         //skip the TCP store loop
+
+L_SAVE_LDS_WITH_TCP_W64:
+#endif
+
        s_mov_b32       s3, 256
        s_nop           0
        s_nop           0
@@ -727,6 +789,25 @@ L_SAVE_VGPR_NORMAL:
        s_cmp_lt_u32    m0, s_save_alloc_size
        s_cbranch_scc0  L_SAVE_VGPR_END
 
+#if  SAVE_AFTER_XNACK_ERROR
+       check_if_tcp_store_ok()                                                 //scc=1 when TRAPSTS.XNACK_ERROR is clear
+       s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP                                     //no XNACK error: use the vector (TCP) store loop
+
+L_SAVE_VGPR_LOOP_SQC_W32:
+       v_movrels_b32   v0, v0                                                  //v0 = v[0+m0]
+       v_movrels_b32   v1, v1                                                  //v1 = v[1+m0]
+       v_movrels_b32   v2, v2                                                  //v2 = v[2+m0]
+       v_movrels_b32   v3, v3                                                  //v3 = v[3+m0]
+
+       write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)     //scalar-store 4 VGPRs, 32 lanes each; also advances s_save_mem_offset
+
+       s_add_u32 m0, m0, 4                                                     //next group of 4 VGPRs
+       s_cmp_lt_u32 m0, s_save_alloc_size                                      //scc=(m0 < s_save_alloc_size) ? 1 : 0
+       s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC_W32                                 //loop while VGPRs remain to be saved
+
+       s_branch L_SAVE_VGPR_END                                                //skip the TCP store loop
+#endif
+
 L_SAVE_VGPR_W32_LOOP:
        v_movrels_b32   v0, v0                                                  //v0 = v[0+m0]
        v_movrels_b32   v1, v1                                                  //v1 = v[1+m0]
@@ -753,6 +834,25 @@ L_SAVE_VGPR_WAVE64:
        s_cmp_lt_u32    m0, s_save_alloc_size
        s_cbranch_scc0  L_SAVE_SHARED_VGPR
 
+#if  SAVE_AFTER_XNACK_ERROR
+       check_if_tcp_store_ok()                                                 //scc=1 when TRAPSTS.XNACK_ERROR is clear
+       s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP                                     //no XNACK error: use the vector (TCP) store loop
+
+L_SAVE_VGPR_LOOP_SQC_W64:
+       v_movrels_b32   v0, v0                                                  //v0 = v[0+m0]
+       v_movrels_b32   v1, v1                                                  //v1 = v[1+m0]
+       v_movrels_b32   v2, v2                                                  //v2 = v[2+m0]
+       v_movrels_b32   v3, v3                                                  //v3 = v[3+m0]
+
+       write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)     //scalar-store 4 VGPRs, 64 lanes each; also advances s_save_mem_offset
+
+       s_add_u32 m0, m0, 4                                                     //next group of 4 VGPRs
+       s_cmp_lt_u32 m0, s_save_alloc_size                                      //scc=(m0 < s_save_alloc_size) ? 1 : 0
+       s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC_W64                                 //loop while VGPRs remain to be saved
+
+       s_branch L_SAVE_VGPR_END                                                //skip the TCP store loop and the code after it
+#endif
+
 L_SAVE_VGPR_W64_LOOP:
        v_movrels_b32   v0, v0                                                  //v0 = v[0+m0]
        v_movrels_b32   v1, v1                                                  //v1 = v[1+m0]
@@ -780,6 +880,23 @@ L_SAVE_SHARED_VGPR:
        s_add_u32       s_save_alloc_size, s_save_alloc_size, m0
        s_mov_b32       exec_lo, 0xFFFFFFFF
        s_mov_b32       exec_hi, 0x00000000
+
+#if  SAVE_AFTER_XNACK_ERROR
+       check_if_tcp_store_ok()                                                 //scc=1 when TRAPSTS.XNACK_ERROR is clear
+       s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP                           //no XNACK error: use the vector (TCP) store loop
+
+L_SAVE_SHARED_VGPR_WAVE64_LOOP_SQC:
+       v_movrels_b32   v0, v0                                                  //v0 = v[0+m0]
+
+       write_vgprs_to_mem_with_sqc_w64(v0, 1, s_save_buf_rsrc0, s_save_mem_offset)     //NOTE(review): writes 64 lanes per shared VGPR although exec_hi was set to 0 above - confirm this matches the save-area layout
+
+       s_add_u32 m0, m0, 1                                                     //next shared VGPR
+       s_cmp_lt_u32 m0, s_save_alloc_size                                      //scc=(m0 < s_save_alloc_size) ? 1 : 0
+       s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP_SQC                       //loop while shared VGPRs remain to be saved
+
+       s_branch L_SAVE_VGPR_END                                                //skip the TCP store loop
+#endif
+
 L_SAVE_SHARED_VGPR_WAVE64_LOOP:
        v_movrels_b32   v0, v0                                                  //v0 = v[0+m0]
        buffer_store_dword      v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
@@ -1190,6 +1307,43 @@ function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
        s_buffer_load_dwordx4   s, s_rsrc, s_mem_offset glc:1
 end
 
+#if SAVE_AFTER_XNACK_ERROR
+function check_if_tcp_store_ok                                                 //out: scc=1 if vector (TCP) stores can be used, scc=0 if not; clobbers s_save_tmp
+       // If TRAPSTS.XNACK_ERROR=1 then TCP stores will fail.
+       s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)                          //s_save_tmp = TRAPSTS
+       s_andn2_b32 s_save_tmp, SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK, s_save_tmp    //s_save_tmp = MASK & ~TRAPSTS; scc=(result != 0), i.e. 1 when XNACK_ERROR is clear
+
+L_TCP_STORE_CHECK_DONE:                                                        //NOTE(review): no branch to this label is visible in this patch - confirm it is needed
+end
+
+function write_vgpr_to_mem_with_sqc(vgpr, n_lanes, s_rsrc, s_mem_offset)       //store lanes 0..n_lanes-1 of vgpr with scalar stores; clobbers s0-s4 and advances s_mem_offset by 4*n_lanes bytes
+       s_mov_b32 s4, 0                                                         //s4 = index of the next lane to read
+
+L_WRITE_VGPR_LANE_LOOP:
+       for var lane = 0; lane < 4; ++lane                                      //unrolled: gather 4 consecutive lanes into s0-s3
+               v_readlane_b32 s[lane], vgpr, s4                                //s[lane] = vgpr[lane s4]
+               s_add_u32 s4, s4, 1
+       end
+
+       s_buffer_store_dwordx4 s[0:3], s_rsrc, s_mem_offset glc:1               //write the 4 gathered dwords (16 bytes)
+
+       s_add_u32 s_mem_offset, s_mem_offset, 0x10                              //advance past the 16 bytes just written
+       s_cmp_eq_u32 s4, n_lanes                                                //s4 grows by 4 per pass, so n_lanes must be a multiple of 4 for this to hit
+       s_cbranch_scc0 L_WRITE_VGPR_LANE_LOOP                                   //repeat until all n_lanes lanes are stored
+end
+
+function write_vgprs_to_mem_with_sqc_w32(vgpr0, n_vgprs, s_rsrc, s_mem_offset) //wave32: scalar-store n_vgprs registers starting at vgpr0, 32 lanes (128 bytes) each
+       for var vgpr = 0; vgpr < n_vgprs; ++vgpr                                //expanded once per register; s_mem_offset carries over between registers
+               write_vgpr_to_mem_with_sqc(vgpr0[vgpr], 32, s_rsrc, s_mem_offset)
+       end
+end
+
+function write_vgprs_to_mem_with_sqc_w64(vgpr0, n_vgprs, s_rsrc, s_mem_offset) //wave64: scalar-store n_vgprs registers starting at vgpr0, 64 lanes (256 bytes) each
+       for var vgpr = 0; vgpr < n_vgprs; ++vgpr                                //expanded once per register; s_mem_offset carries over between registers
+               write_vgpr_to_mem_with_sqc(vgpr0[vgpr], 64, s_rsrc, s_mem_offset)
+       end
+end
+#endif
 
 function get_lds_size_bytes(s_lds_size_byte)
        s_getreg_b32    s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)