#include <linux/cpufeature.h>
 #include <asm/neon.h>
 #include <asm/simd.h>
+#include <crypto/b128ops.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/xts.h>
 #include <crypto/sm4.h>
 asmlinkage void sm4_ce_xts_dec(const u32 *rkey1, u8 *dst, const u8 *src,
                               u8 *tweak, unsigned int nbytes,
                               const u32 *rkey2_enc);
+asmlinkage void sm4_ce_mac_update(const u32 *rkey_enc, u8 *digest,
+                                 const u8 *src, unsigned int nblocks,
+                                 bool enc_before, bool enc_after);
 
 EXPORT_SYMBOL(sm4_ce_expand_key);
 EXPORT_SYMBOL(sm4_ce_crypt_block);
        struct sm4_ctx key2;
 };
 
+/*
+ * Per-tfm MAC context: the expanded SM4 key followed by algorithm-
+ * specific constants. cmac stores E_K(0)*u and E_K(0)*u^2 there,
+ * xcbc stores E_K(0x02..) and E_K(0x03..); cbcmac uses none (the
+ * flexible array is sized via cra_ctxsize: 2 blocks or 0).
+ */
+struct sm4_mac_tfm_ctx {
+       struct sm4_ctx key;
+       u8 __aligned(8) consts[];
+};
+
+/* Per-request MAC state. */
+struct sm4_mac_desc_ctx {
+       unsigned int len;               /* bytes XORed into digest so far, 0..SM4_BLOCK_SIZE */
+       u8 digest[SM4_BLOCK_SIZE];      /* running CBC-MAC value */
+};
+
 static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
                      unsigned int key_len)
 {
        }
 };
 
+/*
+ * cbcmac(sm4) setkey: plain CBC-MAC needs no derived constants, so
+ * this only expands the raw SM4 key into round keys.
+ *
+ * Returns 0 on success, -EINVAL for a wrong key length.
+ */
+static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key,
+                            unsigned int key_len)
+{
+       struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+       if (key_len != SM4_KEY_SIZE)
+               return -EINVAL;
+
+       /* key expansion uses the CE instructions, so wrap in a NEON section */
+       kernel_neon_begin();
+       sm4_ce_expand_key(key, tctx->key.rkey_enc, tctx->key.rkey_dec,
+                         crypto_sm4_fk, crypto_sm4_ck);
+       kernel_neon_end();
+
+       return 0;
+}
+
+/*
+ * cmac(sm4) setkey: expand the SM4 key, then derive the two CMAC
+ * subkeys (NIST SP 800-38B) into ctx->consts:
+ *   consts[0] = E_K(0) * u    -- applied when the last block is full
+ *   consts[1] = E_K(0) * u^2  -- applied when the last block is partial
+ * Multiplication by u is a one-bit left shift in GF(2^128) with
+ * reduction constant 0x87, done on big-endian halves.
+ */
+static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key,
+                          unsigned int key_len)
+{
+       struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+       be128 *consts = (be128 *)ctx->consts;
+       u64 a, b;
+
+       if (key_len != SM4_KEY_SIZE)
+               return -EINVAL;
+
+       memset(consts, 0, SM4_BLOCK_SIZE);
+
+       kernel_neon_begin();
+
+       sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+                         crypto_sm4_fk, crypto_sm4_ck);
+
+       /* encrypt the zero block */
+       sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
+
+       kernel_neon_end();
+
+       /* gf(2^128) multiply zero-ciphertext with u and u^2 */
+       a = be64_to_cpu(consts[0].a);
+       b = be64_to_cpu(consts[0].b);
+       consts[0].a = cpu_to_be64((a << 1) | (b >> 63));
+       consts[0].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
+
+       /* consts[1] = consts[0] * u, i.e. E_K(0) * u^2 */
+       a = be64_to_cpu(consts[0].a);
+       b = be64_to_cpu(consts[0].b);
+       consts[1].a = cpu_to_be64((a << 1) | (b >> 63));
+       consts[1].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
+
+       return 0;
+}
+
+/*
+ * xcbc(sm4) setkey (RFC 3566 construction): derive three keys by
+ * encrypting fixed constants under the user key:
+ *   k1 = E_K(0x01..) becomes the CBC-MAC key (re-expanded below),
+ *   k2 = E_K(0x02..) and k3 = E_K(0x03..) go into ctx->consts as the
+ *   final-block whiteners (k2 for a full, k3 for a padded last block).
+ */
+static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key,
+                          unsigned int key_len)
+{
+       struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+       u8 __aligned(8) key2[SM4_BLOCK_SIZE];
+       static u8 const ks[3][SM4_BLOCK_SIZE] = {
+               { [0 ... SM4_BLOCK_SIZE - 1] = 0x1},
+               { [0 ... SM4_BLOCK_SIZE - 1] = 0x2},
+               { [0 ... SM4_BLOCK_SIZE - 1] = 0x3},
+       };
+
+       if (key_len != SM4_KEY_SIZE)
+               return -EINVAL;
+
+       kernel_neon_begin();
+
+       /* expand the user key first, only to derive k1/k2/k3 from it */
+       sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+                         crypto_sm4_fk, crypto_sm4_ck);
+
+       sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
+       sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
+
+       /* overwrite the round keys with the expansion of k1 for MACing */
+       sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
+                         crypto_sm4_fk, crypto_sm4_ck);
+
+       kernel_neon_end();
+
+       return 0;
+}
+
+/* Start a fresh MAC computation: zero digest and fill counter. */
+static int sm4_mac_init(struct shash_desc *desc)
+{
+       struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       memset(ctx, 0, sizeof(*ctx));
+
+       return 0;
+}
+
+/*
+ * Common update for cmac/xcbc/cbcmac: absorb input into the digest
+ * block by block with CBC-MAC chaining. Invariant: whenever the data
+ * seen so far is an exact block multiple, the last block is kept
+ * buffered *unencrypted* (ctx->len == SM4_BLOCK_SIZE) so that final()
+ * can still whiten it (needed by CMAC/XCBC).
+ */
+static int sm4_mac_update(struct shash_desc *desc, const u8 *p,
+                         unsigned int len)
+{
+       struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+       struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+       unsigned int l, nblocks;
+
+       if (len == 0)
+               return 0;
+
+       /* top up a partially filled block, or absorb a short first chunk */
+       if (ctx->len || ctx->len + len < SM4_BLOCK_SIZE) {
+               l = min(len, SM4_BLOCK_SIZE - ctx->len);
+
+               crypto_xor(ctx->digest + ctx->len, p, l);
+               ctx->len += l;
+               len -= l;
+               p += l;
+       }
+
+       if (len && (ctx->len % SM4_BLOCK_SIZE) == 0) {
+               kernel_neon_begin();
+
+               if (len < SM4_BLOCK_SIZE && ctx->len == SM4_BLOCK_SIZE) {
+                       /* remainder fits in the buffer: the buffered block is
+                        * no longer last, so it can be encrypted now */
+                       sm4_ce_crypt_block(tctx->key.rkey_enc,
+                                          ctx->digest, ctx->digest);
+                       ctx->len = 0;
+               } else {
+                       nblocks = len / SM4_BLOCK_SIZE;
+                       len %= SM4_BLOCK_SIZE;
+
+                       /* enc_before flushes the buffered block; enc_after is
+                        * skipped when the final full block must stay pending */
+                       sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
+                                         nblocks, (ctx->len == SM4_BLOCK_SIZE),
+                                         (len != 0));
+
+                       p += nblocks * SM4_BLOCK_SIZE;
+
+                       if (len == 0)
+                               ctx->len = SM4_BLOCK_SIZE;
+               }
+
+               kernel_neon_end();
+
+               /* stash the sub-block tail for the next update/final */
+               if (len) {
+                       crypto_xor(ctx->digest, p, len);
+                       ctx->len = len;
+               }
+       }
+
+       return 0;
+}
+
+/* Shared final for cmac(sm4) and xcbc(sm4). */
+static int sm4_cmac_final(struct shash_desc *desc, u8 *out)
+{
+       struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+       struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+       const u8 *consts = tctx->consts;
+
+       if (ctx->len != SM4_BLOCK_SIZE) {
+               /* partial last block: 10* padding, second constant */
+               ctx->digest[ctx->len] ^= 0x80;
+               consts += SM4_BLOCK_SIZE;
+       }
+
+       /* XOR in the selected constant and run the final encryption */
+       kernel_neon_begin();
+       sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
+                         false, true);
+       kernel_neon_end();
+
+       memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
+
+       return 0;
+}
+
+/* Final for cbcmac(sm4): encrypt whatever is still buffered, if any. */
+static int sm4_cbcmac_final(struct shash_desc *desc, u8 *out)
+{
+       struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+       struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       /* ctx->len may be a partial fill or a pending full block */
+       if (ctx->len) {
+               kernel_neon_begin();
+               sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
+                                  ctx->digest);
+               kernel_neon_end();
+       }
+
+       memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
+
+       return 0;
+}
+
+static struct shash_alg sm4_mac_algs[] = {
+       {
+               /* cmac: ctxsize carries two derived subkeys after the key */
+               .base = {
+                       .cra_name               = "cmac(sm4)",
+                       .cra_driver_name        = "cmac-sm4-ce",
+                       .cra_priority           = 400,
+                       .cra_blocksize          = SM4_BLOCK_SIZE,
+                       .cra_ctxsize            = sizeof(struct sm4_mac_tfm_ctx)
+                                                       + SM4_BLOCK_SIZE * 2,
+                       .cra_module             = THIS_MODULE,
+               },
+               .digestsize     = SM4_BLOCK_SIZE,
+               .init           = sm4_mac_init,
+               .update         = sm4_mac_update,
+               .final          = sm4_cmac_final,
+               .setkey         = sm4_cmac_setkey,
+               .descsize       = sizeof(struct sm4_mac_desc_ctx),
+       }, {
+               /* xcbc: same finalization as cmac, different key schedule */
+               .base = {
+                       .cra_name               = "xcbc(sm4)",
+                       .cra_driver_name        = "xcbc-sm4-ce",
+                       .cra_priority           = 400,
+                       .cra_blocksize          = SM4_BLOCK_SIZE,
+                       .cra_ctxsize            = sizeof(struct sm4_mac_tfm_ctx)
+                                                       + SM4_BLOCK_SIZE * 2,
+                       .cra_module             = THIS_MODULE,
+               },
+               .digestsize     = SM4_BLOCK_SIZE,
+               .init           = sm4_mac_init,
+               .update         = sm4_mac_update,
+               .final          = sm4_cmac_final,
+               .setkey         = sm4_xcbc_setkey,
+               .descsize       = sizeof(struct sm4_mac_desc_ctx),
+       }, {
+               /* cbcmac: no trailing constants, byte-granular blocksize */
+               .base = {
+                       .cra_name               = "cbcmac(sm4)",
+                       .cra_driver_name        = "cbcmac-sm4-ce",
+                       .cra_priority           = 400,
+                       .cra_blocksize          = 1,
+                       .cra_ctxsize            = sizeof(struct sm4_mac_tfm_ctx),
+                       .cra_module             = THIS_MODULE,
+               },
+               .digestsize     = SM4_BLOCK_SIZE,
+               .init           = sm4_mac_init,
+               .update         = sm4_mac_update,
+               .final          = sm4_cbcmac_final,
+               .setkey         = sm4_cbcmac_setkey,
+               .descsize       = sizeof(struct sm4_mac_desc_ctx),
+       }
+};
+
 static int __init sm4_init(void)
 {
-       return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
+       int err;
+
+       err = crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
+       if (err)
+               return err;
+
+       err = crypto_register_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs));
+       if (err)
+               goto out_err;
+
+       return 0;
+
+out_err:
+       crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
+       return err;
 }
 
 static void __exit sm4_exit(void)
 {
+       /* unregister in reverse order of registration in sm4_init() */
+       crypto_unregister_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs));
        crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
 }
 
 MODULE_ALIAS_CRYPTO("ctr(sm4)");
 MODULE_ALIAS_CRYPTO("cts(cbc(sm4))");
 MODULE_ALIAS_CRYPTO("xts(sm4)");
+MODULE_ALIAS_CRYPTO("cmac(sm4)");
+MODULE_ALIAS_CRYPTO("xcbc(sm4)");
+MODULE_ALIAS_CRYPTO("cbcmac(sm4)");
 MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
 MODULE_LICENSE("GPL v2");