svn commit: r328150 - in head: sys/crypto/ccp sys/modules sys/modules/ccp tests/sys/opencrypto

Conrad Meyer cem at FreeBSD.org
Thu Jan 18 22:01:32 UTC 2018


Author: cem
Date: Thu Jan 18 22:01:30 2018
New Revision: 328150
URL: https://svnweb.freebsd.org/changeset/base/328150

Log:
  Add ccp(4): experimental driver for AMD Crypto Co-Processor
  
  * Registers TRNG source for random(4).  (See the registration sketch
    after this list.)
  * Finds available queues, LSBs; allocates static objects
  * Allocates a shared MSI-X for all queues.  The hardware does not have
    separate interrupts per queue.  Working interrupt mode driver.
  * Computes SHA hashes, HMAC.  Passes cryptotest.py, cryptocheck tests.
  * Does AES-CBC, CTR mode, and XTS.  cryptotest.py and cryptocheck pass.
  * Support for "authenc" (AES + HMAC).  (SHA1 seems to result in
    "unaligned" cleartext inputs from cryptocheck -- which the engine
    cannot handle.  SHA2 seems to work fine.)
  * GCM passes for block-multiple AAD and input lengths
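  
  As a taste of the random(4) hookup, the sketch below pairs the
  random_ccp source definition from ccp.c (visible in the diff further
  down) with the stock randomdev registration call.  The
  example_register_trng() wrapper and the stub callback body are
  illustrative assumptions, not part of this commit; the driver's real
  rs_read implementation pulls bytes from the CCP TRNG register.
  
	#include <sys/cdefs.h>
	#include <sys/types.h>
	#include <sys/random.h>
	#include <dev/random/randomdev.h>

	/* Stub: the driver's callback copies 'count' TRNG bytes into 'buf'. */
	static u_int
	random_ccp_read(void *buf __unused, u_int count __unused)
	{
		return (0);	/* report no entropy gathered (sketch only) */
	}

	static struct random_source random_ccp = {
		.rs_ident = "AMD CCP TRNG",
		.rs_source = RANDOM_PURE_CCP,
		.rs_read = random_ccp_read,
	};

	/* Called once from the attach path, after the queues are up. */
	static void
	example_register_trng(void)
	{
		random_source_register(&random_ccp);
	}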
  
  Largely based on ccr(4), part of cxgbe(4).
  
  Rough performance averages on AMD Ryzen 1950X (4kB buffer):
  aesni:      SHA1:  ~8300 Mb/s    SHA256: ~8000 Mb/s
  ccp:        SHA1:   ~630 Mb/s    SHA256:  ~660 Mb/s    SHA512:  ~700 Mb/s
  cryptosoft: SHA1:  ~1800 Mb/s    SHA256: ~1800 Mb/s    SHA512: ~2700 Mb/s
  
  As you can see, performance is poor in comparison to aesni(4) and even
  cryptosoft (due to the CCP's high per-request setup cost).  At a larger
  buffer size (128kB), throughput is a little better (but still worse than
  aesni(4)):
  
  aesni:      SHA1: ~10400 Mb/s    SHA256: ~9950 Mb/s
  ccp:        SHA1:  ~2200 Mb/s    SHA256: ~2600 Mb/s    SHA512: ~3800 Mb/s
  cryptosoft: SHA1:  ~1750 Mb/s    SHA256: ~1800 Mb/s    SHA512: ~2700 Mb/s
  
  AES performance has a similar story:
  
  aesni:      4kB: ~11250 Mb/s    128kB: ~11250 Mb/s
  ccp:        4kB:   ~350 Mb/s    128kB:  ~4600 Mb/s
  cryptosoft: 4kB:  ~1750 Mb/s    128kB:  ~1700 Mb/s
  
  This driver is EXPERIMENTAL.  You should verify cryptographic results on
  typical and corner-case inputs from your application against a known-good
  implementation.
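  
  For instance, a minimal known-answer check against cryptodev(4) might
  look like the userland sketch below (error handling abbreviated).  The
  CRIOGET/CIOCGSESSION/CIOCCRYPT flow is the stock cryptodev(4) interface
  of this era, and the test vector is the first CBC-AES128.Encrypt block
  from NIST SP 800-38A; only the program itself is invented here.
  
	#include <sys/ioctl.h>
	#include <crypto/cryptodev.h>
	#include <err.h>
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int
	main(void)
	{
		/* NIST SP 800-38A F.2.1 CBC-AES128.Encrypt, block #1. */
		static uint8_t key[16] = {
		    0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
		    0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c };
		static uint8_t iv[16] = {
		    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
		    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f };
		static uint8_t pt[16] = {
		    0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
		    0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a };
		static const uint8_t expect[16] = {
		    0x76, 0x49, 0xab, 0xac, 0x81, 0x19, 0xb2, 0x46,
		    0xce, 0xe9, 0x8e, 0x9b, 0x12, 0xe9, 0x19, 0x7d };
		uint8_t ct[16];
		struct session_op sop;
		struct crypt_op cop;
		int cfd, fd;

		fd = open("/dev/crypto", O_RDWR);
		if (fd < 0 || ioctl(fd, CRIOGET, &cfd) < 0)
			err(1, "cryptodev");

		memset(&sop, 0, sizeof(sop));
		sop.cipher = CRYPTO_AES_CBC;
		sop.keylen = sizeof(key);
		sop.key = (caddr_t)key;
		if (ioctl(cfd, CIOCGSESSION, &sop) < 0)
			err(1, "CIOCGSESSION");

		memset(&cop, 0, sizeof(cop));
		cop.ses = sop.ses;
		cop.op = COP_ENCRYPT;
		cop.len = sizeof(pt);
		cop.src = (caddr_t)pt;
		cop.dst = (caddr_t)ct;
		cop.iv = (caddr_t)iv;
		if (ioctl(cfd, CIOCCRYPT, &cop) < 0)
			err(1, "CIOCCRYPT");

		printf("AES-128-CBC KAT: %s\n",
		    memcmp(ct, expect, sizeof(expect)) == 0 ? "ok" : "MISMATCH");
		return (0);
	}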
  
  Sponsored by:	Dell EMC Isilon
  Differential Revision:	https://reviews.freebsd.org/D12723

Added:
  head/sys/crypto/ccp/
  head/sys/crypto/ccp/ccp.c
     - copied, changed from r328137, head/sys/dev/cxgbe/crypto/t4_crypto.c
  head/sys/crypto/ccp/ccp.h   (contents, props changed)
  head/sys/crypto/ccp/ccp_hardware.c
     - copied, changed from r328137, head/sys/dev/cxgbe/crypto/t4_crypto.c
  head/sys/crypto/ccp/ccp_hardware.h   (contents, props changed)
  head/sys/crypto/ccp/ccp_lsb.c   (contents, props changed)
  head/sys/crypto/ccp/ccp_lsb.h   (contents, props changed)
  head/sys/modules/ccp/
  head/sys/modules/ccp/Makefile   (contents, props changed)
Modified:
  head/sys/modules/Makefile
  head/tests/sys/opencrypto/cryptotest.py

Copied and modified: head/sys/crypto/ccp/ccp.c (from r328137, head/sys/dev/cxgbe/crypto/t4_crypto.c)
==============================================================================
--- head/sys/dev/cxgbe/crypto/t4_crypto.c	Thu Jan 18 21:19:57 2018	(r328137, copy source)
+++ head/sys/crypto/ccp/ccp.c	Thu Jan 18 22:01:30 2018	(r328150)
@@ -1,7 +1,10 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
  * Copyright (c) 2017 Chelsio Communications, Inc.
+ * Copyright (c) 2017 Conrad Meyer <cem at FreeBSD.org>
  * All rights reserved.
- * Written by: John Baldwin <jhb at FreeBSD.org>
+ * Largely borrowed from ccr(4), Written by: John Baldwin <jhb at FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -28,1330 +31,88 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_ddb.h"
+
 #include <sys/types.h>
 #include <sys/bus.h>
 #include <sys/lock.h>
+#include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/module.h>
+#include <sys/random.h>
 #include <sys/sglist.h>
+#include <sys/sysctl.h>
 
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
+#include <dev/pci/pcivar.h>
+
+#include <dev/random/randomdev.h>
+
 #include <opencrypto/cryptodev.h>
 #include <opencrypto/xform.h>
 
 #include "cryptodev_if.h"
 
-#include "common/common.h"
-#include "crypto/t4_crypto.h"
+#include "ccp.h"
+#include "ccp_hardware.h"
 
-/*
- * Requests consist of:
- *
- * +-------------------------------+
- * | struct fw_crypto_lookaside_wr |
- * +-------------------------------+
- * | struct ulp_txpkt              |
- * +-------------------------------+
- * | struct ulptx_idata            |
- * +-------------------------------+
- * | struct cpl_tx_sec_pdu         |
- * +-------------------------------+
- * | struct cpl_tls_tx_scmd_fmt    |
- * +-------------------------------+
- * | key context header            |
- * +-------------------------------+
- * | AES key                       |  ----- For requests with AES
- * +-------------------------------+ -
- * | IPAD (16-byte aligned)        |  \
- * +-------------------------------+  +---- For requests with HMAC
- * | OPAD (16-byte aligned)        |  /
- * +-------------------------------+ -
- * | GMAC H                        |  ----- For AES-GCM
- * +-------------------------------+ -
- * | struct cpl_rx_phys_dsgl       |  \
- * +-------------------------------+  +---- Destination buffer for
- * | PHYS_DSGL entries             |  /     non-hash-only requests
- * +-------------------------------+ -
- * | 16 dummy bytes                |  ----- Only for hash-only requests
- * +-------------------------------+
- * | IV                            |  ----- If immediate IV
- * +-------------------------------+
- * | Payload                       |  ----- If immediate Payload
- * +-------------------------------+ -
- * | struct ulptx_sgl              |  \
- * +-------------------------------+  +---- If payload via SGL
- * | SGL entries                   |  /
- * +-------------------------------+ -
- *
- * Note that the key context must be padded to ensure 16-byte alignment.
- * For HMAC requests, the key consists of the partial hash of the IPAD
- * followed by the partial hash of the OPAD.
- *
- * Replies consist of:
- *
- * +-------------------------------+
- * | struct cpl_fw6_pld            |
- * +-------------------------------+
- * | hash digest                   |  ----- For HMAC request with
- * +-------------------------------+        'hash_size' set in work request
- *
- * A 32-bit big-endian error status word is supplied in the last 4
- * bytes of data[0] in the CPL_FW6_PLD message.  bit 0 indicates a
- * "MAC" error and bit 1 indicates a "PAD" error.
- *
- * The 64-bit 'cookie' field from the fw_crypto_lookaside_wr message
- * in the request is returned in data[1] of the CPL_FW6_PLD message.
- *
- * For block cipher replies, the updated IV is supplied in data[2] and
- * data[3] of the CPL_FW6_PLD message.
- *
- * For hash replies where the work request set 'hash_size' to request
- * a copy of the hash in the reply, the hash digest is supplied
- * immediately following the CPL_FW6_PLD message.
- */
+MALLOC_DEFINE(M_CCP, "ccp", "AMD CCP crypto");
 
 /*
- * The documentation for CPL_RX_PHYS_DSGL claims a maximum of 32
- * SG entries.
+ * Need a global softc available for garbage random_source API, which lacks any
+ * context pointer.  It's also handy for debugging.
  */
-#define	MAX_RX_PHYS_DSGL_SGE	32
-#define	DSGL_SGE_MAXLEN		65535
+struct ccp_softc *g_ccp_softc;
 
-/*
- * The adapter only supports requests with a total input or output
- * length of 64k-1 or smaller.  Longer requests either result in hung
- * requests or incorrect results.
- */
-#define	MAX_REQUEST_SIZE	65535
+bool g_debug_print = false;
+SYSCTL_BOOL(_hw_ccp, OID_AUTO, debug, CTLFLAG_RWTUN, &g_debug_print, 0,
+    "Set to enable debugging log messages");
 
-static MALLOC_DEFINE(M_CCR, "ccr", "Chelsio T6 crypto");
-
-struct ccr_session_hmac {
-	struct auth_hash *auth_hash;
-	int hash_len;
-	unsigned int partial_digest_len;
-	unsigned int auth_mode;
-	unsigned int mk_size;
-	char ipad[CHCR_HASH_MAX_BLOCK_SIZE_128];
-	char opad[CHCR_HASH_MAX_BLOCK_SIZE_128];
+static struct pciid {
+	uint32_t devid;
+	const char *desc;
+} ccp_ids[] = {
+	{ 0x14561022, "AMD CCP-5a" },
+	{ 0x14681022, "AMD CCP-5b" },
 };
+MODULE_PNP_INFO("W32:vendor/device", pci, ccp, ccp_ids, sizeof(ccp_ids[0]),
+    nitems(ccp_ids));
 
-struct ccr_session_gmac {
-	int hash_len;
-	char ghash_h[GMAC_BLOCK_LEN];
+static struct random_source random_ccp = {
+	.rs_ident = "AMD CCP TRNG",
+	.rs_source = RANDOM_PURE_CCP,
+	.rs_read = random_ccp_read,
 };
 
-struct ccr_session_blkcipher {
-	unsigned int cipher_mode;
-	unsigned int key_len;
-	unsigned int iv_len;
-	__be32 key_ctx_hdr;
-	char enckey[CHCR_AES_MAX_KEY_LEN];
-	char deckey[CHCR_AES_MAX_KEY_LEN];
-};
-
-struct ccr_session {
-	bool active;
-	int pending;
-	enum { HMAC, BLKCIPHER, AUTHENC, GCM } mode;
-	union {
-		struct ccr_session_hmac hmac;
-		struct ccr_session_gmac gmac;
-	};
-	struct ccr_session_blkcipher blkcipher;
-};
-
-struct ccr_softc {
-	struct adapter *adapter;
-	device_t dev;
-	uint32_t cid;
-	int tx_channel_id;
-	struct ccr_session *sessions;
-	int nsessions;
-	struct mtx lock;
-	bool detaching;
-	struct sge_wrq *txq;
-	struct sge_rxq *rxq;
-
-	/*
-	 * Pre-allocate S/G lists used when preparing a work request.
-	 * 'sg_crp' contains an sglist describing the entire buffer
-	 * for a 'struct cryptop'.  'sg_ulptx' is used to describe
-	 * the data the engine should DMA as input via ULPTX_SGL.
-	 * 'sg_dsgl' is used to describe the destination that cipher
-	 * text and a tag should be written to.
-	 */
-	struct sglist *sg_crp;
-	struct sglist *sg_ulptx;
-	struct sglist *sg_dsgl;
-
-	/* Statistics. */
-	uint64_t stats_blkcipher_encrypt;
-	uint64_t stats_blkcipher_decrypt;
-	uint64_t stats_hmac;
-	uint64_t stats_authenc_encrypt;
-	uint64_t stats_authenc_decrypt;
-	uint64_t stats_gcm_encrypt;
-	uint64_t stats_gcm_decrypt;
-	uint64_t stats_wr_nomem;
-	uint64_t stats_inflight;
-	uint64_t stats_mac_error;
-	uint64_t stats_pad_error;
-	uint64_t stats_bad_session;
-	uint64_t stats_sglist_error;
-	uint64_t stats_process_error;
-};
-
 /*
- * Crypto requests involve two kind of scatter/gather lists.
- *
- * Non-hash-only requests require a PHYS_DSGL that describes the
- * location to store the results of the encryption or decryption
- * operation.  This SGL uses a different format (PHYS_DSGL) and should
- * exclude the crd_skip bytes at the start of the data as well as
- * any AAD or IV.  For authenticated encryption requests it should
- * cover include the destination of the hash or tag.
- *
- * The input payload may either be supplied inline as immediate data,
- * or via a standard ULP_TX SGL.  This SGL should include AAD,
- * ciphertext, and the hash or tag for authenticated decryption
- * requests.
- *
- * These scatter/gather lists can describe different subsets of the
- * buffer described by the crypto operation.  ccr_populate_sglist()
- * generates a scatter/gather list that covers the entire crypto
- * operation buffer that is then used to construct the other
- * scatter/gather lists.
+ * ccp_populate_sglist() generates a scatter/gather list that covers the entire
+ * crypto operation buffer.
  */
 static int
-ccr_populate_sglist(struct sglist *sg, struct cryptop *crp)
+ccp_populate_sglist(struct sglist *sg, struct cryptop *crp)
 {
 	int error;
 
 	sglist_reset(sg);
 	if (crp->crp_flags & CRYPTO_F_IMBUF)
-		error = sglist_append_mbuf(sg, (struct mbuf *)crp->crp_buf);
+		error = sglist_append_mbuf(sg, crp->crp_mbuf);
 	else if (crp->crp_flags & CRYPTO_F_IOV)
-		error = sglist_append_uio(sg, (struct uio *)crp->crp_buf);
+		error = sglist_append_uio(sg, crp->crp_uio);
 	else
 		error = sglist_append(sg, crp->crp_buf, crp->crp_ilen);
 	return (error);
 }
 
 /*
- * Segments in 'sg' larger than 'maxsegsize' are counted as multiple
- * segments.
- */
-static int
-ccr_count_sgl(struct sglist *sg, int maxsegsize)
-{
-	int i, nsegs;
-
-	nsegs = 0;
-	for (i = 0; i < sg->sg_nseg; i++)
-		nsegs += howmany(sg->sg_segs[i].ss_len, maxsegsize);
-	return (nsegs);
-}
-
-/* These functions deal with PHYS_DSGL for the reply buffer. */
-static inline int
-ccr_phys_dsgl_len(int nsegs)
-{
-	int len;
-
-	len = (nsegs / 8) * sizeof(struct phys_sge_pairs);
-	if ((nsegs % 8) != 0) {
-		len += sizeof(uint16_t) * 8;
-		len += roundup2(nsegs % 8, 2) * sizeof(uint64_t);
-	}
-	return (len);
-}
-
-static void
-ccr_write_phys_dsgl(struct ccr_softc *sc, void *dst, int nsegs)
-{
-	struct sglist *sg;
-	struct cpl_rx_phys_dsgl *cpl;
-	struct phys_sge_pairs *sgl;
-	vm_paddr_t paddr;
-	size_t seglen;
-	u_int i, j;
-
-	sg = sc->sg_dsgl;
-	cpl = dst;
-	cpl->op_to_tid = htobe32(V_CPL_RX_PHYS_DSGL_OPCODE(CPL_RX_PHYS_DSGL) |
-	    V_CPL_RX_PHYS_DSGL_ISRDMA(0));
-	cpl->pcirlxorder_to_noofsgentr = htobe32(
-	    V_CPL_RX_PHYS_DSGL_PCIRLXORDER(0) |
-	    V_CPL_RX_PHYS_DSGL_PCINOSNOOP(0) |
-	    V_CPL_RX_PHYS_DSGL_PCITPHNTENB(0) | V_CPL_RX_PHYS_DSGL_DCAID(0) |
-	    V_CPL_RX_PHYS_DSGL_NOOFSGENTR(nsegs));
-	cpl->rss_hdr_int.opcode = CPL_RX_PHYS_ADDR;
-	cpl->rss_hdr_int.qid = htobe16(sc->rxq->iq.abs_id);
-	cpl->rss_hdr_int.hash_val = 0;
-	sgl = (struct phys_sge_pairs *)(cpl + 1);
-	j = 0;
-	for (i = 0; i < sg->sg_nseg; i++) {
-		seglen = sg->sg_segs[i].ss_len;
-		paddr = sg->sg_segs[i].ss_paddr;
-		do {
-			sgl->addr[j] = htobe64(paddr);
-			if (seglen > DSGL_SGE_MAXLEN) {
-				sgl->len[j] = htobe16(DSGL_SGE_MAXLEN);
-				paddr += DSGL_SGE_MAXLEN;
-				seglen -= DSGL_SGE_MAXLEN;
-			} else {
-				sgl->len[j] = htobe16(seglen);
-				seglen = 0;
-			}
-			j++;
-			if (j == 8) {
-				sgl++;
-				j = 0;
-			}
-		} while (seglen != 0);
-	}
-	MPASS(j + 8 * (sgl - (struct phys_sge_pairs *)(cpl + 1)) == nsegs);
-}
-
-/* These functions deal with the ULPTX_SGL for input payload. */
-static inline int
-ccr_ulptx_sgl_len(int nsegs)
-{
-	u_int n;
-
-	nsegs--; /* first segment is part of ulptx_sgl */
-	n = sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
-	return (roundup2(n, 16));
-}
-
-static void
-ccr_write_ulptx_sgl(struct ccr_softc *sc, void *dst, int nsegs)
-{
-	struct ulptx_sgl *usgl;
-	struct sglist *sg;
-	struct sglist_seg *ss;
-	int i;
-
-	sg = sc->sg_ulptx;
-	MPASS(nsegs == sg->sg_nseg);
-	ss = &sg->sg_segs[0];
-	usgl = dst;
-	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
-	    V_ULPTX_NSGE(nsegs));
-	usgl->len0 = htobe32(ss->ss_len);
-	usgl->addr0 = htobe64(ss->ss_paddr);
-	ss++;
-	for (i = 0; i < sg->sg_nseg - 1; i++) {
-		usgl->sge[i / 2].len[i & 1] = htobe32(ss->ss_len);
-		usgl->sge[i / 2].addr[i & 1] = htobe64(ss->ss_paddr);
-		ss++;
-	}
-	
-}
-
-static bool
-ccr_use_imm_data(u_int transhdr_len, u_int input_len)
-{
-
-	if (input_len > CRYPTO_MAX_IMM_TX_PKT_LEN)
-		return (false);
-	if (roundup2(transhdr_len, 16) + roundup2(input_len, 16) >
-	    SGE_MAX_WR_LEN)
-		return (false);
-	return (true);
-}
-
-static void
-ccr_populate_wreq(struct ccr_softc *sc, struct chcr_wr *crwr, u_int kctx_len,
-    u_int wr_len, uint32_t sid, u_int imm_len, u_int sgl_len, u_int hash_size,
-    u_int iv_loc, struct cryptop *crp)
-{
-	u_int cctx_size;
-
-	cctx_size = sizeof(struct _key_ctx) + kctx_len;
-	crwr->wreq.op_to_cctx_size = htobe32(
-	    V_FW_CRYPTO_LOOKASIDE_WR_OPCODE(FW_CRYPTO_LOOKASIDE_WR) |
-	    V_FW_CRYPTO_LOOKASIDE_WR_COMPL(0) |
-	    V_FW_CRYPTO_LOOKASIDE_WR_IMM_LEN(imm_len) |
-	    V_FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC(1) |
-	    V_FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE(cctx_size >> 4));
-	crwr->wreq.len16_pkd = htobe32(
-	    V_FW_CRYPTO_LOOKASIDE_WR_LEN16(wr_len / 16));
-	crwr->wreq.session_id = htobe32(sid);
-	crwr->wreq.rx_chid_to_rx_q_id = htobe32(
-	    V_FW_CRYPTO_LOOKASIDE_WR_RX_CHID(sc->tx_channel_id) |
-	    V_FW_CRYPTO_LOOKASIDE_WR_LCB(0) |
-	    V_FW_CRYPTO_LOOKASIDE_WR_PHASH(0) |
-	    V_FW_CRYPTO_LOOKASIDE_WR_IV(iv_loc) |
-	    V_FW_CRYPTO_LOOKASIDE_WR_FQIDX(0) |
-	    V_FW_CRYPTO_LOOKASIDE_WR_TX_CH(0) |
-	    V_FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID(sc->rxq->iq.abs_id));
-	crwr->wreq.key_addr = 0;
-	crwr->wreq.pld_size_hash_size = htobe32(
-	    V_FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE(sgl_len) |
-	    V_FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE(hash_size));
-	crwr->wreq.cookie = htobe64((uintptr_t)crp);
-
-	crwr->ulptx.cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
-	    V_ULP_TXPKT_DATAMODIFY(0) |
-	    V_ULP_TXPKT_CHANNELID(sc->tx_channel_id) | V_ULP_TXPKT_DEST(0) |
-	    V_ULP_TXPKT_FID(0) | V_ULP_TXPKT_RO(1));
-	crwr->ulptx.len = htobe32(
-	    ((wr_len - sizeof(struct fw_crypto_lookaside_wr)) / 16));
-
-	crwr->sc_imm.cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
-	    V_ULP_TX_SC_MORE(imm_len != 0 ? 0 : 1));
-	crwr->sc_imm.len = htobe32(wr_len - offsetof(struct chcr_wr, sec_cpl) -
-	    sgl_len);
-}
-
-static int
-ccr_hmac(struct ccr_softc *sc, uint32_t sid, struct ccr_session *s,
-    struct cryptop *crp)
-{
-	struct chcr_wr *crwr;
-	struct wrqe *wr;
-	struct auth_hash *axf;
-	struct cryptodesc *crd;
-	char *dst;
-	u_int hash_size_in_response, kctx_flits, kctx_len, transhdr_len, wr_len;
-	u_int imm_len, iopad_size;
-	int error, sgl_nsegs, sgl_len;
-
-	crd = crp->crp_desc;
-
-	/* Reject requests with too large of an input buffer. */
-	if (crd->crd_len > MAX_REQUEST_SIZE)
-		return (EFBIG);
-
-	axf = s->hmac.auth_hash;
-
-	/* PADs must be 128-bit aligned. */
-	iopad_size = roundup2(s->hmac.partial_digest_len, 16);
-
-	/*
-	 * The 'key' part of the context includes the aligned IPAD and
-	 * OPAD.
-	 */
-	kctx_len = iopad_size * 2;
-	hash_size_in_response = axf->hashsize;
-	transhdr_len = HASH_TRANSHDR_SIZE(kctx_len);
-
-	if (crd->crd_len == 0) {
-		imm_len = axf->blocksize;
-		sgl_nsegs = 0;
-		sgl_len = 0;
-	} else if (ccr_use_imm_data(transhdr_len, crd->crd_len)) {
-		imm_len = crd->crd_len;
-		sgl_nsegs = 0;
-		sgl_len = 0;
-	} else {
-		imm_len = 0;
-		sglist_reset(sc->sg_ulptx);
-		error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp,
-		    crd->crd_skip, crd->crd_len);
-		if (error)
-			return (error);
-		sgl_nsegs = sc->sg_ulptx->sg_nseg;
-		sgl_len = ccr_ulptx_sgl_len(sgl_nsegs);
-	}
-
-	wr_len = roundup2(transhdr_len, 16) + roundup2(imm_len, 16) + sgl_len;
-	wr = alloc_wrqe(wr_len, sc->txq);
-	if (wr == NULL) {
-		sc->stats_wr_nomem++;
-		return (ENOMEM);
-	}
-	crwr = wrtod(wr);
-	memset(crwr, 0, wr_len);
-
-	ccr_populate_wreq(sc, crwr, kctx_len, wr_len, sid, imm_len, sgl_len,
-	    hash_size_in_response, IV_NOP, crp);
-
-	/* XXX: Hardcodes SGE loopback channel of 0. */
-	crwr->sec_cpl.op_ivinsrtofst = htobe32(
-	    V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) |
-	    V_CPL_TX_SEC_PDU_RXCHID(sc->tx_channel_id) |
-	    V_CPL_TX_SEC_PDU_ACKFOLLOWS(0) | V_CPL_TX_SEC_PDU_ULPTXLPBK(1) |
-	    V_CPL_TX_SEC_PDU_CPLLEN(2) | V_CPL_TX_SEC_PDU_PLACEHOLDER(0) |
-	    V_CPL_TX_SEC_PDU_IVINSRTOFST(0));
-
-	crwr->sec_cpl.pldlen = htobe32(crd->crd_len == 0 ? axf->blocksize :
-	    crd->crd_len);
-
-	crwr->sec_cpl.cipherstop_lo_authinsert = htobe32(
-	    V_CPL_TX_SEC_PDU_AUTHSTART(1) | V_CPL_TX_SEC_PDU_AUTHSTOP(0));
-
-	/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */
-	crwr->sec_cpl.seqno_numivs = htobe32(
-	    V_SCMD_SEQ_NO_CTRL(0) |
-	    V_SCMD_PROTO_VERSION(CHCR_SCMD_PROTO_VERSION_GENERIC) |
-	    V_SCMD_CIPH_MODE(CHCR_SCMD_CIPHER_MODE_NOP) |
-	    V_SCMD_AUTH_MODE(s->hmac.auth_mode) |
-	    V_SCMD_HMAC_CTRL(CHCR_SCMD_HMAC_CTRL_NO_TRUNC));
-	crwr->sec_cpl.ivgen_hdrlen = htobe32(
-	    V_SCMD_LAST_FRAG(0) |
-	    V_SCMD_MORE_FRAGS(crd->crd_len == 0 ? 1 : 0) | V_SCMD_MAC_ONLY(1));
-
-	memcpy(crwr->key_ctx.key, s->hmac.ipad, s->hmac.partial_digest_len);
-	memcpy(crwr->key_ctx.key + iopad_size, s->hmac.opad,
-	    s->hmac.partial_digest_len);
-
-	/* XXX: F_KEY_CONTEXT_SALT_PRESENT set, but 'salt' not set. */
-	kctx_flits = (sizeof(struct _key_ctx) + kctx_len) / 16;
-	crwr->key_ctx.ctx_hdr = htobe32(V_KEY_CONTEXT_CTX_LEN(kctx_flits) |
-	    V_KEY_CONTEXT_OPAD_PRESENT(1) | V_KEY_CONTEXT_SALT_PRESENT(1) |
-	    V_KEY_CONTEXT_CK_SIZE(CHCR_KEYCTX_NO_KEY) |
-	    V_KEY_CONTEXT_MK_SIZE(s->hmac.mk_size) | V_KEY_CONTEXT_VALID(1));
-
-	dst = (char *)(crwr + 1) + kctx_len + DUMMY_BYTES;
-	if (crd->crd_len == 0) {
-		dst[0] = 0x80;
-		*(uint64_t *)(dst + axf->blocksize - sizeof(uint64_t)) =
-		    htobe64(axf->blocksize << 3);
-	} else if (imm_len != 0)
-		crypto_copydata(crp->crp_flags, crp->crp_buf, crd->crd_skip,
-		    crd->crd_len, dst);
-	else
-		ccr_write_ulptx_sgl(sc, dst, sgl_nsegs);
-
-	/* XXX: TODO backpressure */
-	t4_wrq_tx(sc->adapter, wr);
-
-	return (0);
-}
-
-static int
-ccr_hmac_done(struct ccr_softc *sc, struct ccr_session *s, struct cryptop *crp,
-    const struct cpl_fw6_pld *cpl, int error)
-{
-	struct cryptodesc *crd;
-
-	crd = crp->crp_desc;
-	if (error == 0) {
-		crypto_copyback(crp->crp_flags, crp->crp_buf, crd->crd_inject,
-		    s->hmac.hash_len, (c_caddr_t)(cpl + 1));
-	}
-
-	return (error);
-}
-
-static int
-ccr_blkcipher(struct ccr_softc *sc, uint32_t sid, struct ccr_session *s,
-    struct cryptop *crp)
-{
-	char iv[CHCR_MAX_CRYPTO_IV_LEN];
-	struct chcr_wr *crwr;
-	struct wrqe *wr;
-	struct cryptodesc *crd;
-	char *dst;
-	u_int iv_loc, kctx_len, key_half, op_type, transhdr_len, wr_len;
-	u_int imm_len;
-	int dsgl_nsegs, dsgl_len;
-	int sgl_nsegs, sgl_len;
-	int error;
-
-	crd = crp->crp_desc;
-
-	if (s->blkcipher.key_len == 0 || crd->crd_len == 0)
-		return (EINVAL);
-	if (crd->crd_alg == CRYPTO_AES_CBC &&
-	    (crd->crd_len % AES_BLOCK_LEN) != 0)
-		return (EINVAL);
-
-	/* Reject requests with too large of an input buffer. */
-	if (crd->crd_len > MAX_REQUEST_SIZE)
-		return (EFBIG);
-
-	iv_loc = IV_NOP;
-	if (crd->crd_flags & CRD_F_ENCRYPT) {
-		op_type = CHCR_ENCRYPT_OP;
-		if (crd->crd_flags & CRD_F_IV_EXPLICIT)
-			memcpy(iv, crd->crd_iv, s->blkcipher.iv_len);
-		else
-			arc4rand(iv, s->blkcipher.iv_len, 0);
-		iv_loc = IV_IMMEDIATE;
-		if ((crd->crd_flags & CRD_F_IV_PRESENT) == 0)
-			crypto_copyback(crp->crp_flags, crp->crp_buf,
-			    crd->crd_inject, s->blkcipher.iv_len, iv);
-	} else {
-		op_type = CHCR_DECRYPT_OP;
-		if (crd->crd_flags & CRD_F_IV_EXPLICIT) {
-			memcpy(iv, crd->crd_iv, s->blkcipher.iv_len);
-			iv_loc = IV_IMMEDIATE;
-		} else
-			iv_loc = IV_DSGL;
-	}
-
-	sglist_reset(sc->sg_dsgl);
-	error = sglist_append_sglist(sc->sg_dsgl, sc->sg_crp, crd->crd_skip,
-	    crd->crd_len);
-	if (error)
-		return (error);
-	dsgl_nsegs = ccr_count_sgl(sc->sg_dsgl, DSGL_SGE_MAXLEN);
-	if (dsgl_nsegs > MAX_RX_PHYS_DSGL_SGE)
-		return (EFBIG);
-	dsgl_len = ccr_phys_dsgl_len(dsgl_nsegs);
-
-	/* The 'key' must be 128-bit aligned. */
-	kctx_len = roundup2(s->blkcipher.key_len, 16);
-	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dsgl_len);
-
-	if (ccr_use_imm_data(transhdr_len, crd->crd_len +
-	    s->blkcipher.iv_len)) {
-		imm_len = crd->crd_len;
-		if (iv_loc == IV_DSGL) {
-			crypto_copydata(crp->crp_flags, crp->crp_buf,
-			    crd->crd_inject, s->blkcipher.iv_len, iv);
-			iv_loc = IV_IMMEDIATE;
-		}
-		sgl_nsegs = 0;
-		sgl_len = 0;
-	} else {
-		imm_len = 0;
-		sglist_reset(sc->sg_ulptx);
-		if (iv_loc == IV_DSGL) {
-			error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp,
-			    crd->crd_inject, s->blkcipher.iv_len);
-			if (error)
-				return (error);
-		}
-		error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp,
-		    crd->crd_skip, crd->crd_len);
-		if (error)
-			return (error);
-		sgl_nsegs = sc->sg_ulptx->sg_nseg;
-		sgl_len = ccr_ulptx_sgl_len(sgl_nsegs);
-	}
-
-	wr_len = roundup2(transhdr_len, 16) + roundup2(imm_len, 16) + sgl_len;
-	if (iv_loc == IV_IMMEDIATE)
-		wr_len += s->blkcipher.iv_len;
-	wr = alloc_wrqe(wr_len, sc->txq);
-	if (wr == NULL) {
-		sc->stats_wr_nomem++;
-		return (ENOMEM);
-	}
-	crwr = wrtod(wr);
-	memset(crwr, 0, wr_len);
-
-	ccr_populate_wreq(sc, crwr, kctx_len, wr_len, sid, imm_len, sgl_len, 0,
-	    iv_loc, crp);
-
-	/* XXX: Hardcodes SGE loopback channel of 0. */
-	crwr->sec_cpl.op_ivinsrtofst = htobe32(
-	    V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) |
-	    V_CPL_TX_SEC_PDU_RXCHID(sc->tx_channel_id) |
-	    V_CPL_TX_SEC_PDU_ACKFOLLOWS(0) | V_CPL_TX_SEC_PDU_ULPTXLPBK(1) |
-	    V_CPL_TX_SEC_PDU_CPLLEN(2) | V_CPL_TX_SEC_PDU_PLACEHOLDER(0) |
-	    V_CPL_TX_SEC_PDU_IVINSRTOFST(1));
-
-	crwr->sec_cpl.pldlen = htobe32(s->blkcipher.iv_len + crd->crd_len);
-
-	crwr->sec_cpl.aadstart_cipherstop_hi = htobe32(
-	    V_CPL_TX_SEC_PDU_CIPHERSTART(s->blkcipher.iv_len + 1) |
-	    V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(0));
-	crwr->sec_cpl.cipherstop_lo_authinsert = htobe32(
-	    V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(0));
-
-	/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */
-	crwr->sec_cpl.seqno_numivs = htobe32(
-	    V_SCMD_SEQ_NO_CTRL(0) |
-	    V_SCMD_PROTO_VERSION(CHCR_SCMD_PROTO_VERSION_GENERIC) |
-	    V_SCMD_ENC_DEC_CTRL(op_type) |
-	    V_SCMD_CIPH_MODE(s->blkcipher.cipher_mode) |
-	    V_SCMD_AUTH_MODE(CHCR_SCMD_AUTH_MODE_NOP) |
-	    V_SCMD_HMAC_CTRL(CHCR_SCMD_HMAC_CTRL_NOP) |
-	    V_SCMD_IV_SIZE(s->blkcipher.iv_len / 2) |
-	    V_SCMD_NUM_IVS(0));
-	crwr->sec_cpl.ivgen_hdrlen = htobe32(
-	    V_SCMD_IV_GEN_CTRL(0) |
-	    V_SCMD_MORE_FRAGS(0) | V_SCMD_LAST_FRAG(0) | V_SCMD_MAC_ONLY(0) |
-	    V_SCMD_AADIVDROP(1) | V_SCMD_HDR_LEN(dsgl_len));
-
-	crwr->key_ctx.ctx_hdr = s->blkcipher.key_ctx_hdr;
-	switch (crd->crd_alg) {
-	case CRYPTO_AES_CBC:
-		if (crd->crd_flags & CRD_F_ENCRYPT)
-			memcpy(crwr->key_ctx.key, s->blkcipher.enckey,
-			    s->blkcipher.key_len);
-		else
-			memcpy(crwr->key_ctx.key, s->blkcipher.deckey,
-			    s->blkcipher.key_len);
-		break;
-	case CRYPTO_AES_ICM:
-		memcpy(crwr->key_ctx.key, s->blkcipher.enckey,
-		    s->blkcipher.key_len);
-		break;
-	case CRYPTO_AES_XTS:
-		key_half = s->blkcipher.key_len / 2;
-		memcpy(crwr->key_ctx.key, s->blkcipher.enckey + key_half,
-		    key_half);
-		if (crd->crd_flags & CRD_F_ENCRYPT)
-			memcpy(crwr->key_ctx.key + key_half,
-			    s->blkcipher.enckey, key_half);
-		else
-			memcpy(crwr->key_ctx.key + key_half,
-			    s->blkcipher.deckey, key_half);
-		break;
-	}
-
-	dst = (char *)(crwr + 1) + kctx_len;
-	ccr_write_phys_dsgl(sc, dst, dsgl_nsegs);
-	dst += sizeof(struct cpl_rx_phys_dsgl) + dsgl_len;
-	if (iv_loc == IV_IMMEDIATE) {
-		memcpy(dst, iv, s->blkcipher.iv_len);
-		dst += s->blkcipher.iv_len;
-	}
-	if (imm_len != 0)
-		crypto_copydata(crp->crp_flags, crp->crp_buf, crd->crd_skip,
-		    crd->crd_len, dst);
-	else
-		ccr_write_ulptx_sgl(sc, dst, sgl_nsegs);
-
-	/* XXX: TODO backpressure */
-	t4_wrq_tx(sc->adapter, wr);
-
-	return (0);
-}
-
-static int
-ccr_blkcipher_done(struct ccr_softc *sc, struct ccr_session *s,
-    struct cryptop *crp, const struct cpl_fw6_pld *cpl, int error)
-{
-
-	/*
-	 * The updated IV to permit chained requests is at
-	 * cpl->data[2], but OCF doesn't permit chained requests.
-	 */
-	return (error);
-}
-
-/*
- * 'hashsize' is the length of a full digest.  'authsize' is the
- * requested digest length for this operation which may be less
- * than 'hashsize'.
- */
-static int
-ccr_hmac_ctrl(unsigned int hashsize, unsigned int authsize)
-{
-
-	if (authsize == 10)
-		return (CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366);
-	if (authsize == 12)
-		return (CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT);
-	if (authsize == hashsize / 2)
-		return (CHCR_SCMD_HMAC_CTRL_DIV2);
-	return (CHCR_SCMD_HMAC_CTRL_NO_TRUNC);
-}
-
-static int
-ccr_authenc(struct ccr_softc *sc, uint32_t sid, struct ccr_session *s,
-    struct cryptop *crp, struct cryptodesc *crda, struct cryptodesc *crde)
-{
-	char iv[CHCR_MAX_CRYPTO_IV_LEN];
-	struct chcr_wr *crwr;
-	struct wrqe *wr;
-	struct auth_hash *axf;
-	char *dst;
-	u_int iv_loc, kctx_len, key_half, op_type, transhdr_len, wr_len;
-	u_int hash_size_in_response, imm_len, iopad_size;
-	u_int aad_start, aad_len, aad_stop;
-	u_int auth_start, auth_stop, auth_insert;
-	u_int cipher_start, cipher_stop;
-	u_int hmac_ctrl, input_len;
-	int dsgl_nsegs, dsgl_len;
-	int sgl_nsegs, sgl_len;
-	int error;
-
-	/*
-	 * If there is a need in the future, requests with an empty
-	 * payload could be supported as HMAC-only requests.
-	 */
-	if (s->blkcipher.key_len == 0 || crde->crd_len == 0)
-		return (EINVAL);
-	if (crde->crd_alg == CRYPTO_AES_CBC &&
-	    (crde->crd_len % AES_BLOCK_LEN) != 0)
-		return (EINVAL);
-
-	/*
-	 * AAD is only permitted before the cipher/plain text, not
-	 * after.
-	 */
-	if (crda->crd_len + crda->crd_skip > crde->crd_len + crde->crd_skip)
-		return (EINVAL);
-
-	axf = s->hmac.auth_hash;
-	hash_size_in_response = s->hmac.hash_len;
-
-	/*
-	 * The IV is always stored at the start of the buffer even
-	 * though it may be duplicated in the payload.  The crypto
-	 * engine doesn't work properly if the IV offset points inside
-	 * of the AAD region, so a second copy is always required.
-	 */
-	iv_loc = IV_IMMEDIATE;
-	if (crde->crd_flags & CRD_F_ENCRYPT) {
-		op_type = CHCR_ENCRYPT_OP;
-		if (crde->crd_flags & CRD_F_IV_EXPLICIT)
-			memcpy(iv, crde->crd_iv, s->blkcipher.iv_len);
-		else
-			arc4rand(iv, s->blkcipher.iv_len, 0);
-		if ((crde->crd_flags & CRD_F_IV_PRESENT) == 0)
-			crypto_copyback(crp->crp_flags, crp->crp_buf,
-			    crde->crd_inject, s->blkcipher.iv_len, iv);
-	} else {
-		op_type = CHCR_DECRYPT_OP;
-		if (crde->crd_flags & CRD_F_IV_EXPLICIT)
-			memcpy(iv, crde->crd_iv, s->blkcipher.iv_len);
-		else
-			crypto_copydata(crp->crp_flags, crp->crp_buf,
-			    crde->crd_inject, s->blkcipher.iv_len, iv);
-	}
-
-	/*
-	 * The output buffer consists of the cipher text followed by
-	 * the hash when encrypting.  For decryption it only contains
-	 * the plain text.
-	 */
-	if (op_type == CHCR_ENCRYPT_OP) {
-		if (crde->crd_len + hash_size_in_response > MAX_REQUEST_SIZE)
-			return (EFBIG);
-	} else {
-		if (crde->crd_len > MAX_REQUEST_SIZE)
-			return (EFBIG);
-	}
-	sglist_reset(sc->sg_dsgl);
-	error = sglist_append_sglist(sc->sg_dsgl, sc->sg_crp, crde->crd_skip,
-	    crde->crd_len);
-	if (error)
-		return (error);
-	if (op_type == CHCR_ENCRYPT_OP) {
-		error = sglist_append_sglist(sc->sg_dsgl, sc->sg_crp,
-		    crda->crd_inject, hash_size_in_response);
-		if (error)
-			return (error);
-	}
-	dsgl_nsegs = ccr_count_sgl(sc->sg_dsgl, DSGL_SGE_MAXLEN);
-	if (dsgl_nsegs > MAX_RX_PHYS_DSGL_SGE)
-		return (EFBIG);
-	dsgl_len = ccr_phys_dsgl_len(dsgl_nsegs);
-
-	/* PADs must be 128-bit aligned. */
-	iopad_size = roundup2(s->hmac.partial_digest_len, 16);
-
-	/*
-	 * The 'key' part of the key context consists of the key followed
-	 * by the IPAD and OPAD.
-	 */
-	kctx_len = roundup2(s->blkcipher.key_len, 16) + iopad_size * 2;
-	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dsgl_len);
-
-	/*
-	 * The input buffer consists of the IV, any AAD, and then the
-	 * cipher/plain text.  For decryption requests the hash is
-	 * appended after the cipher text.
-	 */
-	if (crda->crd_skip < crde->crd_skip) {
-		if (crda->crd_skip + crda->crd_len > crde->crd_skip)
-			aad_len = (crde->crd_skip - crda->crd_skip);
-		else
-			aad_len = crda->crd_len;
-	} else
-		aad_len = 0;
-	input_len = aad_len + crde->crd_len;
-
-	/*
-	 * The firmware hangs if sent a request which is a
-	 * bit smaller than MAX_REQUEST_SIZE.  In particular, the
-	 * firmware appears to require 512 - 16 bytes of spare room
-	 * along with the size of the hash even if the hash isn't
-	 * included in the input buffer.
-	 */
-	if (input_len + roundup2(axf->hashsize, 16) + (512 - 16) >
-	    MAX_REQUEST_SIZE)
-		return (EFBIG);
-	if (op_type == CHCR_DECRYPT_OP)
-		input_len += hash_size_in_response;
-	if (ccr_use_imm_data(transhdr_len, s->blkcipher.iv_len + input_len)) {
-		imm_len = input_len;
-		sgl_nsegs = 0;
-		sgl_len = 0;
-	} else {
-		imm_len = 0;
-		sglist_reset(sc->sg_ulptx);
-		if (aad_len != 0) {
-			error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp,
-			    crda->crd_skip, aad_len);
-			if (error)
-				return (error);
-		}
-		error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp,
-		    crde->crd_skip, crde->crd_len);
-		if (error)
-			return (error);
-		if (op_type == CHCR_DECRYPT_OP) {
-			error = sglist_append_sglist(sc->sg_ulptx, sc->sg_crp,
-			    crda->crd_inject, hash_size_in_response);
-			if (error)
-				return (error);
-		}
-		sgl_nsegs = sc->sg_ulptx->sg_nseg;
-		sgl_len = ccr_ulptx_sgl_len(sgl_nsegs);
-	}
-
-	/*
-	 * Any auth-only data before the cipher region is marked as AAD.
-	 * Auth-data that overlaps with the cipher region is placed in
-	 * the auth section.
-	 */
-	if (aad_len != 0) {
-		aad_start = s->blkcipher.iv_len + 1;
-		aad_stop = aad_start + aad_len - 1;
-	} else {
-		aad_start = 0;
-		aad_stop = 0;
-	}
-	cipher_start = s->blkcipher.iv_len + aad_len + 1;
-	if (op_type == CHCR_DECRYPT_OP)
-		cipher_stop = hash_size_in_response;
-	else
-		cipher_stop = 0;
-	if (aad_len == crda->crd_len) {
-		auth_start = 0;
-		auth_stop = 0;
-	} else {
-		if (aad_len != 0)
-			auth_start = cipher_start;
-		else
-			auth_start = s->blkcipher.iv_len + crda->crd_skip -
-			    crde->crd_skip + 1;
-		auth_stop = (crde->crd_skip + crde->crd_len) -
-		    (crda->crd_skip + crda->crd_len) + cipher_stop;
-	}
-	if (op_type == CHCR_DECRYPT_OP)
-		auth_insert = hash_size_in_response;
-	else
-		auth_insert = 0;
-
-	wr_len = roundup2(transhdr_len, 16) + roundup2(imm_len, 16) + sgl_len;
-	if (iv_loc == IV_IMMEDIATE)
-		wr_len += s->blkcipher.iv_len;
-	wr = alloc_wrqe(wr_len, sc->txq);
-	if (wr == NULL) {
-		sc->stats_wr_nomem++;
-		return (ENOMEM);
-	}
-	crwr = wrtod(wr);
-	memset(crwr, 0, wr_len);
-
-	ccr_populate_wreq(sc, crwr, kctx_len, wr_len, sid, imm_len, sgl_len,
-	    op_type == CHCR_DECRYPT_OP ? hash_size_in_response : 0, iv_loc,
-	    crp);
-
-	/* XXX: Hardcodes SGE loopback channel of 0. */
-	crwr->sec_cpl.op_ivinsrtofst = htobe32(
-	    V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) |
-	    V_CPL_TX_SEC_PDU_RXCHID(sc->tx_channel_id) |
-	    V_CPL_TX_SEC_PDU_ACKFOLLOWS(0) | V_CPL_TX_SEC_PDU_ULPTXLPBK(1) |
-	    V_CPL_TX_SEC_PDU_CPLLEN(2) | V_CPL_TX_SEC_PDU_PLACEHOLDER(0) |
-	    V_CPL_TX_SEC_PDU_IVINSRTOFST(1));
-
-	crwr->sec_cpl.pldlen = htobe32(s->blkcipher.iv_len + input_len);
-
-	crwr->sec_cpl.aadstart_cipherstop_hi = htobe32(
-	    V_CPL_TX_SEC_PDU_AADSTART(aad_start) |
-	    V_CPL_TX_SEC_PDU_AADSTOP(aad_stop) |
-	    V_CPL_TX_SEC_PDU_CIPHERSTART(cipher_start) |
-	    V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(cipher_stop >> 4));
-	crwr->sec_cpl.cipherstop_lo_authinsert = htobe32(
-	    V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(cipher_stop & 0xf) |
-	    V_CPL_TX_SEC_PDU_AUTHSTART(auth_start) |
-	    V_CPL_TX_SEC_PDU_AUTHSTOP(auth_stop) |
-	    V_CPL_TX_SEC_PDU_AUTHINSERT(auth_insert));
-

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

