Re: git: d5507f9e4366 - main - nvme: Separate total failures from I/O failures

From: Zhenlei Huang <zlei_at_FreeBSD.org>
Date: Fri, 16 Aug 2024 03:12:05 UTC
Hi Warner,

I'm not sure, but this change seems to include lots of unintended changes (maybe some local WIP).

> On Aug 16, 2024, at 10:30 AM, Warner Losh <imp@FreeBSD.org> wrote:
> 
> The branch main has been updated by imp:
> 
> URL: https://cgit.FreeBSD.org/src/commit/?id=d5507f9e436698ac17dc5ace7ef58493988a9b04
> 
> commit d5507f9e436698ac17dc5ace7ef58493988a9b04
> Author:     Warner Losh <imp@FreeBSD.org>
> AuthorDate: 2024-08-14 22:55:49 +0000
> Commit:     Warner Losh <imp@FreeBSD.org>
> CommitDate: 2024-08-16 02:22:18 +0000
> 
>    nvme: Separate total failures from I/O failures
> 
>    When it's a I/O failure, we can still send admin commands. Separate out
>    the admin failures and flag them as such so that we can still send admin
>    commands on half-failed drives.
> 
>    Fixes: 9229b3105d88 (nvme: Fail passthrough commands right away in failed state)
>    Sponsored by: Netflix
> ---
> sys/amd64/conf/IOSCHED                             |    2 +
> sys/amd64/conf/MPI3MR                              |   10 +
> sys/arm64/conf/GENERIC16K                          |    4 +
> .../linuxkpi/common/include/linux/#compiler.h#     |  117 +
> sys/contrib/dev/iwlwifi/fw/api/soc.h               |   35 +
> sys/contrib/zlib/contrib/asm686/README.686         |   51 +
> sys/contrib/zlib/contrib/asm686/match.S            |  357 +
> sys/dev/ice/ice_sriov.c                            |  595 ++
> sys/dev/ice/ice_sriov.h                            |   64 +
> sys/dev/mps/mpi/mpi2_pci.h                         |  141 +
> sys/dev/nvme/nvme_ctrlr.c                          |   46 +-
> sys/dev/nvme/nvme_private.h                        |    1 +
> sys/dev/nvme/nvme_qpair.c                          |   23 +-
> sys/dev/nvme/nvme_sim.c                            |   13 +-
> sys/dev/sound/pci/aureal.c                         |  686 ++
> sys/dev/sound/pci/aureal.h                         |   99 +
> sys/dev/sound/pci/ds1-fw.h                         | 1602 ++++
> sys/dev/sound/pci/ds1.c                            | 1103 +++
> sys/dev/sound/pci/ds1.h                            |  146 +
> sys/dev/sound/pci/maestro.c                        | 2043 +++++
> sys/dev/sound/pci/maestro_reg.h                    |  381 +
> sys/kern/bsduser-syscalls.c                        | 8712 ++++++++++++++++++++
> sys/modules/sound/driver/ds1/Makefile              |    8 +
> sys/modules/sound/driver/maestro/Makefile          |    8 +
> 24 files changed, 16219 insertions(+), 28 deletions(-)
> 
> diff --git a/sys/amd64/conf/IOSCHED b/sys/amd64/conf/IOSCHED
> new file mode 100644
> index 000000000000..e15106bc4c1f
> --- /dev/null
> +++ b/sys/amd64/conf/IOSCHED
> @@ -0,0 +1,2 @@
> +include "GENERIC"
> +options CAM_IOSCHED_DYNAMIC
> diff --git a/sys/amd64/conf/MPI3MR b/sys/amd64/conf/MPI3MR
> new file mode 100644
> index 000000000000..99e5244cb49d
> --- /dev/null
> +++ b/sys/amd64/conf/MPI3MR
> @@ -0,0 +1,10 @@
> +include GENERIC
> +
> +device mpi3mr
> +# All the debugging options
> +options DEADLKRES # Enable the deadlock resolver
> +options INVARIANTS # Enable calls of extra sanity checking
> +options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
> +options QUEUE_MACRO_DEBUG_TRASH # Trash queue(2) internal pointers on invalidation
> +options WITNESS # Enable checks to detect deadlocks and cycles
> +options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
> diff --git a/sys/arm64/conf/GENERIC16K b/sys/arm64/conf/GENERIC16K
> new file mode 100644
> index 000000000000..9bf9e2dadb08
> --- /dev/null
> +++ b/sys/arm64/conf/GENERIC16K
> @@ -0,0 +1,4 @@
> +include		"GENERIC"
> +
> +ident		GENERIC_16K
> +
> diff --git a/sys/compat/linuxkpi/common/include/linux/#compiler.h# b/sys/compat/linuxkpi/common/include/linux/#compiler.h#
> new file mode 100644
> index 000000000000..1177674aa68f
> --- /dev/null
> +++ b/sys/compat/linuxkpi/common/include/linux/#compiler.h#
> @@ -0,0 +1,117 @@
> +/*-
> + * Copyright (c) 2010 Isilon Systems, Inc.
> + * Copyright (c) 2010 iX Systems, Inc.
> + * Copyright (c) 2010 Panasas, Inc.
> + * Copyright (c) 2013-2016 Mellanox Technologies, Ltd.
> + * Copyright (c) 2015 François Tigeot
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice unmodified, this list of conditions, and the following
> + *    disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + * $FreeBSD$
> + */
> +#ifndef	_LINUX_COMPILER_H_
> +#define	_LINUX_COMPILER_H_
> +
> +#include <sys/cdefs.h>
> +
> +#define __user
> +#define __kernel
> +#define __safe
> +#define __force
> +#define __nocast
> +#define __iomem
> +#define __chk_user_ptr(x)		((void)0)
> +#define __chk_io_ptr(x)			((void)0)
> +#define __builtin_warning(x, y...)	(1)
> +#define __acquires(x)
> +#define __releases(x)
> +#define __acquire(x)			do { } while (0)
> +#define __release(x)			do { } while (0)
> +#define __cond_lock(x,c)		(c)
> +#define	__bitwise
> +#define __devinitdata
> +#define	__deprecated
> +#define __init
> +#define	__initconst
> +#define	__devinit
> +#define	__devexit
> +#define __exit
> +#define	__rcu
> +#define	__percpu
> +#define	__weak __weak_symbol
> +#define	__malloc
> +#define	___stringify(...)		#__VA_ARGS__
> +#define	__stringify(...)		___stringify(__VA_ARGS__)
> +#define	__attribute_const__		__attribute__((__const__))
> +#undef __always_inline
> +#define	__always_inline			inline
> +#define	noinline			__noinline
> +#define	____cacheline_aligned		__aligned(CACHE_LINE_SIZE)
> +
> +#define	likely(x)			__builtin_expect(!!(x), 1)
> +#define	unlikely(x)			__builtin_expect(!!(x), 0)
> +#define typeof(x)			__typeof(x)
> +
> +#define	uninitialized_var(x)		x = x
> +#define	__maybe_unused			__unused
> +#define	__always_unused			__unused
> +#define	__must_check			__result_use_check
> +
> +#define	__printf(a,b)			__printflike(a,b)
> +
> +#define	barrier()			__asm__ __volatile__("": : :"memory")
> +
> +#if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 50000
> +/* Moved from drm_os_freebsd.h */
> +#define	lower_32_bits(n)		((u32)(n))
> +#define	upper_32_bits(n)		((u32)(((n) >> 16) >> 16))
> +#endif
> +
> +#define	___PASTE(a,b) a##b
> +#define	__PASTE(a,b) ___PASTE(a,b)
> +
> +#define	ACCESS_ONCE(x)			(*(volatile __typeof(x) *)&(x))
> +
> +#define	WRITE_ONCE(x,v) do {		\
> +	barrier();			\
> +	ACCESS_ONCE(x) = (v);		\
> +	barrier();			\
> +} while (0)
> +
> +#define	READ_ONCE(x) ({			\
> +	__typeof(x) __var = ({		\
> +		barrier();		\
> +		ACCESS_ONCE(x);		\
> +	});				\
> +	barrier();			\
> +	__var;				\
> +})
> +
> +#define	lockless_dereference(p) READ_ONCE(p)
> +
> +#define	_AT(T,X)	((T)(X))
> +
> +#define	__same_type(a, b)	__builtin_types_compatible_p(typeof(a), typeof(b))
> +#define	__must_be_array(a)	__same_type(a, &(a)[0])
> +
> +#endif	/* _LINUX_COMPILER_H_ */
> diff --git a/sys/contrib/dev/iwlwifi/fw/api/soc.h b/sys/contrib/dev/iwlwifi/fw/api/soc.h
> new file mode 100644
> index 000000000000..c5df1171462b
> --- /dev/null
> +++ b/sys/contrib/dev/iwlwifi/fw/api/soc.h
> @@ -0,0 +1,35 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
> +/*
> + * Copyright (C) 2012-2014, 2019-2020 Intel Corporation
> + * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
> + * Copyright (C) 2016-2017 Intel Deutschland GmbH
> + */
> +#ifndef __iwl_fw_api_soc_h__
> +#define __iwl_fw_api_soc_h__
> +
> +#define SOC_CONFIG_CMD_FLAGS_DISCRETE		BIT(0)
> +#define SOC_CONFIG_CMD_FLAGS_LOW_LATENCY	BIT(1)
> +
> +#define SOC_FLAGS_LTR_APPLY_DELAY_MASK		0xc
> +#define SOC_FLAGS_LTR_APPLY_DELAY_NONE		0
> +#define SOC_FLAGS_LTR_APPLY_DELAY_200		1
> +#define SOC_FLAGS_LTR_APPLY_DELAY_2500		2
> +#define SOC_FLAGS_LTR_APPLY_DELAY_1820		3
> +
> +/**
> + * struct iwl_soc_configuration_cmd - Set device stabilization latency
> + *
> + * @flags: soc settings flags.  In VER_1, we can only set the DISCRETE
> + *	flag, because the FW treats the whole value as an integer. In
> + *	VER_2, we can set the bits independently.
> + * @latency: time for SOC to ensure stable power & XTAL
> + */
> +struct iwl_soc_configuration_cmd {
> +	__le32 flags;
> +	__le32 latency;
> +} __packed; /*
> +	     * SOC_CONFIGURATION_CMD_S_VER_1 (see description above)
> +	     * SOC_CONFIGURATION_CMD_S_VER_2
> +	     */
> +
> +#endif /* __iwl_fw_api_soc_h__ */
> diff --git a/sys/contrib/zlib/contrib/asm686/README.686 b/sys/contrib/zlib/contrib/asm686/README.686
> new file mode 100644
> index 000000000000..a0bf3bea4aff
> --- /dev/null
> +++ b/sys/contrib/zlib/contrib/asm686/README.686
> @@ -0,0 +1,51 @@
> +This is a patched version of zlib, modified to use
> +Pentium-Pro-optimized assembly code in the deflation algorithm. The
> +files changed/added by this patch are:
> +
> +README.686
> +match.S
> +
> +The speedup that this patch provides varies, depending on whether the
> +compiler used to build the original version of zlib falls afoul of the
> +PPro's speed traps. My own tests show a speedup of around 10-20% at
> +the default compression level, and 20-30% using -9, against a version
> +compiled using gcc 2.7.2.3. Your mileage may vary.
> +
> +Note that this code has been tailored for the PPro/PII in particular,
> +and will not perform particuarly well on a Pentium.
> +
> +If you are using an assembler other than GNU as, you will have to
> +translate match.S to use your assembler's syntax. (Have fun.)
> +
> +Brian Raiter
> +breadbox@muppetlabs.com
> +April, 1998
> +
> +
> +Added for zlib 1.1.3:
> +
> +The patches come from
> +http://www.muppetlabs.com/~breadbox/software/assembly.html
> +
> +To compile zlib with this asm file, copy match.S to the zlib directory
> +then do:
> +
> +CFLAGS="-O3 -DASMV" ./configure
> +make OBJA=match.o
> +
> +
> +Update:
> +
> +I've been ignoring these assembly routines for years, believing that
> +gcc's generated code had caught up with it sometime around gcc 2.95
> +and the major rearchitecting of the Pentium 4. However, I recently
> +learned that, despite what I believed, this code still has some life
> +in it. On the Pentium 4 and AMD64 chips, it continues to run about 8%
> +faster than the code produced by gcc 4.1.
> +
> +In acknowledgement of its continuing usefulness, I've altered the
> +license to match that of the rest of zlib. Share and Enjoy!
> +
> +Brian Raiter
> +breadbox@muppetlabs.com
> +April, 2007
> diff --git a/sys/contrib/zlib/contrib/asm686/match.S b/sys/contrib/zlib/contrib/asm686/match.S
> new file mode 100644
> index 000000000000..fa421092785d
> --- /dev/null
> +++ b/sys/contrib/zlib/contrib/asm686/match.S
> @@ -0,0 +1,357 @@
> +/* match.S -- x86 assembly version of the zlib longest_match() function.
> + * Optimized for the Intel 686 chips (PPro and later).
> + *
> + * Copyright (C) 1998, 2007 Brian Raiter <breadbox@muppetlabs.com>
> + *
> + * This software is provided 'as-is', without any express or implied
> + * warranty.  In no event will the author be held liable for any damages
> + * arising from the use of this software.
> + *
> + * Permission is granted to anyone to use this software for any purpose,
> + * including commercial applications, and to alter it and redistribute it
> + * freely, subject to the following restrictions:
> + *
> + * 1. The origin of this software must not be misrepresented; you must not
> + *    claim that you wrote the original software. If you use this software
> + *    in a product, an acknowledgment in the product documentation would be
> + *    appreciated but is not required.
> + * 2. Altered source versions must be plainly marked as such, and must not be
> + *    misrepresented as being the original software.
> + * 3. This notice may not be removed or altered from any source distribution.
> + */
> +
> +#ifndef NO_UNDERLINE
> +#define	match_init	_match_init
> +#define	longest_match	_longest_match
> +#endif
> +
> +#define	MAX_MATCH	(258)
> +#define	MIN_MATCH	(3)
> +#define	MIN_LOOKAHEAD	(MAX_MATCH + MIN_MATCH + 1)
> +#define	MAX_MATCH_8	((MAX_MATCH + 7) & ~7)
> +
> +/* stack frame offsets */
> +
> +#define	chainlenwmask		0	/* high word: current chain len	*/
> +					/* low word: s->wmask		*/
> +#define	window			4	/* local copy of s->window	*/
> +#define	windowbestlen		8	/* s->window + bestlen		*/
> +#define	scanstart		16	/* first two bytes of string	*/
> +#define	scanend			12	/* last two bytes of string	*/
> +#define	scanalign		20	/* dword-misalignment of string	*/
> +#define	nicematch		24	/* a good enough match size	*/
> +#define	bestlen			28	/* size of best match so far	*/
> +#define	scan			32	/* ptr to string wanting match	*/
> +
> +#define	LocalVarsSize		(36)
> +/*	saved ebx		36 */
> +/*	saved edi		40 */
> +/*	saved esi		44 */
> +/*	saved ebp		48 */
> +/*	return address		52 */
> +#define	deflatestate		56	/* the function arguments	*/
> +#define	curmatch		60
> +
> +/* All the +zlib1222add offsets are due to the addition of fields
> + *  in zlib in the deflate_state structure since the asm code was first written
> + * (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
> + * (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
> + * if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8").
> + */
> +
> +#define zlib1222add		(8)
> +
> +#define	dsWSize			(36+zlib1222add)
> +#define	dsWMask			(44+zlib1222add)
> +#define	dsWindow		(48+zlib1222add)
> +#define	dsPrev			(56+zlib1222add)
> +#define	dsMatchLen		(88+zlib1222add)
> +#define	dsPrevMatch		(92+zlib1222add)
> +#define	dsStrStart		(100+zlib1222add)
> +#define	dsMatchStart		(104+zlib1222add)
> +#define	dsLookahead		(108+zlib1222add)
> +#define	dsPrevLen		(112+zlib1222add)
> +#define	dsMaxChainLen		(116+zlib1222add)
> +#define	dsGoodMatch		(132+zlib1222add)
> +#define	dsNiceMatch		(136+zlib1222add)
> +
> +
> +.file "match.S"
> +
> +.globl	match_init, longest_match
> +
> +.text
> +
> +/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */
> +.cfi_sections	.debug_frame
> +
> +longest_match:
> +
> +.cfi_startproc
> +/* Save registers that the compiler may be using, and adjust %esp to	*/
> +/* make room for our stack frame.					*/
> +
> +		pushl	%ebp
> +		.cfi_def_cfa_offset 8
> +		.cfi_offset ebp, -8
> +		pushl	%edi
> +		.cfi_def_cfa_offset 12
> +		pushl	%esi
> +		.cfi_def_cfa_offset 16
> +		pushl	%ebx
> +		.cfi_def_cfa_offset 20
> +		subl	$LocalVarsSize, %esp
> +		.cfi_def_cfa_offset LocalVarsSize+20
> +
> +/* Retrieve the function arguments. %ecx will hold cur_match		*/
> +/* throughout the entire function. %edx will hold the pointer to the	*/
> +/* deflate_state structure during the function's setup (before		*/
> +/* entering the main loop).						*/
> +
> +		movl	deflatestate(%esp), %edx
> +		movl	curmatch(%esp), %ecx
> +
> +/* uInt wmask = s->w_mask;						*/
> +/* unsigned chain_length = s->max_chain_length;				*/
> +/* if (s->prev_length >= s->good_match) {				*/
> +/*     chain_length >>= 2;						*/
> +/* }									*/
> + 
> +		movl	dsPrevLen(%edx), %eax
> +		movl	dsGoodMatch(%edx), %ebx
> +		cmpl	%ebx, %eax
> +		movl	dsWMask(%edx), %eax
> +		movl	dsMaxChainLen(%edx), %ebx
> +		jl	LastMatchGood
> +		shrl	$2, %ebx
> +LastMatchGood:
> +
> +/* chainlen is decremented once beforehand so that the function can	*/
> +/* use the sign flag instead of the zero flag for the exit test.	*/
> +/* It is then shifted into the high word, to make room for the wmask	*/
> +/* value, which it will always accompany.				*/
> +
> +		decl	%ebx
> +		shll	$16, %ebx
> +		orl	%eax, %ebx
> +		movl	%ebx, chainlenwmask(%esp)
> +
> +/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;	*/
> +
> +		movl	dsNiceMatch(%edx), %eax
> +		movl	dsLookahead(%edx), %ebx
> +		cmpl	%eax, %ebx
> +		jl	LookaheadLess
> +		movl	%eax, %ebx
> +LookaheadLess:	movl	%ebx, nicematch(%esp)
> +
> +/* register Bytef *scan = s->window + s->strstart;			*/
> +
> +		movl	dsWindow(%edx), %esi
> +		movl	%esi, window(%esp)
> +		movl	dsStrStart(%edx), %ebp
> +		lea	(%esi,%ebp), %edi
> +		movl	%edi, scan(%esp)
> +
> +/* Determine how many bytes the scan ptr is off from being		*/
> +/* dword-aligned.							*/
> +
> +		movl	%edi, %eax
> +		negl	%eax
> +		andl	$3, %eax
> +		movl	%eax, scanalign(%esp)
> +
> +/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ?			*/
> +/*     s->strstart - (IPos)MAX_DIST(s) : NIL;				*/
> +
> +		movl	dsWSize(%edx), %eax
> +		subl	$MIN_LOOKAHEAD, %eax
> +		subl	%eax, %ebp
> +		jg	LimitPositive
> +		xorl	%ebp, %ebp
> +LimitPositive:
> +
> +/* int best_len = s->prev_length;					*/
> +
> +		movl	dsPrevLen(%edx), %eax
> +		movl	%eax, bestlen(%esp)
> +
> +/* Store the sum of s->window + best_len in %esi locally, and in %esi.	*/
> +
> +		addl	%eax, %esi
> +		movl	%esi, windowbestlen(%esp)
> +
> +/* register ush scan_start = *(ushf*)scan;				*/
> +/* register ush scan_end   = *(ushf*)(scan+best_len-1);			*/
> +/* Posf *prev = s->prev;						*/
> +
> +		movzwl	(%edi), %ebx
> +		movl	%ebx, scanstart(%esp)
> +		movzwl	-1(%edi,%eax), %ebx
> +		movl	%ebx, scanend(%esp)
> +		movl	dsPrev(%edx), %edi
> +
> +/* Jump into the main loop.						*/
> +
> +		movl	chainlenwmask(%esp), %edx
> +		jmp	LoopEntry
> +
> +.balign 16
> +
> +/* do {
> + *     match = s->window + cur_match;
> + *     if (*(ushf*)(match+best_len-1) != scan_end ||
> + *         *(ushf*)match != scan_start) continue;
> + *     [...]
> + * } while ((cur_match = prev[cur_match & wmask]) > limit
> + *          && --chain_length != 0);
> + *
> + * Here is the inner loop of the function. The function will spend the
> + * majority of its time in this loop, and majority of that time will
> + * be spent in the first ten instructions.
> + *
> + * Within this loop:
> + * %ebx = scanend
> + * %ecx = curmatch
> + * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
> + * %esi = windowbestlen - i.e., (window + bestlen)
> + * %edi = prev
> + * %ebp = limit
> + */
> +LookupLoop:
> +		andl	%edx, %ecx
> +		movzwl	(%edi,%ecx,2), %ecx
> +		cmpl	%ebp, %ecx
> +		jbe	LeaveNow
> +		subl	$0x00010000, %edx
> +		js	LeaveNow
> +LoopEntry:	movzwl	-1(%esi,%ecx), %eax
> +		cmpl	%ebx, %eax
> +		jnz	LookupLoop
> +		movl	window(%esp), %eax
> +		movzwl	(%eax,%ecx), %eax
> +		cmpl	scanstart(%esp), %eax
> +		jnz	LookupLoop
> +
> +/* Store the current value of chainlen.					*/
> +
> +		movl	%edx, chainlenwmask(%esp)
> +
> +/* Point %edi to the string under scrutiny, and %esi to the string we	*/
> +/* are hoping to match it up with. In actuality, %esi and %edi are	*/
> +/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is	*/
> +/* initialized to -(MAX_MATCH_8 - scanalign).				*/
> +
> +		movl	window(%esp), %esi
> +		movl	scan(%esp), %edi
> +		addl	%ecx, %esi
> +		movl	scanalign(%esp), %eax
> +		movl	$(-MAX_MATCH_8), %edx
> +		lea	MAX_MATCH_8(%edi,%eax), %edi
> +		lea	MAX_MATCH_8(%esi,%eax), %esi
> +
> +/* Test the strings for equality, 8 bytes at a time. At the end,
> + * adjust %edx so that it is offset to the exact byte that mismatched.
> + *
> + * We already know at this point that the first three bytes of the
> + * strings match each other, and they can be safely passed over before
> + * starting the compare loop. So what this code does is skip over 0-3
> + * bytes, as much as necessary in order to dword-align the %edi
> + * pointer. (%esi will still be misaligned three times out of four.)
> + *
> + * It should be confessed that this loop usually does not represent
> + * much of the total running time. Replacing it with a more
> + * straightforward "rep cmpsb" would not drastically degrade
> + * performance.
> + */
> +LoopCmps:
> +		movl	(%esi,%edx), %eax
> +		xorl	(%edi,%edx), %eax
> +		jnz	LeaveLoopCmps
> +		movl	4(%esi,%edx), %eax
> +		xorl	4(%edi,%edx), %eax
> +		jnz	LeaveLoopCmps4
> +		addl	$8, %edx
> +		jnz	LoopCmps
> +		jmp	LenMaximum
> +LeaveLoopCmps4:	addl	$4, %edx
> +LeaveLoopCmps:	testl	$0x0000FFFF, %eax
> +		jnz	LenLower
> +		addl	$2, %edx
> +		shrl	$16, %eax
> +LenLower:	subb	$1, %al
> +		adcl	$0, %edx
> +
> +/* Calculate the length of the match. If it is longer than MAX_MATCH,	*/
> +/* then automatically accept it as the best possible match and leave.	*/
> +
> +		lea	(%edi,%edx), %eax
> +		movl	scan(%esp), %edi
> +		subl	%edi, %eax
> +		cmpl	$MAX_MATCH, %eax
> +		jge	LenMaximum
> +
> +/* If the length of the match is not longer than the best match we	*/
> +/* have so far, then forget it and return to the lookup loop.		*/
> +
> +		movl	deflatestate(%esp), %edx
> +		movl	bestlen(%esp), %ebx
> +		cmpl	%ebx, %eax
> +		jg	LongerMatch
> +		movl	windowbestlen(%esp), %esi
> +		movl	dsPrev(%edx), %edi
> +		movl	scanend(%esp), %ebx
> +		movl	chainlenwmask(%esp), %edx
> +		jmp	LookupLoop
> +
> +/*         s->match_start = cur_match;					*/
> +/*         best_len = len;						*/
> +/*         if (len >= nice_match) break;				*/
> +/*         scan_end = *(ushf*)(scan+best_len-1);			*/
> +
> +LongerMatch:	movl	nicematch(%esp), %ebx
> +		movl	%eax, bestlen(%esp)
> +		movl	%ecx, dsMatchStart(%edx)
> +		cmpl	%ebx, %eax
> +		jge	LeaveNow
> +		movl	window(%esp), %esi
> +		addl	%eax, %esi
> +		movl	%esi, windowbestlen(%esp)
> +		movzwl	-1(%edi,%eax), %ebx
> +		movl	dsPrev(%edx), %edi
> +		movl	%ebx, scanend(%esp)
> +		movl	chainlenwmask(%esp), %edx
> +		jmp	LookupLoop
> +
> +/* Accept the current string, with the maximum possible length.		*/
> +
> +LenMaximum:	movl	deflatestate(%esp), %edx
> +		movl	$MAX_MATCH, bestlen(%esp)
> +		movl	%ecx, dsMatchStart(%edx)
> +
> +/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len;		*/
> +/* return s->lookahead;							*/
> +
> +LeaveNow:
> +		movl	deflatestate(%esp), %edx
> +		movl	bestlen(%esp), %ebx
> +		movl	dsLookahead(%edx), %eax
> +		cmpl	%eax, %ebx
> +		jg	LookaheadRet
> +		movl	%ebx, %eax
> +LookaheadRet:
> +
> +/* Restore the stack and return from whence we came.			*/
> +
> +		addl	$LocalVarsSize, %esp
> +		.cfi_def_cfa_offset 20
> +		popl	%ebx
> +		.cfi_def_cfa_offset 16
> +		popl	%esi
> +		.cfi_def_cfa_offset 12
> +		popl	%edi
> +		.cfi_def_cfa_offset 8
> +		popl	%ebp
> +		.cfi_def_cfa_offset 4
> +.cfi_endproc
> +match_init:	ret
> diff --git a/sys/dev/ice/ice_sriov.c b/sys/dev/ice/ice_sriov.c
> new file mode 100644
> index 000000000000..c0521e667fa2
> --- /dev/null
> +++ b/sys/dev/ice/ice_sriov.c
> @@ -0,0 +1,595 @@
> +/* SPDX-License-Identifier: BSD-3-Clause */
> +/*  Copyright (c) 2021, Intel Corporation
> + *  All rights reserved.
> + *
> + *  Redistribution and use in source and binary forms, with or without
> + *  modification, are permitted provided that the following conditions are met:
> + *
> + *   1. Redistributions of source code must retain the above copyright notice,
> + *      this list of conditions and the following disclaimer.
> + *
> + *   2. Redistributions in binary form must reproduce the above copyright
> + *      notice, this list of conditions and the following disclaimer in the
> + *      documentation and/or other materials provided with the distribution.
> + *
> + *   3. Neither the name of the Intel Corporation nor the names of its
> + *      contributors may be used to endorse or promote products derived from
> + *      this software without specific prior written permission.
> + *
> + *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
> + *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
> + *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
> + *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
> + *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
> + *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
> + *  POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include "ice_common.h"
> +#include "ice_sriov.h"
> +
> +/**
> + * ice_aq_send_msg_to_vf
> + * @hw: pointer to the hardware structure
> + * @vfid: VF ID to send msg
> + * @v_opcode: opcodes for VF-PF communication
> + * @v_retval: return error code
> + * @msg: pointer to the msg buffer
> + * @msglen: msg length
> + * @cd: pointer to command details
> + *
> + * Send message to VF driver (0x0802) using mailbox
> + * queue and asynchronously sending message via
> + * ice_sq_send_cmd() function
> + */
> +enum ice_status
> +ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
> +		      u8 *msg, u16 msglen, struct ice_sq_cd *cd)
> +{
> +	struct ice_aqc_pf_vf_msg *cmd;
> +	struct ice_aq_desc desc;
> +
> +	ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_vf);
> +
> +	cmd = &desc.params.virt;
> +	cmd->id = CPU_TO_LE32(vfid);
> +
> +	desc.cookie_high = CPU_TO_LE32(v_opcode);
> +	desc.cookie_low = CPU_TO_LE32(v_retval);
> +
> +	if (msglen)
> +		desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD);
> +
> +	return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd);
> +}
> +
> +/**
> + * ice_aq_send_msg_to_pf
> + * @hw: pointer to the hardware structure
> + * @v_opcode: opcodes for VF-PF communication
> + * @v_retval: return error code
> + * @msg: pointer to the msg buffer
> + * @msglen: msg length
> + * @cd: pointer to command details
> + *
> + * Send message to PF driver using mailbox queue. By default, this
> + * message is sent asynchronously, i.e. ice_sq_send_cmd()
> + * does not wait for completion before returning.
> + */
> +enum ice_status
> +ice_aq_send_msg_to_pf(struct ice_hw *hw, enum virtchnl_ops v_opcode,
> +		      enum ice_status v_retval, u8 *msg, u16 msglen,
> +		      struct ice_sq_cd *cd)
> +{
> +	struct ice_aq_desc desc;
> +
> +	ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_pf);
> +	desc.cookie_high = CPU_TO_LE32(v_opcode);
> +	desc.cookie_low = CPU_TO_LE32(v_retval);
> +
> +	if (msglen)
> +		desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD);
> +
> +	return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd);
> +}
> +
> +/**
> + * ice_conv_link_speed_to_virtchnl
> + * @adv_link_support: determines the format of the returned link speed
> + * @link_speed: variable containing the link_speed to be converted
> + *
> + * Convert link speed supported by HW to link speed supported by virtchnl.
> + * If adv_link_support is true, then return link speed in Mbps. Else return
> + * link speed as a VIRTCHNL_LINK_SPEED_* casted to a u32. Note that the caller
> + * needs to cast back to an enum virtchnl_link_speed in the case where
> + * adv_link_support is false, but when adv_link_support is true the caller can
> + * expect the speed in Mbps.
> + */
> +u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
> +{
> +	u32 speed;
> +
> +	if (adv_link_support)
> +		switch (link_speed) {
> +		case ICE_AQ_LINK_SPEED_10MB:
> +			speed = ICE_LINK_SPEED_10MBPS;
> +			break;
> +		case ICE_AQ_LINK_SPEED_100MB:
> +			speed = ICE_LINK_SPEED_100MBPS;
> +			break;
> +		case ICE_AQ_LINK_SPEED_1000MB:
> +			speed = ICE_LINK_SPEED_1000MBPS;
> +			break;
> +		case ICE_AQ_LINK_SPEED_2500MB:
> +			speed = ICE_LINK_SPEED_2500MBPS;
> +			break;
> +		case ICE_AQ_LINK_SPEED_5GB:
> +			speed = ICE_LINK_SPEED_5000MBPS;
> +			break;
> +		case ICE_AQ_LINK_SPEED_10GB:
> +			speed = ICE_LINK_SPEED_10000MBPS;
> +			break;
> +		case ICE_AQ_LINK_SPEED_20GB:
> +			speed = ICE_LINK_SPEED_20000MBPS;
> +			break;
> +		case ICE_AQ_LINK_SPEED_25GB:
> +			speed = ICE_LINK_SPEED_25000MBPS;
> +			break;
> +		case ICE_AQ_LINK_SPEED_40GB:
> +			speed = ICE_LINK_SPEED_40000MBPS;
> +			break;
> +		case ICE_AQ_LINK_SPEED_50GB:
> +			speed = ICE_LINK_SPEED_50000MBPS;
> +			break;
> +		case ICE_AQ_LINK_SPEED_100GB:
> +			speed = ICE_LINK_SPEED_100000MBPS;
> +			break;
> +		default:
> +			speed = ICE_LINK_SPEED_UNKNOWN;
> +			break;
> +		}
> +	else
> +		/* Virtchnl speeds are not defined for every speed supported in
> +		 * the hardware. To maintain compatibility with older AVF
> +		 * drivers, while reporting the speed the new speed values are
> +		 * resolved to the closest known virtchnl speeds
> +		 */
> +		switch (link_speed) {
> +		case ICE_AQ_LINK_SPEED_10MB:
> +		case ICE_AQ_LINK_SPEED_100MB:
> +			speed = (u32)VIRTCHNL_LINK_SPEED_100MB;
> +			break;
> +		case ICE_AQ_LINK_SPEED_1000MB:
> +		case ICE_AQ_LINK_SPEED_2500MB:
> +		case ICE_AQ_LINK_SPEED_5GB:
> +			speed = (u32)VIRTCHNL_LINK_SPEED_1GB;
> +			break;
> +		case ICE_AQ_LINK_SPEED_10GB:
> +			speed = (u32)VIRTCHNL_LINK_SPEED_10GB;
> +			break;
> +		case ICE_AQ_LINK_SPEED_20GB:
> +			speed = (u32)VIRTCHNL_LINK_SPEED_20GB;
> +			break;
> +		case ICE_AQ_LINK_SPEED_25GB:
> +			speed = (u32)VIRTCHNL_LINK_SPEED_25GB;
> +			break;
> +		case ICE_AQ_LINK_SPEED_40GB:
> +		case ICE_AQ_LINK_SPEED_50GB:
> +		case ICE_AQ_LINK_SPEED_100GB:
> +			speed = (u32)VIRTCHNL_LINK_SPEED_40GB;
> +			break;
> +		default:
> +			speed = (u32)VIRTCHNL_LINK_SPEED_UNKNOWN;
> +			break;
> +		}
> +
> +	return speed;
> +}
> +
> +/* The mailbox overflow detection algorithm helps to check if there
> + * is a possibility of a malicious VF transmitting too many MBX messages to the
> + * PF.
> + * 1. The mailbox snapshot structure, ice_mbx_snapshot, is initialized during
> + * driver initialization in ice_init_hw() using ice_mbx_init_snapshot().
> + * The struct ice_mbx_snapshot helps to track and traverse a static window of
> + * messages within the mailbox queue while looking for a malicious VF.
> + *
> + * 2. When the caller starts processing its mailbox queue in response to an
> + * interrupt, the structure ice_mbx_snapshot is expected to be cleared before
> + * the algorithm can be run for the first time for that interrupt. This can be
> + * done via ice_mbx_reset_snapshot().
> + *
> + * 3. For every message read by the caller from the MBX Queue, the caller must
> + * call the detection algorithm's entry function ice_mbx_vf_state_handler().
> + * Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is
> + * filled as it is required to be passed to the algorithm.
> + *
> + * 4. Every time a message is read from the MBX queue, a VFId is received which
> + * is passed to the state handler. The boolean output is_malvf of the state
> + * handler ice_mbx_vf_state_handler() serves as an indicator to the caller
> + * whether this VF is malicious or not.
> + *
> + * 5. When a VF is identified to be malicious, the caller can send a message
> + * to the system administrator. The caller can invoke ice_mbx_report_malvf()
> + * to help determine if a malicious VF is to be reported or not. This function
> + * requires the caller to maintain a global bitmap to track all malicious VFs
> + * and pass that to ice_mbx_report_malvf() along with the VFID which was identified
> + * to be malicious by ice_mbx_vf_state_handler().
> + *
> + * 6. The global bitmap maintained by PF can be cleared completely if PF is in
> + * reset or the bit corresponding to a VF can be cleared if that VF is in reset.
> + * When a VF is shut down and brought back up, we assume that the new VF
> + * brought up is not malicious and hence report it if found malicious.
> + *
> + * 7. The function ice_mbx_reset_snapshot() is called to reset the information
> + * in ice_mbx_snapshot for every new mailbox interrupt handled.
> + *
> + * 8. The memory allocated for variables in ice_mbx_snapshot is de-allocated
> + * when driver is unloaded.
> + */
> +#define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M)
> +/* Using the highest value for an unsigned 16-bit value 0xFFFF to indicate that
> + * the max messages check must be ignored in the algorithm
> + */
> +#define ICE_IGNORE_MAX_MSG_CNT	0xFFFF
> +
> +/**
> + * ice_mbx_traverse - Pass through mailbox snapshot
> + * @hw: pointer to the HW struct
> + * @new_state: new algorithm state
> + *
> + * Traversing the mailbox static snapshot without checking
> + * for malicious VFs.
> + */
> +static void
> +ice_mbx_traverse(struct ice_hw *hw,
> +		 enum ice_mbx_snapshot_state *new_state)
> +{
> +	struct ice_mbx_snap_buffer_data *snap_buf;
> +	u32 num_iterations;
> +
> +	snap_buf = &hw->mbx_snapshot.mbx_buf;
> +
> +	/* As mailbox buffer is circular, applying a mask
> +	 * on the incremented iteration count.
> +	 */
> +	num_iterations = ICE_RQ_DATA_MASK(++snap_buf->num_iterations);
> +
> +	/* Checking either of the below conditions to exit snapshot traversal:
> +	 * Condition-1: If the number of iterations in the mailbox is equal to
> +	 * the mailbox head which would indicate that we have reached the end
> +	 * of the static snapshot.
> +	 * Condition-2: If the maximum messages serviced in the mailbox for a
> +	 * given interrupt is the highest possible value then there is no need
> +	 * to check if the number of messages processed is equal to it. If not
> +	 * check if the number of messages processed is greater than or equal
> +	 * to the maximum number of mailbox entries serviced in current work item.
> +	 */
> +	if (num_iterations == snap_buf->head ||
> +	    (snap_buf->max_num_msgs_mbx < ICE_IGNORE_MAX_MSG_CNT &&
> +	     ++snap_buf->num_msg_proc >= snap_buf->max_num_msgs_mbx))
> +		*new_state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
> +}
> +
> +/**
> + * ice_mbx_detect_malvf - Detect malicious VF in snapshot
> + * @hw: pointer to the HW struct
> + * @vf_id: relative virtual function ID
> + * @new_state: new algorithm state
> + * @is_malvf: boolean output to indicate if VF is malicious
> + *
> + * This function tracks the number of asynchronous messages
> + * sent per VF and marks the VF as malicious if it exceeds
> + * the permissible number of messages to send.
> + */
> +static enum ice_status
> +ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id,
> +		     enum ice_mbx_snapshot_state *new_state,
> +		     bool *is_malvf)
> +{
> +	struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
> +
> +	if (vf_id >= snap->mbx_vf.vfcntr_len)
> +		return ICE_ERR_OUT_OF_RANGE;
> +
> +	/* increment the message count in the VF array */
> +	snap->mbx_vf.vf_cntr[vf_id]++;
> +
> +	if (snap->mbx_vf.vf_cntr[vf_id] >= ICE_ASYNC_VF_MSG_THRESHOLD)
> +		*is_malvf = true;
> +
> +	/* continue to iterate through the mailbox snapshot */
> +	ice_mbx_traverse(hw, new_state);
> +
> +	return ICE_SUCCESS;
> +}
> +
> +/**
> + * ice_mbx_reset_snapshot - Reset mailbox snapshot structure
> + * @snap: pointer to mailbox snapshot structure in the ice_hw struct
> + *
> + * Reset the mailbox snapshot structure and clear VF counter array.
> + */
> +static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap)
> *** 15611 LINES SKIPPED ***