git: d5507f9e4366 - main - nvme: Separate total failures from I/O failures
Date: Fri, 16 Aug 2024 02:30:24 UTC
The branch main has been updated by imp: URL: https://cgit.FreeBSD.org/src/commit/?id=d5507f9e436698ac17dc5ace7ef58493988a9b04 commit d5507f9e436698ac17dc5ace7ef58493988a9b04 Author: Warner Losh <imp@FreeBSD.org> AuthorDate: 2024-08-14 22:55:49 +0000 Commit: Warner Losh <imp@FreeBSD.org> CommitDate: 2024-08-16 02:22:18 +0000 nvme: Separate total failures from I/O failures When it's a I/O failure, we can still send admin commands. Separate out the admin failures and flag them as such so that we can still send admin commands on half-failed drives. Fixes: 9229b3105d88 (nvme: Fail passthrough commands right away in failed state) Sponsored by: Netflix --- sys/amd64/conf/IOSCHED | 2 + sys/amd64/conf/MPI3MR | 10 + sys/arm64/conf/GENERIC16K | 4 + .../linuxkpi/common/include/linux/#compiler.h# | 117 + sys/contrib/dev/iwlwifi/fw/api/soc.h | 35 + sys/contrib/zlib/contrib/asm686/README.686 | 51 + sys/contrib/zlib/contrib/asm686/match.S | 357 + sys/dev/ice/ice_sriov.c | 595 ++ sys/dev/ice/ice_sriov.h | 64 + sys/dev/mps/mpi/mpi2_pci.h | 141 + sys/dev/nvme/nvme_ctrlr.c | 46 +- sys/dev/nvme/nvme_private.h | 1 + sys/dev/nvme/nvme_qpair.c | 23 +- sys/dev/nvme/nvme_sim.c | 13 +- sys/dev/sound/pci/aureal.c | 686 ++ sys/dev/sound/pci/aureal.h | 99 + sys/dev/sound/pci/ds1-fw.h | 1602 ++++ sys/dev/sound/pci/ds1.c | 1103 +++ sys/dev/sound/pci/ds1.h | 146 + sys/dev/sound/pci/maestro.c | 2043 +++++ sys/dev/sound/pci/maestro_reg.h | 381 + sys/kern/bsduser-syscalls.c | 8712 ++++++++++++++++++++ sys/modules/sound/driver/ds1/Makefile | 8 + sys/modules/sound/driver/maestro/Makefile | 8 + 24 files changed, 16219 insertions(+), 28 deletions(-) diff --git a/sys/amd64/conf/IOSCHED b/sys/amd64/conf/IOSCHED new file mode 100644 index 000000000000..e15106bc4c1f --- /dev/null +++ b/sys/amd64/conf/IOSCHED @@ -0,0 +1,2 @@ +include "GENERIC" +options CAM_IOSCHED_DYNAMIC diff --git a/sys/amd64/conf/MPI3MR b/sys/amd64/conf/MPI3MR new file mode 100644 index 000000000000..99e5244cb49d --- /dev/null +++ b/sys/amd64/conf/MPI3MR @@ -0,0 +1,10 @@ +include GENERIC + +device mpi3mr +# All the debugging options +options DEADLKRES # Enable the deadlock resolver +options INVARIANTS # Enable calls of extra sanity checking +options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS +options QUEUE_MACRO_DEBUG_TRASH # Trash queue(2) internal pointers on invalidation +options WITNESS # Enable checks to detect deadlocks and cycles +options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed diff --git a/sys/arm64/conf/GENERIC16K b/sys/arm64/conf/GENERIC16K new file mode 100644 index 000000000000..9bf9e2dadb08 --- /dev/null +++ b/sys/arm64/conf/GENERIC16K @@ -0,0 +1,4 @@ +include "GENERIC" + +ident GENERIC_16K + diff --git a/sys/compat/linuxkpi/common/include/linux/#compiler.h# b/sys/compat/linuxkpi/common/include/linux/#compiler.h# new file mode 100644 index 000000000000..1177674aa68f --- /dev/null +++ b/sys/compat/linuxkpi/common/include/linux/#compiler.h# @@ -0,0 +1,117 @@ +/*- + * Copyright (c) 2010 Isilon Systems, Inc. + * Copyright (c) 2010 iX Systems, Inc. + * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. + * Copyright (c) 2015 François Tigeot + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef _LINUX_COMPILER_H_ +#define _LINUX_COMPILER_H_ + +#include <sys/cdefs.h> + +#define __user +#define __kernel +#define __safe +#define __force +#define __nocast +#define __iomem +#define __chk_user_ptr(x) ((void)0) +#define __chk_io_ptr(x) ((void)0) +#define __builtin_warning(x, y...) (1) +#define __acquires(x) +#define __releases(x) +#define __acquire(x) do { } while (0) +#define __release(x) do { } while (0) +#define __cond_lock(x,c) (c) +#define __bitwise +#define __devinitdata +#define __deprecated +#define __init +#define __initconst +#define __devinit +#define __devexit +#define __exit +#define __rcu +#define __percpu +#define __weak __weak_symbol +#define __malloc +#define ___stringify(...) #__VA_ARGS__ +#define __stringify(...) ___stringify(__VA_ARGS__) +#define __attribute_const__ __attribute__((__const__)) +#undef __always_inline +#define __always_inline inline +#define noinline __noinline +#define ____cacheline_aligned __aligned(CACHE_LINE_SIZE) + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define typeof(x) __typeof(x) + +#define uninitialized_var(x) x = x +#define __maybe_unused __unused +#define __always_unused __unused +#define __must_check __result_use_check + +#define __printf(a,b) __printflike(a,b) + +#define barrier() __asm__ __volatile__("": : :"memory") + +#if defined(LINUXKPI_VERSION) && LINUXKPI_VERSION >= 50000 +/* Moved from drm_os_freebsd.h */ +#define lower_32_bits(n) ((u32)(n)) +#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) +#endif + +#define ___PASTE(a,b) a##b +#define __PASTE(a,b) ___PASTE(a,b) + +#define ACCESS_ONCE(x) (*(volatile __typeof(x) *)&(x)) + +#define WRITE_ONCE(x,v) do { \ + barrier(); \ + ACCESS_ONCE(x) = (v); \ + barrier(); \ +} while (0) + +#define READ_ONCE(x) ({ \ + __typeof(x) __var = ({ \ + barrier(); \ + ACCESS_ONCE(x); \ + }); \ + barrier(); \ + __var; \ +}) + +#define lockless_dereference(p) READ_ONCE(p) + +#define _AT(T,X) ((T)(X)) + +#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#define __must_be_array(a) __same_type(a, &(a)[0]) + +#endif /* _LINUX_COMPILER_H_ */ diff --git a/sys/contrib/dev/iwlwifi/fw/api/soc.h b/sys/contrib/dev/iwlwifi/fw/api/soc.h new file mode 100644 index 000000000000..c5df1171462b --- /dev/null +++ b/sys/contrib/dev/iwlwifi/fw/api/soc.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* + * Copyright (C) 2012-2014, 2019-2020 Intel Corporation + * Copyright (C) 2013-2015 Intel Mobile Communications GmbH + * Copyright (C) 2016-2017 Intel Deutschland GmbH + */ +#ifndef __iwl_fw_api_soc_h__ +#define __iwl_fw_api_soc_h__ + +#define SOC_CONFIG_CMD_FLAGS_DISCRETE BIT(0) +#define SOC_CONFIG_CMD_FLAGS_LOW_LATENCY BIT(1) + +#define SOC_FLAGS_LTR_APPLY_DELAY_MASK 0xc +#define SOC_FLAGS_LTR_APPLY_DELAY_NONE 0 +#define SOC_FLAGS_LTR_APPLY_DELAY_200 1 +#define SOC_FLAGS_LTR_APPLY_DELAY_2500 2 +#define SOC_FLAGS_LTR_APPLY_DELAY_1820 3 + +/** + * struct iwl_soc_configuration_cmd - Set device stabilization latency + * + * @flags: soc settings flags. In VER_1, we can only set the DISCRETE + * flag, because the FW treats the whole value as an integer. In + * VER_2, we can set the bits independently. + * @latency: time for SOC to ensure stable power & XTAL + */ +struct iwl_soc_configuration_cmd { + __le32 flags; + __le32 latency; +} __packed; /* + * SOC_CONFIGURATION_CMD_S_VER_1 (see description above) + * SOC_CONFIGURATION_CMD_S_VER_2 + */ + +#endif /* __iwl_fw_api_soc_h__ */ diff --git a/sys/contrib/zlib/contrib/asm686/README.686 b/sys/contrib/zlib/contrib/asm686/README.686 new file mode 100644 index 000000000000..a0bf3bea4aff --- /dev/null +++ b/sys/contrib/zlib/contrib/asm686/README.686 @@ -0,0 +1,51 @@ +This is a patched version of zlib, modified to use +Pentium-Pro-optimized assembly code in the deflation algorithm. The +files changed/added by this patch are: + +README.686 +match.S + +The speedup that this patch provides varies, depending on whether the +compiler used to build the original version of zlib falls afoul of the +PPro's speed traps. My own tests show a speedup of around 10-20% at +the default compression level, and 20-30% using -9, against a version +compiled using gcc 2.7.2.3. Your mileage may vary. + +Note that this code has been tailored for the PPro/PII in particular, +and will not perform particuarly well on a Pentium. + +If you are using an assembler other than GNU as, you will have to +translate match.S to use your assembler's syntax. (Have fun.) + +Brian Raiter +breadbox@muppetlabs.com +April, 1998 + + +Added for zlib 1.1.3: + +The patches come from +http://www.muppetlabs.com/~breadbox/software/assembly.html + +To compile zlib with this asm file, copy match.S to the zlib directory +then do: + +CFLAGS="-O3 -DASMV" ./configure +make OBJA=match.o + + +Update: + +I've been ignoring these assembly routines for years, believing that +gcc's generated code had caught up with it sometime around gcc 2.95 +and the major rearchitecting of the Pentium 4. However, I recently +learned that, despite what I believed, this code still has some life +in it. On the Pentium 4 and AMD64 chips, it continues to run about 8% +faster than the code produced by gcc 4.1. + +In acknowledgement of its continuing usefulness, I've altered the +license to match that of the rest of zlib. Share and Enjoy! + +Brian Raiter +breadbox@muppetlabs.com +April, 2007 diff --git a/sys/contrib/zlib/contrib/asm686/match.S b/sys/contrib/zlib/contrib/asm686/match.S new file mode 100644 index 000000000000..fa421092785d --- /dev/null +++ b/sys/contrib/zlib/contrib/asm686/match.S @@ -0,0 +1,357 @@ +/* match.S -- x86 assembly version of the zlib longest_match() function. + * Optimized for the Intel 686 chips (PPro and later). + * + * Copyright (C) 1998, 2007 Brian Raiter <breadbox@muppetlabs.com> + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the author be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#ifndef NO_UNDERLINE +#define match_init _match_init +#define longest_match _longest_match +#endif + +#define MAX_MATCH (258) +#define MIN_MATCH (3) +#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) +#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7) + +/* stack frame offsets */ + +#define chainlenwmask 0 /* high word: current chain len */ + /* low word: s->wmask */ +#define window 4 /* local copy of s->window */ +#define windowbestlen 8 /* s->window + bestlen */ +#define scanstart 16 /* first two bytes of string */ +#define scanend 12 /* last two bytes of string */ +#define scanalign 20 /* dword-misalignment of string */ +#define nicematch 24 /* a good enough match size */ +#define bestlen 28 /* size of best match so far */ +#define scan 32 /* ptr to string wanting match */ + +#define LocalVarsSize (36) +/* saved ebx 36 */ +/* saved edi 40 */ +/* saved esi 44 */ +/* saved ebp 48 */ +/* return address 52 */ +#define deflatestate 56 /* the function arguments */ +#define curmatch 60 + +/* All the +zlib1222add offsets are due to the addition of fields + * in zlib in the deflate_state structure since the asm code was first written + * (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). + * (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). + * if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). + */ + +#define zlib1222add (8) + +#define dsWSize (36+zlib1222add) +#define dsWMask (44+zlib1222add) +#define dsWindow (48+zlib1222add) +#define dsPrev (56+zlib1222add) +#define dsMatchLen (88+zlib1222add) +#define dsPrevMatch (92+zlib1222add) +#define dsStrStart (100+zlib1222add) +#define dsMatchStart (104+zlib1222add) +#define dsLookahead (108+zlib1222add) +#define dsPrevLen (112+zlib1222add) +#define dsMaxChainLen (116+zlib1222add) +#define dsGoodMatch (132+zlib1222add) +#define dsNiceMatch (136+zlib1222add) + + +.file "match.S" + +.globl match_init, longest_match + +.text + +/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */ +.cfi_sections .debug_frame + +longest_match: + +.cfi_startproc +/* Save registers that the compiler may be using, and adjust %esp to */ +/* make room for our stack frame. */ + + pushl %ebp + .cfi_def_cfa_offset 8 + .cfi_offset ebp, -8 + pushl %edi + .cfi_def_cfa_offset 12 + pushl %esi + .cfi_def_cfa_offset 16 + pushl %ebx + .cfi_def_cfa_offset 20 + subl $LocalVarsSize, %esp + .cfi_def_cfa_offset LocalVarsSize+20 + +/* Retrieve the function arguments. %ecx will hold cur_match */ +/* throughout the entire function. %edx will hold the pointer to the */ +/* deflate_state structure during the function's setup (before */ +/* entering the main loop). */ + + movl deflatestate(%esp), %edx + movl curmatch(%esp), %ecx + +/* uInt wmask = s->w_mask; */ +/* unsigned chain_length = s->max_chain_length; */ +/* if (s->prev_length >= s->good_match) { */ +/* chain_length >>= 2; */ +/* } */ + + movl dsPrevLen(%edx), %eax + movl dsGoodMatch(%edx), %ebx + cmpl %ebx, %eax + movl dsWMask(%edx), %eax + movl dsMaxChainLen(%edx), %ebx + jl LastMatchGood + shrl $2, %ebx +LastMatchGood: + +/* chainlen is decremented once beforehand so that the function can */ +/* use the sign flag instead of the zero flag for the exit test. */ +/* It is then shifted into the high word, to make room for the wmask */ +/* value, which it will always accompany. */ + + decl %ebx + shll $16, %ebx + orl %eax, %ebx + movl %ebx, chainlenwmask(%esp) + +/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */ + + movl dsNiceMatch(%edx), %eax + movl dsLookahead(%edx), %ebx + cmpl %eax, %ebx + jl LookaheadLess + movl %eax, %ebx +LookaheadLess: movl %ebx, nicematch(%esp) + +/* register Bytef *scan = s->window + s->strstart; */ + + movl dsWindow(%edx), %esi + movl %esi, window(%esp) + movl dsStrStart(%edx), %ebp + lea (%esi,%ebp), %edi + movl %edi, scan(%esp) + +/* Determine how many bytes the scan ptr is off from being */ +/* dword-aligned. */ + + movl %edi, %eax + negl %eax + andl $3, %eax + movl %eax, scanalign(%esp) + +/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ +/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ + + movl dsWSize(%edx), %eax + subl $MIN_LOOKAHEAD, %eax + subl %eax, %ebp + jg LimitPositive + xorl %ebp, %ebp +LimitPositive: + +/* int best_len = s->prev_length; */ + + movl dsPrevLen(%edx), %eax + movl %eax, bestlen(%esp) + +/* Store the sum of s->window + best_len in %esi locally, and in %esi. */ + + addl %eax, %esi + movl %esi, windowbestlen(%esp) + +/* register ush scan_start = *(ushf*)scan; */ +/* register ush scan_end = *(ushf*)(scan+best_len-1); */ +/* Posf *prev = s->prev; */ + + movzwl (%edi), %ebx + movl %ebx, scanstart(%esp) + movzwl -1(%edi,%eax), %ebx + movl %ebx, scanend(%esp) + movl dsPrev(%edx), %edi + +/* Jump into the main loop. */ + + movl chainlenwmask(%esp), %edx + jmp LoopEntry + +.balign 16 + +/* do { + * match = s->window + cur_match; + * if (*(ushf*)(match+best_len-1) != scan_end || + * *(ushf*)match != scan_start) continue; + * [...] + * } while ((cur_match = prev[cur_match & wmask]) > limit + * && --chain_length != 0); + * + * Here is the inner loop of the function. The function will spend the + * majority of its time in this loop, and majority of that time will + * be spent in the first ten instructions. + * + * Within this loop: + * %ebx = scanend + * %ecx = curmatch + * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) + * %esi = windowbestlen - i.e., (window + bestlen) + * %edi = prev + * %ebp = limit + */ +LookupLoop: + andl %edx, %ecx + movzwl (%edi,%ecx,2), %ecx + cmpl %ebp, %ecx + jbe LeaveNow + subl $0x00010000, %edx + js LeaveNow +LoopEntry: movzwl -1(%esi,%ecx), %eax + cmpl %ebx, %eax + jnz LookupLoop + movl window(%esp), %eax + movzwl (%eax,%ecx), %eax + cmpl scanstart(%esp), %eax + jnz LookupLoop + +/* Store the current value of chainlen. */ + + movl %edx, chainlenwmask(%esp) + +/* Point %edi to the string under scrutiny, and %esi to the string we */ +/* are hoping to match it up with. In actuality, %esi and %edi are */ +/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ +/* initialized to -(MAX_MATCH_8 - scanalign). */ + + movl window(%esp), %esi + movl scan(%esp), %edi + addl %ecx, %esi + movl scanalign(%esp), %eax + movl $(-MAX_MATCH_8), %edx + lea MAX_MATCH_8(%edi,%eax), %edi + lea MAX_MATCH_8(%esi,%eax), %esi + +/* Test the strings for equality, 8 bytes at a time. At the end, + * adjust %edx so that it is offset to the exact byte that mismatched. + * + * We already know at this point that the first three bytes of the + * strings match each other, and they can be safely passed over before + * starting the compare loop. So what this code does is skip over 0-3 + * bytes, as much as necessary in order to dword-align the %edi + * pointer. (%esi will still be misaligned three times out of four.) + * + * It should be confessed that this loop usually does not represent + * much of the total running time. Replacing it with a more + * straightforward "rep cmpsb" would not drastically degrade + * performance. + */ +LoopCmps: + movl (%esi,%edx), %eax + xorl (%edi,%edx), %eax + jnz LeaveLoopCmps + movl 4(%esi,%edx), %eax + xorl 4(%edi,%edx), %eax + jnz LeaveLoopCmps4 + addl $8, %edx + jnz LoopCmps + jmp LenMaximum +LeaveLoopCmps4: addl $4, %edx +LeaveLoopCmps: testl $0x0000FFFF, %eax + jnz LenLower + addl $2, %edx + shrl $16, %eax +LenLower: subb $1, %al + adcl $0, %edx + +/* Calculate the length of the match. If it is longer than MAX_MATCH, */ +/* then automatically accept it as the best possible match and leave. */ + + lea (%edi,%edx), %eax + movl scan(%esp), %edi + subl %edi, %eax + cmpl $MAX_MATCH, %eax + jge LenMaximum + +/* If the length of the match is not longer than the best match we */ +/* have so far, then forget it and return to the lookup loop. */ + + movl deflatestate(%esp), %edx + movl bestlen(%esp), %ebx + cmpl %ebx, %eax + jg LongerMatch + movl windowbestlen(%esp), %esi + movl dsPrev(%edx), %edi + movl scanend(%esp), %ebx + movl chainlenwmask(%esp), %edx + jmp LookupLoop + +/* s->match_start = cur_match; */ +/* best_len = len; */ +/* if (len >= nice_match) break; */ +/* scan_end = *(ushf*)(scan+best_len-1); */ + +LongerMatch: movl nicematch(%esp), %ebx + movl %eax, bestlen(%esp) + movl %ecx, dsMatchStart(%edx) + cmpl %ebx, %eax + jge LeaveNow + movl window(%esp), %esi + addl %eax, %esi + movl %esi, windowbestlen(%esp) + movzwl -1(%edi,%eax), %ebx + movl dsPrev(%edx), %edi + movl %ebx, scanend(%esp) + movl chainlenwmask(%esp), %edx + jmp LookupLoop + +/* Accept the current string, with the maximum possible length. */ + +LenMaximum: movl deflatestate(%esp), %edx + movl $MAX_MATCH, bestlen(%esp) + movl %ecx, dsMatchStart(%edx) + +/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ +/* return s->lookahead; */ + +LeaveNow: + movl deflatestate(%esp), %edx + movl bestlen(%esp), %ebx + movl dsLookahead(%edx), %eax + cmpl %eax, %ebx + jg LookaheadRet + movl %ebx, %eax +LookaheadRet: + +/* Restore the stack and return from whence we came. */ + + addl $LocalVarsSize, %esp + .cfi_def_cfa_offset 20 + popl %ebx + .cfi_def_cfa_offset 16 + popl %esi + .cfi_def_cfa_offset 12 + popl %edi + .cfi_def_cfa_offset 8 + popl %ebp + .cfi_def_cfa_offset 4 +.cfi_endproc +match_init: ret diff --git a/sys/dev/ice/ice_sriov.c b/sys/dev/ice/ice_sriov.c new file mode 100644 index 000000000000..c0521e667fa2 --- /dev/null +++ b/sys/dev/ice/ice_sriov.c @@ -0,0 +1,595 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright (c) 2021, Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ice_common.h" +#include "ice_sriov.h" + +/** + * ice_aq_send_msg_to_vf + * @hw: pointer to the hardware structure + * @vfid: VF ID to send msg + * @v_opcode: opcodes for VF-PF communication + * @v_retval: return error code + * @msg: pointer to the msg buffer + * @msglen: msg length + * @cd: pointer to command details + * + * Send message to VF driver (0x0802) using mailbox + * queue and asynchronously sending message via + * ice_sq_send_cmd() function + */ +enum ice_status +ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, + u8 *msg, u16 msglen, struct ice_sq_cd *cd) +{ + struct ice_aqc_pf_vf_msg *cmd; + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_vf); + + cmd = &desc.params.virt; + cmd->id = CPU_TO_LE32(vfid); + + desc.cookie_high = CPU_TO_LE32(v_opcode); + desc.cookie_low = CPU_TO_LE32(v_retval); + + if (msglen) + desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD); + + return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd); +} + +/** + * ice_aq_send_msg_to_pf + * @hw: pointer to the hardware structure + * @v_opcode: opcodes for VF-PF communication + * @v_retval: return error code + * @msg: pointer to the msg buffer + * @msglen: msg length + * @cd: pointer to command details + * + * Send message to PF driver using mailbox queue. By default, this + * message is sent asynchronously, i.e. ice_sq_send_cmd() + * does not wait for completion before returning. + */ +enum ice_status +ice_aq_send_msg_to_pf(struct ice_hw *hw, enum virtchnl_ops v_opcode, + enum ice_status v_retval, u8 *msg, u16 msglen, + struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_pf); + desc.cookie_high = CPU_TO_LE32(v_opcode); + desc.cookie_low = CPU_TO_LE32(v_retval); + + if (msglen) + desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD); + + return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd); +} + +/** + * ice_conv_link_speed_to_virtchnl + * @adv_link_support: determines the format of the returned link speed + * @link_speed: variable containing the link_speed to be converted + * + * Convert link speed supported by HW to link speed supported by virtchnl. + * If adv_link_support is true, then return link speed in Mbps. Else return + * link speed as a VIRTCHNL_LINK_SPEED_* casted to a u32. Note that the caller + * needs to cast back to an enum virtchnl_link_speed in the case where + * adv_link_support is false, but when adv_link_support is true the caller can + * expect the speed in Mbps. + */ +u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) +{ + u32 speed; + + if (adv_link_support) + switch (link_speed) { + case ICE_AQ_LINK_SPEED_10MB: + speed = ICE_LINK_SPEED_10MBPS; + break; + case ICE_AQ_LINK_SPEED_100MB: + speed = ICE_LINK_SPEED_100MBPS; + break; + case ICE_AQ_LINK_SPEED_1000MB: + speed = ICE_LINK_SPEED_1000MBPS; + break; + case ICE_AQ_LINK_SPEED_2500MB: + speed = ICE_LINK_SPEED_2500MBPS; + break; + case ICE_AQ_LINK_SPEED_5GB: + speed = ICE_LINK_SPEED_5000MBPS; + break; + case ICE_AQ_LINK_SPEED_10GB: + speed = ICE_LINK_SPEED_10000MBPS; + break; + case ICE_AQ_LINK_SPEED_20GB: + speed = ICE_LINK_SPEED_20000MBPS; + break; + case ICE_AQ_LINK_SPEED_25GB: + speed = ICE_LINK_SPEED_25000MBPS; + break; + case ICE_AQ_LINK_SPEED_40GB: + speed = ICE_LINK_SPEED_40000MBPS; + break; + case ICE_AQ_LINK_SPEED_50GB: + speed = ICE_LINK_SPEED_50000MBPS; + break; + case ICE_AQ_LINK_SPEED_100GB: + speed = ICE_LINK_SPEED_100000MBPS; + break; + default: + speed = ICE_LINK_SPEED_UNKNOWN; + break; + } + else + /* Virtchnl speeds are not defined for every speed supported in + * the hardware. To maintain compatibility with older AVF + * drivers, while reporting the speed the new speed values are + * resolved to the closest known virtchnl speeds + */ + switch (link_speed) { + case ICE_AQ_LINK_SPEED_10MB: + case ICE_AQ_LINK_SPEED_100MB: + speed = (u32)VIRTCHNL_LINK_SPEED_100MB; + break; + case ICE_AQ_LINK_SPEED_1000MB: + case ICE_AQ_LINK_SPEED_2500MB: + case ICE_AQ_LINK_SPEED_5GB: + speed = (u32)VIRTCHNL_LINK_SPEED_1GB; + break; + case ICE_AQ_LINK_SPEED_10GB: + speed = (u32)VIRTCHNL_LINK_SPEED_10GB; + break; + case ICE_AQ_LINK_SPEED_20GB: + speed = (u32)VIRTCHNL_LINK_SPEED_20GB; + break; + case ICE_AQ_LINK_SPEED_25GB: + speed = (u32)VIRTCHNL_LINK_SPEED_25GB; + break; + case ICE_AQ_LINK_SPEED_40GB: + case ICE_AQ_LINK_SPEED_50GB: + case ICE_AQ_LINK_SPEED_100GB: + speed = (u32)VIRTCHNL_LINK_SPEED_40GB; + break; + default: + speed = (u32)VIRTCHNL_LINK_SPEED_UNKNOWN; + break; + } + + return speed; +} + +/* The mailbox overflow detection algorithm helps to check if there + * is a possibility of a malicious VF transmitting too many MBX messages to the + * PF. + * 1. The mailbox snapshot structure, ice_mbx_snapshot, is initialized during + * driver initialization in ice_init_hw() using ice_mbx_init_snapshot(). + * The struct ice_mbx_snapshot helps to track and traverse a static window of + * messages within the mailbox queue while looking for a malicious VF. + * + * 2. When the caller starts processing its mailbox queue in response to an + * interrupt, the structure ice_mbx_snapshot is expected to be cleared before + * the algorithm can be run for the first time for that interrupt. This can be + * done via ice_mbx_reset_snapshot(). + * + * 3. For every message read by the caller from the MBX Queue, the caller must + * call the detection algorithm's entry function ice_mbx_vf_state_handler(). + * Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is + * filled as it is required to be passed to the algorithm. + * + * 4. Every time a message is read from the MBX queue, a VFId is received which + * is passed to the state handler. The boolean output is_malvf of the state + * handler ice_mbx_vf_state_handler() serves as an indicator to the caller + * whether this VF is malicious or not. + * + * 5. When a VF is identified to be malicious, the caller can send a message + * to the system administrator. The caller can invoke ice_mbx_report_malvf() + * to help determine if a malicious VF is to be reported or not. This function + * requires the caller to maintain a global bitmap to track all malicious VFs + * and pass that to ice_mbx_report_malvf() along with the VFID which was identified + * to be malicious by ice_mbx_vf_state_handler(). + * + * 6. The global bitmap maintained by PF can be cleared completely if PF is in + * reset or the bit corresponding to a VF can be cleared if that VF is in reset. + * When a VF is shut down and brought back up, we assume that the new VF + * brought up is not malicious and hence report it if found malicious. + * + * 7. The function ice_mbx_reset_snapshot() is called to reset the information + * in ice_mbx_snapshot for every new mailbox interrupt handled. + * + * 8. The memory allocated for variables in ice_mbx_snapshot is de-allocated + * when driver is unloaded. + */ +#define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M) +/* Using the highest value for an unsigned 16-bit value 0xFFFF to indicate that + * the max messages check must be ignored in the algorithm + */ +#define ICE_IGNORE_MAX_MSG_CNT 0xFFFF + +/** + * ice_mbx_traverse - Pass through mailbox snapshot + * @hw: pointer to the HW struct + * @new_state: new algorithm state + * + * Traversing the mailbox static snapshot without checking + * for malicious VFs. + */ +static void +ice_mbx_traverse(struct ice_hw *hw, + enum ice_mbx_snapshot_state *new_state) +{ + struct ice_mbx_snap_buffer_data *snap_buf; + u32 num_iterations; + + snap_buf = &hw->mbx_snapshot.mbx_buf; + + /* As mailbox buffer is circular, applying a mask + * on the incremented iteration count. + */ + num_iterations = ICE_RQ_DATA_MASK(++snap_buf->num_iterations); + + /* Checking either of the below conditions to exit snapshot traversal: + * Condition-1: If the number of iterations in the mailbox is equal to + * the mailbox head which would indicate that we have reached the end + * of the static snapshot. + * Condition-2: If the maximum messages serviced in the mailbox for a + * given interrupt is the highest possible value then there is no need + * to check if the number of messages processed is equal to it. If not + * check if the number of messages processed is greater than or equal + * to the maximum number of mailbox entries serviced in current work item. + */ + if (num_iterations == snap_buf->head || + (snap_buf->max_num_msgs_mbx < ICE_IGNORE_MAX_MSG_CNT && + ++snap_buf->num_msg_proc >= snap_buf->max_num_msgs_mbx)) + *new_state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; +} + +/** + * ice_mbx_detect_malvf - Detect malicious VF in snapshot + * @hw: pointer to the HW struct + * @vf_id: relative virtual function ID + * @new_state: new algorithm state + * @is_malvf: boolean output to indicate if VF is malicious + * + * This function tracks the number of asynchronous messages + * sent per VF and marks the VF as malicious if it exceeds + * the permissible number of messages to send. + */ +static enum ice_status +ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id, + enum ice_mbx_snapshot_state *new_state, + bool *is_malvf) +{ + struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + + if (vf_id >= snap->mbx_vf.vfcntr_len) + return ICE_ERR_OUT_OF_RANGE; + + /* increment the message count in the VF array */ + snap->mbx_vf.vf_cntr[vf_id]++; + + if (snap->mbx_vf.vf_cntr[vf_id] >= ICE_ASYNC_VF_MSG_THRESHOLD) + *is_malvf = true; + + /* continue to iterate through the mailbox snapshot */ + ice_mbx_traverse(hw, new_state); + + return ICE_SUCCESS; +} + +/** + * ice_mbx_reset_snapshot - Reset mailbox snapshot structure + * @snap: pointer to mailbox snapshot structure in the ice_hw struct + * + * Reset the mailbox snapshot structure and clear VF counter array. + */ +static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap) *** 15611 LINES SKIPPED ***