svn commit: r343527 - in stable/12/sys/riscv: include riscv

Marko Zec zec at fer.hr
Tue Jan 29 18:27:23 UTC 2019


On Mon, 28 Jan 2019 16:14:53 +0000
Mark Johnston <markj at freebsd.org> wrote:

> Author: markj
> Date: Mon Jan 28 16:14:53 2019
> New Revision: 343527
> URL: https://svnweb.freebsd.org/changeset/base/343527
> 
> Log:
>   MFC r343274, r343275:
>   Optimize RISC-V copyin(9)/copyout(9) routines.

Was this subjected to any benchmarks?  I'd bet that placing

addi	a2, a2, -XLEN_BYTES

before

sd	a4, 0(a1)

instead of being scheduled after (the same goes for the byte copy loop)
would make the loops run faster on most in-order RV cores out there...

Marko


> 
> Modified:
>   stable/12/sys/riscv/include/riscvreg.h
>   stable/12/sys/riscv/riscv/copyinout.S
> Directory Properties:
>   stable/12/   (props changed)
> 
> Modified: stable/12/sys/riscv/include/riscvreg.h
> ==============================================================================
> --- stable/12/sys/riscv/include/riscvreg.h	Mon Jan 28 14:34:59 2019	(r343526)
> +++ stable/12/sys/riscv/include/riscvreg.h	Mon Jan 28 16:14:53 2019	(r343527)
> @@ -155,7 +155,8 @@
>  #define	SATP_MODE_SV39	(8ULL << SATP_MODE_S)
>  #define	SATP_MODE_SV48	(9ULL << SATP_MODE_S)
> -#define	XLEN		8
> +#define	XLEN		__riscv_xlen
> +#define	XLEN_BYTES	(XLEN / 8)
>  #define	INSN_SIZE	4
>  #define	INSN_C_SIZE	2
>  
> 
> Modified: stable/12/sys/riscv/riscv/copyinout.S
> ==============================================================================
> --- stable/12/sys/riscv/riscv/copyinout.S	Mon Jan 28 14:34:59 2019	(r343526)
> +++ stable/12/sys/riscv/riscv/copyinout.S	Mon Jan 28 16:14:53 2019	(r343527)
> @@ -1,5 +1,6 @@
>  /*-
>   * Copyright (c) 2015-2018 Ruslan Bukin <br at bsdpad.com>
> + * Copyright (c) 2019 Mitchell Horne
>   * All rights reserved.
>   *
>   * Portions of this software were developed by SRI International and the
> @@ -52,60 +53,94 @@ copyio_fault_nopcb:
>  END(copyio_fault)
>  
>  /*
> + * copycommon - common copy routine
> + *
> + * a0 - Source address
> + * a1 - Destination address
> + * a2 - Size of copy
> + */
> +	.macro copycommon
> +	la	a6, copyio_fault	/* Get the handler address */
> +	SET_FAULT_HANDLER(a6, a7)	/* Set the handler */
> +	ENTER_USER_ACCESS(a7)
> +
> +	li	t2, XLEN_BYTES
> +	blt	a2, t2, 3f		/* Byte-copy if len < XLEN_BYTES */
> +
> +	/*
> +	 * Compare lower bits of src and dest.
> +	 * If they are aligned with each other, we can do word copy.
> +	 */
> +	andi	t0, a0, (XLEN_BYTES-1)	/* Low bits of src */
> +	andi	t1, a1, (XLEN_BYTES-1)	/* Low bits of dest */
> +	bne	t0, t1, 3f		/* Misaligned. Go to byte copy */
> +	beqz	t0, 2f			/* Already word-aligned, skip ahead */
> +
> +	/* Byte copy until the first word-aligned address */
> +1:	lb	a4, 0(a0)		/* Load byte from src */
> +	addi	a0, a0, 1
> +	sb	a4, 0(a1)		/* Store byte in dest */
> +	addi	a1, a1, 1
> +	addi	a2, a2, -1		/* len-- */
> +	andi	t0, a0, (XLEN_BYTES-1)
> +	bnez	t0, 1b
> +
> +	/* Copy words */
> +2:	ld	a4, 0(a0)		/* Load word from src */
> +	addi	a0, a0, XLEN_BYTES
> +	sd	a4, 0(a1)		/* Store word in dest */
> +	addi	a1, a1, XLEN_BYTES
> +	addi	a2, a2, -XLEN_BYTES	/* len -= XLEN_BYTES */
> +	bgeu	a2, t2, 2b		/* Again if len >= XLEN_BYTES */
> +
> +	/* Check if we're finished */
> +	beqz	a2, 4f
> +
> +	/* Copy any remaining bytes */
> +3:	lb	a4, 0(a0)		/* Load byte from src */
> +	addi	a0, a0, 1
> +	sb	a4, 0(a1)		/* Store byte in dest */
> +	addi	a1, a1, 1
> +	addi	a2, a2, -1		/* len-- */
> +	bnez	a2, 3b
> +
> +4:	EXIT_USER_ACCESS(a7)
> +	SET_FAULT_HANDLER(x0, a7)	/* Clear the handler */
> +	.endm
> +
> +/*
>   * Copies from a kernel to user address
>   *
>   * int copyout(const void *kaddr, void *udaddr, size_t len)
>   */
>  ENTRY(copyout)
> -	beqz	a2, 2f		/* If len == 0 then skip loop */
> +	beqz	a2, copyout_end	/* If len == 0 then skip loop */
>  	add	a3, a1, a2
>  	li	a4, VM_MAXUSER_ADDRESS
>  	bgt	a3, a4, copyio_fault_nopcb
>  
> -	la	a6, copyio_fault /* Get the handler address */
> -	SET_FAULT_HANDLER(a6, a7) /* Set the handler */
> -	ENTER_USER_ACCESS(a7)
> +	copycommon
>  
> -1:	lb	a4, 0(a0)	/* Load from kaddr */
> -	addi	a0, a0, 1
> -	sb	a4, 0(a1)	/* Store in uaddr */
> -	addi	a1, a1, 1
> -	addi	a2, a2, -1	/* len-- */
> -	bnez	a2, 1b
> -
> -	EXIT_USER_ACCESS(a7)
> -	SET_FAULT_HANDLER(x0, a7) /* Clear the handler */
> -
> -2:	li	a0, 0		/* return 0 */
> +copyout_end:
> +	li	a0, 0		/* return 0 */
>  	ret
>  END(copyout)
>  
>  /*
>   * Copies from a user to kernel address
>   *
> - * int copyin(const void *uaddr, void *kdaddr, size_t len)
> + * int copyin(const void *uaddr, void *kaddr, size_t len)
>   */
>  ENTRY(copyin)
> -	beqz	a2, 2f		/* If len == 0 then skip loop */
> +	beqz	a2, copyin_end	/* If len == 0 then skip loop */
>  	add	a3, a0, a2
>  	li	a4, VM_MAXUSER_ADDRESS
>  	bgt	a3, a4, copyio_fault_nopcb
>  
> -	la	a6, copyio_fault /* Get the handler address */
> -	SET_FAULT_HANDLER(a6, a7) /* Set the handler */
> -	ENTER_USER_ACCESS(a7)
> +	copycommon
>  
> -1:	lb	a4, 0(a0)	/* Load from uaddr */
> -	addi	a0, a0, 1
> -	sb	a4, 0(a1)	/* Store in kaddr */
> -	addi	a1, a1, 1
> -	addi	a2, a2, -1	/* len-- */
> -	bnez	a2, 1b
> -
> -	EXIT_USER_ACCESS(a7)
> -	SET_FAULT_HANDLER(x0, a7) /* Clear the handler */
> -
> -2:	li	a0, 0		/* return 0 */
> +copyin_end:
> +	li	a0, 0		/* return 0 */
>  	ret
>  END(copyin)
>  
> 



More information about the svn-src-all mailing list