svn commit: r263755 - head/sys/kern

Fri Mar 28 16:13:33 UTC 2014

On 28 Mar, David Xu wrote:
> On 2014/03/28 06:31, Don Lewis wrote:
>> On 27 Mar, Konstantin Belousov wrote:
>>> On Thu, Mar 27, 2014 at 04:05:12PM +0100, Mateusz Guzik wrote:
>>>> On Thu, Mar 27, 2014 at 03:58:19PM +0100, Mateusz Guzik wrote:
>>>>> On Thu, Mar 27, 2014 at 04:46:57PM +0800, David Xu wrote:
>>>>>> On 2014/03/27 16:37, Mateusz Guzik wrote:
>>>>>>> On Thu, Mar 27, 2014 at 03:45:17PM +0800, David Xu wrote:
>>>>>>>> I think the async process pointer can be cleared when a process exits
>>>>>>>> by registering an event handler. please see attached patch.
>>>>>>>>
>>>>>>>
>>>>>>> Sure, but I'm not very fond of this solution.
>>>>>>>
>>>>>>> This is a rather obscure bug you wont hit unless you explicitly try,
>>>>>>> and even then you need root privs by default.
>>>>>>>
>>>>>> OK, but I don't like the bug exists in kernel. It is not obscure for me,
>>>>>> I can run "shutdown now" command, and insert a device, and then the
>>>>>> kernel will write garbage data into freed memory space.
>>>>>>
>>>>>
>>>>> Not sure what you mean. devd does not use this feature, and even if it
>>>>> did async_proc is cleared on close, which happens while signal delivery
>>>>> is still legal.
>>>>>
>>>>> That said, you are not going to encounter this bug unless you code
>>>>> something up to specifically trigger it.
>>>>>
>>>>> fwiw, I think we could axe this feature if there was no way to fix it
>>>>> without introducing a check for every process.
>>>>>
>>>>>>> As such writing a callback function which will be executed for all exiting
>>>>>>> processes seems unjustified for me.
>>>>>>>
>>>>>>> Ideally we would get some mechanism which would allow to register
>>>>>>> callbacks for events related to given entity. Then it could be used to
>>>>>>> provide a "call this function when process p exits", amongst other things.
>>>>>>>
>>>>>>
>>>>>> Yes, but the callback itself is cheap enough and is not worth to be
>>>>>> per-entity entry.
>>>>>>
>>>>>
>>>>> There is other code in the kernel which would benefit from such
>>>>> functionality - dev/syscons/scmouse, dev/vt/vt_core.c, aio and possibly
>>>>> more.
>>>>>
>>>>> As such I think this is worth pursuing.
>>>>>
>>>>
>>>> We can hack around this one the way the other code is doing - apart from
>>>> from proc pointer you store pid and then compare result of pfind(pid).
>>>>
>>>> This is still buggy as both proc and pid pointer can be recycled and end
>>>> up being the same (but you have an entrirely new process).
>>>>
>>>> However, then in absolutely worst cae you send SIGIO to incorrect
>>>> process, always an existing process so no more corruption.
>>>>
>>>> Would you be ok with such hack for the time being?
>>>
>>> Isn't p_sigiolist and fsetown(9) already provide the neccessary registration
>>> and cleanup on the process exit ?  The KPI might require some generalization,
>>> but I think that the mechanism itself is enough.
>>
>> That's the correct mechanism, but it's not being used here.
>>
>> Something like the following untested patch should do the trick:
>>
>> Index: sys/kern/subr_bus.c
>> ===================================================================
>> --- sys/kern/subr_bus.c	(revision 263289)
>> +++ sys/kern/subr_bus.c	(working copy)
>> @@ -402,7 +402,7 @@
>>   	struct cv cv;
>>   	struct selinfo sel;
>>   	struct devq devq;
>> -	struct proc *async_proc;
>> +	struct sigio *sigio;
>>   } devsoftc;
>>
>>   static struct cdev *devctl_dev;
>> @@ -425,7 +425,7 @@
>>   	/* move to init */
>>   	devsoftc.inuse = 1;
>>   	devsoftc.nonblock = 0;
>> -	devsoftc.async_proc = NULL;
>> +	funsetown(&devsoftc.sigio);
>>   	return (0);
>>   }
>>
>> @@ -436,7 +436,7 @@
>>   	mtx_lock(&devsoftc.mtx);
>>   	cv_broadcast(&devsoftc.cv);
>>   	mtx_unlock(&devsoftc.mtx);
>> -	devsoftc.async_proc = NULL;
>> +	funsetown(&devsoftc.sigio);
>>   	return (0);
>>   }
>>
>> @@ -492,9 +492,8 @@
>>   		return (0);
>>   	case FIOASYNC:
>>   		if (*(int*)data)
>> -			devsoftc.async_proc = td->td_proc;
>> -		else
>> -			devsoftc.async_proc = NULL;
>> +			return (fsetown(td->td_proc->p_pid, &devsoftc.sigio));
>> +		funsetown(&devsoftc.sigio);
>>   		return (0);
>>
>>   		/* (un)Support for other fcntl() calls. */
>> @@ -546,7 +545,6 @@
>>   devctl_queue_data_f(char *data, int flags)
>>   {
>>   	struct dev_event_info *n1 = NULL, *n2 = NULL;
>> -	struct proc *p;
>>
>>   	if (strlen(data) == 0)
>>   		goto out;
>> @@ -576,12 +574,8 @@
>>   	cv_broadcast(&devsoftc.cv);
>>   	mtx_unlock(&devsoftc.mtx);
>>   	selwakeup(&devsoftc.sel);
>> -	p = devsoftc.async_proc;
>> -	if (p != NULL) {
>> -		PROC_LOCK(p);
>> -		kern_psignal(p, SIGIO);
>> -		PROC_UNLOCK(p);
>> -	}
>> +	if (devsoftc.sigio != NULL)
>> +		pgsigio(&devsoftc.sigio, SIGIO, 0);
>>   	return;
>>   out:
>>   	/*
>>
>>
> I have tweaked it a bit, is this okay ?
> 
> # HG changeset patch
> # Parent 53b614ff2cae108f27e4475989d3a86997017268
> 
> diff -r 53b614ff2cae sys/kern/subr_bus.c
> --- a/sys/kern/subr_bus.c	Thu Mar 27 10:03:50 2014 +0800
> +++ b/sys/kern/subr_bus.c	Fri Mar 28 14:22:29 2014 +0800
> @@ -391,11 +391,12 @@
>   	int	inuse;
>   	int	nonblock;
>   	int	queued;
> +	int	async;
>   	struct mtx mtx;
>   	struct cv cv;
>   	struct selinfo sel;
>   	struct devq devq;
> -	struct proc *async_proc;
> +	struct sigio *sigio;
>   } devsoftc;
> 
>   static struct cdev *devctl_dev;
> @@ -422,7 +423,8 @@
>   	/* move to init */
>   	devsoftc.inuse = 1;
>   	devsoftc.nonblock = 0;
> -	devsoftc.async_proc = NULL;
> +	devsoftc.async = 0;
> +	devsoftc.sigio = NULL;
>   	mtx_unlock(&devsoftc.mtx);
>   	return (0);
>   }
> @@ -433,8 +435,9 @@
> 
>   	mtx_lock(&devsoftc.mtx);
>   	devsoftc.inuse = 0;
> -	devsoftc.async_proc = NULL;
> +	devsoftc.async = 0;
>   	cv_broadcast(&devsoftc.cv);
> +	funsetown(&devsoftc.sigio);
>   	mtx_unlock(&devsoftc.mtx);
>   	return (0);
>   }
> @@ -490,33 +493,21 @@
>   			devsoftc.nonblock = 0;
>   		return (0);
>   	case FIOASYNC:
> -		/*
> -		 * FIXME:
> -		 * Since this is a simple assignment there is no guarantee that
> -		 * devsoftc.async_proc consumers will get a valid pointer.
> -		 *
> -		 * Example scenario where things break (processes A and B):
> -		 * 1. A opens devctl
> -		 * 2. A sends fd to B
> -		 * 3. B sets itself as async_proc
> -		 * 4. B exits
> -		 *
> -		 * However, normally this requires root privileges and the only
> -		 * in-tree consumer does not behave in a dangerous way so the
> -		 * issue is not critical.
> -		 */
>   		if (*(int*)data)
> -			devsoftc.async_proc = td->td_proc;
> +			devsoftc.async = 1;
>   		else
> -			devsoftc.async_proc = NULL;
> +			devsoftc.async = 0;
> +		return (0);
> +	case FIOSETOWN:
> +		return fsetown(*(int *)data, &devsoftc.sigio);
> +	case FIOGETOWN:
> +		*(int *)data = fgetown(&devsoftc.sigio);
>   		return (0);
> 
>   		/* (un)Support for other fcntl() calls. */
>   	case FIOCLEX:
>   	case FIONCLEX:
>   	case FIONREAD:
> -	case FIOSETOWN:
> -	case FIOGETOWN:
>   	default:
>   		break;
>   	}
> @@ -560,7 +551,6 @@
>   devctl_queue_data_f(char *data, int flags)
>   {
>   	struct dev_event_info *n1 = NULL, *n2 = NULL;
> -	struct proc *p;
> 
>   	if (strlen(data) == 0)
>   		goto out;
> @@ -590,13 +580,8 @@
>   	cv_broadcast(&devsoftc.cv);
>   	mtx_unlock(&devsoftc.mtx);
>   	selwakeup(&devsoftc.sel);
> -	/* XXX see a comment in devioctl */
> -	p = devsoftc.async_proc;
> -	if (p != NULL) {
> -		PROC_LOCK(p);
> -		kern_psignal(p, SIGIO);
> -		PROC_UNLOCK(p);
> -	}
> +	if (devsoftc.async && devsoftc.sigio != NULL)
> +		pgsigio(&devsoftc.sigio, SIGIO, 0);
>   	return;
>   out:
>   	/*
> 
> 

That makes it work more like the other users of fsetown(), which is
probably a good thing.  The downside is that two syscalls are needed to
activate it, which I was trying to avoid that with my patch.  I noticed
that logopen() in subr_log.c unconditionally calls fsetown(), which
would avoid the need for an extra syscall.  That also avoids the direct
manipulation of the pointer in your patch, which makes me nervous about
the possibility of a leak.

I wonder if FIOASYNC should fail if
	td->td_proc != devsoftc.sigio.sio_proc
(or the equivalent for other instances) to prevent a process from
maniuplating the async flag for a device "owned" by another process.  I
think this check would need to be wrapped in SIGIO_LOCK()/SIGIO_UNLOCK()
to be safe.