svn commit: r263755 - head/sys/kern

Sat Mar 29 02:56:40 UTC 2014

On 2014/03/29 00:13, Don Lewis wrote:
> On 28 Mar, David Xu wrote:
>> On 2014/03/28 06:31, Don Lewis wrote:
>>> On 27 Mar, Konstantin Belousov wrote:
>>>> On Thu, Mar 27, 2014 at 04:05:12PM +0100, Mateusz Guzik wrote:
>>>>> On Thu, Mar 27, 2014 at 03:58:19PM +0100, Mateusz Guzik wrote:
>>>>>> On Thu, Mar 27, 2014 at 04:46:57PM +0800, David Xu wrote:
>>>>>>> On 2014/03/27 16:37, Mateusz Guzik wrote:
>>>>>>>> On Thu, Mar 27, 2014 at 03:45:17PM +0800, David Xu wrote:
>>>>>>>>> I think the async process pointer can be cleared when a process exits
>>>>>>>>> by registering an event handler. please see attached patch.
>>>>>>>>>
>>>>>>>> Sure, but I'm not very fond of this solution.
>>>>>>>>
>>>>>>>> This is a rather obscure bug you wont hit unless you explicitly try,
>>>>>>>> and even then you need root privs by default.
>>>>>>>>
>>>>>>> OK, but I don't like the bug exists in kernel. It is not obscure for me,
>>>>>>> I can run "shutdown now" command, and insert a device, and then the
>>>>>>> kernel will write garbage data into freed memory space.
>>>>>>>
>>>>>> Not sure what you mean. devd does not use this feature, and even if it
>>>>>> did async_proc is cleared on close, which happens while signal delivery
>>>>>> is still legal.
>>>>>>
>>>>>> That said, you are not going to encounter this bug unless you code
>>>>>> something up to specifically trigger it.
>>>>>>
>>>>>> fwiw, I think we could axe this feature if there was no way to fix it
>>>>>> without introducing a check for every process.
>>>>>>
>>>>>>>> As such writing a callback function which will be executed for all exiting
>>>>>>>> processes seems unjustified for me.
>>>>>>>>
>>>>>>>> Ideally we would get some mechanism which would allow to register
>>>>>>>> callbacks for events related to given entity. Then it could be used to
>>>>>>>> provide a "call this function when process p exits", amongst other things.
>>>>>>>>
>>>>>>> Yes, but the callback itself is cheap enough and is not worth to be
>>>>>>> per-entity entry.
>>>>>>>
>>>>>> There is other code in the kernel which would benefit from such
>>>>>> functionality - dev/syscons/scmouse, dev/vt/vt_core.c, aio and possibly
>>>>>> more.
>>>>>>
>>>>>> As such I think this is worth pursuing.
>>>>>>
>>>>> We can hack around this one the way the other code is doing - apart from
>>>>> from proc pointer you store pid and then compare result of pfind(pid).
>>>>>
>>>>> This is still buggy as both proc and pid pointer can be recycled and end
>>>>> up being the same (but you have an entrirely new process).
>>>>>
>>>>> However, then in absolutely worst cae you send SIGIO to incorrect
>>>>> process, always an existing process so no more corruption.
>>>>>
>>>>> Would you be ok with such hack for the time being?
>>>> Isn't p_sigiolist and fsetown(9) already provide the neccessary registration
>>>> and cleanup on the process exit ?  The KPI might require some generalization,
>>>> but I think that the mechanism itself is enough.
>>> That's the correct mechanism, but it's not being used here.
>>>
>>> Something like the following untested patch should do the trick:
>>>
>>> Index: sys/kern/subr_bus.c
>>> ===================================================================
>>> --- sys/kern/subr_bus.c	(revision 263289)
>>> +++ sys/kern/subr_bus.c	(working copy)
>>> @@ -402,7 +402,7 @@
>>>    	struct cv cv;
>>>    	struct selinfo sel;
>>>    	struct devq devq;
>>> -	struct proc *async_proc;
>>> +	struct sigio *sigio;
>>>    } devsoftc;
>>>
>>>    static struct cdev *devctl_dev;
>>> @@ -425,7 +425,7 @@
>>>    	/* move to init */
>>>    	devsoftc.inuse = 1;
>>>    	devsoftc.nonblock = 0;
>>> -	devsoftc.async_proc = NULL;
>>> +	funsetown(&devsoftc.sigio);
>>>    	return (0);
>>>    }
>>>
>>> @@ -436,7 +436,7 @@
>>>    	mtx_lock(&devsoftc.mtx);
>>>    	cv_broadcast(&devsoftc.cv);
>>>    	mtx_unlock(&devsoftc.mtx);
>>> -	devsoftc.async_proc = NULL;
>>> +	funsetown(&devsoftc.sigio);
>>>    	return (0);
>>>    }
>>>
>>> @@ -492,9 +492,8 @@
>>>    		return (0);
>>>    	case FIOASYNC:
>>>    		if (*(int*)data)
>>> -			devsoftc.async_proc = td->td_proc;
>>> -		else
>>> -			devsoftc.async_proc = NULL;
>>> +			return (fsetown(td->td_proc->p_pid, &devsoftc.sigio));
>>> +		funsetown(&devsoftc.sigio);
>>>    		return (0);
>>>
>>>    		/* (un)Support for other fcntl() calls. */
>>> @@ -546,7 +545,6 @@
>>>    devctl_queue_data_f(char *data, int flags)
>>>    {
>>>    	struct dev_event_info *n1 = NULL, *n2 = NULL;
>>> -	struct proc *p;
>>>
>>>    	if (strlen(data) == 0)
>>>    		goto out;
>>> @@ -576,12 +574,8 @@
>>>    	cv_broadcast(&devsoftc.cv);
>>>    	mtx_unlock(&devsoftc.mtx);
>>>    	selwakeup(&devsoftc.sel);
>>> -	p = devsoftc.async_proc;
>>> -	if (p != NULL) {
>>> -		PROC_LOCK(p);
>>> -		kern_psignal(p, SIGIO);
>>> -		PROC_UNLOCK(p);
>>> -	}
>>> +	if (devsoftc.sigio != NULL)
>>> +		pgsigio(&devsoftc.sigio, SIGIO, 0);
>>>    	return;
>>>    out:
>>>    	/*
>>>
>>>
>> I have tweaked it a bit, is this okay ?
>>
>> # HG changeset patch
>> # Parent 53b614ff2cae108f27e4475989d3a86997017268
>>
>> diff -r 53b614ff2cae sys/kern/subr_bus.c
>> --- a/sys/kern/subr_bus.c	Thu Mar 27 10:03:50 2014 +0800
>> +++ b/sys/kern/subr_bus.c	Fri Mar 28 14:22:29 2014 +0800
>> @@ -391,11 +391,12 @@
>>    	int	inuse;
>>    	int	nonblock;
>>    	int	queued;
>> +	int	async;
>>    	struct mtx mtx;
>>    	struct cv cv;
>>    	struct selinfo sel;
>>    	struct devq devq;
>> -	struct proc *async_proc;
>> +	struct sigio *sigio;
>>    } devsoftc;
>>
>>    static struct cdev *devctl_dev;
>> @@ -422,7 +423,8 @@
>>    	/* move to init */
>>    	devsoftc.inuse = 1;
>>    	devsoftc.nonblock = 0;
>> -	devsoftc.async_proc = NULL;
>> +	devsoftc.async = 0;
>> +	devsoftc.sigio = NULL;
>>    	mtx_unlock(&devsoftc.mtx);
>>    	return (0);
>>    }
>> @@ -433,8 +435,9 @@
>>
>>    	mtx_lock(&devsoftc.mtx);
>>    	devsoftc.inuse = 0;
>> -	devsoftc.async_proc = NULL;
>> +	devsoftc.async = 0;
>>    	cv_broadcast(&devsoftc.cv);
>> +	funsetown(&devsoftc.sigio);
>>    	mtx_unlock(&devsoftc.mtx);
>>    	return (0);
>>    }
>> @@ -490,33 +493,21 @@
>>    			devsoftc.nonblock = 0;
>>    		return (0);
>>    	case FIOASYNC:
>> -		/*
>> -		 * FIXME:
>> -		 * Since this is a simple assignment there is no guarantee that
>> -		 * devsoftc.async_proc consumers will get a valid pointer.
>> -		 *
>> -		 * Example scenario where things break (processes A and B):
>> -		 * 1. A opens devctl
>> -		 * 2. A sends fd to B
>> -		 * 3. B sets itself as async_proc
>> -		 * 4. B exits
>> -		 *
>> -		 * However, normally this requires root privileges and the only
>> -		 * in-tree consumer does not behave in a dangerous way so the
>> -		 * issue is not critical.
>> -		 */
>>    		if (*(int*)data)
>> -			devsoftc.async_proc = td->td_proc;
>> +			devsoftc.async = 1;
>>    		else
>> -			devsoftc.async_proc = NULL;
>> +			devsoftc.async = 0;
>> +		return (0);
>> +	case FIOSETOWN:
>> +		return fsetown(*(int *)data, &devsoftc.sigio);
>> +	case FIOGETOWN:
>> +		*(int *)data = fgetown(&devsoftc.sigio);
>>    		return (0);
>>
>>    		/* (un)Support for other fcntl() calls. */
>>    	case FIOCLEX:
>>    	case FIONCLEX:
>>    	case FIONREAD:
>> -	case FIOSETOWN:
>> -	case FIOGETOWN:
>>    	default:
>>    		break;
>>    	}
>> @@ -560,7 +551,6 @@
>>    devctl_queue_data_f(char *data, int flags)
>>    {
>>    	struct dev_event_info *n1 = NULL, *n2 = NULL;
>> -	struct proc *p;
>>
>>    	if (strlen(data) == 0)
>>    		goto out;
>> @@ -590,13 +580,8 @@
>>    	cv_broadcast(&devsoftc.cv);
>>    	mtx_unlock(&devsoftc.mtx);
>>    	selwakeup(&devsoftc.sel);
>> -	/* XXX see a comment in devioctl */
>> -	p = devsoftc.async_proc;
>> -	if (p != NULL) {
>> -		PROC_LOCK(p);
>> -		kern_psignal(p, SIGIO);
>> -		PROC_UNLOCK(p);
>> -	}
>> +	if (devsoftc.async && devsoftc.sigio != NULL)
>> +		pgsigio(&devsoftc.sigio, SIGIO, 0);
>>    	return;
>>    out:
>>    	/*
>>
>>
> That makes it work more like the other users of fsetown(), which is
> probably a good thing.  The downside is that two syscalls are needed to
> activate it, which I was trying to avoid that with my patch.  I noticed
> that logopen() in subr_log.c unconditionally calls fsetown(), which
> would avoid the need for an extra syscall.

This is incompatible with POSIX, at least "man 2 open" does not say
a process opened the file will immediately be the owner. Two syscalls
are necessary, because fsetown is expensive, so using a light weight
fioasync to temporally disable and enable it is more flexible.

>   That also avoids the direct
> manipulation of the pointer in your patch, which makes me nervous about
> the possibility of a leak.
fsetown() does already guard the leak problem.

> I wonder if FIOASYNC should fail if
> 	td->td_proc != devsoftc.sigio.sio_proc
> (or the equivalent for other instances) to prevent a process from
> maniuplating the async flag for a device "owned" by another process.  I
> think this check would need to be wrapped in SIGIO_LOCK()/SIGIO_UNLOCK()
> to be safe.
>
>
>
most code in the kernel do not check ownership, I can not find one
in kern/ sub-directory.