linux kernel, userspace buffers, do access_ok and wait create a race condition?
In the following code (the read
implementation for a char driver), is it possible for MMU TLB entries to change during wait_event_interruptible
, such that __put_user
causes an exception even though access_ok
succeeded?
Is it possible to lock the user buffer such that it remains valid for the duration of the request?
Would repeating the access_ok
check after wait_event_interruptible
returns make this safe?
ssize_t mydriver_pkt_read( struct file* filp, char __user* const buff, size_t count, loff_t* offp )
{
struct mydriver_pkt_private* priv;
volatile unsigned short* iobase;
unsigned next;
char __user* p = buff;
if (count <= 0) return -EINVAL;
if (!access_ok(VERIFY_WRITE, buff, count)) return -EFAULT;
priv = (struct mydriver_pkt_private*)filp->private_data;
iobase = priv->iobase;
next = priv->retained;
if ((next & PKTBUF_FLAG_NOTEMPTY) == 0) {
next = ioread16(iobase);
if ((next & PKTBUF_FLAG_NOTEMPTY) == 0) { // no data, start blocking read
iowrite16(1, iobase); // enable interrupts
if (wait_event_interruptible(priv->wait_for_ringbuffer, (priv->retained & PKTBUF_FLAG_NOTEMPTY)))
return -ERESTARTSYS;
next = priv->retained;
}
}
while (count > 0) {
__put_user( (char)next, p );
p++;
count--;
next = ioread16(iobase);
if ((next & PKTBUF_FLAG_STARTPKT) || !(next & PKTBUF_FLAG_NOTEMPTY)) {
priv->retained = next;
return (p - buff);
}
}
/* discard remainder of packet */
do {
next = ioread16(iobase);
} while ((next & PKTBUF_FLAG_NOTEMPTY) && !(next & PKTBUF_FLAG_STARTPKT));
priv->retained = next;
return (p - buff);
}
Exclusive open code:
int mydriver_pkt_open( struct inode* inode, struct file* filp )
{
开发者_运维问答 struct mydriver_pkt_private* priv;
priv = container_of(inode->i_cdev, struct mydriver_pkt_private, cdevnode);
if (atomic_cmpxchg(&priv->inuse, 0, 1))
return -EBUSY;
nonseekable_open(inode, filp);
filp->private_data = priv;
return 0;
}
Unless you have the mm_sem semaphore held, page tables can change at any time (by other threads of the same process unmapping pages from a different processor, or by evictions from page reclaim processes). You don't even need to sleep; it can happen even if you have preemption disabled, as long as the TLB shootdown interrupt can arrive. And it can happen even if interrupts are disabled, if you have a SMP machine, as you can, sometimes, see page table updates reflected even without an explicit TLB flush.
access_ok()
only checks that the range of addresses does not overlap with kernel space. So it doesn't tell you anything about whether the page table entries allow access - but its result also does not change, even if you block. If access is denied, __put_user()
will return -EFAULT
, which must be propagated to userspace (ie, error out here with -EFAULT
).
Note that the only difference between put_user()
and __put_user()
is that put_user()
performs an access_ok()
check as well. So if you're using it in a loop, doing a single access_ok()
ahead of time and using __put_user()
is probably the right thing to do.
精彩评论