kern/119144: spontaneous iir(4) crash in probe phase

Jens Rehsack rehsack at web.de
Sat Dec 29 13:30:01 PST 2007


>Number:         119144
>Category:       kern
>Synopsis:       spontaneous  iir(4) crash in probe phase
>Confidential:   no
>Severity:       critical
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Sat Dec 29 21:30:01 UTC 2007
>Closed-Date:
>Last-Modified:
>Originator:     Jens Rehsack
>Release:        7.0-PRERELEASE
>Organization:
>Environment:
FreeBSD netfinity.muppets.liwing.de 7.0-PRERELEASE FreeBSD 7.0-PRERELEASE #18: Sat Dec 29 20:40:00 UTC 2007     root at netfinity:/usr/obj/usr/src/sys/NETFINITY  i386
>Description:
When FreeBSD 7.0-BETA1 came out, I started some initial tests (checking its abilities and how it feels - just to warm up ...). After I built my own kernel, it crashed on reboot with "Page fault while in kernel mode" and, later, sometimes with "Memory modified after free".

At first I thought I had made a mistake configuring my kernel - that's what integration tests are good for, aren't they? So I built a GENERIC kernel from updated sources: sometimes it works, and sometimes it crashes with "Page fault while in kernel mode".

So I added all the debug options I could find: INVARIANTS, WITNESS, etc., including
options         DEBUG_MEMGUARD
options         DEBUG_REDZONE (I think this is the point from which it starts to report "Memory modified after free").

Because it's my very last x86 machine and my only 7.0 one, debugging was a little more difficult than usual. After adding a lot of extra CAM_DEBUG() calls, I found a useful watchpoint and could examine the cause of the problem:

In the file sys/cam/cam_xpt.c, two memory regions for scsi_inquiry are allocated - at lines 5767 and 5833 - and both are smaller than sizeof(struct scsi_inquiry_data). The iir.c module, however, expects within gdt_internal_cache_cmd() that the given data area is large enough to hold the entire data structure used to execute the given command; it zeroes and fills the full structure in place and therefore writes past the end of the smaller buffers.

As a fix, I chose to fill the data into a local buffer on the stack and, once it is ready, copy as much of it as fits into the csio.data_ptr area. On systems with a small stack size this fix may introduce new problems, but I don't expect a hardware RAID controller on such systems ;)
>How-To-Repeat:
Add "options DEBUG_REDZONE" and "device iir" to the kernel configuration, install an iir(4)-compatible controller, and boot.
>Fix:
Apply attached fix.

Patch attached with submission follows:

Index: sys/dev/iir/iir.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/iir/iir.c,v
retrieving revision 1.19
diff -u -u -r1.19 iir.c
--- sys/dev/iir/iir.c	17 Jun 2007 05:55:50 -0000	1.19
+++ sys/dev/iir/iir.c	29 Dec 2007 21:04:59 -0000
@@ -1160,20 +1160,25 @@
         break;
       case INQUIRY:
         {
-            struct scsi_inquiry_data *inq;
+            struct scsi_inquiry_data inq;
+            size_t copylen = MIN(sizeof(inq), ccb->csio.dxfer_len);
 
-            inq = (struct scsi_inquiry_data *)ccb->csio.data_ptr;       
-            bzero(inq, sizeof(struct scsi_inquiry_data));
-            inq->device = (gdt->sc_hdr[t].hd_devtype & 4) ?
+            bzero(&inq, sizeof(inq));
+            inq.device = (gdt->sc_hdr[t].hd_devtype & 4) ?
                 T_CDROM : T_DIRECT;
-            inq->dev_qual2 = (gdt->sc_hdr[t].hd_devtype & 1) ? 0x80 : 0;
-            inq->version = SCSI_REV_2;
-            inq->response_format = 2; 
-            inq->additional_length = 32; 
-            inq->flags = SID_CmdQue | SID_Sync; 
-            strcpy(inq->vendor, gdt->oem_name);
-            sprintf(inq->product, "Host Drive   #%02d", t);
-            strcpy(inq->revision, "   ");
+            inq.dev_qual2 = (gdt->sc_hdr[t].hd_devtype & 1) ? 0x80 : 0;
+            inq.version = SCSI_REV_2;
+            inq.response_format = 2; 
+            inq.additional_length = 32; 
+            inq.flags = SID_CmdQue | SID_Sync; 
+            strncpy(inq.vendor, gdt->oem_name, sizeof(inq.vendor));
+            snprintf(inq.product, sizeof(inq.product),
+                     "Host Drive   #%02d", t);
+            strncpy(inq.revision, "   ", sizeof(inq.revision));
+            bcopy(&inq, ccb->csio.data_ptr, copylen );
+            if( ccb->csio.dxfer_len > copylen )
+                bzero( ccb->csio.data_ptr+copylen,
+                       ccb->csio.dxfer_len - copylen );
             break;
         }
       case MODE_SENSE_6:
@@ -1182,18 +1187,24 @@
                 struct scsi_mode_hdr_6 hd;
                 struct scsi_mode_block_descr bd;
                 struct scsi_control_page cp;
-            } *mpd;
+            } mpd;
+            size_t copylen = MIN(sizeof(mpd), ccb->csio.dxfer_len);
             u_int8_t page;
 
-            mpd = (struct mpd_data *)ccb->csio.data_ptr;        
-            bzero(mpd, sizeof(struct mpd_data));
-            mpd->hd.datalen = sizeof(struct scsi_mode_hdr_6) +
+            /*mpd = (struct mpd_data *)ccb->csio.data_ptr;*/
+            bzero(&mpd, sizeof(mpd));
+            mpd.hd.datalen = sizeof(struct scsi_mode_hdr_6) +
                 sizeof(struct scsi_mode_block_descr);
-            mpd->hd.dev_specific = (gdt->sc_hdr[t].hd_devtype & 2) ? 0x80 : 0;
-            mpd->hd.block_descr_len = sizeof(struct scsi_mode_block_descr);
-            mpd->bd.block_len[0] = (GDT_SECTOR_SIZE & 0x00ff0000) >> 16;
-            mpd->bd.block_len[1] = (GDT_SECTOR_SIZE & 0x0000ff00) >> 8;
-            mpd->bd.block_len[2] = (GDT_SECTOR_SIZE & 0x000000ff);
+            mpd.hd.dev_specific = (gdt->sc_hdr[t].hd_devtype & 2) ? 0x80 : 0;
+            mpd.hd.block_descr_len = sizeof(struct scsi_mode_block_descr);
+            mpd.bd.block_len[0] = (GDT_SECTOR_SIZE & 0x00ff0000) >> 16;
+            mpd.bd.block_len[1] = (GDT_SECTOR_SIZE & 0x0000ff00) >> 8;
+            mpd.bd.block_len[2] = (GDT_SECTOR_SIZE & 0x000000ff);
+
+            bcopy(&mpd, ccb->csio.data_ptr, copylen );
+            if( ccb->csio.dxfer_len > copylen )
+                bzero( ccb->csio.data_ptr+copylen,
+                       ccb->csio.dxfer_len - copylen );
             page=((struct scsi_mode_sense_6 *)ccb->csio.cdb_io.cdb_bytes)->page;
             switch (page) {
               default:
@@ -1204,12 +1215,17 @@
         }
       case READ_CAPACITY:
         {
-            struct scsi_read_capacity_data *rcd;
+            struct scsi_read_capacity_data rcd;
+            size_t copylen = MIN(sizeof(rcd), ccb->csio.dxfer_len);
               
-            rcd = (struct scsi_read_capacity_data *)ccb->csio.data_ptr; 
-            bzero(rcd, sizeof(struct scsi_read_capacity_data));
-            scsi_ulto4b(gdt->sc_hdr[t].hd_size - 1, rcd->addr);
-            scsi_ulto4b(GDT_SECTOR_SIZE, rcd->length);
+            /*rcd = (struct scsi_read_capacity_data *)ccb->csio.data_ptr;*/
+            bzero(&rcd, sizeof(rcd));
+            scsi_ulto4b(gdt->sc_hdr[t].hd_size - 1, rcd.addr);
+            scsi_ulto4b(GDT_SECTOR_SIZE, rcd.length);
+            bcopy(&rcd, ccb->csio.data_ptr, copylen );
+            if( ccb->csio.dxfer_len > copylen )
+                bzero( ccb->csio.data_ptr+copylen,
+                       ccb->csio.dxfer_len - copylen );
             break;
         }
       default:


>Release-Note:
>Audit-Trail:
>Unformatted:


More information about the freebsd-bugs mailing list