[patch] Re: getfsent(3) and spaces in fstab
    Simon Barner 
    barner at in.tum.de
       
    Thu Aug  7 07:08:37 PDT 2003
    
    
  
> imho - expensive algorithm... i want to see anything more simple... 
> like "gtok()" instead "es_strsep() + remove_escapes()"?
I have adapted my patch to use your neat gtok() function, but I came to
the conclusion that a two-pass algorithm is necessary:
The first pass detects whether a line from fstab is the old or the new
style format (old style lines may only have unescaped white spaces
before a trailing #-comment).
Then, the second pass extracts the information.
I admit this is rather complicated, but I don't know how to handle two sets
of delimiters (":\n" and " \n\r\t") with only one pass. Using gtok() to
detect the style of a line is not an option IMO, since it would convert
escape sequences.
Now, the following lines can be processed:
1) old style:
<file system>:<mount point>:<mount type>:<dump>:<passno>([' ','\t']*#<comment>)*
2) new style
format as described in fstab(5) + an optional #-comment at the end of the line
3) empty lines, lines consisting only of white space, and lines with arbitrarily many white spaces followed by a comment
In both the old and the new style lines, white spaces can be written as
escape sequences or in double quotes.
Could somebody please review my patch - if there are no objections (but
I am sure there are some more details that can be improved), I will
write a PR in order
Regards,
 Simon
-------------- next part --------------
--- fstab.c.orig	Fri Aug  1 17:18:00 2003
+++ fstab.c	Thu Aug  7 15:46:39 2003
@@ -84,6 +84,60 @@
 	_fs_fstab.fs_spec = buf;
 }
 
+/*
+ * Gets a token from the string *s. Tokens are separated by any of the
+ * characters in *delim; leading delimiters are skipped.
+ * Characters that are in *delim can occur in the token if they are escaped,
+ * i.e. have a '\' prepended, or enclosed in double quotes. The character
+ * '\' itself is encoded as '\\'; '\t', '\r', '\n' and '\a' are converted.
+ * If the token would start with an unescaped '#', the rest of *s is taken
+ * to be a comment and no token is returned. To encode a '#', use '\#'.
+ *
+ * If a token is found, gtok terminates it with '\0' (in place) and returns
+ * a pointer to it. Otherwise the return value is NULL and *s is unchanged.
+ * As a side effect, *s is modified to point to the character after the
+ * delimiter that ended the token, or to the final '\0' of the input.
+ */
+char *gtok(char **s, char const *delim)
+{
+	int quoted, escaped;
+	static char const esc_set[] = {  't',  'r',  'n',  'a', 0 };
+	static char const esc_rep[] = { '\t', '\r', '\n', '\a', 0 };
+	char *tok, *r, *w, *p;
+
+	if (!s || !*s || !*(tok = *s + strspn(*s, delim)) || *tok == '#')
+		return NULL;
+
+	for (quoted = escaped = 0, r = w = tok; *r; r++) {	/* r reads, w writes */
+		if (!escaped) {
+			if (*r == '\\') {
+				escaped = 1;	/* the backslash itself is not copied */
+				continue;
+			}
+			if (*r == '\"') {
+				quoted ^= -1;	/* toggles between 0 and -1; quote is dropped */
+				continue;
+			}
+			if (!quoted) {
+				if (strchr(delim, *r)) {
+					r++;	/* step past the delimiter */
+					break;
+				}
+			}
+		} else {
+			escaped = 0;
+			if ((p = strchr(esc_set, *r)) != NULL) {
+				*w++ = esc_rep[p - esc_set];
+				continue;
+			}
+		}
+		*w++ = *r;	/* literal copy; also taken for escapes not in esc_set */
+	}
+	*w = 0;	/* w never runs ahead of r, so this stays inside the input */
+	*s = r;
+	return tok;
+}
+
 static int
 fstabscan()
 {
@@ -91,21 +145,73 @@
 #define	MAXLINELENGTH	1024
 	static char line[MAXLINELENGTH];
 	char subline[MAXLINELENGTH];
-	int typexx;
+	int typexx, escaped=0, quoted=0, ws_sep=0;
 
 	for (;;) {
 
 		if (!(p = fgets(line, sizeof(line), _fs_fp)))
 			return(0);
-/* OLD_STYLE_FSTAB */
 		++LineNo;
-		if (*line == '#' || *line == '\n')
-			continue;
-		if (!strpbrk(p, " \t")) {
-			_fs_fstab.fs_spec = strsep(&p, ":\n");
-			_fs_fstab.fs_file = strsep(&p, ":\n");
+		
+		/* Detect whether line is in old or new fstab style */
+		for (cp=p; *cp != '\n'; ++cp) {
+			if (*cp == '\\') {
+			    escaped = (escaped ? 0 : 1);
+			    continue;
+			}
+			if (!escaped) {
+			    /* Quotes */
+			    if (*cp == '\"') {
+			    	quoted = (quoted ? 0 : 1);
+				continue;
+			    }
+			    if (quoted)
+			    	continue;
+			    /* new white separator found */
+			    if (cp > p && strspn (cp, " \n\r\t") &&
+ 				!strspn(cp-1, " \t"))
+				++ws_sep;
+			    
+			    /* #-comment found */
+			    if (*cp == '#') {
+			    	*cp = '\0';
+				/* ignore white spaces in front of a comment */
+				if (cp > p && strspn(cp-1, " \t") && 
+				    ws_sep > 0)
+				    ws_sep--;
+				    break;
+			    }
+			} else
+			    escaped = 0;
+		}
+		/* open quotes and unfinished escape-sequences are bad */
+		if (quoted || escaped)
+		    goto bad;
+		/* ignore trailing white spaces */
+	        if (*(cp + strspn (cp, " \t")) == '\n' && ws_sep > 0)
+		    --ws_sep;
+		   
+		/* No white space separators found => OLD_STYLE_FSTAB */
+		if (ws_sep == 0) {
+			/*
+			 * line consists only of white spaces
+			 * (evtl. + #-comment)
+			 */
+			if (strspn (p, " \t"))
+				continue;
+			/*
+			 * Now read the different values (gtok will convert
+			 * escape seq.). Format is:
+			 *  <fs_spec>:<fs_file>:<fs_type>:<freq>:<passno>
+			 * ':' itself can be encodes as '\:'
+			 */
+			if (!(_fs_fstab.fs_spec = gtok(&p, ":\n\r")))
+				continue;
+			if (!(_fs_fstab.fs_file = gtok(&p, ":\n\r"))) {
+				goto bad;
+			}
 			fixfsfile();
-			_fs_fstab.fs_type = strsep(&p, ":\n");
+			_fs_fstab.fs_type = gtok(&p, ":\n\r");
 			if (_fs_fstab.fs_type) {
 				if (!strcmp(_fs_fstab.fs_type, FSTAB_XX))
 					continue;
@@ -113,46 +219,43 @@
 				_fs_fstab.fs_vfstype =
 				    strcmp(_fs_fstab.fs_type, FSTAB_SW) ?
 				    "ufs" : "swap";
-				if ((cp = strsep(&p, ":\n")) != NULL) {
+				if ((cp = gtok(&p, ":\n\r")) != NULL) {
 					_fs_fstab.fs_freq = atoi(cp);
-					if ((cp = strsep(&p, ":\n")) != NULL) {
+					if ((cp = gtok(&p, " \n\r\t")) != NULL) {
 						_fs_fstab.fs_passno = atoi(cp);
+						if (gtok (&p, " \n\r\t"))
+						    goto bad;
+						    
 						return(1);
 					}
 				}
 			}
 			goto bad;
 		}
-/* OLD_STYLE_FSTAB */
-		while ((cp = strsep(&p, " \t\n")) != NULL && *cp == '\0')
-			;
-		_fs_fstab.fs_spec = cp;
-		if (!_fs_fstab.fs_spec || *_fs_fstab.fs_spec == '#')
+		
+		/* At least one white space sep. found => NEW_STYLE_FSTAB */
+		if (!(_fs_fstab.fs_spec = gtok(&p, " \n\r\t")))
 			continue;
-		while ((cp = strsep(&p, " \t\n")) != NULL && *cp == '\0')
-			;
-		_fs_fstab.fs_file = cp;
+		if (!(_fs_fstab.fs_file = gtok(&p, " \n\r\t")))
+			goto bad;
 		fixfsfile();
-		while ((cp = strsep(&p, " \t\n")) != NULL && *cp == '\0')
-			;
-		_fs_fstab.fs_vfstype = cp;
-		while ((cp = strsep(&p, " \t\n")) != NULL && *cp == '\0')
-			;
-		_fs_fstab.fs_mntops = cp;
-		if (_fs_fstab.fs_mntops == NULL)
+		if (!(_fs_fstab.fs_vfstype = gtok(&p, " \n\r\t")))
+			goto bad;
+		if (!(_fs_fstab.fs_mntops = gtok(&p, " \n\r\t")))
 			goto bad;
 		_fs_fstab.fs_freq = 0;
 		_fs_fstab.fs_passno = 0;
-		while ((cp = strsep(&p, " \t\n")) != NULL && *cp == '\0')
-			;
+		cp = gtok(&p, " \n\r\t");
 		if (cp != NULL) {
 			_fs_fstab.fs_freq = atoi(cp);
-			while ((cp = strsep(&p, " \t\n")) != NULL && *cp == '\0')
-				;
+			cp = gtok(&p, " \n\r\t");
 			if (cp != NULL)
 				_fs_fstab.fs_passno = atoi(cp);
 		}
 		strcpy(subline, _fs_fstab.fs_mntops);
+		if (gtok (&p, " \n\r\t"))
+		    goto bad;
+
 		p = subline;
 		for (typexx = 0, cp = strsep(&p, ","); cp;
 		     cp = strsep(&p, ",")) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 187 bytes
Desc: Digital signature
Url : http://lists.freebsd.org/pipermail/freebsd-hackers/attachments/20030807/fa303b7b/attachment.bin
    
    
More information about the freebsd-hackers
mailing list