git: 15c1622ad8fa - main - databases/mariadb104-server: Fix DB corruption

Bernard Spil brnrd at FreeBSD.org
Sun Sep 12 12:04:30 UTC 2021


The branch main has been updated by brnrd:

URL: https://cgit.FreeBSD.org/ports/commit/?id=15c1622ad8fa4271d5f18831528af4d5b215e79e

commit 15c1622ad8fa4271d5f18831528af4d5b215e79e
Author:     Bernard Spil <brnrd at FreeBSD.org>
AuthorDate: 2021-09-12 12:00:07 +0000
Commit:     Bernard Spil <brnrd at FreeBSD.org>
CommitDate: 2021-09-12 12:04:19 +0000

    databases/mariadb104-server: Fix DB corruption
    
     * InnoDB corrupts files due to incorrect st_blksize calculation
    
    PR:             257728, 257958
    Reported by:    mfechner, iron udjin gmail com
    Obtained from:  https://jira.mariadb.org/projects/MDEV/issues/MDEV-26537
    MFH:            2021Q3
---
 databases/mariadb103-server/files/patch-MDEV-26537 | 126 +++++++++++++++++++++
 databases/mariadb104-server/Makefile               |   1 +
 databases/mariadb104-server/files/patch-MDEV-26537 | 126 +++++++++++++++++++++
 3 files changed, 253 insertions(+)

diff --git a/databases/mariadb103-server/files/patch-MDEV-26537 b/databases/mariadb103-server/files/patch-MDEV-26537
new file mode 100644
index 000000000000..05cf0f5a4ebc
--- /dev/null
+++ b/databases/mariadb103-server/files/patch-MDEV-26537
@@ -0,0 +1,126 @@
+From d09426f9e60fd93296464ec9eb5f9d85566437d3 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela at mariadb.com>
+Date: Fri, 10 Sep 2021 19:15:41 +0300
+Subject: [PATCH] MDEV-26537 InnoDB corrupts files due to incorrect st_blksize
+ calculation
+
+The st_blksize returned by fstat(2) is not documented to be
+a power of 2, like we assumed in
+commit 58252fff15acfe7c7b0452a87e202e3f8e454e19 (MDEV-26040).
+While on Linux, the st_blksize appears to report the file system
+block size (which hopefully is not smaller than the sector size
+of the underlying block device), on FreeBSD we observed
+st_blksize values that might have been something similar to st_size.
+
+Also IBM AIX was affected by this. A simple test case would
+lead to a crash when using the minimum innodb_buffer_pool_size=5m
+on both FreeBSD and AIX:
+
+seq -f 'create table t%g engine=innodb select * from seq_1_to_200000;' \
+1 100|mysql test&
+seq -f 'create table u%g engine=innodb select * from seq_1_to_200000;' \
+1 100|mysql test&
+
+We will fix this by not trusting st_blksize at all, and assuming that
+the smallest allowed write size (for O_DIRECT) is 4096 bytes. We hope
+that no storage systems with larger block size exist. Anything larger
+than 4096 bytes should be unlikely, given that it is the minimum
+virtual memory page size of many contemporary processors.
+
+MariaDB Server on Microsoft Windows was not affected by this.
+
+While the 512-byte sector size of the venerable Seagate ST-225 is still
+in widespread use, the minimum innodb_page_size is 4096 bytes, and
+innodb_log_file_size can be set in integer multiples of 65536 bytes.
+
+The only occasion where InnoDB uses smaller data file block sizes than
+4096 bytes is with ROW_FORMAT=COMPRESSED tables with KEY_BLOCK_SIZE=1
+or KEY_BLOCK_SIZE=2 (or innodb_page_size=4096). For such tables,
+we will from now on preallocate space in integer multiples of 4096 bytes
+and let regular writes extend the file by 1024, 2048, or 3072 bytes.
+
+The view INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES.FS_BLOCK_SIZE
+should report the raw st_blksize.
+
+For page_compressed tables, the function fil_space_get_block_size()
+will map to 512 any st_blksize value that is larger than 4096.
+
+os_file_set_size(): Assume that the file system block size is 4096 bytes,
+and only support extending files to integer multiples of 4096 bytes.
+
+fil_space_extend_must_retry(): Round down the preallocation size to
+an integer multiple of 4096 bytes.
+--- mysql-test/suite/innodb/t/check_ibd_filesize.test.orig	2021-08-02 10:58:56 UTC
++++ mysql-test/suite/innodb/t/check_ibd_filesize.test
+@@ -46,6 +46,12 @@ perl;
+ print "# bytes: ", (-s "$ENV{MYSQLD_DATADIR}/test/t1.ibd"), "\n";
+ EOF
+ INSERT INTO t1 SELECT seq,REPEAT('a',30000) FROM seq_1_to_20;
++# Ensure that the file will be extended with the last 1024-byte page
++# after the file was pre-extended in 4096-byte increments.
++--disable_query_log
++FLUSH TABLE t1 FOR EXPORT;
++UNLOCK TABLES;
++--enable_query_log
+ perl;
+ print "# bytes: ", (-s "$ENV{MYSQLD_DATADIR}/test/t1.ibd"), "\n";
+ EOF
+ mysql-test/suite/innodb/t/check_ibd_filesize.test |  6 ++++++
+ storage/innobase/fil/fil0fil.cc                   | 13 +++++++++----
+ storage/innobase/os/os0file.cc                    |  7 ++++---
+ 3 files changed, 19 insertions(+), 7 deletions(-)
+
+--- storage/innobase/fil/fil0fil.cc.orig	2021-08-02 10:58:57 UTC
++++ storage/innobase/fil/fil0fil.cc
+@@ -942,11 +942,17 @@ fil_space_extend_must_retry(
+ 	const page_size_t	pageSize(space->flags);
+ 	const ulint		page_size = pageSize.physical();
+ 
+-	/* fil_read_first_page() expects srv_page_size bytes.
+-	fil_node_open_file() expects at least 4 * srv_page_size bytes.*/
+-	os_offset_t new_size = std::max(
+-		os_offset_t(size - file_start_page_no) * page_size,
++		/* fil_read_first_page() expects innodb_page_size bytes.
++		fil_node_open_file() expects at least 4 * innodb_page_size bytes.
++		os_file_set_size() expects multiples of 4096 bytes.
++		For ROW_FORMAT=COMPRESSED tables using 1024-byte or 2048-byte
++		pages, we will preallocate up to an integer multiple of 4096 bytes,
++		and let normal writes append 1024, 2048, or 3072 bytes to the file. */
++		os_offset_t new_size = std::max(
++		(os_offset_t(size - file_start_page_no) * page_size)
++		& ~os_offset_t(4095),
+ 		os_offset_t(FIL_IBD_FILE_INITIAL_SIZE << srv_page_size_shift));
++		
+ 
+ 	*success = os_file_set_size(node->name, node->handle, new_size,
+ 		FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));
+--- storage/innobase/os/os0file.cc.orig	2021-08-02 10:58:57 UTC
++++ storage/innobase/os/os0file.cc
+@@ -5351,6 +5351,8 @@ os_file_set_size(
+ 	os_offset_t	size,
+ 	bool	is_sparse)
+ {
++	ut_ad(!(size & 4095));
++
+ #ifdef _WIN32
+ 	/* On Windows, changing file size works well and as expected for both
+ 	sparse and normal files.
+@@ -5392,7 +5394,7 @@ fallback:
+ 			if (current_size >= size) {
+ 				return true;
+ 			}
+-			current_size &= ~os_offset_t(statbuf.st_blksize - 1);
++			current_size &= ~4095ULL;
+ 			err = posix_fallocate(file, current_size,
+ 					      size - current_size);
+ 		}
+@@ -5432,8 +5434,7 @@ fallback:
+ 	if (fstat(file, &statbuf)) {
+ 		return false;
+ 	}
+-	os_offset_t current_size = statbuf.st_size
+-		& ~os_offset_t(statbuf.st_blksize - 1);
++	os_offset_t current_size = statbuf.st_size & ~4095ULL;
+ #endif
+ 	if (current_size >= size) {
+ 		return true;
diff --git a/databases/mariadb104-server/Makefile b/databases/mariadb104-server/Makefile
index df85b463fda6..eef8cc75b7de 100644
--- a/databases/mariadb104-server/Makefile
+++ b/databases/mariadb104-server/Makefile
@@ -2,6 +2,7 @@
 
 PORTNAME?=	mariadb
 PORTVERSION=	10.4.21
+PORTREVISION?=	1
 CATEGORIES=	databases
 MASTER_SITES=	http://mirrors.supportex.net/${SITESDIR}/ \
 		http://mirror2.hs-esslingen.de/pub/Mirrors/${SITESDIR}/ \
diff --git a/databases/mariadb104-server/files/patch-MDEV-26537 b/databases/mariadb104-server/files/patch-MDEV-26537
new file mode 100644
index 000000000000..05cf0f5a4ebc
--- /dev/null
+++ b/databases/mariadb104-server/files/patch-MDEV-26537
@@ -0,0 +1,126 @@
+From d09426f9e60fd93296464ec9eb5f9d85566437d3 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela at mariadb.com>
+Date: Fri, 10 Sep 2021 19:15:41 +0300
+Subject: [PATCH] MDEV-26537 InnoDB corrupts files due to incorrect st_blksize
+ calculation
+
+The st_blksize returned by fstat(2) is not documented to be
+a power of 2, like we assumed in
+commit 58252fff15acfe7c7b0452a87e202e3f8e454e19 (MDEV-26040).
+While on Linux, the st_blksize appears to report the file system
+block size (which hopefully is not smaller than the sector size
+of the underlying block device), on FreeBSD we observed
+st_blksize values that might have been something similar to st_size.
+
+Also IBM AIX was affected by this. A simple test case would
+lead to a crash when using the minimum innodb_buffer_pool_size=5m
+on both FreeBSD and AIX:
+
+seq -f 'create table t%g engine=innodb select * from seq_1_to_200000;' \
+1 100|mysql test&
+seq -f 'create table u%g engine=innodb select * from seq_1_to_200000;' \
+1 100|mysql test&
+
+We will fix this by not trusting st_blksize at all, and assuming that
+the smallest allowed write size (for O_DIRECT) is 4096 bytes. We hope
+that no storage systems with larger block size exist. Anything larger
+than 4096 bytes should be unlikely, given that it is the minimum
+virtual memory page size of many contemporary processors.
+
+MariaDB Server on Microsoft Windows was not affected by this.
+
+While the 512-byte sector size of the venerable Seagate ST-225 is still
+in widespread use, the minimum innodb_page_size is 4096 bytes, and
+innodb_log_file_size can be set in integer multiples of 65536 bytes.
+
+The only occasion where InnoDB uses smaller data file block sizes than
+4096 bytes is with ROW_FORMAT=COMPRESSED tables with KEY_BLOCK_SIZE=1
+or KEY_BLOCK_SIZE=2 (or innodb_page_size=4096). For such tables,
+we will from now on preallocate space in integer multiples of 4096 bytes
+and let regular writes extend the file by 1024, 2048, or 3072 bytes.
+
+The view INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES.FS_BLOCK_SIZE
+should report the raw st_blksize.
+
+For page_compressed tables, the function fil_space_get_block_size()
+will map to 512 any st_blksize value that is larger than 4096.
+
+os_file_set_size(): Assume that the file system block size is 4096 bytes,
+and only support extending files to integer multiples of 4096 bytes.
+
+fil_space_extend_must_retry(): Round down the preallocation size to
+an integer multiple of 4096 bytes.
+--- mysql-test/suite/innodb/t/check_ibd_filesize.test.orig	2021-08-02 10:58:56 UTC
++++ mysql-test/suite/innodb/t/check_ibd_filesize.test
+@@ -46,6 +46,12 @@ perl;
+ print "# bytes: ", (-s "$ENV{MYSQLD_DATADIR}/test/t1.ibd"), "\n";
+ EOF
+ INSERT INTO t1 SELECT seq,REPEAT('a',30000) FROM seq_1_to_20;
++# Ensure that the file will be extended with the last 1024-byte page
++# after the file was pre-extended in 4096-byte increments.
++--disable_query_log
++FLUSH TABLE t1 FOR EXPORT;
++UNLOCK TABLES;
++--enable_query_log
+ perl;
+ print "# bytes: ", (-s "$ENV{MYSQLD_DATADIR}/test/t1.ibd"), "\n";
+ EOF
+ mysql-test/suite/innodb/t/check_ibd_filesize.test |  6 ++++++
+ storage/innobase/fil/fil0fil.cc                   | 13 +++++++++----
+ storage/innobase/os/os0file.cc                    |  7 ++++---
+ 3 files changed, 19 insertions(+), 7 deletions(-)
+
+--- storage/innobase/fil/fil0fil.cc.orig	2021-08-02 10:58:57 UTC
++++ storage/innobase/fil/fil0fil.cc
+@@ -942,11 +942,17 @@ fil_space_extend_must_retry(
+ 	const page_size_t	pageSize(space->flags);
+ 	const ulint		page_size = pageSize.physical();
+ 
+-	/* fil_read_first_page() expects srv_page_size bytes.
+-	fil_node_open_file() expects at least 4 * srv_page_size bytes.*/
+-	os_offset_t new_size = std::max(
+-		os_offset_t(size - file_start_page_no) * page_size,
++		/* fil_read_first_page() expects innodb_page_size bytes.
++		fil_node_open_file() expects at least 4 * innodb_page_size bytes.
++		os_file_set_size() expects multiples of 4096 bytes.
++		For ROW_FORMAT=COMPRESSED tables using 1024-byte or 2048-byte
++		pages, we will preallocate up to an integer multiple of 4096 bytes,
++		and let normal writes append 1024, 2048, or 3072 bytes to the file. */
++		os_offset_t new_size = std::max(
++		(os_offset_t(size - file_start_page_no) * page_size)
++		& ~os_offset_t(4095),
+ 		os_offset_t(FIL_IBD_FILE_INITIAL_SIZE << srv_page_size_shift));
++		
+ 
+ 	*success = os_file_set_size(node->name, node->handle, new_size,
+ 		FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags));
+--- storage/innobase/os/os0file.cc.orig	2021-08-02 10:58:57 UTC
++++ storage/innobase/os/os0file.cc
+@@ -5351,6 +5351,8 @@ os_file_set_size(
+ 	os_offset_t	size,
+ 	bool	is_sparse)
+ {
++	ut_ad(!(size & 4095));
++
+ #ifdef _WIN32
+ 	/* On Windows, changing file size works well and as expected for both
+ 	sparse and normal files.
+@@ -5392,7 +5394,7 @@ fallback:
+ 			if (current_size >= size) {
+ 				return true;
+ 			}
+-			current_size &= ~os_offset_t(statbuf.st_blksize - 1);
++			current_size &= ~4095ULL;
+ 			err = posix_fallocate(file, current_size,
+ 					      size - current_size);
+ 		}
+@@ -5432,8 +5434,7 @@ fallback:
+ 	if (fstat(file, &statbuf)) {
+ 		return false;
+ 	}
+-	os_offset_t current_size = statbuf.st_size
+-		& ~os_offset_t(statbuf.st_blksize - 1);
++	os_offset_t current_size = statbuf.st_size & ~4095ULL;
+ #endif
+ 	if (current_size >= size) {
+ 		return true;


More information about the dev-commits-ports-main mailing list