git: 29cb30dcb08e - main - sdhci_fsl_fdt: Fix tuning code

From: Wojciech Macek <wma_at_FreeBSD.org>
Date: Tue, 22 Feb 2022 08:58:49 UTC
The branch main has been updated by wma:

URL: https://cgit.FreeBSD.org/src/commit/?id=29cb30dcb08ea06e146628f4ef8d029bf55ffa01

commit 29cb30dcb08ea06e146628f4ef8d029bf55ffa01
Author:     Kornel Duleba <mindal@semihalf.com>
AuthorDate: 2021-12-22 08:17:28 +0000
Commit:     Wojciech Macek <wma@FreeBSD.org>
CommitDate: 2022-02-22 08:58:32 +0000

    sdhci_fsl_fdt: Fix tuning code
    
    - Some of the register writes were already done in the generic tuning code.
      Remove them.
    - Increase the polling timeout. The previous value is probably fine, but since
      timeouts are treated as fatal errors increasing it to 200ms won't hurt.
    - Rework the HS400 switching code. Make sure that the switch happens at the
      right time. Reset the DLL0 block. We need to do that if u-boot has previously
      configured the controller in HS400 mode.
    - Check current timing before tuning. The tuning devmethod is always called,
      even for timings that don't require the tuning procedure.
    - Rework software tuning routine code. Use inner formula for clock
      divider calculation, as previous one was incorrect.
    - Implement custom re-tune procedure.
    
    Co-authored-by: Hubert Mazur <hum@semihalf.com>
    Obtained from: Semihalf
    Sponsored by: Alstom Group
    Differential Revision: https://reviews.freebsd.org/D34027
---
 sys/dev/sdhci/sdhci_fsl_fdt.c | 423 +++++++++++++++++++++---------------------
 1 file changed, 209 insertions(+), 214 deletions(-)

diff --git a/sys/dev/sdhci/sdhci_fsl_fdt.c b/sys/dev/sdhci/sdhci_fsl_fdt.c
index 5d4052a12a2e..2f19136c6078 100644
--- a/sys/dev/sdhci/sdhci_fsl_fdt.c
+++ b/sys/dev/sdhci/sdhci_fsl_fdt.c
@@ -91,10 +91,11 @@ __FBSDID("$FreeBSD$");
 #define	SDHCI_FSL_WTMK_WR_512B		(0 << 15)
 
 #define SDHCI_FSL_AUTOCERR		0x3C
-#define SDHCI_FSL_AUTOCERR_UHMS_HS200	(3 << 17)
-#define SDHCI_FSL_AUTOCERR_UHMS		(7 << 18)
+#define SDHCI_FSL_AUTOCERR_UHMS_HS200	(3 << 16)
+#define SDHCI_FSL_AUTOCERR_UHMS		(7 << 16)
 #define SDHCI_FSL_AUTOCERR_EXTN		(1 << 22)
 #define SDHCI_FSL_AUTOCERR_SMPCLKSEL	(1 << 23)
+#define SDHCI_FSL_AUTOCERR_UHMS_SHIFT	16
 
 #define	SDHCI_FSL_HOST_VERSION		0xfc
 #define	SDHCI_FSL_VENDOR_V23		0x13
@@ -115,8 +116,8 @@ __FBSDID("$FreeBSD$");
 #define SDHCI_FSL_TBCTL_MODE_SW		3
 
 #define SDHCI_FSL_TBPTR			0x128
-#define SDHCI_FSL_TBPTR_WND_START	0x7F00
-#define SDHCI_FSL_TBPTR_WND_END		0x7F
+#define SDHCI_FSL_TBPTR_WND_START_SHIFT 8
+#define SDHCI_FSL_TBPTR_WND_MASK	0x7F
 
 #define SDHCI_FSL_SDCLKCTL		0x144
 #define SDHCI_FSL_SDCLKCTL_CMD_CLK_CTL	(1 << 15)
@@ -127,13 +128,14 @@ __FBSDID("$FreeBSD$");
 
 #define SDHCI_FSL_DLLCFG0		0x160
 #define SDHCI_FSL_DLLCFG0_FREQ_SEL	(1 << 27)
+#define SDHCI_FSL_DLLCFG0_RESET		(1 << 30)
 #define SDHCI_FSL_DLLCFG0_EN		(1 << 31)
 
 #define SDHCI_FSL_DLLCFG1		0x164
 #define SDHCI_FSL_DLLCFG1_PULSE_STRETCH	(1 << 31)
 
-#define SDHCI_FSL_DLLCFG1               0x164
-#define SDHCI_FSL_DLLCFG1_PULSE_STRETCH (1 << 31)
+#define SDHCI_FSL_DLLSTAT0		0x170
+#define SDHCI_FSL_DLLSTAT0_SLV_STS	(1 << 27)
 
 #define	SDHCI_FSL_ESDHC_CTRL		0x40c
 #define	SDHCI_FSL_ESDHC_CTRL_SNOOP	(1 << 6)
@@ -160,11 +162,14 @@ __FBSDID("$FreeBSD$");
  */
 #define SDHCI_FSL_HS400_LIMITED_CLK_DIV	(1 << 4)
 
+/*
+ * HS400 tuning is done in HS200 mode, but it has to be done using
+ * the target frequency. In order to apply the errata above we need to
+ * know the target mode during tuning procedure. Use this flag for just that.
+ */
 #define SDHCI_FSL_HS400_FLAG		(1 << 0)
-#define SDHCI_FSL_SWITCH_TO_HS400_FLAG	(1 << 1)
-#define SDHCI_FSL_HS400_DONE		(1 << 2)
 
-#define SDHCI_FSL_MAX_RETRIES		10
+#define SDHCI_FSL_MAX_RETRIES		20000	/* DELAY(10) * this = 200ms */
 
 struct sdhci_fsl_fdt_softc {
 	device_t				dev;
@@ -180,9 +185,9 @@ struct sdhci_fsl_fdt_softc {
 	uint32_t				cmd_and_mode;
 	uint16_t				sdclk_bits;
 	struct mmc_helper			fdt_helper;
+	uint32_t				div_ratio;
 	uint8_t					vendor_ver;
 	uint32_t				flags;
-	enum mmc_bus_timing			last_mode;
 
 	uint32_t (* read)(struct sdhci_fsl_fdt_softc *, bus_size_t);
 	void (* write)(struct sdhci_fsl_fdt_softc *, bus_size_t, uint32_t);
@@ -211,7 +216,7 @@ static const struct sdhci_fsl_fdt_soc_data sdhci_fsl_fdt_ls1028a_soc_data = {
 static const struct sdhci_fsl_fdt_soc_data sdhci_fsl_fdt_ls1046a_soc_data = {
 	.quirks = SDHCI_QUIRK_DONT_SET_HISPD_BIT | SDHCI_QUIRK_BROKEN_AUTO_STOP,
 	.baseclk_div = 2,
-	.errata = SDHCI_FSL_UNSUPP_1_8V,
+	.errata = SDHCI_FSL_UNSUPP_1_8V | SDHCI_FSL_TUNING_ERRATUM_TYPE2,
 };
 
 static const struct sdhci_fsl_fdt_soc_data sdhci_fsl_fdt_gen_data = {
@@ -305,13 +310,6 @@ fsl_sdhc_fdt_set_clock(struct sdhci_fsl_fdt_softc *sc, struct sdhci_slot *slot,
 	 */
 	SDHCI_FSL_FDT_CLK_DIV(sc, sc->baseclk_hz, slot->clock, prescale, div);
 
-#ifdef DEBUG
-	device_printf(sc->dev,
-	    "Desired SD/MMC freq: %d, actual: %d; base %d prescale %d divisor %d\n",
-	    slot->clock, sc->baseclk_hz / (prescale * div),
-	    sc->baseclk_hz, prescale, div);
-#endif
-
 	div_ratio = prescale * div;
 
 	/*
@@ -335,6 +333,13 @@ fsl_sdhc_fdt_set_clock(struct sdhci_fsl_fdt_softc *sc, struct sdhci_slot *slot,
 		}
 	}
 
+	sc->div_ratio = prescale * div;
+	if (bootverbose)
+		device_printf(sc->dev,
+		    "Desired SD/MMC freq: %d, actual: %d; base %d prescale %d divisor %d\n",
+		    slot->clock, sc->baseclk_hz / (prescale * div),
+		    sc->baseclk_hz, prescale, div);
+
 	prescale >>= 1;
 	div -= 1;
 
@@ -502,24 +507,13 @@ sdhci_fsl_fdt_write_2(device_t dev, struct sdhci_slot *slot, bus_size_t off,
 		WR4(sc, SDHCI_TRANSFER_MODE, sc->cmd_and_mode);
 		sc->cmd_and_mode = 0;
 		return;
-	/*
-	 * When switching to HS400 mode, setting these registers
-	 * is required for successful tuning.
-	 */
 	case SDHCI_HOST_CONTROL2:
-		if ((val & SDHCI_CTRL2_MMC_HS400) &&
-		    (sc->flags & SDHCI_FSL_SWITCH_TO_HS400_FLAG)) {
-			val32 = RD4(sc, SDHCI_FSL_TBCTL);
-			val32 |= SDHCI_FSL_TBCTL_HS400_EN;
-			WR4(sc, SDHCI_FSL_TBCTL, val32);
-
-			val32 = RD4(sc, SDHCI_FSL_SDCLKCTL);
-			val32 |= SDHCI_FSL_SDCLKCTL_CMD_CLK_CTL;
-			WR4(sc, SDHCI_FSL_SDCLKCTL, val32);
-
-		}
-
-		val &= ~SDHCI_CTRL2_MMC_HS400;
+		/*
+		 * Switching to HS400 requires a special procedure,
+		 * which is done in sdhci_fsl_fdt_set_uhs_timing.
+		 */
+		if ((val & SDHCI_CTRL2_UHS_MASK) == SDHCI_CTRL2_MMC_HS400)
+			val &= ~SDHCI_CTRL2_MMC_HS400;
 	default:
 		val32 = RD4(sc, off & ~3);
 		val32 &= ~(UINT16_MAX << (off & 3) * 8);
@@ -800,7 +794,6 @@ sdhci_fsl_fdt_attach(device_t dev)
 	sc->dev = dev;
 	sc->slot.quirks = sc->soc_data->quirks;
 	sc->flags = 0;
-	sc->last_mode = bus_timing_normal;
 	host = &sc->slot.host;
 
 	rid = 0;
@@ -1028,7 +1021,7 @@ sdhci_fsl_fdt_reset(device_t dev, struct sdhci_slot *slot, uint8_t mask)
 		WR4(sc, SDHCI_FSL_DLLCFG1, val);
 	}
 
-	sc->flags &= ~SDHCI_FSL_HS400_DONE;
+	sc->flags = 0;
 }
 
 static void
@@ -1049,124 +1042,49 @@ sdhci_fsl_switch_tuning_block(device_t dev, bool enable)
 	WR4(sc, SDHCI_FSL_TBCTL, reg);
 }
 
-static int
-sdhci_hw_tuning(struct sdhci_fsl_fdt_softc *sc, device_t bus, device_t child,
-   bool hs400)
-{
-	int error, retries;
-	uint32_t reg;
-
-	retries = SDHCI_FSL_MAX_RETRIES;
-
-	/* Set SYSCTL2[EXTN] to start the tuning procedure. */
-	reg = RD4(sc, SDHCI_FSL_AUTOCERR);
-	reg |= SDHCI_FSL_AUTOCERR_EXTN;
-	WR4(sc, SDHCI_FSL_AUTOCERR, reg);
-
-	while (true) {
-		/* Execute generic tuning to issue CMD19 and CMD21. */
-		error = sdhci_generic_tune(bus, child, hs400);
-		if (error != 0) {
-			device_printf(bus, "Generic tuning failed.\n");
-			return (error);
-		}
-
-		/*
-		 * Tuning is done after a corresponding interrupt
-		 * is set.
-		 */
-		error = sdhci_fsl_poll_register(sc, SDHCI_FSL_IRQSTAT,
-		    SDHCI_FSL_IRQSTAT_BRR, SDHCI_FSL_IRQSTAT_BRR);
-		if (error)
-			device_printf(sc->dev,
-			    "Timeout while waiting for hardware to finish tuning.\n");
-
-		/* Clear IRQSTAT[BRR] register. */
-		reg = RD4(sc, SDHCI_FSL_IRQSTAT);
-		reg &= ~SDHCI_FSL_IRQSTAT_BRR;
-		WR4(sc, SDHCI_FSL_IRQSTAT, reg);
-
-		/* Check SYSCTL2[EXTN] register. */
-		if ((RD4(sc, SDHCI_FSL_AUTOCERR) &
-		    SDHCI_FSL_AUTOCERR_EXTN) == 0)
-			break;
-
-		if (!retries--) {
-			error = ENXIO;
-			device_printf(bus,
-			    "Failed to execute HW tuning.\n");
-			break;
-		}
-
-		DELAY(10);
-	}
-
-	/* Check SYSCTL2[SMPCLKSEL]. */
-	reg = RD4(sc, SDHCI_FSL_AUTOCERR);
-	if (!(reg & SDHCI_FSL_AUTOCERR_SMPCLKSEL)) {
-		error = ENXIO;
-		device_printf(bus,
-		    "Hardware tuning failed. Turning off tuning block\n");
-		sdhci_fsl_switch_tuning_block(bus, false);
-	} else
-		error = 0;
-
-	return (error);
-}
-
 static int
 sdhci_fsl_sw_tuning(struct sdhci_fsl_fdt_softc *sc, device_t bus,
     device_t child, bool hs400, uint32_t wnd_start, uint32_t wnd_end)
 {
-	uint32_t start_p, end_p, reg;
+	uint32_t reg;
 	int error;
 
-	wnd_start = 5 * sc->soc_data->baseclk_div;
-	wnd_end = 3 * sc->soc_data->baseclk_div;
-
-	reg = RD4(sc, SDHCI_FSL_TBCTL);
-	reg &= ~SDHCI_FSL_TBCTL_TB_MODE_MASK;
-	reg |= SDHCI_FSL_TBCTL_MODE_SW;
-	WR4(sc, SDHCI_FSL_TBCTL, reg);
-
-	reg = RD4(sc, SDHCI_FSL_TBPTR);
-	start_p = reg | SDHCI_FSL_TBPTR_WND_START;
-	end_p = reg | SDHCI_FSL_TBPTR_WND_END;
-
-	if (abs(start_p - end_p) > (4 * sc->soc_data->baseclk_div + 2)) {
-		wnd_start = 8 * sc->soc_data->baseclk_div;
-		wnd_end = 4 * sc->soc_data->baseclk_div;
+	if (sc->soc_data->errata & SDHCI_FSL_TUNING_ERRATUM_TYPE1 ||
+	    abs(wnd_start - wnd_end) <= (4 * sc->div_ratio + 2)) {
+		wnd_start = 5 * sc->div_ratio;
+		wnd_end = 3 * sc->div_ratio;
 	} else {
-		wnd_start = 5 * sc->soc_data->baseclk_div;
-		wnd_end = 3 * sc->soc_data->baseclk_div;
+		wnd_start = 8 * sc->div_ratio;
+		wnd_end = 4 * sc->div_ratio;
 	}
 
-	reg |= (wnd_start << 14);
-	reg |= (wnd_end << 6);
+	reg = RD4(sc, SDHCI_FSL_TBPTR);
+	reg &= ~SDHCI_FSL_TBPTR_WND_MASK;
+	reg &= ~(SDHCI_FSL_TBPTR_WND_MASK << SDHCI_FSL_TBPTR_WND_START_SHIFT);
+	reg |= wnd_start << SDHCI_FSL_TBPTR_WND_START_SHIFT;
+	reg |= wnd_end;
 	WR4(sc, SDHCI_FSL_TBPTR, reg);
 
+	/*
+	 * Normally those are supposed to be set in sdhci_execute_tuning.
+	 * However in our case we need a small delay between setting the two.
+	 */
 	reg = RD4(sc, SDHCI_FSL_AUTOCERR);
-	reg |= SDHCI_FSL_AUTOCERR_EXTN | SDHCI_FSL_AUTOCERR_SMPCLKSEL;
+	reg |= SDHCI_FSL_AUTOCERR_EXTN;
 	WR4(sc, SDHCI_FSL_AUTOCERR, reg);
+	DELAY(10);
+	reg |= SDHCI_FSL_AUTOCERR_SMPCLKSEL;
+	WR4(sc, SDHCI_FSL_AUTOCERR, reg);
+
+	reg = RD4(sc, SDHCI_FSL_TBCTL);
+	reg &= ~SDHCI_FSL_TBCTL_TB_MODE_MASK;
+	reg |= SDHCI_FSL_TBCTL_MODE_SW;
+	WR4(sc, SDHCI_FSL_TBCTL, reg);
 
 	error = sdhci_generic_tune(bus, child, hs400);
 	if (error != 0) {
 		device_printf(bus,
 		    "Failed to execute generic tune while performing software tuning.\n");
-		return (error);
-	}
-
-	reg = RD4(sc, SDHCI_FSL_IRQSTAT);
-	reg &= ~SDHCI_FSL_IRQSTAT_BRR;
-	WR4(sc, SDHCI_FSL_IRQSTAT, reg);
-
-	reg = RD4(sc, SDHCI_FSL_AUTOCERR);
-	if (!(reg & SDHCI_FSL_AUTOCERR_SMPCLKSEL)) {
-		/* Error occured, need to disable tuning block. */
-		reg = RD4(sc, SDHCI_FSL_TBCTL);
-		reg &= ~SDHCI_FSL_TBCTL_TBEN;
-		WR4(sc, SDHCI_FSL_TBCTL, reg);
-		error = ENXIO;
 	}
 
 	return (error);
@@ -1175,17 +1093,37 @@ sdhci_fsl_sw_tuning(struct sdhci_fsl_fdt_softc *sc, device_t bus,
 static int
 sdhci_fsl_fdt_tune(device_t bus, device_t child, bool hs400)
 {
-	uint32_t wnd_start, wnd_end, clk_divider, reg;
 	struct sdhci_fsl_fdt_softc *sc;
+	uint32_t wnd_start, wnd_end;
+	uint32_t clk_divider, reg;
 	struct sdhci_slot *slot;
 	int error;
 
 	sc = device_get_softc(bus);
 	slot = device_get_ivars(child);
 	error = 0;
-	clk_divider = slot->max_clk / slot->clock;
+	clk_divider = sc->baseclk_hz / slot->clock;
+
+	switch (sc->slot.host.ios.timing) {
+	case bus_timing_mmc_hs400:
+		return (EINVAL);
+	case bus_timing_mmc_hs200:
+	case bus_timing_uhs_ddr50:
+	case bus_timing_uhs_sdr104:
+		break;
+	case bus_timing_uhs_sdr50:
+		if (slot->opt & SDHCI_SDR50_NEEDS_TUNING)
+			break;
+	default:
+		return (0);
+	}
 
-	/* For tuning mode SD clock divider must be within 3 to 16. */
+	/*
+	 * For tuning mode SD clock divider must be within 3 to 16.
+	 * We also need to match the frequency to whatever mode is used.
+	 * For that reason we're just bailing if the dividers don't match
+	 * that requirement.
+	 */
 	if (clk_divider < 3 || clk_divider > 16)
 		return (ENXIO);
 
@@ -1200,9 +1138,9 @@ sdhci_fsl_fdt_tune(device_t bus, device_t child, bool hs400)
 	    SDHCI_FSL_PRES_SDSTB, SDHCI_FSL_PRES_SDSTB);
 	if (error != 0)
 		device_printf(bus,
-		    "Timeout while waiting for hardware.\n");
+		    "Timeout while waiting for clock to stabilize.\n");
 
-	/* Set ESDHCCTL[FAF] register. */
+	/* Flush async IO. */
 	reg = RD4(sc, SDHCI_FSL_ESDHC_CTRL);
 	reg |= SDHCI_FSL_ESDHC_CTRL_FAF;
 	WR4(sc, SDHCI_FSL_ESDHC_CTRL, reg);
@@ -1214,13 +1152,13 @@ sdhci_fsl_fdt_tune(device_t bus, device_t child, bool hs400)
 		device_printf(bus,
 		    "Timeout while waiting for hardware.\n");
 
-	/* Set AUTOCERR[UHSM] register. */
-	reg = RD4(sc, SDHCI_FSL_AUTOCERR);
-	reg &= ~SDHCI_FSL_AUTOCERR_UHMS;
-	reg |= SDHCI_FSL_AUTOCERR_UHMS_HS200;
-	WR4(sc, SDHCI_FSL_AUTOCERR, reg);
-
-	/* Set TBCTL[TB_EN] register and program valid tuning mode. */
+	/*
+	 * Set TBCTL[TB_EN] register and program valid tuning mode.
+	 * According to RM MODE_3 means that:
+	 * "eSDHC takes care of the re-tuning during data transfer
+	 * (auto re-tuning).".
+	 * Tuning mode can only be changed while the clock is disabled.
+	 */
 	reg = RD4(sc, SDHCI_FSL_TBCTL);
 	reg &= ~SDHCI_FSL_TBCTL_TB_MODE_MASK;
 	reg |= SDHCI_FSL_TBCTL_TBEN | SDHCI_FSL_TBCTL_MODE_3;
@@ -1237,24 +1175,29 @@ sdhci_fsl_fdt_tune(device_t bus, device_t child, bool hs400)
 		    "Timeout while waiting for clock to stabilize.\n");
 
 	/* Perform hardware tuning. */
-	error = sdhci_hw_tuning(sc, bus, child, hs400);
+	error = sdhci_generic_tune(bus, child, hs400);
 
 	reg = RD4(sc, SDHCI_FSL_TBPTR);
-	wnd_start = reg & SDHCI_FSL_TBPTR_WND_START;
-	wnd_end = reg & SDHCI_FSL_TBPTR_WND_END;
-
-	/* For erratum type2 affected platforms, check tuning pointer window. */
-	if (sc->soc_data->errata & SDHCI_FSL_TUNING_ERRATUM_TYPE2) {
-		if (abs(wnd_start - wnd_end) >
-		    (4 * sc->soc_data->baseclk_div + 2))
-			error = ENXIO;
+	wnd_start = reg >> SDHCI_FSL_TBPTR_WND_START_SHIFT;
+	wnd_start &= SDHCI_FSL_TBPTR_WND_MASK;
+	wnd_end = reg & SDHCI_FSL_TBPTR_WND_MASK;
+
+	/*
+	 * For erratum type2 affected platforms, the controller can erroneously
+	 * declare that the tuning was successful. Verify the tuning window to
+	 * make sure that we're fine.
+	 */
+	if (error == 0 &&
+	    sc->soc_data->errata & SDHCI_FSL_TUNING_ERRATUM_TYPE2 &&
+	    abs(wnd_start - wnd_end) > (4 * sc->div_ratio + 2)) {
+		error = EIO;
 	}
 
+	/* If hardware tuning failed, try software tuning. */
 	if (error != 0 &&
 	    (sc->soc_data->errata &
 	    (SDHCI_FSL_TUNING_ERRATUM_TYPE1 |
 	    SDHCI_FSL_TUNING_ERRATUM_TYPE2))) {
-		/* If hardware tuning failed, try software tuning. */
 		error = sdhci_fsl_sw_tuning(sc, bus, child, hs400, wnd_start,
 		    wnd_end);
 		if (error != 0)
@@ -1263,33 +1206,58 @@ sdhci_fsl_fdt_tune(device_t bus, device_t child, bool hs400)
 
 	if (error != 0) {
 		sdhci_fsl_switch_tuning_block(bus, false);
-
 		return (error);
 	}
-
 	if (hs400) {
 		reg = RD4(sc, SDHCI_FSL_SDTIMINGCTL);
 		reg |= SDHCI_FSL_SDTIMINGCTL_FLW_CTL;
 		WR4(sc, SDHCI_FSL_SDTIMINGCTL, reg);
-
-		/*
-		 * Tuning block needs to be disabled now.
-		 * It will be enabled by set_uhs_signalling
-		 */
-		reg = RD4(sc, SDHCI_FSL_TBCTL);
-		reg &= ~SDHCI_FSL_TBCTL_TBEN;
-		WR4(sc, SDHCI_FSL_TBCTL, reg);
 	}
 
 	return (0);
 }
 
+static int
+sdhci_fsl_fdt_retune(device_t bus, device_t child, bool reset)
+{
+	struct sdhci_slot *slot;
+	struct sdhci_fsl_fdt_softc *sc;
+
+	slot = device_get_ivars(child);
+	sc = device_get_softc(bus);
+
+	if (!(slot->opt & SDHCI_TUNING_ENABLED))
+		return (0);
+
+	/* HS400 must be tuned in HS200 mode. */
+	if (slot->host.ios.timing == bus_timing_mmc_hs400)
+		return (EINVAL);
+
+	/*
+	 * Only re-tuning with full reset is supported.
+	 * The controller is normally put in "mode 3", which means that
+	 * periodic re-tuning is done automatically. See comment in
+	 * sdhci_fsl_fdt_tune for details.
+	 * Because of that re-tuning should only be triggered as a result
+	 * of a CRC error.
+	 */
+	 if (!reset)
+		return (ENOTSUP);
+
+	return (sdhci_fsl_fdt_tune(bus, child,
+	    sc->flags & SDHCI_FSL_HS400_FLAG));
+}
 static void
 sdhci_fsl_disable_hs400_mode(device_t dev, struct sdhci_fsl_fdt_softc *sc)
 {
 	uint32_t reg;
 	int error;
 
+	/* Check if HS400 is enabled right now. */
+	reg = RD4(sc, SDHCI_FSL_TBCTL);
+	if ((reg & SDHCI_FSL_TBCTL_HS400_EN) == 0)
+		return;
+
 	reg = RD4(sc, SDHCI_FSL_SDTIMINGCTL);
 	reg &= ~SDHCI_FSL_SDTIMINGCTL_FLW_CTL;
 	WR4(sc, SDHCI_FSL_SDTIMINGCTL, reg);
@@ -1343,24 +1311,46 @@ sdhci_fsl_enable_hs400_mode(device_t dev, struct sdhci_slot *slot,
 	error = sdhci_fsl_poll_register(sc, SDHCI_FSL_PRES_STATE,
 	    SDHCI_FSL_PRES_SDSTB, SDHCI_FSL_PRES_SDSTB);
 	if (error != 0)
-		device_printf(dev, "Internal clock never stabilized.\n");
+		device_printf(dev,
+		    "Timeout while waiting for clock to stabilize.\n");
+
+	reg = RD4(sc, SDHCI_FSL_TBCTL);
+	reg |= SDHCI_FSL_TBCTL_HS400_EN;
+	WR4(sc, SDHCI_FSL_TBCTL, reg);
+	reg = RD4(sc, SDHCI_FSL_SDCLKCTL);
+	reg |= SDHCI_FSL_SDCLKCTL_CMD_CLK_CTL;
+	WR4(sc, SDHCI_FSL_SDCLKCTL, reg);
 
 	fsl_sdhc_fdt_set_clock(sc, slot, SDHCI_CLOCK_CARD_EN |
 	    sc->sdclk_bits);
 	error = sdhci_fsl_poll_register(sc, SDHCI_FSL_PRES_STATE,
 	    SDHCI_FSL_PRES_SDSTB, SDHCI_FSL_PRES_SDSTB);
 	if (error != 0)
-		device_printf(dev, "Internal clock never stabilized.\n");
+		device_printf(dev,
+		    "Timeout while waiting for clock to stabilize.\n");
 
-	reg = sc->read(sc, SDHCI_FSL_DLLCFG0);
-	reg |= SDHCI_FSL_DLLCFG0_EN | SDHCI_FSL_DLLCFG0_FREQ_SEL;
-	sc->write(sc, SDHCI_FSL_DLLCFG0, reg);
+	reg = RD4(sc, SDHCI_FSL_DLLCFG0);
+	reg |= SDHCI_FSL_DLLCFG0_EN | SDHCI_FSL_DLLCFG0_RESET |
+	    SDHCI_FSL_DLLCFG0_FREQ_SEL;
+	WR4(sc, SDHCI_FSL_DLLCFG0, reg);
+
+	/*
+	 * The reset bit is not a self clearing one.
+	 * Give it some time and clear it manually.
+	 */
+	DELAY(100);
+	reg &= ~SDHCI_FSL_DLLCFG0_RESET;
+	WR4(sc, SDHCI_FSL_DLLCFG0, reg);
 
-	DELAY(20);
+	error = sdhci_fsl_poll_register(sc, SDHCI_FSL_DLLSTAT0,
+	    SDHCI_FSL_DLLSTAT0_SLV_STS, SDHCI_FSL_DLLSTAT0_SLV_STS);
+	if (error != 0)
+		device_printf(dev,
+		    "Timeout while waiting for DLL0.\n");
 
-	reg = sc->read(sc, SDHCI_FSL_TBCTL);
+	reg = RD4(sc, SDHCI_FSL_TBCTL);
 	reg |= SDHCI_FSL_TBCTL_HS400_WND_ADJ;
-	sc->write(sc, SDHCI_FSL_TBCTL, reg);
+	WR4(sc, SDHCI_FSL_TBCTL, reg);
 
 	fsl_sdhc_fdt_set_clock(sc, slot, sc->sdclk_bits);
 
@@ -1368,11 +1358,11 @@ sdhci_fsl_enable_hs400_mode(device_t dev, struct sdhci_slot *slot,
 	    SDHCI_FSL_PRES_SDSTB, SDHCI_FSL_PRES_SDSTB);
 	if (error != 0)
 		device_printf(dev,
-		    "Timeout while waiting for clock to stabilize.\n");
+		    "timeout while waiting for clock to stabilize.\n");
 
-	reg = sc->read(sc, SDHCI_FSL_ESDHC_CTRL);
+	reg = RD4(sc, SDHCI_FSL_ESDHC_CTRL);
 	reg |= SDHCI_FSL_ESDHC_CTRL_FAF;
-	sc->write(sc, SDHCI_FSL_ESDHC_CTRL, reg);
+	WR4(sc, SDHCI_FSL_ESDHC_CTRL, reg);
 
 	error = sdhci_fsl_poll_register(sc, SDHCI_FSL_ESDHC_CTRL,
 	    SDHCI_FSL_ESDHC_CTRL_FAF, 0);
@@ -1388,50 +1378,55 @@ sdhci_fsl_enable_hs400_mode(device_t dev, struct sdhci_slot *slot,
 	if (error != 0)
 		device_printf(dev,
 		    "Timeout while waiting for clock to stabilize.\n");
-
-	sdhci_generic_set_uhs_timing(dev, slot);
-	sc->flags = SDHCI_FSL_HS400_DONE;
 }
 
 static void
 sdhci_fsl_fdt_set_uhs_timing(device_t dev, struct sdhci_slot *slot)
 {
 	struct sdhci_fsl_fdt_softc *sc;
-	uint32_t reg;
+	const struct mmc_ios *ios;
+	uint32_t mode, reg;
 
 	sc = device_get_softc(dev);
+	ios = &slot->host.ios;
+	mode = 0;
 
-	if (sc->flags & SDHCI_FSL_HS400_DONE)
-		return;
-
-	reg = RD4(sc, SDHCI_FSL_TBCTL);
-
-	if (slot->host.ios.timing == bus_timing_hs &&
-	    (!(sc->flags & SDHCI_FSL_SWITCH_TO_HS400_FLAG)) &&
-	    sc->last_mode >= bus_timing_mmc_hs200) {
-		sdhci_fsl_switch_tuning_block(dev, false);
-
-		if (reg & SDHCI_FSL_TBCTL_HS400_EN)
-			sdhci_fsl_disable_hs400_mode(dev, sc);
-
-		sc->flags |= SDHCI_FSL_SWITCH_TO_HS400_FLAG;
-
-		return;
-	}
-
-	if ((slot->host.ios.timing == bus_timing_mmc_hs400) &&
-	    sc->flags & SDHCI_FSL_SWITCH_TO_HS400_FLAG) {
-		if (sc->last_mode < bus_timing_uhs_sdr50) {
-			sc->last_mode = sc->slot.host.ios.timing;
-			return;
-		}
-
+	/*
+	 * When we switch to HS400 this function is called twice.
+	 * First after the timing is set, and then after the clock
+	 * is changed to the target frequency.
+	 * The controller can be switched to HS400 only after the latter
+	 * is done.
+	 */
+	if (slot->host.ios.timing == bus_timing_mmc_hs400 &&
+	    ios->clock > SD_SDR50_MAX)
 		sdhci_fsl_enable_hs400_mode(dev, slot, sc);
+	else if (slot->host.ios.timing < bus_timing_mmc_hs400) {
+		sdhci_fsl_disable_hs400_mode(dev, sc);
 
-	} else
-		sdhci_generic_set_uhs_timing(dev, slot);
-
-	sc->last_mode = sc->slot.host.ios.timing;
+		/*
+		 * Switching to HS400 requires a custom procedure executed in
+		 * sdhci_fsl_enable_hs400_mode in case above.
+		 * For all other modes we just need to set the corresponding flag.
+		 */
+		reg = RD4(sc, SDHCI_FSL_AUTOCERR);
+		reg &= ~SDHCI_FSL_AUTOCERR_UHMS;
+		if (ios->clock > SD_SDR50_MAX)
+			mode = SDHCI_CTRL2_UHS_SDR104;
+		else if (ios->clock > SD_SDR25_MAX)
+			mode = SDHCI_CTRL2_UHS_SDR50;
+		else if (ios->clock > SD_SDR12_MAX) {
+			if (ios->timing == bus_timing_uhs_ddr50 ||
+			    ios->timing == bus_timing_mmc_ddr52)
+				mode = SDHCI_CTRL2_UHS_DDR50;
+			else
+				mode = SDHCI_CTRL2_UHS_SDR25;
+		} else if (ios->clock > SD_MMC_CARD_ID_FREQUENCY)
+			mode = SDHCI_CTRL2_UHS_SDR12;
+
+		reg |= mode << SDHCI_FSL_AUTOCERR_UHMS_SHIFT;
+		WR4(sc, SDHCI_FSL_AUTOCERR, reg);
+	}
 }
 
 static const device_method_t sdhci_fsl_fdt_methods[] = {
@@ -1452,7 +1447,7 @@ static const device_method_t sdhci_fsl_fdt_methods[] = {
 	DEVMETHOD(mmcbr_switch_vccq,		sdhci_fsl_fdt_switch_vccq),
 	DEVMETHOD(mmcbr_update_ios,		sdhci_fsl_fdt_update_ios),
 	DEVMETHOD(mmcbr_tune,			sdhci_fsl_fdt_tune),
-	DEVMETHOD(mmcbr_retune,			sdhci_generic_retune),
+	DEVMETHOD(mmcbr_retune,			sdhci_fsl_fdt_retune),
 
 	/* SDHCI accessors. */
 	DEVMETHOD(sdhci_read_1,			sdhci_fsl_fdt_read_1),