Commit 3638d17b authored by Ian Chechin
Browse files

rows: cache the parseTime format index per result column

(*conn).parseTime ran on every TEXT-stored DATETIME / DATE / TIMESTAMP
column read in Next(). The function tried (*conn).parseTimeString first
and then walked parseTimeFormats[0..6] sequentially until time.Parse
matched the row's value. For the canonical SQLite TEXT datetime format
("2006-01-02 15:04:05.999999999", index 2) every row paid two failed
time.Parse attempts (indices 0 and 1) before the one successful match.
Each failed Parse allocates a ParseError, so the per-row cost on a steady
1000-row scan was ~5 allocs per row from the format search alone.

Add a sticky per-column hint cache:

  - rows.parseFmtIdx []int8, sized once at newRows() to the column count,
    initialised to -1 (no match recorded).
  - (*conn).parseTime now takes hintIdx int and returns the index that
    actually matched (or -1 when parseTimeString matched / all formats
    failed). It tries hintIdx first if in range, then walks the list
    skipping the index it just tried.
 ...
parent 7da793ef
Loading
Loading
Loading
Loading
+33 −13
Original line number Diff line number Diff line
@@ -102,29 +102,49 @@ func newConn(dsn string) (*conn, error) {
	return c, nil
}

// Attempt to parse s as a time. Return (s, false) if s is not
// recognized as a valid time encoding.
func (c *conn) parseTime(s string) (interface{}, bool) {
// parseTime attempts to parse s as a time encoding. If hintIdx is a valid
// index into parseTimeFormats, that format is tried before the rest of the
// list; otherwise the search runs in declaration order. The returned int is
// the index of the format that matched, or -1 if the parseTimeString
// (t.String()) branch matched or all formats failed. Callers that scan many
// rows of a same-format column can feed the previous match back as hintIdx
// to skip the redundant time.Parse attempts that would otherwise run for
// every row.
//
// Return value contract is preserved: (parsed-value, ok). On failure the
// value is the original input string and ok is false.
//
// NOTE(review): trying the hint first gives the same result as declaration
// order only if no input can match two different entries of
// parseTimeFormats — confirm against the format list.
func (c *conn) parseTime(s string, hintIdx int) (interface{}, bool, int) {
	// The t.String() form is handled by parseTimeString and has no entry in
	// parseTimeFormats, so a match here reports no index (-1).
	if v, ok := c.parseTimeString(s, strings.Index(s, "m=")); ok {
		return v, true
		return v, true, -1
	}

	// Strip a trailing "Z" before matching; hadZ records whether it was
	// present so the location-aware parse below can be bypassed for it.
	ts, hadZ := strings.CutSuffix(s, "Z")

	for _, f := range parseTimeFormats {
		var t time.Time
		var err error
	// tryFormat parses ts with layout f. It uses the connection's location
	// when one is configured and the input had no "Z" suffix; otherwise it
	// falls back to time.Parse.
	tryFormat := func(f string) (time.Time, error) {
		if c.loc != nil && !hadZ {
			t, err = time.ParseInLocation(f, ts, c.loc)
		} else {
			t, err = time.Parse(f, ts)
			return time.ParseInLocation(f, ts, c.loc)
		}
		if err == nil {
			return c.applyTimezone(t), true
		return time.Parse(f, ts)
	}

	// Try the caller's hint first, if any.
	if hintIdx >= 0 && hintIdx < len(parseTimeFormats) {
		if t, err := tryFormat(parseTimeFormats[hintIdx]); err == nil {
			return c.applyTimezone(t), true, hintIdx
		}
	}

	// Sequential fallthrough, skipping the hint we already tried.
	for i, f := range parseTimeFormats {
		if i == hintIdx {
			continue
		}
		if t, err := tryFormat(f); err == nil {
			return c.applyTimezone(t), true, i
		}
	}

	return s, false
	return s, false, -1
}

// Attempt to parse s as a time string produced by t.String().  If x > 0 it's
+130 −0
Original line number Diff line number Diff line
@@ -396,3 +396,133 @@ func TestColumnTypeScanTypeDecltypeCache(t *testing.T) {
		})
	}
}

// TestParseTimeFormatCache checks that TEXT-stored DATETIME values scan to the
// expected time.Time while the per-column parseTime format hint is in play.
// The result set covers a run of same-format rows (which record and then
// reuse the hint), rows whose format differs from the recorded hint (which
// must still parse via the fallthrough search), and a final row that returns
// to the hinted format after the mixed rows.
func TestParseTimeFormatCache(t *testing.T) {
	db, err := sql.Open(driverName, "file::memory:")
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()
	// Each connection to an in-memory DSN opens its own private database, so
	// pin the pool to a single connection: the SELECT below must run on the
	// same connection that created and filled the table.
	db.SetMaxOpenConns(1)

	if _, err := db.Exec(`CREATE TABLE t (id INTEGER PRIMARY KEY, dt DATETIME)`); err != nil {
		t.Fatal(err)
	}

	// Inputs and expectations live in one table so they cannot drift apart.
	// Rows are inserted with id = position+1 and read back ORDER BY id.
	cases := []struct {
		text string
		want time.Time
	}{
		// Space-separated datetime: the first row records the hint, the next
		// two reuse it.
		{"2025-01-15 10:30:00", time.Date(2025, 1, 15, 10, 30, 0, 0, time.UTC)},
		{"2025-01-15 11:00:00", time.Date(2025, 1, 15, 11, 0, 0, 0, time.UTC)},
		{"2025-01-15 11:30:00", time.Date(2025, 1, 15, 11, 30, 0, 0, time.UTC)},
		// "T"-separated and date-only values miss the hint and fall through.
		{"2025-01-16T08:15:00", time.Date(2025, 1, 16, 8, 15, 0, 0, time.UTC)},
		{"2025-01-17", time.Date(2025, 1, 17, 0, 0, 0, 0, time.UTC)},
		// Back to the hinted format: the sticky hint must still apply.
		{"2025-01-18 12:00:00", time.Date(2025, 1, 18, 12, 0, 0, 0, time.UTC)},
	}
	for i, tc := range cases {
		if _, err := db.Exec(`INSERT INTO t(id, dt) VALUES (?, ?)`, i+1, tc.text); err != nil {
			t.Fatalf("insert id=%d: %v", i+1, err)
		}
	}

	rows, err := db.Query(`SELECT dt FROM t ORDER BY id`)
	if err != nil {
		t.Fatal(err)
	}
	defer rows.Close()

	n := 0
	for ; rows.Next(); n++ {
		if n >= len(cases) {
			t.Fatalf("too many rows; want %d", len(cases))
		}
		var got time.Time
		if err := rows.Scan(&got); err != nil {
			t.Fatalf("row %d scan: %v", n, err)
		}
		if want := cases[n].want; !got.Equal(want) {
			t.Errorf("row %d (%q): got %v, want %v", n, cases[n].text, got, want)
		}
	}
	if err := rows.Err(); err != nil {
		t.Fatal(err)
	}
	if n != len(cases) {
		t.Fatalf("row count: got %d, want %d", n, len(cases))
	}
}

// benchParseTimeScan exercises the rows.Next + Scan path on a DATETIME TEXT
// column. It fills an in-memory table with identical space-separated
// datetime strings inside one transaction, then times repeated full scans of
// that table into a time.Time. With the parseTime format-index cache, every
// row after the first hits the hinted format directly; without the cache,
// every row re-walks the parseTimeFormats list until it finds a match.
// Allocation counts are reported alongside timings.
func benchParseTimeScan(b *testing.B) {
	db, err := sql.Open(driverName, "file::memory:")
	if err != nil {
		b.Fatal(err)
	}
	defer db.Close()
	// Each connection to an in-memory DSN opens its own private database, so
	// pin the pool to a single connection: the timed queries must run on the
	// connection that holds the populated table.
	db.SetMaxOpenConns(1)

	if _, err := db.Exec(`CREATE TABLE t (dt DATETIME)`); err != nil {
		b.Fatal(err)
	}

	// Named numRows rather than rows to avoid shadowing the package's rows
	// type inside this function.
	const numRows = 1000
	tx, err := db.Begin()
	if err != nil {
		b.Fatal(err)
	}
	stmt, err := tx.Prepare(`INSERT INTO t (dt) VALUES (?)`)
	if err != nil {
		b.Fatal(err)
	}
	for i := 0; i < numRows; i++ {
		// Same space-separated datetime text on every row, so the column
		// keeps a single steady format for the whole scan.
		if _, err := stmt.Exec("2025-01-15 10:30:00"); err != nil {
			b.Fatal(err)
		}
	}
	if err := stmt.Close(); err != nil {
		b.Fatal(err)
	}
	if err := tx.Commit(); err != nil {
		b.Fatal(err)
	}

	// Everything above is setup; only the scan loop below is measured.
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		r, err := db.Query(`SELECT dt FROM t`)
		if err != nil {
			b.Fatal(err)
		}
		var got time.Time
		for r.Next() {
			if err := r.Scan(&got); err != nil {
				b.Fatal(err)
			}
		}
		if err := r.Err(); err != nil {
			b.Fatal(err)
		}
		if err := r.Close(); err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkParseTimeScan measures the rows.Next DATETIME TEXT path with the
// format-index cache active. It is a thin entry point: all setup and the
// timed loop live in benchParseTimeScan, which also enables allocation
// reporting via b.ReportAllocs.
func BenchmarkParseTimeScan(b *testing.B) {
	benchParseTimeScan(b)
}
+19 −2
Original line number Diff line number Diff line
@@ -27,6 +27,17 @@ type rows struct {
	// libc.GoString + strings.ToUpper from the Next() hot path for callers
	// that hit the time-conversion branches (_texttotime, _time_format).
	decltypes []string
	// parseFmtIdx caches, per column, the index into parseTimeFormats that
	// matched the first successful (*conn).parseTime call on that column.
	// Subsequent rows reuse the saved index as the first attempt instead of
	// re-walking the format list from the top. Slot value -1 means no index
	// has been recorded yet: parseTime has not run on this column, every
	// format failed, or the parseTimeString / m= branch matched (which is
	// not in parseTimeFormats). The cache is sticky: once an index is stored
	// it is not overwritten if a later row matches a different format. A
	// steady column therefore wins on every subsequent row, while a row in a
	// mixed-format column that misses the hint pays at most one extra failed
	// parse on top of the original in-order search.
	parseFmtIdx []int8
	pstmt       uintptr

	doStep    bool
@@ -56,11 +67,13 @@ func newRows(c *conn, pstmt uintptr, allocs *[]uintptr, empty bool) (r *rows, er

	r.columns = make([]string, n)
	r.decltypes = make([]string, n)
	r.parseFmtIdx = make([]int8, n)
	for i := range r.columns {
		if r.columns[i], err = r.c.columnName(pstmt, i); err != nil {
			return nil, err
		}
		r.decltypes[i] = strings.ToUpper(r.c.columnDeclType(pstmt, i))
		r.parseFmtIdx[i] = -1
	}

	return r, nil
@@ -179,7 +192,11 @@ func (r *rows) Next(dest []driver.Value) (err error) {

				switch r.ColumnTypeDatabaseTypeName(i) {
				case "DATE", "DATETIME", "TIMESTAMP":
					dest[i], _ = r.c.parseTime(v)
					val, ok, idx := r.c.parseTime(v, int(r.parseFmtIdx[i]))
					if ok && r.parseFmtIdx[i] < 0 && idx >= 0 {
						r.parseFmtIdx[i] = int8(idx)
					}
					dest[i] = val
				default:
					dest[i] = v
				}