From: Simon Glass <simon.glass@canonical.com> The stb_truetype library performs around 5 allocations per character rendered, totalling approximately 26KB of temporary memory. This creates significant malloc/free overhead and heap fragmentation. Add a scratch buffer mechanism that pre-allocates memory once during probe and reuses it for each character. The buffer is reset at the start of each putc_xy() call, and allocations come from this buffer using a simple bump allocator with 8-byte alignment. If the scratch buffer is exhausted (e.g. for very complex glyphs), the allocator falls back to malloc transparently. The scratch buffer is controlled by two new Kconfig options: - CONSOLE_TRUETYPE_SCRATCH: Enable/disable the feature (default y) - CONSOLE_TRUETYPE_SCRATCH_SIZE: Buffer size in bytes (default 32KB) Co-developed-by: Claude <noreply@anthropic.com> Signed-off-by: Simon Glass <simon.glass@canonical.com> --- doc/usage/cmd/font.rst | 9 +++++ drivers/video/Kconfig | 23 ++++++++++++ drivers/video/console_truetype.c | 62 ++++++++++++++++++++++++++++++-- drivers/video/stb_truetype.h | 46 ++++++++++++++++++++++-- 4 files changed, 136 insertions(+), 4 deletions(-) diff --git a/doc/usage/cmd/font.rst b/doc/usage/cmd/font.rst index f7a4897667b..a4b9495b977 100644 --- a/doc/usage/cmd/font.rst +++ b/doc/usage/cmd/font.rst @@ -85,6 +85,15 @@ CONFIG_CONSOLE_TRUETYPE_GLYPH_BUF enables a pre-allocated buffer for glyph rendering, avoiding malloc/free per character. The buffer starts at 4KB and grows as needed via realloc(). +CONFIG_CONSOLE_TRUETYPE_SCRATCH enables a scratch buffer for internal stbtt +allocations. Without this, the TrueType library performs around 5 allocations +per character (totalling ~26KB), creating malloc/free overhead and heap +fragmentation. With the scratch buffer, memory is allocated once at probe time +and reused for each character. CONFIG_CONSOLE_TRUETYPE_SCRATCH_SIZE sets the +buffer size (default 32KB), which is sufficient for most Latin characters. 
+Complex glyphs (CJK, emoji) or very large font sizes may need 64KB or more. +Allocations exceeding the buffer size fall back to malloc transparently. + CONFIG_VIDEO_GLYPH_STATS enables tracking of glyph-rendering statistics. Return value diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 0f99ba1845b..4a8090e622d 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -259,6 +259,29 @@ config CONSOLE_TRUETYPE_GLYPH_BUF The buffer starts at 4KB and grows via realloc() as needed to accommodate larger glyphs. +config CONSOLE_TRUETYPE_SCRATCH + bool "TrueType scratch buffer to reduce malloc traffic" + depends on CONSOLE_TRUETYPE + default y + help + Enable a pre-allocated scratch buffer for internal TrueType + rendering allocations. This eliminates malloc/free calls during + character rendering, improving performance and reducing heap + fragmentation. + + With this disabled, stbtt allocates and frees around 26KB of + temporary memory for each character rendered. + +config CONSOLE_TRUETYPE_SCRATCH_SIZE + int "TrueType scratch buffer size" + depends on CONSOLE_TRUETYPE_SCRATCH + default 32768 + help + Size of the scratch buffer in bytes for TrueType rendering. + 32KB is sufficient for most Latin characters. Complex glyphs + (CJK, emoji) may need 64KB or more. Allocations exceeding this + size fall back to malloc. 
+
 config VIDEO_GLYPH_STATS
 	bool "Track glyph rendering statistics"
 	depends on CONSOLE_TRUETYPE
diff --git a/drivers/video/console_truetype.c b/drivers/video/console_truetype.c
index 6e65f55d598..c0574b75fbe 100644
--- a/drivers/video/console_truetype.c
+++ b/drivers/video/console_truetype.c
@@ -105,8 +105,47 @@ static double tt_acos(double val)
 #define STBTT_fmod		tt_fmod
 #define STBTT_cos		tt_cos
 #define STBTT_acos		tt_acos
-#define STBTT_malloc(size, u)	((void)(u), malloc(size))
-#define STBTT_free(size, u)	((void)(u), free(size))
+
+/* Scratch buffer for zero-malloc rendering - must match stb_truetype.h */
+#define STBTT_SCRATCH_DEFINED
+struct stbtt_scratch {
+	char *buf;	/* start of scratch memory */
+	size_t size;	/* total bytes available in buf */
+	size_t used;	/* bytes handed out since the last reset, <= size */
+};
+
+static inline void stbtt_scratch_reset(struct stbtt_scratch *s)
+{
+	if (s)
+		s->used = 0;	/* next allocation starts at buf again */
+}
+
+static inline void *stbtt__scratch_alloc(size_t size, void *userdata)
+{
+	struct stbtt_scratch *s = userdata;
+	size_t aligned = (size + 7) & ~(size_t)7; /* 8-byte alignment */
+
+	/* Zero or overflowing sizes use malloc(); used <= size, so no wrap */
+	if (s && size && aligned >= size && aligned <= s->size - s->used) {
+		void *p = s->buf + s->used;
+
+		s->used += aligned;
+		return p;
+	}
+
+	return malloc(size);
+}
+
+static inline void stbtt__scratch_free(void *ptr, void *userdata)
+{
+	struct stbtt_scratch *s = userdata;
+
+	if (!s || ptr < (void *)s->buf || ptr >= (void *)(s->buf + s->size))
+		free(ptr);	/* not scratch memory: malloc() fallback */
+}
+
+#define STBTT_malloc(size, u)	stbtt__scratch_alloc(size, u)
+#define STBTT_free(ptr, u)	stbtt__scratch_free(ptr, u)
 #define STBTT_assert(x)
 #define STBTT_strlen(x)		strlen(x)
 #define STBTT_memcpy		memcpy
@@ -184,6 +223,8 @@ struct console_tt_metrics {
  *	this avoids malloc/free per character. Allocated lazily after
  *	relocation to avoid using early malloc space.
  * @glyph_buf_size: Current size of glyph_buf in bytes
+ * @scratch: Scratch buffer state for stbtt internal allocations
+ * @scratch_buf: Memory for scratch buffer
  */
 struct console_tt_priv {
 	struct console_tt_metrics *cur_met;
@@ -196,6 +237,8 @@ struct console_tt_priv {
 	int pos_count;
 	u8 *glyph_buf;
 	int glyph_buf_size;
+	struct stbtt_scratch scratch;
+	char *scratch_buf;
 };
 
 /**
@@ -377,6 +420,9 @@ static int console_truetype_putc_xy(struct udevice *dev, uint x, uint y,
 	if (priv->cur_fontdata)
 		return console_fixed_putc_xy(dev, x, y, cp, priv->cur_fontdata);
 
+	/* Reset scratch buffer for this character */
+	stbtt_scratch_reset(&priv->scratch);
+
 	/* First get some basic metrics about this character */
 	font = &met->font;
 	stbtt_GetCodepointHMetrics(font, cp, &advance, &lsb);
@@ -813,6 +859,7 @@ static int truetype_add_metrics(struct udevice *dev, const char *font_name,
 		debug("%s: Font init failed\n", __func__);
 		return -EPERM;
 	}
+	font->userdata = &priv->scratch;
 
 	/* Pre-calculate some things we will need regularly */
 	met->scale = stbtt_ScaleForPixelHeight(font, font_size);
@@ -1217,6 +1264,17 @@ static int console_truetype_probe(struct udevice *dev)
 	int ret;
 
 	debug("%s: start\n", __func__);
+
+	/* Scratch for stbtt; the size symbol only exists when this is enabled */
+#if CONFIG_IS_ENABLED(CONSOLE_TRUETYPE_SCRATCH)
+	priv->scratch_buf = malloc(CONFIG_CONSOLE_TRUETYPE_SCRATCH_SIZE);
+	if (priv->scratch_buf) {
+		priv->scratch.buf = priv->scratch_buf;
+		priv->scratch.size = CONFIG_CONSOLE_TRUETYPE_SCRATCH_SIZE;
+		priv->scratch.used = 0;
+	}
+#endif
+
 	if (vid_priv->font_size)
 		font_size = vid_priv->font_size;
 	else
diff --git a/drivers/video/stb_truetype.h b/drivers/video/stb_truetype.h
index 90a5c2e2b3f..23a88898287 100644
--- a/drivers/video/stb_truetype.h
+++ b/drivers/video/stb_truetype.h
@@ -465,11 +465,53 @@ int main(int arg, char **argv)
    #define STBTT_fabs(x)      fabs(x)
    #endif
 
+   /* Scratch buffer for zero-malloc rendering */
+   #ifndef STBTT_SCRATCH_DEFINED
+   #define STBTT_SCRATCH_DEFINED
+   struct stbtt_scratch {
+      char *buf;     /* start of scratch memory */
+      size_t size;   /* total bytes available in buf */
+      size_t used;   /* bytes handed out since the last reset, <= size */
+   };
+
+   static inline void stbtt_scratch_reset(struct stbtt_scratch *s)
+   {
+      if (s)
+         s->used = 0;  /* next allocation starts at buf again */
+   }
+   #endif
+
    // #define your own functions "STBTT_malloc" / "STBTT_free" to avoid malloc.h
    #ifndef STBTT_malloc
    #include <stdlib.h>
-   #define STBTT_malloc(x,u)  ((void)(u),malloc(x))
-   #define STBTT_free(x,u)    ((void)(u),free(x))
+
+   static inline void *stbtt__scratch_alloc(size_t size, void *userdata)
+   {
+      struct stbtt_scratch *s = userdata;
+      size_t aligned = (size + 7) & ~(size_t)7;  /* 8-byte alignment */
+
+      /* Zero or overflowing sizes use malloc(); used <= size, so no wrap */
+      if (s && size && aligned >= size && aligned <= s->size - s->used) {
+         void *p = s->buf + s->used;
+
+         s->used += aligned;
+         return p;
+      }
+
+      return malloc(size);  /* fallback */
+   }
+
+   static inline void stbtt__scratch_free(void *ptr, void *userdata)
+   {
+      struct stbtt_scratch *s = userdata;
+
+      /* Only free if not from scratch buffer */
+      if (!s || ptr < (void *)s->buf || ptr >= (void *)(s->buf + s->size))
+         free(ptr);
+   }
+
+   #define STBTT_malloc(x,u)  stbtt__scratch_alloc(x, u)
+   #define STBTT_free(x,u)    stbtt__scratch_free(x, u)
    #endif
 
    #ifndef STBTT_assert
-- 
2.43.0