papers: add arxiv-latency — IRQ + audio/input latency analysis for Parag

+1

papers/SCORE.md

··· 40 40 41 41 | Paper | Format | PDF | Source | 42 42 |-------|--------|-----|--------| 43 + | Where the Microseconds Go: Input and Audio Latency in AC Native OS | arXiv (LaTeX, 4pp) | `arxiv-latency/latency.pdf` | `arxiv-latency/latency.tex` | 43 44 | Aesthetic Computer Demo (C&C 2026) | ACM Demo (LaTeX) | `cc-demo-2026/demo.pdf` | `cc-demo-2026/demo.tex` | 44 45 | The URL Tradition | arXiv (LaTeX) | `arxiv-url-tradition/url-tradition.pdf` | `arxiv-url-tradition/url-tradition.tex` | 45 46 | The Potter and the Prompt | arXiv (LaTeX) | `arxiv-holden/holden.pdf` | `arxiv-holden/holden.tex` |

+1

papers/arxiv-latency/ac-paper-layout.sty

··· 1 + ../ac-paper-layout.sty

+350

papers/arxiv-latency/latency.tex

··· 1 + % !TEX program = xelatex 2 + \documentclass[10pt,letterpaper,twocolumn]{article} 3 + 4 + % === GEOMETRY === 5 + \usepackage[top=0.75in, bottom=0.75in, left=0.75in, right=0.75in]{geometry} 6 + 7 + % === FONTS === 8 + \usepackage{fontspec} 9 + \usepackage{unicode-math} 10 + 11 + \setmainfont{Latin Modern Roman} 12 + \setsansfont{Latin Modern Sans} 13 + 14 + \newfontfamily\acbold{ywft-processing-bold}[ 15 + Path=../../system/public/type/webfonts/, 16 + Extension=.ttf 17 + ] 18 + \newfontfamily\aclight{ywft-processing-light}[ 19 + Path=../../system/public/type/webfonts/, 20 + Extension=.ttf 21 + ] 22 + \setmonofont{Latin Modern Mono}[Scale=0.85] 23 + 24 + % === PACKAGES === 25 + \usepackage{xcolor} 26 + \usepackage{titlesec} 27 + \usepackage{enumitem} 28 + \usepackage{booktabs} 29 + \usepackage{tabularx} 30 + \usepackage{multicol} 31 + \usepackage{fancyhdr} 32 + \usepackage{hyperref} 33 + \usepackage{graphicx} 34 + \usepackage{ragged2e} 35 + \usepackage{microtype} 36 + \usepackage{listings} 37 + \usepackage{natbib} 38 + \usepackage[colorspec=0.92]{draftwatermark} 39 + 40 + % === COLORS (AC palette) === 41 + \definecolor{acpink}{RGB}{180,72,135} 42 + \definecolor{acpurple}{RGB}{120,80,180} 43 + \definecolor{acdark}{RGB}{64,56,74} 44 + \definecolor{acgray}{RGB}{119,119,119} 45 + \definecolor{draftcolor}{RGB}{180,72,135} 46 + 47 + % === DRAFT WATERMARK === 48 + \DraftwatermarkOptions{ 49 + text=WORKING DRAFT, 50 + fontsize=3cm, 51 + color=draftcolor!18, 52 + angle=45, 53 + pos={0.5\paperwidth, 0.5\paperheight} 54 + } 55 + 56 + % === C/JS SYNTAX COLORS === 57 + \definecolor{jskw}{RGB}{119,51,170} 58 + \definecolor{jsfn}{RGB}{0,136,170} 59 + \definecolor{jsstr}{RGB}{170,120,0} 60 + \definecolor{jsnum}{RGB}{204,0,102} 61 + \definecolor{jscmt}{RGB}{102,102,102} 62 + 63 + % === HYPERREF === 64 + \hypersetup{ 65 + colorlinks=true, 66 + linkcolor=acpurple, 67 + urlcolor=acpurple, 68 + citecolor=acpurple, 69 + pdfauthor={@jeffrey}, 70 + pdftitle={Where the Microseconds 
Go: Input and Audio Latency in AC Native OS}, 71 + } 72 + 73 + % === SECTION FORMATTING === 74 + \titleformat{\section} 75 + {\normalfont\bfseries\normalsize\uppercase} 76 + {\thesection.} 77 + {0.5em} 78 + {} 79 + \titlespacing{\section}{0pt}{1.2em}{0.3em} 80 + 81 + \titleformat{\subsection} 82 + {\normalfont\bfseries\small} 83 + {\thesubsection} 84 + {0.5em} 85 + {} 86 + \titlespacing{\subsection}{0pt}{0.8em}{0.2em} 87 + 88 + % === HEADER/FOOTER === 89 + \pagestyle{fancy} 90 + \fancyhf{} 91 + \renewcommand{\headrulewidth}{0pt} 92 + \fancyhead[C]{\footnotesize\color{acpink}\textit{Working Draft --- not for citation}} 93 + \fancyfoot[C]{\footnotesize\thepage} 94 + 95 + % === CUSTOM COMMANDS === 96 + \newcommand{\acdot}{{\color{acpink}.}} 97 + \newcommand{\ac}{\textsc{Aesthetic.Computer}} 98 + \newcommand{\acos}{\textsc{AC Native OS}} 99 + 100 + % === LISTINGS === 101 + \lstdefinelanguage{acc}{ 102 + morekeywords=[1]{const,static,struct,int,unsigned,void,if,else,return,while,for,sizeof,#define,#include}, 103 + morekeywords=[2]{snd_pcm_writei,snd_pcm_hw_params_set_period_size_near,read,poll,epoll_wait,clock_gettime,wl_keyboard,wl_display}, 104 + sensitive=true, 105 + morecomment=[l]{//}, 106 + morestring=[b]", 107 + } 108 + 109 + \lstdefinestyle{accstyle}{ 110 + language=acc, 111 + keywordstyle=[1]\color{jskw}\bfseries, 112 + keywordstyle=[2]\color{jsfn}\bfseries, 113 + commentstyle=\color{jscmt}\itshape, 114 + stringstyle=\color{jsstr}, 115 + } 116 + 117 + \lstset{ 118 + basicstyle=\ttfamily\small, 119 + breaklines=true, 120 + frame=single, 121 + rulecolor=\color{acgray!30}, 122 + backgroundcolor=\color{acgray!5}, 123 + xleftmargin=0.5em, 124 + xrightmargin=0.5em, 125 + aboveskip=0.5em, 126 + belowskip=0.5em, 127 + } 128 + 129 + % === LIST SETTINGS === 130 + \setlist[itemize]{nosep, leftmargin=1.2em, itemsep=0.1em} 131 + \setlist[enumerate]{nosep, leftmargin=1.2em} 132 + 133 + \setlength{\columnsep}{1.8em} 134 + \setlength{\parindent}{1em} 135 + 
\setlength{\parskip}{0.3em} 136 + 137 + \tolerance=800 138 + \emergencystretch=1em 139 + \hyphenpenalty=50 140 + 141 + \begin{document} 142 + 143 + % ============ TITLE BLOCK ============ 144 + 145 + \twocolumn[{% 146 + \begin{center} 147 + \includegraphics[height=4em]{pals}\par\vspace{0.5em} 148 + {\acbold\fontsize{22pt}{26pt}\selectfont\color{acdark} Where the Microseconds Go}\par 149 + \vspace{0.2em} 150 + {\aclight\fontsize{11pt}{13pt}\selectfont\color{acpink} Input and Audio Latency in AC Native OS}\par 151 + \vspace{0.3em} 152 + {\aclight\fontsize{9pt}{11pt}\selectfont\color{acgray} A letter for Parag, on what an interrupt is and how close to physics we already are}\par 153 + \vspace{0.6em} 154 + {\normalsize\href{https://prompt.ac/@jeffrey}{@jeffrey}}\par 155 + {\small\color{acgray} Aesthetic.Computer}\par 156 + {\small\color{acgray} ORCID: \href{https://orcid.org/0009-0007-4460-4913}{0009-0007-4460-4913}}\par 157 + \vspace{0.3em} 158 + {\small\color{acpurple} \url{https://aesthetic.computer}}\par 159 + \vspace{0.6em} 160 + \rule{\textwidth}{1.5pt} 161 + \vspace{0.5em} 162 + \end{center} 163 + 164 + \begin{center} 165 + {\small\color{acpink}\textbf{[ working draft --- not for citation ]}} 166 + \end{center} 167 + \vspace{0.3em} 168 + 169 + \begin{quote} 170 + \small\noindent\textbf{Abstract.} 171 + This paper, written for a friend (Parag) who asked what an IRQ is and whether stacking display servers makes a computer feel slower, walks the keypress-to-sound path inside \acos{} from the keyboard's USB host controller IRQ down to the audio codec's DMA engine. I quantify each layer the signal must cross, compare the values measured in \acos{} today against the theoretical floor set by physics and minimum kernel work, and trace the commit-by-commit history of how the chromatic keyboard piece \texttt{notepat} arrived at its current numbers. 
\acos{} runs ALSA at a 192-frame period at 192\,kHz ($\approx$1\,ms hardware turnaround) on HDA-direct codecs, falling back to 10--20\,ms periods on Sound Open Firmware (SOF) platforms whose DAPM models cannot tolerate sub-period scheduling pressure. Wayland is supported but not required: the system also ships a direct DRM/KMS path and an evdev fallback, because each compositing or buffering layer adds either a context switch ($\mu$s, harmless) or a buffer turnaround (ms or one frame, audible). I show that the realistic floor is approximately 2\,ms key-to-DAC; we are at roughly 3--4\,ms on HDA hardware and 12--22\,ms on SOF. The remaining gap is not algorithmic --- it is the cost of supporting hardware whose firmware demands buffering we do not need. 172 + \end{quote} 173 + \vspace{0.5em} 174 + }] 175 + 176 + % ============ 1. INTRODUCTION ============ 177 + 178 + \section{Introduction} 179 + 180 + Parag, an IRQ --- an \emph{interrupt request} --- is the hardware equivalent of someone tapping the CPU on the shoulder mid-sentence. Every modern CPU is, by default, ignoring almost everything. It runs whatever the kernel's scheduler last placed in front of it, and only stops when a wire is pulled. The keyboard controller pulls a wire when a key changes; the sound card pulls a wire when its DMA buffer has shifted enough samples out the speaker amplifier that it is about to run dry. The kernel responds with an \emph{interrupt service routine} (ISR), reads the device's status, and either consumes the event or wakes a userspace process that was waiting. That, in five sentences, is the entire mechanism by which a press of the spacebar eventually becomes a sound: a piece of physics (the key bottoming out closes a circuit) becomes a USB transaction, becomes an IRQ, becomes a wakeup, becomes a synth voice, becomes another DMA buffer, becomes a vibration in the air. 
181 + 182 + The interesting question, the one you actually asked, is: how long does that path take, and how much of the time is spent on things we cannot avoid versus things we have chosen to put in the way? This paper answers that question for \acos{}~\citep{scudder2026os}, the bare-metal Linux build that powers the chromatic-keyboard piece \texttt{notepat}~\citep{scudder2026notepat}. The system was designed from the start as a musical instrument: the relevant performance metric is not throughput but the latency of a single keypress reaching the DAC, and the jitter on that latency. Wessel and Wright argued in 2002 that any tool intended for intimate musical control should keep this number below 10\,ms~\citep{wessel2002problems}; McPherson and colleagues showed in 2016 that most general-purpose computing stacks are nowhere close~\citep{mcpherson2016action}, motivating the dedicated Bela platform~\citep{jack2018sub,bela}. \acos{} is not Bela. It runs on whatever surplus laptop you flash it onto. But its design intent is the same: minimize the layers between the key and the DAC, and put visible numbers next to each one that remains. 183 + 184 + I describe the keypress path~(\S\ref{sec:keypath}), the audio path~(\S\ref{sec:audiopath}), the current measured numbers against the theoretical floor~(\S\ref{sec:floor}), the role of Wayland and the direct-KMS alternative~(\S\ref{sec:wayland}), the commit history of \texttt{notepat} latency improvements~(\S\ref{sec:history}), and what is left to squeeze~(\S\ref{sec:future}). 185 + 186 + % ============ 2. THE KEYPRESS PATH ============ 187 + 188 + \section{The Keypress Path} 189 + \label{sec:keypath} 190 + 191 + In \texttt{fedac/native/src/input.c}, the chain from key to userspace runs as follows: 192 + 193 + \begin{enumerate} 194 + \item The user closes a key contact. Mechanical latency (debounce, scan-matrix sweep) is set by the keyboard's own microcontroller --- typically 1--5\,ms. 
195 + \item The keyboard schedules a USB HID input report. The polling interval is 8\,ms on legacy low-speed devices, 1\,ms on full-speed devices, and 125\,$\mu$s on high-speed gaming keyboards (the NuPhy analog HE used by the development unit polls at 1\,kHz, i.e.\ every 1\,ms). 196 + \item The xHCI host controller raises an IRQ when the URB completes. The Linux ISR runs in tens of microseconds. 197 + \item The USB HID driver decodes the report and posts an \texttt{input\_event} via the \texttt{evdev} subsystem~\citep{evdev}, which makes it readable on \texttt{/dev/input/event*}. 198 + \item \acos{}'s main loop, blocking in \texttt{poll()} on those file descriptors, wakes within another few tens of microseconds. The event is dispatched to the active piece's \texttt{act()} function via the QuickJS bridge~\citep{quickjs}. 199 + \item The piece updates state. If the event triggers a note, it calls \texttt{system.sound.play()}, which posts a voice descriptor to a lock-protected ring shared with the audio thread. 200 + \end{enumerate} 201 + 202 + The hardware floor on this path is the USB poll interval (1\,ms on a typical keyboard, 125\,$\mu$s on a gaming keyboard). Everything else is microseconds of kernel work plus a single context switch into the ac-native main loop. The runtime carries an extra subtlety: the NuPhy analog keyboard reports both as a generic HID keyboard and as a vendor-specific hidraw device that streams continuous pressure values. \acos{} reads the hidraw stream for analog pressure but suppresses the duplicate evdev events, since processing both would double-trigger every note (\texttt{input.c:478}). 203 + 204 + When a Wayland compositor is in the loop, step 5 changes: the compositor reads evdev via \texttt{libinput}~\citep{libinput}, decides which client has focus, and serializes the event into a \texttt{wl\_keyboard.key} message on the per-client Wayland socket~\citep{wayland}. 
The client wakes, dispatches it through \texttt{wl\_display\_dispatch}, and only then does the piece see it. Each hop is one socket write plus one context switch --- typically under 200\,$\mu$s on a quiet system, but the worst case is bounded by the compositor's own scheduling, not by the input stack. Comments in \texttt{input.c:1135} note that when no compositor advertises a seat, ac-native falls back to reading evdev directly --- the lower-latency path. 205 + 206 + % ============ 3. THE AUDIO PATH ============ 207 + 208 + \section{The Audio Path} 209 + \label{sec:audiopath} 210 + 211 + Audio in \acos{} is ALSA~\citep{alsa} all the way down. The configuration in \texttt{fedac/native/src/audio.h:11} declares: 212 + 213 + \begin{lstlisting}[style=accstyle] 214 + #define AUDIO_SAMPLE_RATE 192000 215 + #define AUDIO_PERIOD_SIZE 192 // ~1ms at 192kHz 216 + \end{lstlisting} 217 + 218 + The audio thread runs a tight loop: render \texttt{period\_frames} samples of mixed synth output into a buffer, then call \texttt{snd\_pcm\_writei()} to hand them to the codec. The codec's DMA engine consumes those samples on its own clock; when it has shifted out one full period, it raises an IRQ and the kernel wakes \texttt{snd\_pcm\_writei} so the next period can be written. The rendering and writing are double-buffered, so end-to-end latency is approximately \emph{period $\times$ number-of-periods-in-buffer}. 219 + 220 + \acos{} configures four periods of buffer (\texttt{audio.c:2275}). At the design rate this is 4\,ms of audio in flight at any moment, plus the analog delay of the codec and amplifier (typically $<$1\,ms). 221 + 222 + \subsection{Why two configurations} 223 + 224 + The codec landscape on x86\_64 laptops splits cleanly. Older HDA-direct codecs (Realtek ALC257 on a ThinkPad X13, e.g.) accept 192-frame periods without complaint. 
Newer Intel SoCs (Tiger Lake and later) route audio through Sound Open Firmware (SOF)~\citep{sof}, a coprocessor that runs DAPM (Dynamic Audio Power Management) state machines. Commit \texttt{3e3608733} (\emph{native: SOF-aware audio period sizing}) records the discovery: with a 1\,ms period and 4\,ms buffer the SOF firmware logged \emph{10{,}686 sdmode toggles per boot} on a Framework Laptop 13 G7 with the MAX98360A speaker amp --- a DAPM amp-storm that desynchronized the speaker and produced silence. The fix was to detect SOF and back off the period to 10\,ms (480 frames at 48\,kHz) with a 40\,ms buffer. 225 + 226 + Even that turned out to be too tight: commit \texttt{ec143aca7} (\emph{native: bigger SOF buffer 20ms/80ms}) records continuing XRUN messages with short writes of 96 frames out of 480, indicating the buffer was draining faster than the userspace mixer could refill it under boot-time load. The current production setting on SOF hardware is a 20\,ms period in an 80\,ms buffer. 227 + 228 + This is the central asymmetry of audio latency on modern laptops: \emph{the kernel can do better, but the firmware cannot}. SOF is not a Linux limitation. It is a hardware-vendor decision to put a coprocessor between the OS and the DAC, and that coprocessor's state machine has its own latency floor. 229 + 230 + % ============ 4. CURRENT VS. THEORETICAL FLOOR ============ 231 + 232 + \section{Current vs. Theoretical Floor} 233 + \label{sec:floor} 234 + 235 + Table~\ref{tab:floor} sums the path. The \emph{Current} column is the current measured/derived state; the \emph{Floor} column is the floor set by physics and minimum kernel work; the two bottom rows total each column for the HDA-direct and SOF paths, and the gap is the difference between the two columns. 
236 + 237 + \begin{table}[h] 238 + \small 239 + \centering 240 + \begin{tabular}{lrr} 241 + \toprule 242 + \textbf{Stage} & \textbf{Current} & \textbf{Floor} \\ 243 + \midrule 244 + Key contact + scan & 1--5\,ms & 1\,ms \\ 245 + USB HID poll & 0.125--1\,ms & 0.125\,ms \\ 246 + Kernel ISR + evdev & $<$50\,$\mu$s & $<$50\,$\mu$s \\ 247 + poll() wake $\rightarrow$ piece & $<$100\,$\mu$s & $<$100\,$\mu$s \\ 248 + QuickJS dispatch + synth voice & $<$100\,$\mu$s & $<$50\,$\mu$s \\ 249 + Voice $\rightarrow$ next ALSA period & up to 1 period & 0 (best case) \\ 250 + ALSA buffer drain & 4 ms (HDA) & 1--2\,ms \\ 251 + & 80 ms (SOF) & 80\,ms \\ 252 + DAC + amp & $<$1\,ms & $<$1\,ms \\ 253 + \midrule 254 + \textbf{Total (HDA-direct)} & \textbf{$\sim$3--4\,ms} & \textbf{$\sim$2\,ms} \\ 255 + \textbf{Total (SOF)} & \textbf{$\sim$12--22\,ms} & \textbf{$\sim$82\,ms*} \\ 256 + \bottomrule 257 + \end{tabular} 258 + \caption{Latency budget, key-to-DAC. *SOF floor is dominated by the firmware buffer ac-native cannot shrink without losing audio.} 259 + \label{tab:floor} 260 + \end{table} 261 + 262 + The HDA-direct number sits within the 5\,ms threshold below which McPherson et al. showed users cannot reliably distinguish action from sound~\citep{mcpherson2016action}. The SOF number does not. There is no software fix on the Linux side: shrinking the SOF buffer reintroduces the DAPM amp-storm. The only paths to a smaller SOF floor are (a) firmware changes upstream, (b) a kernel patch that reroutes the DAPM events out of the audio fast path, or (c) selecting hardware whose codec is HDA-direct. 263 + 264 + For comparison, the \texttt{notepat} macOS port (\texttt{fedac/native/macos/}) running on Apple Silicon through SDL3~\citep{sdl3} and CoreAudio~\citep{coreaudio} has its own measurement: with a 64-frame request and the CoreAudio pipeline floor, the \texttt{AC\_LATENCY\_TEST} bench reports a median of $\sim$6.4\,ms with the jitter ceiling at $\sim$7\,ms (commit \texttt{c8256aa29}). 
Smaller buffers do not lower the median; the floor there is set by CoreAudio's own pipeline scheduling. The Linux HDA path is genuinely faster than CoreAudio, because there is no userspace audio server in the way --- ac-native talks to ALSA directly, no PipeWire, no PulseAudio. 265 + 266 + % ============ 5. WAYLAND, DIRECT KMS, AND DISPLAY ============ 267 + 268 + \section{Wayland, Direct KMS, and Display} 269 + \label{sec:wayland} 270 + 271 + Wayland does not touch audio --- it is a display and input protocol. The audio path described above runs identically with or without a compositor. So the question ``does Wayland add audio latency'' has a clean answer: no. The PipeWire/PulseAudio user-space audio servers \emph{would} add 5--20\,ms of resampling and buffering, but ac-native deliberately bypasses them. ALSA hw-direct. 272 + 273 + Wayland does add input latency, but only marginally: the compositor's libinput thread reads evdev on its own schedule and forwards events through the \texttt{wl\_keyboard} protocol to the focused client. On a quiet system this is sub-millisecond. \texttt{input.c:441,478} record a sharper concern: when ac-native runs as a Wayland client, the compositor \emph{grabs} the input devices, and reading them in parallel via evdev produces double-counted keys and progressively drifting cursor positions. The fix is conditional --- if a Wayland seat is advertised, evdev is suppressed; otherwise (\texttt{input.c:1135}) ac-native reads evdev directly. 274 + 275 + Display is where Wayland costs the most. A compositor inserts an additional buffer (the client renders into a \texttt{wl\_buffer}, the compositor composites it, then queues a KMS pageflip), which costs one vsync period --- 16.7\,ms at 60\,Hz. \acos{} ships \texttt{drm-display.c} (987 lines) as the no-compositor option: ac-native talks directly to DRM/KMS~\citep{drmkms} and pageflips its own framebuffer, cutting that frame out of the loop. 
\texttt{wayland-display.c} (394 lines) exists for the case where the OS is embedded in someone else's desktop session. 276 + 277 + The general rule: each layer between hardware and the application costs either a context switch ($\mu$s, free) or a buffer turnaround (one frame or one period, audible/visible). \acos{}'s design is to make the buffering layers \emph{optional}, not structural. 278 + 279 + % ============ 6. NOTEPAT LATENCY HISTORY ============ 280 + 281 + \section{The notepat Latency History} 282 + \label{sec:history} 283 + 284 + The chromatic keyboard piece \texttt{notepat} is the canonical instrument running on \acos{}. Its current feel is the result of a sequence of small commits, each of which moved the experience closer to the floor. Reading them in order is the most honest answer to ``where does the present number come from.'' 285 + 286 + \begin{description} 287 + \item[\texttt{f9670700} \emph{NuPhy analog smoothing, dark theme, boot perf, media keys}.] The first appearance of analog-pressure handling for the NuPhy HE keyboard. Hidraw reports were noisy at low pressures; raw\_accum/raw\_count averaging cleaned the signal at the cost of one frame of input averaging. 288 + 289 + \item[\texttt{d8b28e65c} \emph{simplify NuPhy evdev filter, add input diagnostics}.] Stopped reading the NuPhy as both a generic HID keyboard \emph{and} a hidraw analog device, removing a class of double-trigger artifacts. 290 + 291 + \item[\texttt{18880d7a8} \emph{velocity-capture + pressure smoothing + NuPhy badge/gauge}.] Introduced velocity capture from the analog stream so a hard press maps to a louder note, with on-screen pressure feedback. 292 + 293 + \item[\texttt{cf3ca7f43} \emph{Karplus-Strong plucked string; notepat noise$\rightarrow$harp}.] Replaced the noise-based default voice with a Karplus-Strong delay-line waveguide. 
The synth itself adds no latency relative to a sine; this is mentioned because the perceived attack tightness depends as much on the voice's transient shape as on the buffer size. 294 + 295 + \item[\texttt{474237ee4} \emph{tanh soft-limiter + dropped/shaped dither}.] The dither story is its own subplot. Commit \texttt{319732304} added a $\pm$1\,LSB dither to prevent SOF's silence detector from gating the amplifier. \texttt{e075ebac5} bumped it to $\pm$160 when $\pm$1 turned out to extend the sustain only to 96\,s. \texttt{7add48bb5} reduced it to $\pm$1 again because $\pm$160 was audible as a 24\,kHz fizz. \texttt{ec143aca7} settled on $\pm$32 with a 20\,ms/80\,ms SOF buffer. 296 + 297 + \item[\texttt{3e3608733} \emph{SOF-aware audio period sizing}.] The structural break: split the audio config into HDA-direct (1\,ms period) and SOF (10\,ms period) paths. 298 + 299 + \item[\texttt{ec143aca7} \emph{bigger SOF buffer (20ms/80ms)}.] Pushed the SOF buffer further when XRUNs persisted under boot load. 300 + 301 + \item[\texttt{72476348f} \emph{negotiate S32\_LE format for SOF speaker PCM}.] Format negotiation moved samples to 32-bit on SOF paths. No latency change directly, but it eliminated a class of crunchy-quiet artifacts that had been mistakenly attributed to buffer underruns. 302 + 303 + \item[\texttt{04dea9da7} \emph{macos: low-latency audio + windowed resizable default}.] On the Mac port: dropped CoreAudio's default 2048--4096-frame buffer (40--85\,ms) to 128 frames ($\sim$2.7\,ms), bringing the Mac round-trip to roughly 5--8\,ms. 304 + 305 + \item[\texttt{c8256aa29} \emph{macos: dynamic FB reflow, live resize, keypress-latency bench}.] Added the \texttt{AC\_LATENCY\_TEST} benchmark and tightened the Mac buffer to 64 frames. Median held at $\sim$6.4\,ms (CoreAudio floor); jitter ceiling fell from $\sim$11 to $\sim$7\,ms. 306 + 307 + \item[\texttt{7a2e69f92} \emph{notepat: wobble/flange FX + snap-release (kill dead silent time)}.] 
Worth flagging: ``snap-release'' is a perceptual fix, not a measured one. Snapping the envelope's release rather than letting it ramp makes the \emph{end} of a note feel tighter. Latency at the start did not change, but the instrument felt faster. 308 + \end{description} 309 + 310 + The arc of these commits is obvious only in hindsight: the bulk of the work has not been clever DSP or kernel hacking. It has been understanding which parts of the stack are negotiable and which are firmware-controlled, then choosing settings on each side that do not trip the firmware into defensive behavior. 311 + 312 + % ============ 7. WHAT IS LEFT TO SQUEEZE ============ 313 + 314 + \section{What is Left to Squeeze} 315 + \label{sec:future} 316 + 317 + \begin{itemize} 318 + \item \textbf{Direct evdev grab on Wayland.} Currently when a compositor is present, ac-native disables evdev to avoid double-input. A small refactor would let ac-native take an exclusive grab via \texttt{EVIOCGRAB} and skip the Wayland keyboard hop entirely while remaining a Wayland client for display. Saves $\sim$200\,$\mu$s and eliminates an entire scheduling boundary. 319 + 320 + \item \textbf{ALSA mmap mode.} \texttt{snd\_pcm\_writei} copies frames from a userspace buffer into the kernel's ring; mmap access via \texttt{snd\_pcm\_mmap\_begin}/\texttt{snd\_pcm\_mmap\_commit} maps the ring into userspace so the mixer can render into it directly, removing the copy. On HDA paths this could trim a fraction of a millisecond. On SOF the firmware buffer dominates so the change is invisible. 321 + 322 + \item \textbf{IRQ thread priority and CPU pinning.} Linux RT throttling and IRQ-thread priorities are at default values. Pinning the audio thread to a single CPU and elevating the relevant IRQ thread to \texttt{SCHED\_FIFO} would tighten the jitter ceiling, though probably not the median. The cost is making the system less polite to other workloads, which on a single-piece appliance is acceptable. 
323 + 324 + \item \textbf{High-poll-rate keyboards.} A 1\,kHz USB polling rate is already standard on the development NuPhy. Moving to an 8\,kHz polling keyboard shaves another $\sim$0.5\,ms in the worst case. 325 + 326 + \item \textbf{HDA-only build.} An option flag at build time to refuse SOF hardware would let the synth use a 1\,ms period unconditionally. This is a hardware-selection decision dressed up as a software switch, but it is honest about the trade. 327 + \end{itemize} 328 + 329 + None of these will produce a dramatic step change. They will each save microseconds to a few milliseconds. The dramatic step changes are behind us: removing PulseAudio (saved $\sim$10\,ms), removing the desktop compositor for direct KMS (saved one frame), tuning the ALSA period to the codec's ceiling (took the audio buffer from a default 23\,ms or worse down to 4\,ms on HDA), and identifying the SOF DAPM trap (which prevented an entire class of laptops from working at all). 330 + 331 + % ============ 8. CONCLUSION ============ 332 + 333 + \section{Conclusion} 334 + 335 + So, Parag: an interrupt is a wire that the CPU has agreed to listen to. The keyboard pulls one when a key changes. The sound card pulls one when its DMA buffer is about to run dry. Between those two events the kernel does roughly 100\,$\mu$s of work and userspace does another 100\,$\mu$s --- those are not the latency. The latency is the buffers we keep in flight to absorb scheduling jitter, plus whatever firmware sits between the OS and the silicon. On a ThinkPad with HDA-direct audio, \acos{} is at $\sim$3--4\,ms key-to-DAC, against a $\sim$2\,ms physics floor. On a Framework with SOF, the floor itself is $\sim$80\,ms. Wayland adds nothing to audio, a fraction of a millisecond to input, and one vsync to display --- which is why ac-native ships a direct-KMS path for the cases where that vsync matters. 
336 + 337 + The right question for an instrument is never ``how low can the average go,'' it is ``how predictable is the worst case.'' That is what the recent work has been about. The audible thresholds (10\,ms for intimate musical control, 20\,ms before the action-sound bond starts to break) are met on the hardware we recommend. They are not yet met on every laptop in a thrift bin --- not because the kernel cannot deliver, but because the firmware sometimes will not let it. 338 + 339 + \vspace{0.5em} 340 + \noindent\textbf{Acknowledgments.} For Parag, who asked the question that made this paper worth writing. The numbers in this paper are derived from the public commit history of the \texttt{aesthetic-computer} repository as of April 2026. 341 + 342 + \vspace{0.5em} 343 + \noindent\textbf{ORCID:} \href{https://orcid.org/0009-0007-4460-4913}{0009-0007-4460-4913} 344 + 345 + % ============ REFERENCES ============ 346 + 347 + \bibliographystyle{plainnat} 348 + \bibliography{references} 349 + 350 + \end{document}

+1

papers/arxiv-latency/pals.pdf

··· 1 + ../arxiv-ac/figures/pals.pdf

+129

papers/arxiv-latency/references.bib

··· 1 + @misc{scudder2026ac, 2 + title={Aesthetic Computer '26: A Mobile-First Runtime for Creative Computing}, 3 + author={{@jeffrey}}, 4 + year={2026}, 5 + note={Companion paper describing the AC platform} 6 + } 7 + 8 + @misc{scudder2026os, 9 + title={AC Native OS '26: A Bare-Metal Creative Computing Operating System}, 10 + author={{@jeffrey}}, 11 + year={2026}, 12 + note={Companion paper describing the bare-metal Linux build} 13 + } 14 + 15 + @misc{scudder2026notepat, 16 + title={notepat.com: From Keyboard Toy to System Front Door}, 17 + author={{@jeffrey}}, 18 + year={2026}, 19 + note={Companion paper on the chromatic keyboard piece} 20 + } 21 + 22 + @misc{linuxkernel, 23 + title={The Linux Kernel}, 24 + author={{Linux Kernel Contributors}}, 25 + year={2026}, 26 + note={Version 6.14.2 used by AC Native OS}, 27 + url={https://www.kernel.org/} 28 + } 29 + 30 + @misc{alsa, 31 + title={Advanced Linux Sound Architecture (ALSA) Project}, 32 + author={{ALSA Project}}, 33 + year={2026}, 34 + url={https://www.alsa-project.org/} 35 + } 36 + 37 + @misc{evdev, 38 + title={Linux Input Subsystem User-Space API (evdev)}, 39 + author={{Linux Kernel Documentation}}, 40 + year={2026}, 41 + note={Documentation/input/input.rst} 42 + } 43 + 44 + @misc{libinput, 45 + title={libinput --- A library to handle input devices}, 46 + author={Hutterer, Peter}, 47 + year={2026}, 48 + url={https://www.freedesktop.org/wiki/Software/libinput/} 49 + } 50 + 51 + @misc{wayland, 52 + title={Wayland Protocol Specification}, 53 + author={H{\o}gsberg, Kristian and others}, 54 + year={2025}, 55 + url={https://wayland.freedesktop.org/} 56 + } 57 + 58 + @misc{drmkms, 59 + title={Linux DRM/KMS Subsystem Documentation}, 60 + author={{Linux Kernel Documentation}}, 61 + year={2026}, 62 + note={Documentation/gpu/drm-kms.rst} 63 + } 64 + 65 + @misc{usbhid, 66 + title={Universal Serial Bus HID Usage Tables, Version 1.4}, 67 + author={{USB Implementers Forum}}, 68 + year={2022} 69 + } 70 + 71 + @misc{sof, 72 + 
title={Sound Open Firmware Project}, 73 + author={{Sound Open Firmware Contributors}}, 74 + year={2026}, 75 + url={https://thesofproject.github.io/latest/index.html} 76 + } 77 + 78 + @misc{coreaudio, 79 + title={Core Audio Overview}, 80 + author={{Apple Inc.}}, 81 + year={2025}, 82 + note={developer.apple.com/library/archive/documentation/MusicAudio/} 83 + } 84 + 85 + @misc{sdl3, 86 + title={Simple DirectMedia Layer 3.0}, 87 + author={Lantinga, Sam and others}, 88 + year={2025}, 89 + url={https://libsdl.org/} 90 + } 91 + 92 + @article{wessel2002problems, 93 + title={Problems and Prospects for Intimate Musical Control of Computers}, 94 + author={Wessel, David and Wright, Matthew}, 95 + journal={Computer Music Journal}, 96 + volume={26}, 97 + number={3}, 98 + pages={11--22}, 99 + year={2002} 100 + } 101 + 102 + @inproceedings{mcpherson2016action, 103 + title={Action-Sound Latency: Are Our Tools Fast Enough?}, 104 + author={McPherson, Andrew and Jack, Robert and Moro, Giulio}, 105 + booktitle={Proceedings of the International Conference on New Interfaces for Musical Expression (NIME)}, 106 + pages={20--25}, 107 + year={2016} 108 + } 109 + 110 + @inproceedings{jack2018sub, 111 + title={Sub-Millisecond Latency Audio with Bela}, 112 + author={Jack, Robert and Moro, Giulio and McPherson, Andrew}, 113 + booktitle={Audio Engineering Society Convention 144}, 114 + year={2018} 115 + } 116 + 117 + @misc{bela, 118 + title={Bela: Real-Time Audio and Sensors for Embedded Systems}, 119 + author={{Augmented Instruments Lab}}, 120 + year={2025}, 121 + url={https://bela.io/} 122 + } 123 + 124 + @misc{quickjs, 125 + title={QuickJS Javascript Engine}, 126 + author={Bellard, Fabrice}, 127 + year={2024}, 128 + url={https://bellard.org/quickjs/} 129 + }

Configure Feed

Configure Feed