···11+# MIT License
22+33+Copyright (c) 2025 Thomas Wylie
44+55+Permission is hereby granted, free of charge, to any person obtaining a copy
66+of this software and associated documentation files (the "Software"), to deal
77+in the Software without restriction, including without limitation the rights
88+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
99+copies of the Software, and to permit persons to whom the Software is
1010+furnished to do so, subject to the following conditions:
1111+1212+The above copyright notice and this permission notice shall be included in all
1313+copies or substantial portions of the Software.
1414+1515+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1616+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1717+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1818+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1919+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121+SOFTWARE.
2222+
+76
yazi/plugins/duckdb.yazi/README.md
···11+# duckdb.yazi
22+33+[duckdb](https://github.com/duckdb/duckdb) now in [yazi](https://github.com/sxyazi/yazi).
44+55+<img width="1710" alt="Screenshot 2025-03-22 at 18 00 06" src="https://github.com/user-attachments/assets/db09fff9-2db1-4273-9ddf-34d0bf087967" />
66+77+## Installation
88+99+To install, use the command:
1010+1111+ya pack -a wylie102/duckdb
1212+1313+and add to your yazi.toml:
1414+1515+[plugin]
1616+prepend_previewers = [
1717+ { mime = "text/csv", run = "duckdb" },
1818+ { name = "*.tsv", run = "duckdb" },
1919+ { name = "*.json", run = "duckdb" },
2020+ { name = "*.parquet", run = "duckdb" },
2121+]
2222+2323+prepend_preloaders = [
2424+ { mime = "text/csv", run = "duckdb", multi = false },
2525+ { name = "*.tsv", run = "duckdb", multi = false },
2626+ { name = "*.json", run = "duckdb", multi = false },
2727+ { name = "*.parquet", run = "duckdb", multi = false },
2828+]
2929+3030+### Yazi
3131+3232+[Installation instructions](https://yazi-rs.github.io/docs/installation)
3333+3434+### duckdb
3535+3636+[Installation instructions](https://duckdb.org/docs/installation/?version=stable&environment=cli&platform=macos&download_method=direct)
3737+3838+## Recommended plugins
3939+4040+Use with a larger preview window, or maximize the preview pane with the toggle-pane plugin:
4141+<https://github.com/yazi-rs/plugins/tree/main/toggle-pane.yazi>
4242+4343+## What does it do?
4444+4545+This plugin previews your data files in yazi using DuckDB, with two available view modes:
4646+4747+- Standard mode (default): Displays the file as a table.
4848+- Summarized mode: Uses DuckDB's summarize function, enhanced with custom formatting for readability.
4949+5050+Supported file types:
5151+5252+- .csv
5353+- .json
5454+- .parquet
5555+- .tsv
5656+5757+## New Features
5858+5959+- Default preview mode is now "standard."
6060+- Preview mode can be toggled within yazi:
6161+ - Press "K" at the top of the file to toggle between "standard" and "summarized."
6262+- Preview mode is remembered per file, even after switching files or restarting yazi.
6363+- Performance improvements through caching:
6464+ - "Standard" and "summarized" views are cached upon first load, improving scrolling performance.
6565+6666+## Setup and usage changes
6767+6868+Previously, preview mode was selected by setting an environment variable (`DUCKDB_PREVIEW_MODE`).
6969+7070+The new version no longer uses environment variables. Toggle preview modes directly within yazi using the keybinding described above.
7171+7272+Scrolling within both views (standard and summarized) is handled by pressing J (down) and K (up). Performance is significantly better due to caching.
7373+7474+## Preview
7575+7676+<img width="1710" alt="Screenshot 2025-03-22 at 17 59 21" src="https://github.com/user-attachments/assets/ac006667-4281-4e0a-87a4-bfaeefc6f20b" />
+231
yazi/plugins/duckdb.yazi/main.lua
--- Render the humanised CASE expression for one SUMMARIZE statistic column.
-- TIMESTAMP/DATE columns become '-', NULL becomes 'NULL', values that do not
-- cast to DOUBLE pass through unchanged, and numbers are rounded and given a
-- k/m/b suffix by magnitude ('∞' from 1e12 upwards).
-- @tparam string column SUMMARIZE output column to format (e.g. "avg")
-- @treturn string SQL fragment ending in `AS <column>`
local function stat_column_sql(column)
	local template = [[CASE
			WHEN column_type IN ('TIMESTAMP', 'DATE') THEN '-'
			WHEN $col IS NULL THEN 'NULL'
			WHEN TRY_CAST($col AS DOUBLE) IS NULL THEN $col
			WHEN CAST($col AS DOUBLE) < 100000 THEN CAST(ROUND(CAST($col AS DOUBLE), 2) AS VARCHAR)
			WHEN CAST($col AS DOUBLE) < 1000000 THEN CAST(ROUND(CAST($col AS DOUBLE) / 1000, 1) AS VARCHAR) || 'k'
			WHEN CAST($col AS DOUBLE) < 1000000000 THEN CAST(ROUND(CAST($col AS DOUBLE) / 1000000, 2) AS VARCHAR) || 'm'
			WHEN CAST($col AS DOUBLE) < 1000000000000 THEN CAST(ROUND(CAST($col AS DOUBLE) / 1000000000, 2) AS VARCHAR) || 'b'
			ELSE '∞'
		END AS $col]]
	-- The outer parentheses drop gsub's second return value (the match count).
	return (template:gsub("%$col", column))
end

--- Generate the SQL query for a preview mode.
-- "standard" selects the first 500 rows of the file; any other mode builds
-- the formatted SUMMARIZE report.
-- @param job preview job; `job.file.url` is the file to read
-- @tparam string mode "standard" or "summarized"
-- @treturn string SQL text (no trailing semicolon)
local function generate_sql(job, mode)
	-- Double embedded single quotes so a path such as "it's.csv" stays inside
	-- one SQL string literal instead of terminating it early.
	local path = tostring(job.file.url):gsub("'", "''")

	if mode == "standard" then
		return string.format("SELECT * FROM '%s' LIMIT 500", path)
	end

	local stats = {}
	for i, column in ipairs({ "avg", "std", "q25", "q50", "q75" }) do
		stats[i] = stat_column_sql(column)
	end

	return string.format(
		[[SELECT
		column_name AS column,
		column_type AS type,
		count,
		approx_unique AS unique,
		null_percentage AS null,
		LEFT(min, 10) AS min,
		LEFT(max, 10) AS max,
		%s
	FROM (summarize FROM '%s')]],
		table.concat(stats, ",\n\t\t"),
		path
	)
end
--- Resolve the cache database path for a file and cache kind.
-- The cache key is computed with `job.skip` forced to 0 and the original
-- skip is restored afterwards, so the path does not vary with the current
-- scroll offset.
-- @param job preview/preload job
-- @tparam[opt="standard"] string kind "standard", "summarized" or "mode"
-- @return a Url for the cache file, or nil when the kind is unknown or
--   `ya.file_cache` yields no base path
local function get_cache_path(job, kind)
	local suffixes = {
		standard = "_standard.db",
		summarized = "_summarized.db",
		mode = "_mode.db",
	}
	local suffix = suffixes[kind or "standard"]
	if not suffix then
		-- An unknown kind used to reach the concatenation below with a nil
		-- suffix and raise; report "no cache" instead.
		return nil
	end

	local saved_skip = job.skip
	job.skip = 0
	local base = ya.file_cache(job)
	job.skip = saved_skip

	if not base then
		return nil
	end
	return Url(tostring(base) .. suffix)
end
--- Run a SQL query through the `duckdb` CLI and capture its output.
-- When `target` differs from the previewed file it is passed to duckdb as
-- the database file to open; otherwise duckdb runs without a database
-- argument and the query itself names the file.
-- @param job preview/preload job (`job.file.url` is compared with `target`)
-- @tparam string query SQL passed via `-c`
-- @param target Url of the cache database, or the file url itself
-- @return the command output on success; nil when duckdb cannot be started,
--   its output cannot be collected, or it exits unsuccessfully
local function run_query(job, query, target)
	local args = {}
	if target ~= job.file.url then
		args[#args + 1] = tostring(target)
	end
	args[#args + 1] = "-c"
	args[#args + 1] = query

	local child, spawn_err = Command("duckdb"):args(args):stdout(Command.PIPED):stderr(Command.PIPED):spawn()
	if not child then
		-- Typically means the duckdb binary is missing; log it so the user
		-- has something to act on.
		ya.err("DuckDB could not be started: " .. tostring(spawn_err))
		return nil
	end

	local output, wait_err = child:wait_with_output()
	if wait_err or not output then
		ya.err("DuckDB output could not be collected: " .. tostring(wait_err))
		return nil
	end

	if not output.status.success then
		ya.err("DuckDB exited with error: " .. output.stderr)
		return nil
	end
	return output
end
--- Create a cache database at `path` unless one already exists.
-- For mode == "mode" the database stores the preview mode, initialised to
-- 'standard'; for any other mode it stores the result of generate_sql().
-- @param job preload/preview job
-- @tparam string mode "standard", "summarized" or "mode"
-- @param path Url of the cache database to create
-- @treturn boolean true when the cache exists or was created
local function create_cache(job, mode, path)
	if fs.cha(path) then
		return true
	end

	local sql
	if mode == "mode" then
		sql = "CREATE TABLE My_table AS SELECT 'standard' AS Preview_mode;"
	else
		sql = string.format("CREATE TABLE My_table AS (%s);", generate_sql(job, mode))
	end

	if run_query(job, sql, path) then
		return true
	end

	-- A failed run may leave a database file without My_table behind
	-- (presumably duckdb creates the file on open -- confirm). The existence
	-- check above would then treat it as a valid cache forever, so remove it
	-- and let the next attempt start clean.
	if fs.remove and fs.cha(path) then
		local removed, remove_err = fs.remove("file", path)
		if not removed then
			ya.err("Preload - Could not remove incomplete cache: " .. tostring(remove_err))
		end
	end

	local filename = job.file.url:name() or "unknown"
	ya.err("Preload - Failed to generate " .. mode .. " cache for file: " .. tostring(filename) .. ".")
	return false
end
--- Read the preview mode stored for a file.
-- Creates the mode cache (seeded with 'standard') when it is missing, then
-- queries it. Anything other than a stored "summarized" value resolves to
-- "standard".
-- @param job preview job
-- @treturn string "standard" or "summarized"
local function get_preview_mode(job)
	local db = get_cache_path(job, "mode")
	if not db then
		return "standard"
	end

	if not fs.cha(db) then
		create_cache(job, "mode", db)
	end

	local res = run_query(job, "SELECT Preview_mode FROM My_table LIMIT 1;", db, "mode")
	local text = res and res.stdout
	-- Match on the lower-cased CLI output rather than comparing for equality.
	if text and text ~= "" and text:lower():match("summarized") then
		return "summarized"
	end
	return "standard"
end
--- Build the paginated query for one page of the preview.
-- When `target` is a cache database the rows are read from My_table; when it
-- is the file itself the query is built for the stored preview mode.
-- @param target Url being queried (cache database or `job.file.url`)
-- @param job preview job
-- @tparam number limit maximum number of rows to return
-- @tparam number offset number of rows to skip
-- @treturn string SQL text
local function generate_query(target, job, limit, offset)
	if target ~= job.file.url then
		-- The cache already holds the mode-specific result, so the mode is
		-- not needed here; looking it up would spawn a duckdb process for
		-- nothing.
		return string.format("SELECT * FROM My_table LIMIT %d OFFSET %d;", limit, offset)
	end

	local mode = get_preview_mode(job)
	if mode == "standard" then
		-- Double embedded single quotes so the path stays one SQL literal.
		local path = tostring(target):gsub("'", "''")
		return string.format("SELECT * FROM '%s' LIMIT %d OFFSET %d;", path, limit, offset)
	end

	local query = generate_sql(job, mode)
	return string.format("WITH query AS (%s) SELECT * FROM query LIMIT %d OFFSET %d;", query, limit, offset)
end
--- Persist the preview mode for a file in its mode cache.
-- @param job preview job
-- @tparam string mode "standard" or "summarized"
-- @treturn boolean true when the mode was stored
local function set_preview_mode(job, mode)
	-- The value is interpolated into SQL below, so accept only known modes.
	if mode ~= "standard" and mode ~= "summarized" then
		ya.err("SetPreviewMode - Unknown preview mode: " .. tostring(mode))
		return false
	end

	local mode_cache = get_cache_path(job, "mode")
	if not mode_cache then
		return false
	end

	-- Replace the stored row in one duckdb invocation inside a transaction:
	-- a failure can no longer leave the table empty or holding two rows, and
	-- only one process is spawned instead of two.
	local sql = string.format(
		"BEGIN TRANSACTION; DELETE FROM My_table; INSERT INTO My_table VALUES ('%s'); COMMIT;",
		mode
	)
	if not run_query(job, sql, mode_cache) then
		ya.err("SetPreviewMode - Failed to update preview mode.")
		return false
	end
	return true
end
local M = {}

--- Preload hook: build the "standard" and "summarized" caches for a file.
-- Both caches are attempted even if the first one fails.
-- @param job preload job
-- @treturn boolean true when both caches exist afterwards; false when a
--   cache path is unavailable or either cache could not be created
function M:preload(job)
	local standard_db = get_cache_path(job, "standard")
	local summarized_db = get_cache_path(job, "summarized")
	if not (standard_db and summarized_db) then
		return false
	end

	-- Nothing to do when both caches are already on disk.
	if fs.cha(standard_db) and fs.cha(summarized_db) then
		return true
	end

	local standard_ok = create_cache(job, "standard", standard_db)
	local summarized_ok = create_cache(job, "summarized", summarized_db)
	return summarized_ok and standard_ok
end
--- Preview hook: render one page of the file as a DuckDB table.
-- `job.skip` arrives encoded: seek() adds 50 to the real row offset, so a
-- value strictly between 0 and 50 can only come from scrolling up past the
-- top, and is treated as a request to toggle between "standard" and
-- "summarized". The page is read from the mode's cache when it exists, then
-- from the file itself, and finally handed to the built-in "code" previewer
-- when DuckDB yields nothing.
-- @param job preview job (`job.skip`, `job.area`, `job.file.url` are used)
function M:peek(job)
	local OFFSET_BASE = 50
	-- Rows subtracted from the pane height; presumably the lines DuckDB's
	-- table header and borders occupy -- TODO confirm.
	local RESERVED_ROWS = 7

	local raw_skip = job.skip or 0
	local skip = math.max(0, raw_skip - OFFSET_BASE)

	local mode
	if raw_skip > 0 and raw_skip < OFFSET_BASE then
		local current_mode = get_preview_mode(job)
		local new_mode = current_mode == "standard" and "summarized" or "standard"
		if set_preview_mode(job, new_mode) then
			-- The stored mode is now known; no need to spawn duckdb to re-read it.
			mode = new_mode
		else
			mode = get_preview_mode(job)
		end
		skip = 0
	else
		mode = get_preview_mode(job)
	end
	job.skip = skip

	local file_url = job.file.url
	local cache = get_cache_path(job, mode)
	local target = cache
	if not cache or not fs.cha(cache) then
		target = file_url
	end

	-- A very short pane would otherwise produce a zero or negative LIMIT,
	-- which returns no rows or is rejected by DuckDB.
	local limit = math.max(1, job.area.h - RESERVED_ROWS)
	local offset = skip

	local output = run_query(job, generate_query(target, job, limit, offset), target)
	local empty = not output or output.stdout == ""

	-- The cache gave nothing usable: retry once against the file itself.
	if empty and target ~= file_url then
		target = file_url
		output = run_query(job, generate_query(target, job, limit, offset), target)
		empty = not output or output.stdout == ""
	end

	if empty then
		return require("code"):peek(job)
	end

	ya.preview_widgets(job, { ui.Text.parse(output.stdout):area(job.area) })
end
--- Seek hook: scroll the preview by `job.units` rows.
-- The skip value stored on the preview is offset by OFFSET_BASE; it is
-- decoded (clamped at 0), moved by the requested units, re-encoded and sent
-- back through a "peek" event for the same file.
-- @param job seek job (`job.units`, `job.file.url` are used)
function M:seek(job)
	local OFFSET_BASE = 50

	local shown_row = math.max(0, (cx.active.preview.skip or 0) - OFFSET_BASE)
	local wanted_row = shown_row + (job.units or 0)

	ya.manager_emit("peek", { wanted_row + OFFSET_BASE, only_if = job.file.url })
end

return M
+14
yazi/yazi.toml
[plugin]
# Preview tabular data files with the duckdb plugin.
# File-name rules are globs: without the leading "*" a rule such as ".tsv"
# only matches a file literally named ".tsv".
prepend_previewers = [
  { mime = "text/csv", run = "duckdb" },
  { name = "*.tsv", run = "duckdb" },
  { name = "*.json", run = "duckdb" },
  { name = "*.parquet", run = "duckdb" },
]

# Build the duckdb caches ahead of time for the same set of files.
prepend_preloaders = [
  { mime = "text/csv", run = "duckdb", multi = false },
  { name = "*.tsv", run = "duckdb", multi = false },
  { name = "*.json", run = "duckdb", multi = false },
  { name = "*.parquet", run = "duckdb", multi = false },
]