clone of my dotfiles.ssp.sh
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

stow: sesh yazi

sspaeti 66ea0885 824f72b8

+1221 -316
sesh/sesh.toml sesh/.config/sesh/sesh.toml
+66
yazi/.config/yazi/keymap.toml
··· 1 + [mgr] 2 + prepend_keymap = [ 3 + { on = "?", run = 'help', desc = "Help" }, 4 + { on = "H", run = 'plugin duckdb -1', desc = "Scroll one column to the left" }, 5 + { on = "L", run = 'plugin duckdb +1', desc = "Scroll one column to the right" }, 6 + # { on = "sr", run = 'S', desc = "fuzzy search ripgrep" }, 7 + ] 8 + 9 + 10 + 11 + [help] 12 + prepend_keymap = [ 13 + { on = "/", run = 'filter', desc = "Filter" }, 14 + ] 15 + 16 + # [[manager.prepend_keymap]] 17 + # on = "H" 18 + # run = "plugin duckdb -1" 19 + # desc = "Scroll one column to the left" 20 + 21 + # [[manager.prepend_keymap]] 22 + # on = "L" 23 + # run = "plugin duckdb +1" 24 + # desc = "Scroll one column to the right" 25 + 26 + # [[manager.prepend_keymap]] 27 + # on = ["g", "o"] 28 + # run = "plugin duckdb -open" 29 + # desc = "open with duckdb" 30 + 31 + # [[manager.prepend_keymap]] 32 + # on = ["g", "u"] 33 + # run = "plugin duckdb -ui" 34 + # desc = "open with duckdb ui" 35 + 36 + # keymap = [ 37 + 38 + # # Help 39 + # { on = [ "?" ], exec = "help", desc = "Open help" }, 40 + # ] 41 + 42 + # [tasks] 43 + 44 + # keymap = [ 45 + # { on = [ "?" ], exec = "help", desc = "Open help" } 46 + # ] 47 + 48 + # [select] 49 + 50 + # keymap = [ 51 + # { on = [ "?" ], exec = "help", desc = "Open help" } 52 + # ] 53 + 54 + # [input] 55 + 56 + # keymap = [ 57 + # # Help 58 + # { on = [ "?" ], exec = "help", desc = "Open help" } 59 + # ] 60 + 61 + # [completion] 62 + 63 + # keymap = [ 64 + # { on = [ "?" ], exec = "help", desc = "Open help" } 65 + # ] 66 +
+7
yazi/.config/yazi/package.toml
··· 1 + [[plugin.deps]] 2 + use = "wylie102/duckdb" 3 + rev = "3f8c863" 4 + hash = "87b9233a74325e95011670ed20bbd0d4" 5 + 6 + [flavor] 7 + deps = []
+336
yazi/.config/yazi/plugins/duckdb.yazi/README.md
··· 1 + # duckdb.yazi 2 + 3 + **Uses [duckdb](https://github.com/duckdb/duckdb) to quickly preview and summarize data files in [yazi](https://github.com/sxyazi/yazi)!** 4 + 5 + <br> 6 + 7 + <https://github.com/user-attachments/assets/ff2b11fb-d6fa-4b6a-b1a9-8aceed520189> 8 + 9 + <br><br> 10 + 11 + ## What does it do? 12 + 13 + This plugin previews your data files in yazi using DuckDB, with two available view modes: 14 + 15 + - Preview csv, tsv, json, or parquet files in the following modes 16 + - Standard mode (default): Displays the file as a table 17 + - Summarized mode: Uses DuckDB's summarize function, enhanced with custom formatting for readability 18 + - Preview duckdb databases 19 + - See the tables and the number of rows, columns, indexes in each. Plus a list of column names in index order. 20 + - Scroll rows using `J` and `K` 21 + - Scroll columns using your chosen keys ( I use `H` and `L` ) 22 + - Change modes by pressing K when at the top of a file 23 + 24 + Supported file types: 25 + 26 + - .csv 27 + - .tsv 28 + - .txt - if tabular data 29 + - .json 30 + - .parquet 31 + - .xlsx 32 + - .duckdb 33 + - .db - if file is a duckdb database 34 + 35 + <br><br> 36 + 37 + ## Features 38 + 39 + ### Column Scrolling 40 + 41 + <br> 42 + 43 + <https://github.com/user-attachments/assets/b347a7e8-05ea-442d-a88e-e2447975b653> 44 + 45 + <br> 46 + 47 + - Now supports scrolling horizontally (by column). 48 + - Works in all views 49 + - In the database view you can even scroll through the list of column names. 50 + - Output highlighting should now work across any os (where duckdb supports it). 51 + 52 + >Requires a small amount of extra configuration from previous versions. These are keymaps (I use `H` and `L`) and some other aditional customisation options. 
53 + > 54 + >See the [Installation](https://github.com/wylie102/duckdb.yazi/tree/main?tab=readme-ov-file#installation) and [Configuration](https://github.com/wylie102/duckdb.yazi/tree/main?tab=readme-ov-file#configurationcustomisation) sections. 55 + 56 + >**Cache changes - update 04/04/25** - If you want info on the latest (cache related changes) then see [here](https://github.com/wylie102/duckdb.yazi?tab=readme-ov-file#setup-and-usage-changes-from-previous-versions). Otherwise keep reading new features and config options below. 57 + <br> 58 + 59 + <br> 60 + 61 + ### Output Syntax Highlighting 62 + 63 + - Passes through the colors from the duckdb output as you would see if using directly in the terminal. 64 + - These colors can be configured in your `~/.duckdbrc` file, see the Configuration section for details. 65 + 66 + <br> 67 + 68 + **Syntax highlighting with duckdb's default color scheme.** 69 + <img width="700" alt="Screenshot 2025-04-02 at 14 53 38" src="https://github.com/user-attachments/assets/d2267298-b91b-496c-ae74-1d432b826f6f" /> 70 + 71 + <br> 72 + 73 + **Syntax highlighting with customized color scheme.** 74 + <img width="700" alt="Screenshot 2025-04-02 at 14 44 08" src="https://github.com/user-attachments/assets/965a0a4e-e4ed-4d88-ab95-84cd543f2a58" /> 75 + 76 + <br> 77 + 78 + ### Preview DuckDB Databases 79 + 80 + - If you open a `.db` or `.duckdb` file directly, the plugin lists all tables in the database. 81 + - Each entry includes: 82 + - Table name 83 + - Rows Count 84 + - Column count 85 + - Primary key presence 86 + - Index count 87 + - All column names (aggregated and in index order) 88 + - Tables are **alphabetically ordered** and paginated for smooth scrolling. 89 + - Reads directly from the db in read only mode for file safety. 
90 + 91 + <br> 92 + 93 + <img width="700" alt="Screenshot 2025-04-02 at 14 46 19" src="https://github.com/user-attachments/assets/c640d6f3-d9f6-4d98-acd8-9e4c87c6e728" /> 94 + 95 + <br> 96 + 97 + ### More customisation options - row_id (row number) and width of the min/max columns 98 + 99 + - Row id - in standard view to help keep track when scrolling, Default is off, but can be turned on in `init.lua` options. 100 + - Width of min and max columns. Default is now 21 twice as wide as previously. Is now customisable in the `init.lua`, the unit is the number of characters shown. 101 + 102 + <br> 103 + 104 + <img width="700" alt="Screenshot 2025-04-02 at 14 49 26" src="https://github.com/user-attachments/assets/6c8fb1ae-3de8-41ce-9c90-0279dc3b5e61" /> 105 + 106 + <br><br> 107 + 108 + ### Preview mode is now toggleable 109 + 110 + - Preview mode can be toggled within yazi 111 + - Press "K" at the top of the file to toggle between "standard" and "summarized." 112 + - The mode enabled at startup is customisable in the `init.lua` see Configuration section. 113 + 114 + ### Performance improvements through caching 115 + 116 + - "Standard" and "summarized" views are cached upon first load, improving scrolling performance 117 + 118 + - Note that on entering a directory you haven't entered before (or one containing files that have been changed) caching is triggered. Until caches are generated, summarized mode may take longer to show as it will be run on the original file, and scrolling other files during this time (especially large ones) can slow things even further as new queries on the file will be competing with cache queries. Instead it is worth waiting until the caches load (displayed in bottom right corner) or switching to standard view during these first few seconds. This will be most apparent on large, non-parquet files 119 + 120 + <br><br> 121 + 122 + ## Installation 123 + 124 + ### Installing dependencies 125 + 126 + First you will need Yazi and DuckDB installed. 
127 + 128 + - [Yazi Installation instructions](https://yazi-rs.github.io/docs/installation) 129 + 130 + - [DuckDB Installation instructions](https://duckdb.org/docs/installation/?version=stable&environment=cli&platform=macos&download_method=direct) 131 + 132 + Once these are installed you can use the yazi plugin manager to install the plugin. 133 + 134 + Use the command: 135 + 136 + ``` 137 + ya pack -a wylie102/duckdb 138 + ``` 139 + 140 + in your terminal 141 + 142 + <br> 143 + 144 + ### yazi.toml 145 + 146 + Then navigate to your [yazi.toml](https://yazi-rs.github.io/docs/configuration/yazi#manager.ratio) file this should be the `yazi` folder in your `config` directory 147 + 148 + and add: 149 + 150 + ```toml 151 + [plugin] 152 + prepend_previewers = [ 153 + { name = "*.csv", run = "duckdb" }, 154 + { name = "*.tsv", run = "duckdb" }, 155 + { name = "*.json", run = "duckdb" }, 156 + { name = "*.parquet", run = "duckdb" }, 157 + { name = "*.txt", run = "duckdb" }, 158 + { name = "*.xlsx", run = "duckdb" }, 159 + { name = "*.db", run = "duckdb" }, 160 + { name = "*.duckdb", run = "duckdb" } 161 + ] 162 + 163 + prepend_preloaders = [ 164 + { name = "*.csv", run = "duckdb", multi = false }, 165 + { name = "*.tsv", run = "duckdb", multi = false }, 166 + { name = "*.json", run = "duckdb", multi = false }, 167 + { name = "*.parquet", run = "duckdb", multi = false }, 168 + { name = "*.txt", run = "duckdb", multi = false }, 169 + { name = "*.xlsx", run = "duckdb", multi = false } 170 + ] 171 + ``` 172 + 173 + >note on .txt: I have tried to exclude files that contain only raw text (if duckdb reads only one column). However, if you don't ever work with .txt files which contain tabular data (basically misnamed csv or tsv files) then you can just not include the .txt lines in your setup. 174 + 175 + <br> 176 + 177 + >note on .xlsx: This can be temperamental, especially around inferring types. This is due to the way that duckdb handles excel files. 
This feature currently uses st_read from the spatial extension since it gives the most consistent type results. Hopefully they will soon implement some of the smart type detection from the csv reader in their excel extension and then we can use that instead. 178 + 179 + <br> 180 + 181 + ### init.lua 182 + 183 + Then create an `init.lua` file in the same folder and add 184 + 185 + ```lua 186 + -- DuckDB plugin configuration 187 + require("duckdb"):setup() 188 + ``` 189 + 190 + This is where the configuration/settings can go ([see below](https://github.com/wylie102/duckdb.yazi?tab=readme-ov-file#configurationcustomisation)), but the init.lua file and this line are required for the plugin to run, even if the settings are blank. Another option is to add all of the settings with the defaults in so that it's easy to change at a later date. 191 + 192 + <br> 193 + 194 + ### keymap.toml 195 + 196 + Then in your [keymap.toml](https://yazi-rs.github.io/docs/configuration/keymap) file add: 197 + 198 + ```toml 199 + [[manager.prepend_keymap]] 200 + on = "H" 201 + run = "plugin duckdb -1" 202 + desc = "Scroll one column to the left" 203 + 204 + [[manager.prepend_keymap]] 205 + on = "L" 206 + run = "plugin duckdb +1" 207 + desc = "Scroll one column to the right" 208 + 209 + [[manager.prepend_keymap]] 210 + on = ["g", "o"] 211 + run = "plugin duckdb -open" 212 + desc = "open with duckdb" 213 + 214 + [[manager.prepend_keymap]] 215 + on = ["g", "u"] 216 + run = "plugin duckdb -ui" 217 + desc = "open with duckdb ui" 218 + 219 + ``` 220 + 221 + >I use `H` and `L` because it makes logical sense to me. 222 + > 223 + >But these overwrite: 224 + > 225 + >- `H` - previous directory and 226 + >- `L` - next directory 227 + >(different from standard `h` and `l` for parent and child directory). 228 + > 229 + >So if you use those you might want to choose something else, or remap those to <C-h> and <C-l> instead. 
230 + 231 + <br> 232 + 233 + ### Additional setup and recommended plugins for more preview space 234 + 235 + Use with a larger preview window - add to your `yazi.toml` 236 + 237 + ```toml 238 + [manager] 239 + ratio = [1, 2, 5] 240 + ``` 241 + 242 + For reference the default ratio is 1, 4, 3 243 + 244 + Use: 245 + 246 + [maximize the preview pane plugin](https://github.com/yazi-rs/plugins/tree/main/toggle-pane.yazi) 247 + 248 + <br><br> 249 + 250 + ## Configuration/Customisation 251 + 252 + Configuration of yazi.duckdb is done via the `init.lua` file in `config/yazi` (where your plugin folder and yazi.toml file live). 253 + If you don't have one you can just create one. 254 + Add the following: 255 + 256 + ```lua 257 + -- DuckDB plugin configuration 258 + require("duckdb"):setup({ 259 + mode = "standard"/"summarized", -- Default: "summarized" 260 + cache_size = 1000 -- Default: 500 261 + row_id = true/false/"dynamic", -- Default: false 262 + minmax_column_width = int -- Default: 21 263 + column_fit_factor = float -- Default: 10.0 264 + }) 265 + ``` 266 + 267 + If you don't include a setting, it will revert to the default. 268 + 269 + But the setup call `require("duckdb"):setup()` is still required for the plugin to initialize correctly. 270 + 271 + <br> 272 + 273 + ### Explanation of settings 274 + 275 + - mode - the view that will be the default on startup. The default is summarized, but this can sometimes be slow if running while the files are also being cached. Most of the time it will be the same speed as standard, so pick the one you like. 276 + 277 + - cache_size - the number of rows cached in the standard mode. Make the number higher if you want to be able to scroll further down in your files. Be aware this could impact cache size and cache performance if it was made too large. If you change this setting you will need to run `yazi --clear-cache` for it to take effect. 278 + 279 + - row_id - displays a row column when viewing in standard mode. 
If set to dynamic it will only turn on when scrolling columns and will always be the left most column. 280 + 281 + - minmax_column_width - is the number of characters displayed in the min and max columns in summarized view. Default is 21, which is roughly enough to see date and time in a datetime column. If you need more set it higher, if you want min/max to take up less space set it lower. 282 + 283 + - column_fit_factor - this one is actually important but might feel a bit counter-intuitive so have a look below. 284 + - TLDR: duckdb.yazi is designed to overspill the screen on the right side. Unless all your columns are incredibly narrow/you can see the right border of your table when there are still more columns to scroll OR you work with tables with a very large number of columns and scrolling them feels slightly slow, you can probably leave it alone. 285 + - Slightly longer instructions: To fully optimise this, 1. Lower it until your columns no longer spill off the end of the screen (check this on a few files) Step 2 - Increase by 1 so that columns again spill over the right border. 286 + - More detailed explanation: Implementing column scrolling also gave us a mechanism to request only the columns we need to fill (in reality slightly overfill) the screen. The reason for this is that if the table is incredibly wide (has a high number of columns) it would slow down the query. But while the plugin can detect how wide the display area is, it doesn't know how wide your columns are. So this number represents the average amount of space (in characters) duckdb.yazi expects each column to take up when deciding how many columns to request. columns_displayed = display_area_width / column_fit_factor. So larger number = fewer columns, smaller number = more columns. Ideally you want the columns to **just** spill over the right border of the screen which will give the feeling of movement when scrolling. 
The default - 10.0 - should accommodate most column sizes while giving good performance. Setting to 7.73 should display even the narrowest columns correctly, but may cause queries to be slightly slower when working with very large numbers of columns. 287 + 288 + ### Configuring duckdb 289 + 290 + Configuration of DuckDB can be done in the `~/.duckdbrc` file. 291 + This should be placed in your home directory ([duckdb docs](https://duckdb.org/docs/stable/operations_manual/footprint_of_duckdb/files_created_by_duckdb)). 292 + 293 + You can customise the colors of the preview using the following options 294 + 295 + ``` 296 + .highlight_colors layout gray 297 + .highlight_colors column_name magenta bold 298 + .highlight_colors column_type gray 299 + .highlight_colors string_value cyan 300 + .highlight_colors numeric_value green 301 + .highlight_colors temporal_value blue 302 + .highlight_colors footer gray 303 + ``` 304 + 305 + The above configuration is what is used in the video at the top of the readme and in the screenshots of the color highlighting section. 306 + Although the actual colours will depend on your terminal/yazi color scheme. 307 + These should be placed in your `~/.duckdbrc` file as is. 308 + No header is needed, they are simply commands run on the startup of any duckdb instance (when using the CLI). 309 + These will change the color of the output in both duckdb.yazi and when using it in the CLI. 310 + 311 + Color options are: 312 + red|green|yellow|blue|magenta|cyan|white 313 + 314 + You can also specify bold, underline or bold_underline after the colors 315 + e.g. 
`.highlight_colors column_type red bold_underline` 316 + 317 + If the file is empty or doesn't exist then the default duckdb color scheme will be used 318 + This uses gray for borders and NULLs and looks like this 319 + 320 + <img width="700" alt="Screenshot 2025-04-02 at 14 53 38" src="https://github.com/user-attachments/assets/d2267298-b91b-496c-ae74-1d432b826f6f" /> 321 + 322 + You can also turn the highlighting off by adding `.highlight_results off` 323 + In which case it will look like below. 324 + 325 + <img width="700" alt="Screenshot 2025-03-22 at 18 00 06" src="https://github.com/user-attachments/assets/db09fff9-2db1-4273-9ddf-34d0bf087967" /> 326 + 327 + More information [here](https://duckdb.org/docs/stable/clients/cli/dot_commands#configuring-the-result-syntax-highlighter) 328 + 329 + <br><br> 330 + 331 + ## Setup and usage changes from previous versions 332 + 333 + ### A Note on the Latest update 334 + 335 + Added logic for reading `.xlsx` and `.txt` files, you can just add these to your yazi.toml file to be able to view them. 336 + Also added the ability to set the cache row size in the yazi.toml file.
+806
yazi/.config/yazi/plugins/duckdb.yazi/main.lua
··· 1 + --- @since 25.4.8 2 + -- DuckDB Plugin for Yazi 3 + local M = {} 4 + 5 + local update_state = ya.sync(function(state, action, category, key, value) 6 + -- Ensure the subtable for the category exists. 7 + state[category] = state[category] or {} 8 + 9 + if action == "set" then 10 + state[category][key] = value 11 + elseif action == "get" then 12 + return state[category][key] 13 + elseif action == "check" then 14 + return state[category][key] ~= nil 15 + else 16 + ya.err("Unknown action: " .. tostring(action)) 17 + end 18 + end) 19 + 20 + local function set_opts(key, value) 21 + update_state("set", "opts", key, value) 22 + end 23 + 24 + local function get_opts(key) 25 + return update_state("get", "opts", key) 26 + end 27 + 28 + local function add_to_list(category, cache_str) 29 + update_state("set", category, cache_str, true) 30 + end 31 + 32 + local function remove_from_list(category, cache_str) 33 + update_state("set", category, cache_str, nil) 34 + end 35 + 36 + local function is_on_list(category, cache_str) 37 + return update_state("check", category, cache_str) 38 + end 39 + 40 + local function clear_list(category) 41 + set_opts(category, {}) -- replaces the whole list with an empty table 42 + end 43 + 44 + local function add_queries_to_table(target_table, queries) 45 + if type(queries) == "table" then 46 + for _, item in ipairs(queries) do 47 + table.insert(target_table, "-c") 48 + table.insert(target_table, item) 49 + end 50 + else 51 + table.insert(target_table, "-c") 52 + table.insert(target_table, queries) 53 + end 54 + end 55 + 56 + local function generate_data_source_string(target, file_type) 57 + local url_string = "'" .. tostring(target) .. 
"'" 58 + if file_type == "excel" then 59 + return string.format("st_read(%s)", url_string) 60 + elseif file_type == "text" then 61 + return string.format("read_csv(%s)", url_string) 62 + else 63 + return url_string 64 + end 65 + end 66 + 67 + local extension_map = { 68 + csv = "csv", 69 + tsv = "csv", 70 + txt = "text", 71 + json = "json", 72 + parquet = "parquet", 73 + xlsx = "excel", 74 + duckdb = "duckdb", 75 + db = "duckdb", 76 + } 77 + 78 + local function get_extension(filename) 79 + -- Match the last "dot + word characters" at the end of the string 80 + return filename:match("^.+%.([a-zA-Z0-9]+)$") 81 + end 82 + 83 + local function check_file_type(path) 84 + local name = path.name or "" 85 + local ext = get_extension(name) 86 + if ext then 87 + local filetype = extension_map[ext:lower()] 88 + if filetype then 89 + return filetype 90 + end 91 + end 92 + ya.err("File is not a supported file type") 93 + end 94 + 95 + local get_hovered_url_string = ya.sync(function() 96 + return tostring(cx.active.current.hovered.url) 97 + end) 98 + 99 + local duckdb_opener = ya.sync(function(_, arg) 100 + local hovered_url = Url(get_hovered_url_string()) 101 + local file_type = check_file_type(hovered_url) 102 + local command = "duckdb " 103 + if file_type == "excel" then 104 + command = string.format([[%s-cmd "install spatial;" -cmd "load spatial;" ]], command) 105 + ya.dbg("command: " .. tostring(command)) 106 + end 107 + 108 + if file_type ~= "duckdb" then 109 + local table_name = '\\"' .. hovered_url.stem .. '\\"' 110 + local data_source_string = generate_data_source_string(hovered_url, file_type) 111 + local query = string.format("CREATE TABLE %s AS FROM %s;", table_name, data_source_string) 112 + command = string.format('%s-cmd "%s"', command, query) 113 + ya.dbg("command final: " .. tostring(command)) 114 + else 115 + command = command .. 
tostring(hovered_url) 116 + end 117 + 118 + if arg ~= "-open" then 119 + command = string.format("%s -ui", command) 120 + end 121 + ya.emit("shell", { command, block = true, orphan = true, confirm = true }) 122 + end) 123 + 124 + function M:entry(job) 125 + local arg = job.args and job.args[1] 126 + if arg ~= "+1" and arg ~= "-1" then 127 + return duckdb_opener(arg) 128 + end 129 + local scroll_delta = tonumber(arg) 130 + 131 + if not scroll_delta then 132 + ya.err("DuckDB column scroll entry: Invalid or missing scroll delta; exiting.") 133 + return 134 + end 135 + 136 + local scrolled_columns = get_opts("scrolled_columns") or 0 137 + scrolled_columns = math.max(0, scrolled_columns + scroll_delta) 138 + set_opts("scrolled_columns", scrolled_columns) 139 + 140 + ya.emit("seek", { "lateral scroll" }) 141 + end 142 + 143 + -- Setup from init.lua: require("duckdb"):setup({ mode = "standard"/"summarized" }) 144 + function M:setup(opts) 145 + opts = opts or {} 146 + 147 + local mode = opts.mode or "summarized" 148 + local operating_system = ya.target_os() 149 + local column_width = opts.minmax_column_width or 21 150 + local row_id = opts.row_id 151 + if row_id == nil then 152 + row_id = false 153 + end 154 + local column_fit_factor = opts.column_fit_factor or 10 155 + local limit = opts.cache_size or 500 156 + 157 + set_opts("mode", mode) 158 + set_opts("mode_changed", false) 159 + set_opts("re_peek", false) 160 + set_opts("os", operating_system) 161 + set_opts("column_width", column_width) 162 + set_opts("row_id", row_id) 163 + set_opts("scrolled_columns", 0) 164 + set_opts("column_fit_factor", column_fit_factor) 165 + set_opts("limit", limit) 166 + end 167 + 168 + local function generate_preload_query(job, mode, file_type, limit) 169 + local data_source_string = generate_data_source_string(job.file.url, file_type) 170 + local limit_string = "" 171 + if limit then 172 + limit_string = "LIMIT " .. 
tostring(limit) 173 + end 174 + if mode == "standard" then 175 + return "FROM " .. data_source_string .. limit_string 176 + else 177 + return string.format( 178 + "SELECT * EXCLUDE(null_percentage), CAST(null_percentage AS DOUBLE) AS null_percentage FROM (SUMMARIZE FROM %s)", 179 + data_source_string 180 + ) 181 + end 182 + end 183 + 184 + local function generate_summary_cte(target) 185 + local column_width = get_opts("column_width") 186 + return string.format( 187 + [[ 188 + SELECT 189 + column_name AS column, 190 + column_type AS type, 191 + count, 192 + approx_unique AS unique, 193 + null_percentage AS "null%%", 194 + LEFT(min, %d) AS min, 195 + LEFT(max, %d) AS max, 196 + CASE 197 + WHEN avg IS NULL THEN NULL 198 + WHEN TRY_CAST(avg AS DOUBLE) IS NULL THEN CAST(avg AS VARCHAR) 199 + WHEN CAST(avg AS DOUBLE) < 100000 THEN CAST(ROUND(CAST(avg AS DOUBLE), 2) AS VARCHAR) 200 + WHEN CAST(avg AS DOUBLE) < 1000000 THEN CAST(ROUND(CAST(avg AS DOUBLE) / 1000, 1) AS VARCHAR) || 'k' 201 + WHEN CAST(avg AS DOUBLE) < 1000000000 THEN CAST(ROUND(CAST(avg AS DOUBLE) / 1000000, 2) AS VARCHAR) || 'm' 202 + WHEN CAST(avg AS DOUBLE) < 1000000000000 THEN CAST(ROUND(CAST(avg AS DOUBLE) / 1000000000, 2) AS VARCHAR) || 'b' 203 + ELSE '∞' 204 + END AS avg, 205 + CASE 206 + WHEN std IS NULL THEN NULL 207 + WHEN TRY_CAST(std AS DOUBLE) IS NULL THEN CAST(std AS VARCHAR) 208 + WHEN CAST(std AS DOUBLE) < 100000 THEN CAST(ROUND(CAST(std AS DOUBLE), 2) AS VARCHAR) 209 + WHEN CAST(std AS DOUBLE) < 1000000 THEN CAST(ROUND(CAST(std AS DOUBLE) / 1000, 1) AS VARCHAR) || 'k' 210 + WHEN CAST(std AS DOUBLE) < 1000000000 THEN CAST(ROUND(CAST(std AS DOUBLE) / 1000000, 2) AS VARCHAR) || 'm' 211 + WHEN CAST(std AS DOUBLE) < 1000000000000 THEN CAST(ROUND(CAST(std AS DOUBLE) / 1000000000, 2) AS VARCHAR) || 'b' 212 + ELSE '∞' 213 + END AS std, 214 + CASE 215 + WHEN q25 IS NULL THEN NULL 216 + WHEN column_type = 'TIMESTAMP' THEN coalesce(strftime(try_strptime(q25::VARCHAR, '%%c.%%f'), '%%c'), q25::VARCHAR) 
217 + WHEN TRY_CAST(q25 AS DOUBLE) IS NULL THEN CAST(q25 AS VARCHAR) 218 + WHEN CAST(q25 AS DOUBLE) < 100000 THEN CAST(ROUND(CAST(q25 AS DOUBLE), 2) AS VARCHAR) 219 + WHEN CAST(q25 AS DOUBLE) < 1000000 THEN CAST(ROUND(CAST(q25 AS DOUBLE) / 1000, 1) AS VARCHAR) || 'k' 220 + WHEN CAST(q25 AS DOUBLE) < 1000000000 THEN CAST(ROUND(CAST(q25 AS DOUBLE) / 1000000, 2) AS VARCHAR) || 'm' 221 + WHEN CAST(q25 AS DOUBLE) < 1000000000000 THEN CAST(ROUND(CAST(q25 AS DOUBLE) / 1000000000, 2) AS VARCHAR) || 'b' 222 + ELSE '∞' 223 + END AS q25, 224 + CASE 225 + WHEN q50 IS NULL THEN NULL 226 + WHEN column_type = 'TIMESTAMP' THEN coalesce(strftime(try_strptime(q50::VARCHAR, '%%c.%%f'), '%%c'), q50::VARCHAR) 227 + WHEN TRY_CAST(q50 AS DOUBLE) IS NULL THEN CAST(q50 AS VARCHAR) 228 + WHEN CAST(q50 AS DOUBLE) < 100000 THEN CAST(ROUND(CAST(q50 AS DOUBLE), 2) AS VARCHAR) 229 + WHEN CAST(q50 AS DOUBLE) < 1000000 THEN CAST(ROUND(CAST(q50 AS DOUBLE) / 1000, 1) AS VARCHAR) || 'k' 230 + WHEN CAST(q50 AS DOUBLE) < 1000000000 THEN CAST(ROUND(CAST(q50 AS DOUBLE) / 1000000, 2) AS VARCHAR) || 'm' 231 + WHEN CAST(q50 AS DOUBLE) < 1000000000000 THEN CAST(ROUND(CAST(q50 AS DOUBLE) / 1000000000, 2) AS VARCHAR) || 'b' 232 + ELSE '∞' 233 + END AS q50, 234 + CASE 235 + WHEN q75 IS NULL THEN NULL 236 + WHEN column_type = 'TIMESTAMP' THEN coalesce(strftime(try_strptime(q75::VARCHAR, '%%c.%%f'), '%%c'), q75::VARCHAR) 237 + WHEN TRY_CAST(q75 AS DOUBLE) IS NULL THEN CAST(q75 AS VARCHAR) 238 + WHEN CAST(q75 AS DOUBLE) < 100000 THEN CAST(ROUND(CAST(q75 AS DOUBLE), 2) AS VARCHAR) 239 + WHEN CAST(q75 AS DOUBLE) < 1000000 THEN CAST(ROUND(CAST(q75 AS DOUBLE) / 1000, 1) AS VARCHAR) || 'k' 240 + WHEN CAST(q75 AS DOUBLE) < 1000000000 THEN CAST(ROUND(CAST(q75 AS DOUBLE) / 1000000, 2) AS VARCHAR) || 'm' 241 + WHEN CAST(q75 AS DOUBLE) < 1000000000000 THEN CAST(ROUND(CAST(q75 AS DOUBLE) / 1000000000, 2) AS VARCHAR) || 'b' 242 + ELSE '∞' 243 + END AS q75 244 + FROM %s 245 + ]], 246 + column_width, 247 + column_width, 248 + 
target 249 + ) 250 + end 251 + 252 + -- Get preview cache path 253 + local function get_cache_path(job, mode, extension) 254 + local suffix = "_" .. mode .. ".parquet" 255 + if extension then 256 + suffix = "_" .. extension .. "." .. extension 257 + end 258 + local cache_version = 3 259 + local skip = job.skip 260 + job.skip = 1000000 + cache_version 261 + local base = ya.file_cache(job) 262 + job.skip = skip 263 + 264 + if not base then 265 + return nil, nil 266 + end 267 + 268 + local base_str = tostring(base) .. suffix 269 + local path_url = Url(base_str) 270 + local path_str = tostring(path_url.name) 271 + return path_str, path_url 272 + end 273 + 274 + -- Run queries. 275 + local function run_query(job, query, target, file_type) 276 + local width = math.max((job.area and job.area.w * 3 or 80), 80) 277 + local height = math.max((job.area and job.area.h or 25), 25) 278 + 279 + local args = {} 280 + 281 + if file_type == "duckdb" then 282 + table.insert(args, "-readonly") 283 + table.insert(args, tostring(target)) 284 + elseif file_type == "excel" then 285 + add_queries_to_table(args, { "install spatial", "load spatial" }) 286 + end 287 + 288 + -- Duckbox config 289 + add_queries_to_table(args, { 290 + ".mode duckbox", 291 + ".timer off", 292 + "SET enable_progress_bar = false;", 293 + string.format(".maxwidth %d", width), 294 + string.format(".maxrows %d", height), 295 + ".highlight_results on", 296 + }) 297 + 298 + -- Add query or list of queries 299 + add_queries_to_table(args, query) 300 + 301 + local child = Command("duckdb"):arg(args):stdout(Command.PIPED):stderr(Command.PIPED):spawn() 302 + if not child then 303 + ya.err("Failed to spawn DuckDB") 304 + return nil 305 + end 306 + 307 + local output, err = child:wait_with_output() 308 + if err or not output.status.success then 309 + ya.err("DuckDB error: " .. 
(err or output.stderr or "[unknown error]")) 310 + return nil 311 + end 312 + 313 + return output 314 + end 315 + 316 + local function generate_db_query(limit, offset) 317 + local scroll = get_opts("scrolled_columns") or 0 318 + 319 + local metadata_fields = { "rows", "columns", "has_pk", "indexes" } 320 + local visible_column_count = 10 321 + local max_scroll_metadata = #metadata_fields 322 + local metadata_projection = { "table_name" } 323 + 324 + if scroll < max_scroll_metadata then 325 + for i = scroll + 1, #metadata_fields do 326 + table.insert(metadata_projection, metadata_fields[i]) 327 + end 328 + table.insert(metadata_projection, "column_names") -- always show 329 + 330 + local projection = table.concat(metadata_projection, ", ") 331 + return string.format( 332 + [[ 333 + WITH table_info AS ( 334 + SELECT 335 + DISTINCT t.table_name, 336 + t.estimated_size AS rows, 337 + t.column_count AS columns, 338 + t.has_primary_key AS has_pk, 339 + t.index_count AS indexes, 340 + STRING_AGG(c.column_name, ', ' ORDER BY c.column_index) OVER (PARTITION BY t.table_name) AS column_names 341 + FROM duckdb_tables() t 342 + LEFT JOIN duckdb_columns() c ON t.table_name = c.table_name 343 + ) 344 + SELECT %s FROM table_info 345 + ORDER BY table_name 346 + LIMIT %d OFFSET %d; 347 + ]], 348 + projection, 349 + limit, 350 + offset 351 + ) 352 + else 353 + local column_scroll = scroll - max_scroll_metadata 354 + local start_pos = column_scroll + 1 355 + local end_pos = column_scroll + visible_column_count 356 + 357 + return string.format( 358 + [[ 359 + WITH raw AS ( 360 + SELECT 361 + t.table_name, 362 + c.column_name, 363 + row_number() OVER (PARTITION BY t.table_name ORDER BY c.column_index) AS col_pos 364 + FROM duckdb_tables() t 365 + LEFT JOIN duckdb_columns() c ON t.table_name = c.table_name 366 + ), 367 + scrolling AS ( 368 + SELECT 369 + table_name, 370 + column_name, 371 + col_pos 372 + FROM raw 373 + WHERE col_pos >= %d AND col_pos < %d 374 + ), 375 + aggregated AS ( 
376 + SELECT 377 + table_name, 378 + STRING_AGG(column_name, ', ' ORDER BY col_pos) AS column_names 379 + FROM scrolling 380 + GROUP BY table_name 381 + ) 382 + SELECT table_name, column_names FROM aggregated 383 + ORDER BY table_name 384 + LIMIT %d OFFSET %d; 385 + ]], 386 + start_pos, 387 + end_pos, 388 + limit, 389 + offset 390 + ) 391 + end 392 + end 393 + 394 + local function generate_standard_query(target, job, limit, offset) 395 + local scroll = get_opts("scrolled_columns") or 0 396 + local actual_width = math.max((job.area and job.area.w or 80), 80) 397 + local column_fit_factor = get_opts("column_fit_factor") or 7 398 + local fetched_columns = math.floor(actual_width / column_fit_factor) + scroll 399 + local row_id_mode = get_opts("row_id") 400 + 401 + -- Determine if row_id should be prepended 402 + local row_id_prefix = "" 403 + local row_id_enabled = (row_id_mode == true) or (row_id_mode == "dynamic" and scroll > 0) 404 + if row_id_enabled then 405 + row_id_prefix = "row_number() over () as row, " 406 + end 407 + 408 + local included_columns_cte = string.format( 409 + [[ 410 + set variable included_columns = ( 411 + with column_list as ( 412 + select column_name, row_number() over () as row 413 + from (describe select * from %s) 414 + ) 415 + select list(column_name) 416 + from column_list 417 + where row > %d and row <= (%d) 418 + ); 419 + ]], 420 + target, 421 + scroll, 422 + fetched_columns 423 + ) 424 + 425 + local filtered_select = string.format( 426 + "select %scolumns(c -> list_contains(getvariable('included_columns'), c)) from %s limit %d offset %d;", 427 + row_id_prefix, 428 + target, 429 + limit, 430 + offset 431 + ) 432 + return { included_columns_cte, filtered_select } 433 + end 434 + 435 + local function generate_summarized_query(source, limit, offset) 436 + local scroll = get_opts("scrolled_columns") or 0 437 + 438 + -- These are the scrollable fields, in display order 439 + local fields = { 440 + '"type"', 441 + '"count"', 442 + 
'"unique"', 443 + '"null%"', 444 + '"min"', 445 + '"max"', 446 + '"avg"', 447 + '"std"', 448 + '"q25"', 449 + '"q50"', 450 + '"q75"', 451 + } 452 + 453 + -- Always include the column name 454 + local selected_fields = { '"column"' } 455 + 456 + -- Add scrollable fields from scroll onwards 457 + for i = scroll + 1, #fields do 458 + table.insert(selected_fields, fields[i]) 459 + end 460 + 461 + local summary_cte = generate_summary_cte(source) 462 + local projection = table.concat(selected_fields, ", ") 463 + 464 + return string.format( 465 + [[ 466 + WITH summary_cte AS ( 467 + %s 468 + ) 469 + SELECT %s FROM summary_cte LIMIT %d OFFSET %d; 470 + ]], 471 + summary_cte, 472 + projection, 473 + limit, 474 + offset 475 + ) 476 + end 477 + 478 + local function generate_peek_query(target, job, limit, offset, file_type, cache_str) 479 + local mode = get_opts("mode") 480 + local is_original_file = (target == job.file.url) 481 + 482 + -- If the file itself is a DuckDB database, list tables/columns 483 + if is_original_file and file_type == "duckdb" then 484 + return generate_db_query(limit, offset) 485 + end 486 + 487 + local target_type = is_original_file and file_type or "cache" 488 + local source = generate_data_source_string(target, target_type) 489 + 490 + if mode == "standard" then 491 + return generate_standard_query(source, job, limit, offset) 492 + end 493 + local placeholder = "⏱" 494 + if is_on_list("bad_cache", cache_str) then 495 + placeholder = "∅" 496 + end 497 + 498 + if file_type ~= "parquet" then 499 + local summary_source = is_original_file 500 + and string.format( 501 + [[(select 502 + column_name, 503 + column_type, 504 + ' %s ' as count, 505 + ' %s ' as "approx_unique", 506 + ' %s ' as "null_percentage", 507 + ' %s ' as min, 508 + ' %s ' as max, 509 + ' %s ' as avg, 510 + ' %s ' as std, 511 + ' %s ' as q25, 512 + ' %s ' as q50, 513 + ' %s ' as q75 514 + from (describe select * from %s))]], 515 + placeholder, 516 + placeholder, 517 + placeholder, 518 + 
placeholder, 519 + placeholder, 520 + placeholder, 521 + placeholder, 522 + placeholder, 523 + placeholder, 524 + placeholder, 525 + source 526 + ) 527 + or source 528 + return generate_summarized_query(summary_source, limit, offset) 529 + else 530 + local summary_source = is_original_file 531 + and string.format( 532 + [[ 533 + (select 534 + d.column_name, 535 + d.column_type, 536 + sum(m.num_values) as count, 537 + ' %s ' as "approx_unique", 538 + ' %s ' as "null_percentage", 539 + case when min(m.stats_min) is null then '%s' else min(m.stats_min) end as min, 540 + case when min(m.stats_max) is null then '%s' else max(m.stats_max) end as max, 541 + ' %s ' as "avg", 542 + ' %s ' as "std", 543 + ' %s ' as q25, 544 + ' %s ' as q50, 545 + ' %s ' as q75 546 + from (describe select * from %s) d 547 + left join parquet_metadata(%s) m 548 + on d.column_name = m.path_in_schema 549 + group by all 550 + order by min(column_id)) 551 + ]], 552 + placeholder, 553 + placeholder, 554 + placeholder, 555 + placeholder, 556 + placeholder, 557 + placeholder, 558 + placeholder, 559 + placeholder, 560 + placeholder, 561 + source, 562 + source 563 + ) 564 + or source 565 + return generate_summarized_query(summary_source, limit, offset) 566 + end 567 + end 568 + 569 + local function render_output(output, job) 570 + local cleaned = output.stdout and output.stdout:gsub("\r", "") or "[no output]" 571 + ya.preview_widget(job, { 572 + ui.Text.parse(cleaned):area(job.area), 573 + }) 574 + end 575 + 576 + local function output_is_valid(output, mode, job) 577 + if output then 578 + if output.stderr and output.stderr ~= "" then 579 + ya.err("DuckDB returned an error or:\n" .. 
output.stderr) 580 + return false 581 + elseif not output.stdout or output.stdout == "" then 582 + ya.err(string.format("Peek - No stdout/stderr from %s cache for %s", mode, job.file.url)) 583 + return false 584 + else 585 + return true 586 + end 587 + else 588 + ya.err("Duckdb failed to return output") 589 + return false 590 + end 591 + end 592 + 593 + local function prepare_peek_context(job) 594 + local file_url = job.file.url 595 + local re_peek = get_opts("re_peek") 596 + local mode = get_opts("mode") 597 + local mode_changed = get_opts("mode_changed") 598 + 599 + -- Handle scroll reset and peek triggering 600 + if not re_peek then 601 + local raw_skip = job.skip or 0 602 + if raw_skip == 0 or mode_changed then 603 + set_opts("scrolled_columns", 0) 604 + end 605 + if mode_changed then 606 + set_opts("mode_changed", false) 607 + end 608 + job.skip = math.max(0, raw_skip - 50) 609 + end 610 + set_opts("re_peek", false) 611 + 612 + local cache_str, cache_url = get_cache_path(job, mode) 613 + local scrolled_collumns = get_opts("scrolled_columns") 614 + 615 + local use_cache = cache_url 616 + and fs.cha(cache_url) 617 + and not is_on_list("preloading", cache_str) 618 + and not is_on_list("bad_cache", cache_str) 619 + 620 + local target = use_cache and cache_url or file_url 621 + local file_type = check_file_type(target) 622 + local area = job.area or { h = 25 } 623 + local limit = area.h - 7 624 + local offset = job.skip 625 + 626 + return { 627 + file_url = file_url, 628 + mode = mode, 629 + file_type = file_type, 630 + cache_str = cache_str, 631 + cache_url = cache_url, 632 + scrolled_collumns = scrolled_collumns, 633 + use_cache = use_cache, 634 + target = target, 635 + limit = limit, 636 + offset = offset, 637 + } 638 + end 639 + 640 + local function remove_file(cache_url) 641 + if fs.cha(cache_url) then 642 + local ok, err = fs.remove("file", cache_url) 643 + if not ok then 644 + ya.err( 645 + string.format("[duckdb] failed to remove partial cache at %s: %s", 
tostring(cache_url), tostring(err)) 646 + ) 647 + end 648 + end 649 + end 650 + 651 + local function finish_preload(success, cache_str1, cache_str2) 652 + for _, cache_str in ipairs({ cache_str1, cache_str2 }) do 653 + if not success then 654 + add_to_list("bad_cache", cache_str) 655 + end 656 + remove_from_list("preloading", cache_str) 657 + add_to_list("completed", cache_str) 658 + end 659 + return success 660 + end 661 + 662 + local function create_cache(job, mode, file_type, limit) 663 + local cache_str, cache_url = get_cache_path(job, mode) 664 + if not cache_url or fs.cha(cache_url) or is_on_list("bad_cache", cache_str) then 665 + return true 666 + end 667 + 668 + add_to_list("preloading", cache_str) 669 + 670 + local target = tostring(cache_url) 671 + 672 + local base_query = generate_preload_query(job, mode, file_type, limit) 673 + local query = string.format("COPY (%s) TO '%s' (FORMAT 'parquet');", base_query, target) 674 + local output = run_query(job, query, nil, file_type) 675 + ya.dbg("stdout: " .. tostring(output.stdout)) 676 + ya.dbg("stderr: " .. 
tostring(output.stderr)) 677 + 678 + if not output or (output.stderr and output.stderr ~= "") then 679 + ya.err( 680 + output 681 + and string.format( 682 + "[duckdb] error creating %s cache for %s: %s", 683 + mode, 684 + tostring(job.file.url), 685 + output.stderr 686 + ) 687 + or string.format( 688 + "[duckdb] no output returned while creating %s cache for %s", 689 + mode, 690 + tostring(job.file.url) 691 + ) 692 + ) 693 + remove_file(cache_url) 694 + local result = finish_preload(false, cache_str) 695 + return result 696 + end 697 + 698 + local result = finish_preload(true, cache_str) 699 + return result 700 + end 701 + 702 + local function is_plain_text(job, file_type) 703 + local file_hash, _ = get_cache_path(job, "standard", "text") 704 + if is_on_list("is_plain_text", file_hash) then 705 + return true 706 + end 707 + 708 + file_type = file_type or check_file_type(job.file.url) 709 + if file_type ~= "text" then 710 + return false 711 + end 712 + 713 + local query = { 714 + ".mode csv", 715 + ".headers off", 716 + string.format("select count(column_name) from (describe from read_csv('%s'));", tostring(job.file.url)), 717 + } 718 + local output = run_query(job, query, nil, file_type) 719 + local result = (output and output.stdout == "1\r\n") 720 + 721 + if result then 722 + add_to_list("is_plain_text", file_hash) 723 + end 724 + 725 + return result 726 + end 727 + 728 + -- Preload summarized and standard preview caches 729 + function M:preload(job) 730 + if is_plain_text(job, nil) then 731 + return true 732 + end 733 + local limit = get_opts("limit") 734 + local file_type = check_file_type(job.file.url) 735 + local all_done = true 736 + 737 + if file_type == "duckdb" then 738 + return true 739 + end 740 + 741 + for _, mode in ipairs({ "standard", "summarized" }) do 742 + local success = create_cache(job, mode, file_type, limit) 743 + if not success then 744 + all_done = false 745 + end 746 + end 747 + 748 + return all_done 749 + end 750 + 751 + -- Peek with 
mode toggle if scrolling at top 752 + function M:peek(job) 753 + local args = prepare_peek_context(job) 754 + if is_plain_text(job, args.file_type) then 755 + return require("code"):peek(job) 756 + end 757 + 758 + local query = generate_peek_query(args.target, job, args.limit, args.offset, args.file_type, args.cache_str) 759 + ya.dbg("query: " .. tostring(query)) 760 + local output = run_query(job, query, args.target, args.file_type) 761 + ya.dbg("stdout: " .. tostring(output.stdout)) 762 + ya.dbg("stderr: " .. tostring(output.stderr)) 763 + if not output_is_valid(output, args.mode, job) then 764 + if args.target == args.cache_url and args.scrolled_collumns == 0 then 765 + add_to_list("bad_cache", args.cache_str) 766 + remove_file(args.cache_url) 767 + return require("duckdb"):peek(job) 768 + elseif is_on_list("bad_cache", args.cache_str) then 769 + return require("code"):peek(job) 770 + end 771 + end 772 + 773 + if args.target == args.file_url and args.mode == "summarized" and not args.use_cache then 774 + render_output(output, job) 775 + while not is_on_list("completed", args.cache_str) do 776 + ya.sleep(0.2) 777 + end 778 + clear_list("completed") 779 + set_opts("re_peek", true) 780 + return require("duckdb"):peek(job) 781 + end 782 + 783 + render_output(output, job) 784 + end 785 + 786 + -- Seek, also triggers mode change if skip negative. 
787 + function M:seek(job) 788 + local OFFSET_BASE = 50 789 + local current_skip = math.max(0, cx.active.preview.skip - OFFSET_BASE) 790 + local units = job.units or 0 791 + local new_skip = current_skip + units 792 + 793 + if new_skip < 0 then 794 + -- Toggle preview mode 795 + local mode = get_opts("mode") 796 + local new_mode = (mode == "summarized") and "standard" or "summarized" 797 + set_opts("mode", new_mode) 798 + set_opts("mode_changed", true) 799 + -- Trigger re-peek 800 + ya.emit("peek", { OFFSET_BASE, only_if = job.file.url }) 801 + else 802 + ya.emit("peek", { new_skip + OFFSET_BASE, only_if = job.file.url }) 803 + end 804 + end 805 + 806 + return M
yazi/init.lua yazi/.config/yazi/init.lua
yazi/keymap.toml yazi/.config/yazi/keymap.toml-1751916066183657
-7
yazi/package.toml
··· 1 - [[plugin.deps]] 2 - use = "wylie102/duckdb" 3 - rev = "4acd427" 4 - hash = "66f23737a9aae6d98824176af7e78fa1" 5 - 6 - [flavor] 7 - deps = []
yazi/plugins/duckdb.yazi/LICENSE yazi/.config/yazi/plugins/duckdb.yazi/LICENSE
-76
yazi/plugins/duckdb.yazi/README.md
··· 1 - # duckdb.yazi 2 - 3 - [duckdb](https://github.com/duckdb/duckdb) now in [yazi](https://github.com/sxyazi/yazi). 4 - 5 - <img width="1710" alt="Screenshot 2025-03-22 at 18 00 06" src="https://github.com/user-attachments/assets/db09fff9-2db1-4273-9ddf-34d0bf087967" /> 6 - 7 - ## Installation 8 - 9 - To install, use the command: 10 - 11 - ya pack -a wylie102/duckdb 12 - 13 - and add to your yazi.toml: 14 - 15 - [plugin] 16 - prepend_previewers = [ 17 - { mime = "text/csv", run = "duckdb" }, 18 - { name = "*.tsv", run = "duckdb" }, 19 - { name = "*.json", run = "duckdb" }, 20 - { name = "*.parquet", run = "duckdb" }, 21 - ] 22 - 23 - prepend_preloaders = [ 24 - { mime = "text/csv", run = "duckdb", multi = false }, 25 - { name = "*.tsv", run = "duckdb", multi = false }, 26 - { name = "*.json", run = "duckdb", multi = false }, 27 - { name = "*.parquet", run = "duckdb", multi = false }, 28 - ] 29 - 30 - ### Yazi 31 - 32 - [Installation installations](https://yazi-rs.github.io/docs/installation) 33 - 34 - ### duckdb 35 - 36 - [Installation instructions](https://duckdb.org/docs/installation/?version=stable&environment=cli&platform=macos&download_method=direct) 37 - 38 - ## Recommended plugins 39 - 40 - Use with a larger preview window or maximize the preview pane plugin: 41 - <https://github.com/yazi-rs/plugins/tree/main/toggle-pane.yazi> 42 - 43 - ## What does it do? 44 - 45 - This plugin previews your data files in yazi using DuckDB, with two available view modes: 46 - 47 - - Standard mode (default): Displays the file as a table. 48 - - Summarized mode: Uses DuckDB's summarize function, enhanced with custom formatting for readability. 49 - 50 - Supported file types: 51 - 52 - - .csv 53 - - .json 54 - - .parquet 55 - - .tsv 56 - 57 - ## New Features 58 - 59 - - Default preview mode is now "standard." 60 - - Preview mode can be toggled within yazi: 61 - - Press "K" at the top of the file to toggle between "standard" and "summarized." 
62 - - Preview mode is remembered per file, even after switching files or restarting yazi. 63 - - Performance improvements through caching: 64 - - "Standard" and "summarized" views are cached upon first load, improving scrolling performance. 65 - 66 - ## Setup and usage changes 67 - 68 - Previously, preview mode was selected by setting an environment variable (`DUCKDB_PREVIEW_MODE`). 69 - 70 - The new version no longer uses environment variables. Toggle preview modes directly within yazi using the keybinding described above. 71 - 72 - Scrolling within both views (standard and summarized) is handled by pressing J (down) and K (up). Performance is significantly better due to caching. 73 - 74 - ## Preview 75 - 76 - <img width="1710" alt="Screenshot 2025-03-22 at 17 59 21" src="https://github.com/user-attachments/assets/ac006667-4281-4e0a-87a4-bfaeefc6f20b" />
-231
yazi/plugins/duckdb.yazi/main.lua
··· 1 - -- This function generates the SQL query based on the preview mode. 2 - local function generate_sql(job, mode) 3 - if mode == "standard" then 4 - return string.format("SELECT * FROM '%s' LIMIT 500", tostring(job.file.url)) 5 - else 6 - return string.format( 7 - [[SELECT 8 - column_name AS column, 9 - column_type AS type, 10 - count, 11 - approx_unique AS unique, 12 - null_percentage AS null, 13 - LEFT(min, 10) AS min, 14 - LEFT(max, 10) AS max, 15 - CASE 16 - WHEN column_type IN ('TIMESTAMP', 'DATE') THEN '-' 17 - WHEN avg IS NULL THEN 'NULL' 18 - WHEN TRY_CAST(avg AS DOUBLE) IS NULL THEN avg 19 - WHEN CAST(avg AS DOUBLE) < 100000 THEN CAST(ROUND(CAST(avg AS DOUBLE), 2) AS VARCHAR) 20 - WHEN CAST(avg AS DOUBLE) < 1000000 THEN CAST(ROUND(CAST(avg AS DOUBLE) / 1000, 1) AS VARCHAR) || 'k' 21 - WHEN CAST(avg AS DOUBLE) < 1000000000 THEN CAST(ROUND(CAST(avg AS DOUBLE) / 1000000, 2) AS VARCHAR) || 'm' 22 - WHEN CAST(avg AS DOUBLE) < 1000000000000 THEN CAST(ROUND(CAST(avg AS DOUBLE) / 1000000000, 2) AS VARCHAR) || 'b' 23 - ELSE '∞' 24 - END AS avg, 25 - CASE 26 - WHEN column_type IN ('TIMESTAMP', 'DATE') THEN '-' 27 - WHEN std IS NULL THEN 'NULL' 28 - WHEN TRY_CAST(std AS DOUBLE) IS NULL THEN std 29 - WHEN CAST(std AS DOUBLE) < 100000 THEN CAST(ROUND(CAST(std AS DOUBLE), 2) AS VARCHAR) 30 - WHEN CAST(std AS DOUBLE) < 1000000 THEN CAST(ROUND(CAST(std AS DOUBLE) / 1000, 1) AS VARCHAR) || 'k' 31 - WHEN CAST(std AS DOUBLE) < 1000000000 THEN CAST(ROUND(CAST(std AS DOUBLE) / 1000000, 2) AS VARCHAR) || 'm' 32 - WHEN CAST(std AS DOUBLE) < 1000000000000 THEN CAST(ROUND(CAST(std AS DOUBLE) / 1000000000, 2) AS VARCHAR) || 'b' 33 - ELSE '∞' 34 - END AS std, 35 - CASE 36 - WHEN column_type IN ('TIMESTAMP', 'DATE') THEN '-' 37 - WHEN q25 IS NULL THEN 'NULL' 38 - WHEN TRY_CAST(q25 AS DOUBLE) IS NULL THEN q25 39 - WHEN CAST(q25 AS DOUBLE) < 100000 THEN CAST(ROUND(CAST(q25 AS DOUBLE), 2) AS VARCHAR) 40 - WHEN CAST(q25 AS DOUBLE) < 1000000 THEN CAST(ROUND(CAST(q25 AS DOUBLE) / 
1000, 1) AS VARCHAR) || 'k' 41 - WHEN CAST(q25 AS DOUBLE) < 1000000000 THEN CAST(ROUND(CAST(q25 AS DOUBLE) / 1000000, 2) AS VARCHAR) || 'm' 42 - WHEN CAST(q25 AS DOUBLE) < 1000000000000 THEN CAST(ROUND(CAST(q25 AS DOUBLE) / 1000000000, 2) AS VARCHAR) || 'b' 43 - ELSE '∞' 44 - END AS q25, 45 - CASE 46 - WHEN column_type IN ('TIMESTAMP', 'DATE') THEN '-' 47 - WHEN q50 IS NULL THEN 'NULL' 48 - WHEN TRY_CAST(q50 AS DOUBLE) IS NULL THEN q50 49 - WHEN CAST(q50 AS DOUBLE) < 100000 THEN CAST(ROUND(CAST(q50 AS DOUBLE), 2) AS VARCHAR) 50 - WHEN CAST(q50 AS DOUBLE) < 1000000 THEN CAST(ROUND(CAST(q50 AS DOUBLE) / 1000, 1) AS VARCHAR) || 'k' 51 - WHEN CAST(q50 AS DOUBLE) < 1000000000 THEN CAST(ROUND(CAST(q50 AS DOUBLE) / 1000000, 2) AS VARCHAR) || 'm' 52 - WHEN CAST(q50 AS DOUBLE) < 1000000000000 THEN CAST(ROUND(CAST(q50 AS DOUBLE) / 1000000000, 2) AS VARCHAR) || 'b' 53 - ELSE '∞' 54 - END AS q50, 55 - CASE 56 - WHEN column_type IN ('TIMESTAMP', 'DATE') THEN '-' 57 - WHEN q75 IS NULL THEN 'NULL' 58 - WHEN TRY_CAST(q75 AS DOUBLE) IS NULL THEN q75 59 - WHEN CAST(q75 AS DOUBLE) < 100000 THEN CAST(ROUND(CAST(q75 AS DOUBLE), 2) AS VARCHAR) 60 - WHEN CAST(q75 AS DOUBLE) < 1000000 THEN CAST(ROUND(CAST(q75 AS DOUBLE) / 1000, 1) AS VARCHAR) || 'k' 61 - WHEN CAST(q75 AS DOUBLE) < 1000000000 THEN CAST(ROUND(CAST(q75 AS DOUBLE) / 1000000, 2) AS VARCHAR) || 'm' 62 - WHEN CAST(q75 AS DOUBLE) < 1000000000000 THEN CAST(ROUND(CAST(q75 AS DOUBLE) / 1000000000, 2) AS VARCHAR) || 'b' 63 - ELSE '∞' 64 - END AS q75 65 - FROM (summarize FROM '%s')]], 66 - tostring(job.file.url) 67 - ) 68 - end 69 - end 70 - 71 - local function get_cache_path(job, type) 72 - local skip = job.skip 73 - job.skip = 0 74 - local base = ya.file_cache(job) 75 - job.skip = skip 76 - if not base then 77 - return nil 78 - end 79 - local suffix = ({ standard = "_standard.db", summarized = "_summarized.db", mode = "_mode.db" })[type or "standard"] 80 - return Url(tostring(base) .. 
suffix) 81 - end 82 - 83 - local function run_query(job, query, target) 84 - local args = {} 85 - if target ~= job.file.url then 86 - table.insert(args, tostring(target)) 87 - end 88 - table.insert(args, "-c") 89 - table.insert(args, query) 90 - local child = Command("duckdb"):args(args):stdout(Command.PIPED):stderr(Command.PIPED):spawn() 91 - if not child then 92 - return nil 93 - end 94 - local output, err = child:wait_with_output() 95 - if err then 96 - return nil 97 - end 98 - if not output.status.success then 99 - ya.err("DuckDB exited with error: " .. output.stderr) 100 - return nil 101 - end 102 - return output 103 - end 104 - 105 - local function create_cache(job, mode, path) 106 - local filename = job.file.url:name() or "unknown" 107 - if fs.cha(path) then 108 - return true 109 - end 110 - local sql = (mode == "mode") and "CREATE TABLE My_table AS SELECT 'standard' AS Preview_mode;" 111 - or string.format("CREATE TABLE My_table AS (%s);", generate_sql(job, mode)) 112 - local out = run_query(job, sql, path, mode == "mode" and "mode" or nil) 113 - if not out then 114 - ya.err("Preload - Failed to generate " .. mode .. " cache for file: " .. tostring(filename) .. 
".") 115 - return false 116 - end 117 - return true 118 - end 119 - 120 - local function get_preview_mode(job) 121 - local mode = "standard" 122 - local mode_cache = get_cache_path(job, "mode") 123 - if not mode_cache then 124 - return mode 125 - end 126 - if not fs.cha(mode_cache) then 127 - create_cache(job, "mode", mode_cache) 128 - end 129 - local result = run_query(job, "SELECT Preview_mode FROM My_table LIMIT 1;", mode_cache, "mode") 130 - if result and result.stdout and result.stdout ~= "" then 131 - local value = result.stdout:lower() 132 - if value:match("summarized") then 133 - mode = "summarized" 134 - end 135 - end 136 - return mode 137 - end 138 - 139 - local function generate_query(target, job, limit, offset) 140 - local mode = get_preview_mode(job) 141 - if target == job.file.url then 142 - if mode == "standard" then 143 - return string.format("SELECT * FROM '%s' LIMIT %d OFFSET %d;", tostring(target), limit, offset) 144 - else 145 - local query = generate_sql(job, mode) 146 - return string.format("WITH query AS (%s) SELECT * FROM query LIMIT %d OFFSET %d;", query, limit, offset) 147 - end 148 - else 149 - return string.format("SELECT * FROM My_table LIMIT %d OFFSET %d;", limit, offset) 150 - end 151 - end 152 - 153 - local function set_preview_mode(job, mode) 154 - local mode_cache = get_cache_path(job, "mode") 155 - if not mode_cache then 156 - return false 157 - end 158 - run_query(job, "DELETE FROM My_table;", mode_cache, "mode") 159 - local sql = string.format("INSERT INTO My_table VALUES ('%s');", mode) 160 - local result = run_query(job, sql, mode_cache, "mode") 161 - if not result then 162 - ya.err("SetPreviewMode - Failed to update preview mode.") 163 - return false 164 - end 165 - return true 166 - end 167 - 168 - local M = {} 169 - 170 - function M:preload(job) 171 - local cache_standard = get_cache_path(job, "standard") 172 - local cache_summarized = get_cache_path(job, "summarized") 173 - if not cache_standard or not cache_summarized 
then 174 - return false 175 - end 176 - if fs.cha(cache_standard) and fs.cha(cache_summarized) then 177 - return true 178 - end 179 - local success = true 180 - success = create_cache(job, "standard", cache_standard) and success 181 - success = create_cache(job, "summarized", cache_summarized) and success 182 - return success 183 - end 184 - 185 - function M:peek(job) 186 - local raw_skip = job.skip or 0 187 - local skip = math.max(0, raw_skip - 50) 188 - if raw_skip > 0 and raw_skip < 50 then 189 - local current_mode = get_preview_mode(job) 190 - local new_mode = current_mode == "standard" and "summarized" or "standard" 191 - set_preview_mode(job, new_mode) 192 - skip = 0 193 - end 194 - job.skip = skip 195 - local mode = get_preview_mode(job) 196 - local cache = get_cache_path(job, mode) 197 - local file_url = job.file.url 198 - local target = cache 199 - local limit = job.area.h - 7 200 - local offset = skip 201 - if not cache or not fs.cha(cache) then 202 - target = file_url 203 - end 204 - local query = generate_query(target, job, limit, offset) 205 - local output = run_query(job, query, target) 206 - if not output or output.stdout == "" then 207 - if target ~= file_url then 208 - target = file_url 209 - query = generate_query(target, job, limit, offset) 210 - output = run_query(job, query, target) 211 - if not output or output.stdout == "" then 212 - return require("code"):peek(job) 213 - end 214 - else 215 - return require("code"):peek(job) 216 - end 217 - end 218 - ya.preview_widgets(job, { ui.Text.parse(output.stdout):area(job.area) }) 219 - end 220 - 221 - function M:seek(job) 222 - local OFFSET_BASE = 50 223 - local encoded_current_skip = cx.active.preview.skip or 0 224 - local current_skip = math.max(0, encoded_current_skip - OFFSET_BASE) 225 - local units = job.units or 0 226 - local new_skip = current_skip + units 227 - local encoded_skip = new_skip + OFFSET_BASE 228 - ya.manager_emit("peek", { encoded_skip, only_if = job.file.url }) 229 - end 230 - 
231 - return M
+6 -2
yazi/yazi.toml yazi/.config/yazi/yazi.toml
··· 4 4 { name = "*.tsv", run = "duckdb" }, 5 5 { name = "*.json", run = "duckdb" }, 6 6 { name = "*.parquet", run = "duckdb" }, 7 + { name = "*.txt", run = "duckdb" }, 8 + { name = "*.xlsx", run = "duckdb" }, 9 + { name = "*.db", run = "duckdb" }, 10 + { name = "*.duckdb", run = "duckdb" } 7 11 ] 8 12 9 13 prepend_preloaders = [ ··· 11 15 { name = "*.tsv", run = "duckdb", multi = false }, 12 16 { name = "*.json", run = "duckdb", multi = false }, 13 17 { name = "*.parquet", run = "duckdb", multi = false }, 14 - { name = "*.db", run = "duckdb" }, 15 - { name = "*.duckdb", run = "duckdb" }, 18 + { name = "*.txt", run = "duckdb", multi = false }, 19 + { name = "*.xlsx", run = "duckdb", multi = false } 16 20 ]