A virtual jailed shell environment for Go apps backed by an io/fs#FS.
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat(command): port join from just-bash

Join lines of two files on a common field. Supports custom field
selection (-1/-2), separators (-t), outer joins (-a), unpairable-only
mode (-v), case-insensitive matching (-i), custom output format (-o),
and missing field replacement (-e).

Signed-off-by: Xe Iaso <me@xeiaso.net>
Assisted-by: Claude Opus 4.7 via Claude Code

Xe Iaso bc85b066 ccf5721a

+547
+325
command/internal/join/join.go
··· 1 + package join 2 + 3 + import ( 4 + "context" 5 + "errors" 6 + "fmt" 7 + "io" 8 + "path" 9 + "strconv" 10 + "strings" 11 + 12 + "github.com/pborman/getopt/v2" 13 + "mvdan.cc/sh/v3/interp" 14 + "tangled.org/xeiaso.net/kefka/command" 15 + ) 16 + 17 + type Impl struct{} 18 + 19 + type parsedLine struct { 20 + fields []string 21 + joinKey string 22 + } 23 + 24 + type formatField struct { 25 + file int 26 + field int 27 + } 28 + 29 + func (Impl) Exec(_ context.Context, ec *command.ExecContext, args []string) error { 30 + if ec == nil { 31 + return errors.New("join: nil ExecContext") 32 + } 33 + 34 + stdout := ec.Stdout 35 + if stdout == nil { 36 + stdout = io.Discard 37 + } 38 + stderr := ec.Stderr 39 + if stderr == nil { 40 + stderr = io.Discard 41 + } 42 + 43 + set := getopt.New() 44 + set.SetProgram("join") 45 + set.SetParameters("FILE1 FILE2") 46 + 47 + usage := func() { 48 + fmt.Fprint(stderr, "Usage: join [OPTION]... FILE1 FILE2\n") 49 + fmt.Fprint(stderr, "For each pair of input lines with identical join fields, write a line to\nstandard output. The default join field is the first, delimited by blanks.\n\n") 50 + fmt.Fprint(stderr, " -1 FIELD Join on this FIELD of file 1 (default: 1)\n") 51 + fmt.Fprint(stderr, " -2 FIELD Join on this FIELD of file 2 (default: 1)\n") 52 + fmt.Fprint(stderr, " -t CHAR Use CHAR as input and output field separator\n") 53 + fmt.Fprint(stderr, " -a FILENUM Also print unpairable lines from file FILENUM (1 or 2)\n") 54 + fmt.Fprint(stderr, " -v FILENUM Like -a but only output unpairable lines\n") 55 + fmt.Fprint(stderr, " -e STRING Replace missing fields with STRING\n") 56 + fmt.Fprint(stderr, " -o FORMAT Output format (comma-separated list of FILENUM.FIELD)\n") 57 + fmt.Fprint(stderr, " -i Ignore case when comparing fields\n") 58 + fmt.Fprint(stderr, " --help display this help and exit\n") 59 + } 60 + set.SetUsage(usage) 61 + 62 + field1 := set.Int('1', 1, "join on this FIELD of file 1") 63 + field2 := set.Int('2', 1, "join on this FIELD of file 2") 64 + separator := set.StringLong("field-separator", 't', "", "use CHAR as input and output field separator") 65 + aFlag := set.Int('a', 0, "also print unpairable lines from file FILENUM") 66 + vFlag := set.Int('v', 0, "like -a but only output unpairable lines") 67 + emptyStr := set.String('e', "", "replace missing fields with STRING") 68 + oFlag := set.String('o', "", "output format") 69 + ignoreCase := set.BoolLong("ignore-case", 'i', "ignore case when comparing fields") 70 + help := set.BoolLong("help", 0, "display this help and exit") 71 + 72 + if err := set.Getopt(append([]string{"join"}, args...), nil); err != nil { 73 + fmt.Fprintf(stderr, "join: %s\n", err) 74 + usage() 75 + return interp.ExitStatus(1) 76 + } 77 + if *help { 78 + usage() 79 + return nil 80 + } 81 + 82 + if *field1 < 1 { 83 + fmt.Fprintf(stderr, "join: invalid field number: '%d'\n", *field1) 84 + return interp.ExitStatus(1) 85 + } 86 + if *field2 < 1 { 87 + fmt.Fprintf(stderr, "join: invalid field number: '%d'\n", *field2) 88 + return interp.ExitStatus(1) 89 + } 90 + if *aFlag != 0 && *aFlag != 1 && *aFlag != 2 { 91 + fmt.Fprintf(stderr, "join: invalid file number: '%d'\n", *aFlag) 92 + return interp.ExitStatus(1) 93 + } 94 + if *vFlag != 0 && *vFlag != 1 && *vFlag != 2 { 95 + fmt.Fprintf(stderr, "join: invalid file number: '%d'\n", *vFlag) 96 + return interp.ExitStatus(1) 97 + } 98 + 99 + var formatSpec []formatField 100 + if *oFlag != "" { 101 + var err error 102 + formatSpec, err = parseOutputFormat(*oFlag) 103 + if err != nil { 104 + fmt.Fprintf(stderr, "join: %s\n", err) 105 + return interp.ExitStatus(1) 106 + } 107 + } 108 + 109 + files := set.Args() 110 + if len(files) != 2 { 111 + if len(files) < 2 { 112 + fmt.Fprint(stderr, "join: missing file operand\n") 113 + } else { 114 + fmt.Fprint(stderr, "join: extra operand\n") 115 + } 116 + return interp.ExitStatus(1) 117 + } 118 + 119 + content1, err := readFile(ec, files[0], stderr) 120 + if err != nil { 121 + return err 122 + } 123 + content2, err := readFile(ec, files[1], stderr) 124 + if err != nil { 125 + return err 126 + } 127 + 128 + lines1 := parseLines(content1, *separator, *field1, *ignoreCase) 129 + lines2 := parseLines(content2, *separator, *field2, *ignoreCase) 130 + 131 + index2 := make(map[string][]*parsedLine) 132 + for i := range lines2 { 133 + key := lines2[i].joinKey 134 + index2[key] = append(index2[key], &lines2[i]) 135 + } 136 + 137 + sep := *separator 138 + if sep == "" { 139 + sep = " " 140 + } 141 + 142 + var output []string 143 + matchedKeys2 := make(map[string]bool) 144 + 145 + for i := range lines1 { 146 + matches := index2[lines1[i].joinKey] 147 + if len(matches) > 0 { 148 + matchedKeys2[lines1[i].joinKey] = true 149 + if *vFlag == 0 { 150 + for _, m := range matches { 151 + output = append(output, formatLine(&lines1[i], m, *field1, *field2, sep, *emptyStr, formatSpec)) 152 + } 153 + } 154 + } else { 155 + if *aFlag == 1 || *vFlag == 1 { 156 + output = append(output, formatLine(&lines1[i], nil, *field1, *field2, sep, *emptyStr, formatSpec)) 157 + } 158 + } 159 + } 160 + 161 + if *aFlag == 2 || *vFlag == 2 { 162 + for i := range lines2 { 163 + if !matchedKeys2[lines2[i].joinKey] { 164 + output = append(output, formatLine(nil, &lines2[i], *field1, *field2, sep, *emptyStr, formatSpec)) 165 + } 166 + } 167 + } 168 + 169 + if len(output) > 0 { 170 + fmt.Fprint(stdout, strings.Join(output, "\n")+"\n") 171 + } 172 + 173 + return nil 174 + } 175 + 176 + func splitFields(line, sep string) []string { 177 + if sep != "" { 178 + return strings.Split(line, sep) 179 + } 180 + return strings.Fields(line) 181 + } 182 + 183 + func parseLines(content, sep string, joinField int, ignoreCase bool) []parsedLine { 184 + lines := strings.Split(content, "\n") 185 + if len(lines) > 0 && lines[len(lines)-1] == "" { 186 + lines = lines[:len(lines)-1] 187 + } 188 + var result []parsedLine 189 + for _, line := range lines { 190 + if line == "" { 191 + continue 192 + } 193 + fields := splitFields(line, sep) 194 + joinKey := "" 195 + if joinField-1 < len(fields) { 196 + joinKey = fields[joinField-1] 197 + } 198 + if ignoreCase { 199 + joinKey = strings.ToLower(joinKey) 200 + } 201 + result = append(result, parsedLine{fields: fields, joinKey: joinKey}) 202 + } 203 + return result 204 + } 205 + 206 + func formatLine(line1, line2 *parsedLine, field1, field2 int, sep, emptyStr string, formatSpec []formatField) string { 207 + if len(formatSpec) > 0 { 208 + var parts []string 209 + for _, ff := range formatSpec { 210 + var line *parsedLine 211 + if ff.file == 1 { 212 + line = line1 213 + } else { 214 + line = line2 215 + } 216 + if line != nil && ff.field == 0 { 217 + parts = append(parts, line.joinKey) 218 + } else if line != nil && ff.field-1 < len(line.fields) { 219 + parts = append(parts, line.fields[ff.field-1]) 220 + } else { 221 + parts = append(parts, emptyStr) 222 + } 223 + } 224 + return strings.Join(parts, sep) 225 + } 226 + 227 + var parts []string 228 + joinKey := "" 229 + if line1 != nil { 230 + joinKey = line1.joinKey 231 + } else if line2 != nil { 232 + joinKey = line2.joinKey 233 + } 234 + parts = append(parts, joinKey) 235 + 236 + if line1 != nil { 237 + for i, f := range line1.fields { 238 + if i != field1-1 { 239 + parts = append(parts, f) 240 + } 241 + } 242 + } 243 + if line2 != nil { 244 + for i, f := range line2.fields { 245 + if i != field2-1 { 246 + parts = append(parts, f) 247 + } 248 + } 249 + } 250 + 251 + return strings.Join(parts, sep) 252 + } 253 + 254 + func parseOutputFormat(format string) ([]formatField, error) { 255 + var result []formatField 256 + for _, part := range strings.Split(format, ",") { 257 + part = strings.TrimSpace(part) 258 + idx := strings.IndexByte(part, '.') 259 + if idx < 0 { 260 + return nil, fmt.Errorf("invalid field spec: '%s'", format) 261 + } 262 + file, err := strconv.Atoi(part[:idx]) 263 + if err != nil { 264 + return nil, fmt.Errorf("invalid field spec: '%s'", format) 265 + } 266 + field, err := strconv.Atoi(part[idx+1:]) 267 + if err != nil { 268 + return nil, fmt.Errorf("invalid field spec: '%s'", format) 269 + } 270 + if file != 1 && file != 2 { 271 + return nil, fmt.Errorf("invalid field spec: '%s'", format) 272 + } 273 + result = append(result, formatField{file: file, field: field}) 274 + } 275 + return result, nil 276 + } 277 + 278 + func readFile(ec *command.ExecContext, name string, stderr io.Writer) (string, error) { 279 + if name == "-" { 280 + if ec.Stdin == nil { 281 + return "", nil 282 + } 283 + data, err := io.ReadAll(ec.Stdin) 284 + if err != nil { 285 + return "", interp.ExitStatus(1) 286 + } 287 + return string(data), nil 288 + } 289 + if ec.FS == nil { 290 + fmt.Fprintf(stderr, "join: %s: No such file or directory\n", name) 291 + return "", interp.ExitStatus(1) 292 + } 293 + full := resolvePath(ec, name) 294 + f, err := ec.FS.Open(full) 295 + if err != nil { 296 + fmt.Fprintf(stderr, "join: %s: No such file or directory\n", name) 297 + return "", interp.ExitStatus(1) 298 + } 299 + defer f.Close() 300 + data, err := io.ReadAll(f) 301 + if err != nil { 302 + fmt.Fprintf(stderr, "join: %s: %v\n", name, err) 303 + return "", interp.ExitStatus(1) 304 + } 305 + return string(data), nil 306 + } 307 + 308 + func resolvePath(ec *command.ExecContext, p string) string { 309 + dir := ec.Dir 310 + if dir == "" { 311 + dir = "." 312 + } 313 + if path.IsAbs(p) { 314 + p = strings.TrimPrefix(p, "/") 315 + if p == "" { 316 + return "." 317 + } 318 + return path.Clean(p) 319 + } 320 + joined := path.Join(dir, p) 321 + if joined == "" { 322 + return "." 323 + } 324 + return joined 325 + }
+222
command/internal/join/join_test.go
··· 1 + package join 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "os" 7 + "strings" 8 + "testing" 9 + 10 + "github.com/go-git/go-billy/v5" 11 + "github.com/go-git/go-billy/v5/memfs" 12 + "tangled.org/xeiaso.net/kefka/command" 13 + ) 14 + 15 + func newFS(t *testing.T) billy.Filesystem { 16 + t.Helper() 17 + fs := memfs.New() 18 + write := func(name string, data []byte) { 19 + f, err := fs.OpenFile(name, os.O_CREATE|os.O_WRONLY, 0o644) 20 + if err != nil { 21 + t.Fatal(err) 22 + } 23 + f.Write(data) 24 + f.Close() 25 + } 26 + write("file1.txt", []byte("1 one\n2 two\n3 three\n")) 27 + write("file2.txt", []byte("1 alpha\n2 beta\n4 delta\n")) 28 + write("names.txt", []byte("Alice 30\nBob 25\nCharlie 35\n")) 29 + write("ages.txt", []byte("30 NY\n25 LA\n40 SF\n")) 30 + write("csv1.txt", []byte("a,1\nb,2\nc,3\n")) 31 + write("csv2.txt", []byte("a,x\nb,y\nd,z\n")) 32 + write("case1.txt", []byte("Hello world\nGoodbye moon\n")) 33 + write("case2.txt", []byte("hello earth\ngoodbye sun\n")) 34 + write("multi1.txt", []byte("key a\nkey b\nother c\n")) 35 + write("multi2.txt", []byte("key x\nkey y\n")) 36 + write("empty.txt", []byte("")) 37 + return fs 38 + } 39 + 40 + func run(t *testing.T, args []string, stdin string, fs billy.Filesystem) (string, string, error) { 41 + t.Helper() 42 + var stdout, stderr bytes.Buffer 43 + ec := &command.ExecContext{ 44 + Stdin: strings.NewReader(stdin), 45 + Stdout: &stdout, 46 + Stderr: &stderr, 47 + Dir: ".", 48 + FS: fs, 49 + } 50 + err := Impl{}.Exec(context.Background(), ec, args) 51 + return stdout.String(), stderr.String(), err 52 + } 53 + 54 + func TestJoin(t *testing.T) { 55 + tests := []struct { 56 + name string 57 + args []string 58 + stdin string 59 + wantStdout string 60 + wantErrSub string 61 + wantErr bool 62 + }{ 63 + { 64 + name: "inner join on first field", 65 + args: []string{"file1.txt", "file2.txt"}, 66 + wantStdout: "1 one alpha\n2 two beta\n", 67 + }, 68 + { 69 + name: "custom separator", 70 + args: []string{"-t", ",", "csv1.txt", "csv2.txt"}, 71 + wantStdout: "a,1,x\nb,2,y\n", 72 + }, 73 + { 74 + name: "long form field-separator", 75 + args: []string{"--field-separator", ",", "csv1.txt", "csv2.txt"}, 76 + wantStdout: "a,1,x\nb,2,y\n", 77 + }, 78 + { 79 + name: "different field numbers", 80 + args: []string{"-1", "2", "-2", "1", "names.txt", "ages.txt"}, 81 + wantStdout: "30 Alice NY\n25 Bob LA\n", 82 + }, 83 + { 84 + name: "left outer join", 85 + args: []string{"-a", "1", "file1.txt", "file2.txt"}, 86 + wantStdout: "1 one alpha\n2 two beta\n3 three\n", 87 + }, 88 + { 89 + name: "right outer join", 90 + args: []string{"-a", "2", "file1.txt", "file2.txt"}, 91 + wantStdout: "1 one alpha\n2 two beta\n4 delta\n", 92 + }, 93 + { 94 + name: "unpairable from file 1 only", 95 + args: []string{"-v", "1", "file1.txt", "file2.txt"}, 96 + wantStdout: "3 three\n", 97 + }, 98 + { 99 + name: "unpairable from file 2 only", 100 + args: []string{"-v", "2", "file1.txt", "file2.txt"}, 101 + wantStdout: "4 delta\n", 102 + }, 103 + { 104 + name: "case-insensitive join", 105 + args: []string{"-i", "case1.txt", "case2.txt"}, 106 + wantStdout: "hello world earth\ngoodbye moon sun\n", 107 + }, 108 + { 109 + name: "custom output format", 110 + args: []string{"-o", "1.1,1.2,2.2", "file1.txt", "file2.txt"}, 111 + wantStdout: "1 one alpha\n2 two beta\n", 112 + }, 113 + { 114 + name: "empty string replacement with format", 115 + args: []string{"-a", "1", "-e", "N/A", "-o", "1.1,1.2,2.2", "file1.txt", "file2.txt"}, 116 + wantStdout: "1 one alpha\n2 two beta\n3 three N/A\n", 117 + }, 118 + { 119 + name: "multiple matches for same key", 120 + args: []string{"multi1.txt", "multi2.txt"}, 121 + wantStdout: "key a x\nkey a y\nkey b x\nkey b y\n", 122 + }, 123 + { 124 + name: "empty input file produces no output", 125 + args: []string{"empty.txt", "file2.txt"}, 126 + wantStdout: "", 127 + }, 128 + { 129 + name: "no matching keys produces no output", 130 + args: []string{"csv1.txt", "ages.txt"}, 131 + wantStdout: "", 132 + }, 133 + { 134 + name: "stdin as first file", 135 + args: []string{"-", "file2.txt"}, 136 + stdin: "1 one\n2 two\n", 137 + wantStdout: "1 one alpha\n2 two beta\n", 138 + }, 139 + { 140 + name: "missing operand", 141 + args: []string{"file1.txt"}, 142 + wantErrSub: "missing file operand", 143 + wantErr: true, 144 + }, 145 + { 146 + name: "extra operand", 147 + args: []string{"file1.txt", "file2.txt", "file3.txt"}, 148 + wantErrSub: "extra operand", 149 + wantErr: true, 150 + }, 151 + { 152 + name: "missing file", 153 + args: []string{"nope.txt", "file2.txt"}, 154 + wantErrSub: "No such file or directory", 155 + wantErr: true, 156 + }, 157 + { 158 + name: "unknown flag", 159 + args: []string{"--nope", "file1.txt", "file2.txt"}, 160 + wantErr: true, 161 + }, 162 + { 163 + name: "invalid field number for -1", 164 + args: []string{"-1", "0", "file1.txt", "file2.txt"}, 165 + wantErrSub: "invalid field number", 166 + wantErr: true, 167 + }, 168 + { 169 + name: "invalid file number for -a", 170 + args: []string{"-a", "3", "file1.txt", "file2.txt"}, 171 + wantErrSub: "invalid file number", 172 + wantErr: true, 173 + }, 174 + { 175 + name: "invalid output format", 176 + args: []string{"-o", "bad", "file1.txt", "file2.txt"}, 177 + wantErrSub: "invalid field spec", 178 + wantErr: true, 179 + }, 180 + { 181 + name: "output format with wrong file number", 182 + args: []string{"-o", "3.1", "file1.txt", "file2.txt"}, 183 + wantErrSub: "invalid field spec", 184 + wantErr: true, 185 + }, 186 + } 187 + 188 + for _, tt := range tests { 189 + t.Run(tt.name, func(t *testing.T) { 190 + stdout, stderr, err := run(t, tt.args, tt.stdin, newFS(t)) 191 + if tt.wantErr { 192 + if err == nil { 193 + t.Fatalf("expected error, got nil; stdout=%q stderr=%q", stdout, stderr) 194 + } 195 + } else if err != nil { 196 + t.Fatalf("unexpected error: %v; stderr=%q", err, stderr) 197 + } 198 + if stdout != tt.wantStdout { 199 + t.Errorf("stdout = %q, want %q", stdout, tt.wantStdout) 200 + } 201 + if tt.wantErrSub != "" && !strings.Contains(stderr, tt.wantErrSub) { 202 + t.Errorf("stderr = %q, want substring %q", stderr, tt.wantErrSub) 203 + } 204 + }) 205 + } 206 + } 207 + 208 + func TestHelp(t *testing.T) { 209 + stdout, stderr, err := run(t, []string{"--help"}, "", newFS(t)) 210 + if err != nil { 211 + t.Fatalf("unexpected error: %v", err) 212 + } 213 + if stdout != "" { 214 + t.Errorf("expected empty stdout, got %q", stdout) 215 + } 216 + if !strings.Contains(stderr, "Usage: join [OPTION]... FILE1 FILE2") { 217 + t.Errorf("usage line missing from stderr: %q", stderr) 218 + } 219 + if !strings.Contains(stderr, "-t CHAR") { 220 + t.Errorf("separator flag missing from help: %q", stderr) 221 + } 222 + }