A command-line utility to manage Apple Voice Memos recordings.
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Import voicememosutil

+323 -5
+5 -1
Package.swift
··· 17 17 dependencies: [ 18 18 .product(name: "ArgumentParser", package: "swift-argument-parser") 19 19 ] 20 - ) 20 + ), 21 + .testTarget( 22 + name: "VoiceMemosUtilTests", 23 + dependencies: [.target(name: "voicememosutil")], 24 + resources: [.copy("Resources")]), 21 25 ] 22 26 )
+88
README.md
··· 1 + # voicememosutil 2 + 3 + A command-line utility to manage Apple Voice Memos recordings. 4 + 5 + ## Requirements 6 + 7 + - macOS 15+ 8 + 9 + ## Installation 10 + 11 + Build from source using Swift 6.2+: 12 + 13 + ```bash 14 + swift build -c release 15 + cp .build/release/voicememosutil /usr/local/bin/ 16 + ``` 17 + 18 + ## Usage 19 + 20 + ```bash 21 + voicememosutil get-transcript [--format <format>] <file> 22 + ``` 23 + 24 + ### Arguments 25 + 26 + - `<file>`: Path to the `.m4a` Voice Memos file. 27 + 28 + Voice Memos recordings are stored at: 29 + 30 + ``` 31 + ~/Library/Group Containers/group.com.apple.VoiceMemos.shared/Recordings 32 + ``` 33 + 34 + (Only available if iCloud sync is enabled for Voice Memos) 35 + 36 + ### Options 37 + 38 + - `--format <format>`: Output format. One of: 39 + - `text`: Plain text transcript (default) 40 + - `json`: Transcript data as JSON 41 + - `raw`: Raw binary transcript data 42 + 43 + ### JSON output format 44 + 45 + Returns the full transcript metadata including timing information. Two formats are used depending on the source: 46 + 47 + **Interleaved format:** 48 + 49 + ```json 50 + { 51 + "attributedString": [ 52 + "This is", 53 + { "timeRange": [0, 0.42] }, 54 + " the transcript text", 55 + { "timeRange": [0.42, 1.23] }, 56 + " interleaved with attributes.", 57 + { "timeRange": [1.23, 2.00] } 58 + ], 59 + "locale": { "identifier": "en_US", "current": 0 } 60 + } 61 + ``` 62 + 63 + **Separated format:** 64 + 65 + ```json 66 + { 67 + "attributedString": { 68 + "attributeTable": [ 69 + { "timeRange": [0, 0.42] }, 70 + { "timeRange": [0.42, 1.23] }, 71 + { "timeRange": [1.23, 2.00] } 72 + ], 73 + "runs": [ 74 + "In this format", 75 + 0, 76 + " text is interleaved with", 77 + 1, 78 + " indices of attributes.", 79 + 2 80 + ] 81 + }, 82 + "locale": { "identifier": "en_US", "current": 0 } 83 + } 84 + ``` 85 + 86 + ### Raw output format 87 + 88 + Outputs the raw binary transcript data from the audio file. 
While its content is typically equivalent to the JSON output (which is parsed and re-serialized with sorted keys), the raw data is written without validation and may contain malformed JSON.
+41
Sources/voicememosutil/Commands/GetTranscript.swift
// SPDX-License-Identifier: 0BSD

import AVFoundation
import ArgumentParser
import Foundation

/// `get-transcript` subcommand: reads the transcript attached to a recording
/// and writes it to standard output in the requested format.
struct GetTranscript: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Get transcript from a Voice Memos recording."
    )

    @Option(name: .shortAndLong, help: "Output format.")
    var format: Format = .text

    @Argument(help: "Path to the .m4a Voice Memos file.")
    var file: String

    /// Loads the transcript from `file` and emits it according to `format`.
    ///
    /// - Throws: Any error raised while extracting the transcript, or while
    ///   converting it to JSON or plain text.
    mutating func run() async throws {
        let recordingURL = URL(fileURLWithPath: file)
        let transcript = try await Transcript(audioURL: recordingURL)
        let standardOutput = FileHandle.standardOutput

        switch format {
        case .text:
            print(try transcript.text())
        case .raw:
            // Bytes are passed through untouched; no trailing newline is added.
            standardOutput.write(transcript.data)
        case .json:
            let serialized = try transcript.formattedJSONData()
            standardOutput.write(serialized)
            // Terminate the JSON document with a newline.
            print("")
        }
    }
}

extension GetTranscript {
    /// Output formats accepted by `--format`.
    enum Format: String, ExpressibleByArgument, CaseIterable {
        case json
        case raw
        case text
    }
}
+120
Sources/voicememosutil/Transcript.swift
// SPDX-License-Identifier: 0BSD

import AVFoundation
import Foundation

/// Metadata key ("tsrp") used to look up the transcript item in the track's
/// ISO user-data key space.
private let isoUserDataKeyTranscript = AVMetadataKey("tsrp")

/// Transcript payload attached to a recording, kept as the raw bytes read
/// from the file and interpreted lazily as JSON.
struct Transcript {
    /// Raw transcript bytes exactly as stored; not validated on construction.
    let data: Data

    /// Extracts the transcript payload from the audio file at `audioURL`.
    ///
    /// - Throws: `Transcript.Error` when the file, its first track, the
    ///   track metadata, or the transcript item cannot be loaded.
    init(audioURL: URL) async throws {
        self.data = try await extractTranscript(audioURL: audioURL)
    }

    /// Wraps already-extracted transcript bytes.
    init(data: Data) {
        self.data = data
    }

    /// Parses `data` as JSON.
    ///
    /// - Returns: The Foundation object graph produced by `JSONSerialization`.
    /// - Throws: `Error.invalidFormat` when `data` is not valid JSON.
    func jsonObject() throws -> Any {
        do {
            return try JSONSerialization.jsonObject(with: data)
        } catch {
            throw Error.invalidFormat
        }
    }

    /// Re-serializes the transcript as JSON with sorted keys and unescaped
    /// slashes.
    ///
    /// - Throws: `Error.invalidFormat` when `data` is not valid JSON, or the
    ///   underlying serialization error.
    func formattedJSONData() throws -> Data {
        let jsonObject = try jsonObject()
        return try JSONSerialization.data(
            withJSONObject: jsonObject,
            options: [.sortedKeys, .withoutEscapingSlashes])
    }

    /// Returns the plain text of the transcript.
    ///
    /// Two layouts of the `attributedString` entry are understood:
    /// - an array that interleaves text runs with attribute objects;
    /// - a dictionary whose `runs` array interleaves text runs with indices.
    ///
    /// In both cases the string elements are concatenated in order and every
    /// other element is ignored.
    ///
    /// - Throws: `Error.invalidFormat` when the data is not JSON, when
    ///   `attributedString` is missing or has an unrecognized shape, or when
    ///   it contains no text runs.
    func text() throws -> String {
        guard let root = try jsonObject() as? [String: Any],
            let attributedString = root["attributedString"]
        else {
            throw Error.invalidFormat
        }

        // Both layouts reduce to "an array mixing strings with non-strings".
        let elements: [Any]
        if let interleaved = attributedString as? [Any] {
            elements = interleaved
        } else if let separated = attributedString as? [String: Any],
            let runs = separated["runs"] as? [Any]
        {
            elements = runs
        } else {
            // An unrecognized shape is a format problem, reported the same
            // way as every other structural mismatch in this method.
            throw Error.invalidFormat
        }

        let strings = elements.compactMap { $0 as? String }
        guard !strings.isEmpty else {
            throw Error.invalidFormat
        }
        return strings.joined()
    }
}

extension Transcript {
    /// Failures raised while extracting or interpreting a transcript.
    enum Error: Swift.Error, CustomStringConvertible {
        case failedToLoadFile
        case failedToLoadMetadata
        case invalidData
        case invalidFormat
        case noTrackFound
        case noTranscriptFound

        var description: String {
            switch self {
            case .failedToLoadFile:
                return "Failed to load file"
            case .failedToLoadMetadata:
                return "Failed to load metadata from track"
            case .invalidData:
                return "Invalid transcript data"
            case .invalidFormat:
                return "Invalid transcript format"
            case .noTrackFound:
                return "No track found in the audio"
            case .noTranscriptFound:
                return "No transcript metadata found in the track"
            }
        }
    }
}

/// Reads the transcript metadata item from the first track of the asset at
/// `audioURL` and returns its value as raw bytes.
///
/// Underlying loading errors are intentionally mapped onto the matching
/// `Transcript.Error` case rather than propagated.
///
/// - Throws: `Transcript.Error` describing which loading step failed.
private func extractTranscript(audioURL: URL) async throws -> Data {
    let asset = AVURLAsset(url: audioURL)

    guard (try? await asset.load(.isReadable)) == true else {
        throw Transcript.Error.failedToLoadFile
    }

    guard let track = (try? await asset.load(.tracks))?.first else {
        throw Transcript.Error.noTrackFound
    }

    guard let metadata = try? await track.load(.metadata) else {
        throw Transcript.Error.failedToLoadMetadata
    }

    let transcripts = AVMetadataItem.metadataItems(
        from: metadata, withKey: isoUserDataKeyTranscript, keySpace: .isoUserData)

    guard let transcript = transcripts.first else {
        throw Transcript.Error.noTranscriptFound
    }

    // The item's value must be binary data; any other type is rejected.
    guard let data = (try? await transcript.load(.value)) as? Data else {
        throw Transcript.Error.invalidData
    }

    return data
}
+6 -4
Sources/voicememosutil/VoiceMemosUtil.swift
··· 3 3 import ArgumentParser 4 4 5 5 @main 6 - struct VoiceMemosUtil: ParsableCommand { 7 - mutating func run() throws { 8 - print("Hello, world!") 9 - } 6 + struct VoiceMemosUtil: AsyncParsableCommand { 7 + static let configuration = CommandConfiguration( 8 + commandName: "voicememosutil", 9 + abstract: "A utility to manage Apple Voice Memos recordings.", 10 + subcommands: [GetTranscript.self] 11 + ) 10 12 }
+11
Tests/Resources/transcript_interleaved.json
··· 1 + { 2 + "attributedString": [ 3 + "This is", 4 + { "timeRange": [0, 0.42] }, 5 + " the transcript text", 6 + { "timeRange": [0.42, 1.23] }, 7 + " interleaved with attributes.", 8 + { "timeRange": [1.23, 2.0] } 9 + ], 10 + "locale": { "identifier": "en_US", "current": 0 } 11 + }
+18
Tests/Resources/transcript_separated.json
··· 1 + { 2 + "attributedString": { 3 + "attributeTable": [ 4 + { "timeRange": [0, 0.42] }, 5 + { "timeRange": [0.42, 1.23] }, 6 + { "timeRange": [1.23, 2.0] } 7 + ], 8 + "runs": [ 9 + "In this format", 10 + 0, 11 + " text is interleaved with", 12 + 1, 13 + " indices of attributes.", 14 + 2 15 + ] 16 + }, 17 + "locale": { "identifier": "en_US", "current": 0 } 18 + }
+15
Tests/TestHelpers.swift
import Foundation

/// Raised when a test fixture cannot be located in the test bundle.
enum ResourceError: Error {
    case notFound(String)
}

/// Reads a fixture from the `Resources` directory of the test bundle.
///
/// - Parameter name: File name of the fixture, including its extension.
/// - Returns: The contents of the fixture file.
/// - Throws: `ResourceError.notFound` when the bundle has no such file, or
///   the error raised while reading it.
func loadResource(_ name: String) throws -> Data {
    let location = Bundle.module.url(
        forResource: name, withExtension: nil, subdirectory: "Resources")

    if let location {
        return try Data(contentsOf: location)
    }
    throw ResourceError.notFound(name)
}
+19
Tests/TranscriptTests.swift
import Testing

@testable import voicememosutil

/// Checks plain-text extraction for both supported transcript layouts,
/// using the JSON fixtures bundled with the test target.
struct TranscriptTests {
    @Test("text(): interleaved format")
    func textInterleaved() throws {
        let fixture = try loadResource("transcript_interleaved.json")
        let transcript = Transcript(data: fixture)
        let extracted = try transcript.text()
        #expect(extracted == "This is the transcript text interleaved with attributes.")
    }

    @Test("text(): separated format")
    func textSeparated() throws {
        let fixture = try loadResource("transcript_separated.json")
        let transcript = Transcript(data: fixture)
        let extracted = try transcript.text()
        #expect(extracted == "In this format text is interleaved with indices of attributes.")
    }
}