voice: Improvements to the talk clip generation · tsiry-sandratraina.com/rockbox-zig@613a143

Rockbox open source high quality audio player as a Music Player Daemon

mpris rockbox mpd libadwaita audio rust zig deno

voice: Improvements to the talk clip generation

This covers the voiced directory and filenames

* Don't regenerate a talk clip if one is present (?)
* Format awareness; i.e., if the TTS engine generates an MP3 file,
convert it to a WAV file so we can encode it properly
* Use a global variable for the wavtrim threshold

Change-Id: I9f441b573704bdf7675794fd0e1984446308463b

Solomon Peachy 2 years ago 613a1432 39c9c350

+23 -9

1 changed file

expand all

tools

voice.pl

+23 -9

tools/voice.pl

···
124 124       'turkce' => 'tr',
125 125   );
126 126
    127 + my $trim_thresh = 500; # Trim silence if over this, in ms
    128 +
127 129   # Initialize TTS engine. May return an object or value which will be passed
128 130   # to voicestring and shutdown_tts
129 131   sub init_tts {
···
433 435   } else {
434 436       voicestring($voice, $wav, $tts_engine_opts, $tts_object);
435 437       if ($format eq "wav") {
436     -         wavtrim($wav, 500, $tts_object);
437     -         # 500 seems to be a reasonable default for now
    438 +         wavtrim($wav, $trim_thresh, $tts_object);
438 439       }
439 440   }
440 441   # Convert from mp3 to wav so we can use rbspeex
···
510 511   my $d = new DirHandle $dir;
511 512   while (my $file = $d->read) {
512 513       my ($voice, $wav, $enc);
    514 +     my $format = $tts_object->{'format'};
    515 +
513 516       # Print some progress information
514 517       if (++$i % 10 == 0 and !$verbose) {
515 518           print(".");
···
527 530       }
528 531       # Element is a dir
529 532       if ( -d $path) {
    533 +         $enc = sprintf("%s/_dirname.talk", $path);
530 534           gentalkclips($path, $tts_object, $encoder, $encoder_opts, $tts_engine_opts, $i);
531     -         $enc = sprintf("%s/_dirname.talk", $path);
532 535       }
533 536       # Element is a file
534 537       else {
···
537 540       }
538 541
539 542       printf("Talkclip %s: %s", $enc, $voice) if $verbose;
    543 +     # Don't generate encoded file if it already exists
    544 +     next if (-f $enc);
540 545
541     -     voicestring($voice, $wav, $tts_engine_opts, $tts_object);
542     -     wavtrim($wav, 500, $tts_object);
543     -     # 500 seems to be a reasonable default for now
544     -     encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object);
545     -     synchronize($tts_object);
546     -     unlink($wav);
    546 +     voicestring($voice, $wav, $tts_engine_opts, $tts_object);
    547 +     wavtrim($wav, $trim_thresh, $tts_object);
    548 +
    549 +     if ($format eq "mp3") {
    550 +         system("ffmpeg -loglevel 0 -i $wav $voice$wav");
    551 +         rename("$voice$wav","$wav");
    552 +         $format = "wav";
    553 +     }
    554 +     if ($format eq "wav") {
    555 +         encodewav($wav, $enc, $encoder, $encoder_opts, $tts_object);
    556 +     } else {
    557 +         copy($wav, $enc);
    558 +     }
    559 +     synchronize($tts_object);
    560 +     unlink($wav);
547 561   }
548 562 }
549 563

Configure Feed

Configure Feed