audio TtsAudioManager.ts

42.01% Statements 429/1021
100% Branches 29/29
13.79% Functions 4/29
42.01% Lines 429/1021
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022 1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
130x
130x
130x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
34x
34x
145x
145x
145x
145x
145x
152x
152x
152x
152x
152x
25x
25x
25x
25x
25x
25x
5x
5x
152x
7x
7x
7x
127x
120x
120x
120x
120x
120x
120x
120x
152x
152x
150x
152x
152x
152x
152x
152x
152x
11x
11x
9x
9x
9x
3x
3x
3x
3x
3x
3x
9x
11x
11x
11x
1x
1x
152x
139x
139x
139x
139x
139x
139x
139x
139x
139x
152x
145x
34x
34x
28x
28x
34x
34x
34x
34x
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
34x
34x
34x
34x
34x
34x
34x
34x
 
 
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
34x
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
34x
 
 
34x
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
34x
34x
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
 
 
 
 
 
 
 
34x
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34x
34x
34x
34x
 
 
 
 
 
34x
34x
 
 
1x
1x
1x
1x
1x
1x
1x
1x
 
 
 
 
 
 
 
 
1x
1x
1x
1x
1x
 
 
 
 
 
 
 
 
 
1x
1x
1x
1x
 
 
1x
1x
1x
1x
 
 
 
 
 
1x
1x
 
 
 
 
1x
1x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1x
  import { AUDIO_MANIFEST_MAP } from './audioManifest'
import { getClipMeta } from './audioClipRegistry'
import { computeClipHash, resolveCanonicalText } from './clipHash'
 
export type SubtitleAnchor = 'top' | 'bottom'
 
/**
 * Settings accepted by `TtsAudioManager.configure()`. Callers pass a
 * `Partial` of this shape; only the fields that are present are applied.
 */
export interface TtsAudioManagerConfig {
  /** Playback volume. `configure()` clamps the value into the range 0..1. */
  volume: number
  /**
   * Master on/off switch. `speak()` plays nothing while this is false, and
   * `configure()` calls `stop()` when it flips from true to false.
   */
  enabled: boolean
  /**
   * Scaling factor for subtitle display time. Changing it while a subtitle
   * is on screen restarts that subtitle's timer with a freshly estimated
   * duration. NOTE(review): the exact scaling direction lives in
   * `estimateReadingTimeMs`, which is outside this view.
   */
  subtitleDurationMultiplier: number
  /** Bottom offset in pixels for subtitle positioning (default 64). */
  subtitleBottomOffset: number
  /** Anchor subtitles to top or bottom of viewport (default 'bottom'). */
  subtitleAnchor: SubtitleAnchor
  /** Audio playback speed multiplier (default 1). Pitch is preserved. */
  playbackRate: number
}
 
/**
 * Read-only view of the manager's observable state. The manager keeps a
 * cached instance of this shape so that `useSyncExternalStore` receives a
 * referentially stable object between changes.
 */
export interface ManagerSnapshot {
  /** Whether the manager currently considers speech to be in progress. */
  isPlaying: boolean
  /** Whether TTS playback is enabled. */
  isEnabled: boolean
  /** Current playback volume (initially 0.8). */
  volume: number
  /** Subtitle text currently on display, or null when no subtitle is shown. */
  subtitleText: string | null
  /** Current subtitle duration multiplier (initially 1). */
  subtitleDurationMultiplier: number
  /**
   * Display duration in milliseconds (initially 0).
   * NOTE(review): presumably the duration of the subtitle currently shown --
   * the code that builds the snapshot is outside this view; confirm.
   */
  subtitleDurationMs: number
  /** Bottom offset in pixels for subtitle positioning. */
  subtitleBottomOffset: number
  /** Anchor subtitles to top or bottom of viewport. */
  subtitleAnchor: SubtitleAnchor
  /** Last TTS error message (e.g. API failures). Cleared on next successful speak(). */
  lastError: string | null
}
 
/** Locale -> fallback text map (BCP 47 keys). */
export type TtsSay = Record<string, string>
 
/**
 * Call-level options for `register()` / `speak()` / `preloadForSpeak()`.
 * Values given here act as defaults for every segment of the call; a
 * segment's own `say` / `tone` take precedence over them.
 */
export interface TtsConfig {
  /**
   * Tone descriptor. Treated as '' when omitted; for hash-based segments it
   * is part of the input to the clip hash.
   */
  tone?: string
  /** Locale -> text map, merged underneath each segment's own `say` map. */
  say?: TtsSay
  /** Voice sources prepended to the global chain for this specific call. */
  prependChain?: VoiceSource[]
}
 
/**
 * A segment in a TTS sequence.
 * - `string` -> explicit clip ID
 * - `{ clipId: string, ... }` -> explicit clip ID with per-segment config
 * - `{ say: TtsSay, tone?: string }` -> hash-based (clip ID computed from content)
 */
export type TtsSegment =
  | string
  | ({ clipId: string } & Partial<TtsConfig>)
  | { say: TtsSay; tone?: string }
 
/**
 * Type guard: reports whether `seg` carries a string-valued `clipId`
 * property, i.e. whether it is an explicit-ID segment rather than a
 * hash-based one.
 */
function hasExplicitClipId(seg: object): seg is { clipId: string } & Partial<TtsConfig> {
  // A missing property reads as `undefined`, which is not a string, so a
  // single typeof check covers both "absent" and "wrong type".
  const candidate: unknown = (seg as { clipId?: unknown }).clipId
  return typeof candidate === 'string'
}
 
export type TtsInput = TtsSegment | TtsSegment[]
 
/**
 * One entry of the manager's runtime clip collection, created by
 * `register()` and reported to the server by `flush()`.
 */
export interface CollectedClip {
  /** Explicit clip ID, or the content hash for hash-based segments. */
  clipId: string
  /** Locale -> text map; merged across repeated registrations of the same clip. */
  say: TtsSay
  /** Tone descriptor ('' when none was supplied). */
  tone: string
  /** Number of times `speak()` has been asked to play this clip; starts at 0. */
  playCount: number
  /** When the clip was first registered. */
  firstSeen: Date
  /** Set at first registration and refreshed each time `speak()` counts a play. */
  lastSeen: Date
}
 
export type { VoiceSourceData } from './voiceSource'
import type { VoiceSourceData } from './voiceSource'
import {
  type VoiceSource,
  PregeneratedVoice,
  CustomVoice,
  GenerateVoice,
  hydrateVoiceChain,
} from './voiceSource'
 
type Listener = () => void
 
type ChainAttemptOutcome = 'no-clip' | 'play-error' | 'unavailable' | 'skipped'
 
interface ChainAttempt {
  source: VoiceSource
  outcome: ChainAttemptOutcome
}
 
/** Resolved segment ready for playback. */
interface ResolvedSegment {
  /** Clip ID used to look up pre-generated audio for each disk-backed voice. */
  clipId: string
  /**
   * Text used by the browser-TTS and subtitle sources. Chosen as: manifest
   * entry text, else the locale-resolved `say` text, else the clip ID itself.
   */
  fallbackText: string
  /** Effective tone for this segment ('' when none was supplied). */
  tone: string
  /** Per-call voice sources tried before the manager's global chain. */
  prependChain?: VoiceSource[]
}
 
const INTER_SEGMENT_GAP_MS = 80
 
export class TtsAudioManager {
  private listeners = new Set<Listener>()
  private collection = new Map<string, CollectedClip>()
 
  private _isPlaying = false
  private _isEnabled = false
  private _volume = 0.8
  private _playbackRate = 1
  private _subtitleDurationMultiplier = 1
  private _currentSubtitleDurationMs = 0
  private _subtitleBottomOffset = 64
  private _subtitleAnchor: SubtitleAnchor = 'bottom'
 
  // Voice chain: ordered fallback list for playback
  private _voiceChain: VoiceSource[] = []
 
  // Pre-generated clip IDs: voice name -> set of clip IDs with mp3s on disk
  private _pregenClipIds = new Map<string, Set<string>>()
 
  // Currently playing Audio element (for mp3 playback)
  private _currentAudio: HTMLAudioElement | null = null
 
  // Duration (ms) of the currently playing audio clip, set from loadedmetadata
  private _currentAudioDurationMs: number | null = null
 
  // Preloaded Audio element for the next expected speak() call.
  // Set by preloadForSpeak(), consumed by playMp3() for instant playback.
  private _preloadedAudio: { url: string; audio: HTMLAudioElement } | null = null
 
  // Pending seek: if set, playMp3() applies this seek after audio starts.
  // Set by seekNextAudio(), consumed (cleared) by playMp3().
  private _pendingSeekMs: number | null = null
 
  // Sequence cancellation flag
  private _sequenceCancelled = false
 
  // Monotonically increasing speak call ID for stale detection
  private _speakSeq = 0
  private _activeSpeakId = 0
 
  // Subtitle display state
  private _subtitleText: string | null = null
  private _subtitleTimer: ReturnType<typeof setTimeout> | null = null
  private _subtitleResolve: (() => void) | null = null
 
  // Last error from TTS generation/playback (surfaced to admin/debug UI)
  private _lastError: string | null = null
 
  // Cached snapshot for useSyncExternalStore -- must be referentially stable
  private _cachedSnapshot: ManagerSnapshot = {
    isPlaying: false,
    isEnabled: false,
    volume: 0.8,
    subtitleText: null,
    subtitleDurationMultiplier: 1,
    subtitleDurationMs: 0,
    subtitleBottomOffset: 64,
    subtitleAnchor: 'bottom',
    lastError: null,
  }
 
  // -- Configuration --
 
  /**
   * Apply a partial configuration update and notify subscribers once if at
   * least one field was applied.
   *
   * - `volume` is clamped to 0..1 and applied to the clip currently playing.
   * - `playbackRate` is clamped to 0.25..4 and applied to the clip currently
   *   playing (pitch preserved).
   * - Non-finite `volume` / `playbackRate` values (NaN, +/-Infinity) are
   *   ignored: they would survive the clamp as NaN and later make the
   *   browser throw when assigned to a media element.
   * - Turning `enabled` off while it was on stops current playback.
   * - Changing `subtitleDurationMultiplier` while a subtitle is showing
   *   restarts that subtitle's timer with a newly estimated duration.
   *
   * @param config Fields to change; omitted fields keep their current value.
   */
  configure(config: Partial<TtsAudioManagerConfig>): void {
    let changed = false

    if (config.volume !== undefined && Number.isFinite(config.volume)) {
      this._volume = Math.max(0, Math.min(1, config.volume))
      // Mirror the playbackRate handling below: a volume change should be
      // audible on the clip that is already playing, not only the next one.
      if (this._currentAudio) {
        this._currentAudio.volume = this._volume
      }
      changed = true
    }
    if (config.enabled !== undefined) {
      const wasEnabled = this._isEnabled
      this._isEnabled = config.enabled
      changed = true
      if (wasEnabled && !config.enabled) {
        this.stop()
      }
    }
    if (config.subtitleDurationMultiplier !== undefined) {
      this._subtitleDurationMultiplier = config.subtitleDurationMultiplier
      changed = true
      // Restart the active subtitle timer so the new speed takes effect immediately
      if (this._subtitleText && this._subtitleResolve) {
        if (this._subtitleTimer) clearTimeout(this._subtitleTimer)
        const newMs = this.estimateReadingTimeMs(this._subtitleText)
        this._currentSubtitleDurationMs = newMs
        // Capture the resolver now: the field may be reassigned before the
        // timer fires.
        const resolve = this._subtitleResolve
        this._subtitleTimer = setTimeout(() => {
          this._subtitleResolve = null
          resolve()
        }, newMs)
      }
    }
    if (config.subtitleBottomOffset !== undefined) {
      this._subtitleBottomOffset = config.subtitleBottomOffset
      changed = true
    }
    if (config.subtitleAnchor !== undefined) {
      this._subtitleAnchor = config.subtitleAnchor
      changed = true
    }
    if (config.playbackRate !== undefined && Number.isFinite(config.playbackRate)) {
      this._playbackRate = Math.max(0.25, Math.min(4, config.playbackRate))
      // Apply to currently playing audio element immediately
      if (this._currentAudio) {
        this._currentAudio.playbackRate = this._playbackRate
        this._currentAudio.preservesPitch = true
      }
      changed = true
    }

    if (changed) this.notify()
  }
 
  // -- Voice Chain --
 
  /**
   * Load the pre-generated clip manifest for the given voice chain.
   * Fetches from the manifest endpoint to populate `_pregenClipIds`.
   */
  /**
   * Install a new voice chain and load the list of pre-generated clips for
   * its disk-backed voices.
   *
   * The chain is hydrated from `voiceChainData` and stored first. If it
   * contains any `PregeneratedVoice` / `CustomVoice` sources, their names
   * are sent to the manifest endpoint and the returned clip IDs are stored
   * per voice in `_pregenClipIds`. Any network, HTTP or parse failure is
   * swallowed: the chain stays installed and playback falls through to
   * later sources.
   *
   * @param voiceChainData Serialized voice chain, in priority order.
   */
  async loadPregenManifest(voiceChainData: VoiceSourceData[]): Promise<void> {
    this._voiceChain = hydrateVoiceChain(voiceChainData)

    const diskVoices = this._voiceChain
      .filter(
        (s): s is PregeneratedVoice | CustomVoice =>
          s instanceof PregeneratedVoice || s instanceof CustomVoice
      )
      .map((s) => s.name)

    if (diskVoices.length === 0) return

    // Encode each name individually so characters such as '&', '#' or
    // spaces cannot corrupt the query string; the ',' separators stay
    // literal. Clip URLs elsewhere in this class encode names the same way.
    const voicesParam = diskVoices.map((name) => encodeURIComponent(name)).join(',')

    try {
      const res = await fetch(`/api/audio/collected-clips/manifest?voices=${voicesParam}`)
      if (!res.ok) return

      const data: { clipIdsByVoice: Record<string, string[]> } = await res.json()
      for (const [voice, ids] of Object.entries(data.clipIdsByVoice)) {
        this._pregenClipIds.set(voice, new Set(ids))
      }
    } catch {
      // Non-fatal -- fall back to browser TTS
    }
  }
 
  // -- Runtime Collection --
 
  /**
   * Record every segment of `input` in the runtime clip collection without
   * playing anything.
   *
   * Segment handling:
   * - string: used verbatim as the clip ID; when the static registry knows
   *   the clip, its text becomes the segment's `en` text.
   * - object with a string `clipId`: explicit ID with its own `say` / `tone`.
   * - any other object: the ID is the hash of the merged say map and tone.
   *
   * The call-level `say` is merged underneath the segment's `say` (segment
   * wins) and the segment tone falls back to the call-level tone. Segments
   * whose clip ID is empty are skipped. Re-registering a known clip merges
   * the say maps and fills in a missing tone; for explicit IDs a warning is
   * logged when the canonical text changed.
   *
   * @param input  One segment or a list of segments.
   * @param config Call-level defaults for `say` and `tone`.
   */
  register(input: TtsInput, config?: TtsConfig): void {
    const topSay = config?.say
    const topTone = config?.tone ?? ''
    const segmentList = Array.isArray(input) ? input : [input]

    for (const seg of segmentList) {
      // Stays undefined for hash-based segments.
      let explicitId: string | undefined
      let ownSay: TtsSay | undefined
      let ownTone: string | undefined

      if (typeof seg === 'string') {
        explicitId = seg
        // Per-clip registry text overrides any sentence-level top say
        // (e.g. an array of clip IDs registered with a full-sentence map).
        const meta = getClipMeta(seg)
        if (meta) ownSay = { en: meta.text }
      } else {
        ownSay = seg.say
        ownTone = seg.tone
        if (hasExplicitClipId(seg)) explicitId = seg.clipId
      }

      // Top-level say first, then the segment's own entries (segment wins).
      const mergedSay: TtsSay = { ...topSay, ...ownSay }
      const mergedTone = ownTone ?? topTone
      const clipId = explicitId ?? computeClipHash(mergedSay, mergedTone)
      if (!clipId) continue

      const known = this.collection.get(clipId)
      if (!known) {
        this.collection.set(clipId, {
          clipId,
          say: mergedSay,
          tone: mergedTone,
          playCount: 0,
          firstSeen: new Date(),
          lastSeen: new Date(),
        })
        continue
      }

      // Clobber detection only makes sense for explicit IDs: a hash-based
      // ID changes whenever its text does.
      if (explicitId !== undefined) {
        const before = resolveCanonicalText(known.say)
        const after = resolveCanonicalText(mergedSay)
        if (before && after && before !== after) {
          console.warn(
            `[TTS] Clip "${clipId}" re-registered with different text: ` +
              `"${before}" → "${after}". ` +
              `Consider using hash-based IDs: useTTS({ say: { en: '...' }, tone: '...' })`
          )
        }
      }

      // Accumulate locales across calls; keep the first non-empty tone.
      Object.assign(known.say, mergedSay)
      if (mergedTone && !known.tone) known.tone = mergedTone
    }
  }
 
  /**
   * Return the collected clips as a new array, in insertion order. The
   * array is a copy; the clip objects in it are the live entries.
   */
  getCollection(): CollectedClip[] {
    return [...this.collection.values()]
  }
 
  /**
   * Flush played clips to the server.
   *
   * By default uses `sendBeacon` (fire-and-forget, safe during page unload).
   * Pass `{ awaitResponse: true }` to use `fetch` so the caller can await
   * the server's response before proceeding (e.g. before generation).
   */
  /**
   * Report played clips (those with `playCount > 0`) to the server.
   *
   * By default uses `sendBeacon` (fire-and-forget, safe during page unload).
   * If `sendBeacon` is unavailable, or explicitly reports that the payload
   * could not be queued (returns `false`), the same payload is sent with
   * `fetch` instead of being dropped silently.
   *
   * Pass `{ awaitResponse: true }` to always use `fetch`, so the caller can
   * await the server's response before proceeding (e.g. before generation).
   *
   * All failures are swallowed: this is best-effort and never throws.
   *
   * @param options.awaitResponse Force the awaitable `fetch` path.
   */
  async flush(options?: { awaitResponse?: boolean }): Promise<void> {
    const clips = this.getCollection().filter((c) => c.playCount > 0)
    if (clips.length === 0) return

    const endpoint = '/api/audio/collected-clips'
    const body = JSON.stringify({
      clips: clips.map((c) => ({
        clipId: c.clipId,
        say: c.say,
        tone: c.tone,
        playCount: c.playCount,
      })),
    })

    // Shared POST used by the awaitable path and by the beacon fallback.
    const postViaFetch = async (): Promise<void> => {
      try {
        await fetch(endpoint, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body,
        })
      } catch {
        // Best-effort -- don't throw during cleanup
      }
    }

    if (options?.awaitResponse) {
      // Awaitable path: caller needs the data committed before continuing
      await postViaFetch()
      return
    }

    // Fire-and-forget path: safe during page unload
    if (typeof navigator !== 'undefined' && navigator.sendBeacon) {
      const queued = navigator.sendBeacon(endpoint, new Blob([body], { type: 'application/json' }))
      // Only an explicit `false` means the browser refused to queue the
      // beacon; anything else is treated as delivered.
      if (queued !== false) return
    }

    await postViaFetch()
  }
 
  // -- Audio duration (for adaptive animation timing) --
 
  /**
   * Duration (ms) of the currently playing audio clip.
   * Reads directly from the live Audio element when available (immune to
   * speak()'s cached field reset race). Falls back to the cached value
   * from loadedmetadata for edge cases.
   * Returns null when no audio is playing or duration is unknown.
   */
  /**
   * Report the length of the current clip in milliseconds.
   *
   * The live audio element is consulted first (once it has metadata and a
   * positive, finite duration); otherwise the value cached from
   * `loadedmetadata` is returned, which is null when nothing is known.
   */
  getCurrentAudioDurationMs(): number | null {
    const live = this._currentAudio
    // NaN stands in for "no usable live value" and fails the check below.
    const seconds = live && live.readyState >= 1 ? live.duration : NaN
    return isFinite(seconds) && seconds > 0 ? seconds * 1000 : this._currentAudioDurationMs
  }
 
  /**
   * Request that the next audio clip starts playback from `timeMs` into
   * the clip rather than from the beginning. Consumed (cleared) by
   * playMp3() once applied. Use before calling speak() to resume
   * mid-sentence after a scrub.
   */
  /**
   * Arm a one-shot seek for the next clip played by playMp3().
   *
   * @param timeMs Offset into the clip in milliseconds. Any value that is
   *               not greater than zero clears a previously armed seek.
   */
  seekNextAudio(timeMs: number): void {
    if (timeMs > 0) {
      this._pendingSeekMs = timeMs
    } else {
      this._pendingSeekMs = null
    }
  }
 
  /**
   * Preload audio for an upcoming speak() call so it starts instantly.
   *
   * Resolves the input through the voice chain and, if a pre-generated mp3
   * is available, creates an Audio element with `preload='auto'` to start
   * fetching in the background. When speak() is later called for the same
   * clip, playMp3() uses the already-buffered element — zero load latency.
   *
   * No-op when audio is disabled or when the clip requires generation /
   * browser TTS / subtitle-only playback.
   */
  /**
   * Start buffering the mp3 for an upcoming single-segment speak() call.
   *
   * The input is resolved to a clip ID and the voice chain is scanned in
   * order; the first disk-backed voice that has the clip gets an Audio
   * element with `preload = 'auto'`, stored in `_preloadedAudio` for
   * playMp3() to pick up. An earlier preload for a different URL is
   * released by blanking its `src`.
   *
   * Does nothing when audio is disabled, when the input is not exactly one
   * segment, when the same URL is already being preloaded, or when no
   * disk-backed voice has the clip.
   */
  preloadForSpeak(input: TtsInput, config?: TtsConfig): void {
    if (!this._isEnabled) return

    const list = Array.isArray(input) ? input : [input]
    if (list.length !== 1) return

    const { clipId } = this.resolveSegment(list[0], config)

    for (const source of this._voiceChain) {
      if (!(source instanceof PregeneratedVoice || source instanceof CustomVoice)) continue
      if (!this._pregenClipIds.get(source.name)?.has(clipId)) continue

      const url = `/api/audio/clips/${encodeURIComponent(source.name)}/${encodeURIComponent(clipId)}`
      const pending = this._preloadedAudio
      if (pending?.url === url) return // already preloading
      if (pending) pending.audio.src = ''

      const element = new Audio(url)
      element.preload = 'auto'
      element.volume = this._volume
      this._preloadedAudio = { url, audio: element }
      return
    }
  }
 
  // -- Playback --
 
  /**
   * Play an mp3 file from a URL.
   * Returns true if playback completed, false on error.
   */
  /**
   * Play an mp3 file from a URL.
   *
   * Reuses the element prepared by preloadForSpeak() when its URL matches,
   * otherwise creates a new Audio element. Applies the current volume and
   * playback rate, publishes the clip duration for
   * getCurrentAudioDurationMs(), and consumes any seek armed through
   * seekNextAudio().
   *
   * NOTE(review): the returned promise settles only on `ended`, `error` or
   * a rejected `play()`. When another call pauses this element, none of
   * those are wired to fire here, so the promise appears to stay pending --
   * confirm that callers which supersede playback do not rely on it
   * settling.
   *
   * @param url Clip URL to play.
   * @returns true if playback ran to the end, false on a load or play error.
   */
  private playMp3(url: string): Promise<boolean> {
    return new Promise<boolean>((resolve) => {
      // Pause any orphaned audio element before creating a new one
      if (this._currentAudio) {
        this._currentAudio.pause()
        this._currentAudio = null
      }

      // Use preloaded element if available: instant playback, no network wait
      let audio: HTMLAudioElement
      if (this._preloadedAudio?.url === url) {
        audio = this._preloadedAudio.audio
        this._preloadedAudio = null
      } else {
        audio = new Audio(url)
      }

      audio.volume = this._volume
      audio.playbackRate = this._playbackRate
      audio.preservesPitch = true
      this._currentAudio = audio

      // Expose audio duration for adaptive animation timing. Only a finite
      // value is cached: unbounded streams report Infinity, and the cached
      // field is documented as null when the duration is unknown.
      const recordDuration = () => {
        if (this._currentAudio !== audio) return
        const durationMs = audio.duration * 1000
        if (Number.isFinite(durationMs)) this._currentAudioDurationMs = durationMs
      }
      audio.addEventListener('loadedmetadata', recordDuration)
      // For preloaded elements, metadata may already be available
      if (audio.readyState >= 1) recordDuration()

      // Only clear _currentAudio if it's still THIS element; a newer
      // speak() call may have already overwritten it with a different one.
      const clearIfOwned = () => {
        if (this._currentAudio === audio) this._currentAudio = null
      }

      audio.onended = () => {
        clearIfOwned()
        resolve(true)
      }
      audio.onerror = () => {
        clearIfOwned()
        resolve(false)
      }
      audio.play().catch(() => {
        clearIfOwned()
        resolve(false)
      })

      // Apply pending seek (set by seekNextAudio). The request is consumed
      // here whether or not the seek can be applied right away.
      if (this._pendingSeekMs !== null) {
        const seekSec = this._pendingSeekMs / 1000
        this._pendingSeekMs = null
        // If metadata is already loaded, seek immediately
        if (audio.readyState >= 1 && isFinite(audio.duration)) {
          audio.currentTime = Math.min(seekSec, audio.duration)
        } else {
          // Wait for metadata then seek
          audio.addEventListener(
            'loadedmetadata',
            () => {
              if (this._currentAudio === audio && isFinite(audio.duration)) {
                audio.currentTime = Math.min(seekSec, audio.duration)
              }
            },
            { once: true }
          )
        }
      }
    })
  }
 
  /**
   * Resolve BCP 47 locale from a TtsSay map using navigator.languages.
   */
  /**
   * Pick the text from a TtsSay map that best matches the user's languages.
   *
   * Walks `navigator.languages` in order, trying each tag exactly and then
   * its language-only prefix (`en-US` -> `en`). When nothing matches, or
   * `navigator` is unavailable, the map's first entry is used.
   *
   * @returns The chosen text, or undefined for a missing or empty map.
   */
  private resolveSay(say: TtsSay | undefined): string | undefined {
    const firstKey = say ? Object.keys(say)[0] : undefined
    if (!say || firstKey === undefined) return undefined

    const preferred =
      typeof navigator !== 'undefined' && navigator.languages ? navigator.languages : []

    for (const tag of preferred) {
      const exact = say[tag]
      if (exact !== undefined) return exact

      const base = say[tag.split('-')[0]]
      if (base !== undefined) return base
    }

    return say[firstKey]
  }
 
  /**
   * Resolve a single segment into playback-ready form.
   */
  /**
   * Turn one input segment into the form used for playback.
   *
   * Clip ID selection:
   * - string known to the clip registry: the string itself, with the
   *   registry text as the `en` text;
   * - any other string: treated as text to speak and hashed (with the
   *   call-level tone) into a short, filesystem-safe ID;
   * - object with a string `clipId`: that ID;
   * - any other object: hash of the merged say map and effective tone.
   *
   * Fallback text is the manifest entry's text, else the locale-resolved
   * say text, else the clip ID.
   */
  private resolveSegment(seg: TtsSegment, topConfig?: TtsConfig): ResolvedSegment {
    const topTone = topConfig?.tone ?? ''

    // Stays undefined only for hash-based object segments.
    let clipId: string | undefined
    let ownSay: TtsSay | undefined
    let ownTone: string | undefined

    if (typeof seg !== 'string') {
      ownSay = seg.say
      ownTone = seg.tone
      if (hasExplicitClipId(seg)) clipId = seg.clipId
    } else {
      const meta = getClipMeta(seg)
      if (meta) {
        clipId = seg
        ownSay = { en: meta.text }
      } else {
        // Free text: hash only the text itself (not the call-level say map)
        // so the raw string never becomes a file name.
        ownSay = { en: seg }
        ownTone = topTone
        clipId = computeClipHash(ownSay, ownTone)
      }
    }

    const mergedSay: TtsSay = { ...topConfig?.say, ...ownSay }
    const mergedTone = ownTone ?? topTone
    if (clipId === undefined) {
      clipId = computeClipHash(mergedSay, mergedTone)
    }

    // Fallback text priority: manifest entry .text > resolveSay() > clipId
    const manifestText = AUDIO_MANIFEST_MAP[clipId]?.text
    const fallbackText = manifestText ?? this.resolveSay(mergedSay) ?? clipId

    return {
      clipId,
      fallbackText,
      tone: mergedTone,
      prependChain: topConfig?.prependChain,
    }
  }
 
  /** Check if this speak call has been superseded by a newer one. */
  /**
   * True when `speakId` is no longer the most recent speak() call, i.e. a
   * newer call has replaced it as the active one.
   */
  private _isStale(speakId: number): boolean {
    const isActive = this._activeSpeakId === speakId
    return !isActive
  }
 
  /**
   * Try playing a single resolved segment via the voice chain.
   * Returns true if something played, false if chain exhausted.
   */
  /**
   * Try playing a single resolved segment via the voice chain.
   *
   * Sources are tried in order until one succeeds. After every await the
   * call re-checks whether it has been superseded and bails out with
   * `false` if so. Failed attempts are recorded in a local log, which a
   * later `generate` source uses to decide which voices to generate for.
   *
   * @param resolved Segment to play.
   * @param speakId  ID of the owning speak() call, for stale detection.
   * @returns true if something played, false if the chain was exhausted or
   *          the call became stale.
   */
  private async playOneSegment(resolved: ResolvedSegment, speakId: number): Promise<boolean> {
    if (this._isStale(speakId)) return false

    // When a per-call prependChain is present, insert a GenerateVoice
    // after the prepended sources so that on-demand generation fires
    // for the character voice before the global chain's cached voices.
    const effectiveChain = resolved.prependChain?.length
      ? [...resolved.prependChain, new GenerateVoice(), ...this._voiceChain]
      : this._voiceChain

    // Record of sources that were tried and did not produce audio.
    const log: ChainAttempt[] = []

    if (effectiveChain.length > 0) {
      for (let i = 0; i < effectiveChain.length; i++) {
        if (this._isStale(speakId)) return false

        const source = effectiveChain[i]
        if (source instanceof PregeneratedVoice || source instanceof CustomVoice) {
          // Disk-backed voice: play the mp3 only if the manifest lists this clip.
          const clipIds = this._pregenClipIds.get(source.name)
          const hasClip = clipIds?.has(resolved.clipId) ?? false
          if (hasClip) {
            const url = `/api/audio/clips/${encodeURIComponent(source.name)}/${encodeURIComponent(resolved.clipId)}`
            const ok = await this.playMp3(url)
            if (this._isStale(speakId)) return false
            if (ok) return true
            log.push({ source, outcome: 'play-error' })
          } else {
            log.push({ source, outcome: 'no-clip' })
          }
        } else if (source.type === 'browser-tts') {
          const ok = await this.speakBrowserTts(resolved.fallbackText)
          if (this._isStale(speakId)) return false
          if (ok) return true
          log.push({ source, outcome: 'unavailable' })
        } else if (source.type === 'subtitle') {
          // Subtitle-only source: show the text for an estimated reading time.
          const readingMs = this.estimateReadingTimeMs(resolved.fallbackText)
          this._subtitleText = resolved.fallbackText
          this._currentSubtitleDurationMs = readingMs
          this.notify()

          // The resolver is stored on the instance so configure() can restart
          // the timer and speak() can end the wait early.
          await new Promise<void>((resolve) => {
            this._subtitleResolve = resolve
            this._subtitleTimer = setTimeout(() => {
              this._subtitleResolve = null
              resolve()
            }, readingMs)
          })

          this._subtitleText = null
          this._subtitleTimer = null
          this._subtitleResolve = null
          this._currentSubtitleDurationMs = 0
          this.notify()
          // A subtitle always ends the chain walk; it counts as played
          // unless a newer speak() took over in the meantime.
          return !this._isStale(speakId)
        } else if (source.type === 'generate') {
          // Generate for earlier voices that lacked the clip and report
          // that they can generate.
          const missed = log
            .filter((a) => a.outcome === 'no-clip' && a.source.canGenerate())
            .map((a) => a.source)

          if (missed.length > 0) {
            // The first missed voice is generated and played now; the rest
            // are handed to generateInBackground only if that succeeds.
            // NOTE(review): both helpers are defined outside this view.
            const [primary, ...others] = missed
            const ok = await this.generateAndPlay(primary, resolved, speakId)
            if (this._isStale(speakId)) return false
            if (ok) {
              if (others.length > 0) {
                this.generateInBackground(others, resolved)
              }
              return true
            }
            log.push({ source, outcome: 'play-error' })
          } else {
            log.push({ source, outcome: 'skipped' })
          }
        }
      }
      // Every source was tried and none produced audio.
      return false
    }

    // No voice chain -- fall back to browser TTS
    if (this._isStale(speakId)) return false
    return await this.speakBrowserTts(resolved.fallbackText)
  }
 
  /**
   * Play a sequence of resolved segments with inter-segment gaps.
   */
  /**
   * Play a sequence of resolved segments with inter-segment gaps.
   *
   * Segments are played one after another with a pause of
   * `INTER_SEGMENT_GAP_MS` between them (none after the last). The loop
   * stops as soon as a newer speak() call supersedes this one.
   *
   * The playing flag is only touched while this call is still the active
   * one: a superseded sequence neither marks the manager as playing on
   * entry nor clears the flag on exit, because the newer call owns that
   * state. This matches the single-segment path in speak().
   *
   * @param segments Segments to play, in order.
   * @param speakId  ID of the owning speak() call, for stale detection.
   */
  private async playSequence(segments: ResolvedSegment[], speakId: number): Promise<void> {
    // Superseded before starting (speak() may await before calling us).
    if (this._isStale(speakId)) return

    this._isPlaying = true
    this._sequenceCancelled = false
    this.notify()

    for (let i = 0; i < segments.length; i++) {
      if (this._isStale(speakId)) break

      await this.playOneSegment(segments[i], speakId)

      // Inter-segment gap (skip after last segment)
      if (i < segments.length - 1 && !this._isStale(speakId)) {
        await new Promise<void>((resolve) => setTimeout(resolve, INTER_SEGMENT_GAP_MS))
      }
    }

    // Don't clobber the playing flag of a newer speak() call.
    if (!this._isStale(speakId)) this.setPlaying(false)
  }
 
  /**
   * Speak one segment or a sequence of segments, superseding any speech
   * that is already in progress.
   *
   * The input is always registered in the clip collection, even when audio
   * is disabled. When enabled and non-empty, the call bumps play counts,
   * cancels in-flight subtitle / mp3 / browser-TTS playback, and then plays
   * the segments through the voice chain. The returned promise resolves
   * when this call's playback finishes or is abandoned as stale.
   *
   * @param input  One segment or a list of segments.
   * @param config Call-level `say` / `tone` defaults and optional prepended
   *               voice sources.
   */
  async speak(input: TtsInput, config?: TtsConfig): Promise<void> {
    this.register(input, config)

    // Claim the active slot; any earlier call now reports itself as stale.
    // NOTE(review): this happens before the early returns below, so a
    // disabled or empty speak() still marks the previous call stale without
    // cancelling its audio -- confirm this is intended.
    const speakId = ++this._speakSeq
    this._activeSpeakId = speakId

    if (!this._isEnabled) return

    const segments = Array.isArray(input) ? input : [input]
    if (segments.length === 0) return

    const resolved = segments.map((seg) => this.resolveSegment(seg, config))

    // Increment play counts
    // NOTE(review): register() stores a string segment under the raw string,
    // while resolveSegment() hashes strings unknown to the clip registry, so
    // such segments are not found here and are never counted -- confirm.
    for (const r of resolved) {
      const entry = this.collection.get(r.clipId)
      if (entry) {
        entry.playCount++
        entry.lastSeen = new Date()
      }
    }

    // Cancel any in-flight playback before starting new speech.
    this._sequenceCancelled = true
    this._currentAudioDurationMs = null
    if (this._subtitleTimer) {
      clearTimeout(this._subtitleTimer)
      this._subtitleTimer = null
    }
    // Release a previous call that is waiting on its subtitle timer.
    if (this._subtitleResolve) {
      this._subtitleResolve()
      this._subtitleResolve = null
    }
    this._subtitleText = null
    if (this._currentAudio) {
      this._currentAudio.pause()
      this._currentAudio = null
    }

    // If browser TTS is active from a previous speak() call, cancel it
    // and wait a frame. Chrome silently drops speechSynthesis.speak() if
    // cancel() was called in the same synchronous frame, so the delay is
    // required to ensure the subsequent speak() is not swallowed.
    if (
      typeof window !== 'undefined' &&
      'speechSynthesis' in window &&
      (speechSynthesis.speaking || speechSynthesis.pending)
    ) {
      speechSynthesis.cancel()
      this._activeUtterances.clear()
      await new Promise<void>((r) => setTimeout(r, 50))
    }

    if (resolved.length === 1) {
      this.setPlaying(true)
      await this.playOneSegment(resolved[0], speakId)
      // Leave the playing flag alone if a newer call has taken over.
      if (!this._isStale(speakId)) this.setPlaying(false)
    } else {
      await this.playSequence(resolved, speakId)
    }
  }
 
  /**
   * Speak `text` through the browser's SpeechSynthesis API.
   *
   * Resolves `false` straight away when SpeechSynthesis is missing, when the
   * browser reports no active user gesture, or when no voices are loaded.
   * Otherwise resolves `true` when the utterance ends, and `false` when it
   * errors, when `speechSynthesis.speak()` throws, when speech has not
   * started after 1.5 s, or when the safety timeout fires.
   *
   * @param text - Text to speak.
   * @returns Whether the utterance was spoken to completion.
   */
  private speakBrowserTts(text: string): Promise<boolean> {
    if (typeof window === 'undefined' || !('speechSynthesis' in window)) {
      return Promise.resolve(false)
    }

    // Fast-fail when no active user gesture
    if (navigator.userActivation && !navigator.userActivation.isActive) {
      return Promise.resolve(false)
    }

    const voices = speechSynthesis.getVoices()
    if (voices.length === 0) {
      return Promise.resolve(false)
    }

    return new Promise<boolean>((resolve) => {
      // Build the utterance before arming any timer so that a constructor
      // failure cannot leave timers behind that reference it.
      const rate = 0.9 * this._playbackRate
      const utterance = new SpeechSynthesisUtterance(text)
      utterance.volume = this._volume
      utterance.rate = rate

      let settled = false
      // Single exit point: stops both timers, drops the keep-alive reference
      // and resolves exactly once.
      const settle = (value: boolean) => {
        if (settled) return
        settled = true
        clearTimeout(safetyTimeout)
        clearTimeout(startTimeout)
        this._activeUtterances.delete(utterance)
        resolve(value)
      }

      // Safety timeout — if the browser hangs, don't block the UI forever.
      // The budget grows with the text length and shrinks with the speaking
      // rate, so a long utterance that is still being spoken is not cut off;
      // it is never shorter than the previous fixed 10 s.
      const minSafetyMs = 10_000
      const budgetMsPerChar = 200
      const effectiveRate = rate > 0 ? rate : 1
      const safetyMs = Math.max(minSafetyMs, (text.length * budgetMsPerChar) / effectiveRate)
      const safetyTimeout = setTimeout(() => {
        speechSynthesis.cancel()
        settle(false)
      }, safetyMs)

      // Start detection — if speaking hasn't started after 1.5s, it silently failed
      const startTimeout = setTimeout(() => {
        if (!speechSynthesis.speaking && !speechSynthesis.pending) {
          speechSynthesis.cancel()
          settle(false)
        }
      }, 1500)

      utterance.onend = () => {
        settle(true)
      }
      utterance.onerror = () => {
        settle(false)
      }

      this._activeUtterances.add(utterance)
      try {
        speechSynthesis.speak(utterance)
      } catch {
        // Report a synchronous failure as "not spoken" instead of rejecting,
        // and make sure the timers and the keep-alive reference are released.
        settle(false)
      }
    })
  }
 
  // Strong references to utterances that have been handed to
  // speechSynthesis.speak() and have not finished yet.
  // Prevent GC of in-flight utterances — Chrome may corrupt the speech
  // queue if a queued SpeechSynthesisUtterance is garbage-collected.
  // Entries are added right before speechSynthesis.speak(), removed when the
  // utterance ends, errors or times out, and the whole set is cleared by
  // stop() and when speak() cancels active browser speech.
  private _activeUtterances = new Set<SpeechSynthesisUtterance>()
 
  /**
   * Ask `source` to generate the clip for `resolved` and play the result.
   *
   * When playback succeeds, any recorded error is cleared and, for
   * pregenerated/custom voices, the clip id is added to the pregen cache.
   * An empty generation result or a thrown error is stored in `_lastError`,
   * logged, and published to listeners.
   *
   * @param source - Voice asked to generate the clip.
   * @param resolved - Segment supplying the clip id, fallback text and tone.
   * @param speakId - Id of the owning `speak()` call, used for staleness checks.
   * @returns `true` only when the clip played and the call is still current.
   */
  private async generateAndPlay(
    source: VoiceSource,
    resolved: ResolvedSegment,
    speakId: number
  ): Promise<boolean> {
    // Declared outside the try so the finally clause can always release it.
    let objectUrl: string | null = null
    try {
      const clip = await source.generate(resolved.clipId, resolved.fallbackText, resolved.tone)
      if (this._isStale(speakId)) return false

      if (!clip) {
        // Named voices are reported by name, every other source by its type.
        const voiceLabel =
          source instanceof PregeneratedVoice || source instanceof CustomVoice
            ? source.name
            : source.type
        this._lastError = `[TTS] Generation returned empty response (voice: ${voiceLabel}, clip: ${resolved.clipId})`
        console.error(this._lastError)
        this.notify()
        return false
      }

      objectUrl = URL.createObjectURL(clip)
      const played = await this.playMp3(objectUrl)
      if (this._isStale(speakId)) return false
      if (!played) return played

      // A successful playback supersedes any previously reported error.
      if (this._lastError) {
        this._lastError = null
        this.notify()
      }
      if (source instanceof PregeneratedVoice || source instanceof CustomVoice) {
        this.addToPregenCache(source.name, resolved.clipId)
      }
      return played
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err)
      this._lastError = `[TTS] generateAndPlay error: ${detail}`
      console.error(this._lastError)
      this.notify()
      return false
    } finally {
      // Runs on every exit path once an object URL has been created.
      if (objectUrl) URL.revokeObjectURL(objectUrl)
    }
  }
 
  /**
   * Start clip generation on each of `sources` without waiting for any of
   * them. When a pregenerated/custom voice returns a clip, the clip id is
   * added to the pregen cache; failures are deliberately ignored.
   *
   * @param sources - Voices to ask for the clip.
   * @param resolved - Segment supplying the clip id, fallback text and tone.
   */
  private generateInBackground(sources: VoiceSource[], resolved: ResolvedSegment): void {
    sources.forEach((voice) => {
      const pending = voice.generate(resolved.clipId, resolved.fallbackText, resolved.tone)
      pending
        .then((clip) => {
          if (!clip) return
          if (voice instanceof PregeneratedVoice || voice instanceof CustomVoice) {
            this.addToPregenCache(voice.name, resolved.clipId)
          }
        })
        .catch(() => {
          // Best-effort only: a failed background generation is not reported.
        })
    })
  }
 
  /**
   * Record that `voice` has a clip for `clipId`, creating the voice's id set
   * on first use.
   *
   * @param voice - Voice name used as the key in `_pregenClipIds`.
   * @param clipId - Clip id to mark as available.
   */
  private addToPregenCache(voice: string, clipId: string): void {
    const known = this._pregenClipIds.get(voice)
    if (known) {
      known.add(clipId)
      return
    }
    // First clip seen for this voice: start its set with this id.
    this._pregenClipIds.set(voice, new Set([clipId]))
  }
 
  /**
   * Halt everything that is playing or queued: invalidates the active speak
   * call, pauses mp3 playback, cancels browser speech, releases any subtitle
   * wait, drops preloaded audio and, if playback was active, marks it ended.
   */
  stop(): void {
    // Invalidate the in-flight speak call and reset per-playback state.
    this._activeSpeakId = -1
    this._sequenceCancelled = true
    this._currentAudioDurationMs = null
    this._pendingSeekMs = null

    // mp3 playback
    const playingAudio = this._currentAudio
    if (playingAudio) {
      playingAudio.pause()
      this._currentAudio = null
    }

    // Browser speech synthesis
    const hasSpeechSynthesis = typeof window !== 'undefined' && 'speechSynthesis' in window
    if (hasSpeechSynthesis) {
      speechSynthesis.cancel()
    }
    this._activeUtterances.clear()

    // Subtitle display: stop the timer and release whoever awaits it.
    const subtitleTimer = this._subtitleTimer
    if (subtitleTimer) {
      clearTimeout(subtitleTimer)
      this._subtitleTimer = null
    }
    if (this._subtitleResolve) {
      this._subtitleResolve()
      this._subtitleResolve = null
    }
    this._subtitleText = null

    // Preloaded audio: blank its source before dropping the reference.
    const preloaded = this._preloadedAudio
    if (preloaded) {
      preloaded.audio.src = ''
      this._preloadedAudio = null
    }

    if (this._isPlaying) {
      this.setPlaying(false)
    }
  }
 
  // -- React integration: useSyncExternalStore --
 
  /**
   * Register `listener` to be called on every `notify()`.
   *
   * @param listener - Callback invoked with no arguments.
   * @returns A function that removes the listener again.
   */
  subscribe = (listener: Listener): (() => void) => {
    this.listeners.add(listener)
    const unsubscribe = (): void => {
      this.listeners.delete(listener)
    }
    return unsubscribe
  }
 
  /**
   * Return the snapshot object most recently built by `notify()`. The same
   * reference is returned until the next `notify()`.
   */
  getSnapshot = (): ManagerSnapshot => this._cachedSnapshot
 
  // -- Introspection (for admin tools) --
 
  /**
   * Report, for every voice in the current voice chain, whether it can serve
   * `clipId`. Read-only: nothing is generated or cached.
   *
   * @param clipId - Clip to look up.
   * @returns One entry per voice, in chain order, pairing the voice's JSON
   *   form with its availability.
   */
  getClipAvailability(clipId: string): Array<{ source: VoiceSourceData; hasClip: boolean }> {
    const report: Array<{ source: VoiceSourceData; hasClip: boolean }> = []
    for (const voice of this._voiceChain) {
      const described = voice.toJSON()
      // browser-tts, subtitle, generate are always "available"
      let hasClip = true
      if (voice instanceof PregeneratedVoice || voice instanceof CustomVoice) {
        // Named voices only have the clips recorded in the pregen cache.
        hasClip = this._pregenClipIds.get(voice.name)?.has(clipId) ?? false
      }
      report.push({ source: described, hasClip })
    }
    return report
  }
 
  /**
   * End the current subtitle early: stops its timer and releases the pending
   * subtitle wait so playback can move on to the next segment.
   */
  dismissSubtitle(): void {
    const pendingTimer = this._subtitleTimer
    if (pendingTimer) {
      clearTimeout(pendingTimer)
      this._subtitleTimer = null
    }

    if (!this._subtitleResolve) return
    this._subtitleResolve()
    this._subtitleResolve = null
  }
 
  // -- Cleanup --
 
  /**
   * Tear down the manager. Currently this only delegates to `stop()`, which
   * halts playback and clears timers and preloaded audio; registered
   * listeners are left in place.
   */
  dispose(): void {
    this.stop()
  }
 
  // -- Private --
 
  /**
   * Update the playing flag and publish a new snapshot, but only when the
   * value actually changes.
   *
   * @param playing - New value for the flag.
   */
  private setPlaying(playing: boolean): void {
    if (this._isPlaying === playing) return
    this._isPlaying = playing
    this.notify()
  }
 
  /**
   * Estimate how long a subtitle should stay on screen.
   *
   * Assumes 200 words per minute, scales the result by
   * `_subtitleDurationMultiplier`, and never returns less than 1500 ms.
   *
   * @param text - Subtitle text; words are separated by runs of whitespace.
   * @returns Display time in milliseconds.
   */
  private estimateReadingTimeMs(text: string): number {
    const trimmed = text.trim()
    // ''.split(/\s+/) yields [''] (length 1), so empty or whitespace-only
    // text has to be counted as zero words explicitly.
    const words = trimmed === '' ? 0 : trimmed.split(/\s+/).length
    const baseMs = (words / 200) * 60_000
    return Math.max(1500, baseMs * this._subtitleDurationMultiplier)
  }
 
  /**
   * Publish the current state: replaces the cached snapshot with a freshly
   * built object, then calls every registered listener.
   */
  private notify(): void {
    // A new object identity is what lets useSyncExternalStore detect the change.
    this._cachedSnapshot = {
      isPlaying: this._isPlaying,
      isEnabled: this._isEnabled,
      volume: this._volume,
      subtitleText: this._subtitleText,
      subtitleDurationMultiplier: this._subtitleDurationMultiplier,
      subtitleDurationMs: this._currentSubtitleDurationMs,
      subtitleBottomOffset: this._subtitleBottomOffset,
      subtitleAnchor: this._subtitleAnchor,
      lastError: this._lastError,
    }

    this.listeners.forEach((listener) => {
      listener()
    })
  }
}