chat parseGeometricEntities.ts

78.75% Statements 152/193
92.3% Branches 24/26
42.85% Functions 3/7
78.75% Lines 152/193
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194 1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
 
 
 
 
 
 
 
1x
1x
 
 
 
 
 
 
 
1x
1x
1x
 
 
 
 
 
 
 
 
 
 
 
1x
1x
1x
1x
1x
1x
10x
10x
10x
5x
10x
1x
10x
1x
10x
3x
10x
 
10x
10x
1x
1x
15x
15x
15x
7x
5x
5x
2x
15x
2x
1x
1x
1x
15x
2x
1x
1x
1x
15x
4x
3x
3x
1x
15x
 
15x
15x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
1x
15x
15x
15x
15x
15x
15x
15x
15x
15x
15x
15x
10x
10x
15x
5x
5x
10x
10x
10x
10x
10x
10x
10x
10x
10x
15x
15x
15x
8x
8x
15x
15x
15x
1x
1x
14x
14x
14x
  /**
 * Parse chat text for structured geometric entity markers.
 *
 * The LLM is instructed to use markers like:
 *   {seg:AB}   → segment A–B
 *   {tri:ABC}  → triangle △ABC
 *   {ang:ABC}  → angle ∠ABC (vertex is middle letter)
 *   {pt:A}     → point A
 *
 * This is far more reliable than guessing from free-form text.
 * If the model doesn't use markers, text renders as-is with no highlights.
 */
 
/**
 * Reference to a geometric object named by its point labels.
 * Discriminated on `type`; each variant carries the labels in the order
 * they were written in the marker.
 */
export type GeometricEntityRef =
  // Segment between two labelled points.
  | { type: 'segment'; from: string; to: string }
  // Triangle named by its three vertex labels.
  | { type: 'triangle'; vertices: [string, string, string] }
  // Angle named by three labels; per the marker convention the vertex is the middle one.
  | { type: 'angle'; points: [string, string, string] }
  // Single labelled point.
  | { type: 'point'; label: string }
 
/**
 * Reference to a numbered foundational item (definition, postulate,
 * common notion or proposition). Discriminated on `type`; `id` is the
 * item's number.
 * NOTE(review): nothing in this file constructs these values — presumably
 * they are built by a caller elsewhere; confirm.
 */
export type FoundationEntityRef =
  | { type: 'definition'; id: number }
  | { type: 'postulate'; id: number }
  | { type: 'commonNotion'; id: number }
  | { type: 'proposition'; id: number }
 
/**
 * Union of all entity ref types supported in Euclid chat: geometric objects
 * and foundation references. Narrow on the `type` discriminant.
 */
export type EuclidEntityRef = GeometricEntityRef | FoundationEntityRef
 
/**
 * Type guard: narrows an entity ref to the geometric variants
 * (segment, triangle, angle, point).
 *
 * @param entity - Any entity ref.
 * @returns `true` when `entity.type` is one of the geometric discriminants.
 */
export function isGeometricEntity(entity: EuclidEntityRef): entity is GeometricEntityRef {
  switch (entity.type) {
    case 'segment':
    case 'triangle':
    case 'angle':
    case 'point':
      return true
    default:
      return false
  }
}
 
/**
 * Type guard: narrows an entity ref to the foundation variants
 * (definition, postulate, common notion, proposition).
 *
 * @param entity - Any entity ref.
 * @returns `true` when `entity.type` is one of the foundation discriminants.
 */
export function isFoundationEntity(entity: EuclidEntityRef): entity is FoundationEntityRef {
  switch (entity.type) {
    case 'definition':
    case 'postulate':
    case 'commonNotion':
    case 'proposition':
      return true
    default:
      return false
  }
}
 
/**
 * Build the citation key string for a foundation reference, in the format
 * used by CitationPopover (e.g. "Def.15", "Post.1", "C.N.3", "I.47").
 *
 * @param entity - The foundation reference to format.
 * @returns The type-specific prefix followed by the entity's `id`.
 */
export function foundationToCitationKey(entity: FoundationEntityRef): string {
  // Every key is "<prefix><id>"; only the prefix depends on the variant.
  const keyWith = (prefix: string): string => `${prefix}${entity.id}`

  switch (entity.type) {
    case 'definition':
      return keyWith('Def.')
    case 'postulate':
      return keyWith('Post.')
    case 'commonNotion':
      return keyWith('C.N.')
    case 'proposition':
      return keyWith('I.')
  }
}
 
/**
 * One piece of parsed chat text, discriminated on `kind`.
 * - 'text': a plain run of the input, to be rendered as-is.
 * - 'entity': a recognised geometric marker; `text` is the display string
 *   for it and `entity` is the structured reference. Only geometric refs
 *   are carried here — foundation refs are not part of this union.
 */
export type TextSegment =
  | { kind: 'text'; text: string }
  | { kind: 'entity'; text: string; entity: GeometricEntityRef }
 
/**
 * Human-readable rendering of a marker's labels.
 *
 * Triangles and angles get their conventional symbol prefix ("△ABC", "∠ABC");
 * segments, points and any unrecognised tag render as the bare labels.
 *
 * @param tag - Marker tag (e.g. 'seg', 'tri', 'ang', 'pt').
 * @param labels - The label letters from the marker.
 * @returns The string to show in place of the marker.
 */
function displayText(tag: string, labels: string): string {
  if (tag === 'tri') {
    return `△${labels}`
  }
  if (tag === 'ang') {
    return `∠${labels}`
  }
  // 'seg', 'pt' and anything else: labels unchanged.
  return labels
}
 
/**
 * Turn a marker's tag and labels into a structured entity reference.
 *
 * Each tag requires an exact label count: 'seg' → 2, 'tri' → 3, 'ang' → 3,
 * 'pt' → 1. Labels are assigned positionally, one character each.
 *
 * @param tag - Marker tag.
 * @param labels - The label letters from the marker.
 * @returns The entity ref, or `null` for an unknown tag or wrong label count.
 */
function buildEntity(tag: string, labels: string): GeometricEntityRef | null {
  // The tag is tested before `labels` is touched, so an unknown tag never
  // dereferences `labels`.
  if (tag === 'seg' && labels.length === 2) {
    return { type: 'segment', from: labels[0], to: labels[1] }
  }
  if (tag === 'tri' && labels.length === 3) {
    return { type: 'triangle', vertices: [labels[0], labels[1], labels[2]] }
  }
  if (tag === 'ang' && labels.length === 3) {
    return { type: 'angle', points: [labels[0], labels[1], labels[2]] }
  }
  if (tag === 'pt' && labels.length === 1) {
    return { type: 'point', label: labels[0] }
  }
  return null
}
 
/**
 * Rewrite LaTeX-style geometric notation found in voice transcripts into the
 * `{tag:LABELS}` marker syntax used by the chat renderer.
 *
 * Why this is done after the fact rather than by prompting: the voice model
 * produces speech, and the transcript is a byproduct of that audio. Asking it
 * to emit {seg:AB} would make it read the braces aloud. Left alone it says
 * "segment A B", which its transcript formatter renders as LaTeX `\( AB \)`;
 * converting that here gives the shared chat history hoverable highlights.
 * The text chat model only produces text, so it can be prompted to write
 * markers directly and does not need this step.
 *
 * Rewrites, applied in this order over the whole string:
 *   \( \triangle ABC \) → {tri:ABC}
 *   \( \angle ABC \)    → {ang:ABC}
 *   \( AB = CD \)       → {seg:AB} = {seg:CD}
 *   \( AB \)            → {seg:AB}
 *   \( A \)             → {pt:A}
 *
 * @param text - Transcript text, possibly containing `\( … \)` spans.
 * @returns The text with every recognised span replaced; other text is untouched.
 */
export function latexToMarkers(text: string): string {
  const rewriteRules: ReadonlyArray<readonly [RegExp, string]> = [
    // Triangle: \triangle followed by three uppercase letters.
    [/\\\(\s*\\triangle\s+([A-Z]{3})\s*\\\)/g, '{tri:$1}'],
    // Angle: \angle followed by three uppercase letters.
    [/\\\(\s*\\angle\s+([A-Z]{3})\s*\\\)/g, '{ang:$1}'],
    // Segment equation: two uppercase pairs joined by "=".
    [/\\\(\s*([A-Z]{2})\s*=\s*([A-Z]{2})\s*\\\)/g, '{seg:$1} = {seg:$2}'],
    // Lone pair of uppercase letters is a segment.
    [/\\\(\s*([A-Z]{2})\s*\\\)/g, '{seg:$1}'],
    // Lone uppercase letter is a point.
    [/\\\(\s*([A-Z])\s*\\\)/g, '{pt:$1}'],
  ]

  let converted = text
  for (const [pattern, marker] of rewriteRules) {
    converted = converted.replace(pattern, marker)
  }
  return converted
}
 
// Matches one of two marker shapes:
//   {tag:LABELS} — tag is seg|tri|ang|pt, LABELS is one or more uppercase letters
//                  (capture groups 1 and 2)
//   {tag:N}      — tag is def|post|cn|prop, N is one or more digits
//                  (capture groups 3 and 4)
// Only the groups of the alternative that matched are defined; the other pair is undefined.
// The regex has the `g` flag, so `exec` advances `lastIndex` on this shared object.
const MARKER_RE = /\{(seg|tri|ang|pt):([A-Z]+)\}|\{(def|post|cn|prop):(\d+)\}/g
 
/**
 * Split text into plain-text runs and geometric entity segments.
 *
 * Each well-formed geometric marker ({seg:AB}, {tri:ABC}, {ang:ABC}, {pt:A})
 * becomes an 'entity' segment; everything between markers becomes 'text'
 * segments, in source order. Markers that do not yield a geometric entity —
 * a wrong label count such as {seg:ABC}, or a foundation marker such as
 * {def:15} — are not consumed and stay verbatim inside the surrounding text.
 *
 * @param text - The chat text to parse.
 * @param _knownLabels - Accepted for API compatibility but not used for
 *   filtering — the LLM explicitly marks entities so we trust its output.
 * @returns The segments in order. Text with no usable markers (including the
 *   empty string) yields a single 'text' segment holding the whole input.
 */
export function parseGeometricEntities(text: string, _knownLabels?: Set<string>): TextSegment[] {
  const result: TextSegment[] = []
  let lastIndex = 0

  // MARKER_RE is a shared global regex; rewind it so scanning starts at 0.
  MARKER_RE.lastIndex = 0

  let match: RegExpExecArray | null
  while ((match = MARKER_RE.exec(text)) !== null) {
    const [full, tag, labels] = match

    // Groups 1 and 2 are only populated by the geometric alternative. When a
    // foundation marker matched they are undefined, so skip it explicitly
    // instead of asserting the values are present.
    if (full === undefined || tag === undefined || labels === undefined) continue

    const entity = buildEntity(tag, labels)
    // Invalid label count: leave the raw marker in the text.
    if (entity === null) continue

    // Flush any text between the previous consumed marker and this one.
    if (match.index > lastIndex) {
      result.push({ kind: 'text', text: text.slice(lastIndex, match.index) })
    }

    result.push({
      kind: 'entity',
      text: displayText(tag, labels),
      entity,
    })

    lastIndex = match.index + full.length
  }

  // Flush whatever follows the last consumed marker.
  if (lastIndex < text.length) {
    result.push({ kind: 'text', text: text.slice(lastIndex) })
  }

  // Nothing was pushed (empty input): still return one text segment.
  if (result.length === 0) {
    return [{ kind: 'text', text }]
  }

  return result
}