All files / web/src/components/toys/euclid/chat parseGeometricEntities.ts

78.75% Statements 152/193
92.3% Branches 24/26
42.85% Functions 3/7
78.75% Lines 152/193

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 1941x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x               1x 1x               1x 1x 1x                       1x 1x 1x 1x 1x 1x 10x 10x 10x 5x 10x 1x 10x 1x 10x 3x 10x   10x 10x 1x 1x 15x 15x 15x 7x 5x 5x 2x 15x 2x 1x 1x 1x 15x 2x 1x 1x 1x 15x 4x 3x 3x 1x 15x   15x 15x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x                             1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 15x 15x 15x 15x 15x 15x 15x 15x 15x 15x 15x 10x 10x 15x 5x 5x 10x 10x 10x 10x 10x 10x 10x 10x 10x 15x 15x 15x 8x 8x 15x 15x 15x 1x 1x 14x 14x 14x  
/**
 * Parse chat text for structured geometric entity markers.
 *
 * The LLM is instructed to use markers like:
 *   {seg:AB}   → segment A–B
 *   {tri:ABC}  → triangle △ABC
 *   {ang:ABC}  → angle ∠ABC (vertex is middle letter)
 *   {pt:A}     → point A
 *
 * This is far more reliable than guessing from free-form text.
 * If the model doesn't use markers, text renders as-is with no highlights.
 */
 
/**
 * Reference to a geometric object named by its point labels.
 * Discriminated on `type`; each label is a single character taken from a
 * `{seg:..}` / `{tri:..}` / `{ang:..}` / `{pt:..}` marker.
 */
export type GeometricEntityRef =
  // Segment between two labelled points, in the order written in the marker
  | { type: 'segment'; from: string; to: string }
  // Triangle given by its three vertex labels
  | { type: 'triangle'; vertices: [string, string, string] }
  // Angle given by three labels in marker order (the vertex is the middle one)
  | { type: 'angle'; points: [string, string, string] }
  // A single labelled point
  | { type: 'point'; label: string }
 
/**
 * Reference to a numbered foundational statement (definition, postulate,
 * common notion or proposition). Discriminated on `type`; `id` is the
 * statement's number.
 */
export type FoundationEntityRef =
  | { type: 'definition'; id: number }
  | { type: 'postulate'; id: number }
  | { type: 'commonNotion'; id: number }
  | { type: 'proposition'; id: number }
 
/**
 * Union of all entity ref types supported in Euclid chat.
 * Use `isGeometricEntity` / `isFoundationEntity` to narrow to one side.
 */
export type EuclidEntityRef = GeometricEntityRef | FoundationEntityRef
 
/**
 * Type guard: true when `entity` is one of the geometric variants
 * (segment, triangle, angle or point).
 */
export function isGeometricEntity(entity: EuclidEntityRef): entity is GeometricEntityRef {
  switch (entity.type) {
    case 'segment':
    case 'triangle':
    case 'angle':
    case 'point':
      return true
    default:
      return false
  }
}
 
/**
 * Type guard: true when `entity` is one of the foundation variants
 * (definition, postulate, common notion or proposition).
 */
export function isFoundationEntity(entity: EuclidEntityRef): entity is FoundationEntityRef {
  switch (entity.type) {
    case 'definition':
    case 'postulate':
    case 'commonNotion':
    case 'proposition':
      return true
    default:
      return false
  }
}
 
/**
 * Format a foundation reference as the citation key understood by
 * CitationPopover: a per-kind prefix followed by the statement number,
 * e.g. "Def.15", "Post.1", "C.N.3", "I.47".
 */
export function foundationToCitationKey(entity: FoundationEntityRef): string {
  const { type: kind, id: num } = entity
  // Exhaustive over FoundationEntityRef['type']; every branch returns directly.
  switch (kind) {
    case 'definition':
      return `Def.${num}`
    case 'postulate':
      return `Post.${num}`
    case 'commonNotion':
      return `C.N.${num}`
    case 'proposition':
      return `I.${num}`
  }
}
 
/**
 * One piece of parsed chat text, as produced by `parseGeometricEntities`.
 * Discriminated on `kind`.
 */
export type TextSegment =
  // Literal text copied verbatim from the input
  | { kind: 'text'; text: string }
  // A recognised marker: `text` is what to display, `entity` is what it refers to
  | { kind: 'entity'; text: string; entity: GeometricEntityRef }
 
/**
 * Human-readable text for a marker.
 *
 * Triangles and angles get their symbol prefixed ("△ABC", "∠ABC");
 * segments, points and any unrecognised tag show the bare labels.
 */
function displayText(tag: string, labels: string): string {
  if (tag === 'tri') {
    return `△${labels}` // "△ABC"
  }
  if (tag === 'ang') {
    return `∠${labels}` // "∠ABC"
  }
  // 'seg' → "AB", 'pt' → "A", anything else → labels unchanged
  return labels
}
 
/**
 * Turn a marker's tag and label string into an entity reference.
 *
 * Each tag requires an exact number of label characters
 * (seg: 2, tri: 3, ang: 3, pt: 1). Returns null when the count is wrong
 * or the tag is not recognised.
 */
function buildEntity(tag: string, labels: string): GeometricEntityRef | null {
  // `labels` is only inspected once the tag is recognised, so an unknown
  // tag yields null without touching it.
  if (tag === 'seg') {
    return labels.length === 2 ? { type: 'segment', from: labels[0], to: labels[1] } : null
  }
  if (tag === 'tri') {
    return labels.length === 3
      ? { type: 'triangle', vertices: [labels[0], labels[1], labels[2]] }
      : null
  }
  if (tag === 'ang') {
    return labels.length === 3
      ? { type: 'angle', points: [labels[0], labels[1], labels[2]] }
      : null
  }
  if (tag === 'pt') {
    return labels.length === 1 ? { type: 'point', label: labels[0] } : null
  }
  return null
}
 
/**
 * Rewrite LaTeX-style geometric notation found in voice transcripts into our
 * `{tag:LABELS}` marker syntax.
 *
 * Why convert after the fact rather than prompt the voice model for markers?
 * The voice model produces *speech*, and the transcript is only a byproduct of
 * that audio. Asked to emit {seg:AB}, it would read the marker aloud ("open
 * brace seg colon A B close brace"). Left alone it says "segment A B", which its
 * transcript formatter renders as LaTeX: \( AB \). Converting that LaTeX gives
 * the shared chat history the same hoverable entity highlights.
 *
 * The text chat model has no such problem: it only ever produces text, so it
 * can be prompted to write {seg:AB} directly.
 *
 * Rules, applied in this order:
 *   \( \triangle ABC \) → {tri:ABC}
 *   \( \angle ABC \)    → {ang:ABC}
 *   \( AB = CD \)       → {seg:AB} = {seg:CD}
 *   \( AB \)            → {seg:AB}
 *   \( A \)             → {pt:A}
 */
export function latexToMarkers(text: string): string {
  // Ordered [pattern, replacement] pairs; each rule runs over the output of the previous one.
  const rewrites: ReadonlyArray<readonly [RegExp, string]> = [
    // triangle: \triangle followed by three uppercase letters
    [/\\\(\s*\\triangle\s+([A-Z]{3})\s*\\\)/g, '{tri:$1}'],
    // angle: \angle followed by three uppercase letters
    [/\\\(\s*\\angle\s+([A-Z]{3})\s*\\\)/g, '{ang:$1}'],
    // segment equation: two uppercase letters on each side of "="
    [/\\\(\s*([A-Z]{2})\s*=\s*([A-Z]{2})\s*\\\)/g, '{seg:$1} = {seg:$2}'],
    // lone segment: exactly two uppercase letters
    [/\\\(\s*([A-Z]{2})\s*\\\)/g, '{seg:$1}'],
    // lone point: exactly one uppercase letter
    [/\\\(\s*([A-Z])\s*\\\)/g, '{pt:$1}'],
  ]

  let converted = text
  for (const [pattern, marker] of rewrites) {
    converted = converted.replace(pattern, marker)
  }
  return converted
}
 
// Match {tag:LABELS} where tag is seg|tri|ang|pt and LABELS is uppercase letters,
// or {tag:N} where tag is def|post|cn|prop and N is a number.
// Capture groups: 1 = geometric tag, 2 = labels; 3 = foundation tag, 4 = number.
// Only the groups of the alternative that matched are defined.
// The regex carries the `g` flag, so it keeps `lastIndex` between `exec` calls;
// reset `lastIndex` to 0 before starting a new scan.
const MARKER_RE = /\{(seg|tri|ang|pt):([A-Z]+)\}|\{(def|post|cn|prop):(\d+)\}/g
 
/**
 * Split chat text into plain-text and entity segments based on
 * `{seg:..}`, `{tri:..}`, `{ang:..}` and `{pt:..}` markers.
 *
 * Markers that do not produce a geometric entity (wrong label count, or the
 * foundation markers `{def:N}` / `{post:N}` / `{cn:N}` / `{prop:N}`) are left
 * in place as part of the surrounding text.
 *
 * `_knownLabels` is accepted for API compatibility but not used for filtering —
 * the LLM explicitly marks entities so we trust its output.
 *
 * Always returns at least one segment; input without any usable marker comes
 * back as a single text segment holding the whole string.
 */
export function parseGeometricEntities(text: string, _knownLabels?: Set<string>): TextSegment[] {
  const segments: TextSegment[] = []
  // End offset of the last marker that was turned into an entity
  let cursor = 0

  // MARKER_RE is a shared global regex: rewind it before scanning
  MARKER_RE.lastIndex = 0

  for (let hit = MARKER_RE.exec(text); hit !== null; hit = MARKER_RE.exec(text)) {
    const tag = hit[1]
    const labels = hit[2]
    // Foundation markers fill groups 3/4 instead, leaving these undefined
    if (tag === undefined || labels === undefined) continue

    const entity = buildEntity(tag, labels)
    if (entity === null) continue

    // Flush any plain text sitting between the previous entity and this marker
    const start = hit.index
    if (start > cursor) {
      segments.push({ kind: 'text', text: text.slice(cursor, start) })
    }

    segments.push({ kind: 'entity', text: displayText(tag, labels), entity })

    // After a successful global exec, lastIndex is the offset just past the match
    cursor = MARKER_RE.lastIndex
  }

  // Whatever follows the final entity is plain text
  if (cursor < text.length) {
    segments.push({ kind: 'text', text: text.slice(cursor) })
  }

  // Nothing collected (e.g. empty input): hand back the input as one text segment
  return segments.length > 0 ? segments : [{ kind: 'text', text }]
}