markup route.ts

0% Statements 0/208
0% Branches 0/1
0% Functions 0/1
0% Lines 0/208
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  /**
 * API route for async entity markup of voice transcripts.
 *
 * POST /api/realtime/euclid/markup
 * Body: { text, propositionId, pointLabels, strict? }
 * Response: { markedText }
 *
 * Uses a fast model to insert {seg:AB}, {pt:A}, {def:N}, {post:N}, {cn:N}, {prop:N}
 * markers into plain text.
 *
 * Two modes:
 *  - Default (strict: false): sanity check via word overlap ratio — at least 60% of
 *    original words must survive marker stripping. Catches hallucinated rewrites
 *    while tolerating minor rephrasing. Suitable for LLM-generated text.
 *  - Strict (strict: true): validates that the remaining text (markers stripped) is a
 *    subsequence of the original. Use for user-written text where we must preserve
 *    every word exactly.
 */

import { withAuth } from '@/lib/auth/withAuth'
import { recordOpenAiChatUsage } from '@/lib/ai-usage/helpers'
import { AiFeature } from '@/lib/ai-usage/features'
import { stripEntityMarkers } from '@/lib/character/parseEntityMarkers'
import { EUCLID_ENTITY_MARKERS } from '@/components/toys/euclid/euclidEntityMarkers'
import { MARKER_RE, validateMarkupStrict, wordOverlapRatio } from './validation'

const expandMarkers = (text: string) => stripEntityMarkers(text, EUCLID_ENTITY_MARKERS)

/** Minimum word overlap ratio for non-strict (sanity check) mode. */
const SANITY_OVERLAP_THRESHOLD = 0.6

export const POST = withAuth(async (request, { userId }) => {
  const body = await request.json()
  const {
    text,
    propositionId,
    pointLabels,
    strict = false,
  } = body as {
    text: string
    propositionId?: number
    pointLabels?: string[]
    strict?: boolean
  }

  if (!text || typeof text !== 'string') {
    return Response.json({ error: 'text is required' }, { status: 400 })
  }

  // Skip if text already has markers
  if (/\{(seg|tri|ang|pt|def|post|cn|prop):/.test(text)) {
    return Response.json({ markedText: text })
  }

  const apiKey = process.env.LLM_OPENAI_API_KEY || process.env.OPENAI_API_KEY
  if (!apiKey) {
    return Response.json({ markedText: text }) // Silently return original on missing key
  }

  const pointList = pointLabels?.length ? pointLabels.join(', ') : 'unknown'
  const propContext = propositionId ? `Current proposition: I.${propositionId}` : ''

  const systemPrompt = `You are a precise text annotation tool. Your ONLY job is to insert marker tags around entity references. You are NOT an editor, NOT a proofreader, NOT a grammar checker. You must NEVER change, rewrite, rephrase, reorder, correct, or alter ANY other text — not even punctuation, whitespace, spelling, or grammar.

The input is user-written text. It may contain unconventional spelling, missing punctuation, sentence fragments, or informal phrasing — these are intentional. LEAVE EVERYTHING EXACTLY AS IT IS. Your job is markup, not correction.

ABSOLUTE RULE: Every character of the original text that is not inside a marker tag must appear EXACTLY as-is in the output — same spelling, same punctuation, same spacing. If punctuation is missing, leave it missing. If the input does NOT end with a period, the output must NOT end with a period. NEVER add, remove, or change punctuation. NEVER fix grammar or spelling.

Available markers:
  {pt:A} — for standalone point labels (single uppercase letter used as a geometric label)
  {seg:AB} — for explicit segment references (two uppercase point labels naming a segment)
  {tri:ABC} — for explicit triangle references (three uppercase point labels naming a triangle)
  {ang:ABC} — for explicit angle references (three uppercase point labels naming an angle)
  {def:N} — for "Definition N" (N is a number, e.g. {def:15})
  {post:N} — for "Postulate N" (N is a number, e.g. {post:3})
  {cn:N} — for "Common Notion N" (N is a number, e.g. {cn:1})
  {prop:N} — for "Proposition N" or "Proposition I.N" (N is JUST the number, e.g. {prop:1} NOT {prop:I.1})

DISPLAY TEXT OVERRIDE — use {tag:N|original text} to wrap text that doesn't match the canonical label:
  "Proposition I.2" → {prop:2|Proposition I.2}  (the override preserves the original phrasing)
  "my first proposition" → {prop:1|my first proposition}
  "my third postulate" → {post:3|my third postulate}
  "the fifteenth definition" → {def:15|the fifteenth definition}
  "the first common notion" → {cn:1|the first common notion}
  Only skip the override when text exactly matches the canonical form:
    "Postulate 3" → {post:3}   (exact match, no override needed)
    "Definition 15" → {def:15}  (exact match, no override needed)
    "Proposition 1" → {prop:1}  (exact match, no override needed)

Points in the current construction: [${pointList}]
${propContext}

Examples:
  Input:  "Place the compass at A and draw through B, by Postulate 3."
  Output: "Place the compass at {pt:A} and draw through {pt:B}, by {post:3}."

  Input:  "CA equals AB by Definition 15."
  Output: "{seg:CA} equals {seg:AB} by {def:15}."

  Input:  "By Proposition I.1, we constructed an equilateral triangle on segment B C."
  Output: "By {prop:1|Proposition I.1}, we constructed an equilateral triangle on segment {seg:BC}."

  Input:  "you're tackling Proposition I.2, are you?"
  Output: "you're tackling {prop:2|Proposition I.2}, are you?"

  Input:  "Draw a straight line from A to B by my first postulate."
  Output: "Draw a straight line from {pt:A} to {pt:B} by {post:1|my first postulate}."

  Input:  "We shall call upon Proposition I.1—my first proposition."
  Output: "We shall call upon {prop:1|Proposition I.1}—{prop:1|my first proposition}."

  Input:  "Use the third postulate to draw a circle."
  Output: "Use {post:3|the third postulate} to draw a circle."

  Input:  "Let us begin with the first step."
  Output: "Let us begin with the first step."

  Input:  "Describe circle with center A through B noting that we also have triangle △ABD"
  Output: "Describe circle with center {pt:A} through {pt:B} noting that we also have triangle {tri:ABD}"

  Input:  "we need ∠ABC to be a right angle"
  Output: "we need {ang:ABC} to be a right angle"

  Input:  "triangle △ABD is equilateral"
  Output: "triangle {tri:ABD} is equilateral"

  Input:  "so we no that segment AB is equil to segment CD rite"
  Output: "so we no that segment {seg:AB} is equil to segment {seg:CD} rite"

CRITICAL RULES — read carefully:
- NEVER add, remove, or change punctuation. If the input has no trailing period, the output must have no trailing period. Commas stay commas. Periods stay periods. Dashes stay dashes.
- The marker REPLACES only the reference words, keeping all surrounding punctuation intact.
- ONLY wrap specific named references, NOT generic nouns. "point A" → "point {pt:A}" but "a point" → leave as-is.
- "segment B C" or "B C" (as a geometric reference) → {seg:BC}. But "a segment" or "such a segment" → leave as-is.
- "triangle A B C" (naming specific points) → {tri:ABC}. But "equilateral triangle" or "a triangle" without point labels → leave as-is.
- For propositions: "Proposition I.1" or "Proposition 1" → {prop:1|...}. The marker VALUE is JUST the number. Never include "I." in the marker value. Use the override to preserve the original text.
- For foundation ordinals: ONLY wrap ordinals that explicitly refer to a foundation. "my third postulate" → {post:3|my third postulate}. But "the first step" → leave as-is because "first" modifies "step", not a foundation.
- When a foundation is referred to by both its formal name AND an informal paraphrase in the same phrase (e.g., "Proposition I.1—my first proposition"), mark BOTH references separately with overrides.
- Do NOT mark up "I", "THE", "We", "No", or any word that is not a geometric point label.
- Do NOT mark up the word "point" or "segment" or "triangle" itself — only the label letters.
- Unicode math symbols like △ and ∠ before point labels are rendered automatically from the marker. Strip them: "△ABD" → {tri:ABD}, "∠ABC" → {ang:ABC}. Do NOT use a display override for these symbols.
- Do NOT invent references. If the text says "triangle" without naming specific points, leave it alone.
- If unsure whether something is a geometric reference, leave it unmarked.
- NEVER correct spelling, grammar, or punctuation errors. The input is speech-to-text and errors are expected. Preserve them exactly.`

  try {
    const response = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'gpt-4o-mini',
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: text },
        ],
        temperature: 0,
        max_tokens: Math.max(text.length * 4, 512),
      }),
    })

    if (!response.ok) {
      console.error('[euclid-markup] API error:', response.status)
      return Response.json({ markedText: text })
    }

    const result = await response.json()
    recordOpenAiChatUsage(result, { userId, feature: AiFeature.EUCLID_MARKUP })
    const markedText = result.choices?.[0]?.message?.content?.trim()

    if (!markedText) {
      return Response.json({ markedText: text })
    }

    if (strict) {
      // Strict: remaining text must be a character-level subsequence of the original
      if (!validateMarkupStrict(text, markedText, expandMarkers)) {
        console.warn(
          '[euclid-markup] Strict validation failed — model rewrote surrounding text. Returning original.'
        )
        console.warn('[euclid-markup] Original:', JSON.stringify(text))
        console.warn('[euclid-markup] Model   :', JSON.stringify(markedText))
        return Response.json({ markedText: text })
      }
    } else {
      // Non-strict sanity check: most original words should survive marker stripping
      const stripped = markedText.replace(MARKER_RE, '')
      const overlap = wordOverlapRatio(text, stripped)
      if (overlap < SANITY_OVERLAP_THRESHOLD) {
        console.warn(
          '[euclid-markup] Sanity check failed — word overlap %.0f%% < %.0f%% threshold. Returning original.',
          overlap * 100,
          SANITY_OVERLAP_THRESHOLD * 100
        )
        console.warn('[euclid-markup] Original:', JSON.stringify(text))
        console.warn('[euclid-markup] Model   :', JSON.stringify(markedText))
        return Response.json({ markedText: text })
      }
    }

    return Response.json({ markedText })
  } catch (err) {
    console.error('[euclid-markup] Error:', err)
    return Response.json({ markedText: text })
  }
})