All files / web/src/lib/tasks flowchart-embed.ts

0% Statements 0/140
0% Branches 0/1
0% Functions 0/1
0% Lines 0/140

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141                                                                                                                                                                                                                                                                                         
import { eq } from 'drizzle-orm'
import { db, schema } from '@/db'
import { generateFlowchartEmbeddings, EMBEDDING_VERSION } from '@/lib/flowcharts/embedding'
import { invalidateEmbeddingCache } from '@/lib/flowcharts/embedding-search'
import { createTask } from '../task-manager'
import type { FlowchartEmbedEvent } from './events'

/**
 * Input for the flowchart embedding task.
 *
 * Passed to `startFlowchartEmbedding`; an empty object processes every
 * published flowchart whose embedding is missing or outdated.
 */
export interface FlowchartEmbedInput {
  /**
   * If provided, only embed this specific flowchart.
   *
   * The task only looks at published flowcharts. When the matching flowchart
   * already has the current embedding version it is re-embedded anyway; when
   * no published flowchart has this id, nothing is embedded.
   */
  flowchartId?: string
}

/**
 * Output from the flowchart embedding task.
 *
 * The same three values are also sent in the `embed_complete` event.
 */
export interface FlowchartEmbedOutput {
  /** Number of flowcharts whose embeddings were generated and stored without error */
  embeddedCount: number
  /**
   * Number of published flowcharts that were not selected for embedding in
   * this run (published total minus the number selected). Flowcharts that
   * were selected but failed are not counted here.
   */
  skippedCount: number
  /** Id and title of each successfully embedded flowchart, in processing order */
  flowcharts: Array<{ id: string; title: string }>
}

/**
 * Kick off a `flowchart-embed` background task.
 *
 * The task loads every published flowchart and selects the ones whose stored
 * `embeddingVersion` is absent or differs from `EMBEDDING_VERSION`. When
 * `input.flowchartId` is set, the selection is narrowed to that flowchart;
 * if it is published but already up to date it is re-embedded regardless,
 * and if it is not among the published flowcharts the run processes nothing.
 *
 * Events emitted, in order: one `embed_started`, then one `embed_progress`
 * per flowchart (plus `embed_error` when that flowchart fails), and finally
 * `embed_complete`. A failure on one flowchart does not stop the others.
 * The embedding cache is invalidated once the loop ends.
 *
 * @param input - Optional restriction to a single flowchart.
 * @returns The string that `createTask` resolves with (presumably the task
 *   id — confirm against the task manager).
 */
export async function startFlowchartEmbedding(input: FlowchartEmbedInput): Promise<string> {
  return createTask<FlowchartEmbedInput, FlowchartEmbedOutput, FlowchartEmbedEvent>(
    'flowchart-embed',
    input,
    async (handle, config) => {
      // Only published flowcharts are candidates.
      const published = await db.query.teacherFlowcharts.findMany({
        where: eq(schema.teacherFlowcharts.status, 'published'),
        columns: {
          id: true,
          title: true,
          description: true,
          difficulty: true,
          embeddingVersion: true,
        },
      })

      // A row is up to date only when it has a version and that version is current.
      let pending = published.filter(
        (row) => !(row.embeddingVersion && row.embeddingVersion === EMBEDDING_VERSION)
      )

      const targetId = config.flowchartId
      if (targetId) {
        pending = pending.filter((row) => row.id === targetId)
        if (pending.length === 0) {
          // The target is not stale; if it is published, re-embed it anyway.
          const upToDateTarget = published.find((row) => row.id === targetId)
          if (upToDateTarget) {
            pending = [upToDateTarget]
          }
        }
      }

      const total = pending.length
      const skippedCount = published.length - total

      handle.emit({
        type: 'embed_started',
        totalFlowcharts: total,
        skippedCount,
      })

      handle.setProgress(0, `Processing 0/${total} flowcharts`)

      const embedded: Array<{ id: string; title: string }> = []
      let processed = 0

      for (const flowchart of pending) {
        // Cancellation is honoured between flowcharts, never mid-flowchart.
        if (handle.isCancelled()) {
          break
        }

        handle.emit({
          type: 'embed_progress',
          currentIndex: processed,
          totalFlowcharts: total,
          flowchartId: flowchart.id,
          flowchartTitle: flowchart.title,
        })

        try {
          const vectors = await generateFlowchartEmbeddings({
            title: flowchart.title,
            description: flowchart.description,
            topicDescription: null,
            difficulty: flowchart.difficulty,
          })

          await db
            .update(schema.teacherFlowcharts)
            .set({
              embedding: vectors.embedding,
              promptEmbedding: vectors.promptEmbedding,
              embeddingVersion: EMBEDDING_VERSION,
            })
            .where(eq(schema.teacherFlowcharts.id, flowchart.id))

          embedded.push({ id: flowchart.id, title: flowchart.title })
        } catch (err) {
          // Report the failure and move on; one bad flowchart must not abort the run.
          let message: string
          if (err instanceof Error) {
            message = err.message
          } else {
            message = String(err)
          }
          handle.emit({
            type: 'embed_error',
            flowchartId: flowchart.id,
            flowchartTitle: flowchart.title,
            error: message,
          })
        }

        // Progress advances whether the flowchart succeeded or failed.
        processed += 1
        const percent = Math.round((processed / total) * 100)
        handle.setProgress(percent, `Processing ${processed}/${total} flowcharts`)
      }

      // Drop cached embeddings so searches see the freshly stored vectors.
      invalidateEmbeddingCache()

      const summary: FlowchartEmbedOutput = {
        embeddedCount: embedded.length,
        skippedCount,
        flowcharts: embedded,
      }

      handle.emit({
        type: 'embed_complete',
        ...summary,
      })

      handle.complete(summary)
    }
  )
}