Press n or j to go to the next uncovered block, b, p or k for the previous block.
import { readFileSync } from 'fs'
import { join } from 'path'
import { db, schema } from '@/db'
import { bufferToEmbedding, EMBEDDING_DIMENSIONS } from './embedding'

/**
 * Topic taxonomy for labeling flowchart clusters.
 *
 * In production, the taxonomy is stored in the database (`topic_taxonomy` table)
 * and can be regenerated via the admin panel.
 *
 * In development, the taxonomy falls back to static files generated by
 * `scripts/generateTopicTaxonomy.ts` if the database is empty.
 *
 * At runtime, the browse API loads the taxonomy once and includes all label
 * embeddings in the distance matrix alongside flowchart embeddings. The client
 * then assigns the nearest label to each cluster.
 */

/**
 * In-memory taxonomy. The three arrays are parallel: index `i` of each one
 * describes the same label.
 */
export interface Taxonomy {
  labels: string[]
  embeddings: Float32Array[]
  breadths: number[]
}

/** Shape of the `topic-taxonomy.json` index file read by the file fallback. */
interface TaxonomyIndex {
  version: string
  model: string
  labels: string[]
}

/** Prefix that distinguishes taxonomy label IDs from other IDs. */
const LABEL_PREFIX = 'label:'

/** Module-level cache filled by `loadTaxonomy`; `null` means "not loaded yet". */
let cachedTaxonomy: Taxonomy | null = null

/**
 * Clear the taxonomy cache. Call this after regenerating the taxonomy so the
 * next `loadTaxonomy()` call reloads it.
 */
export function clearTaxonomyCache(): void {
  cachedTaxonomy = null
}

/**
 * Load the topic taxonomy from database (preferred) or disk (fallback).
 * Cached after first call.
 *
 * @returns The cached taxonomy if present; otherwise the database taxonomy when
 *   the table has at least one row, else the taxonomy read from static files.
 * @throws Whatever the database query or the file loader throws; nothing is
 *   cached in that case.
 */
export async function loadTaxonomy(): Promise<Taxonomy> {
  if (cachedTaxonomy) return cachedTaxonomy

  // Try loading from database first
  const dbTaxonomy = await loadTaxonomyFromDb()
  if (dbTaxonomy) {
    cachedTaxonomy = dbTaxonomy
    return cachedTaxonomy
  }

  // Fall back to files (for development or if DB is empty)
  cachedTaxonomy = loadTaxonomyFromFiles()
  return cachedTaxonomy
}

/**
 * Load taxonomy from database.
 *
 * @returns The taxonomy built from every row of `schema.topicTaxonomy`, or
 *   `null` if the table has no rows.
 */
async function loadTaxonomyFromDb(): Promise<Taxonomy | null> {
  const rows = await db.select().from(schema.topicTaxonomy)

  if (rows.length === 0) {
    return null
  }

  const labels: string[] = []
  const embeddings: Float32Array[] = []
  const breadths: number[] = []

  for (const row of rows) {
    labels.push(row.label)
    embeddings.push(bufferToEmbedding(row.embedding))
    breadths.push(row.breadth ?? 0) // Default to 0 if not set (legacy data)
  }

  return { labels, embeddings, breadths }
}

/**
 * Load taxonomy from static files.
 *
 * Reads `topic-taxonomy.json` (label index) and `topic-taxonomy.bin`
 * (packed float32 embeddings, `EMBEDDING_DIMENSIONS` floats per label, stored
 * back to back in label order) from `src/lib/flowcharts` under the current
 * working directory.
 *
 * @returns The taxonomy with every breadth set to 0 (the files carry no
 *   breadth data).
 * @throws If either file cannot be read, if the JSON index has no `labels`
 *   array, or if the binary file is too short to hold one embedding per label.
 */
function loadTaxonomyFromFiles(): Taxonomy {
  const dir = join(process.cwd(), 'src/lib/flowcharts')
  const indexPath = join(dir, 'topic-taxonomy.json')
  const binPath = join(dir, 'topic-taxonomy.bin')

  // Validate the one field this loader relies on, so a malformed index fails
  // with a clear message instead of an opaque TypeError further down.
  const parsed: unknown = JSON.parse(readFileSync(indexPath, 'utf-8'))
  if (
    typeof parsed !== 'object' ||
    parsed === null ||
    !Array.isArray((parsed as { labels?: unknown }).labels)
  ) {
    throw new Error('topic-taxonomy.json is malformed: expected an object with a "labels" array')
  }
  const index = parsed as TaxonomyIndex

  const binBuffer = readFileSync(binPath)
  const bytesPerLabel = EMBEDDING_DIMENSIONS * Float32Array.BYTES_PER_ELEMENT

  // Buffer.subarray clamps out-of-range offsets, so a truncated or mismatched
  // .bin file would otherwise yield silently zero-filled embeddings.
  const expectedBytes = index.labels.length * bytesPerLabel
  if (binBuffer.byteLength < expectedBytes) {
    throw new Error(
      `topic-taxonomy.bin is too small: expected at least ${expectedBytes} bytes ` +
        `for ${index.labels.length} labels, got ${binBuffer.byteLength}`
    )
  }

  const embeddings: Float32Array[] = []
  for (let i = 0; i < index.labels.length; i++) {
    const offset = i * bytesPerLabel
    const slice = binBuffer.subarray(offset, offset + bytesPerLabel)
    // Copy into a fresh ArrayBuffer so the Float32Array is properly aligned
    const ab = new ArrayBuffer(bytesPerLabel)
    new Uint8Array(ab).set(slice)
    embeddings.push(new Float32Array(ab))
  }

  // Static files don't have breadth data - default to 0
  const breadths = new Array<number>(index.labels.length).fill(0)

  return { labels: index.labels, embeddings, breadths }
}

/** Build a label ID from a label string (e.g. "label:Fraction Arithmetic") */
export function labelId(label: string): string {
  return `${LABEL_PREFIX}${label}`
}

/** Check if an ID is a taxonomy label ID (i.e. starts with the label prefix) */
export function isLabelId(id: string): boolean {
  return id.startsWith(LABEL_PREFIX)
}

/**
 * Extract the label text from a label ID.
 *
 * Drops the first `LABEL_PREFIX.length` characters without checking that the
 * prefix is actually present, so callers should test with `isLabelId` first.
 */
export function labelFromId(id: string): string {
  return id.slice(LABEL_PREFIX.length)
}