diff --git a/app/build.gradle.kts b/app/build.gradle.kts index bfee331d..64094e15 100644 --- a/app/build.gradle.kts +++ b/app/build.gradle.kts @@ -154,6 +154,7 @@ dependencies { // Serialization implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.10.0") implementation("org.jetbrains.kotlinx:kotlinx-serialization-protobuf:1.10.0") + implementation("org.jetbrains.kotlinx:kotlinx-coroutines-play-services:1.9.0") // Color Picker implementation("com.github.skydoves:colorpickerview:2.4.0") @@ -168,7 +169,6 @@ dependencies { debugImplementation("androidx.compose.ui:ui-tooling") debugImplementation("androidx.compose.ui:ui-test-manifest") - implementation(kotlin("stdlib-jdk8")) // Testing testImplementation("junit:junit:4.13.2") @@ -198,6 +198,9 @@ dependencies { implementation("com.google.api-client:google-api-client-android:2.8.1") implementation("com.google.apis:google-api-services-drive:v3-rev20251210-2.0.0") + // ML Kit Handwriting Recognition + implementation("com.google.mlkit:digital-ink-recognition:19.0.0") + // Markwon (Markdown Rendering & Editing) implementation("io.noties.markwon:core:4.6.2") implementation("io.noties.markwon:editor:4.6.2") diff --git a/app/src/main/java/com/alexdremov/notate/CanvasActivity.kt b/app/src/main/java/com/alexdremov/notate/CanvasActivity.kt index 0ba531da..509b090d 100644 --- a/app/src/main/java/com/alexdremov/notate/CanvasActivity.kt +++ b/app/src/main/java/com/alexdremov/notate/CanvasActivity.kt @@ -264,6 +264,8 @@ class CanvasActivity : AppCompatActivity() { binding = ActivityMainBinding.inflate(layoutInflater) setContentView(binding.root) + viewModel.setControllerProvider { binding.canvasView.getController() } + currentCanvasPath = intent.getStringExtra("CANVAS_PATH") enableImmersiveMode() diff --git a/app/src/main/java/com/alexdremov/notate/data/HandwritingRecognitionCoordinator.kt b/app/src/main/java/com/alexdremov/notate/data/HandwritingRecognitionCoordinator.kt new file mode 100644 index 00000000..f263c4ab --- 
/dev/null +++ b/app/src/main/java/com/alexdremov/notate/data/HandwritingRecognitionCoordinator.kt @@ -0,0 +1,250 @@ +package com.alexdremov.notate.data + +import android.graphics.PointF +import android.graphics.RectF +import com.alexdremov.notate.data.region.RegionId +import com.alexdremov.notate.model.InfiniteCanvasModel +import com.alexdremov.notate.model.Stroke +import com.alexdremov.notate.model.StrokeType +import com.alexdremov.notate.util.Logger +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.FlowPreview +import kotlinx.coroutines.SupervisorJob +import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.MutableSharedFlow +import kotlinx.coroutines.flow.debounce +import kotlinx.coroutines.flow.filter +import kotlinx.coroutines.flow.launchIn +import kotlinx.coroutines.flow.onEach +import kotlinx.coroutines.isActive +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext + +@OptIn(FlowPreview::class) +class HandwritingRecognitionCoordinator( + private val model: InfiniteCanvasModel, + private val recognitionManager: HandwritingRecognitionManager, + private val isEnabledProvider: () -> Boolean, + private val onOcrUpdated: ((RectF) -> Unit)? 
= null, +) { + private val scope = CoroutineScope(Dispatchers.Default + SupervisorJob()) + private val pendingStrokes = ArrayList<Stroke>() + private val strokeUpdateFlow = MutableSharedFlow<Unit>(extraBufferCapacity = 1) + private val processingRegions = HashSet<RegionId>() + + init { + // Observe model events for new strokes + model.events + .onEach { event -> + if (event is InfiniteCanvasModel.ModelEvent.ItemsAdded) { + val newStrokes = + event.items.filterIsInstance<Stroke>().filter { + it.style != StrokeType.DASH && it.style != StrokeType.HIGHLIGHTER // Exclude erasers/selection tools and highlighters + } + if (newStrokes.isNotEmpty()) { + synchronized(pendingStrokes) { + pendingStrokes.addAll(newStrokes) + } + strokeUpdateFlow.emit(Unit) + } + } + }.launchIn(scope) + + // Debounced processing + strokeUpdateFlow + .debounce(2000) + .onEach { + if (isEnabledProvider()) { + processPendingStrokes() + } else { + synchronized(pendingStrokes) { + pendingStrokes.clear() + } + } + }.launchIn(scope) + + // Periodic sweep for unrecognized strokes (e.g. from older documents or erasures) + scope.launch { + // Trigger immediate sweep after document open + if (isEnabledProvider()) { + sweepUnrecognizedStrokes() + } + while (isActive) { + delay(15000) // Sweep every 15 seconds + if (isEnabledProvider()) { + sweepUnrecognizedStrokes() + } + } + } + } + + suspend fun sweepUnrecognizedStrokes() { + val rm = model.getRegionManager() ?: return + val activeIds = rm.getActiveRegionIds() + + for (rId in activeIds) { + // Coordination: Skip if this region is currently being processed by real-time logic + val skip = + synchronized(processingRegions) { + processingRegions.contains(rId) + } + if (skip) continue + + val region = rm.getRegionReadOnly(rId) ?: continue + + // 1. Gather all "recognizable" strokes in the region + val strokesInRegion = + region.items.filterIsInstance<Stroke>().filter { + it.style != StrokeType.DASH && it.style != StrokeType.HIGHLIGHTER + } + if (strokesInRegion.isEmpty()) continue + + // 2. 
Perform algorithmic line detection on ALL strokes in the region + val highLevelClusters = StrokeClusteringManager.clusterStrokes(strokesInRegion) + val detectedLines = highLevelClusters.flatMap { StrokeClusteringManager.segmentIntoLines(it) } + + // 3. Build a fast lookup for existing OCR blocks by their stroke sets + val existingOcrByStrokes = region.recognizedTexts.associateBy { it.strokeOrders.toSet() } + + val strokesToReRecognize = HashSet<Stroke>() + + for (line in detectedLines) { + val lineStrokeOrders = line.map { it.strokeOrder }.toSet() + + // INVARIANT CHECK: Does an OCR block exist that matches this exact line? + if (!existingOcrByStrokes.containsKey(lineStrokeOrders)) { + strokesToReRecognize.addAll(line) + } + } + + if (strokesToReRecognize.isNotEmpty()) { + Logger.d( + "OCRCoordinator", + "Sweep found ${strokesToReRecognize.size} strokes in region $rId violating OCR invariant. Processing batch...", + ) + processStrokesInternal(strokesToReRecognize.toList()) + delay(1000) // Yield more during heavy background processing + } + } + } + + private suspend fun processPendingStrokes() { + val strokesToProcess = + synchronized(pendingStrokes) { + val copy = ArrayList(pendingStrokes) + pendingStrokes.clear() + copy + } + processStrokesInternal(strokesToProcess) + } + + private suspend fun processStrokesInternal(initialStrokes: List<Stroke>) { + if (initialStrokes.isEmpty()) return + + // 1. Recursive spatial expansion to find the entire connected component + // of strokes and intersecting OCR blocks. 
+ val fullClusterSet = HashSet(initialStrokes) + val totalInvalidateArea = RectF() + initialStrokes.forEach { + if (totalInvalidateArea.isEmpty) { + totalInvalidateArea.set( + it.bounds, + ) + } else { + totalInvalidateArea.union(it.bounds) + } + } + + val intersectingOcrStrokeOrders = HashSet() + var areaChanged = true + + // Loop until no more strokes or OCR blocks are found in the expanded vicinity + while (areaChanged) { + areaChanged = false + val searchArea = RectF(totalInvalidateArea).apply { inset(-150f, -100f) } + + // A. Find all intersecting OCR blocks and "gobble" their strokes + model.getRegionManager()?.getRegionIdsInRect(searchArea)?.forEach { rId -> + val region = model.getRegionManager()?.getRegionReadOnly(rId) + region?.recognizedTexts?.forEach { ocr -> + val ocrRect = RectF(ocr.x, ocr.y, ocr.x + ocr.width, ocr.y + ocr.height) + if (RectF.intersects(ocrRect, searchArea)) { + if (intersectingOcrStrokeOrders.addAll(ocr.strokeOrders)) { + totalInvalidateArea.union(ocrRect) + areaChanged = true + } + } + } + } + + // B. Find all strokes in the search area (recognized or not) to ensure complete lines + model.getRegionManager()?.visitItemsInRect(searchArea) { item -> + if (item is Stroke && item.style != StrokeType.DASH && item.style != StrokeType.HIGHLIGHTER) { + if (fullClusterSet.add(item)) { + totalInvalidateArea.union(item.bounds) + areaChanged = true + } + } + } + } + + // Lock regions affected by the final expanded area + val affectedRegions = model.getRegionManager()?.getRegionIdsInRect(totalInvalidateArea) ?: emptyList() + synchronized(processingRegions) { + processingRegions.addAll(affectedRegions) + } + + try { + // 2. High-level Clustering (Group into paragraphs/sections) + val clusters = StrokeClusteringManager.clusterStrokes(fullClusterSet.toList()) + + // 3. Clear existing OCR for the entire affected area ONCE to prevent inter-line conflicts + model.removeRecognizedTextInRect(totalInvalidateArea) + + for (cluster in clusters) { + // 4. 
Line Segmentation (Split into individual horizontal lines) + val lines = StrokeClusteringManager.segmentIntoLines(cluster) + + // 5. Individual Recognition and Persistence + for (line in lines) { + if (line.isEmpty()) continue + val result = recognitionManager.recognizeStrokes(line) + if (result != null) { + model.addRecognizedText(result) + } + } + } + + withContext(Dispatchers.Main) { + onOcrUpdated?.invoke(totalInvalidateArea) + } + } finally { + synchronized(processingRegions) { + processingRegions.removeAll(affectedRegions.toSet()) + } + } + } + + fun stop() { + // RecognitionManager is managed externally; here we cancel our own scope so the event + // collectors and the periodic sweep loop actually stop, then drop any queued strokes. + scope.coroutineContext[kotlinx.coroutines.Job]?.cancel() + synchronized(pendingStrokes) { + pendingStrokes.clear() + } + } + + /** + * Manually triggers recognition for a set of strokes (e.g. after movement). + */ + fun triggerManualRecognition(strokes: List<Stroke>) { + if (strokes.isEmpty()) return + synchronized(pendingStrokes) { + pendingStrokes.addAll(strokes) + } + scope.launch { + strokeUpdateFlow.emit(Unit) + } + } +} diff --git a/app/src/main/java/com/alexdremov/notate/data/HandwritingRecognitionManager.kt b/app/src/main/java/com/alexdremov/notate/data/HandwritingRecognitionManager.kt new file mode 100644 index 00000000..bea3e6eb --- /dev/null +++ b/app/src/main/java/com/alexdremov/notate/data/HandwritingRecognitionManager.kt @@ -0,0 +1,133 @@ +package com.alexdremov.notate.data + +import android.content.Context +import android.graphics.RectF +import com.alexdremov.notate.model.Stroke +import com.alexdremov.notate.util.Logger +import com.google.mlkit.common.MlKitException +import com.google.mlkit.common.model.DownloadConditions +import com.google.mlkit.common.model.RemoteModelManager +import com.google.mlkit.vision.digitalink.recognition.DigitalInkRecognition +import com.google.mlkit.vision.digitalink.recognition.DigitalInkRecognitionModel +import com.google.mlkit.vision.digitalink.recognition.DigitalInkRecognitionModelIdentifier +import 
com.google.mlkit.vision.digitalink.recognition.DigitalInkRecognizer +import com.google.mlkit.vision.digitalink.recognition.DigitalInkRecognizerOptions +import com.google.mlkit.vision.digitalink.recognition.Ink +import kotlinx.coroutines.tasks.await +import java.util.concurrent.atomic.AtomicReference + +class HandwritingRecognitionManager( + private val context: Context, +) { + private val modelManager = RemoteModelManager.getInstance() + private var recognizer: DigitalInkRecognizer? = null + private var currentModel: DigitalInkRecognitionModel? = null + + companion object { + private const val TAG = "HandwritingRecognition" + private const val DEFAULT_LANG = "en-US" + } + + suspend fun ensureModelDownloaded( + langTag: String = DEFAULT_LANG, + onProgress: (Boolean) -> Unit = {}, + ): Boolean { + val identifier = DigitalInkRecognitionModelIdentifier.fromLanguageTag(langTag) + if (identifier == null) { + Logger.e(TAG, "Invalid language tag: $langTag") + return false + } + + val model = DigitalInkRecognitionModel.builder(identifier).build() + currentModel = model + + return try { + val isDownloaded = modelManager.isModelDownloaded(model).await() + if (!isDownloaded) { + onProgress(true) + Logger.i(TAG, "Downloading model for $langTag...") + val conditions = + DownloadConditions + .Builder() + .build() // Remove requireWifi to allow testing/immediate use if user wants + modelManager.download(model, conditions).await() + Logger.i(TAG, "Model $langTag downloaded successfully") + onProgress(false) + } + true + } catch (e: Exception) { + Logger.e(TAG, "Failed to download model $langTag", e) + onProgress(false) + false + } + } + + private fun getRecognizer(): DigitalInkRecognizer? { + val model = currentModel ?: return null + if (recognizer == null) { + recognizer = + DigitalInkRecognition.getClient( + DigitalInkRecognizerOptions.builder(model).build(), + ) + } + return recognizer + } + + suspend fun recognizeStrokes(strokes: List): RecognizedTextData? 
{ + if (strokes.isEmpty()) return null + + val recognizer = + getRecognizer() ?: run { + if (ensureModelDownloaded()) { + getRecognizer() + } else { + null + } + } ?: return null + + val inkBuilder = Ink.builder() + val totalBounds = RectF() + val strokeOrders = ArrayList() + var isFirst = true + + for (stroke in strokes) { + val mlStrokeBuilder = Ink.Stroke.builder() + for (point in stroke.points) { + mlStrokeBuilder.addPoint(Ink.Point.create(point.x, point.y, point.timestamp)) + } + inkBuilder.addStroke(mlStrokeBuilder.build()) + + if (isFirst) { + totalBounds.set(stroke.bounds) + isFirst = false + } else { + totalBounds.union(stroke.bounds) + } + strokeOrders.add(stroke.strokeOrder) + } + + return try { + val result = recognizer.recognize(inkBuilder.build()).await() + val bestCandidate = result.candidates.firstOrNull() ?: return null + + Logger.d(TAG, "Recognized text: ${bestCandidate.text}") + + RecognizedTextData( + text = bestCandidate.text, + x = totalBounds.left, + y = totalBounds.top, + width = totalBounds.width(), + height = totalBounds.height(), + strokeOrders = strokeOrders, + ) + } catch (e: Exception) { + Logger.e(TAG, "Recognition failed", e) + null + } + } + + fun close() { + recognizer?.close() + recognizer = null + } +} diff --git a/app/src/main/java/com/alexdremov/notate/data/PreferencesManager.kt b/app/src/main/java/com/alexdremov/notate/data/PreferencesManager.kt index 6aaab9f2..38106c3f 100644 --- a/app/src/main/java/com/alexdremov/notate/data/PreferencesManager.kt +++ b/app/src/main/java/com/alexdremov/notate/data/PreferencesManager.kt @@ -59,6 +59,8 @@ object PreferencesManager { private const val KEY_PDF_EXPORT_SCALE = "pdf_export_scale" private const val KEY_SYNC_PDF_TYPE = "sync_pdf_type" private const val KEY_FIXED_PAGE_CENTER_HORIZONTAL = "fixed_page_center_horizontal" + private const val KEY_OCR_ENABLED = "ocr_enabled" + private const val KEY_OCR_LANGUAGE = "ocr_language" // Debug Preferences private const val 
KEY_DEBUG_USE_SIMPLE_RENDERER = "debug_use_simple_renderer" @@ -67,6 +69,8 @@ object PreferencesManager { private const val KEY_DEBUG_SHOW_BOUNDING_BOX = "debug_show_bounding_box" private const val KEY_DEBUG_SHOW_REGIONS = "debug_show_regions" private const val KEY_DEBUG_ENABLE_PROFILING = "debug_enable_profiling" + private const val KEY_DEBUG_SHOW_OCR = "debug_show_ocr" + private const val KEY_DEBUG_SHOW_LINES = "debug_show_lines" private const val KEY_FLOAT_WINDOW_RECT = "float_window_rect" @@ -117,6 +121,24 @@ object PreferencesManager { getPrefs(context).edit().putBoolean(KEY_FIXED_PAGE_CENTER_HORIZONTAL, enabled).apply() } + fun isOcrEnabled(context: Context): Boolean = getPrefs(context).getBoolean(KEY_OCR_ENABLED, false) + + fun setOcrEnabled( + context: Context, + enabled: Boolean, + ) { + getPrefs(context).edit().putBoolean(KEY_OCR_ENABLED, enabled).apply() + } + + fun getOcrLanguage(context: Context): String = getPrefs(context).getString(KEY_OCR_LANGUAGE, "en-US") ?: "en-US" + + fun setOcrLanguage( + context: Context, + lang: String, + ) { + getPrefs(context).edit().putString(KEY_OCR_LANGUAGE, lang).apply() + } + fun getMinLogLevel(context: Context): Int = getPrefs(context).getInt(KEY_MIN_LOG_LEVEL, 4) // Default to NONE (4) fun setMinLogLevel( @@ -182,6 +204,24 @@ object PreferencesManager { getPrefs(context).edit().putBoolean(KEY_DEBUG_ENABLE_PROFILING, enabled).apply() } + fun isDebugShowOcrEnabled(context: Context): Boolean = getPrefs(context).getBoolean(KEY_DEBUG_SHOW_OCR, false) + + fun setDebugShowOcrEnabled( + context: Context, + enabled: Boolean, + ) { + getPrefs(context).edit().putBoolean(KEY_DEBUG_SHOW_OCR, enabled).apply() + } + + fun isDebugShowLinesEnabled(context: Context): Boolean = getPrefs(context).getBoolean(KEY_DEBUG_SHOW_LINES, false) + + fun setDebugShowLinesEnabled( + context: Context, + enabled: Boolean, + ) { + getPrefs(context).edit().putBoolean(KEY_DEBUG_SHOW_LINES, enabled).apply() + } + fun isScribbleToEraseEnabled(context: 
Context): Boolean = getPrefs(context).getBoolean(KEY_SCRIBBLE_TO_ERASE, true) fun setScribbleToEraseEnabled( diff --git a/app/src/main/java/com/alexdremov/notate/data/SerializationModels.kt b/app/src/main/java/com/alexdremov/notate/data/SerializationModels.kt index 1b351a22..46944df0 100644 --- a/app/src/main/java/com/alexdremov/notate/data/SerializationModels.kt +++ b/app/src/main/java/com/alexdremov/notate/data/SerializationModels.kt @@ -71,6 +71,16 @@ data class LinkItemData( @ProtoNumber(12) val fontSize: Float = 24f, ) +@Serializable +data class RecognizedTextData( + @ProtoNumber(1) val text: String, + @ProtoNumber(2) val x: Float, + @ProtoNumber(3) val y: Float, + @ProtoNumber(4) val width: Float, + @ProtoNumber(5) val height: Float, + @ProtoNumber(6) val strokeOrders: List = emptyList(), +) + @Serializable data class RegionProto( @ProtoNumber(1) val idX: Int, @@ -79,6 +89,7 @@ data class RegionProto( @ProtoNumber(4) val images: List = emptyList(), @ProtoNumber(5) val texts: List = emptyList(), @ProtoNumber(6) val links: List = emptyList(), + @ProtoNumber(7) val recognizedTexts: List = emptyList(), ) @Serializable diff --git a/app/src/main/java/com/alexdremov/notate/data/StrokeClusteringManager.kt b/app/src/main/java/com/alexdremov/notate/data/StrokeClusteringManager.kt new file mode 100644 index 00000000..6e10ee27 --- /dev/null +++ b/app/src/main/java/com/alexdremov/notate/data/StrokeClusteringManager.kt @@ -0,0 +1,280 @@ +package com.alexdremov.notate.data + +import android.graphics.RectF +import com.alexdremov.notate.model.Stroke +import java.util.* +import kotlin.collections.ArrayList +import kotlin.collections.HashSet +import kotlin.math.abs +import kotlin.math.max + +/** + * Handles grouping of individual strokes into clusters (paragraphs/blocks) + * and further segmenting those clusters into lines. + */ +object StrokeClusteringManager { + /** + * Groups strokes into high-level clusters based on spatial proximity. 
+ * Uses Connected Component Analysis (CCA) on a proximity graph. + */ + fun clusterStrokes( + strokes: List, + thresholdMultiplier: Float = 2.0f, + ): List> { + if (strokes.isEmpty()) return emptyList() + + // 1. Build adjacency list for proximity graph + val adjacency = Array(strokes.size) { mutableListOf() } + + for (i in strokes.indices) { + val s1 = strokes[i] + val s1BoundsExpanded = RectF(s1.bounds) + // Adaptive threshold based on stroke height + val threshold = s1.bounds.height() * thresholdMultiplier + s1BoundsExpanded.inset(-threshold, -threshold) + + for (j in i + 1 until strokes.size) { + val s2 = strokes[j] + if (RectF.intersects(s1BoundsExpanded, s2.bounds)) { + adjacency[i].add(j) + adjacency[j].add(i) + } + } + } + + // 2. Find Connected Components using BFS + val clusters = ArrayList>() + val visited = BooleanArray(strokes.size) + + for (i in strokes.indices) { + if (!visited[i]) { + val component = mutableListOf() + val queue: Queue = LinkedList() + queue.add(i) + visited[i] = true + + while (queue.isNotEmpty()) { + val current = queue.poll()!! + component.add(strokes[current]) + for (neighbor in adjacency[current]) { + if (!visited[neighbor]) { + visited[neighbor] = true + queue.add(neighbor) + } + } + } + clusters.add(component) + } + } + + return clusters + } + + /** + * Splits a cluster of strokes into individual horizontal lines. + * Uses a left-to-right sweep with an Exponential Moving Average (EMA) + * to track sloped baselines, and restores original temporal order at the end. 
+ */ + fun segmentIntoLines(strokes: List): List> { + if (strokes.isEmpty()) return emptyList() + + class IndexedStroke( + val stroke: Stroke, + val originalIndex: Int, + ) + val indexedStrokes = strokes.mapIndexed { index, stroke -> IndexedStroke(stroke, index) } + + val heights = strokes.map { it.bounds.height() }.sorted() + val medianHeight = heights[heights.size / 2].coerceAtLeast(5f) + + val sweepSorted = indexedStrokes.sortedBy { it.stroke.bounds.left } + + class TextLine( + firstItem: IndexedStroke, + ) { + val items = mutableListOf(firstItem) + var localCenterY: Float = firstItem.stroke.bounds.centerY() + var localHeight: Float = max(firstItem.stroke.bounds.height(), medianHeight * 0.5f) + + fun add(item: IndexedStroke) { + items.add(item) + val h = item.stroke.bounds.height() + if (h > medianHeight * 0.3f) { + localCenterY = (localCenterY * 0.7f) + (item.stroke.bounds.centerY() * 0.3f) + localHeight = (localHeight * 0.8f) + (h * 0.2f) + } + } + } + + val lines = mutableListOf() + + // 1. Initial Left-to-Right Sweep + for (item in sweepSorted) { + val strokeCenterY = item.stroke.bounds.centerY() + var bestLine: TextLine? = null + var minDistance = Float.MAX_VALUE + + for (line in lines) { + val distance = abs(strokeCenterY - line.localCenterY) + // Threshold: the larger of 0.8x the line's tracked height and the median stroke height. + // Keeping it this tight prevents merging a line with its neighbor below. + val threshold = max(line.localHeight * 0.8f, medianHeight) + + if (distance < threshold && distance < minDistance) { + bestLine = line + minDistance = distance + } + } + + if (bestLine != null) { + bestLine.add(item) + } else { + lines.add(TextLine(item)) + } + } + + // 2. 
NEW: Orphan Absorption Pass (Diacritics, dots, commas) + val mainLines = mutableListOf() + val orphanLines = mutableListOf() + + // Separate tiny isolated strokes from actual text lines + for (line in lines) { + val firstBounds = + line.items + .first() + .stroke.bounds + val isOrphan = + line.items.size == 1 && + firstBounds.height() < medianHeight * 0.6f && + firstBounds.width() < medianHeight * 0.6f + + if (isOrphan) { + orphanLines.add(line) + } else { + mainLines.add(line) + } + } + + // Attach orphans to the main line that encompasses their X-coordinates + for (orphan in orphanLines) { + val orphanItem = orphan.items.first() + val orphanCenterY = orphanItem.stroke.bounds.centerY() + val orphanCenterX = orphanItem.stroke.bounds.centerX() + + var bestMainLine: TextLine? = null + var minDistanceY = Float.MAX_VALUE + + for (mainLine in mainLines) { + val lineLeft = mainLine.items.minOf { it.stroke.bounds.left } + val lineRight = mainLine.items.maxOf { it.stroke.bounds.right } + + // Check if the dot is horizontally positioned over/under this line + // (allowing a small padding margin) + val padding = medianHeight * 0.5f + if (orphanCenterX in (lineLeft - padding)..(lineRight + padding)) { + val distanceY = abs(orphanCenterY - mainLine.localCenterY) + if (distanceY < minDistanceY) { + minDistanceY = distanceY + bestMainLine = mainLine + } + } + } + + if (bestMainLine != null) { + bestMainLine.add(orphanItem) + } else { + // If it truly floats nowhere near text, keep it as an isolated line + mainLines.add(orphan) + } + } + + // 3. 
Refinement Pass (Merge heavily overlapping main lines) + val mergedLines = mutableListOf>() + val sortedLines = mainLines.sortedBy { it.items.map { s -> s.stroke.bounds.centerY() }.average() } + + for (line in sortedLines) { + val currentLineItems = line.items + val currentAvgY = currentLineItems.map { it.stroke.bounds.centerY() }.average() + + val lastMerged = mergedLines.lastOrNull() + if (lastMerged != null) { + val lastAvgY = lastMerged.map { it.stroke.bounds.centerY() }.average() + // Only merge if they are very close vertically (likely fragments of the same line) + if (abs(currentAvgY - lastAvgY) < medianHeight * 0.6f) { + lastMerged.addAll(currentLineItems) + continue + } + } + mergedLines.add(currentLineItems.toMutableList()) + } + + // 4. Final Pass: Horizontal Splitting + // Prevents extremely long mega-lines on infinite canvas by splitting at large horizontal gaps. + val finalResultLines = mutableListOf>() + + for (lineIndexedStrokes in mergedLines) { + val sortedStrokes = lineIndexedStrokes.sortedBy { it.stroke.bounds.left } + if (sortedStrokes.isEmpty()) continue + + // Refined sub-line logic + val subLines = mutableListOf>() + var activeSubLine = mutableListOf() + subLines.add(activeSubLine) + + for (i in sortedStrokes.indices) { + val current = sortedStrokes[i] + if (i == 0) { + activeSubLine.add(current) + continue + } + + val prev = sortedStrokes[i - 1] + val gap = current.stroke.bounds.left - prev.stroke.bounds.right + + // Threshold: If the horizontal gap is > 4.5x the median line height, + // it's likely a separate logical block on the same vertical plane. + val horizontalGapThreshold = medianHeight * 4.5f + + if (gap > horizontalGapThreshold) { + activeSubLine = mutableListOf() + subLines.add(activeSubLine) + } + activeSubLine.add(current) + } + + for (subLine in subLines) { + if (subLine.isNotEmpty()) { + finalResultLines.add( + restoreNaturalOrder( + subLine.sortedBy { it.originalIndex }.map { it.stroke }, + ), + ) + } + } + } + + // 5. 
Final Sort: Restore to Top-to-Bottom sequence + return finalResultLines + .sortedBy { lineStrokes -> lineStrokes.map { it.bounds.centerY() }.average() } + } + + /** + * Restores the most probable natural reading order for a line of strokes. + * Overrides corrupted temporal order (due to erasures or late corrections) + * with a spatial left-to-right sequence. + */ + fun restoreNaturalOrder(lineStrokes: List): List { + if (lineStrokes.size <= 1) return lineStrokes + + // Primary sort: Left-to-Right progression. + // Secondary sort: Top-to-Bottom to handle vertically stacked strokes logically + // (e.g., the top and bottom bars of an '=' sign, or an 'i' dot and its stem + // if they happen to share the exact same left coordinate). + return lineStrokes.sortedWith( + compareBy( + { it.bounds.left }, + { it.bounds.top }, + ), + ) + } +} diff --git a/app/src/main/java/com/alexdremov/notate/data/region/RegionManager.kt b/app/src/main/java/com/alexdremov/notate/data/region/RegionManager.kt index 321166a7..ef154c04 100644 --- a/app/src/main/java/com/alexdremov/notate/data/region/RegionManager.kt +++ b/app/src/main/java/com/alexdremov/notate/data/region/RegionManager.kt @@ -10,6 +10,7 @@ import com.alexdremov.notate.config.CanvasConfig import com.alexdremov.notate.data.CanvasImageData import com.alexdremov.notate.data.CanvasSerializer import com.alexdremov.notate.data.LinkItemData +import com.alexdremov.notate.data.RecognizedTextData import com.alexdremov.notate.data.StrokeData import com.alexdremov.notate.model.CanvasImage import com.alexdremov.notate.model.CanvasItem @@ -271,16 +272,72 @@ class RegionManager( return deferred.await() } + /** + * Injects recognized text metadata into the corresponding spatial region. 
+ */ + suspend fun addRecognizedText(textData: RecognizedTextData) { + val rid = + RegionId( + floor(textData.x / regionSize).toInt(), + floor(textData.y / regionSize).toInt(), + ) + val data = getRegion(rid) + + stateLock.write { + // Atomic update pattern for LruCache consistency + resizingId = rid + regionCache.remove(rid) + resizingId = null + + data.invalidateSize() + data.recognizedTexts.add(textData) + data.isDirty = true + regionCache.put(rid, data) + } + } + + /** + * Removes recognized text blocks that intersect with the given rectangle. + */ + suspend fun removeRecognizedTextInRect(rect: RectF) { + val ids = getRegionIdsInRect(rect) + for (id in ids) { + val region = getRegion(id) + stateLock.write { + val toRemove = + region.recognizedTexts.filter { + RectF.intersects(RectF(it.x, it.y, it.x + it.width, it.y + it.height), rect) + } + if (toRemove.isNotEmpty()) { + resizingId = id + regionCache.remove(id) + resizingId = null + + region.recognizedTexts.removeAll(toRemove) + region.isDirty = true + region.invalidateSize() + regionCache.put(id, region) + } + } + } + } + private suspend fun loadRegionFromDisk(id: RegionId): RegionData { try { var region = storage.loadRegion(id) - if (region != null && region.items !is CopyOnWriteArrayList) { - region = region.copy(items = CopyOnWriteArrayList(region.items)) + if (region != null) { + // Ensure thread-safe implementations + if (region.items !is CopyOnWriteArrayList) { + region = region.copy(items = CopyOnWriteArrayList(region.items)) + } + if (region.recognizedTexts !is CopyOnWriteArrayList) { + region = region.copy(recognizedTexts = CopyOnWriteArrayList(region.recognizedTexts)) + } region.rebuildQuadtree(regionSize) } if (region == null) { stateLock.write { removeRegionIndex(id) } - region = RegionData(id, CopyOnWriteArrayList()) + region = RegionData(id) } stateLock.write { val existing = regionCache.get(id) ?: overflowRegions[id] diff --git 
a/app/src/main/java/com/alexdremov/notate/data/region/RegionModels.kt b/app/src/main/java/com/alexdremov/notate/data/region/RegionModels.kt index 347c118e..3282d161 100644 --- a/app/src/main/java/com/alexdremov/notate/data/region/RegionModels.kt +++ b/app/src/main/java/com/alexdremov/notate/data/region/RegionModels.kt @@ -1,11 +1,13 @@ package com.alexdremov.notate.data.region import android.graphics.RectF +import com.alexdremov.notate.data.RecognizedTextData import com.alexdremov.notate.model.CanvasItem import com.alexdremov.notate.model.Stroke import com.alexdremov.notate.util.Quadtree import kotlinx.serialization.Serializable import kotlinx.serialization.Transient +import java.util.concurrent.CopyOnWriteArrayList @Serializable data class RegionId( @@ -31,7 +33,8 @@ data class RegionId( data class RegionData( val id: RegionId, - val items: MutableList = ArrayList(), + val items: MutableList = CopyOnWriteArrayList(), + val recognizedTexts: MutableList = CopyOnWriteArrayList(), @Volatile var isDirty: Boolean = false, ) { @Transient @@ -89,6 +92,7 @@ data class RegionData( } } items.clear() + recognizedTexts.clear() quadtree?.clear() quadtree = null } diff --git a/app/src/main/java/com/alexdremov/notate/data/region/RegionStorage.kt b/app/src/main/java/com/alexdremov/notate/data/region/RegionStorage.kt index 4c28e11e..e0c536c6 100644 --- a/app/src/main/java/com/alexdremov/notate/data/region/RegionStorage.kt +++ b/app/src/main/java/com/alexdremov/notate/data/region/RegionStorage.kt @@ -131,7 +131,16 @@ class RegionStorage( } } - val proto = RegionProto(data.id.x, data.id.y, strokeData, imageData, textData, linkData) + val proto = + RegionProto( + data.id.x, + data.id.y, + strokeData, + imageData, + textData, + linkData, + data.recognizedTexts, + ) val file = getRegionFile(data.id) return try { @@ -214,7 +223,10 @@ class RegionStorage( data.items.add(linkItem) } - Logger.d("RegionStorage", "Loaded region $id (${data.items.size} items)") + // Restore Recognized Text + 
data.recognizedTexts.addAll(proto.recognizedTexts) + + Logger.d("RegionStorage", "Loaded region $id (${data.items.size} items, ${data.recognizedTexts.size} OCR blocks)") data } catch (e: Exception) { Logger.e(TAG, "Failed to load region $id (File: ${file.absolutePath}, Size: ${file.length()})", e) diff --git a/app/src/main/java/com/alexdremov/notate/export/PdfExporter.kt b/app/src/main/java/com/alexdremov/notate/export/PdfExporter.kt index 6d121c41..b17043f5 100644 --- a/app/src/main/java/com/alexdremov/notate/export/PdfExporter.kt +++ b/app/src/main/java/com/alexdremov/notate/export/PdfExporter.kt @@ -10,6 +10,7 @@ import android.graphics.pdf.PdfDocument import android.net.Uri import com.alexdremov.notate.config.CanvasConfig import com.alexdremov.notate.data.CanvasType +import com.alexdremov.notate.data.RecognizedTextData import com.alexdremov.notate.model.BackgroundStyle import com.alexdremov.notate.model.CanvasImage import com.alexdremov.notate.model.CanvasItem @@ -233,6 +234,13 @@ object PdfExporter { } } + // Render Recognized Text (OCR) + for (ocr in region.recognizedTexts) { + if (RectF.intersects(RectF(ocr.x, ocr.y, ocr.x + ocr.width, ocr.y + ocr.height), bounds)) { + renderRecognizedTextToPdf(contentStream, ocr, bounds, height) + } + } + processedRegions++ if (processedRegions % 5 == 0 || processedRegions == totalRegions) { val progress = 20 + ((processedRegions.toFloat() / totalRegions) * 70).toInt() @@ -255,6 +263,64 @@ object PdfExporter { } } + private fun renderRecognizedTextToPdf( + stream: PDPageContentStream, + item: RecognizedTextData, + bounds: RectF, + pageHeight: Float, + ) { + val lines = item.text.split('\n') + val lineCount = lines.size.coerceAtLeast(1) + + val totalWidth = item.width + val totalHeight = item.height + val fontSize = (totalHeight / lineCount).coerceAtLeast(1f) + + val font = PDType1Font.HELVETICA + + var textStarted = false + try { + stream.beginText() + textStarted = true + stream.setRenderingMode(RenderingMode.NEITHER) + 
stream.setFont(font, fontSize) + + for (i in lines.indices) { + val lineText = lines[i].filter { it.code in 32..126 || it.code in 160..255 } + if (lineText.isEmpty()) continue + + val linePdfX = item.x - bounds.left + // PDF coordinates are bottom-up. Baseline of line 'i' is: + // pageHeight - topOffset - (i + 1) * fontSize + val linePdfY = pageHeight - (item.y - bounds.top) - (i + 1) * fontSize + + // stringWidth is in 1/1000 units of the font size + val naturalWidth = font.getStringWidth(lineText) / 1000f * fontSize + + if (naturalWidth > 0) { + val hScale = totalWidth / naturalWidth + // Apply horizontal scaling and absolute translation + stream.setTextMatrix(Matrix(hScale, 0f, 0f, 1f, linePdfX, linePdfY)) + } else { + // Absolute translation without scaling + stream.setTextMatrix(Matrix(1f, 0f, 0f, 1f, linePdfX, linePdfY)) + } + + stream.showText(lineText) + } + } catch (e: Exception) { + Logger.w("PdfExporter", "Failed to add recognized text layer: ${e.message}") + } finally { + if (textStarted) { + try { + stream.endText() + } catch (e: Exception) { + Logger.e("PdfExporter", "Error ending recognized text block", e) + } + } + } + } + private fun renderBackgroundVectorToStream( stream: PDPageContentStream, style: BackgroundStyle, diff --git a/app/src/main/java/com/alexdremov/notate/model/InfiniteCanvasModel.kt b/app/src/main/java/com/alexdremov/notate/model/InfiniteCanvasModel.kt index 5e9d3521..a93f2a52 100644 --- a/app/src/main/java/com/alexdremov/notate/model/InfiniteCanvasModel.kt +++ b/app/src/main/java/com/alexdremov/notate/model/InfiniteCanvasModel.kt @@ -163,6 +163,14 @@ class InfiniteCanvasModel { suspend fun addStroke(stroke: Stroke): Stroke? = addItem(stroke) as? 
Stroke + suspend fun addRecognizedText(textData: com.alexdremov.notate.data.RecognizedTextData) { + regionManager?.addRecognizedText(textData) + } + + suspend fun removeRecognizedTextInRect(rect: RectF) { + regionManager?.removeRecognizedTextInRect(rect) + } + suspend fun erase( eraserStroke: Stroke, type: EraserType, diff --git a/app/src/main/java/com/alexdremov/notate/ui/OnyxCanvasView.kt b/app/src/main/java/com/alexdremov/notate/ui/OnyxCanvasView.kt index ffe45eab..252e64fb 100644 --- a/app/src/main/java/com/alexdremov/notate/ui/OnyxCanvasView.kt +++ b/app/src/main/java/com/alexdremov/notate/ui/OnyxCanvasView.kt @@ -518,6 +518,7 @@ class OnyxCanvasView override fun onDetachedFromWindow() { super.onDetachedFromWindow() viewScope.cancel() + canvasController.close() minimapDrawer?.detach() canvasRenderer.destroy() } diff --git a/app/src/main/java/com/alexdremov/notate/ui/SettingsSidebarController.kt b/app/src/main/java/com/alexdremov/notate/ui/SettingsSidebarController.kt index fdf8055d..6d607fd7 100644 --- a/app/src/main/java/com/alexdremov/notate/ui/SettingsSidebarController.kt +++ b/app/src/main/java/com/alexdremov/notate/ui/SettingsSidebarController.kt @@ -27,8 +27,13 @@ import com.alexdremov.notate.ui.settings.InputSettingsPanel import com.alexdremov.notate.ui.settings.InputSettingsState import com.alexdremov.notate.ui.settings.InterfaceSettingsPanel import com.alexdremov.notate.ui.settings.InterfaceSettingsState +import com.alexdremov.notate.ui.settings.OcrSettingsPanel +import com.alexdremov.notate.ui.settings.OcrSettingsState import com.alexdremov.notate.ui.theme.NotateTheme import com.alexdremov.notate.vm.DrawingViewModel +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch import kotlin.math.roundToInt class SettingsSidebarController( @@ -72,6 +77,10 @@ class SettingsSidebarController( showWritingMenu() } + mainMenuView.findViewById(R.id.menu_item_ocr).setOnClickListener { + showOcrMenu() + } + 
val docMenuItem = mainMenuView.findViewById(R.id.menu_item_document) val docDivider = mainMenuView.findViewById(R.id.divider_document) if (isFixedPageMode()) { @@ -391,6 +400,36 @@ class SettingsSidebarController( } } + debugView.findViewById(R.id.switch_debug_show_ocr).apply { + isChecked = + com.alexdremov.notate.data.PreferencesManager + .isDebugShowOcrEnabled(context) + setOnCheckedChangeListener { _, isChecked -> + com.alexdremov.notate.data.PreferencesManager + .setDebugShowOcrEnabled(context, isChecked) + (context as? com.alexdremov.notate.CanvasActivity) + ?.findViewById( + R.id.canvasView, + )?.getRenderer() + ?.invalidate() + } + } + + debugView.findViewById(R.id.switch_debug_show_lines).apply { + isChecked = + com.alexdremov.notate.data.PreferencesManager + .isDebugShowLinesEnabled(context) + setOnCheckedChangeListener { _, isChecked -> + com.alexdremov.notate.data.PreferencesManager + .setDebugShowLinesEnabled(context, isChecked) + (context as? com.alexdremov.notate.CanvasActivity) + ?.findViewById( + R.id.canvasView, + )?.getRenderer() + ?.invalidate() + } + } + val spinnerLogLevel: Spinner = debugView.findViewById(R.id.spinner_debug_log_level) val levels = com.alexdremov.notate.util.Logger.Level @@ -478,6 +517,39 @@ class SettingsSidebarController( } } + private fun showOcrMenu() { + contentFrame.removeAllViews() + val composeView = + ComposeView(context).apply { + setViewCompositionStrategy(ViewCompositionStrategy.DisposeOnViewTreeLifecycleDestroyed) + setContent { + NotateTheme { + val isOcrEnabled by viewModel.isOcrEnabled.collectAsState() + val ocrLanguage by viewModel.ocrLanguage.collectAsState() + val isOcrDownloading by viewModel.isOcrDownloading.collectAsState() + + Column( + modifier = + Modifier + .fillMaxWidth() + .padding(16.dp), + ) { + OcrSettingsPanel( + state = OcrSettingsState(isOcrEnabled, ocrLanguage, isOcrDownloading), + onOcrEnabledChange = { viewModel.setOcrEnabled(it) }, + onOcrLanguageChange = { viewModel.setOcrLanguage(it) }, 
+ onRecognizeAgain = { viewModel.recognizeAll() }, + ) + } + } + } + } + contentFrame.addView(composeView) + + tvTitle.text = "Handwriting OCR" + btnBack.visibility = View.VISIBLE + } + private fun showBackgroundSettings() { contentFrame.removeAllViews() val bgView = LayoutInflater.from(context).inflate(R.layout.dialog_background_settings, contentFrame, false) diff --git a/app/src/main/java/com/alexdremov/notate/ui/controller/CanvasController.kt b/app/src/main/java/com/alexdremov/notate/ui/controller/CanvasController.kt index 57143692..fbdb6857 100644 --- a/app/src/main/java/com/alexdremov/notate/ui/controller/CanvasController.kt +++ b/app/src/main/java/com/alexdremov/notate/ui/controller/CanvasController.kt @@ -153,6 +153,15 @@ interface CanvasController { fun setOnContentChangedListener(listener: () -> Unit) fun setProgressCallback(callback: (isVisible: Boolean, message: String?, progress: Int) -> Unit) + + suspend fun downloadOcrModel( + lang: String, + onProgress: (Boolean) -> Unit, + ): Boolean + + suspend fun recognizeAll() + + fun close() } interface ViewportController { diff --git a/app/src/main/java/com/alexdremov/notate/ui/controller/CanvasControllerImpl.kt b/app/src/main/java/com/alexdremov/notate/ui/controller/CanvasControllerImpl.kt index 75a85034..b2af7a31 100644 --- a/app/src/main/java/com/alexdremov/notate/ui/controller/CanvasControllerImpl.kt +++ b/app/src/main/java/com/alexdremov/notate/ui/controller/CanvasControllerImpl.kt @@ -8,6 +8,9 @@ import android.net.Uri import android.os.Handler import android.os.Looper import android.util.Log +import com.alexdremov.notate.data.HandwritingRecognitionCoordinator +import com.alexdremov.notate.data.HandwritingRecognitionManager +import com.alexdremov.notate.data.PreferencesManager import com.alexdremov.notate.model.CanvasImage import com.alexdremov.notate.model.CanvasItem import com.alexdremov.notate.model.EraserType @@ -42,6 +45,21 @@ class CanvasControllerImpl( private var onContentChangedListener: (() -> 
Unit)? = null private var progressCallback: ((Boolean, String?, Int) -> Unit)? = null + // OCR Components + private val recognitionManager = HandwritingRecognitionManager(context) + private val recognitionCoordinator = + HandwritingRecognitionCoordinator( + model, + recognitionManager, + isEnabledProvider = { PreferencesManager.isOcrEnabled(context) }, + onOcrUpdated = { bounds -> + uiHandler.post { + renderer.invalidate() + onContentChangedListener?.invoke() + } + }, + ) + // Mutex to prevent concurrent destructive operations (commit, paste, delete) private val operationMutex = Mutex() @@ -53,6 +71,20 @@ class CanvasControllerImpl( this.progressCallback = callback } + override suspend fun downloadOcrModel( + lang: String, + onProgress: (Boolean) -> Unit, + ): Boolean = recognitionManager.ensureModelDownloaded(lang, onProgress) + + override suspend fun recognizeAll() { + recognitionCoordinator.sweepUnrecognizedStrokes() + } + + override fun close() { + recognitionCoordinator.stop() + recognitionManager.close() + } + override fun setViewportController(controller: ViewportController) { this.viewportController = controller } @@ -134,7 +166,27 @@ class CanvasControllerImpl( renderer.setEraserPreview(null) } - val invalidated = withContext(Dispatchers.Default) { model.erase(stroke, type) } + val invalidated = + withContext(Dispatchers.Default) { + model.removeRecognizedTextInRect(stroke.bounds) + model.erase(stroke, type) + } + + // Trigger re-recognition for strokes left near the erased area + if (invalidated != null) { + val expandedBounds = RectF(stroke.bounds).apply { inset(-100f, -50f) } + val remainingStrokes = + model + .queryItems(expandedBounds) + .filterIsInstance() + .filter { + it.style != com.alexdremov.notate.model.StrokeType.DASH && + it.style != com.alexdremov.notate.model.StrokeType.HIGHLIGHTER + } + if (remainingStrokes.isNotEmpty()) { + recognitionCoordinator.triggerManualRecognition(remainingStrokes) + } + } withContext(Dispatchers.Main) { if 
(invalidated != null) { @@ -312,9 +364,24 @@ class CanvasControllerImpl( updatePinnedRegions() withContext(Dispatchers.Default) { + model.removeRecognizedTextInRect(bounds) model.deleteItemsByIds(bounds, ids, context.cacheDir) } + // Trigger re-recognition for strokes left near the deleted area + val expandedBounds = RectF(bounds).apply { inset(-100f, -50f) } + val remainingStrokes = + model + .queryItems(expandedBounds) + .filterIsInstance() + .filter { + it.style != com.alexdremov.notate.model.StrokeType.DASH && + it.style != com.alexdremov.notate.model.StrokeType.HIGHLIGHTER + } + if (remainingStrokes.isNotEmpty()) { + recognitionCoordinator.triggerManualRecognition(remainingStrokes) + } + withContext(Dispatchers.Main) { renderer.setHiddenItems(emptySet()) renderer.invalidateTiles(bounds) @@ -1181,11 +1248,22 @@ class CanvasControllerImpl( val committedItems = withContext(Dispatchers.IO) { + model.removeRecognizedTextInRect(originalBounds) model.replaceItems(originalItems, newItems) } endBatchSession() + // Trigger re-recognition for moved strokes + val movedStrokes = + committedItems.filterIsInstance().filter { + it.style != com.alexdremov.notate.model.StrokeType.DASH && + it.style != com.alexdremov.notate.model.StrokeType.HIGHLIGHTER + } + if (movedStrokes.isNotEmpty()) { + recognitionCoordinator.triggerManualRecognition(movedStrokes) + } + // ALWAYS LIFTED STRATEGY: // Clear old selection state and re-select new items. // This keeps them "lifted" visually. 
diff --git a/app/src/main/java/com/alexdremov/notate/ui/render/CanvasLayout.kt b/app/src/main/java/com/alexdremov/notate/ui/render/CanvasLayout.kt index bd2920a7..5d25ab08 100644 --- a/app/src/main/java/com/alexdremov/notate/ui/render/CanvasLayout.kt +++ b/app/src/main/java/com/alexdremov/notate/ui/render/CanvasLayout.kt @@ -84,6 +84,22 @@ class InfiniteLayout : CanvasLayout { canvas.restoreToCount(saveCount) } + // OCR Debug Layer + if (visibleRect != null && + com.alexdremov.notate.data.PreferencesManager + .isDebugShowOcrEnabled(renderer.context) + ) { + renderer.renderOcrDebugLayer(canvas, visibleRect) + } + + // Lines Debug Layer + if (visibleRect != null && + com.alexdremov.notate.data.PreferencesManager + .isDebugShowLinesEnabled(renderer.context) + ) { + renderer.renderLinesDebugLayer(canvas, visibleRect) + } + canvas.restore() } } @@ -186,6 +202,20 @@ class FixedPageLayout( canvas.restoreToCount(saveCount) } + // OCR Debug Layer + if (com.alexdremov.notate.data.PreferencesManager + .isDebugShowOcrEnabled(renderer.context) + ) { + renderer.renderOcrDebugLayer(canvas, visibleRect) + } + + // Lines Debug Layer + if (com.alexdremov.notate.data.PreferencesManager + .isDebugShowLinesEnabled(renderer.context) + ) { + renderer.renderLinesDebugLayer(canvas, visibleRect) + } + canvas.restore() } } else { diff --git a/app/src/main/java/com/alexdremov/notate/ui/render/CanvasRenderer.kt b/app/src/main/java/com/alexdremov/notate/ui/render/CanvasRenderer.kt index e54199e2..fc31024c 100644 --- a/app/src/main/java/com/alexdremov/notate/ui/render/CanvasRenderer.kt +++ b/app/src/main/java/com/alexdremov/notate/ui/render/CanvasRenderer.kt @@ -5,6 +5,8 @@ import android.graphics.Color import android.graphics.Matrix import android.graphics.Paint import android.graphics.RectF +import android.text.StaticLayout +import android.text.TextPaint import com.alexdremov.notate.config.CanvasConfig import com.alexdremov.notate.data.CanvasType import 
com.alexdremov.notate.model.InfiniteCanvasModel @@ -20,7 +22,7 @@ import kotlinx.coroutines.CoroutineScope */ class CanvasRenderer( private val model: InfiniteCanvasModel, - private val context: android.content.Context, + val context: android.content.Context, scope: CoroutineScope, private val onTileReady: () -> Unit, ) { @@ -145,6 +147,108 @@ class CanvasRenderer( tileManager.forceRefreshVisibleTiles(visibleRect, scale) } + private val ocrDebugTextPaint = + TextPaint().apply { + color = Color.BLACK + alpha = 128 // Semi-transparent black for better E-Ink compatibility + style = Paint.Style.FILL + isAntiAlias = true + } + + private val ocrDebugBoxPaint = + Paint().apply { + color = Color.RED + alpha = 200 + style = Paint.Style.STROKE + strokeWidth = 4f + } + + fun renderOcrDebugLayer( + canvas: Canvas, + visibleRect: RectF, + ) { + val rm = model.getRegionManager() ?: return + val regionIds = rm.getRegionIdsInRect(visibleRect) + + for (id in regionIds) { + val region = rm.getRegionReadOnly(id) ?: continue + for (ocr in region.recognizedTexts) { + val ocrRect = RectF(ocr.x, ocr.y, ocr.x + ocr.width, ocr.y + ocr.height) + if (RectF.intersects(ocrRect, visibleRect)) { + canvas.drawRect(ocrRect, ocrDebugBoxPaint) + + val textWidth = ocr.width.coerceAtLeast(10f) + val lines = ocr.text.split('\n') + val lineCount = lines.size.coerceAtLeast(1) + ocrDebugTextPaint.textSize = (ocr.height / lineCount).coerceIn(12f, 72f) + + val builder = + StaticLayout.Builder + .obtain(ocr.text, 0, ocr.text.length, ocrDebugTextPaint, textWidth.toInt()) + .setAlignment(android.text.Layout.Alignment.ALIGN_NORMAL) + .setLineSpacing(0f, 1.2f) + .setIncludePad(false) + + val staticLayout = builder.build() + + canvas.save() + canvas.translate(ocr.x, ocr.y) + staticLayout.draw(canvas) + canvas.restore() + } + } + } + } + + private val linesDebugPaint = + Paint().apply { + color = Color.GREEN + alpha = 128 + style = Paint.Style.STROKE + strokeWidth = 3f + } + + fun renderLinesDebugLayer( + canvas: 
Canvas, + visibleRect: RectF, + ) { + val rm = model.getRegionManager() ?: return + val regionIds = rm.getRegionIdsInRect(visibleRect) + + val strokesInView = mutableListOf() + for (id in regionIds) { + val region = rm.getRegionReadOnly(id) ?: continue + region.items.forEach { if (it is Stroke && it.style != com.alexdremov.notate.model.StrokeType.DASH) strokesInView.add(it) } + } + + if (strokesInView.isEmpty()) return + + // Perform clustering and line segmentation on the fly for visualization + val clusters = + com.alexdremov.notate.data.StrokeClusteringManager + .clusterStrokes(strokesInView) + for (cluster in clusters) { + val lines = + com.alexdremov.notate.data.StrokeClusteringManager + .segmentIntoLines(cluster) + for (line in lines) { + val lineBounds = RectF() + var first = true + for (s in line) { + if (first) { + lineBounds.set(s.bounds) + first = false + } else { + lineBounds.union(s.bounds) + } + } + if (!lineBounds.isEmpty) { + canvas.drawRect(lineBounds, linesDebugPaint) + } + } + } + } + /** * Performs cleanup of resources and background tasks. 
*/ diff --git a/app/src/main/java/com/alexdremov/notate/ui/settings/SettingsPanels.kt b/app/src/main/java/com/alexdremov/notate/ui/settings/SettingsPanels.kt index f47c869d..9d95f5ba 100644 --- a/app/src/main/java/com/alexdremov/notate/ui/settings/SettingsPanels.kt +++ b/app/src/main/java/com/alexdremov/notate/ui/settings/SettingsPanels.kt @@ -9,6 +9,7 @@ import androidx.compose.foundation.layout.fillMaxWidth import androidx.compose.foundation.layout.padding import androidx.compose.foundation.layout.width import androidx.compose.material3.HorizontalDivider +import androidx.compose.material3.LinearProgressIndicator import androidx.compose.material3.MaterialTheme import androidx.compose.material3.RadioButton import androidx.compose.material3.Slider @@ -225,3 +226,81 @@ fun PdfSettingsPanel( } } } + +data class OcrSettingsState( + val ocrEnabled: Boolean, + val ocrLanguage: String, + val isDownloading: Boolean = false, +) + +@Composable +fun OcrSettingsPanel( + state: OcrSettingsState, + onOcrEnabledChange: (Boolean) -> Unit, + onOcrLanguageChange: (String) -> Unit, + onRecognizeAgain: () -> Unit, +) { + Column(verticalArrangement = Arrangement.spacedBy(16.dp)) { + SettingsToggle( + title = "Handwriting Recognition", + checked = state.ocrEnabled, + onCheckedChange = onOcrEnabledChange, + ) + Text( + text = "Converts handwriting into searchable text in PDFs.", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + ) + + if (state.ocrEnabled) { + androidx.compose.material3.Button( + onClick = onRecognizeAgain, + modifier = + androidx.compose.ui.Modifier + .fillMaxWidth(), + enabled = !state.isDownloading, + ) { + Text("Recognize All Again") + } + } + + if (state.isDownloading) { + Column(Modifier.fillMaxWidth().padding(top = 8.dp)) { + Text( + text = "Downloading language model...", + style = MaterialTheme.typography.labelMedium, + color = MaterialTheme.colorScheme.primary, + ) + LinearProgressIndicator( + modifier = 
Modifier.fillMaxWidth().padding(top = 4.dp), + ) + } + } + + if (state.ocrEnabled) { + HorizontalDivider() + Text( + text = "Language", + style = MaterialTheme.typography.bodyLarge, + ) + val languages = listOf("en-US", "de-DE", "fr-FR", "es-ES", "it-IT") + Column { + languages.forEach { lang -> + Row( + Modifier + .fillMaxWidth() + .clickable { onOcrLanguageChange(lang) } + .padding(vertical = 4.dp), + verticalAlignment = Alignment.CenterVertically, + ) { + RadioButton( + selected = state.ocrLanguage == lang, + onClick = { onOcrLanguageChange(lang) }, + ) + Text(text = lang, modifier = Modifier.padding(start = 8.dp)) + } + } + } + } + } +} diff --git a/app/src/main/java/com/alexdremov/notate/vm/DrawingViewModel.kt b/app/src/main/java/com/alexdremov/notate/vm/DrawingViewModel.kt index a08ebc64..4a396dbb 100644 --- a/app/src/main/java/com/alexdremov/notate/vm/DrawingViewModel.kt +++ b/app/src/main/java/com/alexdremov/notate/vm/DrawingViewModel.kt @@ -4,6 +4,7 @@ import android.app.Application import android.graphics.Color import androidx.lifecycle.AndroidViewModel import androidx.lifecycle.lifecycleScope +import androidx.lifecycle.viewModelScope import com.alexdremov.notate.data.PreferencesManager import com.alexdremov.notate.model.ActionType import com.alexdremov.notate.model.PenTool @@ -87,6 +88,21 @@ class DrawingViewModel private val _isPenPopupOpen = MutableStateFlow(false) val isPenPopupOpen: StateFlow = _isPenPopupOpen.asStateFlow() + private val _isOcrEnabled = MutableStateFlow(false) + val isOcrEnabled: StateFlow = _isOcrEnabled.asStateFlow() + + private val _ocrLanguage = MutableStateFlow("en-US") + val ocrLanguage: StateFlow = _ocrLanguage.asStateFlow() + + private val _isOcrDownloading = MutableStateFlow(false) + val isOcrDownloading: StateFlow = _isOcrDownloading.asStateFlow() + + private var controllerProvider: (() -> com.alexdremov.notate.ui.controller.CanvasController?)? 
= null + + fun setControllerProvider(provider: () -> com.alexdremov.notate.ui.controller.CanvasController?) { + this.controllerProvider = provider + } + private val _isToolbarDragging = MutableStateFlow(false) val isToolbarDragging: StateFlow = _isToolbarDragging.asStateFlow() @@ -95,6 +111,8 @@ class DrawingViewModel _isCollapsibleToolbar.value = PreferencesManager.isCollapsibleToolbarEnabled(getApplication()) _toolbarCollapseTimeout.value = PreferencesManager.getToolbarCollapseTimeout(getApplication()) _isFixedPageCenterHorizontal.value = PreferencesManager.isFixedPageCenterHorizontalEnabled(getApplication()) + _isOcrEnabled.value = PreferencesManager.isOcrEnabled(getApplication()) + _ocrLanguage.value = PreferencesManager.getOcrLanguage(getApplication()) } suspend fun loadCanvasSession(path: String) { @@ -281,6 +299,39 @@ class DrawingViewModel _isPenPopupOpen.value = isOpen } + fun setOcrEnabled(enabled: Boolean) { + _isOcrEnabled.value = enabled + PreferencesManager.setOcrEnabled(getApplication(), enabled) + if (enabled) { + triggerModelDownload() + } + } + + fun setOcrLanguage(lang: String) { + _ocrLanguage.value = lang + PreferencesManager.setOcrLanguage(getApplication(), lang) + if (_isOcrEnabled.value) { + triggerModelDownload() + } + } + + fun recognizeAll() { + viewModelScope.launch { + controllerProvider?.invoke()?.recognizeAll() + } + } + + private fun triggerModelDownload() { + val lang = _ocrLanguage.value + viewModelScope.launch { + controllerProvider?.invoke()?.let { controller -> + controller.downloadOcrModel(lang) { downloading -> + _isOcrDownloading.value = downloading + } + } + } + } + fun selectTool(id: String) { val item = _toolbarItems.value.find { it.id == id } ?: return diff --git a/app/src/main/res/layout/sidebar_debug_menu.xml b/app/src/main/res/layout/sidebar_debug_menu.xml index 7abbe00d..2d57ecac 100644 --- a/app/src/main/res/layout/sidebar_debug_menu.xml +++ b/app/src/main/res/layout/sidebar_debug_menu.xml @@ -238,6 +238,80 @@ 
android:layout_height="wrap_content" /> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + (relaxed = true) + val ocrData = + com.alexdremov.notate.data.RecognizedTextData( + text = "Hello Notate", + x = 100f, + y = 100f, + width = 200f, + height = 50f, + strokeOrders = listOf(1L, 2L), + ) + + // Setup RegionManager to return a region with OCR data + val regionManager = mockk(relaxed = true) + every { model.getRegionManager() } returns regionManager + val region = mockk(relaxed = true) + every { region.recognizedTexts } returns arrayListOf(ocrData) + // Fix: Mock query for PDF export coordinates + coEvery { regionManager.getRegionsInRect(any()) } returns listOf(region) + + every { model.getContentBounds() } returns RectF(0f, 0f, 500f, 500f) + every { model.canvasType } returns CanvasType.INFINITE + every { model.backgroundStyle } returns BackgroundStyle.Blank() + + val mockDoc = createMockPdfDocumentWrapper() + val outputStream = ByteArrayOutputStream() + + // We mock the PDF stream to verify interactions + mockkConstructor(PDDocument::class) + mockkConstructor(PDPage::class) + mockkConstructor(PDPageContentStream::class) + + every { anyConstructed().beginText() } returns Unit + every { anyConstructed().setRenderingMode(any()) } returns Unit + every { anyConstructed().setFont(any(), any()) } returns Unit + every { anyConstructed().newLineAtOffset(any(), any()) } returns Unit + every { anyConstructed().showText(any()) } returns Unit + every { anyConstructed().endText() } returns Unit + every { anyConstructed().close() } returns Unit + + try { + PdfExporter.export( + context, + model, + outputStream, + isVector = true, + callback = null, + pdfDocumentFactory = { mockDoc }, + ) + } catch (e: Throwable) { + // Ignore ExceptionInInitializerError from PDType1Font.HELVETICA in Robolectric + } + + // Verify OCR rendering calls that occur before the font initialization crash + verify { anyConstructed().beginText() } + verify { + 
anyConstructed().setRenderingMode( + com.tom_roush.pdfbox.pdmodel.graphics.state.RenderingMode.NEITHER, + ) + } + + unmockkConstructor(PDDocument::class) + unmockkConstructor(PDPage::class) + unmockkConstructor(PDPageContentStream::class) + } }