cfe-lab
diff --git a/‎alldata/bblab_site/pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎alldata/bblab_site/pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎alldata/bblab_site/tools/isoforms_plot/index.html‎
Lines changed: 60 additions & 5 deletions b/‎alldata/bblab_site/tools/isoforms_plot/index.html‎
Lines changed: 60 additions & 5 deletions
diff --git a/‎alldata/bblab_site/tools/isoforms_plot/isoforms_plot/__main__.py‎
Lines changed: 3 additions & 2 deletions b/‎alldata/bblab_site/tools/isoforms_plot/isoforms_plot/__main__.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎alldata/bblab_site/tools/isoforms_plot/isoforms_plot/compiler.py‎
Lines changed: 26 additions & 17 deletions b/‎alldata/bblab_site/tools/isoforms_plot/isoforms_plot/compiler.py‎
Lines changed: 26 additions & 17 deletions
diff --git a/‎alldata/bblab_site/tools/isoforms_plot/isoforms_plot/exceptions.py‎
Lines changed: 7 additions & 7 deletions b/‎alldata/bblab_site/tools/isoforms_plot/isoforms_plot/exceptions.py‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎alldata/bblab_site/tools/isoforms_plot/isoforms_plot/lexer.py‎
Lines changed: 71 additions & 0 deletions b/‎alldata/bblab_site/tools/isoforms_plot/isoforms_plot/lexer.py‎
Lines changed: 71 additions & 0 deletions
@@ -37,7 +37,7 @@ dependencies = [
   "django-nyt==1.4.1",
   "django-sekizai==4.1.0",
   "drawsvg==2.4.0",
-  "multicsv==1.0.5",
+  "multicsv==1.0.6",
   "genetracks @ git+https://github.com/cfe-lab/genetracks.git@a7f20f644ab86b451e870c9ace0621af8ecae0df",
   "html5lib==1.1",
   "httplib2==0.20.4",
 
@@ -150,6 +150,7 @@
 			padding: 40px;
 			text-align: center;
 			transition: all 0.3s ease;
+			cursor: pointer;
 		}
 
 		.upload-section:hover {
@@ -474,7 +475,7 @@ <h3>Transcripts</h3>
 		</p>
 
 		<p>
-			Three optional fields control how transcripts appear: <code>label</code>, <code>group</code>, and <code>comment</code>. The <code>label</code> appears at the right side above the transcript (typically a gene or isoform name). The <code>group</code> field clusters related transcripts visually—transcripts sharing the same group name are drawn as a block, marked by a vertical line on the left edge of the plot. The <code>comment</code> appears on the right side of the transcript (commonly used for read counts or sample metadata).
+			Three optional fields control how transcripts appear: <code>label</code>, <code>group</code>, and <code>N_observed</code>. The <code>label</code> appears at the right side above the transcript (typically a gene or isoform name). The <code>group</code> field clusters related transcripts visually—transcripts sharing the same group name are drawn as a block, marked by a vertical line on the left edge of the plot. The <code>N_observed</code> value is shown in parentheses on the right side of the transcript (commonly used for read counts or sample metadata).
 		</p>
 
 		<p>
@@ -509,10 +510,13 @@ <h3>Technical Details</h3>
 </div>
 
 <script>
-	document.getElementById('file').addEventListener('change', function (e) {
-		const fileDisplay = document.getElementById('fileNameDisplay');
-		const form = document.getElementById('mainForm');
+	const fileInput = document.getElementById('file');
+	const fileDisplay = document.getElementById('fileNameDisplay');
+	const form = document.getElementById('mainForm');
+	const uploadSection = document.querySelector('.upload-section');
 
+	// Handle file selection via button
+	fileInput.addEventListener('change', function (e) {
 		if (e.target.files.length > 0) {
 			fileDisplay.textContent = '📄 ' + e.target.files[0].name;
 			fileDisplay.style.color = '#667eea';
@@ -524,5 +528,56 @@ <h3>Technical Details</h3>
 			fileDisplay.textContent = '';
 		}
 	});
+
+	// Cancel the default action and stop propagation of a drag-and-drop event
+	function preventDefaults(e) {
+		e.preventDefault();
+		e.stopPropagation();
+	}
+
+	// Register the canceller for every drag event, first on the upload area
+	// and then on the page body (same registration order for each event)
+	for (const eventName of ['dragenter', 'dragover', 'dragleave', 'drop']) {
+		for (const target of [uploadSection, document.body]) {
+			target.addEventListener(eventName, preventDefaults, false);
+		}
+	}
+
+	// Highlight the drop area while an item is dragged over it
+	['dragenter', 'dragover'].forEach(eventName => {
+		uploadSection.addEventListener(eventName, highlight, false);
+	});
+
+	// Remove the highlight when the item leaves the area or is dropped
+	['dragleave', 'drop'].forEach(eventName => {
+		uploadSection.addEventListener(eventName, unhighlight, false);
+	});
+
+	// Apply the "drag in progress" colours as inline styles
+	function highlight() {
+		uploadSection.style.borderColor = '#667eea';
+		uploadSection.style.backgroundColor = '#f9fafb';
+	}
+
+	// Undo highlight() by clearing the inline overrides.
+	// Inline styles outrank stylesheet rules, so writing hard-coded "default"
+	// colours here would keep masking whatever the .upload-section and
+	// .upload-section:hover rules declare for these properties after the
+	// first drag. Clearing them hands control back to the stylesheet.
+	function unhighlight() {
+		uploadSection.style.borderColor = '';
+		uploadSection.style.backgroundColor = '';
+	}
+
+	// React to files dropped onto the upload area
+	uploadSection.addEventListener('drop', handleDrop, false);
+
+	// Copy the dropped files into the file input, show the first file's name,
+	// and submit the form. Does nothing when the drop carries no files.
+	function handleDrop(e) {
+		const { files } = e.dataTransfer;
+		if (files.length === 0) {
+			return;
+		}
+
+		// Assign the dropped files to the file input
+		fileInput.files = files;
+
+		// Show the name of the first dropped file
+		Object.assign(fileDisplay.style, { color: '#667eea', fontWeight: '500' });
+		fileDisplay.textContent = '📄 ' + files[0].name;
+
+		// Submit right away, without waiting for a further click
+		form.submit();
+	}
 </script>
-{% endblock %}
+{% endblock %}
@@ -3,11 +3,12 @@
 import sys
 from typing import Sequence
 
-from isoforms_plot import plotter, parser, compiler
+from isoforms_plot import lexer, plotter, parser, compiler
 
 
 def main_typed(input_csv: Path, output_svg: Path) -> None:
-    parsed = parser.parse(input_csv)
+    lexed = lexer.lex(input_csv)
+    parsed = parser.parse(lexed)
     compiled = compiler.compile(parsed)
     plot = plotter.plot(
         compiled.transcripts,
 
@@ -6,14 +6,14 @@
 - resolve "end" (None) to END_POS
 """
 
-from collections import Counter, defaultdict
+from collections import Counter
 from dataclasses import dataclass
 from typing import Literal, Optional, Sequence, Tuple
 
 from isoforms_plot.parser import AST, Transcript, SpliceSiteColour
 import isoforms_plot.exceptions as ex
 
-END_POS = 9632
+END_POS = 9719
 
 
 @dataclass(frozen=True)
@@ -132,11 +132,14 @@ def compile_transcripts(
                     next_fragment=(next_part.start, next_part.end),
                 )
 
+        # Wrap N_observed in parentheses for display as comment
+        comment = f"({transcript.N_observed})" if transcript.N_observed else None
+
         compiled_transcripts.append(
             CompiledTranscript(
                 parts=parts_tuple,
                 label=transcript.label,
-                comment=transcript.comment,
+                comment=comment,
             )
         )
 
@@ -147,7 +150,7 @@ def compile_transcripts(
     for transcript in compiled_transcripts:
         current_label = transcript.label
         if current_label is not None and current_label == prev_label:
-            # Remove duplicate consecutive label
+            # Remove duplicate consecutive label (keep comment intact)
             deduplicated_transcripts.append(
                 CompiledTranscript(
                     parts=transcript.parts,
@@ -162,25 +165,31 @@ def compile_transcripts(
     compiled_transcripts = deduplicated_transcripts
 
     # Build groups structure
-    # Preserve order of first appearance
-    groups_order = []
-    group_counts = defaultdict(int)
-    last_group = None
-    last_group_count = 0
+    # Groups are consecutive runs of transcripts with the same group value
+    # Preserve order and allow the same group name to appear multiple times
+    groups_list = []  # List of (group_name, size) tuples
+    SENTINEL = object()  # Unique sentinel that's not None
+    last_group = SENTINEL
+    current_group_size = 0
+
     for transcript in parsed_transcripts:
         if transcript.group != last_group:
-            group_counts[last_group] = last_group_count
+            # Save the previous group if it exists
+            if last_group is not SENTINEL:
+                groups_list.append((last_group, current_group_size))
+            # Start a new group
             last_group = transcript.group
-            groups_order.append(transcript.group)
-            last_group_count = 0
-        last_group_count += 1
+            current_group_size = 0
+        current_group_size += 1
 
-    group_counts[last_group] = last_group_count  # for the final group
+    # Don't forget the final group
+    if last_group is not SENTINEL:
+        groups_list.append((last_group, current_group_size))
 
-    # Build groups list
+    # Build CompiledGroup objects
     compiled_groups = [
-        CompiledGroup(name=group_name, size=group_counts[group_name])
-        for group_name in groups_order
+        CompiledGroup(name=group_name, size=size)
+        for group_name, size in groups_list
     ]
 
     return compiled_transcripts, compiled_groups
 
@@ -31,7 +31,7 @@ def __init__(self, fragment_str: str, previous_str: str, next_str: str) -> None:
         self.next_str = next_str
         super().__init__(
             f"Invalid fragment string: '{fragment_str}'. Expected format 'start-end'.\n"
-            f"Context: ...{previous_str}|HERE|{next_str}..."
+            f"Context: {previous_str}|HERE|{next_str}"
         )
 
 
@@ -44,7 +44,7 @@ def __init__(self, fragment_str: str, previous_str: str, next_str: str) -> None:
         self.next_str = next_str
         super().__init__(
             f"Empty fragment string found.\n"
-            f"Context: ...{previous_str}|HERE|{next_str}..."
+            f"Context: {previous_str}|HERE|{next_str}"
         )
 
 
@@ -60,7 +60,7 @@ def __init__(
         self.next_str = next_str
         super().__init__(
             f"Fragment start '{start_str}' is not a valid integer in fragment '{fragment_str}'.\n"
-            f"Context: ...{previous_str}|HERE|{next_str}..."
+            f"Context: {previous_str}|HERE|{next_str}"
         )
 
 
@@ -76,7 +76,7 @@ def __init__(
         self.next_str = next_str
         super().__init__(
             f"Fragment start {start} must be positive (>= 1) in fragment '{fragment_str}'.\n"
-            f"Context: ...{previous_str}|HERE|{next_str}..."
+            f"Context: {previous_str}|HERE|{next_str}"
         )
 
 
@@ -92,7 +92,7 @@ def __init__(
         self.next_str = next_str
         super().__init__(
             f"Fragment end '{end_str}' is not a valid integer or 'end' keyword in fragment '{fragment_str}'.\n"
-            f"Context: ...{previous_str}|HERE|{next_str}..."
+            f"Context: {previous_str}|HERE|{next_str}"
         )
 
 
@@ -108,7 +108,7 @@ def __init__(
         self.next_str = next_str
         super().__init__(
             f"Fragment end {end} must be positive (>= 1) in fragment '{fragment_str}'.\n"
-            f"Context: ...{previous_str}|HERE|{next_str}..."
+            f"Context: {previous_str}|HERE|{next_str}"
         )
 
 
@@ -125,7 +125,7 @@ def __init__(
         self.next_str = next_str
         super().__init__(
             f"Fragment end {end} cannot be less than start {start} in fragment '{fragment_str}'.\n"
-            f"Context: ...{previous_str}|HERE|{next_str}..."
+            f"Context: {previous_str}|HERE|{next_str}"
         )
 
 
 
@@ -0,0 +1,71 @@
+"""
+Lexer handles encodings, line endings, and other low-level details of reading the CSV file, providing a clean interface for the parser to work with text lines regardless of the original file's format.
+"""
+
+from io import StringIO
+from pathlib import Path
+from typing import BinaryIO, TextIO
+from multicsv import MultiCSVFile
+import multicsv
+
+
+def decode_bytes(content: bytes) -> str:
+    """Decode *content* to text, trying encodings from strictest to most permissive.
+
+    The attempts are ordered so that a stricter codec gets the chance to
+    reject the data before a more permissive one accepts it:
+
+    1. ``utf-8-sig`` decodes UTF-8 with or without a byte-order mark and
+       strips the mark when present, so a separate plain ``utf-8`` attempt
+       could never succeed after it has failed.
+    2. ``cp1252`` (Windows Western European) rejects the few byte values it
+       leaves undefined.
+    3. ``latin-1`` (ISO 8859-1) maps every byte value to a character, so it
+       always succeeds and nothing listed after it could ever be reached.
+
+    Returns:
+        The decoded text. Because of the final ``latin-1`` fallback this
+        function does not raise ``UnicodeDecodeError``; bytes that are not
+        really in one of these encodings come back as mojibake instead.
+    """
+    for enc in ("utf-8-sig", "cp1252"):
+        try:
+            return content.decode(enc)
+        except (UnicodeDecodeError, LookupError):
+            # Not valid in this encoding (or codec unavailable): try the next one.
+            continue
+
+    # Total fallback: every byte sequence is valid latin-1.
+    return content.decode("latin-1")
+
+
+def normalize_line_endings(content: str) -> str:
+    """Return *content* with every line ending converted to a single LF.
+
+    Windows CRLF pairs and lone CR characters (old Mac style) both become
+    LF; existing LF characters are left untouched.
+    """
+    # CRLF must be collapsed before lone CRs, otherwise each CRLF pair
+    # would turn into two line feeds.
+    return content.replace("\r\n", "\n").replace("\r", "\n")
+
+
+def open_csv_file(input: Path | TextIO | BinaryIO) -> multicsv.MultiCSVFile:
+    """Read *input* fully and wrap its normalized text with ``multicsv.wrap``.
+
+    Args:
+        input: Either a filesystem path, whose bytes are read, or an already
+            open stream, whose ``read()`` result may be ``str`` or ``bytes``.
+
+    Returns:
+        Whatever ``multicsv.wrap`` returns for an in-memory text stream that
+        holds the decoded content with LF-only line endings.
+    """
+    raw = input.read_bytes() if isinstance(input, Path) else input.read()
+
+    # Bytes (a path on disk, or a binary stream such as a file upload) need
+    # decoding; text streams already hand back str.
+    text = decode_bytes(raw) if isinstance(raw, bytes) else raw
+
+    return multicsv.wrap(StringIO(normalize_line_endings(text)))
+
+
+def lex(file: Path | TextIO | BinaryIO) -> MultiCSVFile:
+    """Entry point of the lexer: delegate to ``open_csv_file`` and return its result.
+
+    Args:
+        file: A path or an open text/binary stream containing the CSV input.
+
+    Returns:
+        The ``MultiCSVFile`` produced by ``open_csv_file`` for *file*.
+    """
+    return open_csv_file(file)