Skip to content

Commit f52d713

Browse files
authored
Merge pull request #51 from cfe-lab/small-improvements-0004
Small improvements 0004
2 parents 3e2aea5 + 3ceee5e commit f52d713

14 files changed

Lines changed: 292 additions & 181 deletions

File tree

alldata/bblab_site/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ dependencies = [
3737
"django-nyt==1.4.1",
3838
"django-sekizai==4.1.0",
3939
"drawsvg==2.4.0",
40-
"multicsv==1.0.5",
40+
"multicsv==1.0.6",
4141
"genetracks @ git+https://github.com/cfe-lab/genetracks.git@a7f20f644ab86b451e870c9ace0621af8ecae0df",
4242
"html5lib==1.1",
4343
"httplib2==0.20.4",

alldata/bblab_site/tools/isoforms_plot/index.html

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@
150150
padding: 40px;
151151
text-align: center;
152152
transition: all 0.3s ease;
153+
cursor: pointer;
153154
}
154155

155156
.upload-section:hover {
@@ -474,7 +475,7 @@ <h3>Transcripts</h3>
474475
</p>
475476

476477
<p>
477-
Three optional fields control how transcripts appear: <code>label</code>, <code>group</code>, and <code>comment</code>. The <code>label</code> appears at the right side above the transcript (typically a gene or isoform name). The <code>group</code> field clusters related transcripts visually—transcripts sharing the same group name are drawn as a block, marked by a vertical line on the left edge of the plot. The <code>comment</code> appears on the right side of the transcript (commonly used for read counts or sample metadata).
478+
Three optional fields control how transcripts appear: <code>label</code>, <code>group</code>, and <code>N_observed</code>. The <code>label</code> appears at the right side above the transcript (typically a gene or isoform name). The <code>group</code> field clusters related transcripts visually—transcripts sharing the same group name in consecutive rows are drawn as a block, marked by a vertical line on the left edge of the plot. The <code>N_observed</code> value is shown in parentheses on the right side of the transcript (commonly used for read counts or sample metadata).
478479
</p>
479480

480481
<p>
@@ -509,10 +510,13 @@ <h3>Technical Details</h3>
509510
</div>
510511

511512
<script>
512-
document.getElementById('file').addEventListener('change', function (e) {
513-
const fileDisplay = document.getElementById('fileNameDisplay');
514-
const form = document.getElementById('mainForm');
513+
const fileInput = document.getElementById('file');
514+
const fileDisplay = document.getElementById('fileNameDisplay');
515+
const form = document.getElementById('mainForm');
516+
const uploadSection = document.querySelector('.upload-section');
515517

518+
// Handle file selection via button
519+
fileInput.addEventListener('change', function (e) {
516520
if (e.target.files.length > 0) {
517521
fileDisplay.textContent = '📄 ' + e.target.files[0].name;
518522
fileDisplay.style.color = '#667eea';
@@ -524,5 +528,56 @@ <h3>Technical Details</h3>
524528
fileDisplay.textContent = '';
525529
}
526530
});
531+
532+
// Prevent default drag behaviors
533+
['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
534+
uploadSection.addEventListener(eventName, preventDefaults, false);
535+
document.body.addEventListener(eventName, preventDefaults, false);
536+
});
537+
538+
/**
 * Cancel the browser's built-in handling of a drag-and-drop event and stop
 * the event from bubbling any further up the DOM.
 *
 * @param {Event} e - The drag/drop event being handled.
 */
function preventDefaults(e) {
  // Order matters only for readability: first cancel the default action,
  // then halt propagation to ancestor listeners.
  e.preventDefault();
  e.stopPropagation();
}
542+
543+
// Highlight drop area when item is dragged over it
544+
['dragenter', 'dragover'].forEach(eventName => {
545+
uploadSection.addEventListener(eventName, highlight, false);
546+
});
547+
548+
['dragleave', 'drop'].forEach(eventName => {
549+
uploadSection.addEventListener(eventName, unhighlight, false);
550+
});
551+
552+
/**
 * Give the upload area its "active drop target" look while something is
 * being dragged over it.
 *
 * @param {Event} e - The triggering drag event (unused).
 */
function highlight(e) {
  const dropZoneStyle = uploadSection.style;
  dropZoneStyle.borderColor = '#667eea';
  dropZoneStyle.backgroundColor = '#f9fafb';
}
556+
557+
/**
 * Restore the upload area's resting colours once the drag leaves it or the
 * item has been dropped.
 *
 * @param {Event} e - The triggering drag event (unused).
 */
function unhighlight(e) {
  const dropZoneStyle = uploadSection.style;
  dropZoneStyle.borderColor = '#d1d5db';
  dropZoneStyle.backgroundColor = 'white';
}
561+
562+
// Handle dropped files
563+
uploadSection.addEventListener('drop', handleDrop, false);
564+
565+
/**
 * Handle a drop on the upload area: hand the dropped files to the file
 * input, show the first file's name, and submit the form immediately.
 *
 * Drops that carry no files are ignored.
 *
 * @param {DragEvent} e - The drop event; its dataTransfer holds the files.
 */
function handleDrop(e) {
  const droppedFiles = e.dataTransfer.files;

  // Nothing to upload (for example, dragged text rather than a file).
  if (droppedFiles.length === 0) {
    return;
  }

  // Make the form behave as if the files had been picked via the input.
  fileInput.files = droppedFiles;

  // Echo the chosen file name back to the user.
  fileDisplay.textContent = '📄 ' + droppedFiles[0].name;
  fileDisplay.style.color = '#667eea';
  fileDisplay.style.fontWeight = '500';

  // Submit straight away; no separate confirmation step.
  form.submit();
}
527582
</script>
528-
{% endblock %}
583+
{% endblock %}

alldata/bblab_site/tools/isoforms_plot/isoforms_plot/__main__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
import sys
44
from typing import Sequence
55

6-
from isoforms_plot import plotter, parser, compiler
6+
from isoforms_plot import lexer, plotter, parser, compiler
77

88

99
def main_typed(input_csv: Path, output_svg: Path) -> None:
10-
parsed = parser.parse(input_csv)
10+
lexed = lexer.lex(input_csv)
11+
parsed = parser.parse(lexed)
1112
compiled = compiler.compile(parsed)
1213
plot = plotter.plot(
1314
compiled.transcripts,

alldata/bblab_site/tools/isoforms_plot/isoforms_plot/compiler.py

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66
- resolve "end" (None) to END_POS
77
"""
88

9-
from collections import Counter, defaultdict
9+
from collections import Counter
1010
from dataclasses import dataclass
1111
from typing import Literal, Optional, Sequence, Tuple
1212

1313
from isoforms_plot.parser import AST, Transcript, SpliceSiteColour
1414
import isoforms_plot.exceptions as ex
1515

16-
END_POS = 9632
16+
END_POS = 9719
1717

1818

1919
@dataclass(frozen=True)
@@ -132,11 +132,14 @@ def compile_transcripts(
132132
next_fragment=(next_part.start, next_part.end),
133133
)
134134

135+
# Wrap N_observed in parentheses for display as comment
136+
comment = f"({transcript.N_observed})" if transcript.N_observed else None
137+
135138
compiled_transcripts.append(
136139
CompiledTranscript(
137140
parts=parts_tuple,
138141
label=transcript.label,
139-
comment=transcript.comment,
142+
comment=comment,
140143
)
141144
)
142145

@@ -147,7 +150,7 @@ def compile_transcripts(
147150
for transcript in compiled_transcripts:
148151
current_label = transcript.label
149152
if current_label is not None and current_label == prev_label:
150-
# Remove duplicate consecutive label
153+
# Remove duplicate consecutive label (keep comment intact)
151154
deduplicated_transcripts.append(
152155
CompiledTranscript(
153156
parts=transcript.parts,
@@ -162,25 +165,31 @@ def compile_transcripts(
162165
compiled_transcripts = deduplicated_transcripts
163166

164167
# Build groups structure
165-
# Preserve order of first appearance
166-
groups_order = []
167-
group_counts = defaultdict(int)
168-
last_group = None
169-
last_group_count = 0
168+
# Groups are consecutive runs of transcripts with the same group value
169+
# Preserve order and allow the same group name to appear multiple times
170+
groups_list = [] # List of (group_name, size) tuples
171+
SENTINEL = object() # Unique sentinel that's not None
172+
last_group = SENTINEL
173+
current_group_size = 0
174+
170175
for transcript in parsed_transcripts:
171176
if transcript.group != last_group:
172-
group_counts[last_group] = last_group_count
177+
# Save the previous group if it exists
178+
if last_group is not SENTINEL:
179+
groups_list.append((last_group, current_group_size))
180+
# Start a new group
173181
last_group = transcript.group
174-
groups_order.append(transcript.group)
175-
last_group_count = 0
176-
last_group_count += 1
182+
current_group_size = 0
183+
current_group_size += 1
177184

178-
group_counts[last_group] = last_group_count # for the final group
185+
# Don't forget the final group
186+
if last_group is not SENTINEL:
187+
groups_list.append((last_group, current_group_size))
179188

180-
# Build groups list
189+
# Build CompiledGroup objects
181190
compiled_groups = [
182-
CompiledGroup(name=group_name, size=group_counts[group_name])
183-
for group_name in groups_order
191+
CompiledGroup(name=group_name, size=size)
192+
for group_name, size in groups_list
184193
]
185194

186195
return compiled_transcripts, compiled_groups

alldata/bblab_site/tools/isoforms_plot/isoforms_plot/exceptions.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def __init__(self, fragment_str: str, previous_str: str, next_str: str) -> None:
3131
self.next_str = next_str
3232
super().__init__(
3333
f"Invalid fragment string: '{fragment_str}'. Expected format 'start-end'.\n"
34-
f"Context: ...{previous_str}|HERE|{next_str}..."
34+
f"Context: {previous_str}|HERE|{next_str}"
3535
)
3636

3737

@@ -44,7 +44,7 @@ def __init__(self, fragment_str: str, previous_str: str, next_str: str) -> None:
4444
self.next_str = next_str
4545
super().__init__(
4646
f"Empty fragment string found.\n"
47-
f"Context: ...{previous_str}|HERE|{next_str}..."
47+
f"Context: {previous_str}|HERE|{next_str}"
4848
)
4949

5050

@@ -60,7 +60,7 @@ def __init__(
6060
self.next_str = next_str
6161
super().__init__(
6262
f"Fragment start '{start_str}' is not a valid integer in fragment '{fragment_str}'.\n"
63-
f"Context: ...{previous_str}|HERE|{next_str}..."
63+
f"Context: {previous_str}|HERE|{next_str}"
6464
)
6565

6666

@@ -76,7 +76,7 @@ def __init__(
7676
self.next_str = next_str
7777
super().__init__(
7878
f"Fragment start {start} must be positive (>= 1) in fragment '{fragment_str}'.\n"
79-
f"Context: ...{previous_str}|HERE|{next_str}..."
79+
f"Context: {previous_str}|HERE|{next_str}"
8080
)
8181

8282

@@ -92,7 +92,7 @@ def __init__(
9292
self.next_str = next_str
9393
super().__init__(
9494
f"Fragment end '{end_str}' is not a valid integer or 'end' keyword in fragment '{fragment_str}'.\n"
95-
f"Context: ...{previous_str}|HERE|{next_str}..."
95+
f"Context: {previous_str}|HERE|{next_str}"
9696
)
9797

9898

@@ -108,7 +108,7 @@ def __init__(
108108
self.next_str = next_str
109109
super().__init__(
110110
f"Fragment end {end} must be positive (>= 1) in fragment '{fragment_str}'.\n"
111-
f"Context: ...{previous_str}|HERE|{next_str}..."
111+
f"Context: {previous_str}|HERE|{next_str}"
112112
)
113113

114114

@@ -125,7 +125,7 @@ def __init__(
125125
self.next_str = next_str
126126
super().__init__(
127127
f"Fragment end {end} cannot be less than start {start} in fragment '{fragment_str}'.\n"
128-
f"Context: ...{previous_str}|HERE|{next_str}..."
128+
f"Context: {previous_str}|HERE|{next_str}"
129129
)
130130

131131

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""
2+
The lexer handles encodings, line endings, and other low-level details of reading the CSV file, so that the parser receives clean, normalized text (wrapped as a multicsv file) regardless of the original file's format.
3+
"""
4+
5+
from io import StringIO
6+
from pathlib import Path
7+
from typing import BinaryIO, TextIO
8+
from multicsv import MultiCSVFile
9+
import multicsv
10+
11+
12+
def decode_bytes(content: bytes) -> str:
    """Decode *content* to text, trying a short list of likely encodings.

    The encodings are tried in this order:

    1. ``utf-8-sig`` -- UTF-8 with or without a byte-order mark; a leading
       BOM is stripped. Anything plain ``utf-8`` can decode, this can too,
       so a separate ``utf-8`` attempt is unnecessary.
    2. ``cp1252`` -- Windows Western European, common in Excel exports. It
       leaves a handful of byte values undefined, so it can still fail.
    3. ``latin-1`` -- maps every byte to a code point and therefore never
       fails; it is the guaranteed fallback.

    Args:
        content: Raw bytes read from a file or an upload.

    Returns:
        The decoded text. Because of the ``latin-1`` fallback this function
        always returns and never raises ``UnicodeDecodeError``.
    """
    # Strict encodings first, so that well-formed input is detected properly
    # before falling back to the accept-everything codec.
    for enc in ("utf-8-sig", "cp1252"):
        try:
            return content.decode(enc)
        except UnicodeDecodeError:
            continue

    # latin-1 accepts all byte sequences, so this cannot fail.
    return content.decode("latin-1")
34+
35+
36+
def normalize_line_endings(content: str) -> str:
    r"""Return *content* with every line ending converted to ``\n``.

    Conversions performed:
    - Windows ``\r\n`` becomes ``\n``
    - Old Mac ``\r`` becomes ``\n``
    - Unix ``\n`` is left as it is
    """
    # CRLF must be collapsed before lone CRs are handled; otherwise each
    # Windows line ending would turn into two newlines.
    return content.replace("\r\n", "\n").replace("\r", "\n")
48+
49+
50+
def open_csv_file(input: Path | TextIO | BinaryIO) -> multicsv.MultiCSVFile:
    """Read *input* fully and wrap its normalized text as a multicsv file.

    Args:
        input: A filesystem path, or an already-open text or binary stream
            (for example an uploaded file).

    Returns:
        The result of ``multicsv.wrap`` over an in-memory ``StringIO`` that
        holds the decoded text with line endings normalized to ``\\n``.
    """
    # Paths are always read as bytes; streams yield str or bytes depending
    # on the mode they were opened in.
    raw = input.read_bytes() if isinstance(input, Path) else input.read()

    # Only bytes need decoding; text streams are already str.
    text = decode_bytes(raw) if isinstance(raw, bytes) else raw

    return multicsv.wrap(StringIO(normalize_line_endings(text)))
68+
69+
70+
def lex(file: Path | TextIO | BinaryIO) -> MultiCSVFile:
    """Lex *file* for the parser; a thin entry point over ``open_csv_file``."""
    lexed = open_csv_file(file)
    return lexed

0 commit comments

Comments
 (0)