-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenome_tags.py
More file actions
executable file
·87 lines (74 loc) · 3.45 KB
/
genome_tags.py
File metadata and controls
executable file
·87 lines (74 loc) · 3.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import argparse
from pathlib import Path
from collections import Counter
# Human-readable feature names. Order is significant: value_to_tag() labels the
# value at position i of an entry's "gene_values" list with FEATURE_NAMES[i].
# The topic comments below are only a reading aid and have no runtime meaning.
FEATURE_NAMES = [
    # Vocals
    "Vocal Register",
    "Vocal Timbre Thin to Full",
    "Vocal Breathiness",
    "Vocal Smoothness",
    "Vocal Grittiness",
    "Vocal Nasality",
    "Vocal Accompaniment",
    # Harmony
    "Minor / Major Key Tonality",
    "Harmonic Sophistication",
    # Tempo, meter and rhythmic feel
    "Tempo",
    "Cut Time Feel",
    "Triple Meter",
    "Compound Meter",
    "Odd Meter",
    "Swing Feel",
    "Shuffle Feel",
    "Syncopation Low to High",
    "Backbeat",
    "Danceability",
    # Instruments
    "Drum Set",
    "Drum Aggressiveness",
    "Synthetic Drums",
    "Percussion",
    "Electric Guitar",
    "Electric Guitar Distortion",
    "Acoustic Guitar",
    "String Ensemble",
    "Horn Ensemble",
    "Piano",
    "Organ",
    "Rhodes",
    "Synthesizer",
    "Synth Timbre",
    "Bass Guitar",
    "Reed Instrument",
    # Lyrics
    "Angry Lyrics",
    "Sad Lyrics",
    "Happy/Joyful Lyrics",
    "Humorous Lyrics",
    "Love/Romance Lyrics",
    "Social/Political Lyrics",
    "Abstract Lyrics",
    "Explicit Lyrics",
    # Recording and overall sound
    "Live Recording",
    "Audio Production",
    "Aural Intensity",
    "Acoustic Sonority",
    "Electric Sonority",
    "Synthetic Sonority",
    # Focus
    "Focus on Lead Vocal",
    "Focus on Lyrics",
    "Focus on Melody",
    "Focus on Vocal Accompaniment",
    "Focus on Rhythmic Groove",
    "Focus on Musical Arrangements",
    "Focus on Form",
    "Focus on Riffs",
    "Focus on Performance",
]

# Position of the tonality feature. value_to_tag() gives it its own labels
# ("Minor" / "Ambiguous Key Mode" / "Major") instead of a Low/Moderate/High suffix.
MAJOR_MINOR_IDX = FEATURE_NAMES.index("Minor / Major Key Tonality")
def value_to_tag(i, v):
    """Map one feature value to its tag string.

    Args:
        i: Position of the feature in ``FEATURE_NAMES``.
        v: Numeric value of that feature, or ``None`` when there is no value.

    Returns:
        The tag string, or ``None`` when no tag applies. No tag applies when
        ``v`` is ``None`` or NaN, when ``i`` has no entry in ``FEATURE_NAMES``,
        or, for every feature except tonality, when ``v`` is not greater than
        zero.

        The tonality feature (``MAJOR_MINOR_IDX``) maps to ``"Minor"`` below
        0.33, ``"Ambiguous Key Mode"`` from 0.33 up to but excluding 0.66, and
        ``"Major"`` from 0.66 upwards. Every other feature maps to its name
        followed by ``" Low"`` (above 0 and below 0.33), ``" Moderate"``
        (0.33 up to but excluding 0.66) or ``" High"`` (0.66 and above).
    """
    if v is None:
        return None

    # A position without a name cannot be labelled. Without this check an
    # over-long value list raises IndexError, and a negative position would
    # silently borrow a name from the end of the list.
    if not 0 <= i < len(FEATURE_NAMES):
        return None

    if i == MAJOR_MINOR_IDX:
        if v < 0.33:
            return "Minor"
        if v < 0.66:
            return "Ambiguous Key Mode"
        if v >= 0.66:
            return "Major"
        # Only reached when every comparison is false (NaN): emit no tag
        # rather than defaulting to "Major".
        return None

    name = FEATURE_NAMES[i]
    if 0.0 < v < 0.33:
        return name + " Low"
    if 0.33 <= v < 0.66:
        return name + " Moderate"
    if v >= 0.66:
        return name + " High"
    # Zero, negative and NaN values produce no tag.
    return None
def values_to_tags(values):
    """Return the tags for a sequence of feature values, in input order.

    Each value is passed to ``value_to_tag`` together with its position in
    ``values``. Positions for which that call returns ``None`` add nothing to
    the result.
    """
    kept = []
    for position, value in enumerate(values):
        label = value_to_tag(position, value)
        if label is not None:
            kept.append(label)
    return kept
def convert_file(in_path: Path, out_path: Path):
    """Convert every entry's ``gene_values`` into ``positive_tags`` and save the result.

    The input file is read as UTF-8 JSON and must hold an object whose values
    are themselves objects. For each entry:

    * if ``gene_values`` is a list, it is replaced by a ``positive_tags`` list
      built with ``values_to_tags`` and the ``gene_values`` key is removed;
    * otherwise the entry is left as it is, except that a missing
      ``positive_tags`` key is set to an empty list.

    A warning is printed for every entry whose value count differs from the
    number of feature names. The tag counts (most common first) and the number
    of distinct tags are printed before the file is written.

    Args:
        in_path: Path of the JSON file to read (``Path`` or ``str``).
        out_path: Path of the JSON file to write (``Path`` or ``str``).
    """
    data = json.loads(Path(in_path).read_text(encoding="utf-8"))
    counter = Counter()

    for k, item in data.items():
        values = item.get("gene_values")
        if not isinstance(values, list):
            # Nothing to convert; just make sure the key exists.
            item.setdefault("positive_tags", [])
            continue

        named = len(FEATURE_NAMES)
        if len(values) != named:
            print(f"Warning: entry {k} has {len(values)} values, "
                  f"but feature list has {len(FEATURE_NAMES)} names.")

        # Values beyond the last feature name cannot be labelled; dropping them
        # keeps the warned-about mismatch from turning into an IndexError.
        tags = values_to_tags(values[:named])
        item["positive_tags"] = tags
        counter.update(tags)
        item.pop("gene_values", None)

    # Global tag index, sorted by descending count.
    sorted_tags = counter.most_common()
    print(sorted_tags)
    print(len(sorted_tags))

    # Coerce like in_path above so that a plain string path also works.
    out_path = Path(out_path)
    out_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"Saved to: {out_path}")
def main():
    """Parse the command-line options and run the conversion.

    Two optional path arguments are accepted: ``-i/--input`` (default
    ``genome_index_split.json``) and ``-o/--output`` (default
    ``genome_index_split_tags.json``). Both are handed to ``convert_file``.
    """
    cli = argparse.ArgumentParser(
        description="Convert gene_values to positive_tags with 3-level bins."
    )

    # (short flag, long flag, default file name, help text)
    path_options = (
        ("-i", "--input", "genome_index_split.json", "Input JSON path."),
        ("-o", "--output", "genome_index_split_tags.json", "Output JSON path."),
    )
    for short_flag, long_flag, default_name, help_text in path_options:
        cli.add_argument(
            short_flag,
            long_flag,
            type=Path,
            default=Path(default_name),
            help=help_text,
        )

    options = cli.parse_args()
    convert_file(options.input, options.output)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()