<!doctype html><html lang="en" ><head><link rel="canonical" href="https://jinchaoli.com/"><meta charset="utf-8"><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta name="theme-color" media="(prefers-color-scheme: light)" content="#f7f7f7"><meta name="theme-color" media="(prefers-color-scheme: dark)" content="#1b1b1e"><meta name="apple-mobile-web-app-capable" content="yes"><meta name="apple-mobile-web-app-status-bar-style" content="black-translucent"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, user-scalable=no initial-scale=1, shrink-to-fit=no, viewport-fit=cover" ><link rel="shortcut icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>🎓</text></svg>" ><title>Jinchao Li</title><meta name="author" content="Jinchao Li"><meta name="description" content="Jinchao Li's Homepage" ><meta name="keywords" content="jinchao li, 李锦超, cuhk, 香港中文大学, research, speech and natural language processing"><meta property="og:site_name" content="Jinchao Li"><meta property="og:type" content="website"><meta property="og:title" content="Jinchao Li | About"><meta property="og:url" content="https://jinchaoli.com/" ><meta property="og:description" content="Jinchao Li's Homepage" ><meta property="og:locale" content="en"><meta name="twitter:card" content="summary"><meta name="twitter:title" content="About"><meta name="twitter:description" content="Jinchao Li's Homepage" ><meta name="twitter:site" content="@JinchaoLove"><meta name="twitter:creator" content="@JinchaoLove"> <script type="application/ld+json"> { "author": { "@type": "Person", "name": "Jinchao Li" }, "url": "https://jinchaoli.com/", "@type": "WebSite", "description": "Jinchao Li's Homepage", "headline": "About", "sameAs": ["https://github.com/JinchaoLove", "https://twitter.com/JinchaoLove", "https://scholar.google.com/citations?hl=en&user=0LWE_dkAAAAJ", "https://www.researchgate.net/profile/Jinchao-Li-3", "https://zhihu.com/people/jinchaoli"], "name": "Jinchao Li", "@context": "https://schema.org" } </script><link href="/assets/lib/fonts/main.css" rel="stylesheet"><link rel="stylesheet" href="/assets/lib/bootstrap/bootstrap.min.css"><link rel="stylesheet" href="/assets/lib/fontawesome-free/css/all.min.css"><link rel="stylesheet" href="/assets/css/theme.css"><link rel="stylesheet" href="/assets/lib/tocbot/tocbot.css"><link defer rel="stylesheet" href="/assets/lib/loading-attribute-polyfill/loading-attribute-polyfill.min.css"><link defer rel="stylesheet" href="/assets/lib/magnific-popup/magnific-popup.css"> <script type="text/javascript" defer> class ModeToggle { static get MODE_KEY() { return 'mode'; } static get MODE_ATTR() { return 'data-mode'; } static get DARK_MODE() { return 'dark'; } static get LIGHT_MODE() { return 'light'; } static get ID() { return 'mode-toggle'; } constructor() { if (this.hasMode) { if (this.isDarkMode) { if (!this.isSysDarkPrefer) { this.setDark(); } } else { if (this.isSysDarkPrefer) { this.setLight(); } } } let self = this; /* always follow the system prefers */ this.sysDarkPrefers.addEventListener('change', () => { if (self.hasMode) { if (self.isDarkMode) { if (!self.isSysDarkPrefer) { self.setDark(); } } else { if (self.isSysDarkPrefer) { self.setLight(); } } self.clearMode(); } self.notify(); }); } /* constructor() */ get sysDarkPrefers() { return window.matchMedia('(prefers-color-scheme: dark)'); } get isSysDarkPrefer() { return this.sysDarkPrefers.matches; } get 
isDarkMode() { return this.mode === ModeToggle.DARK_MODE; } get isLightMode() { return this.mode === ModeToggle.LIGHT_MODE; } get hasMode() { return this.mode != null; } get mode() { return sessionStorage.getItem(ModeToggle.MODE_KEY); } /* get the current mode on screen */ get modeStatus() { if (this.isDarkMode || (!this.hasMode && this.isSysDarkPrefer)) { return ModeToggle.DARK_MODE; } else { return ModeToggle.LIGHT_MODE; } } setDark() { document.documentElement.setAttribute(ModeToggle.MODE_ATTR, ModeToggle.DARK_MODE); sessionStorage.setItem(ModeToggle.MODE_KEY, ModeToggle.DARK_MODE); } setLight() { document.documentElement.setAttribute(ModeToggle.MODE_ATTR, ModeToggle.LIGHT_MODE); sessionStorage.setItem(ModeToggle.MODE_KEY, ModeToggle.LIGHT_MODE); } clearMode() { document.documentElement.removeAttribute(ModeToggle.MODE_ATTR); sessionStorage.removeItem(ModeToggle.MODE_KEY); } /* Notify another plugins that the theme mode has changed */ notify() { window.postMessage( { direction: ModeToggle.ID, message: this.modeStatus }, '*' ); } flipMode() { if (this.hasMode) { if (this.isSysDarkPrefer) { if (this.isLightMode) { this.clearMode(); } else { this.setLight(); } } else { if (this.isDarkMode) { this.clearMode(); } else { this.setDark(); } } } else { if (this.isSysDarkPrefer) { this.setLight(); } else { this.setDark(); } } this.notify(); } /* flipMode() */ } /* ModeToggle */ const modeToggle = new ModeToggle(); </script><body><aside aria-label="Sidebar" id="sidebar" class="d-flex flex-column align-items-end"><header class="profile-wrapper"> <a href="/" id="avatar" class="rounded-circle"><img src="/assets/img/avatar/Jinchao.png" width="112" height="112" alt="avatar" onerror="this.style.display='none'"></a><h1 class="site-title"> <a href="/">Jinchao Li</a></h1><p class="site-subtitle fst-italic mb-0">Jinchao Li's Homepage</p><div class="social"> <a href="javascript:location.href = 'mailto:' + ['jinchaolovefy','gmail.com'].join('@')" data-bs-toggle="tooltip" data-bs-placement="top" aria-label="email" data-bs-original-title="email" link-attr-ignore style="color: #0072c5;" > <i class="fas fa-envelope"></i> </a> <a href="https://scholar.google.com/citations?hl=en&user=SB7xjMoAAAAJ" data-bs-toggle="tooltip" data-bs-placement="top" aria-label="google-scholar" data-bs-original-title="google-scholar" link-attr-ignore target="_blank" rel="noopener noreferrer" style="color: #5c92f6;" > <i class="fa-brands fa-google-scholar"></i> </a> <a href="https://github.com/JinchaoLove" data-bs-toggle="tooltip" data-bs-placement="top" aria-label="github" data-bs-original-title="github" link-attr-ignore target="_blank" rel="noopener noreferrer" > <i class="fab fa-github"></i> </a> <a href="/feed.xml" data-bs-toggle="tooltip" data-bs-placement="top" aria-label="rss" data-bs-original-title="rss" link-attr-ignore style="color: #ee802f;" > <i class="fas fa-rss"></i> </a></div></header><nav class="flex-column flex-grow-1 w-100 ps-0"><ul class="nav"><li class="nav-item active"> <a href="/" class="nav-link"> <i class="fa-fw fas fa-home"></i> <span>HOME</span> </a><li class="nav-item"> <a href="/publications/" class="nav-link"> <i class="fa-fw fas fa-book"></i> <span>PUBLICATIONS</span> </a></ul><div class="blog-wrapper d-flex flex-wrap align-items-center w-100"><div class="nav-buttons"> <button id="nav-blog" class="nav-icon" title="" data-bs-toggle="tooltip" data-bs-placement="bottom" aria-label="Blog" data-bs-original-title="Blog" > <i class="fa-fw fas fa-edit"></i> </button> <button id="nav-categories" class="nav-icon" 
title="" data-bs-toggle="tooltip" data-bs-placement="bottom" aria-label="Categories" data-bs-original-title="Categories" > <i class="fa-fw fas fa-layer-group"></i> </button> <button id="nav-tags" class="nav-icon" title="" data-bs-toggle="tooltip" data-bs-placement="bottom" aria-label="Tags" data-bs-original-title="Tags" > <i class="fa-fw fas fa-tags"></i> </button> <button id="nav-archives" class="nav-icon" title="" data-bs-toggle="tooltip" data-bs-placement="bottom" aria-label="Archives" data-bs-original-title="Archives" > <i class="fa-fw fas fa-archive"></i> </button></div><div class="nav-contents"><div class="nav-blog"> <a href="/blog/" class="nav-title"> <i class="fa-fw fas fa-edit"></i> <span>BLOG</span> </a></div><div class="nav-categories"> <a href="/blog/categories/" class="nav-title"> <i class="fa-fw fas fa-layer-group"></i> <span>CATEGORIES</span> </a> <a href="/blog/categories/blogging/" class="tag">Blogging</a> <a href="/blog/categories/demo/" class="tag">Demo</a> <a href="/blog/categories/tutorial/" class="tag">Tutorial</a></div><div class="nav-tags"> <a href="/blog/tags/" class="nav-title"> <i class="fa-fw fas fa-tags"></i> <span>TAGS</span> </a><div class="tag"> <a class="tag-name" href="/blog/tags/getting-started/">getting started<span class="tag-size text-muted">1</span> </a></div><div class="tag"> <a class="tag-name" href="/blog/tags/homepage/">homepage<span class="tag-size text-muted">1</span> </a></div><div class="tag"> <a class="tag-name" href="/blog/tags/theme/">theme<span class="tag-size text-muted">1</span> </a></div><div class="tag"> <a class="tag-name" href="/blog/tags/typography/">typography<span class="tag-size text-muted">1</span> </a></div><div class="tag"> <a class="tag-name" href="/blog/tags/writing/">writing<span class="tag-size text-muted">1</span> </a></div></div><div class="nav-archives"> <a href="/blog/archives/" class="nav-title"> <i class="fa-fw fas fa-archive"></i> <span>ARCHIVES</span> </a><ul><li> <a href="/blog/exchange-homepage-and-about/">Exchange Homepage and About</a><li> <a href="/blog/customize-jekyll-chirpy/">Customize Your Jekyll Chirpy Theme</a><li> <a href="/blog/getting-started/">Getting Started</a><li> <a href="/blog/write-a-new-post/">Writing a New Post</a><li> <a href="/blog/text-and-typography/">Text and Typography</a></ul></div></div></div></nav></aside><header id="topbar-wrapper" aria-label="Top Bar"><div id="topbar" class="d-flex align-items-center justify-content-between px-lg-3 h-100" ><div class="d-flex align-items-center"> <button type="button" id="sidebar-trigger" class="btn btn-link" data-bs-toggle="tooltip" data-bs-placement="bottom" aria-label="Hide/show sidebar" data-bs-original-title="Hide/show sidebar" > <i class="fas fa-map-signs fa-fw"></i> </button><nav id="breadcrumb" aria-label="Breadcrumb"> <span>Home</span></nav><div id="topbar-title"> Home</div></div><div class="d-flex align-items-center"> <button type="button" id="search-trigger" class="btn btn-link" data-bs-toggle="tooltip" data-bs-placement="left" aria-label="Search (⌘/⌃ + k)" data-bs-original-title="Search (⌘/⌃ + k)" > <i class="fas fa-search fa-fw"></i> </button><div id="search-box"> <search class="align-items-center"> <i class="fas fa-search fa-fw"></i> <input class="form-control" id="search-input" type="search" aria-label="search" placeholder="Search (⌘/⌃ + k)" tabindex="0" autocomplete="true" list="suggestions" > <datalist id="suggestions"></datalist> </search></div><button type="button" id="search-cancel" class="btn btn-link text-decoration-none" 
data-bs-toggle="tooltip" data-bs-placement="bottom" aria-label="Cancel (⎋esc)" data-bs-original-title="Cancel (⎋esc)" > <i class="fa fa-times" aria-hidden="true"></i> </button> <span class="icon-border"></span> <button type="button" class="mode-toggle btn light" data-bs-toggle="tooltip" data-bs-placement="top" aria-label="Light/dark theme" data-bs-original-title="Light/dark theme" > <i class="fas fa-star-and-crescent"></i> </button> <button type="button" class="mode-toggle btn dark" data-bs-toggle="tooltip" data-bs-placement="top" aria-label="Light/dark theme" data-bs-original-title="Light/dark theme" > <i class="fas fa-sun"></i> </button> <span class="icon-border"></span> <button type="button" id="toc-trigger" class="btn btn-link" data-bs-toggle="tooltip" data-bs-placement="bottom" aria-label="Hide/show TOC" data-bs-original-title="Hide/show panel" > <i class="fas fa-list-ul fa-fw"></i> </button></div></div></header><div id="main-wrapper" class="d-flex justify-content-center"><div class="container-fluid d-flex flex-column px-xxl-3"><div class="row flex-grow-1"><main aria-label="Main Content" id="content-wrapper" class="col-10" ><article class="px-1"><div class="content"><div class="row"><div class="col-sm-3"><div class="preview d-flex align-items-center"><figure><picture> <source class="responsive-img-srcset" srcset=" /assets/img/avatar/Jinchao-480.webp 480w, /assets/img/avatar/Jinchao-800.webp 800w, /assets/img/avatar/Jinchao-1280.webp 1280w, " sizes="95vw" type="image/webp" loading="lazy" data-proofer-ignore="" /> <a href="/assets/img/avatar/Jinchao.png" class="popup img-link preview rounded shimmer"><img src="/assets/img/avatar/Jinchao.png" width="200" height="auto" alt="Jinchao Li" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" loading="lazy" loading="lazy"></a> </picture></figure></div></div><div class="col-sm-9"><p> <strong style="font-size: 1.5rem;">Jinchao Li (李锦超)</strong><br /> Ph.D. Candidate <a href="/assets/CV_jinchaoli.pdf" target="_blank">[Resume]</a><br /> The Chinese University of Hong Kong<br /> Hong Kong, China<br /> Email: jinchaolovefy [at] gmail.com<br /><div class="social"> <a href="javascript:location.href = 'mailto:' + ['jinchaolovefy','gmail.com'].join('@')" data-bs-toggle="tooltip" data-bs-placement="top" aria-label="email" data-bs-original-title="email" link-attr-ignore="" style="color: #0072c5;"> <i class="fas fa-envelope"></i> </a> <a href="https://scholar.google.com/citations?hl=en&user=SB7xjMoAAAAJ" data-bs-toggle="tooltip" data-bs-placement="top" aria-label="google-scholar" data-bs-original-title="google-scholar" link-attr-ignore="" target="_blank" rel="noopener noreferrer" style="color: #5c92f6;"> <i class="fa-brands fa-google-scholar"></i> </a> <a href="https://github.com/JinchaoLove" data-bs-toggle="tooltip" data-bs-placement="top" aria-label="github" data-bs-original-title="github" link-attr-ignore="" target="_blank" rel="noopener noreferrer"> <i class="fab fa-github"></i> </a> <a href="/feed.xml" data-bs-toggle="tooltip" data-bs-placement="top" aria-label="rss" data-bs-original-title="rss" link-attr-ignore="" style="color: #ee802f;"> <i class="fas fa-rss"></i> </a></div></p></div></div><h2 id="short-bio"><a href="#short-bio" class="anchor"><i class="fas fa-anchor"></i></a><span class="me-2">Short Bio</span></h2><p>I am a final-year Ph.D. 
student at the <a href="https://www.se.cuhk.edu.hk/laboratories/human-computer-communications-laboratory/">Human-Computer Communications Laboratory</a> (HCCL) at <a href="https://cuhk.edu.hk">The Chinese University of Hong Kong</a>, advised by Prof. <a href="https://www.se.cuhk.edu.hk/people/academic-staff/prof-meng-mei-ling-helen/">Helen Meng</a>. Before that, I obtained my B.S. with honors from <a href="https://www.nju.edu.cn">Nanjing University</a> in 2019. My research interests encompass <strong>human-centred AI in speech, language and healthcare</strong>, including:</p><ul><li>Neurocognitive Disorder Recognition<li>Multimodal Emotion Recognition<li>Multimodal Large Language Models</ul><h2 id="news"><a href="#news" class="anchor"><i class="fas fa-anchor"></i></a><span class="me-2">News</span></h2><ul><li>2024.12: One paper submitted to a journal (<a href="https://arxiv.org/pdf/2501.03727">under review</a>).<li>2023.01: Two papers (<a href="https://arxiv.org/pdf/2303.08019">1</a>, <a href="https://arxiv.org/pdf/2303.08027">2</a>) accepted by ICASSP 2023.<li>Fall 2022: Co-taught “Conversational AI systems” (ASR part) with Prof. Meng and colleagues at CUHK.<li>2022.09: Winner of two tasks in the “<a href="https://www.competitions.hume.ai/avb2022">ACII Affective Vocal Bursts (A-VB)</a>” competition organized by <a href="https://hume.ai">Hume AI</a>.</ul><h2 id="publications"><a href="#publications" class="anchor"><i class="fas fa-anchor"></i></a><span class="me-2"><a href="/publications/">Publications</a></span></h2><p>🤗 Thanks to all the collaborators for their great work! Check out my <a href="https://scholar.google.com/citations?hl=en&user=SB7xjMoAAAAJ">Google Scholar</a> for more information.</p><p><em>* indicates equal contributions.</em></p><div class="publications" style="font-size: 1.01rem;"><ol class="bibliography"><li><div class="row"><div class="col-sm-2"><div class="preview d-flex align-items-center"><figure><picture> <source class="responsive-img-srcset" srcset=" /assets/papers/24/titan-480.webp 480w, /assets/papers/24/titan-800.webp 800w, /assets/papers/24/titan-1280.webp 1280w, " sizes="200px" type="image/webp" loading="lazy" data-proofer-ignore="" /> <a href="/assets/papers/24/titan.png" class="popup img-link preview rounded shimmer"><img src="/assets/papers/24/titan.png" width="100%" height="auto" alt="papers/24/titan.png" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" loading="lazy" loading="lazy"></a> </picture></figure></div></div><div id="li2025detecting" class="col-sm-10"><div class="title">Detecting Neurocognitive Disorders through Analyses of Topic Evolution and Cross-modal Consistency in Visual-Stimulated Narratives</div><div class="author"> <span class="highlight-author">Jinchao Li*</span>, Yuejiao Wang*, Junan Li*, Jiawen Kang*, Bo Zheng, Simon Wong, <span class="more-authors" title="click to view 10 more authors" onclick=" var element = $(this); element.attr('title', ''); var more_authors_text = element.text() == '10 more authors' ? 
'Brian Mak, Helene Fung, Jean Woo, Man-Wai Mak, Timothy Kwok, Vincent Mok, Xianmin Gong, Xixin Wu, Xunying Liu, Patrick Wong' : '10 more authors'; var cursorPosition = 0; var textAdder = setInterval(function(){ element.text(more_authors_text.substring(0, cursorPosition + 1)); if (++cursorPosition == more_authors_text.length){ clearInterval(textAdder); } }, '10'); ">10 more authors</span>, Helen Meng</div><div class="periodical"> <em>In JSTSP (under review)</em>, 2025</div><div class="periodical"></div><div class="links"> <button class="abstract btn btn-sm z-depth-0" title="Click to show/hide abstract"> ABS <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <button class="bibtex btn btn-sm z-depth-0" title="Click to show/hide bibtex"> BIB <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <a href="https://arxiv.org/pdf/2501.03727" class="btn btn-sm z-depth-0" role="button">PDF</a></div><div class="abstract hidden"><p>Early detection of neurocognitive disorders (NCDs) is crucial for timely intervention and disease management. Speech analysis offers a non-intrusive and scalable screening method, particularly through narrative tasks in neuropsychological assessment tools. Traditional narrative analysis often focuses on local indicators in microstructure, such as word usage and syntax. While these features provide insights into language production abilities, they often fail to capture global narrative patterns, or macrostructures. Macrostructures include coherence, thematic organization, and logical progressions, reflecting essential cognitive skills potentially critical for recognizing NCDs. Addressing this gap, we propose to investigate specific cognitive and linguistic challenges by analyzing topical shifts, temporal dynamics, and the coherence of narratives over time, aiming to reveal cognitive deficits by identifying narrative impairments, and exploring their impact on communication and cognition. The investigation is based on the CU-MARVEL Rabbit Story corpus, which comprises recordings of a story-telling task from 758 older adults. We developed two approaches: the Dynamic Topic Models (DTM)-based temporal analysis to examine the evolution of topics over time, and the Text-Image Temporal Alignment Network (TITAN) to evaluate the coherence between spoken narratives and visual stimuli. DTM-based approach validated the effectiveness of dynamic topic consistency as a macrostructural metric (F1=0.61, AUC=0.78). The TITAN approach achieved the highest performance (F1=0.72, AUC=0.81), surpassing established microstructural and macrostructural feature sets. Cross-comparison and regression tasks further demonstrated the effectiveness of proposed dynamic macrostructural modeling approaches for NCD detection.</p></div><div class="bibtex hidden"><figure class="highlight"><pre><code class="language-bibtex" data-lang="bibtex"><span class="nc">@inproceedings</span><span class="p">{</span><span class="nl">li2025detecting</span><span class="p">,</span>
<span class="na">url</span> <span class="p">=</span> <span class="s">{https://arxiv.org/abs/2501.03727}</span><span class="p">,</span>
<span class="na">title</span> <span class="p">=</span> <span class="s">{Detecting Neurocognitive Disorders through Analyses of Topic Evolution and Cross-modal Consistency in Visual-Stimulated Narratives}</span><span class="p">,</span>
<span class="na">author</span> <span class="p">=</span> <span class="s">{Li*, Jinchao and Wang*, Yuejiao and Li*, Junan and Kang*, Jiawen and Zheng, Bo and Wong, Simon and Mak, Brian and Fung, Helene and Woo, Jean and Mak, Man-Wai and Kwok, Timothy and Mok, Vincent and Gong, Xianmin and Wu, Xixin and Liu, Xunying and Wong, Patrick and Meng, Helen}</span><span class="p">,</span>
<span class="na">booktitle</span> <span class="p">=</span> <span class="s">{JSTSP (under review)}</span><span class="p">,</span>
<span class="na">year</span> <span class="p">=</span> <span class="s">{2025}</span><span class="p">,</span>
<span class="na">organization</span> <span class="p">=</span> <span class="s">{IEEE}</span>
<span class="p">}</span></code></pre></figure></div></div></div><li><div class="row"><div class="col-sm-2"><div class="preview d-flex align-items-center"><figure><picture> <source class="responsive-img-srcset" srcset=" /assets/papers/23/emotion2vec-480.webp 480w, /assets/papers/23/emotion2vec-800.webp 800w, /assets/papers/23/emotion2vec-1280.webp 1280w, " sizes="200px" type="image/webp" loading="lazy" data-proofer-ignore="" /> <a href="/assets/papers/23/emotion2vec.png" class="popup img-link preview rounded shimmer"><img src="/assets/papers/23/emotion2vec.png" width="100%" height="auto" alt="papers/23/emotion2vec.png" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" loading="lazy" loading="lazy"></a> </picture></figure></div></div><div id="ma2023emotion2vec" class="col-sm-10"><div class="title">emotion2vec: Self-Supervised Pre-Training for Speech Emotion Representation</div><div class="author"> Ziyang Ma, Zhisheng Zheng, Jiaxin Ye, <span class="highlight-author">Jinchao Li</span>, Zhifu Gao, Shiliang Zhang, Xie Chen</div><div class="periodical"> <em>In ACL</em>, 2024</div><div class="periodical"></div><div class="links"> <button class="abstract btn btn-sm z-depth-0" title="Click to show/hide abstract"> ABS <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <button class="bibtex btn btn-sm z-depth-0" title="Click to show/hide bibtex"> BIB <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <a href="https://arxiv.org/pdf/2312.15185" class="btn btn-sm z-depth-0" role="button">PDF</a> <a href="https://www.modelscope.cn/models/iic/emotion2vec_base_finetuned/summary" class="btn btn-sm z-depth-0" role="button">DEMO</a> <a href="https://github.com/ddlBoJack/emotion2vec" class="btn btn-sm z-depth-0" role="button">CODE</a></div><div class="abstract hidden"><p>We propose emotion2vec, a universal speech emotion representation model. emotion2vec is pre-trained on open-source unlabeled emotion data through self-supervised online distillation, combining utterance-level loss and frame-level loss during pre-training. emotion2vec outperforms state-of-the-art pre-trained universal models and emotion specialist models by only training linear layers for the speech emotion recognition task on the mainstream IEMOCAP dataset. In addition, emotion2vec shows consistent improvements among 10 different languages of speech emotion recognition datasets. emotion2vec also shows excellent results on other emotion tasks, such as song emotion recognition, emotion prediction in conversation, and sentiment analysis. Comparison experiments, ablation experiments, and visualization comprehensively demonstrate the universal capability of the proposed emotion2vec. To the best of our knowledge, emotion2vec is the first universal representation model in various emotion-related tasks, filling a gap in the field.</p></div><div class="bibtex hidden"><figure class="highlight"><pre><code class="language-bibtex" data-lang="bibtex"><span class="nc">@inproceedings</span><span class="p">{</span><span class="nl">ma2023emotion2vec</span><span class="p">,</span>
<span class="na">url</span> <span class="p">=</span> <span class="s">{https://arxiv.org/abs/2312.15185}</span><span class="p">,</span>
<span class="na">title</span> <span class="p">=</span> <span class="s">{emotion2vec: Self-Supervised Pre-Training for Speech Emotion Representation}</span><span class="p">,</span>
<span class="na">author</span> <span class="p">=</span> <span class="s">{Ma, Ziyang and Zheng, Zhisheng and Ye, Jiaxin and Li, Jinchao and Gao, Zhifu and Zhang, Shiliang and Chen, Xie}</span><span class="p">,</span>
<span class="na">booktitle</span> <span class="p">=</span> <span class="s">{ACL}</span><span class="p">,</span>
<span class="na">year</span> <span class="p">=</span> <span class="s">{2024}</span><span class="p">,</span>
<span class="na">organization</span> <span class="p">=</span> <span class="s">{ACL}</span>
<span class="p">}</span></code></pre></figure></div></div></div><li><div class="row"><div class="col-sm-2"><div class="preview d-flex align-items-center"><figure><picture> <source class="responsive-img-srcset" srcset=" /assets/papers/23/MultiAD_ICASSP23-480.webp 480w, /assets/papers/23/MultiAD_ICASSP23-800.webp 800w, /assets/papers/23/MultiAD_ICASSP23-1280.webp 1280w, " sizes="200px" type="image/webp" loading="lazy" data-proofer-ignore="" /> <a href="/assets/papers/23/MultiAD_ICASSP23.png" class="popup img-link preview rounded shimmer"><img src="/assets/papers/23/MultiAD_ICASSP23.png" width="100%" height="auto" alt="papers/23/MultiAD_ICASSP23.png" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" loading="lazy" loading="lazy"></a> </picture></figure></div></div><div id="li2023leveraging" class="col-sm-10"><div class="title">Leveraging Pretrained Representations With Task-Related Keywords for Alzheimer’s Disease Detection</div><div class="author"> <span class="highlight-author">Jinchao Li</span>, Kaitao Song, Junan Li, Bo Zheng, Dongsheng Li, Xixin Wu, Xunying Liu, Helen Meng</div><div class="periodical"> <em>In ICASSP</em>, 2023</div><div class="periodical"></div><div class="links"> <button class="abstract btn btn-sm z-depth-0" title="Click to show/hide abstract"> ABS <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <button class="bibtex btn btn-sm z-depth-0" title="Click to show/hide bibtex"> BIB <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <a href="https://arxiv.org/pdf/2303.08019" class="btn btn-sm z-depth-0" role="button">PDF</a> <a href="/assets/papers/23/AD_poster.pdf" class="btn btn-sm z-depth-0" role="button" target="_blank" rel="noopener noreferrer">POSTER</a></div><div class="abstract hidden"><p>With the global population aging rapidly, Alzheimer’s disease (AD) is particularly prominent in older adults, which has an insidious onset and leads to a gradual, irreversible deterioration in cognitive domains (memory, communication, etc.). Speech-based AD detection opens up the possibility of widespread screening and timely disease intervention. Recent advances in pre-trained models motivate AD detection modeling to shift from low-level features to high-level representations. This paper presents several efficient methods to extract better AD-related cues from high-level acoustic and linguistic features. Based on these features, the paper also proposes a novel task-oriented approach by modeling the relationship between the participants’ description and the cognitive task. Experiments are carried out on the ADReSS dataset in a binary classification setup, and models are evaluated on the unseen test set. Results and comparison with recent literature demonstrate the efficiency and superior performance of proposed acoustic, linguistic and task-oriented methods. The findings also show the importance of semantic and syntactic information, and feasibility of automation and generalization with the promising audio-only and task-oriented methods for the AD detection task.</p></div><div class="bibtex hidden"><figure class="highlight"><pre><code class="language-bibtex" data-lang="bibtex"><span class="nc">@inproceedings</span><span class="p">{</span><span class="nl">li2023leveraging</span><span class="p">,</span>
<span class="na">url</span> <span class="p">=</span> <span class="s">{https://ieeexplore.ieee.org/document/10096205}</span><span class="p">,</span>
<span class="na">title</span> <span class="p">=</span> <span class="s">{Leveraging Pretrained Representations With Task-Related Keywords for Alzheimer’s Disease Detection}</span><span class="p">,</span>
<span class="na">author</span> <span class="p">=</span> <span class="s">{Li, Jinchao and Song, Kaitao and Li, Junan and Zheng, Bo and Li, Dongsheng and Wu, Xixin and Liu, Xunying and Meng, Helen}</span><span class="p">,</span>
<span class="na">booktitle</span> <span class="p">=</span> <span class="s">{ICASSP}</span><span class="p">,</span>
<span class="na">year</span> <span class="p">=</span> <span class="s">{2023}</span><span class="p">,</span>
<span class="na">organization</span> <span class="p">=</span> <span class="s">{IEEE}</span>
<span class="p">}</span></code></pre></figure></div></div></div><li><div class="row"><div class="col-sm-2"><div class="preview d-flex align-items-center"><figure><picture> <source class="responsive-img-srcset" srcset=" /assets/papers/23/AVB_ICASSP23-480.webp 480w, /assets/papers/23/AVB_ICASSP23-800.webp 800w, /assets/papers/23/AVB_ICASSP23-1280.webp 1280w, " sizes="200px" type="image/webp" loading="lazy" data-proofer-ignore="" /> <a href="/assets/papers/23/AVB_ICASSP23.png" class="popup img-link preview rounded shimmer"><img src="/assets/papers/23/AVB_ICASSP23.png" width="100%" height="auto" alt="papers/23/AVB_ICASSP23.png" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" loading="lazy" loading="lazy"></a> </picture></figure></div></div><div id="li2023hierarchical" class="col-sm-10"><div class="title">A Hierarchical Regression Chain Framework for Affective Vocal Burst Recognition</div><div class="author"> <span class="highlight-author">Jinchao Li</span>, Xixin Wu, Kaitao Song, Dongsheng Li, Xunying Liu, Helen Meng</div><div class="periodical"> <em>In ICASSP</em>, 2023</div><div class="periodical"></div><div class="links"> <button class="abstract btn btn-sm z-depth-0" title="Click to show/hide abstract"> ABS <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <button class="bibtex btn btn-sm z-depth-0" title="Click to show/hide bibtex"> BIB <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <a href="https://arxiv.org/pdf/2303.08027" class="btn btn-sm z-depth-0" role="button">PDF</a> <a href="/assets/papers/23/AVB_poster.pdf" class="btn btn-sm z-depth-0" role="button" target="_blank" rel="noopener noreferrer">POSTER</a> <a href="https://github.com/JinchaoLove/AffectiveVocalBurstRecognition" class="btn btn-sm z-depth-0" role="button">CODE</a></div><div class="abstract hidden"><p>As a common way of emotion signaling via non-linguistic vocalizations, vocal burst (VB) plays an important role in daily social interaction. Understanding and modeling human vocal bursts are indispensable for developing robust and general artificial intelligence. Exploring computational approaches for understanding vocal bursts is attracting increasing research attention. In this work, we propose a hierarchical framework, based on chain regression models, for affective recognition from VBs, that explicitly considers multiple relationships: (i) between emotional states and diverse cultures; (ii) between low-dimensional (arousal & valence) and high-dimensional (10 emotion classes) emotion spaces; and (iii) between various emotion classes within the high-dimensional space. To address the challenge of data sparsity, we also use self-supervised learning (SSL) representations with layer-wise and temporal aggregation modules. The proposed systems participated in the ACII Affective Vocal Burst (A-VB) Challenge 2022 and ranked first in the "TWO” and "CULTURE” tasks. Experimental results based on the ACII Challenge 2022 dataset demonstrate the superior performance of the proposed system and the effectiveness of considering multiple relationships using hierarchical regression chain models.</p></div><div class="bibtex hidden"><figure class="highlight"><pre><code class="language-bibtex" data-lang="bibtex"><span class="nc">@inproceedings</span><span class="p">{</span><span class="nl">li2023hierarchical</span><span class="p">,</span>
<span class="na">url</span> <span class="p">=</span> <span class="s">{https://ieeexplore.ieee.org/document/10096395/}</span><span class="p">,</span>
<span class="na">title</span> <span class="p">=</span> <span class="s">{A Hierarchical Regression Chain Framework for Affective Vocal Burst Recognition}</span><span class="p">,</span>
<span class="na">author</span> <span class="p">=</span> <span class="s">{Li, Jinchao and Wu, Xixin and Song, Kaitao and Li, Dongsheng and Liu, Xunying and Meng, Helen}</span><span class="p">,</span>
<span class="na">booktitle</span> <span class="p">=</span> <span class="s">{ICASSP}</span><span class="p">,</span>
<span class="na">year</span> <span class="p">=</span> <span class="s">{2023}</span><span class="p">,</span>
<span class="na">organization</span> <span class="p">=</span> <span class="s">{IEEE}</span>
<span class="p">}</span></code></pre></figure></div></div></div><li><div class="row"><div class="col-sm-2"><div class="preview d-flex align-items-center"><figure><picture> <source class="responsive-img-srcset" srcset=" /assets/papers/22/MER-480.webp 480w, /assets/papers/22/MER-800.webp 800w, /assets/papers/22/MER-1280.webp 1280w, " sizes="200px" type="image/webp" loading="lazy" data-proofer-ignore="" /> <a href="/assets/papers/22/MER.png" class="popup img-link preview rounded shimmer"><img src="/assets/papers/22/MER.png" width="100%" height="auto" alt="papers/22/MER.png" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" loading="lazy" loading="lazy"></a> </picture></figure></div></div><div id="li2022context" class="col-sm-10"><div class="title">Context-Aware Multimodal Fusion for Emotion Recognition</div><div class="author"> <span class="highlight-author">Jinchao Li</span>, Shuai Wang, Yang Chao, Xunying Liu, Helen Meng</div><div class="periodical"> <em>In INTERSPEECH</em>, 2022</div><div class="periodical"></div><div class="links"> <button class="abstract btn btn-sm z-depth-0" title="Click to show/hide abstract"> ABS <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <button class="bibtex btn btn-sm z-depth-0" title="Click to show/hide bibtex"> BIB <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <a href="/assets/papers/22/MER_paper_IS22.pdf" class="btn btn-sm z-depth-0" role="button" target="_blank" rel="noopener noreferrer">PDF</a> <a href="/assets/papers/22/MER_poster_IS22.png" class="btn btn-sm z-depth-0" role="button" target="_blank" rel="noopener noreferrer">POSTER</a></div><div class="abstract hidden"><p>Automatic emotion recognition (AER) is an inherently complex multimodal task that aims to automatically determine the emotional state of a given expression. Recent works have witnessed the benefits of upstream pretrained models in both audio and textual modalities for the AER task. However, efforts are still needed to effectively integrate features across multiple modalities, devoting due considerations to granularity mismatch and asynchrony in time steps. In this work, we first validate the effectiveness of the upstream models in a unimodal setup and empirically find that partial fine-tuning of the pretrained model in the feature space can significantly boost performance. Moreover, we take the context of the current sentence to model a more accurate emotional state. Based on the unimodal setups, we further propose several multimodal fusion methods to combine high-level features from the audio and text modalities. Experiments are carried out on the IEMOCAP dataset in a 4-category classification problem and compared with state-of-the-art methods in recent literature. Results show that the proposed models gave a superior performance of up to 84.45% and 80.36% weighted accuracy scores respectively in Session 5 and 5-fold cross-validation settings.</p></div><div class="bibtex hidden"><figure class="highlight"><pre><code class="language-bibtex" data-lang="bibtex"><span class="nc">@inproceedings</span><span class="p">{</span><span class="nl">li2022context</span><span class="p">,</span>
<span class="na">url</span> <span class="p">=</span> <span class="s">{https://www.isca-speech.org/archive/interspeech_2022/li22v_interspeech.html}</span><span class="p">,</span>
<span class="na">title</span> <span class="p">=</span> <span class="s">{Context-Aware Multimodal Fusion for Emotion Recognition}</span><span class="p">,</span>
<span class="na">author</span> <span class="p">=</span> <span class="s">{Li, Jinchao and Wang, Shuai and Chao, Yang and Liu, Xunying and Meng, Helen}</span><span class="p">,</span>
<span class="na">booktitle</span> <span class="p">=</span> <span class="s">{INTERSPEECH}</span><span class="p">,</span>
<span class="na">year</span> <span class="p">=</span> <span class="s">{2022}</span><span class="p">,</span>
<span class="na">organization</span> <span class="p">=</span> <span class="s">{ISCA}</span>
<span class="p">}</span></code></pre></figure></div></div></div><li><div class="row"><div class="col-sm-2"><div class="preview d-flex align-items-center"><figure><picture> <source class="responsive-img-srcset" srcset=" /assets/papers/21/Comp_AD-480.webp 480w, /assets/papers/21/Comp_AD-800.webp 800w, /assets/papers/21/Comp_AD-1280.webp 1280w, " sizes="200px" type="image/webp" loading="lazy" data-proofer-ignore="" /> <a href="/assets/papers/21/Comp_AD.png" class="popup img-link preview rounded shimmer"><img src="/assets/papers/21/Comp_AD.png" width="100%" height="auto" alt="papers/21/Comp_AD.png" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" loading="lazy" loading="lazy"></a> </picture></figure></div></div><div id="li2021comparative" class="col-sm-10"><div class="title">A Comparative Study of Acoustic and Linguistic Features Classification for Alzheimer’s Disease Detection</div><div class="author"> <span class="highlight-author">Jinchao Li</span>, Jianwei Yu, Zi Ye, Simon Wong, Manwai Mak, Brian Mak, Xunying Liu, Helen Meng</div><div class="periodical"> <em>In ICASSP</em>, 2021</div><div class="periodical"></div><div class="links"> <button class="abstract btn btn-sm z-depth-0" title="Click to show/hide abstract"> ABS <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <button class="bibtex btn btn-sm z-depth-0" title="Click to show/hide bibtex"> BIB <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <a href="https://www1.se.cuhk.edu.hk/~hccl/publications/pub/ICASSP_jcli.pdf" class="btn btn-sm z-depth-0" role="button">PDF</a> <a href="/assets/papers/21/Comp_AD.pdf" class="btn btn-sm z-depth-0" role="button" target="_blank" rel="noopener noreferrer">POSTER</a> <a href="https://github.com/JinchaoLove/NCDdetection_ICASSP2021" class="btn btn-sm z-depth-0" role="button">CODE</a></div><div class="abstract hidden"><p>With the global population ageing rapidly, Alzheimer’s Disease (AD) is particularly prominent in older adults, which has an insidious onset followed by gradual, irreversible deterioration in cognitive domains (memory, communication, etc). Thus the detection of Alzheimer’s Disease is crucial for timely intervention to slow down disease progression. This paper presents a comparative study of different acoustic and linguistic features for the AD detection using various classifiers. Experimental results on ADReSS dataset reflect that the proposed models using ComParE, X-vector, Linguistics, TFIDF and BERT features are able to detect AD with high accuracy and sensitivity, and are comparable with the state-of-the-art results reported. While most previous work used manual transcripts, our results also indicate that similar or even better performance could be obtained using automatically recognized transcripts over manually collected ones. This work achieves accuracy scores at 0.67 for acoustic features and 0.88 for linguistic features on either manual or ASR transcripts on the ADReSS Challenge test set.</p></div><div class="bibtex hidden"><figure class="highlight"><pre><code class="language-bibtex" data-lang="bibtex"><span class="nc">@inproceedings</span><span class="p">{</span><span class="nl">li2021comparative</span><span class="p">,</span>
<span class="na">url</span> <span class="p">=</span> <span class="s">{https://ieeexplore.ieee.org/document/9414147}</span><span class="p">,</span>
<span class="na">title</span> <span class="p">=</span> <span class="s">{A Comparative Study of Acoustic and Linguistic Features Classification for Alzheimer's Disease Detection}</span><span class="p">,</span>
<span class="na">author</span> <span class="p">=</span> <span class="s">{Li, Jinchao and Yu, Jianwei and Ye, Zi and Wong, Simon and Mak, Manwai and Mak, Brian and Liu, Xunying and Meng, Helen}</span><span class="p">,</span>
<span class="na">booktitle</span> <span class="p">=</span> <span class="s">{ICASSP}</span><span class="p">,</span>
<span class="na">year</span> <span class="p">=</span> <span class="s">{2021}</span><span class="p">,</span>
<span class="na">organization</span> <span class="p">=</span> <span class="s">{IEEE}</span>
<span class="p">}</span></code></pre></figure></div></div></div><li><div class="row"><div class="col-sm-2"><div class="preview d-flex align-items-center"><figure><picture> <source class="responsive-img-srcset" srcset=" /assets/papers/21/ASR_AD-480.webp 480w, /assets/papers/21/ASR_AD-800.webp 800w, /assets/papers/21/ASR_AD-1280.webp 1280w, " sizes="200px" type="image/webp" loading="lazy" data-proofer-ignore="" /> <a href="/assets/papers/21/ASR_AD.png" class="popup img-link preview rounded shimmer"><img src="/assets/papers/21/ASR_AD.png" width="100%" height="auto" alt="papers/21/ASR_AD.png" data-zoomable="" onerror="this.onerror=null; $('.responsive-img-srcset').remove();" loading="lazy" loading="lazy"></a> </picture></figure></div></div><div id="ye2021development" class="col-sm-10"><div class="title">Development of the CUHK Elderly Speech Recognition System for Neurocognitive Disorder Detection Using the DementiaBank Corpus</div><div class="author"> Zi Ye, Shoukang Hu, <span class="highlight-author">Jinchao Li</span>, Xurong Xie, Mengzhe Geng, Jianwei Yu, <span class="more-authors" title="click to view 4 more authors" onclick=" var element = $(this); element.attr('title', ''); var more_authors_text = element.text() == '4 more authors' ? 'Junhao Xu, Boyang Xue, Shansong Liu, Xunying Liu' : '4 more authors'; var cursorPosition = 0; var textAdder = setInterval(function(){ element.text(more_authors_text.substring(0, cursorPosition + 1)); if (++cursorPosition == more_authors_text.length){ clearInterval(textAdder); } }, '10'); ">4 more authors</span>, Helen Meng</div><div class="periodical"> <em>In ICASSP</em>, 2021</div><div class="periodical"></div><div class="links"> <button class="abstract btn btn-sm z-depth-0" title="Click to show/hide abstract"> ABS <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <button class="bibtex btn btn-sm z-depth-0" title="Click to show/hide bibtex"> BIB <i class="fas fa-sort open"></i> <i class="fas fa-times hidden"></i> </button> <a href="https://www1.se.cuhk.edu.hk/~hccl/publications/pub/zye_revised_v4.pdf" class="btn btn-sm z-depth-0" role="button">PDF</a> <a href="/assets/papers/21/ASR_AD.pdf" class="btn btn-sm z-depth-0" role="button" target="_blank" rel="noopener noreferrer">POSTER</a></div><div class="abstract hidden"><p>Early diagnosis of Neurocognitive Disorder (NCD) is crucial in facilitating preventive care and timely treatment to delay further progression. This paper presents the development of a state-of-the-art automatic speech recognition (ASR) system built on the Dementia-Bank Pitt corpus for automatic NCD detection. Speed perturbation based audio data augmentation expanded the limited elderly speech data by four times. Large quantities of out-of-domain, non-aged adult speech were exploited by cross-domain adapting a 1000-hour LibriSpeech corpus trained LF-MMI factored TDNN system to DementiaBank. The variability among elderly speakers was modeled using i-Vector and learning hidden unit contributions (LHUC) based speaker adaptive training. Robust Bayesian estimation of TDNN systems and LHUC transforms were used in both cross-domain and speaker adaptation. A Transformer language model was also built to improve the final system performance. A word error rate (WER) reduction of 11.72% absolute (26.11% relative) was obtained over the baseline i-Vector adapted LF-MMI TDNN system on the evaluation data of 48 elderly speakers. 
The best NCD detection accuracy of 88%, comparable to that using the ground truth speech transcripts, was obtained using the textual features extracted from the final ASR system outputs.</p></div><div class="bibtex hidden"><figure class="highlight"><pre><code class="language-bibtex" data-lang="bibtex"><span class="nc">@inproceedings</span><span class="p">{</span><span class="nl">ye2021development</span><span class="p">,</span>
<span class="na">url</span> <span class="p">=</span> <span class="s">{https://ieeexplore.ieee.org/document/9413634}</span><span class="p">,</span>
<span class="na">title</span> <span class="p">=</span> <span class="s">{Development of the CUHK Elderly Speech Recognition System for Neurocognitive Disorder Detection Using the DementiaBank Corpus}</span><span class="p">,</span>
<span class="na">author</span> <span class="p">=</span> <span class="s">{Ye, Zi and Hu, Shoukang and Li, Jinchao and Xie, Xurong and Geng, Mengzhe and Yu, Jianwei and Xu, Junhao and Xue, Boyang and Liu, Shansong and Liu, Xunying and Meng, Helen}</span><span class="p">,</span>
<span class="na">booktitle</span> <span class="p">=</span> <span class="s">{ICASSP}</span><span class="p">,</span>
<span class="na">year</span> <span class="p">=</span> <span class="s">{2021}</span><span class="p">,</span>
<span class="na">organization</span> <span class="p">=</span> <span class="s">{IEEE}</span>
<span class="p">}</span></code></pre></figure></div></div></div></ol></div><h2 id="honors--awards"><a href="#honors--awards" class="anchor"><i class="fas fa-anchor"></i></a><span class="me-2">Honors & Awards</span></h2><ul><li>2022: Winner of two tasks in the <a href="https://www.competitions.hume.ai/avb2022">ACII Affective Vocal Bursts (A-VB)</a> competition<li>2019: Excellent Undergraduate Thesis Award of Nanjing University<li>2018: Meritorious Winner prize in the <a href="https://www.comap.com/contests/mcm-icm">American Mathematical Contest in Modeling</a><li>2017: Meritorious Winner prize in <a href="https://en.mcm.edu.cn">CUMCM</a>, ranked top 1.5% in China<li>2017: National Scholarship, awarded by the Ministry of Education of China</ul><h2 id="academic-activities"><a href="#academic-activities" class="anchor"><i class="fas fa-anchor"></i></a><span class="me-2">Academic Activities</span></h2><ul><li>Reviewer for ICASSP, INTERSPEECH, COLING, etc.<li>Co-taught “Conversational AI systems” (ASR part) with Prof. Meng and colleagues at CUHK in Fall 2022<li>Teaching assistant for ENGG1120 (Linear Algebra for Engineers) at CUHK, every Term 2 during 2019-2023</ul></div></article></main><aside aria-label="Panel" id="panel-wrapper" class="col-2 ps-2 mb-5 text-muted"><div class="access"><section id="access-lastmod"><h2 class="panel-heading">Recently Updated</h2><ul class="content list-unstyled ps-0 pb-1 ms-1 mt-2"><li class="text-truncate lh-lg"> <a href="/blog/text-and-typography/">Text and Typography</a><li class="text-truncate lh-lg"> <a href="/blog/write-a-new-post/">Writing a New Post</a><li class="text-truncate lh-lg"> <a href="/blog/getting-started/">Getting Started</a></ul></section></div><div class="access"><section><h2 class="panel-heading">Trending Tags</h2><div class="d-flex flex-wrap mt-3 mb-1 me-3"> <a class="post-tag btn btn-outline-primary" href="/blog/tags/getting-started/">getting started</a> <a class="post-tag btn btn-outline-primary" href="/blog/tags/homepage/">homepage</a> <a class="post-tag btn btn-outline-primary" href="/blog/tags/theme/">theme</a></div></section></div><section id="toc-wrapper" class="ps-0 pe-4"><h2 class="panel-heading ps-3 pt-2 mb-2">Contents</h2><nav id="toc"></nav></section></aside></div><div id="search-result-wrapper" class="d-flex justify-content-center unloaded"><div class="col-11 content"><div id="search-results" class="d-flex flex-wrap justify-content-center text-muted mt-3"></div></div></div></div><aside aria-label="Scroll to Top"> <button id="back-to-top" type="button" class="btn btn-lg btn-box-shadow"> <i class="fas fa-angle-up"></i> </button></aside></div><div id="mask"></div><aside id="notification" class="toast" role="alert" aria-live="assertive" aria-atomic="true" data-bs-animation="true" data-bs-autohide="false" ><div class="toast-header"> <button type="button" class="btn-close ms-auto" data-bs-dismiss="toast" aria-label="Close" ></button></div><div class="toast-body text-center pt-0"><p class="px-2 mb-3">A new version of content is available.</p><button type="button" class="btn btn-primary" aria-label="Update"> Update </button></div></aside><script src="/assets/lib/jquery/jquery.min.js"></script> <script src="/assets/lib/bootstrap/bootstrap.bundle.min.js"></script> <script src="/assets/lib/lunr/lunr.min.js"></script> <script src="/assets/lib/loading-attribute-polyfill/loading-attribute-polyfill.umd.min.js"></script> <script src="/assets/lib/magnific-popup/jquery.magnific-popup.min.js"></script> <script 
src="/assets/lib/clipboard/clipboard.min.js"></script> <script src="/assets/lib/dayjs/dayjs.min.js"></script> <script src="/assets/lib/dayjs/locale/en.min.js"></script> <script src="/assets/lib/dayjs/plugin/relativeTime.min.js"></script> <script src="/assets/lib/dayjs/plugin/localizedFormat.min.js"></script> <script src="/assets/lib/tocbot/tocbot.min.js"></script> <script defer src="/assets/js/dist/post.min.js"></script> <script async src="/assets/js/just-the-docs.js"></script> <script defer src="/assets/js/dist/app.min.js"></script> <script async src="https://d1bxh8uas1mnw7.cloudfront.net/assets/embed.js"></script> <script async src="https://badge.dimensions.ai/badge.js"></script> <script type="text/javascript"> $(document).ready(function () { $('.mode-toggle').click(function () { let theme = localStorage.getItem('theme'); if (modeToggle.isDarkMode || (!modeToggle.hasMode && modeToggle.isSysDarkPrefer)) { theme = 'dark'; } else { theme = 'light'; } console.log('theme:' + theme); // Set jupyter notebooks themes. let jupyterNotebooks = document.getElementsByClassName('jupyter-notebook-iframe-container'); for (let i = 0; i < jupyterNotebooks.length; i++) { let iframeDocument = jupyterNotebooks[i].getElementsByTagName('iframe')[0].contentWindow.document; let bodyElement = iframeDocument.body; let headElement = iframeDocument.head; // Remove existing theme link elements let existingLinks = headElement.querySelectorAll('link[data-jp-theme-link]'); for (let link of existingLinks) { headElement.removeChild(link); } // Get original style-sheets let styleElements = headElement.querySelectorAll('style'); let variablesElement; for (let style of styleElements) { if (style.innerHTML.includes('The following CSS variables define the main, public API for styling JupyterLab.')) { variablesElement = style; break; } } // console.log(styleElements); // New style-sheets const cssLink = document.createElement('link'); cssLink.setAttribute('data-jp-theme-link', ''); cssLink.rel = 'stylesheet'; cssLink.type = 'text/css'; if (theme == 'dark') { cssLink.href = '/assets/css/jupyter-dark.css'; bodyElement.setAttribute('data-jp-theme-light', 'false'); bodyElement.setAttribute('data-jp-theme-name', 'JupyterLab Dark'); } else { cssLink.href = '/assets/css/jupyter-light.css'; bodyElement.setAttribute('data-jp-theme-light', 'true'); bodyElement.setAttribute('data-jp-theme-name', 'JupyterLab Light'); } // Import the cssLink if (variablesElement) { // variablesElement.insertAdjacentElement('beforebegin', cssLink); variablesElement.insertAdjacentElement('afterend', cssLink); } else { headElement.appendChild(cssLink); } } }); }); </script>