<!DOCTYPE html>
<html>
<head>
<script type="text/javascript" src="https://cdn.jsdelivr.net/npm/mathjax@2.7.9/MathJax.js?config=TeX-MML-AM_HTMLorMML-full"> </script>
<meta charset="utf-8">
<!-- Meta tags for social media banners; these should be filled in appropriately, as they are your "business card" -->
<!-- Replace the content tag with appropriate information -->
<meta name="description" content="A Human-AI Co-Creative Sound Artwork Using a Real-time Multi-channel Sound Generation Model">
<meta property="og:title" content="Studies for"/>
<meta property="og:description" content="A Human-AI Co-Creative Sound Artwork Using a Real-time Multi-channel Sound Generation Model"/>
<meta property="og:url" content="URL OF THE WEBSITE"/>
<!-- Path to banner image, should be in the path listed below. Optimal dimensions are 1200x630 -->
<meta property="og:image" content="static/images/icon.jpg" />
<meta property="og:image:width" content="2048"/>
<meta property="og:image:height" content="1365"/>
<meta name="twitter:title" content="Studies for">
<meta name="twitter:description" content="A Human-AI Co-Creative Sound Artwork Using a Real-time Multi-channel Sound Generation Model">
<!-- Path to banner image, should be in the path listed below. Optimal dimensions are 1200x600 -->
<meta name="twitter:image" content="static/images/icon.jpg">
<meta name="twitter:card" content="summary_large_image">
<!-- Keywords for your paper to be indexed by-->
<meta name="keywords" content="sound art, generative AI, human-AI co-creation, sound generation, SpecMaskGIT, multi-channel audio">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>‘Studies for’: Demo Page</title>
<link rel="icon" type="image/x-icon" href="static/images/icon.jpg">
<link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
rel="stylesheet">
<!-- <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"> -->
<link rel="stylesheet" href="static/css/bulma.min.css">
<link rel="stylesheet" href="static/css/bulma-carousel.min.css">
<link rel="stylesheet" href="static/css/bulma-slider.min.css">
<link rel="stylesheet" href="static/css/fontawesome.all.min.css">
<link rel="stylesheet"
href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
<link rel="stylesheet" href="static/css/index.css">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
<script defer src="static/js/fontawesome.all.min.js"></script>
<script src="static/js/bulma-carousel.min.js"></script>
<script src="static/js/bulma-slider.min.js"></script>
<script src="static/js/index.js"></script>
</head>
<body>
<section class="hero">
<div class="hero-body">
<div class="container is-max-desktop">
<div class="columns is-centered">
<div class="column has-text-centered">
<h1 class="title is-1 publication-title">‘Studies for’: A Human-AI Co-Creative Sound Artwork Using a Real-time Multi-channel Sound Generation Model</h1>
<div class="is-size-5 publication-authors">
<!-- Paper authors -->
<span class="author-block">
<a href="https://scholar.google.com/citations?hl=en&user=tAaGMqYAAAAJ" target="_blank">Chihiro Nagashima</a>,
</span>
<span class="author-block">
<a href="https://scholar.google.com/citations?hl=en&user=oKUpOaQAAAAJ" target="_blank">Akira Takahashi</a>,
</span>
<span class="author-block">
<a href="https://scholar.google.com/citations?hl=en&user=iRVT3A8AAAAJ" target="_blank">Zhi Zhong</a>,
</span>
<span class="author-block">
<a href="https://scholar.google.com/citations?hl=en&user=_mhxayYAAAAJ" target="_blank">Shusuke Takahashi</a>,
</span>
<span class="author-block">
<a href="https://www.yukimitsufuji.com/" target="_blank">Yuki Mitsufuji</a>
</span>
</div>
<div class="is-size-5 publication-authors">
<span class="author-block">Sony Group Corporation</span>
</div>
<div class="column has-text-centered">
<div class="publication-links">
<!-- ArXiv abstract Link -->
<span class="link-block">
<a href="https://arxiv.org/abs/2510.25228" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="ai ai-arxiv"></i>
</span>
<span>arXiv</span>
</a>
</span>
</div>
</div>
</div>
</div>
</div>
</div>
</section>
<section class="section">
<div class="container is-max-desktop">
<!-- Paper abstract -->
<div class="columns is-centered has-text-centered">
<div class="column is-four-fifths">
<h2 class="title is-3">Abstract</h2>
<div class="content has-text-justified">
<p>
This paper explores the integration of AI technologies into the artistic workflow through the creation of ‘Studies for’, a generative sound installation developed in collaboration with sound artist evala. The installation employs SpecMaskGIT, a lightweight yet high-quality sound generation AI model, to generate and play back eight-channel sound in real time, creating an immersive auditory experience over the course of a three-month exhibition. The work is grounded in the concept of a "new form of archive," which aims to preserve an artist's style while expanding beyond the artist's past artworks through the continued generation of new sound elements. This speculative approach to archival preservation is facilitated by training the AI model on a dataset consisting of over 200 hours of evala's past sound artworks.
By addressing key requirements in the co-creation of art using AI, this study highlights the value of (1) integrating artist feedback, (2) building datasets from an artist's past works, and (3) ensuring the inclusion of unexpected, novel outputs. In ‘Studies for’, the model was designed to reflect the artist's identity while generating new, previously unheard sounds, making it a fitting realization of the concept of "a new form of archive." We propose a Human-AI co-creation framework for effectively incorporating sound generation AI models into the sound art creation process, and suggest new possibilities for creating and archiving sound art that extend an artist's work beyond their physical existence.
</p>
</div>
</div>
</div>
</div>
</section>
<section class="section">
<div class="container is-max-desktop">
<div class="columns is-centered">
<!-- Demo Video -->
<div class="column is-full-width">
<h2 class="title is-3">Demo Video</h2>
<div class="publication-video">
<iframe width="560" height="315" src="https://www.youtube.com/embed/-KNZaEyBhws?si=KKc1_85zJm3Fjzsr" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
</div>
<div class="content has-text-justified">
<p>
The original work was created in 8-channel spatial audio.
The audio in this video is a stereo mix provided solely for documentation purposes and does not include binaural or other spatial processing.
</p>
</div>
</div>
</div>
<!-- Demo Video -->
</div>
</section>
<section class="section">
<div class="container is-max-desktop">
<!-- Acknowledgements / Related Links -->
<div class="columns is-centered">
<div class="column is-full-width">
<h2 class="title is-3">Acknowledgements / Related Links</h2>
<div class="content has-text-justified">
<p>
This work was realized through the generous collaboration of sound artist <a href="https://www.evala.jp/Profile">evala</a>.
</p>
<p>
It was exhibited at the <a href="https://www.ntticc.or.jp/en/archive/works/studies-for/">NTT InterCommunication Center [ICC]</a> in Tokyo, Japan, from December 14, 2024 to March 9, 2025.
</p>
<p>
The videos on this website and the photos in the paper are courtesy of ICC.
</p>
<p>
For more details about the sound generation model SpecMaskGIT used in this work, please see [<a href="https://arxiv.org/abs/2406.17672">link</a>].
</p>
</div>
</div>
</div>
<!--/ Acknowledgements / Related Links -->
</div>
</section>
<!--BibTex citation -->
<section class="section" id="BibTeX">
<div class="container is-max-desktop content">
<h2 class="title">BibTeX</h2>
<pre><code>
@inproceedings{nagashima2025studiesfor,
  title={‘Studies for’: A Human-AI Co-Creative Sound Artwork Using a Real-time Multi-channel Sound Generation Model},
  author={Nagashima, Chihiro and Takahashi, Akira and Zhong, Zhi and Takahashi, Shusuke and Mitsufuji, Yuki},
  booktitle={NeurIPS Creative AI Track 2025},
  year={2025}
}
</code></pre>
</div>
</section>
<!--End BibTex citation -->
</body>
</html>