|
---
{
  "name": "EV-ASL",
  "aliases": [],
  "year": 2021,
  "modalities": [
    "Vision"
  ],
  "sensors": [
    "DVS128"
  ],
  "other_sensors": [],
  "category": "Human-centric Recordings",
  "tags": [
    "Sign Language",
    "Hand Pose Detection"
  ],
  "description": "American Sign Language Dataset",
  "dataset_properties": {
    "available_online": false,
    "has_real_data": true,
    "has_simulated_data": false,
    "has_ground_truth": false,
    "has_frames": true,
    "has_biases": false,
    "distribution_methods": [
      "Baidu"
    ],
    "file_formats": [],
    "availability_comment": "Download link has a single zip file",
    "dataset_links": [
      {
        "name": "Baidu",
        "url": "https://pan.baidu.com/s/1xPYenSSL8w_LcX8pe5i_0g",
        "format": "Binary",
        "available": true
      }
    ],
    "size_gb": 2.11,
    "size_type": "Compressed"
  },
  "paper": {
    "title": "Event-Based American Sign Language Recognition Using Dynamic Vision Sensor",
    "doi": "10.1007/978-3-030-86137-7_1",
    "authors": [
      "Yong Wang",
      "Xian Zhang",
      "Yanxiang Wang",
      "Hongbin Wang",
      "Chanying Huang",
      "Yiran Shen"
    ],
    "abstract": "American Sign language (ASL) is one of the most effective communication tools for people with hearing difficulties. However, most of people do not understand ASL. To bridge this gap, we propose EV-ASL, an automatic ASL interpretation system based on dynamic vision sensor (DVS). Compared to the traditional RGB-based approach, DVS consumes significantly less resources (energy, computation, bandwidth) and it outputs the moving objects only without the need of background subtraction due to its event-based nature. At last, because of its wide dynamic response range, it enables the EV-ASL to work under a variety of lighting conditions. EV-ASL proposes novel representation of event streams and facilitates deep convolutional neural network for sign recognition. In order to evaluate the performance of EV-ASL, we recruited 10 participants and collected 11,200 samples from 56 different ASL words. The evaluation shows that EV-ASL achieves a recognition accuracy of 93.25%.\n",
    "open_access": false
  },
  "citation_counts": [
    {
      "source": "crossref",
      "count": 1,
      "updated": "2025-09-14T23:04:08.758443"
    },
    {
      "source": "scholar",
      "count": 4,
      "updated": "2025-09-14T23:04:09.465721"
    }
  ],
  "links": [
    {
      "type": "paper",
      "url": "https://link.springer.com/chapter/10.1007/978-3-030-86137-7_1"
    },
    {
      "type": "github_page",
      "url": "https://github.com/zhangxiann/EV_ASL/"
    }
  ],
  "full_name": "",
  "additional_metadata": {
    "num_subjects": "10",
    "num_males": "6",
    "num_females": "4"
  },
  "referenced_papers": [
    {
      "doi": "10.1109/CVPRW.2019.00205",
      "source": "crossref"
    },
    {
      "doi": "10.1109/CVPR.2017.781",
      "source": "crossref"
    },
    {
      "doi": "10.1109/ICCV.2019.00058",
      "source": "crossref"
    },
    {
      "doi": "10.1109/TIP.2020.3023597",
      "source": "crossref"
    },
    {
      "doi": "10.1109/ICCV.2017.332",
      "source": "crossref"
    },
    {
      "doi": "10.1609/aaai.v32i1.11903",
      "source": "crossref"
    },
    {
      "doi": "10.1109/TPAMI.2016.2574707",
      "source": "crossref"
    },
    {
      "doi": "10.1109/JSSC.2007.914337",
      "source": "crossref"
    },
    {
      "doi": "10.3389/fncom.2015.00099",
      "source": "crossref"
    },
    {
      "doi": "10.1007/978-3-319-16178-5_40",
      "source": "crossref"
    },
    {
      "doi": "10.1109/WACV.2019.00199",
      "source": "crossref"
    },
    {
      "doi": "10.1109/CVPR.2019.00652",
      "source": "crossref"
    },
    {
      "doi": "10.1109/TPAMI.2021.3054886",
      "source": "crossref"
    },
    {
      "doi": "10.15607/RSS.2018.XIV.062",
      "source": "crossref"
    }
  ],
  "bibtex": {
    "pages": "3\u201310",
    "year": 2021,
    "author": "Wang, Yong and Zhang, Xian and Wang, Yanxiang and Wang, Hongbin and Huang, Chanying and Shen, Yiran",
    "publisher": "Springer International Publishing",
    "booktitle": "Wireless Algorithms, Systems, and Applications",
    "doi": "10.1007/978-3-030-86137-7_1",
    "url": "http://dx.doi.org/10.1007/978-3-030-86137-7_1",
    "issn": "1611-3349",
    "isbn": "9783030861377",
    "title": "Event-Based American Sign Language Recognition Using Dynamic Vision Sensor",
    "type": "book",
    "key": "Wang_2021"
  }
}
---
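The metadata block above can be consumed programmatically. Below is a minimal sketch, assuming the entry is stored as a Markdown file whose JSON frontmatter is delimited by `---` lines; the filename `EV-ASL.md` is only a placeholder.

```python
import json
from pathlib import Path

def load_frontmatter(path):
    """Parse the JSON frontmatter delimited by '---' lines at the top of a Markdown file."""
    text = Path(path).read_text(encoding="utf-8")
    # Everything between the first pair of '---' delimiters is the JSON block;
    # the remainder is the Markdown body and is ignored here.
    _, frontmatter, _body = text.split("---", 2)
    return json.loads(frontmatter)

meta = load_frontmatter("EV-ASL.md")  # hypothetical filename
print(meta["name"], meta["year"], meta["sensors"])
print(meta["dataset_properties"]["size_gb"])  # compressed size in GB
```

Splitting on the first two `---` delimiters keeps the parser independent of whatever Markdown body follows the frontmatter.
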
# Dataset Description

To evaluate the recognition accuracy of EV-ASL, a dataset of event streams was collected while different users performed ASL words in front of a DVS camera. The dataset covers 56 words (26 one-hand words and 30 two-hand words): frequent verbs, nouns, adjectives, and pronouns commonly used in daily life.

To collect the dataset, 10 participants (4 females, 6 males) were recruited to perform the hand movements corresponding to each of the selected ASL words. Due to human-subjects IRB constraints, all participants have normal hearing; they learned the movements for the ASL words by watching online tutorial videos for two hours. During the experiments, the environment and other conditions were not strictly controlled.

During each experiment session, every participant performed the hand movement for each word 20 times, yielding a total of 11,200 (= 10 × 56 × 20) samples.
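
After downloading and extracting the archive, the sample count implied by this arithmetic can serve as a quick sanity check. A minimal sketch follows, assuming the archive unpacks into one file per sample grouped as `<participant>/<word>/<repetition>`; the directory name and `.aedat` extension are assumptions, so adjust them to the actual archive contents.

```python
from pathlib import Path

NUM_PARTICIPANTS = 10
NUM_WORDS = 56
REPS_PER_WORD = 20
EXPECTED = NUM_PARTICIPANTS * NUM_WORDS * REPS_PER_WORD  # 11,200 samples

root = Path("EV-ASL")  # assumed extraction directory
# Assumed layout: EV-ASL/<participant>/<word>/<rep>.aedat
samples = sorted(root.glob("*/*/*.aedat"))
print(f"found {len(samples)} samples, expected {EXPECTED}")
assert len(samples) == EXPECTED, "sample count does not match 10 x 56 x 20"
```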