
Commit 2ed7a7b

init repository
0 parents  commit 2ed7a7b

File tree

10 files changed: +116 −0 lines changed

.github/workflows/aid-ci.yml

+20
@@ -0,0 +1,20 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Testing

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v2
    - name: Test AID Model
      uses: autoai-org/[email protected]

.gitignore

+4
@@ -0,0 +1,4 @@
docker*
pretrained/
runner*
*.pyc

README.md

+12
@@ -0,0 +1,12 @@
# eth-library-lab/speech

![AID Testing](https://github.com/eth-library-lab/speech/actions/workflows/aid-ci.yml/badge.svg)

> Speech Analysis Package

This project is based on the AID Project. Common topics are covered in the [AID docs](https://aid.autoai.org); make sure to read them.

## Usage

## Reference

aid.toml

+8
@@ -0,0 +1,8 @@
[package]
name="speech"
vendor="eth-library-lab"
tagline="Speech Analysis Package"

[[solvers]]
name="speechSolver"
class="speech/solver/speechSolver"
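The [[solvers]] entry ties this manifest to the code in this commit: the class value reads as package/module/ClassName and matches speech/solver.py and its speechSolver class. As a hedged illustration only (this is not the AID/mlpm loader, and resolve_solver is a hypothetical helper), such a path could be resolved with importlib along these lines:

# Hypothetical sketch: resolve a "package/module/ClassName" path such as
# "speech/solver/speechSolver" to a class. This is NOT the actual AID/mlpm
# loading code, just an illustration of how the aid.toml entry maps onto the repo.
import importlib

def resolve_solver(class_path: str):
    package, module, class_name = class_path.split("/")
    mod = importlib.import_module("{}.{}".format(package, module))
    return getattr(mod, class_name)

# Example: SolverClass = resolve_solver("speech/solver/speechSolver")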

prepip.sh

+2
@@ -0,0 +1,2 @@
echo "Installing pip-required dependencies"
apt-get update && apt-get install sox -y

pretrained.toml

+1
@@ -0,0 +1 @@

requirements.txt

+3
@@ -0,0 +1,3 @@
mlpm
textblob
deepspeech

setup.sh

+2
@@ -0,0 +1,2 @@
echo "Finished Build..."
python -m textblob.download_corpora

speech/bundle.py

Whitespace-only changes.

speech/solver.py

+64
@@ -0,0 +1,64 @@
from mlpm.solver import Solver
from deepspeech import Model
import wave
import shlex
import subprocess
import numpy as np
from textblob import TextBlob
# shlex.quote safely escapes the audio path for the SoX command built below
from shlex import quote


def convert_sample_rate(audio_path, desired_sample_rate):
    # Use SoX to resample the input file to the rate the model expects,
    # emitting raw 16-bit little-endian mono PCM on stdout.
    sox_cmd = 'sox {} --type raw --bits 16 --channels 1 --rate {} --encoding signed-integer --endian little --compression 0.0 --no-dither - '.format(quote(audio_path), desired_sample_rate)
    try:
        output = subprocess.check_output(shlex.split(sox_cmd), stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        raise RuntimeError('SoX returned non-zero status: {}'.format(e.stderr))
    except OSError as e:
        raise OSError(e.errno, 'SoX not found, use {}hz files or install it: {}'.format(desired_sample_rate, e.strerror))

    return desired_sample_rate, np.frombuffer(output, np.int16)


class speechSolver(Solver):
    def __init__(self, toml_file=None):
        super().__init__(toml_file)
        # Do your init work here: load the DeepSpeech acoustic model and its
        # external scorer, and remember the sample rate the model expects.
        self.ds = Model("pretrained/deepspeech-0.9.3-models.pbmm")
        self.scorepath = "pretrained/deepspeech-0.9.3-models.scorer"
        self.ds.enableExternalScorer(self.scorepath)
        self.desired_sample_rate = self.ds.sampleRate()
        self.ready()

    def infer(self, data):
        # If you need the uploaded file, its path is provided as input_file_path in data.
        # Read the WAV file and resample it if its rate differs from the model's.
        fin = wave.open(data['input_file_path'], 'rb')
        fs_orig = fin.getframerate()
        resampled = False
        if fs_orig != self.desired_sample_rate:
            resampled = True
            fs_new, audio = convert_sample_rate(data['input_file_path'], self.desired_sample_rate)
        else:
            audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

        audio_length = fin.getnframes() * (1 / fs_orig)
        fin.close()

        # Transcribe the audio, then run per-sentence sentiment analysis on the transcript.
        result = self.ds.stt(audio)
        textblob_analyzer = TextBlob(result)
        sentiment = []
        for sentence in textblob_analyzer.sentences:
            sentiment.append({
                'sentence': str(sentence),
                'polarity': sentence.sentiment.polarity,
                'subjectivity': sentence.sentiment.subjectivity
            })
        return {
            "transcript": result,
            "audio_length": audio_length,
            "resampled": resampled,
            "sentiment": sentiment
        }
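A minimal local invocation of the solver above might look like the following sketch. It is illustrative only: it assumes the pretrained DeepSpeech model and scorer are present under pretrained/, that SoX is installed (see prepip.sh), and that "sample.wav" is a hypothetical readable WAV file.

# Illustrative sketch, not part of the commit: construct the solver with its
# default configuration and run inference on a local WAV file.
# Assumes pretrained/deepspeech-0.9.3-models.pbmm and the matching .scorer exist,
# SoX is installed, and "sample.wav" (hypothetical filename) is a valid WAV file.
from speech.solver import speechSolver

solver = speechSolver()
output = solver.infer({"input_file_path": "sample.wav"})

print(output["transcript"])
print("audio length (s):", output["audio_length"])
for item in output["sentiment"]:
    print(item["sentence"], item["polarity"], item["subjectivity"])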
