Skip to content

Commit 5f8389c

Browse files
committed
Add binary protocol v2 & v3 to websocket
1 parent 1f03d77 commit 5f8389c

File tree

5 files changed

+86
-13
lines changed

5 files changed

+86
-13
lines changed

main/ota.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ bool Ota::CheckVersion() {
154154
cJSON_ArrayForEach(item, websocket) {
155155
if (item->type == cJSON_String) {
156156
settings.SetString(item->string, item->valuestring);
157+
} else if (item->type == cJSON_Number) {
158+
settings.SetInt(item->string, item->valueint);
157159
}
158160
}
159161
has_websocket_config_ = true;

main/protocols/mqtt_protocol.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,11 @@ bool MqttProtocol::OpenAudioChannel() {
207207
}
208208
udp_ = Board::GetInstance().CreateUdp();
209209
udp_->OnMessage([this](const std::string& data) {
210+
/*
211+
* UDP Encrypted OPUS Packet Format:
212+
* |type 1u|flags 1u|payload_len 2u|ssrc 4u|timestamp 4u|sequence 4u|
213+
* |payload payload_len|
214+
*/
210215
if (data.size() < sizeof(aes_nonce_)) {
211216
ESP_LOGE(TAG, "Invalid audio packet size: %zu", data.size());
212217
return;

main/protocols/protocol.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,21 @@
55
#include <string>
66
#include <functional>
77
#include <chrono>
8+
#include <vector>
9+
10+
struct AudioStreamPacket {
11+
uint32_t timestamp;
12+
std::vector<uint8_t> payload;
13+
};
14+
15+
struct BinaryProtocol2 {
16+
uint16_t version;
17+
uint16_t type; // Message type (0: OPUS, 1: JSON)
18+
uint32_t reserved; // Reserved for future use
19+
uint32_t timestamp; // Timestamp in milliseconds (used for server-side AEC)
20+
uint32_t payload_size; // Payload size in bytes
21+
uint8_t payload[]; // Payload data
22+
} __attribute__((packed));
823

924
struct BinaryProtocol3 {
1025
uint8_t type;

main/protocols/websocket_protocol.cc

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,37 @@ void WebsocketProtocol::SendAudio(const std::vector<uint8_t>& data) {
3333
return;
3434
}
3535

36-
busy_sending_audio_ = true;
37-
websocket_->Send(data.data(), data.size(), true);
38-
busy_sending_audio_ = false;
36+
if (version_ == 2) {
37+
std::string packet;
38+
packet.resize(sizeof(BinaryProtocol2) + data.size());
39+
auto bp2 = (BinaryProtocol2*)packet.data();
40+
bp2->version = htons(version_);
41+
bp2->type = 0;
42+
bp2->reserved = 0;
43+
bp2->timestamp = htonl(0);
44+
bp2->payload_size = htonl(data.size());
45+
memcpy(bp2->payload, data.data(), data.size());
46+
47+
busy_sending_audio_ = true;
48+
websocket_->Send(packet.data(), packet.size(), true);
49+
busy_sending_audio_ = false;
50+
} else if (version_ == 3) {
51+
std::string packet;
52+
packet.resize(sizeof(BinaryProtocol3) + data.size());
53+
auto bp3 = (BinaryProtocol3*)packet.data();
54+
bp3->type = 0;
55+
bp3->reserved = 0;
56+
bp3->payload_size = htons(data.size());
57+
memcpy(bp3->payload, data.data(), data.size());
58+
59+
busy_sending_audio_ = true;
60+
websocket_->Send(packet.data(), packet.size(), true);
61+
busy_sending_audio_ = false;
62+
} else {
63+
busy_sending_audio_ = true;
64+
websocket_->Send(data.data(), data.size(), true);
65+
busy_sending_audio_ = false;
66+
}
3967
}
4068

4169
bool WebsocketProtocol::SendText(const std::string& text) {
@@ -71,25 +99,47 @@ bool WebsocketProtocol::OpenAudioChannel() {
7199
Settings settings("websocket", false);
72100
std::string url = settings.GetString("url");
73101
std::string token = settings.GetString("token");
102+
int version = settings.GetInt("version");
103+
if (version != 0) {
104+
version_ = version;
105+
}
74106

75107
busy_sending_audio_ = false;
76108
error_occurred_ = false;
77-
78-
// If token not starts with "Bearer " or "bearer ", add it
79-
if (token.empty() || (token.find("Bearer ") != 0 && token.find("bearer ") != 0)) {
80-
token = "Bearer " + token;
81-
}
82109

83110
websocket_ = Board::GetInstance().CreateWebSocket();
84-
websocket_->SetHeader("Authorization", token.c_str());
85-
websocket_->SetHeader("Protocol-Version", "1");
111+
112+
if (!token.empty()) {
113+
// If the token does not contain a space, add the "Bearer " prefix
114+
if (token.find(" ") == std::string::npos) {
115+
token = "Bearer " + token;
116+
}
117+
websocket_->SetHeader("Authorization", token.c_str());
118+
}
119+
websocket_->SetHeader("Protocol-Version", std::to_string(version_).c_str());
86120
websocket_->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
87121
websocket_->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str());
88122

89123
websocket_->OnData([this](const char* data, size_t len, bool binary) {
90124
if (binary) {
91125
if (on_incoming_audio_ != nullptr) {
92-
on_incoming_audio_(std::vector<uint8_t>((uint8_t*)data, (uint8_t*)data + len));
126+
if (version_ == 2) {
127+
BinaryProtocol2* bp2 = (BinaryProtocol2*)data;
128+
bp2->version = ntohs(bp2->version);
129+
bp2->type = ntohs(bp2->type);
130+
bp2->timestamp = ntohl(bp2->timestamp);
131+
bp2->payload_size = ntohl(bp2->payload_size);
132+
auto payload = (uint8_t*)bp2->payload;
133+
on_incoming_audio_(std::vector<uint8_t>(payload, payload + bp2->payload_size));
134+
} else if (version_ == 3) {
135+
BinaryProtocol3* bp3 = (BinaryProtocol3*)data;
136+
bp3->type = bp3->type;
137+
bp3->payload_size = ntohs(bp3->payload_size);
138+
auto payload = (uint8_t*)bp3->payload;
139+
on_incoming_audio_(std::vector<uint8_t>(payload, payload + bp3->payload_size));
140+
} else {
141+
on_incoming_audio_(std::vector<uint8_t>((uint8_t*)data, (uint8_t*)data + len));
142+
}
93143
}
94144
} else {
95145
// Parse JSON data
@@ -118,7 +168,7 @@ bool WebsocketProtocol::OpenAudioChannel() {
118168
}
119169
});
120170

121-
ESP_LOGI(TAG, "Connecting to websocket server: %s with token: %s", url.c_str(), token.c_str());
171+
ESP_LOGI(TAG, "Connecting to websocket server: %s with version: %d", url.c_str(), version_);
122172
if (!websocket_->Connect(url.c_str())) {
123173
ESP_LOGE(TAG, "Failed to connect to websocket server");
124174
SetError(Lang::Strings::SERVER_NOT_FOUND);
@@ -129,7 +179,7 @@ bool WebsocketProtocol::OpenAudioChannel() {
129179
// keys: message type, version, audio_params (format, sample_rate, channels)
130180
std::string message = "{";
131181
message += "\"type\":\"hello\",";
132-
message += "\"version\": 1,";
182+
message += "\"version\": " + std::to_string(version_) + ",";
133183
message += "\"transport\":\"websocket\",";
134184
message += "\"audio_params\":{";
135185
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS);

main/protocols/websocket_protocol.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class WebsocketProtocol : public Protocol {
2424
private:
2525
EventGroupHandle_t event_group_handle_;
2626
WebSocket* websocket_ = nullptr;
27+
int version_ = 1;
2728

2829
void ParseServerHello(const cJSON* root);
2930
bool SendText(const std::string& text) override;

0 commit comments

Comments
 (0)