|
5 | 5 | #include <rtc/plihandler.hpp> |
6 | 6 |
|
7 | 7 | #include <atomic> |
| 8 | +#include <cctype> |
8 | 9 | #include <chrono> |
9 | 10 | #include <condition_variable> |
10 | 11 | #include <cstdlib> |
|
15 | 16 | #include <inttypes.h> |
16 | 17 | #include <mutex> |
17 | 18 | #include <optional> |
| 19 | +#include <string> |
18 | 20 | #include <string_view> |
19 | 21 | #include <thread> |
20 | 22 | #include <utility> |
| 23 | +#include <vector> |
21 | 24 |
|
22 | 25 | namespace tenbox::daemon { |
23 | 26 |
|
@@ -70,6 +73,50 @@ const char* PeerStateName(int state) { |
70 | 73 | } |
71 | 74 | } |
72 | 75 |
|
// Builds the list of ICE server URLs handed to the peer connection.
//
// Source of the list:
//   * If the `TENBOX_STUN_SERVERS` environment variable is set and
//     non-empty, it is split on ',' and each entry is trimmed of leading
//     and trailing whitespace (as classified by std::isspace). Entries
//     that are empty after trimming are dropped. Entries are returned
//     verbatim otherwise - no URL validation happens here.
//     Example: "stun:stun.qq.com:3478, stun:stun.miwifi.com:3478".
//   * If that yields nothing (variable unset, empty, or only separators
//     and whitespace), the built-in defaults below are returned.
//
// Why these defaults: they favour mainland China deployments, where most
// of the production fleet currently runs and where Google's public STUN
// endpoint (stun.l.google.com:19302) is frequently unreachable over UDP,
// which used to surface as a timeout while creating the WebRTC answer.
//   1. Tencent    - primary domestic server.
//   2. Xiaomi     - secondary domestic server.
//   3. Cloudflare - anycast fallback intended for users outside CN.
//
// NOTE(review): the defaults are listed in preference order on the
// assumption that the ICE backend honours list order. Confirm this
// against the libdatachannel backend in use - some backends select a
// single STUN server rather than probing each entry in turn.
//
// Operators can replace the whole list through TENBOX_STUN_SERVERS, for
// example to point at a self-hosted coturn instance.
std::vector<std::string> ConfiguredStunServers() {
    static constexpr const char* kDefaultStunServers[] = {
        "stun:stun.qq.com:3478",
        "stun:stun.miwifi.com:3478",
        "stun:stun.cloudflare.com:3478",
    };

    // std::isspace requires a value representable as unsigned char.
    const auto is_blank = [](char ch) {
        return std::isspace(static_cast<unsigned char>(ch)) != 0;
    };

    std::vector<std::string> servers;

    if (const char* env = std::getenv("TENBOX_STUN_SERVERS"); env != nullptr) {
        std::string_view remaining(env);
        while (!remaining.empty()) {
            // Peel off the next comma-delimited token.
            const auto cut = remaining.find(',');
            std::string_view token = remaining.substr(0, cut);
            remaining = (cut == std::string_view::npos)
                            ? std::string_view{}
                            : remaining.substr(cut + 1);

            // Trim surrounding whitespace; keep only non-empty entries.
            while (!token.empty() && is_blank(token.front())) token.remove_prefix(1);
            while (!token.empty() && is_blank(token.back())) token.remove_suffix(1);
            if (!token.empty()) servers.emplace_back(token);
        }
    }

    if (servers.empty()) {
        for (const char* url : kDefaultStunServers) {
            servers.emplace_back(url);
        }
    }
    return servers;
}

| 119 | + |
73 | 120 | std::string StripLipSyncGroups(std::string sdp) { |
74 | 121 | std::string filtered; |
75 | 122 | filtered.reserve(sdp.size()); |
@@ -101,7 +148,16 @@ class NativeWebRtcPeer final : public WebRtcPeer { |
101 | 148 | : frame_reader_(std::move(frame_reader)), |
102 | 149 | preferred_video_format_(preferred_video_format) { |
103 | 150 | rtc::Configuration config; |
104 | | - config.iceServers.emplace_back("stun:stun.l.google.com:19302"); |
| 151 | + for (const auto& url : ConfiguredStunServers()) { |
| 152 | + try { |
| 153 | + config.iceServers.emplace_back(url); |
| 154 | + } catch (const std::exception& e) { |
| 155 | + std::fprintf(stdout, |
| 156 | + "[WARN] remote_webrtc: ignoring invalid STUN url %s: %s\n", |
| 157 | + url.c_str(), e.what()); |
| 158 | + std::fflush(stdout); |
| 159 | + } |
| 160 | + } |
105 | 161 | config.disableAutoNegotiation = true; |
106 | 162 | peer_ = std::make_shared<rtc::PeerConnection>(std::move(config)); |
107 | 163 | peer_->onStateChange([this](rtc::PeerConnection::State state) { |
@@ -131,11 +187,32 @@ class NativeWebRtcPeer final : public WebRtcPeer { |
131 | 187 | cv_.notify_all(); |
132 | 188 | }); |
133 | 189 | peer_->onLocalCandidate([this](rtc::Candidate candidate) { |
134 | | - std::lock_guard<std::mutex> lock(mu_); |
135 | | - candidates_.push_back({ |
| 190 | + nlohmann::json entry = { |
136 | 191 | {"candidate", std::string(candidate)}, |
137 | 192 | {"sdpMid", candidate.mid()}, |
138 | | - }); |
| 193 | + }; |
| 194 | + LocalIceCandidateHandler handler; |
| 195 | + { |
| 196 | + std::lock_guard<std::mutex> lock(mu_); |
| 197 | + candidates_.push_back(entry); |
| 198 | + handler = local_ice_handler_; |
| 199 | + } |
| 200 | + // Trickle every host-side candidate to the embedder so the |
| 201 | + // browser can start probing as soon as gathering produces |
| 202 | + // host / srflx entries, instead of having to wait for the |
| 203 | + // initial answer's `candidates[]` (which we cap at a short |
| 204 | + // gathering window so STUN-blackholed networks don't stall |
| 205 | + // session creation). |
| 206 | + if (handler) { |
| 207 | + try { |
| 208 | + handler(std::move(entry)); |
| 209 | + } catch (const std::exception& e) { |
| 210 | + std::fprintf(stdout, |
| 211 | + "[ERROR] remote_webrtc: local ice handler threw: %s\n", |
| 212 | + e.what()); |
| 213 | + std::fflush(stdout); |
| 214 | + } |
| 215 | + } |
139 | 216 | }); |
140 | 217 | peer_->onGatheringStateChange([this](rtc::PeerConnection::GatheringState state) { |
141 | 218 | if (state == rtc::PeerConnection::GatheringState::Complete) { |
@@ -183,13 +260,36 @@ class NativeWebRtcPeer final : public WebRtcPeer { |
183 | 260 | peer_->setRemoteDescription(rtc::Description(sdp, "offer")); |
184 | 261 | peer_->setLocalDescription(rtc::Description::Type::Answer); |
185 | 262 |
|
| 263 | + // Wait long enough to (a) absolutely require the answer SDP and |
| 264 | + // (b) opportunistically pick up gathered candidates so the |
| 265 | + // initial answer carries something useful. |
| 266 | + // |
| 267 | + // `description_ready_` is hard: without local SDP we have no |
| 268 | + // answer to hand back, so we error out. `gathering_complete_` |
| 269 | + // is intentionally soft - if STUN servers are unreachable |
| 270 | + // (common on locked-down networks: 19302/UDP black-holed by |
| 271 | + // ISP, no TURN configured), libdatachannel keeps waiting on |
| 272 | + // the binding response indefinitely. Returning the SDP with |
| 273 | + // whatever host candidates we already have lets LAN peers |
| 274 | + // connect, and the `LocalIceCandidateHandler` continues to |
| 275 | + // trickle later srflx/relay candidates as they arrive. |
186 | 276 | std::unique_lock<std::mutex> lock(mu_); |
187 | | - const bool ready = cv_.wait_for(lock, std::chrono::seconds(5), [this] { |
| 277 | + cv_.wait_for(lock, std::chrono::seconds(10), [this] { |
188 | 278 | return description_ready_ && gathering_complete_; |
189 | 279 | }); |
190 | | - if (!ready || local_sdp_.empty()) { |
| 280 | + if (local_sdp_.empty()) { |
191 | 281 | return WebRtcAnswer{.ok = false, .error = "timed out creating WebRTC answer"}; |
192 | 282 | } |
| 283 | + if (!gathering_complete_) { |
| 284 | + std::fprintf(stdout, |
| 285 | + "[WARN] remote_webrtc: returning answer before ICE " |
| 286 | + "gathering finished (%zu host candidate(s) so far); " |
| 287 | + "remaining candidates will trickle. Check STUN " |
| 288 | + "reachability or set TENBOX_STUN_SERVERS to a " |
| 289 | + "reachable server list.\n", |
| 290 | + candidates_.size()); |
| 291 | + std::fflush(stdout); |
| 292 | + } |
193 | 293 | return WebRtcAnswer{ |
194 | 294 | .ok = true, |
195 | 295 | .sdp = local_sdp_, |
@@ -242,6 +342,33 @@ class NativeWebRtcPeer final : public WebRtcPeer { |
242 | 342 | dc_handler_ = std::move(handler); |
243 | 343 | } |
244 | 344 |
|
| 345 | + void SetLocalIceCandidateHandler(LocalIceCandidateHandler handler) override { |
| 346 | + // Snapshot any candidates already gathered before the embedder |
| 347 | + // installed the trickle handler (typical race: handler is |
| 348 | + // attached right after CreateWebRtcPeer but onLocalCandidate |
| 349 | + // fires from the libdatachannel worker as soon as |
| 350 | + // setLocalDescription is called). Replaying ensures the browser |
| 351 | + // ends up with the same candidate set regardless of timing. |
| 352 | + std::vector<nlohmann::json> backlog; |
| 353 | + { |
| 354 | + std::lock_guard<std::mutex> lock(mu_); |
| 355 | + local_ice_handler_ = handler; |
| 356 | + if (handler && !candidates_.empty()) { |
| 357 | + for (const auto& c : candidates_) backlog.push_back(c); |
| 358 | + } |
| 359 | + } |
| 360 | + for (auto& c : backlog) { |
| 361 | + try { |
| 362 | + handler(std::move(c)); |
| 363 | + } catch (const std::exception& e) { |
| 364 | + std::fprintf(stdout, |
| 365 | + "[ERROR] remote_webrtc: local ice handler threw on backlog: %s\n", |
| 366 | + e.what()); |
| 367 | + std::fflush(stdout); |
| 368 | + } |
| 369 | + } |
| 370 | + } |
| 371 | + |
245 | 372 | void SetDataChannelOpenHandler(DataChannelOpenHandler handler) override { |
246 | 373 | // Snapshot any already-open channels so a handler installed after |
247 | 374 | // open() still gets a chance to seed initial state. Channels we |
@@ -1153,6 +1280,7 @@ class NativeWebRtcPeer final : public WebRtcPeer { |
1153 | 1280 | std::vector<std::shared_ptr<rtc::DataChannel>> data_channels_; |
1154 | 1281 | DataChannelMessageHandler dc_handler_; |
1155 | 1282 | DataChannelOpenHandler dc_open_handler_; |
| 1283 | + LocalIceCandidateHandler local_ice_handler_; |
1156 | 1284 | std::shared_ptr<rtc::Track> video_track_; |
1157 | 1285 | std::shared_ptr<rtc::Track> audio_track_; |
1158 | 1286 | std::thread video_thread_; |
@@ -1187,4 +1315,8 @@ bool NativeWebRtcAvailable() { |
1187 | 1315 | return true; |
1188 | 1316 | } |
1189 | 1317 |
|
| 1318 | +std::vector<std::string> ResolvedStunServers() { |
| 1319 | + return ConfiguredStunServers(); |
| 1320 | +} |
| 1321 | + |
1190 | 1322 | } // namespace tenbox::daemon |
0 commit comments