Skip to content

Commit 8db1f4d

Browse files
authored
fix: Fixes a deadlock which we observed. (#345)
The call reInviteParticipants triggered by an ICE failure notification resulted in a synchronous call to expireChannels (while holding the lock on #participantLock), followed by an attempt to lock #bridges in inviteParticipant. This fix makes sure that we never lock #bridges first and then #participants. =================================================== "pool-1-thread-1": at org.jitsi.jicofo.JitsiMeetConferenceImpl.reInviteParticipants(JitsiMeetConferenceImpl.java:2426) - waiting to lock <0x0000000700ece2b8> (a java.lang.Object) at org.jitsi.jicofo.JitsiMeetConferenceImpl.onBridgeDown(JitsiMeetConferenceImpl.java:2295) - locked <0x0000000700ece758> (a java.util.LinkedList) at org.jitsi.jicofo.JitsiMeetConferenceImpl.handleEvent(JitsiMeetConferenceImpl.java:2235) at org.jitsi.eventadmin.EventAdminImpl.callEventHandler(EventAdminImpl.java:207) at org.jitsi.eventadmin.EventAdminImpl.access$000(EventAdminImpl.java:34) at org.jitsi.eventadmin.EventAdminImpl$1.run(EventAdminImpl.java:187) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) "Smack-Single Threaded Executor 0 (0)": at org.jitsi.jicofo.JitsiMeetConferenceImpl.inviteParticipant(JitsiMeetConferenceImpl.java:798) - waiting to lock <0x0000000700ece758> (a java.util.LinkedList) at org.jitsi.jicofo.JitsiMeetConferenceImpl.reInviteParticipants(JitsiMeetConferenceImpl.java:2448) - locked <0x0000000700ece2b8> (a java.lang.Object) at org.jitsi.jicofo.JitsiMeetConferenceImpl.reInviteParticipant(JitsiMeetConferenceImpl.java:2414) at org.jitsi.jicofo.JitsiMeetConferenceImpl.onSessionInfo(JitsiMeetConferenceImpl.java:1543) at org.jitsi.protocol.xmpp.AbstractOperationSetJingle.processJingleIQ(AbstractOperationSetJingle.java:349) at 
org.jitsi.protocol.xmpp.AbstractOperationSetJingle.handleIQRequest(AbstractOperationSetJingle.java:78) at org.jivesoftware.smack.AbstractXMPPConnection$3.run(AbstractXMPPConnection.java:1154) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748)
1 parent 614a381 commit 8db1f4d

File tree

1 file changed

+31
-5
lines changed

1 file changed

+31
-5
lines changed

src/main/java/org/jitsi/jicofo/JitsiMeetConferenceImpl.java

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,18 @@
5151
* participants, as well as the COLIBRI session with the jitsi-videobridge
5252
* instances used for the conference.
5353
*
54+
* A note on synchronization: this class uses a lot of 'synchronized' blocks,
55+
* on 4 different objects ({@link #bridges}, {@link #participantLock},
56+
* {@code this} and {@link BridgeSession#octoParticipant}). At the time of this
57+
* writing it seems that multiple locks are acquired only in the following
58+
* orders:
59+
* {@code participantLock} -> {@code bridges}, and
60+
* {@code participantLock} -> {@code this} -> {@code bridges}.
61+
*
62+
* This seems safe, but it is hard to maintain this way, and we should
63+
* re-factor to simplify.
64+
*
65+
*
5466
* @author Pawel Domas
5567
* @author Boris Grozev
5668
*/
@@ -151,6 +163,11 @@ public class JitsiMeetConferenceImpl
151163

152164
/**
153165
* This lock is used to synchronise write access to {@link #participants}.
166+
*
167+
* WARNING: To avoid deadlocks we must make sure that any code paths that
168+
* lock both {@link #bridges} and {@link #participantLock} do so in the
169+
* correct order. The lock on {@link #participantLock} must be acquired
170+
* first.
154171
*/
155172
private final Object participantLock = new Object();
156173

@@ -228,6 +245,12 @@ public class JitsiMeetConferenceImpl
228245

229246
/**
230247
* The list of {@link BridgeSession} currently in use by this conference.
248+
*
249+
* WARNING: To avoid deadlocks we must make sure that any code paths that
250+
* lock both {@link #bridges} and {@link #participantLock} do so in the
251+
* correct order. The lock on {@link #participantLock} must be acquired
252+
* first.
253+
*
231254
*/
232255
private final List<BridgeSession> bridges = new LinkedList<>();
233256

@@ -2273,6 +2296,8 @@ private void onBridgeUp(Jid bridgeJid)
22732296
*/
22742297
void onBridgeDown(Jid bridgeJid)
22752298
{
2299+
List<Participant> participantsToReinvite = Collections.EMPTY_LIST;
2300+
22762301
synchronized (bridges)
22772302
{
22782303
BridgeSession bridgeSession = findBridgeSession(bridgeJid);
@@ -2282,19 +2307,21 @@ void onBridgeDown(Jid bridgeJid)
22822307

22832308
// Note: the Jingle sessions are still alive, we'll just
22842309
// (try to) move to a new bridge and send transport-replace.
2285-
List<Participant> participantsToReinvite
2286-
= bridgeSession.terminateAll();
2310+
participantsToReinvite = bridgeSession.terminateAll();
22872311

22882312
bridges.remove(bridgeSession);
22892313
setConferenceProperty(
22902314
ConferenceProperties.KEY_BRIDGE_COUNT,
22912315
Integer.toString(bridges.size()));
22922316

22932317
updateOctoRelays();
2294-
2295-
reInviteParticipants(participantsToReinvite);
22962318
}
22972319
}
2320+
2321+
if (!participantsToReinvite.isEmpty())
2322+
{
2323+
reInviteParticipants(participantsToReinvite);
2324+
}
22982325
}
22992326

23002327
/**
@@ -2707,7 +2734,6 @@ private List<Participant> terminateAll()
27072734
public boolean terminate(AbstractParticipant participant,
27082735
boolean syncExpire)
27092736
{
2710-
//TODO synchronize?
27112737
boolean octo = participant == this.octoParticipant;
27122738
boolean removed = octo || participants.remove(participant);
27132739
if (removed)

0 commit comments

Comments
 (0)