Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9a178e3
fix: Redact SSO PII before deletion
ktyagiapphelix2u Apr 23, 2026
8d57698
fix: Redact SSO PII before deletion
ktyagiapphelix2u Apr 23, 2026
2688ac8
fix: Redact SSO PII before deletion
ktyagiapphelix2u Apr 23, 2026
ff4b57e
fix: Redact SSO PII before deletion
ktyagiapphelix2u Apr 23, 2026
417aa3d
fix: Redact SSO PII before deletion
ktyagiapphelix2u Apr 28, 2026
542b5be
fix: Redact SSO PII before deletion
ktyagiapphelix2u Apr 28, 2026
1b46be6
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 4, 2026
74d655b
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 4, 2026
08b491f
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 6, 2026
bbb5643
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 6, 2026
07b82ff
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 7, 2026
15bcdc0
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 7, 2026
2a9fba8
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 7, 2026
dd7ac9c
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 7, 2026
5ca020f
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 8, 2026
cdb49a2
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 8, 2026
bd3c108
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 8, 2026
7528c08
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 8, 2026
2af3cb4
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 11, 2026
9a8ba84
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 11, 2026
a75fb7f
Merge branch 'master' into ktyagi/SSOPII
ktyagiapphelix2u May 11, 2026
0cbee49
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 12, 2026
c902e56
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 12, 2026
5b3312e
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 12, 2026
9aa4192
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 12, 2026
36192df
fix: Redact SSO PII before deletion
ktyagiapphelix2u May 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions openedx/core/djangoapps/user_api/accounts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
from django.utils.text import format_lazy
from django.utils.translation import gettext_lazy as _

# Import signals to ensure they are registered
from . import signals # noqa: F401, pylint: disable=unused-import

# The maximum length for the bio ("about me") account field
BIO_MAX_LENGTH = 300

Expand Down
55 changes: 54 additions & 1 deletion openedx/core/djangoapps/user_api/accounts/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,24 @@
Django Signal related functionality for user_api accounts
"""

import logging

from django.dispatch import Signal
from django.db.models.signals import pre_delete
from django.dispatch import Signal, receiver
from social_django.models import UserSocialAuth

logger = logging.getLogger(__name__)

# Prefix and suffix used to build a per-record redacted uid for UserSocialAuth.
REDACTED_SOCIAL_AUTH_UID_PREFIX = 'redacted-before-delete-'
REDACTED_SOCIAL_AUTH_UID_SUFFIX = '@safe.com'
Comment thread
robrap marked this conversation as resolved.
Outdated


def get_redacted_social_auth_uid(pk):
    """
    Build the placeholder uid written to a UserSocialAuth record in place of its real uid.

    The result is the module-level prefix, followed by ``pk``, followed by the
    module-level suffix, so every record receives its own redacted value.
    Callers that need this format should obtain it from this function rather
    than rebuilding the string themselves.
    """
    prefix = REDACTED_SOCIAL_AUTH_UID_PREFIX
    suffix = REDACTED_SOCIAL_AUTH_UID_SUFFIX
    return f'{prefix}{pk}{suffix}'

# Signal to retire a user from LMS-initiated mailings (course mailings, etc)
# providing_args=["user"]
Expand All @@ -16,3 +32,40 @@
# Signal to retire LMS misc information
# providing_args=["user"]
USER_RETIRE_LMS_MISC = Signal()


@receiver(pre_delete, sender=UserSocialAuth)
def redact_social_auth_pii_before_deletion(sender, instance, **kwargs):  # pylint: disable=unused-argument
    """
    Redact PII fields (uid, extra_data) on a UserSocialAuth record before it is deleted.

    Replaces uid with get_redacted_social_auth_uid(pk) and clears extra_data by
    issuing a queryset UPDATE against the record. A field that already holds its
    redacted value is left out of the UPDATE, and no query is issued at all when
    nothing needs to change.

    Arguments:
        sender: the model class sending the signal (unused).
        instance: the UserSocialAuth instance that is about to be deleted.
        **kwargs: remaining pre_delete signal arguments (unused).

    Raises:
        Exception: any error raised while redacting is logged and re-raised so
            that the deletion does not proceed without redaction.
    """
    # Without an instance or a primary key there is no record to target with the UPDATE below.
    if not instance or not instance.pk:
        return

    try:
        update_fields = {}
        redacted_uid = get_redacted_social_auth_uid(instance.pk)

        # These fields may have already been redacted as part of a bulk retirement,
        # so we skip the update if it is already done to reduce query count.
        if instance.uid != redacted_uid:
            update_fields['uid'] = redacted_uid
        if instance.extra_data:
            update_fields['extra_data'] = {}

        if not update_fields:
            return

        # Write through the queryset rather than instance.save() so only the
        # redacted columns are touched.
        UserSocialAuth.objects.filter(pk=instance.pk).update(**update_fields)
    except Exception:  # pylint: disable=broad-except
        logger.exception(
            "Failed to redact PII for UserSocialAuth before deletion: user_id=%s, provider=%s",
            instance.user_id,
            instance.provider,
        )
        # Re-raise to prevent deletion from proceeding without redaction
        raise
145 changes: 144 additions & 1 deletion openedx/core/djangoapps/user_api/accounts/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,14 @@
from completion.test_utils import CompletionWaffleTestMixin
from django.test import TestCase
from django.test.utils import override_settings
from social_django.models import UserSocialAuth

from common.djangoapps.student.models import CourseEnrollment
from common.djangoapps.student.tests.factories import UserFactory
from openedx.core.djangoapps.user_api.accounts.utils import retrieve_last_sitewide_block_completed
from openedx.core.djangoapps.user_api.accounts.signals import get_redacted_social_auth_uid
from openedx.core.djangoapps.user_api.accounts.utils import (
retrieve_last_sitewide_block_completed,
)
from openedx.core.djangolib.testing.utils import skip_unless_lms
from xmodule.modulestore.tests.django_utils import (
SharedModuleStoreTestCase, # lint-amnesty, pylint: disable=wrong-import-order
Expand Down Expand Up @@ -133,3 +137,142 @@ def test_retrieve_last_sitewide_block_completed(self):
)

assert empty_block_url is None


@skip_unless_lms
class RedactUserSocialAuthPIITest(TestCase):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. The signal tests belong in a test_signals.py file with an appropriate class name. Some reasonable signal tests:
  • Does the signal warn and redact if not already redacted?
  • Does the signal skip warning (and redaction) if already redacted?
  • Optional: Using mock, confirm redact_and_delete_social_auth is called with skip_delete=True.
  2. For utils tests of direct calls to redact_and_delete_social_auth, you can cover any items you didn't cover in signals (like maybe test_delete_redacts_multiple_sso_providers), and this shouldn't require signal setup and teardown.

Note: You have much of what you need, so hopefully this is minor refactoring and clean-up.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

signal tests moved to test_signals.py using mock.patch, and utils tests now call redact_and_delete_social_auth directly without any signal setup/teardown.

"""
Tests for SSO PII redaction before deletion.
"""

def setUp(self):
    """Create the user that owns the social auth records built by the tests."""
    super().setUp()
    self.user = UserFactory.create(
        username='testuser',
        email='testuser@example.com',
    )

def create_social_auth(self, provider='google-oauth2', uid='user@example.com', extra_data=None):
    """
    Create and return a UserSocialAuth record owned by ``self.user``.

    When ``extra_data`` is None, a small default payload containing an email,
    a name and an id is stored instead.
    """
    payload = extra_data
    if payload is None:
        payload = {
            'email': 'user@example.com',
            'name': 'Test User',
            'id': '123456789',
        }

    fields = {
        'user': self.user,
        'provider': provider,
        'uid': uid,
        'extra_data': payload,
    }
    return UserSocialAuth.objects.create(**fields)

def test_get_redacted_social_auth_uid_format(self):
    """
    Test that get_redacted_social_auth_uid returns the expected string format.

    The exact strings are pinned here so that an accidental change to the
    prefix, the suffix, or the position of the primary key is caught.
    """
    expected_by_pk = {
        42: 'redacted-before-delete-42@safe.com',
        1: 'redacted-before-delete-1@safe.com',
    }
    for pk, expected_uid in expected_by_pk.items():
        assert get_redacted_social_auth_uid(pk) == expected_uid

def test_delete_redacts_user_social_auth_pii(self):
"""
Test that deleting social auth redacts uid and extra_data before removal.
"""
social_auth = self.create_social_auth()
social_auth_id = social_auth.id

captured_states = []

def capture_state_before_delete(sender, instance, **kwargs): # pylint: disable=unused-argument
instance.refresh_from_db()
captured_states.append({
'id': instance.id,
'uid': instance.uid,
'extra_data': dict(instance.extra_data) if instance.extra_data else {},
})
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using a `pre_delete` signal for testing a `pre_delete` signal makes this confusing. Is that what is being done here? How do you know what order the `pre_delete` signals will get called? I'd rather it wasn't confusing in this way, and you used some other mechanism to test, like checking that there is an appropriate UPDATE query before the DELETE query, as we did in the earlier PR. You can retain the not exists assertion at the end.

Also, If this were needed, you've got a lot of code redundancy. You could use setUpClass or setUp and tearDownClass or tearDown, or helper functions to keep things DRY (Don't Repeat Yourself).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

refactored to use CaptureQueriesContext to assert UPDATE precedes DELETE, and moved the signal tests to a new test_signals.py.


from django.db.models.signals import pre_delete
Comment thread
robrap marked this conversation as resolved.
Outdated

pre_delete.connect(capture_state_before_delete, sender=UserSocialAuth)
try:
social_auth.delete()
finally:
pre_delete.disconnect(capture_state_before_delete, sender=UserSocialAuth)

assert captured_states == [{
'id': social_auth_id,
'uid': get_redacted_social_auth_uid(social_auth_id),
'extra_data': {},
}]
assert not UserSocialAuth.objects.filter(id=social_auth_id).exists()

def test_delete_already_redacted_user_social_auth_is_idempotent(self):
    """
    Test that deleting an already redacted social auth keeps the redacted state.

    The record is saved in its redacted form first; a temporary pre_delete
    receiver then reloads it from the database during deletion and records
    what it sees.
    """
    from django.db.models.signals import pre_delete

    social_auth = self.create_social_auth()
    social_auth.uid = get_redacted_social_auth_uid(social_auth.pk)
    social_auth.extra_data = {}
    social_auth.save(update_fields=['uid', 'extra_data'])
    # Remember the id now; it is needed for the assertions after the delete.
    record_id = social_auth.id

    seen_at_delete = []

    def record_db_state(sender, instance, **kwargs):  # pylint: disable=unused-argument
        instance.refresh_from_db()
        seen_at_delete.append((instance.uid, instance.extra_data))

    pre_delete.connect(record_db_state, sender=UserSocialAuth)
    try:
        social_auth.delete()
    finally:
        # Always detach the temporary receiver so it cannot leak into other tests.
        pre_delete.disconnect(record_db_state, sender=UserSocialAuth)

    expected = [(get_redacted_social_auth_uid(record_id), {})]
    assert seen_at_delete == expected
    assert not UserSocialAuth.objects.filter(id=record_id).exists()

def test_delete_redacts_multiple_sso_providers(self):
    """
    Test that deletion redacts multiple SSO providers before removal.

    One record per provider is created and deleted in turn; a temporary
    pre_delete receiver reloads each record from the database during its
    deletion and records the provider, uid and extra_data it sees.
    """
    from django.db.models.signals import pre_delete

    google_auth = self.create_social_auth(
        provider='google-oauth2',
        uid='google@example.com',
        extra_data={'email': 'google@example.com', 'name': 'Google User'}
    )
    saml_auth = self.create_social_auth(
        provider='tpa-saml',
        uid='saml@example.com',
        extra_data={'email': 'saml@example.com', 'name': 'SAML User', 'uid': 'saml-uid'}
    )
    # Save IDs before deletion (they become None after delete)
    google_pk = google_auth.pk
    saml_pk = saml_auth.pk

    seen_at_delete = []

    def record_db_state(sender, instance, **kwargs):  # pylint: disable=unused-argument
        instance.refresh_from_db()
        seen_at_delete.append((instance.provider, instance.uid, instance.extra_data))

    pre_delete.connect(record_db_state, sender=UserSocialAuth)
    try:
        google_auth.delete()
        saml_auth.delete()
    finally:
        # Always detach the temporary receiver so it cannot leak into other tests.
        pre_delete.disconnect(record_db_state, sender=UserSocialAuth)

    expected = [
        ('google-oauth2', get_redacted_social_auth_uid(google_pk), {}),
        ('tpa-saml', get_redacted_social_auth_uid(saml_pk), {}),
    ]
    assert sorted(seen_at_delete) == sorted(expected)
8 changes: 7 additions & 1 deletion openedx/core/djangoapps/user_api/accounts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disable=wrong-import-order

from ..models import UserRetirementStatus
from .signals import get_redacted_social_auth_uid

ENABLE_SECONDARY_EMAIL_FEATURE_SWITCH = 'enable_secondary_email_feature'
LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -204,7 +205,12 @@ def create_retirement_request_and_deactivate_account(user):
# Add user to retirement queue.
UserRetirementStatus.create_retirement(user)

# Unlink LMS social auth accounts
# Redact and unlink LMS social auth accounts.
social_auth_records = list(UserSocialAuth.objects.filter(user_id=user.id))
for auth in social_auth_records:
auth.uid = get_redacted_social_auth_uid(auth.pk)
auth.extra_data = {}
UserSocialAuth.objects.bulk_update(social_auth_records, ['uid', 'extra_data'])
UserSocialAuth.objects.filter(user_id=user.id).delete()

# Change LMS password & email
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from common.djangoapps.student.models import AccountRecovery, Registration, get_retired_email_by_email
from openedx.core.djangolib.oauth2_retirement_utils import retire_dot_oauth2_models

from ...accounts.signals import get_redacted_social_auth_uid
from ...models import BulkUserRetirementConfig, UserRetirementStatus

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -144,7 +145,12 @@ def handle(self, *args, **options):
for user in users:
# Add user to retirement queue.
UserRetirementStatus.create_retirement(user)
# Unlink LMS social auth accounts
# Redact and unlink LMS social auth accounts.
social_auth_records = list(UserSocialAuth.objects.filter(user_id=user.id))
Comment thread
robrap marked this conversation as resolved.
Outdated
for auth in social_auth_records:
auth.uid = get_redacted_social_auth_uid(auth.pk)
auth.extra_data = {}
UserSocialAuth.objects.bulk_update(social_auth_records, ['uid', 'extra_data'])
Comment thread
robrap marked this conversation as resolved.
Outdated
UserSocialAuth.objects.filter(user_id=user.id).delete()
# Change LMS password & email
user.email = get_retired_email_by_email(user.email)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
import pytest
from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user
from django.core.management import CommandError, call_command
from django.db.models.signals import pre_delete
from social_django.models import UserSocialAuth

from common.djangoapps.student.tests.factories import UserFactory # lint-amnesty, pylint: disable=wrong-import-order
from openedx.core.djangoapps.user_api.accounts.tests.retirement_helpers import ( # lint-amnesty, pylint: disable=unused-import, wrong-import-order
setup_retirement_states, # noqa: F401
)
from openedx.core.djangolib.testing.utils import skip_unless_lms # lint-amnesty, pylint: disable=wrong-import-order

from ...accounts.signals import get_redacted_social_auth_uid
from ...models import UserRetirementStatus

pytestmark = pytest.mark.django_db
Expand Down Expand Up @@ -107,3 +110,99 @@ def test_retire_with_username_email_userfile(setup_retirement_states): # lint-a
with pytest.raises(CommandError, match=r'You cannot use userfile option with username and user_email'):
call_command('retire_user', user_file=user_file, username=username, user_email=user_email)
remove_user_file()


@skip_unless_lms
def test_retire_user_redacts_sso_pii_before_deletion(setup_retirement_states): # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument # noqa: F811
Comment thread
ktyagiapphelix2u marked this conversation as resolved.
Outdated
"""
Test that SSO PII is redacted before UserSocialAuth records are deleted during retirement.

This test verifies the order of operations by capturing the record's state
at the moment of deletion to ensure it was already redacted.
"""
user = UserFactory.create(username='sso-user', email='sso-user@example.com')
social_auth = UserSocialAuth.objects.create(
user=user,
provider='google-oauth2',
uid='sso-user@example.com',
extra_data={
'email': 'sso-user@example.com',
'name': 'SSO Test User',
'id': '123456789',
}
)
social_auth_id = social_auth.id

captured_states = []

def capture_state_before_delete(sender, instance, **kwargs): # pylint: disable=unused-argument
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. You may want to use the same UPDATE/DELETE query assertion you set up for the other test. See other comment for details.
  2. You'll also want to ensure that the real receiver you set up is not interfering with this test. For example, if you deleted the redaction from retire_user.py, would this test still pass because the signal is taking care of the redaction for you? One way to fix this would be to disconnect that signal in setUpClass (with an appropriate comment) and to re-connect it in tearDownClass. An alternative is to mock logging and ensure that there is no log.warn from the signal (about redacting). You can test that these assertions work by temporarily removing the redaction you are testing.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

switched to CaptureQueriesContext for UPDATE-before-DELETE assertions, and disconnected the safety-net pre_delete signal handler around both tests so they'd fail if retire_user itself stopped redacting.

"""Capture the database state seen by the pre_delete signal."""
instance.refresh_from_db()
captured_states.append({
'id': instance.id,
'uid': instance.uid,
'extra_data': dict(instance.extra_data) if instance.extra_data else {},
})

pre_delete.connect(capture_state_before_delete, sender=UserSocialAuth)
try:
call_command('retire_user', username=user.username, user_email=user.email)
finally:
pre_delete.disconnect(capture_state_before_delete, sender=UserSocialAuth)

# Verify that at the moment of deletion, the record was already redacted
assert captured_states == [{
'id': social_auth_id,
'uid': get_redacted_social_auth_uid(social_auth_id),
'extra_data': {},
}], \
"SSO records should be redacted before deletion"

# Verify deletion completed
assert not UserSocialAuth.objects.filter(id=social_auth_id).exists()

retired_user_status = UserRetirementStatus.objects.filter(original_username=user.username).first()
assert retired_user_status is not None
assert retired_user_status.original_email == 'sso-user@example.com'


@skip_unless_lms
def test_retire_user_redacts_each_social_auth_before_bulk_deletion(setup_retirement_states):  # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument # noqa: F811
    """
    Test that each UserSocialAuth record is redacted before bulk deletion during retirement.

    The production pre_delete receiver also redacts records, and it is registered
    before the capture receiver used here. Left connected, it would redact the
    rows itself and this test would pass even if retire_user stopped redacting.
    It is therefore detached while the command runs and restored afterwards.
    """
    # Imported locally so this test carries its own dependency on the production receiver.
    from ...accounts.signals import (  # pylint: disable=import-outside-toplevel
        redact_social_auth_pii_before_deletion,
    )

    user = UserFactory.create(username='multi-sso-user', email='multi-sso@example.com')
    google_auth = UserSocialAuth.objects.create(
        user=user,
        provider='google-oauth2',
        uid='google-multi@example.com',
        extra_data={'email': 'google-multi@example.com', 'name': 'Google User'}
    )
    saml_auth = UserSocialAuth.objects.create(
        user=user,
        provider='tpa-saml',
        uid='saml-multi@example.com',
        extra_data={'email': 'saml-multi@example.com', 'name': 'SAML User', 'uid': 'saml-123'}
    )
    # Save IDs before deletion (they become None after delete)
    google_auth_id = google_auth.id
    saml_auth_id = saml_auth.id

    captured_states = []

    def capture_state_before_delete(sender, instance, **kwargs):  # pylint: disable=unused-argument
        """Capture the database state seen by the pre_delete signal."""
        instance.refresh_from_db()
        extra = dict(instance.extra_data) if instance.extra_data else {}
        captured_states.append((instance.provider, instance.uid, extra))

    # disconnect() reports whether the receiver was registered; only restore it if it was.
    production_receiver_was_connected = pre_delete.disconnect(
        redact_social_auth_pii_before_deletion, sender=UserSocialAuth
    )
    pre_delete.connect(capture_state_before_delete, sender=UserSocialAuth)
    try:
        call_command('retire_user', username=user.username, user_email=user.email)
    finally:
        pre_delete.disconnect(capture_state_before_delete, sender=UserSocialAuth)
        if production_receiver_was_connected:
            pre_delete.connect(redact_social_auth_pii_before_deletion, sender=UserSocialAuth)

    assert sorted(captured_states) == sorted([
        ('google-oauth2', get_redacted_social_auth_uid(google_auth_id), {}),
        ('tpa-saml', get_redacted_social_auth_uid(saml_auth_id), {}),
    ])
Loading