Skip to content

Commit 5ea5de0

Browse files
✨(web-infrastructure) set offers.source_id as a foreign key (#651)
* ✨(web-infrastructure) set offers.source_id as a foreign key
* ♻️(web-infrastructure) offer.source_id is a UUID
* ✅(web-infrastructure) review PR comments
* ♻️(web-infrastructure) drop index on source_id
1 parent 263b064 commit 5ea5de0

24 files changed

Lines changed: 198 additions & 78 deletions

File tree

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
from dataclasses import dataclass
2+
from uuid import UUID
23

34

45
@dataclass
56
class ArchiveOfferByReferenceInput:
67
reference: str
7-
source_id: str
8+
source_id: UUID

src/web/domain/entities/offer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ class Offer(IEntity):
2828
publication_date: datetime
2929
beginning_date: Optional[LimitDate]
3030
family_code: Optional[str] = None
31-
source_id: Optional[str] = None
31+
source_id: UUID = field(default_factory=uuid4)
3232
processing: bool = False
3333
processed_at: Optional[datetime] = None
3434
archived_at: Optional[datetime] = None

src/web/domain/repositories/offers_repository_interface.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def get_by_ids(self, offer_ids: List[UUID]) -> List[Offer]: ...
2929
def get_by_external_id(self, external_id: str) -> Offer: ...
3030

3131
def get_by_reference_and_source_id(
32-
self, reference: str, source_id: str
32+
self, reference: str, source_id: UUID
3333
) -> Offer: ...
3434

3535
def get_by_external_ids(self, external_ids: List[str]) -> List[Offer]: ...

src/web/infrastructure/di/ingestion/ingestion_container.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,7 @@ class IngestionContainer(containers.DeclarativeContainer):
102102
concours_repository=concours_repository,
103103
offers_repository=offers_repository,
104104
metiers_repository=metiers_repository,
105+
source_repository=source_repository,
105106
)
106107
)
107108

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,51 @@
1+
import django.db.models.deletion
2+
from django.db import migrations, models
3+
4+
5+
def hydrate_source(apps, schema_editor):
6+
OfferModel = apps.get_model("shared", "OfferModel")
7+
SourceModel = apps.get_model("ingestion", "SourceModel")
8+
source = SourceModel.objects.first()
9+
if source:
10+
OfferModel.objects.update(source=source)
11+
12+
13+
class Migration(migrations.Migration):
14+
15+
dependencies = [
16+
("ingestion", "0024_alter_sourcemodel_id_alter_sourcemodel_source_id"),
17+
("shared", "0029_populate_offer_source_id"),
18+
]
19+
20+
operations = [
21+
migrations.RemoveIndex(
22+
model_name="offermodel",
23+
name="offers_source_id_idx",
24+
),
25+
migrations.RemoveField(
26+
model_name="offermodel",
27+
name="source_id",
28+
),
29+
migrations.AddField(
30+
model_name="offermodel",
31+
name="source",
32+
field=models.ForeignKey(
33+
null=True,
34+
on_delete=django.db.models.deletion.PROTECT,
35+
related_name="offers",
36+
to="ingestion.sourcemodel",
37+
to_field="source_id",
38+
),
39+
),
40+
migrations.RunPython(hydrate_source, reverse_code=migrations.RunPython.noop),
41+
migrations.AlterField(
42+
model_name="offermodel",
43+
name="source",
44+
field=models.ForeignKey(
45+
on_delete=django.db.models.deletion.PROTECT,
46+
related_name="offers",
47+
to="ingestion.sourcemodel",
48+
to_field="source_id",
49+
),
50+
),
51+
]

src/web/infrastructure/django_apps/shared/models/offer.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from domain.value_objects.localisation import Localisation
1212
from domain.value_objects.region import Region
1313
from domain.value_objects.verse import Verse
14+
from infrastructure.django_apps.ingestion.models.source import SourceModel
1415

1516

1617
class OfferModel(models.Model):
@@ -42,7 +43,12 @@ class OfferModel(models.Model):
4243
organization = models.CharField(max_length=500)
4344
offer_url = models.URLField(null=True, blank=True)
4445
code_emploi_csp = models.CharField(max_length=50, null=True, blank=True)
45-
source_id = models.UUIDField(null=True, blank=True)
46+
source = models.ForeignKey(
47+
SourceModel,
48+
to_field="source_id",
49+
on_delete=models.PROTECT,
50+
related_name="offers",
51+
)
4652

4753
# Localisation fields stored separately
4854
area = models.CharField(max_length=2, null=True, blank=True)
@@ -66,7 +72,6 @@ class Meta:
6672
verbose_name_plural = "Offers"
6773
indexes = [
6874
models.Index(fields=["external_id"]),
69-
models.Index(fields=["source_id"], name="offers_source_id_idx"),
7075
]
7176

7277
def to_entity(self) -> Offer:
@@ -109,7 +114,7 @@ def to_entity(self) -> Offer:
109114
processed_at=self.processed_at,
110115
archived_at=self.archived_at,
111116
family_code=self.code_emploi_csp,
112-
source_id=str(self.source_id) if self.source_id else None,
117+
source_id=self.source_id,
113118
)
114119

115120
@classmethod

src/web/infrastructure/gateways/ingestion/document_cleaner.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from domain.repositories.corps_repository_interface import ICorpsRepository
1313
from domain.repositories.metier_repository_interface import IMetierRepository
1414
from domain.repositories.offers_repository_interface import IOffersRepository
15+
from domain.repositories.source_repository_interface import ISourceRepository
1516
from domain.services.document_cleaner_interface import CleaningResult, IDocumentCleaner
1617
from domain.services.logger_interface import ILogger
1718
from infrastructure.gateways.ingestion.concours_cleaner import ConcoursCleaner
@@ -28,11 +29,14 @@ def __init__(
2829
concours_repository: IConcoursRepository,
2930
offers_repository: IOffersRepository,
3031
metiers_repository: IMetierRepository,
32+
source_repository: ISourceRepository,
3133
):
3234
self._cleaners = {
3335
DocumentType.CORPS: CorpsCleaner(logger, corps_repository),
3436
DocumentType.CONCOURS: ConcoursCleaner(logger, concours_repository),
35-
DocumentType.OFFERS: OffersCleaner(logger, offers_repository),
37+
DocumentType.OFFERS: OffersCleaner(
38+
logger, offers_repository, source_repository
39+
),
3640
DocumentType.METIERS: MetierCleaner(logger, metiers_repository),
3741
}
3842

src/web/infrastructure/gateways/ingestion/offers_cleaner.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from datetime import datetime
22
from typing import List, Optional
3+
from uuid import UUID
34

45
from pydantic import HttpUrl, ValidationError
56

@@ -8,6 +9,7 @@
89
from domain.exceptions.document_error import InvalidDocumentTypeError
910
from domain.exceptions.offer_errors import OfferDoesNotExist
1011
from domain.repositories.offers_repository_interface import IOffersRepository
12+
from domain.repositories.source_repository_interface import ISourceRepository
1113
from domain.services.document_cleaner_interface import CleaningResult, IDocumentCleaner
1214
from domain.services.logger_interface import ILogger
1315
from domain.value_objects.area import GeographicalArea
@@ -34,15 +36,27 @@
3436

3537

3638
class OffersCleaner(IDocumentCleaner[Offer]):
37-
def __init__(self, logger: ILogger, offers_repository: IOffersRepository):
39+
def __init__(
40+
self,
41+
logger: ILogger,
42+
offers_repository: IOffersRepository,
43+
source_repository: ISourceRepository,
44+
):
3845
self.logger = logger
3946
self.offers_repository = offers_repository
47+
self.source_repository = source_repository
4048

4149
def clean(self, raw_documents: List[Document]) -> CleaningResult[Offer]:
4250
for document in raw_documents:
4351
if document.type != DocumentType.OFFERS:
4452
raise InvalidDocumentTypeError(document.type.value) # todo: test
4553

54+
sources = self.source_repository.get_all()
55+
if not sources:
56+
raise ValueError("No source found in repository")
57+
# TODO: replace once we have multiple instances of Talentsoft; until then the first source is used
58+
source_id = sources[0].source_id
59+
4660
validated_offers = []
4761
cleaning_errors = []
4862

@@ -63,7 +77,7 @@ def clean(self, raw_documents: List[Document]) -> CleaningResult[Offer]:
6377
for talentsoft_offer in validated_offers:
6478
self.logger.info(f"Processing offer {talentsoft_offer.reference}")
6579
try:
66-
offer = self._map_talentsoft_to_offer(talentsoft_offer)
80+
offer = self._map_talentsoft_to_offer(talentsoft_offer, source_id)
6781
offers_list.append(offer)
6882
self.logger.debug(
6983
f"Successfully processed offer {talentsoft_offer.reference}"
@@ -88,7 +102,7 @@ def clean(self, raw_documents: List[Document]) -> CleaningResult[Offer]:
88102
return CleaningResult(entities=offers_list, cleaning_errors=cleaning_errors)
89103

90104
def _map_talentsoft_to_offer(
91-
self, talentsoft_offer: TalentsoftDetailOffer
105+
self, talentsoft_offer: TalentsoftDetailOffer, source_id: UUID
92106
) -> Offer:
93107
# Extract verse from salaryRange if available
94108
ts_verse = (
@@ -148,6 +162,7 @@ def _map_talentsoft_to_offer(
148162
publication_date=publication_date,
149163
beginning_date=beginning_date,
150164
family_code=family_code_value,
165+
source_id=source_id,
151166
)
152167
try:
153168
existing_offer = self.offers_repository.get_by_external_id(

src/web/infrastructure/repositories/shared/postgres_offers_repository.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ def get_by_external_id(self, external_id: str) -> Offer:
116116
except OfferModel.DoesNotExist as e:
117117
raise OfferDoesNotExist(external_id) from e
118118

119-
def get_by_reference_and_source_id(self, reference: str, source_id: str) -> Offer:
119+
def get_by_reference_and_source_id(self, reference: str, source_id: UUID) -> Offer:
120120
try:
121121
offer_model = OfferModel.objects.get(
122122
external_id__endswith=f"-{reference}", source_id=source_id

src/web/presentation/ingestion/mappers.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from typing import Optional
2+
from uuid import UUID
23

34
from domain.ddd.mapper_interface import IToDomainMapper
45
from domain.entities.offer import Offer
@@ -57,5 +58,5 @@ def to_domain(self, data: Optional[dict]) -> Optional[Offer]:
5758
localisation=self._localisation_mapper.to_domain(raw_localisation),
5859
beginning_date=LimitDate(debut_contrat) if debut_contrat else None,
5960
family_code=data["profession"]["metier"],
60-
source_id=data["identification"]["source"],
61+
source_id=UUID(data["identification"]["source"]),
6162
)

0 commit comments

Comments
 (0)