Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 145 additions & 0 deletions Assignment4/Assignment4_LauraSilva_24C024_task06
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-
"""Task06.ipynb

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/github/FacultadInformatica-LinkedData/Curso2025-2026-DataScience/blob/master/Assignment4/course_materials/notebooks/Task06.ipynb

**Task 06: Modifying RDF(s)**
"""

#!pip install rdflib
import urllib.request
# Download the course's validation helper and save it locally as
# validation.py so that `from validation import Report` works further down.
# NOTE(review): the file served by this URL is imported (and therefore
# executed) later in the script; consider pinning the URL to a commit hash.
url = 'https://raw.githubusercontent.com/FacultadInformatica-LinkedData/Curso2025-2026/refs/heads/master/Assignment4/course_materials/python/validation.py'
urllib.request.urlretrieve(url, 'validation.py')
# Base URL of the raw course materials on GitHub.
github_storage = "https://raw.githubusercontent.com/FacultadInformatica-LinkedData/Curso2025-2026/master/Assignment4/course_materials"

"""Import RDFLib main methods"""

from rdflib import Graph, Namespace, Literal, XSD
from rdflib.namespace import RDF, RDFS
from validation import Report
# Empty graph that the tasks below populate.
g = Graph()
# Register the prefix "ns" for http://somewhere#; override=False is passed to bind.
g.namespace_manager.bind('ns', Namespace("http://somewhere#"), override=False)
# Validation helper; its validate_task_06_* methods are called after each task.
r = Report()

"""Create a new class named Researcher"""

# Declare <http://mydomain.org#Researcher> as an rdfs:Class, then dump the graph.
ns = Namespace("http://mydomain.org#")
g.add((ns["Researcher"], RDF.type, RDFS.Class))
for triple in g:
    print(*triple)

"""**Task 6.0: Create new prefixes for "ontology" and "person" as shown in slide 14 of the Slidedeck 01a.RDF(s)-SPARQL shown in class.**"""

# this task is validated in the next step
person = Namespace("http://oeg.fi.upm.es/def/people#")
ontology = Namespace("http://oeg.fi.upm.es/def/ontology#")

# Bind every prefix in one pass (same order as before), replacing any
# previous binding of the same prefix.
for prefijo, espacio in (
    ("person", person),
    ("ontology", ontology),
    ("rdf", RDF),
    ("rdfs", RDFS),
    ("xsd", XSD),
):
    g.namespace_manager.bind(prefijo, espacio, override=True)

"""**TASK 6.1: Reproduce the taxonomy of classes shown in slide 34 in class (all the classes under "Vocabulario", Slidedeck: 01a.RDF(s)-SPARQL). Add labels for each of them as they are in the diagram (exactly) with no language tags. Remember to add the correct datatype (xsd:string) when appropriate**

"""

# TO DO
# Each entry is (class name, name of its parent class or None for the root).
clases_y_padres = [
    ("Person", None),
    ("Professor", "Person"),
    ("FullProfessor", "Professor"),
    ("AssociateProfessor", "Professor"),
    ("InterimAssociateProfessor", "AssociateProfessor"),
]

for nombre, padre in clases_y_padres:
    clase = person[nombre]
    # Every class is typed as rdfs:Class and labelled with its own name
    # as an xsd:string literal (no language tag).
    tripletas = [
        (clase, RDF.type, RDFS.Class),
        (clase, RDFS.label, Literal(nombre, datatype=XSD.string)),
    ]
    if padre:
        tripletas.append((clase, RDFS.subClassOf, person[padre]))
    for tripleta in tripletas:
        g.add(tripleta)

# Visualize the results
for triple in g:
    print(*triple)

# Validation. Do not remove
r.validate_task_06_01(g)

"""**TASK 6.2: Add the 3 properties shown in slide 36. Add labels for each of them (exactly as they are in the slide, with no language tags), and their corresponding domains and ranges using RDFS. Remember to add the correct datatype (xsd:string) when appropriate. If a property has no range, make it a literal (string)**"""

# TO DO
def agregar_propiedad(nombre, dominio=None, rango=None):
    """Declare ``person[nombre]`` as an rdf:Property in the module-level graph ``g``.

    The property is labelled with ``nombre`` as an xsd:string literal.
    rdfs:domain and rdfs:range triples are added only for truthy
    ``dominio`` / ``rango`` arguments.

    Returns the property term (``person[nombre]``).
    """
    prop = person[nombre]
    tripletas = [
        (prop, RDF.type, RDF.Property),
        (prop, RDFS.label, Literal(nombre, datatype=XSD.string)),
    ]
    # Domain first, then range, each only when a value was supplied.
    for predicado, valor in ((RDFS.domain, dominio), (RDFS.range, rango)):
        if valor:
            tripletas.append((prop, predicado, valor))
    for tripleta in tripletas:
        g.add(tripleta)
    return prop

# (property name, rdfs:domain, rdfs:range) for the three properties of task 6.2.
for nombre_prop, dominio_prop, rango_prop in (
    ("hasColleague", person.Person, person.Person),
    ("hasName", person.Person, RDFS.Literal),
    ("hasHomePage", person.FullProfessor, RDFS.Literal),
):
    agregar_propiedad(nombre_prop, dominio=dominio_prop, rango=rango_prop)

# Visualize the results
for triple in g:
    print(*triple)

# Validation. Do not remove
r.validate_task_06_02(g)

"""**TASK 6.3: Create the individuals shown in slide 36 under "Datos". Link them with the same relationships shown in the diagram.**"""

# TO DO
# Namespace that holds the individuals; exposed in the graph as prefix "data".
datos = Namespace("http://oeg.fi.upm.es/resource/person/")
g.namespace_manager.bind("data", datos, override=True)

def agregar_individuo(nombre, tipo_rdf, etiqueta, propiedades=None):
    """Create (or recreate) the individual ``datos[nombre]`` in the graph ``g``.

    Every triple that already has this individual as subject is removed
    first, so calling the function again discards anything asserted about
    it in the meantime. The individual is then typed as ``tipo_rdf`` and
    labelled with ``etiqueta`` as an xsd:string literal. ``propiedades``,
    when given, is an iterable of ``(predicate, object)`` pairs to add.

    Returns the individual's term (``datos[nombre]``).
    """
    sujeto = datos[nombre]
    # Wipe earlier statements about this subject before re-adding them.
    g.remove((sujeto, None, None))
    g.add((sujeto, RDF.type, tipo_rdf))
    g.add((sujeto, RDFS.label, Literal(etiqueta, datatype=XSD.string)))
    for predicado, objeto in propiedades or ():
        g.add((sujeto, predicado, objeto))
    return sujeto

# Create the three individuals; each one is labelled with its own name.
oscar, asun, raul = (
    agregar_individuo(nombre_ind, tipo_ind, nombre_ind)
    for nombre_ind, tipo_ind in (
        ("Oscar", person.FullProfessor),
        ("Asun", person.AssociateProfessor),
        ("Raul", person.InterimAssociateProfessor),
    )
)

# NOTE(review): hasHomePage is declared in this script with rdfs:domain
# person:FullProfessor, yet it is asserted on asun, who is typed as
# AssociateProfessor -- confirm the individuals' types against the slide.
relaciones = (
    (oscar, person.hasColleague, asun),
    (oscar, person.hasName, Literal("Oscar Corcho García", datatype=XSD.string)),
    (asun, person.hasHomePage, Literal("http://www.oeg-upm.net/", datatype=XSD.string)),
    (asun, person.hasColleague, raul),
)
for relacion in relaciones:
    g.add(relacion)


# Visualize the results
for triple in g:
    print(*triple)

r.validate_task_06_03(g)

"""**TASK 6.4: Add to the individual person:Oscar the email address, given and family names. Use the properties already included in example 4 to describe Jane and John (https://raw.githubusercontent.com/FacultadInformatica-LinkedData/Curso2025-2026/master/Assignment4/course_materials/rdf/example4.rdf). Do not import the namespaces, add them manually**

"""

# TO DO
# NOTE(review): confirm that the vCard namespace terminator ("/" here)
# matches the one used in example4.rdf.
VCARD = Namespace("http://www.w3.org/2001/vcard-rdf/3.0/")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

# Namespace of the individuals created in task 6.3.
data = Namespace("http://oeg.fi.upm.es/resource/person/")
oscar = data["Oscar"]

# Add three properties to Oscar, every value typed as xsd:string.
for propiedad, texto in (
    (VCARD.Given, "Oscar"),
    (VCARD.Family, "Corcho"),
    (FOAF.email, "oscar@oeg-upm.net"),
):
    g.add((oscar, propiedad, Literal(texto, datatype=XSD.string)))
# Visualize the results
for triple in g:
    print(*triple)

# Validation. Do not remove
r.validate_task_06_04(g)
r.save_report("_Task_06")
178 changes: 178 additions & 0 deletions Assignment4/Assignment4_LauraSilva_24C024_task07
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# -*- coding: utf-8 -*-
"""Task07.ipynb

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/github/Laauraaxsc/Curso2025-2026-DataScience/blob/master/Assignment4/course_materials/notebooks/Task07.ipynb

**Task 07: Querying RDF(s)**
"""

#!pip install rdflib
import urllib.request
# Download the course's validation helper and save it locally as
# validation.py so that `from validation import Report` works further down.
# NOTE(review): the file served by this URL is imported (and therefore
# executed) later in the script; consider pinning the URL to a commit hash.
url = 'https://raw.githubusercontent.com/FacultadInformatica-LinkedData/Curso2025-2026/refs/heads/master/Assignment4/course_materials/python/validation.py'
urllib.request.urlretrieve(url, 'validation.py')
# Base URL of the raw course materials; the RDF data file is loaded from it below.
github_storage = "https://raw.githubusercontent.com/FacultadInformatica-LinkedData/Curso2025-2026/master/Assignment4/course_materials"

from validation import Report

"""First let's read the RDF file"""

from rdflib import Graph, Namespace, Literal
from rdflib.namespace import RDF, RDFS
# Do not change the name of the variables
g = Graph()
g.namespace_manager.bind('ns', Namespace("http://somewhere#"), override=False)
# Load the Turtle data set. Use the lowercase parser name "turtle", which
# rdflib registers for Turtle input, rather than the unregistered "TTL".
g.parse(github_storage+"/rdf/data06.ttl", format="turtle")
# Validation helper; its validate_07_* methods are called after each task.
report = Report()

"""**TASK 7.1a: For all classes, list each classURI. If the class belongs to another class, then list its superclass.**
**Do the exercise in RDFLib returning a list of Tuples: (class, superclass) called "result". If a class does not have a super class, then return None as the superclass**
"""

# TO DO

def listar_clases_y_superclase(grafo):
    """Return ``(class, superclass)`` tuples for every class found in ``grafo``.

    A node counts as a class when it is typed as rdfs:Class, or when it
    appears as the subject or the object of an rdfs:subClassOf triple.

    A class without any superclass yields ``(class, None)``. A class with
    several rdfs:subClassOf statements yields one tuple per superclass, so
    no superclass is silently dropped (this matches what the SPARQL
    OPTIONAL pattern of task 7.1b returns).

    The result is ordered by class URI and then by superclass URI, so
    repeated runs give the same list.
    """
    clases = set(grafo.subjects(RDF.type, RDFS.Class))
    clases.update(grafo.subjects(RDFS.subClassOf, None))
    clases.update(grafo.objects(None, RDFS.subClassOf))

    resultado = []
    # key=str gives a total order even if different node types are mixed.
    for clase in sorted(clases, key=str):
        superclases = sorted(set(grafo.objects(clase, RDFS.subClassOf)), key=str)
        if not superclases:
            resultado.append((clase, None))
            continue
        resultado.extend((clase, superclase) for superclase in superclases)
    return resultado

# Visualize the results
result = listar_clases_y_superclase(g)  # list of (class, superclass) tuples
for par in result:
    print(par)

## Validation: Do not remove
report.validate_07_1a(result)

"""**TASK 7.1b: Repeat the same exercise in SPARQL, returning the variables ?c (class) and ?sc (superclass)**"""

# Classes are collected from three sources (typed as rdfs:Class, subject of
# rdfs:subClassOf, object of rdfs:subClassOf); the superclass is optional.
query = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?c ?sc
WHERE {
{
{ ?c a rdfs:Class }
UNION
{ ?c rdfs:subClassOf ?_any }
UNION
{ ?_x rdfs:subClassOf ?c }
}
OPTIONAL { ?c rdfs:subClassOf ?sc } # su superclase si existe
}
"""

# Each result row unpacks in SELECT order: (?c, ?sc).
for clase, superclase in g.query(query):
    print(clase, superclase)

# Validation: Do not remove
report.validate_07_1b(query,g)

"""**TASK 7.2a: List all individuals of "Person" with RDFLib (remember the subClasses). Return the individual URIs in a list called "individuals"**

"""

from collections import deque

ns = Namespace("http://oeg.fi.upm.es/def/people#")

clase_person = ns.Person

# Breadth-first walk down rdfs:subClassOf starting at Person. A separate
# work queue and a "seen" set are used instead of appending to the list
# that is being iterated; `clases` keeps the discovery order.
clases = [clase_person]
vistas = {clase_person}
pendientes = deque(clases)
while pendientes:
    actual = pendientes.popleft()
    for subclase in g.subjects(RDFS.subClassOf, actual):
        if subclase not in vistas:
            vistas.add(subclase)
            clases.append(subclase)
            pendientes.append(subclase)

# Instances of Person or any of its subclasses. dict.fromkeys drops
# duplicates while preserving first-seen order.
individuals = list(
    dict.fromkeys(ind for c in clases for ind in g.subjects(RDF.type, c))
)

for i in individuals:
    print(i)

# validation. Do not remove
report.validate_07_02a(individuals)

"""**TASK 7.2b: Repeat the same exercise in SPARQL, returning the individual URIs in a variable ?ind**"""

# The property path rdfs:subClassOf* matches ns:Person itself and every
# direct or indirect subclass of it.
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX ns: <http://oeg.fi.upm.es/def/people#>

SELECT DISTINCT ?ind
WHERE {
?c rdfs:subClassOf* ns:Person .
?ind rdf:type ?c .
}
"""

# Visualize the results
for (individuo,) in g.query(query):
    print(individuo)

## Validation: Do not remove
report.validate_07_02b(g, query)

"""**TASK 7.3: List the name and type of those who know Rocky (in SPARQL only). Use name and type as variables in the query**"""

# NOTE(review): `?ind ?p ns:Rocky` matches ANY predicate pointing at
# ns:Rocky, not only a "knows" relation, and the name is read from
# rdfs:label -- confirm both choices against the data set.
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX ns: <http://oeg.fi.upm.es/def/people#>

SELECT DISTINCT ?name ?type
WHERE {
?ind ?p ns:Rocky .
?ind rdfs:label ?name .
?ind rdf:type ?type .
}
"""
# Visualize the results
for nombre, tipo in g.query(query):
    print(nombre, tipo)

## Validation: Do not remove
report.validate_07_03(g, query)

"""**Task 7.4: List the name of those entities who have a colleague with a dog, or that have a colleague who has a colleague who has a dog (in SPARQL). Return the results in a variable called name**"""

# ?c is reached through one or two hasColleague hops from ?ind.
# NOTE(review): "has a dog" is modelled as "any predicate linking ?c to
# ns:Rocky" -- confirm against the data set whether a dedicated
# ownership property / dog class should be used instead.
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX ns: <http://oeg.fi.upm.es/def/people#>

SELECT DISTINCT ?name
WHERE {
?ind rdfs:label ?name .

{
?ind ns:hasColleague ?c .
}
UNION
{
?ind ns:hasColleague/ns:hasColleague ?c .
}

?c ?any ns:Rocky .
}
"""

# Visualize the results
for (nombre,) in g.query(query):
    print(nombre)

## Validation: Do not remove
report.validate_07_04(g,query)
report.save_report("_Task_07")
Loading
Loading