123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346 |
- #!/usr/bin/env python3
- # author: Harshvardhan Pandit
- # Generates RDF graph for GDPR text from JSON (gdpr.json)
- # The JSON file is in ../deliverables/gdpr.json
- # The algorithm is roughly as follows:
- # For every chapter, article, subpoint, etc., it defines the following
- # attributes using the ELI vocabulary
- # @prefix ELI https://publications.europa.eu/en/mdr/resource/eli/eli.owl
- #
- # - ELI:title (xsd:string) title of the item
- # - ELI:number (xsd:string) number of the item in text
- # - ELI:title_alternative (xsd:string) an alternative representation of the
- # item as <item_type> followed by <item_number>; e.g. Chapter 1
- # - ELI:description (xsd:string) text of the item
- # - ELI:is_part_of (ELI:LegalResource) gdpr:GDPR
- # here, this is an instance of a LegalResource created to represent
- # the GDPR, and is present in another OWL file, which is being edited
- # in Protege. The reference does not need to be resolved.
- # - ELI:id_local (xsd:string) the ID of the resource in the HTML file
- # This is a bungling of the correct use of the vocabulary, but it keeps
- # all terms within the intended usage. The same ID can be used to
- # lookup the resource in the HTML file (so it is the ID attribute of
- # the resource).
- #
- # This script generates the RDF pairings for the text of the GDPR. The bulk
- # of the work, and the part where the GDPR itself is referenced, is actually
- # done using Protege and does not need a script such as this one, since it
- # is manual labour.
- ##############################################################################
- from rdflib import Graph, RDF, RDFS, XSD, Literal, URIRef, BNode
- from rdflib import Namespace
# Load the JSON from file.
# The encoding is passed explicitly: without it open() falls back to the
# platform's locale encoding, while JSON text is expected to be UTF-8.
import json

with open('../deliverables/gdpr.json', encoding='utf-8') as fd:
    gdpr_json = json.load(fd)
- # import pprint
- # pprint.pprint(gdpr_json)
# The one graph into which every generated triple is collected
graph = Graph()

# Namespaces of the vocabularies the triples are expressed in
GDPRtEXT_URI = URIRef('http://purl.org/adaptcentre/ontologies/GDPRtEXT#')
GDPRtEXT = Namespace(GDPRtEXT_URI)
DCTERMS = Namespace('http://purl.org/dc/terms/')
# ELI is the legal vocabulary used for EU text
ELI = Namespace("http://data.europa.eu/eli/ontology#")

# Prefixes under which these namespaces are bound in the graph
graph.namespace_manager.bind('GDPRtEXT', GDPRtEXT)
graph.namespace_manager.bind('dcterms', DCTERMS)
graph.namespace_manager.bind('eli', ELI)

# Short aliases for the ELI terms that are used throughout the script
LRS = ELI['LegalResourceSubdivision']
TITLE = ELI['title']
NOS = ELI['number']
TITLE_ALT = ELI['title_alternative']
PART_OF = ELI['is_part_of']
# graph.add((PART_OF, RDF.type, OWL.TransitiveProperty))
DESC = ELI['description']
HAS_RECITAL = GDPRtEXT.hasRecital
##############################################################################
# GDPR as named individual
# Base URI under which the GDPR individual and all of its parts are minted
GDPR_URI = URIRef('http://purl.org/adaptcentre/resources/GDPRtEXT#')
GDPR = Namespace(GDPR_URI)
graph.namespace_manager.bind('gdpr', GDPR)
gdpr = GDPR.GDPR
# graph.add((gdpr, RDF.type, OWL.NamedIndividual))
# The regulation as a whole is typed with the full ELI class name; the
# abbreviated spelling `ELI.LR` would mint a different term (eli:LR).
graph.add((gdpr, RDF.type, ELI.LegalResource))
graph.add((gdpr, RDF.type, DCTERMS.Policy))

# Bibliographic metadata of the regulation, all as xsd:string literals
graph.add((gdpr, DCTERMS.identifier, Literal(
    '2016/679', datatype=XSD.string)))
graph.add((gdpr, DCTERMS.language, Literal(
    'English', datatype=XSD.string)))
graph.add((gdpr, DCTERMS.publisher, Literal(
    'Official Journal of the European Union', datatype=XSD.string)))
graph.add((gdpr, DCTERMS.source, Literal(
    'http://eur-lex.europa.eu/eli/reg/2016/679/oj', datatype=XSD.string)))
graph.add((gdpr, DCTERMS.title, Literal(
    'General Data Protection Regulation', datatype=XSD.string)))
# Alternative titles go on dcterms:alternative, the Dublin Core property for
# them; `DCTERMS.title_alternative` is not a term of that vocabulary.
graph.add((gdpr, DCTERMS.alternative, Literal(
    'GDPR', datatype=XSD.string)))
graph.add((gdpr, DCTERMS.alternative, Literal(
    'REGULATION (EU) 2016/679', datatype=XSD.string)))
graph.add((gdpr, DCTERMS.creator, Literal(
    'European Parliament', datatype=XSD.string)))
graph.add((gdpr, DCTERMS.creator, Literal(
    'Council of the European Union', datatype=XSD.string)))
graph.add((gdpr, DCTERMS.abstract, Literal((
    'REGULATION (EU) 2016/679 OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL '
    'of 27 April 2016 '
    'on the protection of natural persons with regard to the processing of '
    'personal data and on the free movement of such data, '
    'and repealing Directive 95/46/EC '
    '(General Data Protection Regulation)'), datatype=XSD.string)))

# The opening formula of the regulation is modelled as a resource of its own
# so that it can carry the text and the citations it makes.
gdpr_description = GDPR['description']
# Typed via the LRS alias (eli:LegalResourceSubdivision) like every other
# part of the text; `ELI.LRS` would mint the unrelated term eli:LRS.
graph.add((gdpr_description, RDF.type, LRS))
graph.add((gdpr_description, DCTERMS.description, Literal((
    'THE EUROPEAN PARLIAMENT AND THE COUNCIL OF THE EUROPEAN UNION, '
    'Having regard to the Treaty on the Functioning of the European Union, '
    'and in particular Article 16 thereof, Having regard to the proposal from '
    'the European Commission, After transmission of the draft legislative act '
    'to the national parliaments, '
    'Having regard to the opinion of the European Economic '
    'and Social Committee, '
    'Having regard to the opinion of the Committee of the Regions, '
    'Acting in accordance with the ordinary legislative procedure,'
    ), datatype=XSD.string)))
graph.add((gdpr_description, ELI.cites, GDPR['citation1']))
graph.add((gdpr_description, ELI.cites, GDPR['citation2']))
graph.add((gdpr_description, ELI.cites, GDPR['citation3']))
graph.add((gdpr, DCTERMS.description, gdpr_description))

# Dates, each recorded as an xsd:date literal
graph.add((gdpr, ELI.date_document, Literal('2016-04-27', datatype=XSD.date)))
graph.add((gdpr, DCTERMS.date, Literal('2016-04-27', datatype=XSD.date)))
graph.add((gdpr, ELI.date_publication, Literal(
    '2016-05-04', datatype=XSD.date)))
graph.add((gdpr, DCTERMS.issued, Literal('2016-05-04', datatype=XSD.date)))
# NOTE(review): eli:in_force presumably expects a status concept rather than
# a date; eli:first_date_entry_in_force may be the intended property --
# confirm against the ELI ontology before changing.
graph.add((gdpr, ELI.in_force, Literal(
    '2016-05-24', datatype=XSD.date)))
graph.add((gdpr, ELI.date_applicability, Literal(
    '2018-05-25', datatype=XSD.date)))
# GDPRtEXT classes, one per kind of item found in the text
class_Chapter = GDPRtEXT.Chapter
class_Section = GDPRtEXT.Section
class_Article = GDPRtEXT.Article
class_Point = GDPRtEXT.Point
class_SubPoint = GDPRtEXT.SubPoint
class_Recital = GDPRtEXT.Recital
class_Citation = GDPRtEXT.Citation

# GDPRtEXT properties pointing from an item to the item that contains it
property_partof_chapter = GDPRtEXT.isPartOfChapter
property_partof_section = GDPRtEXT.isPartOfSection
property_partof_article = GDPRtEXT.isPartOfArticle
property_partof_point = GDPRtEXT.isPartOfPoint

# GDPRtEXT properties pointing from an item to the items it contains
property_has_chapter = GDPRtEXT.hasChapter
property_has_section = GDPRtEXT.hasSection
property_has_article = GDPRtEXT.hasArticle
property_has_point = GDPRtEXT.hasPoint
property_has_subpoint = GDPRtEXT.hasSubPoint
- #############################################################################
- # The chapters are in json.chapters as an array of dicts
- # { number, title, contents }
- # A chapter may contain sections, and if it does, then each section has
- # the same structure as well.
- # If a chapter has sections, then each section, else the chapter itself,
- # has several articles with the same structure.
- # Each article has several points, which may or may not be numbered.
- # Each point may have several subpoints, which may or may not be numbered.
def graph_subpoint(
        subpoint, article_number, point_number,
        point, article, section=None, chapter=None):
    '''Add one subpoint of a point to the graph.

    subpoint is a dict read through its 'number' and 'text' keys.
    article_number is concatenated into the alternative title and therefore
    has to be a string; point_number is only interpolated via str.format.
    point, article, section and chapter are the graph nodes of the enclosing
    items. section is skipped when it is None; chapter is used
    unconditionally even though it defaults to None.
    '''
    # print('SP', subpoint['number'])
    number = subpoint['number']
    node = GDPR['article{}-{}-{}'.format(
        article_number, point_number, number)]

    graph.add((node, NOS, Literal(number, datatype=XSD.string)))
    label = 'Article' + article_number + '({})({})'.format(
        point_number, number)
    graph.add((node, TITLE_ALT, Literal(label, datatype=XSD.string)))
    for rdf_class in (LRS, class_SubPoint):
        graph.add((node, RDF.type, rdf_class))

    # Enclosing items from the outermost inwards; each one is linked both
    # with the generic ELI part-of and with its level-specific property.
    containers = [(property_partof_chapter, chapter)]
    if section is not None:
        containers.append((property_partof_section, section))
    containers.append((property_partof_article, article))
    containers.append((property_partof_point, point))
    for specific_property, container in containers:
        graph.add((node, PART_OF, container))
        graph.add((node, specific_property, container))

    graph.add((point, property_has_subpoint, node))
    graph.add((node, PART_OF, gdpr))
    graph.add((node, DESC, Literal(subpoint['text'], datatype=XSD.string)))
def graph_point(point, article_number, article, section=None, chapter=None):
    '''Add one point of an article, and each of its subpoints, to the graph.

    point is a dict read through its 'number', 'text' and 'subpoints' keys.
    article_number is concatenated into the alternative title and therefore
    has to be a string. article, section and chapter are the graph nodes of
    the enclosing items. section is skipped when it is None; chapter is used
    unconditionally even though it defaults to None.

    Subpoints whose 'number' is None are given a running number starting at
    1, which is written back into the subpoint dict.
    '''
    # print('P', point['number'])
    number = point['number']
    node = GDPR['article{}-{}'.format(article_number, number)]

    graph.add((node, NOS, Literal(number, datatype=XSD.string)))
    label = 'Article' + article_number + '({})'.format(number)
    graph.add((node, TITLE_ALT, Literal(label, datatype=XSD.string)))
    for rdf_class in (LRS, class_Point):
        graph.add((node, RDF.type, rdf_class))

    # Enclosing items from the outermost inwards; each one is linked both
    # with the generic ELI part-of and with its level-specific property.
    containers = [(property_partof_chapter, chapter)]
    if section is not None:
        containers.append((property_partof_section, section))
    containers.append((property_partof_article, article))
    for specific_property, container in containers:
        graph.add((node, PART_OF, container))
        graph.add((node, specific_property, container))

    graph.add((article, property_has_point, node))
    graph.add((node, PART_OF, gdpr))
    graph.add((node, DESC, Literal(point['text'], datatype=XSD.string)))

    # Counter consumed only by subpoints that carry no number of their own
    next_unnumbered = 1
    for subpoint in point['subpoints']:
        if subpoint['number'] is None:
            subpoint['number'] = next_unnumbered
            next_unnumbered += 1
        graph_subpoint(
            subpoint, article_number, number,
            node, article, section, chapter)
def graph_article(article, section=None, chapter=None):
    '''Add one article, and each of its points, to the graph.

    article is a dict read through its 'number' and 'contents' keys, plus an
    optional 'title'. 'number' is concatenated into the alternative title
    and therefore has to be a string. section and chapter are the graph
    nodes of the enclosing items. section is skipped when it is None;
    chapter is used unconditionally even though it defaults to None.

    Points whose 'number' is None are given a running number starting at 1,
    which is written back into the point dict.
    '''
    # print('A', article['number'])
    node_article = GDPR['article{}'.format(article['number'])]
    graph.add((node_article, RDF.type, LRS))
    graph.add((node_article, RDF.type, class_Article))
    # Chapters and sections record their title; do the same for an article
    # whenever the input carries one, so the title is not silently dropped.
    title = article.get('title')
    if title is not None:
        graph.add((node_article, TITLE, Literal(
            title, datatype=XSD.string)))
    graph.add((node_article, NOS, Literal(
        article['number'], datatype=XSD.string)))
    graph.add((node_article, TITLE_ALT, Literal(
        'Article ' + article['number'], datatype=XSD.string)))
    graph.add((node_article, PART_OF, chapter))
    graph.add((node_article, property_partof_chapter, chapter))
    graph.add((chapter, property_has_article, node_article))
    # The article is also linked directly from the regulation itself
    graph.add((gdpr, property_has_article, node_article))
    graph.add((node_article, PART_OF, gdpr))
    if section is not None:
        graph.add((node_article, PART_OF, section))
        graph.add((node_article, property_partof_section, section))
        graph.add((section, property_has_article, node_article))
    # point number for when points are unnumbered
    point_nos = 1
    for point in article['contents']:
        if point['number'] is None:
            point['number'] = point_nos
            point_nos += 1
        graph_point(point, article['number'], node_article, section, chapter)
def graph_section(section, chapter, chapter_number):
    '''Add one section of a chapter, and each of its articles, to the graph.

    section is a dict read through its 'number', 'title' and 'contents'
    keys; 'number' is concatenated into the alternative title and therefore
    has to be a string. chapter is the graph node of the enclosing chapter
    and chapter_number is interpolated into the section's identifier.
    '''
    # print('S', section['number'], section['title'])
    number = section['number']
    node = GDPR['chapter{}-{}'.format(chapter_number, number)]

    for rdf_class in (LRS, class_Section):
        graph.add((node, RDF.type, rdf_class))
    graph.add((node, TITLE, Literal(section['title'], datatype=XSD.string)))
    graph.add((node, NOS, Literal(number, datatype=XSD.string)))
    graph.add((node, TITLE_ALT, Literal(
        'Section ' + number, datatype=XSD.string)))

    # The section is part of both its chapter and the regulation as a whole
    for container in (chapter, gdpr):
        graph.add((node, PART_OF, container))
    graph.add((node, property_partof_chapter, chapter))
    graph.add((chapter, property_has_section, node))

    for article in section['contents']:
        graph_article(article, node, chapter)
def graph_chapter(chapter):
    '''Add one chapter, and everything it contains, to the graph.

    chapter is a dict read through its 'number', 'title' and 'contents'
    keys; 'number' is concatenated into the alternative title and therefore
    has to be a string. The 'type' of the first item in 'contents' decides
    whether all items are handled as sections or as articles. A chapter
    with empty contents is added on its own.
    '''
    # print('C', chapter['number'], chapter['title'])
    node_chapter = GDPR['chapter{}'.format(chapter['number'])]
    graph.add((node_chapter, RDF.type, LRS))
    graph.add((node_chapter, RDF.type, class_Chapter))
    graph.add((node_chapter, TITLE, Literal(
        chapter['title'], datatype=XSD.string)))
    graph.add((node_chapter, NOS, Literal(
        chapter['number'], datatype=XSD.string)))
    graph.add((node_chapter, TITLE_ALT, Literal(
        'Chapter ' + chapter['number'], datatype=XSD.string)))
    graph.add((node_chapter, PART_OF, gdpr))
    graph.add((gdpr, property_has_chapter, node_chapter))
    contents = chapter['contents']
    # Section (if any). The emptiness check keeps the peek at the first item
    # from raising IndexError when a chapter has no contents at all.
    if contents and contents[0]['type'] == 'section':
        for item in contents:
            graph_section(item, node_chapter, chapter['number'])
    else:
        for item in contents:
            graph_article(item, None, node_chapter)
# Chapters, together with everything nested inside them
for chapter in gdpr_json['chapters']:
    graph_chapter(chapter)

# Recitals: typed, numbered, given their text and linked with the regulation
for recital in gdpr_json['recitals']:
    node_recital = GDPR['recital{}'.format(recital['number'])]
    for rdf_class in (LRS, class_Recital):
        graph.add((node_recital, RDF.type, rdf_class))
    graph.add((node_recital, NOS, Literal(
        recital['number'], datatype=XSD.string)))
    graph.add((node_recital, DESC, Literal(
        recital['text'], datatype=XSD.string)))
    graph.add((node_recital, PART_OF, gdpr))
    graph.add((gdpr, HAS_RECITAL, node_recital))

# Citations: same shape as recitals, but the regulation cites them
for citation in gdpr_json['citations'].values():
    node_citation = GDPR['citation{}'.format(citation['number'])]
    for rdf_class in (LRS, class_Citation):
        graph.add((node_citation, RDF.type, rdf_class))
    graph.add((node_citation, NOS, Literal(
        citation['number'], datatype=XSD.string)))
    graph.add((node_citation, DESC, Literal(
        citation['text'], datatype=XSD.string)))
    graph.add((node_citation, PART_OF, gdpr))
    graph.add((gdpr, ELI.cites, node_citation))
# Add citations
# Each pair names the citing resource and the citation it cites, both as
# local names in the GDPR resource namespace.
citation_links = (
    ('recital3', 'citation4'),
    ('recital13', 'citation5'),
    ('recital17', 'citation6'),
    ('recital19', 'citation7'),
    ('recital21', 'citation8'),
    ('recital35', 'citation9'),
    ('recital42', 'citation10'),
    ('recital54', 'citation11'),
    ('recital106', 'citation12'),
    ('recital147', 'citation13'),
    ('recital154', 'citation14'),
    ('recital161', 'citation15'),
    ('recital163', 'citation16'),
    ('recital172', 'citation17'),
    ('recital173', 'citation18'),
    ('article4-25', 'citation19'),
    ('article43-1-2', 'citation20'),
    ('article76-2', 'citation21'),
)
for citing, cited in citation_links:
    graph.add((GDPR[citing], ELI.cites, GDPR[cited]))
# Serialize
# The same graph is written once per output file, each in its own format.
serializations = (
    ('../deliverables/gdpr.ttl', 'turtle'),
    ('../deliverables/gdpr.rdf', 'pretty-xml'),
    ('../deliverables/gdpr.n3', 'n3'),
    ('../deliverables/gdpr.nt', 'nt'),
    ('../deliverables/gdpr.jsonld', 'json-ld'),
)
for destination, rdf_format in serializations:
    graph.serialize(destination=destination, format=rdf_format)
|