Browse Source

fix: subpoint numbers were not captured

The subpoint numbers in Articles were not captured because of an error
in the regex that parses the table (in the original GDPR document). This
has been fixed by updating the regex in parse_gdpr.js, and the
files have been updated with the correct numbers.
Harshvardhan Pandit 7 years ago
parent
commit
024aa79642
9 changed files with 5813 additions and 15884 deletions
  1. 0 6198
      deliverables/gdpr.json
  2. 306 289
      deliverables/gdpr.jsonld
  3. 180 178
      deliverables/gdpr.n3
  4. 0 3826
      deliverables/gdpr.nt
  5. 16 0
      deliverables/gdpr.owl
  6. 5121 5209
      deliverables/gdpr.rdf
  7. 180 178
      deliverables/gdpr.ttl
  8. 2 4
      scripts/GDPR_en.html
  9. 8 2
      scripts/parse_gdpr.js

File diff suppressed because it is too large
+ 0 - 6198
deliverables/gdpr.json


File diff suppressed because it is too large
+ 306 - 289
deliverables/gdpr.jsonld


File diff suppressed because it is too large
+ 180 - 178
deliverables/gdpr.n3


File diff suppressed because it is too large
+ 0 - 3826
deliverables/gdpr.nt


+ 16 - 0
deliverables/gdpr.owl

@@ -2,19 +2,35 @@
 <rdf:RDF xmlns="http://purl.org/adaptcentre/ontologies/GDPRtEXT#"
      xml:base="http://purl.org/adaptcentre/ontologies/GDPRtEXT"
      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+     xmlns:foaf="http://xmlns.com/foaf/0.1/"
+     xmlns:terms="http://purl.org/dc/terms/"
      xmlns:owl="http://www.w3.org/2002/07/owl#"
      xmlns:xml="http://www.w3.org/XML/1998/namespace"
      xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
+     xmlns:skos="http://www.w3.org/2004/02/skos/core#"
      xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
      xmlns:dc="http://purl.org/dc/elements/1.1/"
      xmlns:gdprtext="http://purl.org/adaptcentre/ontologies/GDPRtEXT#">
+     xmlns:vann="http://purl.org/vocab/vann/"
+     xmlns:ontology="http://data.europa.eu/eli/ontology#"
+     xmlns:dc="http://purl.org/dc/elements/1.1/">
     <owl:Ontology rdf:about="http://purl.org/adaptcentre/ontologies/GDPRtEXT#">
+        <owl:imports rdf:resource="http://data.europa.eu/eli/ontology#"/>
+        <terms:created rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-08-15</terms:created>
+        <terms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-08-18</terms:modified>
         <dc:title rdf:datatype="http://www.w3.org/2001/XMLSchema#string">GDPRtEXT</dc:title>
         <rdfs:comment rdf:datatype="http://www.w3.org/2001/XMLSchema#string">This is an ontology to represent GDPR text as a set of RDF resources</rdfs:comment>
         <dc:description rdf:datatype="http://www.w3.org/2001/XMLSchema#string">This ontology extends the canonical (official) GDPR text with additional annotations</dc:description>
+        <terms:creator rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://harshp.com#me</terms:creator>
+        <terms:description rdf:datatype="http://www.w3.org/2001/XMLSchema#string">This ontology extends the canonical (official) GDPR text with additional annotations</terms:description>
         <rdfs:label rdf:datatype="http://www.w3.org/2001/XMLSchema#string">GDPR text EXTensions</rdfs:label>
         <dc:creator rdf:datatype="http://www.w3.org/2001/XMLSchema#string">Harshvardhan J. Pandit</dc:creator>
         <owl:versionInfo rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0.1</owl:versionInfo>
+        <owl:versionInfo rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">0.1</owl:versionInfo>
+        <vann:preferredNamespacePrefix>gdprtext</vann:preferredNamespacePrefix>
+        <vann:preferredNamespaceUri>http://purl.org/adaptcentre/openscience/ontologies/GDPRtEXT</vann:preferredNamespaceUri>
+        <foaf:homepage>https://openscience.adaptcentre.ie/projects/GDPRtEXT/</foaf:homepage>
+        <terms:license rdf:datatype="https://w3.org/2001/XMLSchema#anyURI">http://creativecommons.org/licenses/by/4.0/</terms:license>
     </owl:Ontology>
     
 

File diff suppressed because it is too large
+ 5121 - 5209
deliverables/gdpr.rdf


File diff suppressed because it is too large
+ 180 - 178
deliverables/gdpr.ttl


+ 2 - 4
scripts/GDPR_en.html

@@ -4,8 +4,6 @@
 <!-- CONVEX # converter_version:6.7.1 # generated_on:20160504-0034 -->
 <head>
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
-   <link type="text/css" rel="stylesheet"
-         href="./../../../../css/oj/oj.css"/>
    <title>L_2016119EN.01000101.xml</title>
 </head>
 <body>
@@ -8787,7 +8785,7 @@
       <a id="ntr21-L_2016119EN.01000101-E0021" href="#ntc21-L_2016119EN.01000101-E0021">(<span class="super">21</span>)</a>  Regulation (EC) No 1049/2001 of the European Parliament and of the Council of 30 May 2001 regarding public access to European Parliament, Council and Commission documents (<a href="./../../../../legal-content/EN/AUTO/?uri=OJ:L:2001:145:TOC">OJ L 145, 31.5.2001, p. 43</a>).</p>
    <hr class="doc-end"/>
 <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
-<script src="./scripts/parse_gdpr.js"></script>
-<script src="./scripts/fancy_format_gdpr.js"></script>
+<script src="parse_gdpr.js"></script>
+<script src="fancy_format_gdpr.js"></script>
 </body>
 </html>

+ 8 - 2
scripts/parse_gdpr.js

@@ -217,6 +217,7 @@ var extract_points_from_article4 = function(article4) {
  * Extracts points from article
  */
 var extract_points_from_article = function(article) {
+    // console.log(article.number);
 	var text = article.contents;
 	// The extraction mechanism works on the basis of sequential points.
 	// This means that if a point has a subpoint,
@@ -252,7 +253,7 @@ var extract_points_from_article = function(article) {
 		} else if (element_type == 'TABLE') {
 			var p_in_element = element.find('p');
 			var p_number = $(p_in_element[0]).text().trim();
-			var match = p_number.match('^(\\w+).');
+			var match = p_number.match('(\\w+)');
 			if (match == undefined || match == null) {
 				p_number = null;
 			} else {
@@ -267,7 +268,9 @@ var extract_points_from_article = function(article) {
 		}
 	}
 	for(pt of points) {
-		// console.log(pt.type, pt.number, pt.text);
+        for(spt of pt.subpoints) {
+            // console.log(article.number, pt.number, spt.number);
+        }
 	}
 
 	return points;
@@ -435,6 +438,9 @@ data.citations = $('p.note').map(function(index, element) {
 /**
  * Download data as JSON
  */
+delete data.citations.prevObject;
+delete data.citations.context;
+delete data.citations.length;
 $('<a id="downloadAnchorElem" style="display:none"></a>').appendTo('body');
 var dataStr = "data:text/json;charset=utf-8," + encodeURIComponent(JSON.stringify(data));
 var btn_download = document.getElementById('downloadAnchorElem');

Some files were not shown because too many files changed in this diff