Ver código fonte

fix: subpoint numbers were not captured

The subpoint numbers in Articles were not captured due to an error
in the regex parsing the table (in the original GDPR doc). This
has been fixed by updating the regex in parse_gdpr.js, and the
files have been updated with the correct numbers.
Harshvardhan Pandit 7 anos atrás
pai
commit
024aa79642
9 arquivos alterados com 5813 adições e 15884 exclusões
  1. 0 6198
      deliverables/gdpr.json
  2. 306 289
      deliverables/gdpr.jsonld
  3. 180 178
      deliverables/gdpr.n3
  4. 0 3826
      deliverables/gdpr.nt
  5. 16 0
      deliverables/gdpr.owl
  6. 5121 5209
      deliverables/gdpr.rdf
  7. 180 178
      deliverables/gdpr.ttl
  8. 2 4
      scripts/GDPR_en.html
  9. 8 2
      scripts/parse_gdpr.js

Diferenças do arquivo suprimidas por serem muito extensas
+ 0 - 6198
deliverables/gdpr.json


Diferenças do arquivo suprimidas por serem muito extensas
+ 306 - 289
deliverables/gdpr.jsonld


Diferenças do arquivo suprimidas por serem muito extensas
+ 180 - 178
deliverables/gdpr.n3


Diferenças do arquivo suprimidas por serem muito extensas
+ 0 - 3826
deliverables/gdpr.nt


+ 16 - 0
deliverables/gdpr.owl

@@ -2,19 +2,35 @@
 <rdf:RDF xmlns="http://purl.org/adaptcentre/ontologies/GDPRtEXT#"
      xml:base="http://purl.org/adaptcentre/ontologies/GDPRtEXT"
      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+     xmlns:foaf="http://xmlns.com/foaf/0.1/"
+     xmlns:terms="http://purl.org/dc/terms/"
      xmlns:owl="http://www.w3.org/2002/07/owl#"
      xmlns:xml="http://www.w3.org/XML/1998/namespace"
      xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
+     xmlns:skos="http://www.w3.org/2004/02/skos/core#"
      xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
      xmlns:dc="http://purl.org/dc/elements/1.1/"
      xmlns:gdprtext="http://purl.org/adaptcentre/ontologies/GDPRtEXT#">
+     xmlns:vann="http://purl.org/vocab/vann/"
+     xmlns:ontology="http://data.europa.eu/eli/ontology#"
+     xmlns:dc="http://purl.org/dc/elements/1.1/">
     <owl:Ontology rdf:about="http://purl.org/adaptcentre/ontologies/GDPRtEXT#">
+        <owl:imports rdf:resource="http://data.europa.eu/eli/ontology#"/>
+        <terms:created rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-08-15</terms:created>
+        <terms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-08-18</terms:modified>
         <dc:title rdf:datatype="http://www.w3.org/2001/XMLSchema#string">GDPRtEXT</dc:title>
         <rdfs:comment rdf:datatype="http://www.w3.org/2001/XMLSchema#string">This is an ontology to represent GDPR text as a set of RDF resources</rdfs:comment>
         <dc:description rdf:datatype="http://www.w3.org/2001/XMLSchema#string">This ontology extends the canonical (official) GDPR text with additional annotations</dc:description>
+        <terms:creator rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://harshp.com#me</terms:creator>
+        <terms:description rdf:datatype="http://www.w3.org/2001/XMLSchema#string">This ontology extends the canonical (official) GDPR text with additional annotations</terms:description>
         <rdfs:label rdf:datatype="http://www.w3.org/2001/XMLSchema#string">GDPR text EXTensions</rdfs:label>
         <dc:creator rdf:datatype="http://www.w3.org/2001/XMLSchema#string">Harshvardhan J. Pandit</dc:creator>
         <owl:versionInfo rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0.1</owl:versionInfo>
+        <owl:versionInfo rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">0.1</owl:versionInfo>
+        <vann:preferredNamespacePrefix>gdprtext</vann:preferredNamespacePrefix>
+        <vann:preferredNamespaceUri>http://purl.org/adaptcentre/openscience/ontologies/GDPRtEXT</vann:preferredNamespaceUri>
+        <foaf:homepage>https://openscience.adaptcentre.ie/projects/GDPRtEXT/</foaf:homepage>
+        <terms:license rdf:datatype="https://w3.org/2001/XMLSchema#anyURI">http://creativecommons.org/licenses/by/4.0/</terms:license>
     </owl:Ontology>
     
 

Diferenças do arquivo suprimidas por serem muito extensas
+ 5121 - 5209
deliverables/gdpr.rdf


Diferenças do arquivo suprimidas por serem muito extensas
+ 180 - 178
deliverables/gdpr.ttl


+ 2 - 4
scripts/GDPR_en.html

@@ -4,8 +4,6 @@
 <!-- CONVEX # converter_version:6.7.1 # generated_on:20160504-0034 -->
 <head>
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
-   <link type="text/css" rel="stylesheet"
-         href="./../../../../css/oj/oj.css"/>
    <title>L_2016119EN.01000101.xml</title>
 </head>
 <body>
@@ -8787,7 +8785,7 @@
       <a id="ntr21-L_2016119EN.01000101-E0021" href="#ntc21-L_2016119EN.01000101-E0021">(<span class="super">21</span>)</a>  Regulation (EC) No 1049/2001 of the European Parliament and of the Council of 30 May 2001 regarding public access to European Parliament, Council and Commission documents (<a href="./../../../../legal-content/EN/AUTO/?uri=OJ:L:2001:145:TOC">OJ L 145, 31.5.2001, p. 43</a>).</p>
    <hr class="doc-end"/>
 <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
-<script src="./scripts/parse_gdpr.js"></script>
-<script src="./scripts/fancy_format_gdpr.js"></script>
+<script src="parse_gdpr.js"></script>
+<script src="fancy_format_gdpr.js"></script>
 </body>
 </html>

+ 8 - 2
scripts/parse_gdpr.js

@@ -217,6 +217,7 @@ var extract_points_from_article4 = function(article4) {
  * Extracts points from article
  */
 var extract_points_from_article = function(article) {
+    // console.log(article.number);
 	var text = article.contents;
 	// The extraction mechanism works on the basis of sequential points.
 	// This means that if a point has a subpoint,
@@ -252,7 +253,7 @@ var extract_points_from_article = function(article) {
 		} else if (element_type == 'TABLE') {
 			var p_in_element = element.find('p');
 			var p_number = $(p_in_element[0]).text().trim();
-			var match = p_number.match('^(\\w+).');
+			var match = p_number.match('(\\w+)');
 			if (match == undefined || match == null) {
 				p_number = null;
 			} else {
@@ -267,7 +268,9 @@ var extract_points_from_article = function(article) {
 		}
 	}
 	for(pt of points) {
-		// console.log(pt.type, pt.number, pt.text);
+        for(spt of pt.subpoints) {
+            // console.log(article.number, pt.number, spt.number);
+        }
 	}
 
 	return points;
@@ -435,6 +438,9 @@ data.citations = $('p.note').map(function(index, element) {
 /**
  * Download data as JSON
  */
+delete data.citations.prevObject;
+delete data.citations.context;
+delete data.citations.length;
 $('<a id="downloadAnchorElem" style="display:none"></a>').appendTo('body');
 var dataStr = "data:text/json;charset=utf-8," + encodeURIComponent(JSON.stringify(data));
 var btn_download = document.getElementById('downloadAnchorElem');

Alguns arquivos não foram mostrados porque muitos arquivos mudaram nesse diff