ソースを参照

fix: subpoint numbers were not captured

The subpoint numbers in Articles were not captured due to an error
in the regex parsing the table (in the original GDPR doc). This
has been fixed by updating the regex in parse_gdpr.js, and the
files have been updated with the correct numbers.
Harshvardhan Pandit 7 年 前
コミット
024aa79642
9 ファイル変更5813 行追加15884 行削除
  1. 0 6198
      deliverables/gdpr.json
  2. 306 289
      deliverables/gdpr.jsonld
  3. 180 178
      deliverables/gdpr.n3
  4. 0 3826
      deliverables/gdpr.nt
  5. 16 0
      deliverables/gdpr.owl
  6. 5121 5209
      deliverables/gdpr.rdf
  7. 180 178
      deliverables/gdpr.ttl
  8. 2 4
      scripts/GDPR_en.html
  9. 8 2
      scripts/parse_gdpr.js

ファイルの差分が大きいため隠しています
+ 0 - 6198
deliverables/gdpr.json


ファイルの差分が大きいため隠しています
+ 306 - 289
deliverables/gdpr.jsonld


ファイルの差分が大きいため隠しています
+ 180 - 178
deliverables/gdpr.n3


ファイルの差分が大きいため隠しています
+ 0 - 3826
deliverables/gdpr.nt


+ 16 - 0
deliverables/gdpr.owl

@@ -2,19 +2,35 @@
 <rdf:RDF xmlns="http://purl.org/adaptcentre/ontologies/GDPRtEXT#"
      xml:base="http://purl.org/adaptcentre/ontologies/GDPRtEXT"
      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+     xmlns:foaf="http://xmlns.com/foaf/0.1/"
+     xmlns:terms="http://purl.org/dc/terms/"
      xmlns:owl="http://www.w3.org/2002/07/owl#"
      xmlns:xml="http://www.w3.org/XML/1998/namespace"
      xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
+     xmlns:skos="http://www.w3.org/2004/02/skos/core#"
      xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
      xmlns:dc="http://purl.org/dc/elements/1.1/"
      xmlns:gdprtext="http://purl.org/adaptcentre/ontologies/GDPRtEXT#">
+     xmlns:vann="http://purl.org/vocab/vann/"
+     xmlns:ontology="http://data.europa.eu/eli/ontology#"
+     xmlns:dc="http://purl.org/dc/elements/1.1/">
     <owl:Ontology rdf:about="http://purl.org/adaptcentre/ontologies/GDPRtEXT#">
+        <owl:imports rdf:resource="http://data.europa.eu/eli/ontology#"/>
+        <terms:created rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-08-15</terms:created>
+        <terms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2017-08-18</terms:modified>
         <dc:title rdf:datatype="http://www.w3.org/2001/XMLSchema#string">GDPRtEXT</dc:title>
         <rdfs:comment rdf:datatype="http://www.w3.org/2001/XMLSchema#string">This is an ontology to represent GDPR text as a set of RDF resources</rdfs:comment>
         <dc:description rdf:datatype="http://www.w3.org/2001/XMLSchema#string">This ontology extends the canonical (official) GDPR text with additional annotations</dc:description>
+        <terms:creator rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://harshp.com#me</terms:creator>
+        <terms:description rdf:datatype="http://www.w3.org/2001/XMLSchema#string">This ontology extends the canonical (official) GDPR text with additional annotations</terms:description>
         <rdfs:label rdf:datatype="http://www.w3.org/2001/XMLSchema#string">GDPR text EXTensions</rdfs:label>
         <dc:creator rdf:datatype="http://www.w3.org/2001/XMLSchema#string">Harshvardhan J. Pandit</dc:creator>
         <owl:versionInfo rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0.1</owl:versionInfo>
+        <owl:versionInfo rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">0.1</owl:versionInfo>
+        <vann:preferredNamespacePrefix>gdprtext</vann:preferredNamespacePrefix>
+        <vann:preferredNamespaceUri>http://purl.org/adaptcentre/openscience/ontologies/GDPRtEXT</vann:preferredNamespaceUri>
+        <foaf:homepage>https://openscience.adaptcentre.ie/projects/GDPRtEXT/</foaf:homepage>
+        <terms:license rdf:datatype="https://w3.org/2001/XMLSchema#anyURI">http://creativecommons.org/licenses/by/4.0/</terms:license>
     </owl:Ontology>
     
 

ファイルの差分が大きいため隠しています
+ 5121 - 5209
deliverables/gdpr.rdf


ファイルの差分が大きいため隠しています
+ 180 - 178
deliverables/gdpr.ttl


+ 2 - 4
scripts/GDPR_en.html

@@ -4,8 +4,6 @@
 <!-- CONVEX # converter_version:6.7.1 # generated_on:20160504-0034 -->
 <head>
    <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
-   <link type="text/css" rel="stylesheet"
-         href="./../../../../css/oj/oj.css"/>
    <title>L_2016119EN.01000101.xml</title>
 </head>
 <body>
@@ -8787,7 +8785,7 @@
       <a id="ntr21-L_2016119EN.01000101-E0021" href="#ntc21-L_2016119EN.01000101-E0021">(<span class="super">21</span>)</a>  Regulation (EC) No 1049/2001 of the European Parliament and of the Council of 30 May 2001 regarding public access to European Parliament, Council and Commission documents (<a href="./../../../../legal-content/EN/AUTO/?uri=OJ:L:2001:145:TOC">OJ L 145, 31.5.2001, p. 43</a>).</p>
    <hr class="doc-end"/>
 <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
-<script src="./scripts/parse_gdpr.js"></script>
-<script src="./scripts/fancy_format_gdpr.js"></script>
+<script src="parse_gdpr.js"></script>
+<script src="fancy_format_gdpr.js"></script>
 </body>
 </html>

+ 8 - 2
scripts/parse_gdpr.js

@@ -217,6 +217,7 @@ var extract_points_from_article4 = function(article4) {
  * Extracts points from article
  */
 var extract_points_from_article = function(article) {
+    // console.log(article.number);
 	var text = article.contents;
 	// The extraction mechanism works on the basis of sequential points.
 	// This means that if a point has a subpoint,
@@ -252,7 +253,7 @@ var extract_points_from_article = function(article) {
 		} else if (element_type == 'TABLE') {
 			var p_in_element = element.find('p');
 			var p_number = $(p_in_element[0]).text().trim();
-			var match = p_number.match('^(\\w+).');
+			var match = p_number.match('(\\w+)');
 			if (match == undefined || match == null) {
 				p_number = null;
 			} else {
@@ -267,7 +268,9 @@ var extract_points_from_article = function(article) {
 		}
 	}
 	for(pt of points) {
-		// console.log(pt.type, pt.number, pt.text);
+        for(spt of pt.subpoints) {
+            // console.log(article.number, pt.number, spt.number);
+        }
 	}
 
 	return points;
@@ -435,6 +438,9 @@ data.citations = $('p.note').map(function(index, element) {
 /**
  * Download data as JSON
  */
+delete data.citations.prevObject;
+delete data.citations.context;
+delete data.citations.length;
 $('<a id="downloadAnchorElem" style="display:none"></a>').appendTo('body');
 var dataStr = "data:text/json;charset=utf-8," + encodeURIComponent(JSON.stringify(data));
 var btn_download = document.getElementById('downloadAnchorElem');

この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません