7 роки тому · fd5b669ab7
--- a/README.md
+++ b/README.md
@@ -0,0 +1,24 @@
 
				+# GDPR tEXT
			
 
				+Project for annotating GDPR text to make it referenceable using Linked Open
			
 
				+Data ontologies/vocabularies. 
			
 
				+The canonical GDPR text can be found [here](http://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32016R0679&qid=1499685310592&from=en).
			
 
				+This repository acts as version control for the project. The official
			
 
				+website is at [openscience.adaptcentre.ie](https://openscience.adaptcentre.ie/).
			
 
				+
			
 
				+## Deliverables
			
 
				+
			
 
				+1. [GDPR (En) text with HTML id for each item/article](https://openscience.adaptcentre.ie/resources/GDPRtEXT/gdpr.html)
			
 
				+2. [GDPR (En) text as JSON](https://openscience.adaptcentre.ie/resources/GDPRtEXT/gdpr.json)
			
 
				+3. [GDPR (En) as RDF dataset](https://openscience.adaptcentre.ie/resources/GDPRtEXT/gdpr.rdf)
			
 
				+4. [GDPR (En) text with HTML id + RDFa](https://openscience.adaptcentre.ie/resources/GDPRtEXT/gdpr_rdfa.html)
			
 
				+
			
 
				+## Project Goals
			
 
				+The project aims to annotate this text to fulfil the following goals:
			
 
				+
			
 
				+1. Reference individual chapters, articles, and points using HTML ID attributes
			
 
				+2. Provide additional annotations for the content related to the context and use
			
 
				+3. Provide additional annotations for the GDPRov project
			
 
				+4. Annotate using legal ontologies and (perhaps) SKOS
			
 
				+
			
 
				+## ADAPT Centre
			
 
				+Made by Harshvardhan Pandit as part of PhD research
			
--- a/deliverables/gdpr.html
+++ b/deliverables/gdpr.html
--- a/deliverables/gdpr.json
+++ b/deliverables/gdpr.json
--- a/scripts/GDPR_en.html
+++ b/scripts/GDPR_en.html
--- a/scripts/fancy_format_gdpr.js
+++ b/scripts/fancy_format_gdpr.js
@@ -0,0 +1,153 @@
 
				+/**
			
 
				+ * FANCY FORMAT GDPR TEXT
			
 
				+ * @author Harshvardhan Pandit
			
 
				+ * @author me a.t. harshp dot com
			
 
				+ * LICENSE: MIT
			
 
				+ *
			
 
				+ * Using the data from gdpr.json, this script restructures the HTML to create
			
 
				+ * a hierarchical page that can be referenced by HTML id attributes
			
 
				+ */
			
 
				+
			
 
				+// clear the body
			
 
				+$('body').empty();
			
 
				+
			
 
				+// insert title and other items
			
 
				+$('body').append(
			
 
				+    $('<h1>', {
			
 
				+        'id': 'title',
			
 
				+        'class': 'title',
			
 
				+        'text': data.title + ' (' + data.abbrv + ')' 
			
 
				+    }));
			
 
				+$('body').append(
			
 
				+    $('<p>', {
			
 
				+        'id': 'about',
			
 
				+        'class': 'description',
			
 
				+        'text': data.about
			
 
				+    }));
			
 
				+
			
 
				+// recitals
			
 
				+var element_recital = $('<div>', { 'id': 'recitals' });
			
 
				+element_recital.append(
			
 
				+    $('<h2>', {
			
 
				+        'class': 'recital-title',
			
 
				+        'text': 'Recitals'
			
 
				+    }));
			
 
				+data.recitals.map(function(recital) {
			
 
				+   element_recital.append(
			
 
				+      $('<p>', {
			
 
				+         'id': 'recital-' + recital.number,
			
 
				+         'class': 'recital',
			
 
				+         'text': '(' + recital.number + ') ' + recital.text
			
 
				+      })); 
			
 
				+});
			
 
				+$('body').append(element_recital);
			
 
				+
			
 
				+//  chapters
			
 
				+data.chapters.map(function(chapter) {
			
 
				+    var element_chapter = $('<div>', {
			
 
				+        'id': 'chapter' + chapter.number,
			
 
				+        'class': 'chapter'
			
 
				+    });
			
 
				+    element_chapter.append(
			
 
				+        $('<h2>', {
			
 
				+            'class': 'chapter-title',
			
 
				+            'text': "Chapter " + chapter.number + " " + chapter.title
			
 
				+        }));
			
 
				+
			
 
				+    // process entries in chapter
			
 
				+    // store the article handler so that it can be called on sections and
			
 
				+    // articles and reduces the things written in if-else blocks
			
 
				+
			
 
				+    /**
			
 
				+     * handles article and creates elements
			
 
				+     * returns article div containing points and sub-points
			
 
				+     */
			
 
				+    var handler_article = function(article) {
			
 
				+        var element_article = $('<div>', {
			
 
				+            'id': 'article' + article.number,
			
 
				+            'class': 'article'
			
 
				+        });
			
 
				+        element_article.append(
			
 
				+            $('<h3>', {
			
 
				+                'class': 'article-title',
			
 
				+                'text': 'Article ' + article.number + " " + article.title
			
 
				+            }));
			
 
				+        // handle points
			
 
				+        article.contents.map(function(point) {
			
 
				+            if (point.number != null) {
			
 
				+                var element_point = $('<p>', {
			
 
				+                    'id': 'article' + article.number + '-' + point.number,
			
 
				+                    'class': 'point',
			
 
				+                    'text': '(' + point.number + ') ' + point.text
			
 
				+                });
			
 
				+            } else {
			
 
				+                var element_point = $('<p>', {
			
 
				+                    'class': 'point',
			
 
				+                    'text': point.text
			
 
				+                });
			
 
				+            }
			
 
				+            point.subpoints.map(function(subpoint) {
			
 
				+                if (subpoint.number != null) {
			
 
				+                    element_point.append(
			
 
				+                        $('<p>', {
			
 
				+                           'id': 'article' + article.number + '-' + point.number
			
 
				+                                + '-' + subpoint.number,
			
 
				+                           'class': 'subpoint',
			
 
				+                           'text': '(' + subpoint.number + ') ' + subpoint.text
			
 
				+                        })); 
			
 
				+                } else {
			
 
				+                    element_point.append(
			
 
				+                        $('<p>', {
			
 
				+                           'class': 'subpoint',
			
 
				+                           'text': '- ' + subpoint.text
			
 
				+                        })); 
			
 
				+                }
			
 
				+            });
			
 
				+            element_article.append(element_point);
			
 
				+        });
			
 
				+    
			
 
				+        return element_article;
			
 
				+    };
			
 
				+    // process sections if chapter has sections
			
 
				+    if (chapter.contents[0].type == 'section') {
			
 
				+        chapter.contents.map(function(section) {
			
 
				+            var element_section = $('<div>', {
			
 
				+                'id': 'section' + section.number,
			
 
				+                'class': 'section'
			
 
				+            });
			
 
				+            element_section.append(
			
 
				+                $('<h3>', {
			
 
				+                    'class': 'section-title',
			
 
				+                    'text': 'Section ' + section.number + " " + section.title
			
 
				+                }));
			
 
				+            section.contents.map(function(article) {
			
 
				+                element_section.append(handler_article(article));
			
 
				+            });
			
 
				+            element_chapter.append(element_section);
			
 
				+        });
			
 
				+    } else {
			
 
				+        chapter.contents.map(function(article) {
			
 
				+            element_chapter.append(handler_article(article));
			
 
				+        });
			
 
				+    }
			
 
				+
			
 
				+    // append chapter to body
			
 
				+    $('body').append(element_chapter);
			
 
				+});
			
 
				+
			
 
				+// Citations / References
			
 
				+var element_citation = $('<div>', { 'id': 'citations' });
			
 
				+element_citation.append(
			
 
				+    $('<h2>', {
			
 
				+        'class': 'citation-title',
			
 
				+        'text': 'References'
			
 
				+    }));
			
 
				+data.citations.map(function(index, citation) {
			
 
				+    element_citation.append(
			
 
				+      $('<p>', {
			
 
				+         'id': 'citation-' + citation.number,
			
 
				+         'class': 'citation',
			
 
				+         'text': '(' + citation.number + ') ' + citation.text
			
 
				+      })); 
			
 
				+});
			
 
				+$('body').append(element_citation);
			
--- a/scripts/parse_gdpr.js
+++ b/scripts/parse_gdpr.js
@@ -0,0 +1,444 @@
 
				+/**

			
 
				+ * Parse GDPR text

			
 
				+ * @author Harshvardhan Pandit me a.t. harshp.com

			
 
				+ * @name parse_gdpr

			
 
				+ * LICENSE: MIT

			
 
				+ *

			
 
				+ * This script parses GDPR text from the webpage into a JSON document

			
 
				+ * It also re-arranges the text into a better version where each individual

			
 
				+ * item in the text is assigned a contextual ID attribute making it

			
 
				+ * easy to reference the particular item.

			
 
				+ */

			
 
				+

			
 
				+var FLG_DEBUG = true;

			
 
				+if (!FLG_DEBUG) {

			
 
				+	console.log = function() {};

			
 
				+}

			
 
				+

			
 
				+/**

			
 
				+ * @return {jQuery array} returns all GDPR article text

			
 
				+ */

			
 
				+var extract_gdpr_articles_from_page = function() {

			
 
				+    // starting element is the one just before CHAPTER I

			
 
				+    var begin = $('p#d1e1374-1-1').prev();

			
 
				+    // ending element is the div marked final

			
 
				+    var end = $('div.final');

			
 
				+    // first processing element is the one after begin (CHAPTER I)

			
 
				+    var element = begin.next();

			
 
				+    var text = [];

			
 
				+

			
 
				+    // continue while we haven't reached the end element

			
 
				+    while (!element.is(end)) {

			
 
				+        text.push(element);

			
 
				+        element = element.next();

			
 
				+    }

			
 
				+

			
 
				+    return text;

			
 
				+}

			
 
				+var txt_gdpr = extract_gdpr_articles_from_page();

			
 
				+

			
 
				+/**

			
 
				+ * @param  {jQuery array} text contains the text of the GDPR

			
 
				+ * @return {[array of arrays]} chapters extracted from text

			
 
				+ * Each chapter contains the sections and articles within it sequentially

			
 
				+ * in the arrays inside it

			
 
				+ * [chapters] where [chapter] contains everything as jQuery array

			
 
				+ */

			
 
				+var extract_chapters_from_gdpr_text = function(text) {

			
 
				+    var chapters = [];

			
 
				+    var chapter = null;

			
 
				+    var chapter_contents = null;

			
 
				+    for (var i=0; i<text.length; i++) {

			
 
				+        var element = text[i];

			
 
				+        // mark for new chapter

			
 
				+        if(

			
 
				+            // check element type is P

			
 
				+            element.prop('nodeName') == 'P' &&

			
 
				+            // test id syntax

			
 
				+            /^d1e\d+-1-1$/.test(element.prop('id')) && 

			
 
				+            // test text is chapter

			
 
				+            /^\s*CHAPTER [IVX]+\s*$/.test(element.text())

			
 
				+        ) {

			
 
				+            // extract one chapter

			
 
				+        	// the title is in the next element

			
 
				+        	var chapter_title = text[i+1].text().trim();

			
 
				+        	i += 1

			
 
				+        	chapter_contents = [];

			
 
				+			chapter = {

			
 
				+                'number': element.text().trim().slice(8),

			
 
				+                'title': chapter_title,

			
 
				+                "type": "chapter",

			
 
				+                "contents": chapter_contents

			
 
				+            };

			
 
				+            chapters.push(chapter);

			
 
				+        } else {

			
 
				+        	chapter_contents.push(element);

			
 
				+        }

			
 
				+    }

			
 
				+    for(let ch of chapters) {

			
 
				+        // console.log(ch.type, ch.number, ch.contents.length);

			
 
				+    }

			
 
				+    return chapters;

			
 
				+}

			
 
				+var chapters = extract_chapters_from_gdpr_text(txt_gdpr);

			
 
				+

			
 
				+/**

			
 
				+ * @param  {[jQuery array]} list contents of chapter

			
 
				+ * @return {[array of arrays]} articles within that chapter

			
 
				+ * Extracts articles from text (chapter)

			
 
				+ */

			
 
				+var extract_articles_from_text = function(text) {

			
 
				+    var articles = [];

			
 
				+    var article = null;

			
 
				+    var article_contents = null;

			
 
				+    for (var i=0; i<text.length; i++) {

			
 
				+        var element = text[i];

			
 
				+        if (

			
 
				+            // if its a P element

			
 
				+            // element.prop('nodeName') == 'P' &&

			
 
				+            // with id of the form

			
 
				+            /^d1e\d+-1-1$/.test(element.prop('id')) &&

			
 
				+            // and text is Article

			
 
				+            /^Article \d+$/.test(element.text())

			
 
				+        ) {

			
 
				+        	var article_title = text[i+1].text().trim();

			
 
				+        	i += 1;

			
 
				+        	article_contents = [];

			
 
				+        	article = {

			
 
				+        		'number': element.text().slice(8),

			
 
				+        		'title': article_title,

			
 
				+        		'type': 'article',

			
 
				+        		'contents': article_contents

			
 
				+        	};

			
 
				+            articles.push(article);

			
 
				+        } else {

			
 
				+        	article_contents.push(element);

			
 
				+        }

			
 
				+    }

			
 
				+    for (let at of articles) {

			
 
				+    	// console.log(at.type, at.number, at.contents.length);

			
 
				+    }

			
 
				+

			
 
				+    return articles;

			
 
				+}

			
 
				+

			
 
				+/** Article 4 needs special handling

			
 
				+ *	This is because in Article 4, the first point is a generic point without a number,

			
 
				+ *	but the other points are all (weirdly) in a table. Additionally, some of the points

			
 
				+ *	have subpoints as internal tables.

			
 
				+ *

			
 
				+ * The difference between the points in Article 4 and the other articles is the way

			
 
				+ * they are parsed. In Article 4, in addition to each point being a table, the subpoints

			
 
				+ * are embedded in the point itself, whereas in other articles, the subpoints though

			
 
				+ * being a table, are a separate element/item.

			
 
				+ */

			
 
				+

			
 
				+/**

			
 
				+ * @param  {[jQuery array]}

			
 
				+ * @return {[map of points]}

			
 
				+ * Each point has the attribute

			
 
				+ * 	- number: digit/null

			
 
				+ * 	- text: text of the point

			
 
				+ * 	- type: text/point

			
 
				+ * 	- subpoints: []

			
 
				+ * 	This function is only to extract points from Article 4

			
 
				+ */

			
 
				+var extract_points_from_article4 = function(article4) {

			
 
				+	var text = article4.contents;

			
 
				+	var points = [];

			
 
				+	var point = null;

			
 
				+	var subpoints = null;

			
 
				+	var subpoint = null;

			
 
				+	for (var i=0; i<text.length; i++) {

			
 
				+		var element = text[i];

			
 
				+		var p_in_element = element.find('p');

			
 
				+		if (p_in_element.length == 0) {

			
 
				+			points.push({

			
 
				+				'number': null,

			
 
				+				'text': element.text().trim(),

			
 
				+				'type': 'text',

			
 
				+				'subpoints': []

			
 
				+			});

			
 
				+			continue;

			
 
				+		}

			
 
				+		var p_number = $(p_in_element[0]);

			
 
				+		var match = p_number.text().match('^\\((\\d+)\\)');

			
 
				+        if (match != undefined && match != null) {

			
 
				+            p_number = match[1];

			
 
				+        } else {

			
 
				+        	p_number = null;

			
 
				+        }

			
 
				+		var p_text = $(p_in_element[1]).text().trim();

			
 
				+		var p_subpoints = p_in_element.slice(2);

			
 
				+		subpoints = [];

			
 
				+		for (var j=0; j<p_subpoints.length; j++) {

			
 
				+			var subpoint_number = $(p_subpoints[j]).text();

			
 
				+			var match = subpoint_number.match('^\\((\\w+)\\)');

			
 
				+			if (match != undefined && match != null) {

			
 
				+	            subpoint_number = match[1];

			
 
				+	        } else {

			
 
				+	        	subpoint_number = null;

			
 
				+	        }

			
 
				+			var subpoint_text = $(p_subpoints[j+1]).text().trim();

			
 
				+			j += 1;

			
 
				+			subpoints.push({

			
 
				+				'number': subpoint_number,

			
 
				+				'text': subpoint_text,

			
 
				+				'type': 'subpoint'

			
 
				+			});

			
 
				+		}

			
 
				+		points.push({

			
 
				+			'number': p_number,

			
 
				+			'text': p_text,

			
 
				+			'type': 'point',

			
 
				+			'subpoints': subpoints

			
 
				+		});

			
 
				+	}

			
 
				+	for(let pt of points) {

			
 
				+		// console.log(pt.type, pt.number, pt.subpoints.length);

			
 
				+		for(let spt of pt.subpoints) {

			
 
				+			// console.log(spt.type, spt.number);

			
 
				+		}

			
 
				+	}

			
 
				+

			
 
				+	return points;

			
 
				+}

			
 
				+// var points_in_article4 = extract_points_from_article4(article4);

			
 
				+

			
 
				+

			
 
				+/**

			
 
				+ * @param  {[jQuery array]} article

			
 
				+ * @return {[map of points]}

			
 
				+ * Each point has the attribute

			
 
				+ * 	- number: digit/null

			
 
				+ * 	- text: text of the point

			
 
				+ * 	- type: text/point

			
 
				+ * 	- subpoints: []

			
 
				+ * Extracts points from article

			
 
				+ */

			
 
				+var extract_points_from_article = function(article) {

			
 
				+	var text = article.contents;

			
 
				+	// The extraction mechanism works on the basis of sequential points.

			
 
				+	// This means that if a point has a subpoint,

			
 
				+	// then it will be in the NEXT element as a table

			
 
				+

			
 
				+	var points = [];

			
 
				+	var point = null;

			
 
				+	var subpoints = null;

			
 
				+	for(var element of text) {

			
 
				+		var element_type = element.prop('nodeName');

			
 
				+		if (element_type == 'P') {

			
 
				+			var element_text = element.text().trim();

			
 
				+			var match = element_text.match('^(\\d+).');

			
 
				+			if (match == undefined || match == null) {

			
 
				+				// this point has no number

			
 
				+				point = {

			
 
				+					'number': null,

			
 
				+					'text': element_text,

			
 
				+					'type': 'text',

			
 
				+					'subpoints': []

			
 
				+				};

			
 
				+				points.push(point);

			
 
				+			} else {

			
 
				+				// point has a number

			
 
				+				point = {

			
 
				+					'number': match[1],

			
 
				+					'type': 'point',

			
 
				+					'subpoints': []

			
 
				+				};

			
 
				+				point.text = element_text.match('^\\d+.\\s+(.*)')[1];

			
 
				+				points.push(point);

			
 
				+			}

			
 
				+		} else if (element_type == 'TABLE') {

			
 
				+			var p_in_element = element.find('p');

			
 
				+			var p_number = $(p_in_element[0]).text().trim();

			
 
				+			var match = p_number.match('^(\\w+).');

			
 
				+			if (match == undefined || match == null) {

			
 
				+				p_number = null;

			
 
				+			} else {

			
 
				+				p_number = match[1];

			
 
				+			}

			
 
				+			var p_text = $(p_in_element[1]).text().trim();

			
 
				+			point.subpoints.push({

			
 
				+				'number': p_number,

			
 
				+				'text': p_text,

			
 
				+				'type': 'subpoint'

			
 
				+			});

			
 
				+		}

			
 
				+	}

			
 
				+	for(pt of points) {

			
 
				+		// console.log(pt.type, pt.number, pt.text);

			
 
				+	}

			
 
				+

			
 
				+	return points;

			
 
				+}

			
 
				+

			
 
				+/**

			
 
				+ * combine articles in chapter 1 together

			
 
				+ */

			
 
				+var articles_in_chapter_1 = extract_articles_from_text(chapters[0].contents);

			
 
				+var points_in_articles = articles_in_chapter_1.slice(0,3).map(extract_points_from_article);

			
 
				+points_in_articles.push(extract_points_from_article4(articles_in_chapter_1[3]));

			
 
				+for(var i=0; i<articles_in_chapter_1.length; i++) {

			
 
				+	var article = articles_in_chapter_1[i];

			
 
				+	article.contents = points_in_articles[i];

			
 
				+}

			
 
				+chapters[0].contents = articles_in_chapter_1;

			
 
				+

			
 
				+/**

			
 
				+ * create the final data object that will hold all the items together

			
 
				+ */

			
 
				+var data = {

			
 
				+    'title': "General Data Protection Regulation",

			
 
				+    'abbrv': "GDPR",

			
 
				+    'regulation': "2016/679",

			
 
				+    'dated': "27/04/2016",

			
 
				+    'updated': "04/05/2016",

			
 
				+    'about': "protection of natural persons with regard to the processing of personal data and on the free movement of such data, and repealing Directive 95/46/EC (General Data Protection Regulation)",

			
 
				+    'identifier': "L 119/1",

			
 
				+    'language': "EN",

			
 
				+	'chapters': [chapters[0]],

			
 
				+    'recitals': []

			
 
				+};

			
 
				+// chapters yet to be processed

			
 
				+chapters = chapters.slice(1);

			
 
				+

			
 
				+/**

			
 
				+ * Process the other chapters

			
 
				+ * Now these other chapters can also contain sections, so the first test to be made

			
 
				+ * is whether a chapter contains any sections. If it does, then the sections get

			
 
				+ * extracted first, and after that the articles need to be extracted from the sections.

			
 
				+ * If a chapter does not contain any sections, then the articles will be extracted

			
 
				+ * directly from the section.

			
 
				+ */

			
 
				+

			
 
				+var extract_sections_from_text = function(text) {

			
 
				+	var sections = [];

			
 
				+    var section = null;

			
 
				+    var section_contents = null;

			
 
				+    for (var i=0; i<text.length; i++) {

			
 
				+        var element = text[i];

			
 
				+        if (

			
 
				+            element.prop('nodeName') == 'P' &&

			
 
				+            /^d1e\d+-1-1$/.test(element.prop('id')) &&

			
 
				+            $(element).children('span.expanded').length == 1

			
 
				+        ) {

			
 
				+        	var section_title = text[i+1].text().trim();

			
 
				+        	i += 1;

			
 
				+        	section_contents = [];

			
 
				+        	section = {

			
 
				+        		'number': element.text().trim().slice(7).trim(),

			
 
				+        		'title': section_title,

			
 
				+        		'type': 'section',

			
 
				+        		'contents': section_contents

			
 
				+        	};

			
 
				+            sections.push(section);

			
 
				+        } else {

			
 
				+        	if (section_contents == null) {

			
 
				+        		return null;

			
 
				+        	}

			
 
				+        	section_contents.push(element);

			
 
				+        }

			
 
				+    }

			
 
				+    for (let sec of sections) {

			
 
				+    	// console.log(sec.type, sec.number, sec.contents.length);

			
 
				+    }

			
 
				+

			
 
				+    return sections;

			
 
				+}

			
 
				+for(let ch of chapters) {

			
 
				+	var sections = extract_sections_from_text(ch.contents);

			
 
				+	if (sections == null) {

			
 
				+		// console.log(ch.type, ch.number, "does not have Sections");

			
 
				+		var articles = extract_articles_from_text(ch.contents);

			
 
				+		ch.contents = articles;

			
 
				+		for(let at of articles) {

			
 
				+			// console.log(at.type, at.number);

			
 
				+			at.contents = extract_points_from_article(at);

			
 
				+		}

			
 
				+	} else {

			
 
				+		// console.log(ch.type, ch.number, "has Sections");

			
 
				+		ch.contents = sections;

			
 
				+		for(let sec of sections) {

			
 
				+			var articles = extract_articles_from_text(sec.contents);

			
 
				+			for(let at of articles) {

			
 
				+				at.contents = extract_points_from_article(at);

			
 
				+			}

			
 
				+;			sec.contents = articles;

			
 
				+		}

			
 
				+	}

			
 
				+	data.chapters.push(ch);

			
 
				+}

			
 
				+

			
 
				+/**

			
 
				+ * DEBUG OUTPUT

			
 
				+ */

			
 
				+// for(let ch of data.chapters) {

			
 
				+// 	console.log(ch.type, ch.number, ch.contents.length);

			
 
				+// 	if (ch.contents[0].type == 'section') {

			
 
				+// 		for(let sec of ch.contents) {

			
 
				+// 			console.log(sec.type, sec.number, sec.contents.length);

			
 
				+// 			for(let at of sec.contents) {

			
 
				+// 				console.log(at.type, at.number, at.contents.length);

			
 
				+// 				for(let pt of at.contents) {

			
 
				+// 					console.log(pt.type, pt.number, pt.subpoints.length);

			
 
				+// 				}

			
 
				+// 			}

			
 
				+// 		}

			
 
				+// 	} else {

			
 
				+// 		for(let at of ch.contents) {

			
 
				+// 			console.log(at.type, at.number, at.contents.length);

			
 
				+// 			for(let pt of at.contents) {

			
 
				+// 				console.log(pt.type, pt.number, pt.subpoints.length);

			
 
				+// 			}

			
 
				+// 		}

			
 
				+// 	}

			
 
				+// }

			
 
				+

			
 
				+

			
 
				+/**

			
 
				+ * RECITALS

			
 
				+ */

			
 
				+// The recitals start at the table (second) which is after the p element

			
 
				+// with id = d1e40-1-1

			
 
				+var element = $('p#d1e40-1-1');

			
 
				+while(element.prop('nodeName') != 'TABLE') element = element.next();

			
 
				+// At this stage, element points to the table where recitals begin

			
 
				+// Each table holds two p elements, where the first one contains the number

			
 
				+// and the second contains the text 

			
 
				+while(element.prop('nodeName') != 'P') {

			
 
				+    var p_in_element = element.find('p');

			
 
				+    var p_text = $(p_in_element[1]).text().trim();

			
 
				+    var p_number = $(p_in_element[0]).text().match('^\\((\\d+)\\)')[1];

			
 
				+    data.recitals.push({

			
 
				+        'number': p_number,

			
 
				+        'text': p_text,

			
 
				+        'type': 'recital'

			
 
				+    });

			
 
				+    element = element.next();

			
 
				+}

			
 
				+

			
 
				+/**

			
 
				+ * CITATIONS

			
 
				+ */

			
 
				+// citations are p elements with class note

			
 
				+data.citations = $('p.note').map(function(index, element) {

			
 
				+    var match = $(element).text().trim().match('^\\((\\d+)\\)\\s+(.*)$')

			
 
				+    return {

			
 
				+        'number': match[1],

			
 
				+        'text': match[2],

			
 
				+        'type': 'citation'

			
 
				+    };

			
 
				+});

			
 
				+

			
 
				+

			
 
				+/**

			
 
				+ * Download data as JSON

			
 
				+ */

			
 
				+$('<a id="downloadAnchorElem" style="display:none"></a>').appendTo('body');

			
 
				+var dataStr = "data:text/json;charset=utf-8," + encodeURIComponent(JSON.stringify(data));

			
 
				+var btn_download = document.getElementById('downloadAnchorElem');

			
 
				+btn_download.setAttribute("href", dataStr);

			
 
				+btn_download.setAttribute("download", "gdpr.json");

			
 
				+/** TO DOWNLOAD, CALL THIS IN BROWSER CONSOLE **/

			
 
				+// btn_download.click();