|
@@ -0,0 +1,212 @@
|
|
|
package r2rml;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.io.IOUtils;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ResultSet;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.log4j.Logger;
|
|
|
+
|
|
|
+public class Downlift {
|
|
|
+
|
|
|
+ private static Logger logger = Logger.getLogger(Downlift.class.getName());
|
|
|
+
|
|
|
+ private static File csvFile = null;
|
|
|
+ private static File mapFile = null;
|
|
|
+ private static List<File> rdfFiles = new ArrayList<File>();
|
|
|
+
|
|
|
+ private static String pred = null;
|
|
|
+
|
|
|
+ private static List<String> headers = new ArrayList<String>();
|
|
|
+ private static List<String> predicates = new ArrayList<String>();
|
|
|
+
|
|
|
+
|
|
|
+ public static void main(String[] args) {
|
|
|
+ try {
|
|
|
+ parseArguments(args);
|
|
|
+ prepareStructure();
|
|
|
+ prepareData();
|
|
|
+ } catch (Exception e) {
|
|
|
+ logger.error(e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void prepareStructure() throws IOException {
|
|
|
+ logger.info("Pre-processing structure of new CSV file.");
|
|
|
+
|
|
|
+ Model mapping = ModelFactory.createDefaultModel();
|
|
|
+ logger.info("Loading " + mapFile);
|
|
|
+ mapping.read(mapFile.getCanonicalPath());
|
|
|
+
|
|
|
+ // Add structure from mapping based on metadata
|
|
|
+ String s = IOUtils.toString(Downlift.class.getResourceAsStream("/structure-query.sparql"));
|
|
|
+ QueryExecution qex = QueryExecutionFactory.create(s, mapping);
|
|
|
+ ResultSet rs = qex.execSelect();
|
|
|
+
|
|
|
+ while(rs.hasNext()) {
|
|
|
+ QuerySolution qs = rs.next();
|
|
|
+ String label = qs.getLiteral("label").getString();
|
|
|
+ String predicate = qs.getResource("predicate").getURI();
|
|
|
+ headers.add(label);
|
|
|
+ predicates.add(predicate);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Add predicate
|
|
|
+ headers.add(pred);
|
|
|
+ predicates.add(pred);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void prepareData() throws IOException, DownliftException {
|
|
|
+ // load all the RDF files
|
|
|
+ logger.info("Loading RDF files...");
|
|
|
+ Model data = ModelFactory.createDefaultModel();
|
|
|
+ for(File file : rdfFiles) {
|
|
|
+ logger.info("Loading " + file);
|
|
|
+ data.read(file.getCanonicalPath());
|
|
|
+ }
|
|
|
+
|
|
|
+ // write the header
|
|
|
+ logger.info("Writing headers to CSV file...");
|
|
|
+ FileWriter fileWriter = new FileWriter(csvFile);
|
|
|
+ CSVFormat csvFileFormat = CSVFormat.RFC4180;
|
|
|
+ CSVPrinter csvFilePrinter = new CSVPrinter(fileWriter, csvFileFormat);
|
|
|
+ csvFilePrinter.printRecord(headers.toArray());
|
|
|
+
|
|
|
+ // create and execute query to prepare data
|
|
|
+ logger.info("Writing records to CSV file...");
|
|
|
+
|
|
|
+ String s = "", vars = "";
|
|
|
+ s += "PREFIX odef:<http://adaptcentre.ie/ont/odef#>";
|
|
|
+ s += "SELECT *** WHERE { ";
|
|
|
+ s += "?x a odef:Record .";
|
|
|
+ String p = null;
|
|
|
+ for(int i = 0; i < predicates.size(); i++) {
|
|
|
+ p = predicates.get(i);
|
|
|
+ s += "OPTIONAL { ?x <" + p + "> ?x" + i + ".}";
|
|
|
+ vars += " ?x" + i;
|
|
|
+ }
|
|
|
+ s += "} ORDER BY ?x";
|
|
|
+ s = s.replace("***", vars);
|
|
|
+
|
|
|
+ System.out.println(s);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Though Jena provides a method for writing the result set to a CSV
|
|
|
+ * file, relying on the CSVFilePrinter allows us to extend the tool
|
|
|
+ * with some options such as defining the delimiter and type of CSV
|
|
|
+ * file (excel, MySQL, ...) .
|
|
|
+ */
|
|
|
+ QueryExecution qe = QueryExecutionFactory.create(s, data);
|
|
|
+ ResultSet rs = qe.execSelect();
|
|
|
+ List<String> sol = null;
|
|
|
+ QuerySolution qs = null;
|
|
|
+ RDFNode node = null;
|
|
|
+ while(rs.hasNext()) {
|
|
|
+ qs = rs.next();
|
|
|
+ sol = new ArrayList<String>();
|
|
|
+ for(String var : rs.getResultVars()) {
|
|
|
+ node = qs.get(var);
|
|
|
+ if(node == null) sol.add(null);
|
|
|
+ else if(node.isLiteral()) sol.add(node.asLiteral().getValue().toString());
|
|
|
+ else sol.add(node.toString());
|
|
|
+ }
|
|
|
+ csvFilePrinter.printRecord(sol);
|
|
|
+ }
|
|
|
+
|
|
|
+ // close the output files
|
|
|
+ logger.info("Finishing up writing to CSV file...");
|
|
|
+ try {
|
|
|
+ fileWriter.flush();
|
|
|
+ fileWriter.close();
|
|
|
+ csvFilePrinter.close();
|
|
|
+ } catch (IOException e) {
|
|
|
+ throw new DownliftException("Error while flushing or closing FileWriter or CSVPrinter.", e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void parseArguments(String[] args) throws DownliftException {
|
|
|
+ Options options = new Options();
|
|
|
+ options.addOption("p", true, "The URI of the predicate for the additional column.");
|
|
|
+ options.addOption("o", true, "The filename for the output CSV file (default = output.csv).");
|
|
|
+ options.addOption("m", true, "The filename for the R2RML mapping file.");
|
|
|
+ options.addOption("f", true, "List of RDF files.");
|
|
|
+ options.addOption("h", false, "Display help.");
|
|
|
+
|
|
|
+ try {
|
|
|
+ CommandLineParser parser = new DefaultParser();
|
|
|
+ CommandLine cmd = parser.parse(options, args);
|
|
|
+
|
|
|
+ // Display help
|
|
|
+ if(cmd.hasOption("h")) {
|
|
|
+ HelpFormatter formatter = new HelpFormatter();
|
|
|
+ formatter.printHelp("downlift.jar", options);
|
|
|
+ System.exit(0);
|
|
|
+ }
|
|
|
+
|
|
|
+ String out = null, map = null;
|
|
|
+
|
|
|
+ // Process the R2RML mapping file
|
|
|
+ if(cmd.hasOption("m")) {
|
|
|
+ map = cmd.getOptionValue("m");
|
|
|
+ logger.info("Using R2RML mapping file " + map);
|
|
|
+ } else {
|
|
|
+ throw new DownliftException("Providing an R2RML mapping file is mandatory.");
|
|
|
+ }
|
|
|
+
|
|
|
+ mapFile = new File(map);
|
|
|
+ if(!mapFile.exists()) {
|
|
|
+ throw new DownliftException("R2RML mapping file does not exist.");
|
|
|
+ }
|
|
|
+
|
|
|
+ // Process output CSV file
|
|
|
+ out = cmd.getOptionValue("o", "output.csv");
|
|
|
+ logger.info("Using output CSV file " + out);
|
|
|
+ csvFile = new File(out);
|
|
|
+
|
|
|
+ // Process predicate for additional column
|
|
|
+ if(cmd.hasOption("p")) {
|
|
|
+ pred = cmd.getOptionValue("p");
|
|
|
+ logger.info("Searching for links with URI " + pred);
|
|
|
+ } else {
|
|
|
+ throw new DownliftException("Providing URI to a predicate is mandatory.");
|
|
|
+ }
|
|
|
+
|
|
|
+ // Process RDF files
|
|
|
+ if(cmd.hasOption("f") && cmd.getOptionValues("f").length > 0) {
|
|
|
+ String[] files = cmd.getOptionValues("f");
|
|
|
+ for(String file : files) {
|
|
|
+ File f = new File(file);
|
|
|
+ if(!f.exists()) {
|
|
|
+ throw new DownliftException(f + " does not exist.");
|
|
|
+ }
|
|
|
+ logger.info("Using RDF file " + f);
|
|
|
+ rdfFiles.add(f);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ throw new DownliftException("Providing RDF files is mandatory.");
|
|
|
+ }
|
|
|
+
|
|
|
+ } catch(ParseException e) {
|
|
|
+ throw new DownliftException("Parsing failed. Reason: " + e.getMessage(), e);
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+}
|