TermMap.java 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. package r2rml.model;
  2. import java.net.MalformedURLException;
  3. import java.util.ArrayList;
  4. import java.util.HashMap;
  5. import java.util.HashSet;
  6. import java.util.List;
  7. import java.util.Map;
  8. import java.util.Set;
  9. import java.util.regex.Matcher;
  10. import java.util.regex.Pattern;
  11. import javax.script.ScriptException;
  12. import org.apache.commons.lang3.StringEscapeUtils;
  13. import org.apache.jena.datatypes.RDFDatatype;
  14. import org.apache.jena.enhanced.UnsupportedPolymorphismException;
  15. import org.apache.jena.iri.IRI;
  16. import org.apache.jena.iri.IRIFactory;
  17. import org.apache.jena.rdf.model.RDFList;
  18. import org.apache.jena.rdf.model.RDFNode;
  19. import org.apache.jena.rdf.model.Resource;
  20. import org.apache.jena.rdf.model.ResourceFactory;
  21. import org.apache.jena.rdf.model.Statement;
  22. import org.apache.jena.util.iterator.ExtendedIterator;
  23. import org.apache.jena.vocabulary.XSD;
  24. import org.apache.log4j.Logger;
  25. import r2rml.database.Row;
  26. import r2rml.engine.R2RML;
  27. import r2rml.engine.R2RMLException;
  28. import r2rml.engine.R2RMLTypeMapper;
  29. import r2rml.engine.RRF;
  30. import r2rml.function.JSEnv;
  31. /**
  32. * TermMap Class.
  33. *
  34. * @author Christophe Debruyne
  35. * @version 0.2
  36. *
  37. */
  38. public abstract class TermMap extends R2RMLResource {
  39. private static Logger logger = Logger.getLogger(TermMap.class.getName());
  40. private Resource termType = null;
  41. /*
  42. * Term generation rules for blank nodes. If the term type is rr:BlankNode:
  43. * Return a blank node that is unique to the natural RDF lexical form
  44. * corresponding to value. This seems to imply that there is a one-on-one
  45. * mapping for each value and blank node. We will thus map the outcome
  46. * of the constant, template, or column to the same blank node. In other
  47. * words, if two TermMaps use "test"^^xsd:string, return same blank node.
  48. * "1"^^xsd:string and "1"^^xsd:integer are different.
  49. *
  50. */
  51. private static Map<Object, Resource> blankNodeMap = new HashMap<Object, Resource>();
  52. private String template;
  53. private RDFNode constant;
  54. private String column;
  55. private FunctionCall functionCall;
  56. protected String language = null;
  57. protected Resource datatype = null;
  58. protected String baseIRI = null;
  /**
   * Creates a term map backed by the given description.
   *
   * @param description resource describing this term map; handed to the superclass
   * @param baseIRI base IRI kept for turning relative values into absolute IRIs
   */
  public TermMap(Resource description, String baseIRI) {
    super(description);
    this.baseIRI = baseIRI;
  }
  63. @Override
  64. protected boolean preProcessAndValidate() {
  65. logger.info("Processing TermMap " + description);
  66. List<Statement> templates = description.listProperties(R2RML.template).toList();
  67. List<Statement> constants = description.listProperties(R2RML.constant).toList();
  68. List<Statement> columns = description.listProperties(R2RML.column).toList();
  69. List<Statement> functions = description.listProperties(RRF.functionCall).toList();
  70. // Having exactly one of rr:constant, rr:column, rr:template
  71. if(templates.size() + constants.size() + columns.size() + functions.size() != 1) {
  72. logger.error("TermMap must have exactly one of rr:constant, rr:column, and rr:template.");
  73. logger.error(description);
  74. return false;
  75. }
  76. // The value of the rr:column property must be a valid column name.
  77. if(columns.size() == 1) {
  78. column = distillColumnName(columns.get(0).getObject());
  79. if(column == null) {
  80. logger.error("The value of the rr:column property must be a valid column name.");
  81. logger.error(description);
  82. return false;
  83. }
  84. } else if(templates.size() == 1) {
  85. // Check whether it is a valid template
  86. template = distillTemplate(templates.get(0).getObject());
  87. if(template == null) {
  88. logger.error("The value of the rr:template property must be a valid string template.");
  89. logger.error(description);
  90. return false;
  91. }
  92. // Check whether the referenced column names are valid
  93. for(String columnName : getReferencedColumns()) {
  94. if(!R2RMLUtil.isValidColumnName(columnName)) {
  95. logger.error("Invalid column name in rr:template: " + columnName);
  96. logger.error(description);
  97. return false;
  98. }
  99. }
  100. } else if(constants.size() == 1) {
  101. // the check for ConstantValuedTermMaps are local (different rules
  102. // for different TermMaps.
  103. constant = distillConstant(constants.get(0).getObject());
  104. if(constant == null)
  105. return false;
  106. } else if(functions.size() == 1) {
  107. functionCall = distillFunction(functions.get(0).getObject());
  108. if(functionCall == null)
  109. return false;
  110. // Check whether the referenced column names are valid
  111. for(String columnName : getReferencedColumns()) {
  112. if(!R2RMLUtil.isValidColumnName(columnName)) {
  113. logger.error("Invalid column name in rrf:functionCall " + columnName);
  114. logger.error(description);
  115. return false;
  116. }
  117. }
  118. }
  119. // Validity of the termType is also local.
  120. // At most one and compute default one if absent.
  121. List<Statement> list = description.listProperties(R2RML.termType).toList();
  122. if(list.size() > 1) {
  123. logger.error("TermMap can have at most one rr:termType.");
  124. logger.error(description);
  125. return false;
  126. } else if (list.size() == 0) {
  127. termType = inferTermType();
  128. } else {
  129. // We have exactly one value. Check validity.
  130. // Is it a valid IRI?
  131. if(!list.get(0).getObject().isURIResource()) {
  132. logger.error("TermMap's rr:termType must be a valid IRI.");
  133. logger.error(description);
  134. return false;
  135. }
  136. termType = list.get(0).getObject().asResource();
  137. // Is it a valid option?
  138. if(!isChosenTermTypeValid())
  139. return false;
  140. }
  141. return true;
  142. }
  143. private FunctionCall distillFunction(RDFNode node) {
  144. if(node.isLiteral())
  145. return null;
  146. // fcn stands for Function Call Node
  147. Resource fcn = node.asResource();
  148. List<Statement> functions = fcn.listProperties(RRF.function).toList();
  149. if(functions.size() != 1) {
  150. logger.error("Function valued TermMap must have exactly one rrf:function.");
  151. logger.error(description);
  152. return null;
  153. }
  154. // Process the function, get the function name and then the parameters
  155. RDFNode f = functions.get(0).getObject();
  156. String functionname = JSEnv.registerFunction(f);
  157. if(functionname == null) {
  158. // Something went wrong, reported by the function.
  159. return null;
  160. }
  161. List<Statement> pbindings = fcn.listProperties(RRF.parameterBindings).toList();
  162. if(pbindings.size() != 1) {
  163. logger.error("Function valued TermMap must have exactly one rrf:parameterBindings.");
  164. logger.error(description);
  165. return null;
  166. }
  167. RDFList list = null;
  168. try {
  169. list = pbindings.get(0).getObject().as(RDFList.class);
  170. } catch(UnsupportedPolymorphismException e) {
  171. logger.error("rrf:parameterBindings must be an RDF collection.");
  172. logger.error(description);
  173. return null;
  174. }
  175. functionCall = new FunctionCall(functionname);
  176. ExtendedIterator<RDFNode> iter = list.iterator();
  177. while(iter.hasNext()) {
  178. RDFNode param = iter.next();
  179. if(!param.isResource()) {
  180. logger.error("Parameters in rrf:parameterBindings have to be resources.");
  181. logger.error(description);
  182. return null;
  183. }
  184. ObjectMap om = new ObjectMap(param.asResource(), baseIRI);
  185. if(om.preProcessAndValidate()) {
  186. functionCall.addParameter(om);
  187. } else {
  188. logger.error("Something went wrong processing parameter.");
  189. logger.error(description);
  190. return null;
  191. }
  192. }
  193. return functionCall;
  194. }
  195. /**
  196. * Infer "default" termtype.
  197. * @return
  198. */
  199. protected abstract Resource inferTermType();
  200. /**
  201. * True if chosen TermType is valid for subclasses.
  202. * @return
  203. */
  204. protected abstract boolean isChosenTermTypeValid();
  205. /**
  206. * True if the conditions for constant values for one of the TermMap's subclasses
  207. * are met.
  208. * @return
  209. */
  210. protected abstract RDFNode distillConstant(RDFNode node);
  211. private String distillTemplate(RDFNode node) {
  212. if(!node.isLiteral())
  213. return null;
  214. if(!node.asLiteral().getDatatype().getURI().equals(XSD.xstring.getURI()))
  215. return null;
  216. // TODO: check the actual value of the template
  217. return node.asLiteral().toString();
  218. }
  219. private Set<String> getReferencedColumns() {
  220. Set<String> set = new HashSet<String>();
  221. if(isColumnValuedTermMap()) {
  222. // Singleton
  223. set.add(column);
  224. } else if(isTemplateValuedTermMap()) {
  225. // Little hack to "ignore" escaped curly braces.
  226. String temp = template.replace("\\{", "--").replace("\\}", "--");
  227. Matcher m = Pattern.compile("\\{([^}]+)\\}").matcher(temp);
  228. while(m.find()) {
  229. set.add(template.substring(m.start(1), m.end(1)));
  230. }
  231. } else if(isFunctionValuedTermMap()) {
  232. for(TermMap tm : functionCall.getTermMaps()) {
  233. set.addAll(tm.getReferencedColumns());
  234. }
  235. } // else constant and thus empty set.
  236. return set;
  237. }
  238. private String distillColumnName(RDFNode node) {
  239. if(!node.isLiteral())
  240. return null;
  241. if(!node.asLiteral().getDatatype().getURI().equals(XSD.xstring.getURI()))
  242. return null;
  243. String s = node.asLiteral().toString();
  244. if(!R2RMLUtil.isValidColumnName(s))
  245. return null;
  246. return s;
  247. }
  248. public boolean isTemplateValuedTermMap() {
  249. return template != null;
  250. }
  251. public boolean isColumnValuedTermMap() {
  252. return column != null;
  253. }
  254. public boolean isConstantValuedTermMap() {
  255. return constant != null;
  256. }
  257. public boolean isFunctionValuedTermMap() {
  258. return functionCall != null;
  259. }
  260. public Resource getTermType() {
  261. return termType;
  262. }
  263. public boolean isTermTypeBlankNode() {
  264. return getTermType().getURI().equals(R2RML.BLANKNODE.getURI());
  265. }
  266. public boolean isTermTypeIRI() {
  267. return getTermType().getURI().equals(R2RML.IRI.getURI());
  268. }
  269. public boolean isTermTypeLiteral() {
  270. return getTermType().getURI().equals(R2RML.LITERAL.getURI());
  271. }
  272. public RDFNode generateRDFTerm(Row row) throws R2RMLException {
  273. Object value = getValueForRDFTerm(row);
  274. // If value is NULL, then no RDF term is generated.
  275. if(value == null) {
  276. return null;
  277. }
  278. else if(isTermTypeIRI()) {
  279. /* Otherwise, if the term map's term type is rr:IRI: 1. Let value
  280. * be the natural RDF lexical form corresponding to value. 2. If
  281. * value is a valid absolute IRI [RFC3987], then return an IRI
  282. * generated from value. 3. Otherwise, prepend value with the base
  283. * IRI. If the result is a valid absolute IRI [RFC3987], then
  284. * return an IRI generated from the result. 4. Otherwise, raise a
  285. * data error.
  286. */
  287. IRI iri = IRIFactory.iriImplementation().create(value.toString());
  288. if(iri.isAbsolute())
  289. return ResourceFactory.createResource(convertToIRISafeVersion(iri));
  290. iri = IRIFactory.iriImplementation().create(baseIRI + value);
  291. if(iri.isAbsolute())
  292. return ResourceFactory.createResource(convertToIRISafeVersion(iri));
  293. throw new R2RMLException("Data error. " + baseIRI + value + " is not a valid absolute IRI", null);
  294. }
  295. /*
  296. * Otherwise, if the term type is rr:BlankNode: Return a blank node
  297. * that is unique to the natural RDF lexical form corresponding to
  298. * value.
  299. */
  300. else if(isTermTypeBlankNode()) {
  301. Resource r = blankNodeMap.get(value);
  302. if(r == null) {
  303. r = ResourceFactory.createResource();
  304. blankNodeMap.put(value, r);
  305. }
  306. return r;
  307. }
  308. /*
  309. * Otherwise, if the term type is rr:Literal:
  310. * 1. If the term map has a specified language tag, then return a plain
  311. * literal with that language tag and with the natural RDF lexical
  312. * form corresponding to value.
  313. * 2. Otherwise, if the term map has a non-empty specified datatype
  314. * that is different from the natural RDF datatype corresponding to
  315. * the term map's implicit SQL datatype, then return the datatype-
  316. * override RDF literal corresponding to value and the specified
  317. * datatype.
  318. * Otherwise, return the natural RDF literal corresponding to value.
  319. *
  320. * // TODO: we use Jena's converter...
  321. */
  322. else if(isTermTypeLiteral()) {
  323. if(language != null) {
  324. return ResourceFactory.createLangLiteral(value.toString(), language);
  325. }
  326. if(datatype != null) {
  327. RDFDatatype d = R2RMLTypeMapper.getTypeByName(datatype);
  328. return ResourceFactory.createTypedLiteral(value.toString(), d);
  329. }
  330. return ResourceFactory.createTypedLiteral(value);
  331. }
  332. return null;
  333. }
  334. /**
  335. * Private method to create safe IRIs. It does percent encoding. According
  336. * to the R2RML Standard, however, some characters (like kanji) should not
  337. * be percent encoded. This is what jena does. We need to find a library
  338. * that better complies with the standard.
  339. *
  340. * 42 -> 42 OK
  341. * Hello World! -> Hello%20World%21 OK
  342. * 2011-08-23T22:17:00Z -> 2011-08-23T22%3A17%3A00Z OK
  343. * ~A_17.1-2 -> ~A_17.1-2 OK
  344. * 葉篤正 -> 葉篤正 NOK!
  345. *
  346. * TODO: Better compliant safe IRI conversion.
  347. *
  348. * @param iri
  349. * @return
  350. * @throws R2RMLException
  351. */
  352. private String convertToIRISafeVersion(IRI iri) throws R2RMLException {
  353. try {
  354. return iri.toASCIIString();
  355. } catch (MalformedURLException e) {
  356. throw new R2RMLException("Problem generating safe IRI " + iri, e);
  357. }
  358. }
  359. private Object getValueForRDFTerm(Row row) throws R2RMLException {
  360. if(isConstantValuedTermMap()) {
  361. return constant;
  362. } else if(isColumnValuedTermMap()) {
  363. return row.getObject(column);
  364. } else if(isTemplateValuedTermMap()) {
  365. String value = new String(template);
  366. for(String reference : getReferencedColumns()) {
  367. Object object = row.getObject(reference);
  368. // If one of the values is NULL, we don't generate the term.
  369. // We need to check if this is the desired approach for templates
  370. // with multiple variables.
  371. if(object == null)
  372. return null;
  373. String string = object.toString();
  374. // first argument is a regular expression, therefore we
  375. // have to escape the curly braces, but in a string that
  376. // also means escaping the escape character.
  377. value = value.replaceAll("\\{" + reference + "\\}", string);
  378. }
  379. // Unescape all the values!
  380. value = StringEscapeUtils.unescapeJava(value);
  381. return value;
  382. } else if (isFunctionValuedTermMap()) {
  383. List<Object> arguments = new ArrayList<>();
  384. for(TermMap tm : functionCall.getTermMaps()) {
  385. Object argument = tm.getValueForRDFTerm(row);
  386. arguments.add(argument);
  387. }
  388. try {
  389. return JSEnv.invoke(functionCall.getFunctionName(), arguments.toArray());
  390. } catch (NoSuchMethodException | ScriptException e) {
  391. throw new R2RMLException("Error invoking function.", e);
  392. }
  393. }
  394. return null;
  395. }
  396. }