Skip to content

Commit

Permalink
Add tests demonstrating forward-slash behaviors in Turtle, JSON-LD, a…
Browse files Browse the repository at this point in the history
…nd SPARQL (#1872)

This patch adds tests to verify that forward slashes, and escaped forward slashes, work correctly in Turtle, JSON-LD and SPARQL.

Some of the added tests check that SPARQL queries that contain seemingly escaped forward slashes as part of `PN_LOCAL` fail to execute as the SPARQL spec does not allow for escaped forward slashes in `PN_LOCAL`.

Currently, however, the RDFLib SPARQL processor does allow escaped forward slashes as part of `PN_LOCAL`, so these tests have been marked with xfail: RDFLib should provide some way to parse SPARQL in strict mode, and in this mode leniencies should be disabled and any non-standard SPARQL should be treated as invalid.

Co-authored-by: Iwan Aucamp <[email protected]>
  • Loading branch information
ajnelson-nist and aucampia authored May 17, 2022
1 parent 1e16caf commit ccb9c4a
Show file tree
Hide file tree
Showing 8 changed files with 253 additions and 0 deletions.
10 changes: 10 additions & 0 deletions test/data/variants/forward_slash-asserts.json
Original file line number Diff line number Diff line change
@@ -0,0 1,10 @@
{
"quad_count": 4,
"exact_match": true,
"has_subject_iris": [
"http://example.org/kb/individual-a",
"http://example.org/kb/individual-b",
"http://example.org/ontology/core/MyClassA",
"http://example.org/ontology/core/MyClassB"
]
}
33 changes: 33 additions & 0 deletions test/data/variants/forward_slash-variant-prefixed.jsonld
Original file line number Diff line number Diff line change
@@ -0,0 1,33 @@
{
"@context": {
"ex": "http://example.org/ontology/",
"kb": "http://example.org/kb/",
"owl": "http://www.w3.org/2002/07/owl#"
},
"_comment": [
"The JSON-LD spec does not provide a grammar production rule set in,",
"EBNF. However, the section on compact IRIs indicates that an IRI can",
"be prefixed at any point that would not result in a suffix starting",
"with \"//\". Hence, an unpaired forward slash, as a legal character of",
"an IRI, can appear in the suffix component of a compact IRI.",
"https://json-ld.org/spec/latest/json-ld/#compact-iris"
],
"@graph": [
{
"@id": "kb:individual-a",
"@type": "ex:core/MyClassA"
},
{
"@id": "ex:core/MyClassA",
"@type": "owl:Class"
},
{
"@id": "kb:individual-b",
"@type": "ex:core/MyClassB"
},
{
"@id": "ex:core/MyClassB",
"@type": "owl:Class"
}
]
}
18 changes: 18 additions & 0 deletions test/data/variants/forward_slash-variant-prefixed.ttl
Original file line number Diff line number Diff line change
@@ -0,0 1,18 @@
@prefix ex: <http://example.org/ontology/> .
@prefix kb: <http://example.org/kb/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .

# Spell a class name with prefixing, but have the prefixing NOT include
# one of the forward-slashed path components.
# The forward slash must be escaped, according to the Turtle grammar
# production rules including and between PN_LOCAL and
# PN_LOCAL_ESC.
# https://www.w3.org/TR/turtle/#sec-grammar-grammar

ex:core\/MyClassA a owl:Class .

kb:individual-a a ex:core\/MyClassA .

ex:core\/MyClassB a owl:Class .

kb:individual-b a ex:core\/MyClassB .
24 changes: 24 additions & 0 deletions test/data/variants/forward_slash.jsonld
Original file line number Diff line number Diff line change
@@ -0,0 1,24 @@
{
"@context": {
"kb": "http://example.org/kb/",
"owl": "http://www.w3.org/2002/07/owl#"
},
"@graph": [
{
"@id": "kb:individual-a",
"@type": "http://example.org/ontology/core/MyClassA"
},
{
"@id": "http://example.org/ontology/core/MyClassA",
"@type": "owl:Class"
},
{
"@id": "kb:individual-b",
"@type": "http://example.org/ontology/core/MyClassB"
},
{
"@id": "http://example.org/ontology/core/MyClassB",
"@type": "owl:Class"
}
]
}
4 changes: 4 additions & 0 deletions test/data/variants/forward_slash.nt
Original file line number Diff line number Diff line change
@@ -0,0 1,4 @@
<http://example.org/ontology/core/MyClassA> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class> .
<http://example.org/kb/individual-a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/ontology/core/MyClassA> .
<http://example.org/ontology/core/MyClassB> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class> .
<http://example.org/kb/individual-b> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/ontology/core/MyClassB> .
10 changes: 10 additions & 0 deletions test/data/variants/forward_slash.ttl
Original file line number Diff line number Diff line change
@@ -0,0 1,10 @@
@prefix kb: <http://example.org/kb/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .

<http://example.org/ontology/core/MyClassA> a owl:Class .

kb:individual-a a <http://example.org/ontology/core/MyClassA> .

<http://example.org/ontology/core/MyClassB> a owl:Class .

kb:individual-b a <http://example.org/ontology/core/MyClassB> .
14 changes: 14 additions & 0 deletions test/test_graph/test_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 49,26 @@ class GraphAsserts:

quad_count: Optional[int] = None
exact_match: bool = False
has_subject_iris: Optional[List[str]] = None

def check(
    self, first_graph: Optional[ConjunctiveGraph], graph: ConjunctiveGraph
) -> None:
    """
    Run the configured assertions against ``graph``.

    ``first_graph`` is ``None`` when this is the first check, i.e. before
    any other graphs have been processed; otherwise it is the graph that
    ``graph`` is compared against.
    """
    # The quad count is verified for every graph that is checked.
    if self.quad_count is not None:
        assert self.quad_count == sum(1 for _ in graph.quads())

    # Subsequent graphs must contain the same quads as the first one.
    if first_graph is not None and self.exact_match:
        GraphHelper.assert_quad_sets_equals(first_graph, graph)

    # Subject IRIs are only verified on the first check.
    if first_graph is None and self.has_subject_iris is not None:
        found_iris = set()
        for node in graph.subjects():
            if isinstance(node, URIRef):
                found_iris.add(f"{node}")
        assert set(self.has_subject_iris) == found_iris


@dataclass(order=True)
Expand Down Expand Up @@ -219,6 231,8 @@ def test_variants(graph_variant: GraphVariants) -> None:
assert len(graph_variant.variants) > 0
first_graph: Optional[ConjunctiveGraph] = None
first_path: Optional[Path] = None
logging.debug("graph_variant.asserts = %s", graph_variant.asserts)

for variant_key, variant_path in graph_variant.variants.items():
logging.debug("variant_path = %s", variant_path)
format = guess_format(variant_path.name, fmap=SUFFIX_FORMAT_MAP)
Expand Down
140 changes: 140 additions & 0 deletions test/test_sparql/test_forward_slash_escapes.py
Original file line number Diff line number Diff line change
@@ -0,0 1,140 @@
#!/usr/bin/env python3

# This software was developed at the National Institute of Standards
# and Technology by employees of the Federal Government in the course
# of their official duties. Pursuant to title 17 Section 105 of the
# United States Code this software is not subject to copyright
# protection and is in the public domain. NIST assumes no
# responsibility whatsoever for its use by other parties, and makes
# no guarantees, expressed or implied, about its quality,
# reliability, or any other characteristic.
#
# We would appreciate acknowledgement if the software is used.

"""
This test-set demonstrates usage of identifier prefixing and the
forward-slash character in Turtle, JSON-LD, and SPARQL. The motivating
use case originated with attempts to interact with IANA Media Types as
prefixed concepts, e.g. "application/json" somehow being
"mime:application/json".
"""

from test.data import TEST_DATA_DIR
from test.utils.graph import cached_graph
from typing import Set

import pytest

from rdflib import Graph
from rdflib.plugins.sparql.processor import prepareQuery
from rdflib.plugins.sparql.sparql import Query

# Query that names the class with its full (expanded) IRI, so no prefixed
# name -- and therefore no escaping -- is involved.
query_string_expanded = r"""
SELECT ?nIndividual
WHERE {
?nIndividual a <http://example.org/ontology/core/MyClassB> .
}"""

# NOTE: This is expected to fail. The SPARQL grammar production rules
# for prefixed IRIs, especially at production rule PN_LOCAL, have no way
# to reach the forward-slash or backslash characters.
# https://www.w3.org/TR/rdf-sparql-query/#grammar
# NOTE(review): the URL above appears to be the SPARQL 1.0 grammar; the
# SPARQL 1.1 grammar seems to add a PN_LOCAL_ESC production -- confirm
# which version this note is meant to describe.
query_string_prefixed = r"""
PREFIX ex: <http://example.org/ontology/>
SELECT ?nIndividual
WHERE {
# NOTE: Syntax is incorrect - forward slash cannot be included in
# local component of name.
?nIndividual a ex:core\/MyClassB .
}"""

# Reason text shared by the ``xfail`` markers on the prefixed-query tests in
# this module; it is what pytest reports for those expected failures.
PN_LOCAL_BACKSLASH_XFAIL_REASON = """
Contrary to the ratified SPARQL 1.1 grammar, the RDFlib SPARQL processor
accepts backslashes as part of PN_LOCAL which it treats as escape
characters.
There should be a way to instruct the SPARQL parser to operate in strict
mode, and in strict mode backslashes should not be permitted in PN_LOCAL.
See https://github.com/RDFLib/rdflib/issues/1871
"""


def _test_query_prepares(query_string: str) -> None:
    """
    Check that ``query_string`` can be prepared by the SPARQL engine.

    This exercises parse behavior for the case where a concept would be
    prefixed at a point that leaves a forward-slash character in the
    suffix. The ``ex``, ``kb`` and ``owl`` prefixes are supplied as
    initial namespaces.
    """
    # TODO: A 'strict' flag for prepareQuery is under consideration to
    # adjust parse behavior around backslash characters.
    prepared = prepareQuery(
        query_string,
        initNs={
            "ex": "http://example.org/ontology/",
            "kb": "http://example.org/kb/",
            "owl": "http://www.w3.org/2002/07/owl#",
        },
    )
    assert isinstance(prepared, Query)


def test_query_prepares_expanded() -> None:
    """The query written with a fully expanded IRI must prepare cleanly."""
    _test_query_prepares(query_string=query_string_expanded)


@pytest.mark.xfail(reason=PN_LOCAL_BACKSLASH_XFAIL_REASON)
def test_query_prepares_prefixed() -> None:
    """
    Preparing the prefixed query, whose local name contains an escaped
    forward slash, is expected to raise ``ValueError``.
    """
    expect_rejection = pytest.raises(ValueError)
    with expect_rejection:
        _test_query_prepares(query_string=query_string_prefixed)


def _test_escapes_and_query(
    graph: Graph, query_string: str, expected_query_compiled: bool
) -> None:
    """
    Confirm search-results behavior of SPARQL engine when a concept
    would be prefixed at a point that leaves a forward-slash character
    in the suffix.

    Note that _test_query_prepares also exercises the expected parse
    failure. This parameterized test is more for demonstrating that
    searching can work without prefixes.

    :param graph: The graph to run the query against.
    :param query_string: The SPARQL query text to prepare and execute.
    :param expected_query_compiled: Whether ``prepareQuery`` is expected
        to succeed for ``query_string``.
    :raises AssertionError: If the compile outcome differs from
        ``expected_query_compiled``, or if the query results differ from
        the single expected individual.
    """
    expected: Set[str] = {
        "http://example.org/kb/individual-b",
    }

    # Only the prepare call is guarded; any exception from it is treated as
    # "the query did not compile", which is exactly what is being measured.
    try:
        query_object = prepareQuery(query_string)
    except Exception:
        query_compiled = False
    else:
        query_compiled = True
    assert expected_query_compiled == query_compiled

    if not query_compiled:
        # The query was (as expected) rejected, so there is nothing to
        # execute. Without this guard ``query_object`` would be unbound and
        # the loop below would fail with an UnboundLocalError.
        return

    # The first column of each result row is the matched individual.
    computed: Set[str] = {str(result[0]) for result in graph.query(query_object)}

    assert expected == computed


def test_escapes_and_query_turtle_expanded() -> None:
    """Run the expanded-IRI query against the Turtle data; it must compile."""
    data_path = TEST_DATA_DIR / "variants/forward_slash.ttl"
    _test_escapes_and_query(cached_graph((data_path,)), query_string_expanded, True)


@pytest.mark.xfail(reason=PN_LOCAL_BACKSLASH_XFAIL_REASON, raises=AssertionError)
def test_escapes_and_query_turtle_prefixed() -> None:
    """
    Run the prefixed query against the Turtle data, expecting the query
    not to compile.
    """
    data_path = TEST_DATA_DIR / "variants/forward_slash.ttl"
    _test_escapes_and_query(cached_graph((data_path,)), query_string_prefixed, False)


def test_escapes_and_query_jsonld_expanded() -> None:
    """Run the expanded-IRI query against the JSON-LD data; it must compile."""
    data_path = TEST_DATA_DIR / "variants/forward_slash.jsonld"
    _test_escapes_and_query(cached_graph((data_path,)), query_string_expanded, True)


@pytest.mark.xfail(reason=PN_LOCAL_BACKSLASH_XFAIL_REASON, raises=AssertionError)
def test_escapes_and_query_jsonld_prefixed() -> None:
    """
    Run the prefixed query against the JSON-LD data, expecting the query
    not to compile.
    """
    data_path = TEST_DATA_DIR / "variants/forward_slash.jsonld"
    _test_escapes_and_query(cached_graph((data_path,)), query_string_prefixed, False)

0 comments on commit ccb9c4a

Please sign in to comment.