Skip to content

Commit

Permalink
[wikidata] supporting constraints on properties (Pxxx)
Browse files Browse the repository at this point in the history
  • Loading branch information
sebferre committed Dec 2, 2022
1 parent daff08d commit f446f8e
Show file tree
Hide file tree
Showing 7 changed files with 5,458 additions and 5,367 deletions.
18 changes: 14 additions & 4 deletions jsutils.ml
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 743,8 @@ end
(* Wikidata services *)
module Wikidata =
struct
let prefix_property = "Property:"
let prefix_property_len = String.length prefix_property
let entities_of_json ojson : (string list, exn) Result.t =
try
let oquery = Unsafe.get ojson (string "query") in
Expand All @@ -752,21 754,29 @@ module Wikidata =
for i = n-1 downto 0 do
let oresult = Unsafe.get osearch (string (string_of_int i)) in
let otitle = Unsafe.get oresult (string "title") in
le := (Js.to_string otitle)::!le
let title = Js.to_string otitle in
let e =
let title_len = String.length title in
if Common.has_prefix title prefix_property
then (* Pxxx *) String.sub title prefix_property_len (title_len - prefix_property_len)
else (* Qxxx *) title in
le := e::!le
done;
firebug (string_of_int n ^ " wikidata entities found");
firebug (string_of_int n ^ " wikidata entities (items and properties) found");
Result.Ok !le
with exn ->
Result.Error (Failure ("Wikidata entity search: unexpected JSON: " ^ Printexc.to_string exn))

let ajax_entity_search (query : string) (limit : int) (k : (string list, exn) Result.t -> unit) : unit =
let ajax_entity_search (query : string) (limit : int) (k : (string list, exn) Result.t -> unit) : unit = (* the returned list is made of Pxxx and Qxxx, property and item identifiers *)
if String.length query < 3
then k (Result.Error (Failure "Wikidata entity search: query too short (less than 3 cars)"))
else
let _ = firebug ("Wikidata entity search: " ^ query) in
let query_url =
Printf.sprintf
"https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srlimit=%d&srsearch=%s"
"https://www.wikidata.org/w/api.php?action=query&list=search&srnamespace=0|120&format=json&srlimit=%d&srsearch=%s"
(* namespace 0 is for items, and namespace 120 is for properties *)
(* SEE https://www.mediawiki.org/w/api.php?action=help&modules=query+search for for doc *)
(*"https://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&language=en&limit=%d&search=%s" (* type=item|property *) NOTE: less flexible search *)
limit
(Url.urlencode query) in
Expand Down
53 changes: 41 additions & 12 deletions lis.ml
Original file line number Diff line number Diff line change
Expand Up @@ -786,11 786,17 @@ let ajax_external_search_constr ~endpoint (search : Lisql.search) (k : (Lisql.co
Jsutils.Wikidata.ajax_entity_search
query limit
(function
| Result.Ok lq ->
| Result.Ok le ->
let lt =
List.map
(fun q -> Rdf.URI (Rdf.wikidata_entity q))
lq in
(fun e ->
match e.[0] with
| 'P' ->
if config_nary_relations#value (* see Lisq2sparql.WhichProp/Pred *)
then Rdf.URI (Rdf.wikidata_prop e)
else Rdf.URI (Rdf.wikidata_prop_direct e)
| _ (* 'Q' *) -> Rdf.URI (Rdf.wikidata_entity e))
le in
k (Result.Ok (Lisql.ExternalSearch (search, Some lt)))
| (Result.Error _ as err) -> k err)
| TextQuery kwds ->
Expand Down Expand Up @@ -1472,10 1478,11 @@ object (self)
let suggestions = {partial; forest} in
let _, suggestions = hook_suggestions (Concepts, suggestions) in
k (Result.Ok suggestions)) in
let process_wikidata_with_external_search (lx : Rdf.var list) (lt : Rdf.term list) results_class =
let process_wikidata_with_external_search (lx : Rdf.var list) (ltq : Rdf.term list) (ltp : Rdf.term list) results_class =
let freq = { value=(if freq0 then 0 else 1); max_value=None; partial=true; unit=Entities } in
let incr_index = new incr_freq_tree_index term_hierarchy in
let open Sparql_endpoint in
(* adding class increments *)
( match results_class.bindings with
| binding::_ -> (* LIMIT 1 => at most one binding *)
List.iter2
Expand All @@ -1491,8 1498,26 @@ object (self)
(fun incr -> incr_index#add (incr, Some freq))
| _ -> ()
with Not_found -> assert false)
lx lt
lx ltq
| _ -> () );
(* adding property increments *)
List.iter
(fun t ->
match t with
| Rdf.URI uri ->
Ontology.enqueue_property uri;
Lexicon.enqueue_property uri;
let incrs =
if config_nary_relations#value
then
let uri_stat = Rdf.wikidata_rebase uri Rdf.wikidata_prop_base Rdf.wikidata_prop_statement_base in
Lisql2sparql.WhichPred.increments_of_terms ~init:true [Some t; None; Some (Rdf.URI uri_stat); None]
else Lisql2sparql.WhichProp.increments_of_terms ~init:true [Some t; None] in
List.iter
(fun incr -> incr_index#add (incr, Some freq))
incrs
| _ -> ())
ltp;
sync_concepts (fun () ->
let forest = incr_index#filter_map_forest ~inverse (fun x -> Some x) in
let suggestions = {partial = true; forest} in
Expand All @@ -1516,7 1541,13 @@ object (self)
:> string)),
None
| Lisql.ExternalSearch (_, Some lt) ->
let lx = List.mapi (fun i t -> "x" ^ string_of_int (i 1)) lt in
let ltq, ltp = (* separating Qxxx and Pxxx *)
List.partition
(function
| Rdf.URI uri -> Common.has_prefix uri Rdf.wikidata_entity_base
| _ -> true) (* should not happen *)
lt in
let lx = List.mapi (fun i t -> "x" ^ string_of_int (i 1)) ltq in
Sparql.((select
~distinct:false
~projections:(List.map (fun x -> `Bare, x) lx)
Expand All @@ -1525,9 1556,9 @@ object (self)
(join
(List.map2
(fun x t -> optional (rdf_type (var x) (term t)))
lx lt))
lx ltq))
:> string)),
Some (lx,lt)
Some (lx,ltq,ltp)
| _ -> "", None in (* avoiding timeouts in evaluations *)
Jsutils.firebug sparql_class;
if sparql_class = ""
Expand All @@ -1538,7 1569,7 @@ object (self)
(function
| [results_class] ->
( match lt_opt with
| Some (lx,lt) -> process_wikidata_with_external_search lx lt results_class
| Some (lx,ltq,ltp) -> process_wikidata_with_external_search lx ltq ltp results_class
| None -> process_wikidata results_class )
| _ -> assert false)
(fun code -> k (Result.Error (Failure ("Initial concept suggestions: HTTP error code " ^ string_of_int code))))
Expand All @@ -1555,8 1586,6 @@ object (self)
k (Result.Ok {partial = false; forest = []}) (* only constraints on aggregations (HAVING clause) *)
else if focus_descr#unconstrained then
self#ajax_forest_properties_init ~inverse constr elts k
else if Rdf.config_wikidata_mode#value && constr <> Lisql.True then
k (Result.Error (Failure "Concept suggestions: constraint not supported on Wikidata, except for initial suggestions")) (* timeout pbs *)
else begin
let hierarchy_focus_as_incr_opt =
let open Lisql in
Expand Down Expand Up @@ -2113,7 2142,7 @@ object (self)
IsExactly "...";
StartsWith "...";
EndsWith "..." ] in
if Rdf.config_wikidata_mode#value && focus_descr#unconstrained then
if Rdf.config_wikidata_mode#value then
ExternalSearch (WikidataSearch ["..."], None) :: l_constr
(*else if Lisql2sparql.config_fulltext_search#value = "text:query" then
ExternalSearch (`TextQuery ["..."], None) :: l_constr*)
Expand Down
11 changes: 11 additions & 0 deletions lisql.ml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 113,17 @@ let reset_constr : constr -> constr = function
| HasDatatype _ -> HasDatatype "..."
| ExternalSearch (s, _) -> ExternalSearch (reset_search s, None)

let filter_external_search (f : Rdf.uri -> bool) : constr -> constr = function
| ExternalSearch (s, Some lt) ->
let lt = (* TODO: should handle empty list *)
List.filter
(function
| Rdf.URI uri -> f uri
| t -> true) (* should not happen *)
lt in
ExternalSearch (s, Some lt)
| c -> c

(* LISQL modifiers *)

type num_conv_type = IntegerConv | DecimalConv | DoubleConv
Expand Down
69 changes: 40 additions & 29 deletions lisql2sparql.ml
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 242,34 @@ let filter_constr_gen (ctx : filter_context) (gv : genvar) ~(label_properties_la
| ExternalSearch (_, None) -> Sparql.True
| ExternalSearch (_, Some lt) ->
Sparql.formula_term_in_term_list t (List.map Sparql.term lt)

let filter_constr_entity gv t c (ft : Lisql.filter_type) = filter_constr_gen (`Terms,ft,`Filter) gv ~label_properties_langs:Lexicon.config_entity_lexicon#properties_langs t c
let filter_constr_class gv t c = filter_constr_gen (`Properties,OnlyIRIs,`Filter) gv ~label_properties_langs:Lexicon.config_concept_lexicon#properties_langs t c
let filter_constr_property gv t c = filter_constr_gen (`Properties,OnlyIRIs,`Filter) gv ~label_properties_langs:Lexicon.config_concept_lexicon#properties_langs t c

let filter_constr_entity gv t c (ft : Lisql.filter_type) =
let c =
if Rdf.config_wikidata_mode#value
then
Lisql.filter_external_search
(fun uri -> Common.has_prefix uri Rdf.wikidata_entity_base)
c
else c in
filter_constr_gen (`Terms,ft,`Filter) gv ~label_properties_langs:Lexicon.config_entity_lexicon#properties_langs t c
let filter_constr_class gv t c =
let c =
if Rdf.config_wikidata_mode#value
then
Lisql.filter_external_search
(fun uri -> Common.has_prefix uri Rdf.wikidata_entity_base)
c
else c in
filter_constr_gen (`Properties,OnlyIRIs,`Filter) gv ~label_properties_langs:Lexicon.config_concept_lexicon#properties_langs t c
let filter_constr_property gv t c =
let c =
if Rdf.config_wikidata_mode#value
then
Lisql.filter_external_search
(fun uri -> not (Common.has_prefix uri Rdf.wikidata_entity_base))
c
else c in
filter_constr_gen (`Properties,OnlyIRIs,`Filter) gv ~label_properties_langs:Lexicon.config_concept_lexicon#properties_langs t c

let search_constr_entity (gv : genvar) (t : _ Sparql.any_term) (c : constr) (ft : Lisql.filter_type) : Sparql.formula =
let label_properties_langs = Lexicon.config_entity_lexicon#properties_langs in
Expand Down Expand Up @@ -507,13 531,12 @@ module WhichPred =
| Some t -> false, t in
if Rdf.config_wikidata_mode#value
then
let make_pat p1 p2 pat =
let pat = Sparql.join [pat; filter_wikidata p1 p2] in
make_pat ?hook p1 pat in
let pat_wikidata =
Sparql.(union
let pat_wikidata p1 p2 pat =
Sparql.join [pat; filter_wikidata p1 p2] in
let pat =
Sparql.(union
[ if Rdf.term_can_be_subject t
then make_pat "pe" "po"
then pat_wikidata "pe" "po"
(triple (* forward: pe, po *)
(term t)
(var "pe")
Expand All @@ -523,25 546,13 @@ module WhichPred =

if init
then empty
else join (* backward: pe, ps, po *)
[ make_pat "pe" "ps"
(triple
(bnode "")
(var "pe")
(bnode_triples
[ var "ps", term t ])) ] (* binding 'ps' to distinguish orientation *)
(*join (* qualifier: pe, po, pq *)
[ triple
(bnode "")
(var "pe")
(bnode_triples
[ var "po", bnode "";
var "pq", term t ]);
filter
(expr_infix "!=" [var "pq"; var "po"])
]*)
]) in
pat_wikidata
else pat_wikidata "pe" "ps"
(triple (* backward: pe, ps, po *)
(bnode "")
(var "pe")
(bnode_triples
[ var "ps", term t ])) ]) in (* binding 'ps' to distinguish orientation *)
make_pat ?hook "pe" pat
else
let pat_SO =
Sparql.(join
Expand Down
19 changes: 18 additions & 1 deletion rdf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 66,27 @@ let wikibase_Property = "http://wikiba.se/ontology#Property"
let wikibase_directClaim = "http://wikiba.se/ontology#directClaim"
let wikibase_claim = "http://wikiba.se/ontology#claim"
let wikibase_statementProperty = "http://wikiba.se/ontology#statementProperty"
let wikidata_entity (q : string) = "http://www.wikidata.org/entity/" ^ q
let wikidata_entity_base = "http://www.wikidata.org/entity/"
let wikidata_entity (q : string) = wikidata_entity_base ^ q
let wikidata_prop_base = "http://www.wikidata.org/prop/"
let wikidata_prop (p : string) = wikidata_prop_base ^ p
let wikidata_prop_statement_base = "http://www.wikidata.org/prop/statement/"
let wikidata_prop_statement (p : string) = wikidata_prop_statement_base ^ p
let wikidata_prop_direct_base = "http://www.wikidata.org/prop/direct/"
let wikidata_prop_direct (p : string) = wikidata_prop_direct_base ^ p
let wikidata_Q (n : int) = "http://www.wikidata.org/entity/Q" ^ string_of_int n
let wikidata_P (n : int) = "http://www.wikidata.org/prop/P" ^ string_of_int n
let p_P625 = wikidata_P 625 (* Wikidata: geographical coordinates *)
let wikidata_rebase uri base new_base =
if Common.has_prefix uri base
then
let k = String.length base in
let name = String.sub uri k (String.length uri - k) in
new_base ^ name
else (
Jsutils.firebug ("Rdf.wikidata_rebase: unexpected URI: " ^ uri);
uri)


let lat_long_properties =
[ "http://www.w3.org/2003/01/geo/wgs84_pos#lat", "http://www.w3.org/2003/01/geo/wgs84_pos#long";
Expand Down
1 change: 1 addition & 0 deletions sparql.ml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 102,7 @@ object (self)
("http://www.wikidata.org/prop/statement/","ps:");
("http://www.wikidata.org/prop/","p:");
("http://www.wikidata.org/prop/direct/","wdt:");
("http://www.wikidata.org/entity/","wd:");
("http://wikiba.se/ontology#","wikibase:");
("http://purl.org/dc/terms/", "dcterms:");
("http://purl.org/dc/elements/1.1/", "dc:");
Expand Down
10,654 changes: 5,333 additions & 5,321 deletions webapp/osparklis.js

Large diffs are not rendered by default.

0 comments on commit f446f8e

Please sign in to comment.