From f446f8e8745efb5349aa7eb2ee4d4c0b29a92f9e Mon Sep 17 00:00:00 2001 From: Sebastien Ferre Date: Fri, 2 Dec 2022 13:59:13 +0100 Subject: [PATCH] [wikidata] supporting constraints on properties (Pxxx) --- jsutils.ml | 18 +- lis.ml | 53 +- lisql.ml | 11 + lisql2sparql.ml | 69 +- rdf.ml | 19 +- sparql.ml | 1 + webapp/osparklis.js | 10654 +++++++++++++++++++++--------------------- 7 files changed, 5458 insertions(+), 5367 deletions(-) diff --git a/jsutils.ml b/jsutils.ml index 0bbae76..e5cfaa4 100644 --- a/jsutils.ml +++ b/jsutils.ml @@ -743,6 +743,8 @@ end (* Wikidata services *) module Wikidata = struct + let prefix_property = "Property:" + let prefix_property_len = String.length prefix_property let entities_of_json ojson : (string list, exn) Result.t = try let oquery = Unsafe.get ojson (string "query") in @@ -752,21 +754,29 @@ module Wikidata = for i = n-1 downto 0 do let oresult = Unsafe.get osearch (string (string_of_int i)) in let otitle = Unsafe.get oresult (string "title") in - le := (Js.to_string otitle)::!le + let title = Js.to_string otitle in + let e = + let title_len = String.length title in + if Common.has_prefix title prefix_property + then (* Pxxx *) String.sub title prefix_property_len (title_len - prefix_property_len) + else (* Qxxx *) title in + le := e::!le done; - firebug (string_of_int n ^ " wikidata entities found"); + firebug (string_of_int n ^ " wikidata entities (items and properties) found"); Result.Ok !le with exn -> Result.Error (Failure ("Wikidata entity search: unexpected JSON: " ^ Printexc.to_string exn)) - let ajax_entity_search (query : string) (limit : int) (k : (string list, exn) Result.t -> unit) : unit = + let ajax_entity_search (query : string) (limit : int) (k : (string list, exn) Result.t -> unit) : unit = (* the returned list is made of Pxxx and Qxxx, property and item identifiers *) if String.length query < 3 then k (Result.Error (Failure "Wikidata entity search: query too short (less than 3 cars)")) else let _ = firebug ("Wikidata entity search: " ^ query) in let query_url = Printf.sprintf - "https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srlimit=%d&srsearch=%s" + "https://www.wikidata.org/w/api.php?action=query&list=search&srnamespace=0|120&format=json&srlimit=%d&srsearch=%s" + (* namespace 0 is for items, and namespace 120 is for properties *) + (* SEE https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bsearch for for doc *) (*"https://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&language=en&limit=%d&search=%s" (* type=item|property *) NOTE: less flexible search *) limit (Url.urlencode query) in diff --git a/lis.ml b/lis.ml index fd1466c..77cbf6b 100644 --- a/lis.ml +++ b/lis.ml @@ -786,11 +786,17 @@ let ajax_external_search_constr ~endpoint (search : Lisql.search) (k : (Lisql.co Jsutils.Wikidata.ajax_entity_search query limit (function - | Result.Ok lq -> + | Result.Ok le -> let lt = List.map - (fun q -> Rdf.URI (Rdf.wikidata_entity q)) - lq in + (fun e -> + match e.[0] with + | 'P' -> + if config_nary_relations#value (* see Lisq2sparql.WhichProp/Pred *) + then Rdf.URI (Rdf.wikidata_prop e) + else Rdf.URI (Rdf.wikidata_prop_direct e) + | _ (* 'Q' *) -> Rdf.URI (Rdf.wikidata_entity e)) + le in k (Result.Ok (Lisql.ExternalSearch (search, Some lt))) | (Result.Error _ as err) -> k err) | TextQuery kwds -> @@ -1472,10 +1478,11 @@ object (self) let suggestions = {partial; forest} in let _, suggestions = hook_suggestions (Concepts, suggestions) in k (Result.Ok suggestions)) in - let process_wikidata_with_external_search (lx : Rdf.var list) (lt : Rdf.term list) results_class = + let process_wikidata_with_external_search (lx : Rdf.var list) (ltq : Rdf.term list) (ltp : Rdf.term list) results_class = let freq = { value=(if freq0 then 0 else 1); max_value=None; partial=true; unit=Entities } in let incr_index = new incr_freq_tree_index term_hierarchy in let open Sparql_endpoint in + (* adding class increments *) ( match results_class.bindings with | binding::_ -> (* LIMIT 1 => at most one binding *) List.iter2 @@ -1491,8 +1498,26 @@ object (self) (fun incr -> incr_index#add (incr, Some freq)) | _ -> () with Not_found -> assert false) - lx lt + lx ltq | _ -> () ); + (* adding property increments *) + List.iter + (fun t -> + match t with + | Rdf.URI uri -> + Ontology.enqueue_property uri; + Lexicon.enqueue_property uri; + let incrs = + if config_nary_relations#value + then + let uri_stat = Rdf.wikidata_rebase uri Rdf.wikidata_prop_base Rdf.wikidata_prop_statement_base in + Lisql2sparql.WhichPred.increments_of_terms ~init:true [Some t; None; Some (Rdf.URI uri_stat); None] + else Lisql2sparql.WhichProp.increments_of_terms ~init:true [Some t; None] in + List.iter + (fun incr -> incr_index#add (incr, Some freq)) + incrs + | _ -> ()) + ltp; sync_concepts (fun () -> let forest = incr_index#filter_map_forest ~inverse (fun x -> Some x) in let suggestions = {partial = true; forest} in @@ -1516,7 +1541,13 @@ object (self) :> string)), None | Lisql.ExternalSearch (_, Some lt) -> - let lx = List.mapi (fun i t -> "x" ^ string_of_int (i+1)) lt in + let ltq, ltp = (* separating Qxxx and Pxxx *) + List.partition + (function + | Rdf.URI uri -> Common.has_prefix uri Rdf.wikidata_entity_base + | _ -> true) (* should not happen *) + lt in + let lx = List.mapi (fun i t -> "x" ^ string_of_int (i+1)) ltq in Sparql.((select ~distinct:false ~projections:(List.map (fun x -> `Bare, x) lx) @@ -1525,9 +1556,9 @@ object (self) (join (List.map2 (fun x t -> optional (rdf_type (var x) (term t))) - lx lt)) + lx ltq)) :> string)), - Some (lx,lt) + Some (lx,ltq,ltp) | _ -> "", None in (* avoiding timeouts in evaluations *) Jsutils.firebug sparql_class; if sparql_class = "" @@ -1538,7 +1569,7 @@ object (self) (function | [results_class] -> ( match lt_opt with - | Some (lx,lt) -> process_wikidata_with_external_search lx lt results_class + | Some (lx,ltq,ltp) -> process_wikidata_with_external_search lx ltq ltp results_class | None -> process_wikidata results_class ) | _ -> assert false) (fun code -> k (Result.Error (Failure ("Initial concept suggestions: HTTP error code " ^ string_of_int code)))) @@ -1555,8 +1586,6 @@ object (self) k (Result.Ok {partial = false; forest = []}) (* only constraints on aggregations (HAVING clause) *) else if focus_descr#unconstrained then self#ajax_forest_properties_init ~inverse constr elts k - else if Rdf.config_wikidata_mode#value && constr <> Lisql.True then - k (Result.Error (Failure "Concept suggestions: constraint not supported on Wikidata, except for initial suggestions")) (* timeout pbs *) else begin let hierarchy_focus_as_incr_opt = let open Lisql in @@ -2113,7 +2142,7 @@ object (self) IsExactly "..."; StartsWith "..."; EndsWith "..." ] in - if Rdf.config_wikidata_mode#value && focus_descr#unconstrained then + if Rdf.config_wikidata_mode#value then ExternalSearch (WikidataSearch ["..."], None) :: l_constr (*else if Lisql2sparql.config_fulltext_search#value = "text:query" then ExternalSearch (`TextQuery ["..."], None) :: l_constr*) diff --git a/lisql.ml b/lisql.ml index 26d2909..d9be78f 100644 --- a/lisql.ml +++ b/lisql.ml @@ -113,6 +113,17 @@ let reset_constr : constr -> constr = function | HasDatatype _ -> HasDatatype "..." | ExternalSearch (s, _) -> ExternalSearch (reset_search s, None) +let filter_external_search (f : Rdf.uri -> bool) : constr -> constr = function + | ExternalSearch (s, Some lt) -> + let lt = (* TODO: should handle empty list *) + List.filter + (function + | Rdf.URI uri -> f uri + | t -> true) (* should not happen *) + lt in + ExternalSearch (s, Some lt) + | c -> c + (* LISQL modifiers *) type num_conv_type = IntegerConv | DecimalConv | DoubleConv diff --git a/lisql2sparql.ml b/lisql2sparql.ml index 0db1225..8d7a7ee 100644 --- a/lisql2sparql.ml +++ b/lisql2sparql.ml @@ -242,10 +242,34 @@ let filter_constr_gen (ctx : filter_context) (gv : genvar) ~(label_properties_la | ExternalSearch (_, None) -> Sparql.True | ExternalSearch (_, Some lt) -> Sparql.formula_term_in_term_list t (List.map Sparql.term lt) - -let filter_constr_entity gv t c (ft : Lisql.filter_type) = filter_constr_gen (`Terms,ft,`Filter) gv ~label_properties_langs:Lexicon.config_entity_lexicon#properties_langs t c -let filter_constr_class gv t c = filter_constr_gen (`Properties,OnlyIRIs,`Filter) gv ~label_properties_langs:Lexicon.config_concept_lexicon#properties_langs t c -let filter_constr_property gv t c = filter_constr_gen (`Properties,OnlyIRIs,`Filter) gv ~label_properties_langs:Lexicon.config_concept_lexicon#properties_langs t c + +let filter_constr_entity gv t c (ft : Lisql.filter_type) = + let c = + if Rdf.config_wikidata_mode#value + then + Lisql.filter_external_search + (fun uri -> Common.has_prefix uri Rdf.wikidata_entity_base) + c + else c in + filter_constr_gen (`Terms,ft,`Filter) gv ~label_properties_langs:Lexicon.config_entity_lexicon#properties_langs t c +let filter_constr_class gv t c = + let c = + if Rdf.config_wikidata_mode#value + then + Lisql.filter_external_search + (fun uri -> Common.has_prefix uri Rdf.wikidata_entity_base) + c + else c in + filter_constr_gen (`Properties,OnlyIRIs,`Filter) gv ~label_properties_langs:Lexicon.config_concept_lexicon#properties_langs t c +let filter_constr_property gv t c = + let c = + if Rdf.config_wikidata_mode#value + then + Lisql.filter_external_search + (fun uri -> not (Common.has_prefix uri Rdf.wikidata_entity_base)) + c + else c in + filter_constr_gen (`Properties,OnlyIRIs,`Filter) gv ~label_properties_langs:Lexicon.config_concept_lexicon#properties_langs t c let search_constr_entity (gv : genvar) (t : _ Sparql.any_term) (c : constr) (ft : Lisql.filter_type) : Sparql.formula = let label_properties_langs = Lexicon.config_entity_lexicon#properties_langs in @@ -507,13 +531,12 @@ module WhichPred = | Some t -> false, t in if Rdf.config_wikidata_mode#value then - let make_pat p1 p2 pat = - let pat = Sparql.join [pat; filter_wikidata p1 p2] in - make_pat ?hook p1 pat in - let pat_wikidata = - Sparql.(union + let pat_wikidata p1 p2 pat = + Sparql.join [pat; filter_wikidata p1 p2] in + let pat = + Sparql.(union [ if Rdf.term_can_be_subject t - then make_pat "pe" "po" + then pat_wikidata "pe" "po" (triple (* forward: pe, po *) (term t) (var "pe") @@ -523,25 +546,13 @@ module WhichPred = if init then empty - else join (* backward: pe, ps, po *) - [ make_pat "pe" "ps" - (triple - (bnode "") - (var "pe") - (bnode_triples - [ var "ps", term t ])) ] (* binding 'ps' to distinguish orientation *) - (*join (* qualifier: pe, po, pq *) - [ triple - (bnode "") - (var "pe") - (bnode_triples - [ var "po", bnode ""; - var "pq", term t ]); - filter - (expr_infix "!=" [var "pq"; var "po"]) - ]*) - ]) in - pat_wikidata + else pat_wikidata "pe" "ps" + (triple (* backward: pe, ps, po *) + (bnode "") + (var "pe") + (bnode_triples + [ var "ps", term t ])) ]) in (* binding 'ps' to distinguish orientation *) + make_pat ?hook "pe" pat else let pat_SO = Sparql.(join diff --git a/rdf.ml b/rdf.ml index a8928a1..abe3eb2 100644 --- a/rdf.ml +++ b/rdf.ml @@ -66,10 +66,27 @@ let wikibase_Property = "http://wikiba.se/ontology#Property" let wikibase_directClaim = "http://wikiba.se/ontology#directClaim" let wikibase_claim = "http://wikiba.se/ontology#claim" let wikibase_statementProperty = "http://wikiba.se/ontology#statementProperty" -let wikidata_entity (q : string) = "http://www.wikidata.org/entity/" ^ q +let wikidata_entity_base = "http://www.wikidata.org/entity/" +let wikidata_entity (q : string) = wikidata_entity_base ^ q +let wikidata_prop_base = "http://www.wikidata.org/prop/" +let wikidata_prop (p : string) = wikidata_prop_base ^ p +let wikidata_prop_statement_base = "http://www.wikidata.org/prop/statement/" +let wikidata_prop_statement (p : string) = wikidata_prop_statement_base ^ p +let wikidata_prop_direct_base = "http://www.wikidata.org/prop/direct/" +let wikidata_prop_direct (p : string) = wikidata_prop_direct_base ^ p let wikidata_Q (n : int) = "http://www.wikidata.org/entity/Q" ^ string_of_int n let wikidata_P (n : int) = "http://www.wikidata.org/prop/P" ^ string_of_int n let p_P625 = wikidata_P 625 (* Wikidata: geographical coordinates *) +let wikidata_rebase uri base new_base = + if Common.has_prefix uri base + then + let k = String.length base in + let name = String.sub uri k (String.length uri - k) in + new_base ^ name + else ( + Jsutils.firebug ("Rdf.wikidata_rebase: unexpected URI: " ^ uri); + uri) + let lat_long_properties = [ "http://www.w3.org/2003/01/geo/wgs84_pos#lat", "http://www.w3.org/2003/01/geo/wgs84_pos#long"; diff --git a/sparql.ml b/sparql.ml index c4d0741..9a2b382 100644 --- a/sparql.ml +++ b/sparql.ml @@ -102,6 +102,7 @@ object (self) ("http://www.wikidata.org/prop/statement/","ps:"); ("http://www.wikidata.org/prop/","p:"); ("http://www.wikidata.org/prop/direct/","wdt:"); + ("http://www.wikidata.org/entity/","wd:"); ("http://wikiba.se/ontology#","wikibase:"); ("http://purl.org/dc/terms/", "dcterms:"); ("http://purl.org/dc/elements/1.1/", "dc:"); diff --git a/webapp/osparklis.js b/webapp/osparklis.js index 24359ee..b36e1b3 100644 --- a/webapp/osparklis.js +++ b/webapp/osparklis.js @@ -1,27 +1,27 @@ // Generated by js_of_ocaml 3.7.1 (function(ai){"use strict";var -ws="black",fd=260469468,go="children",ME=1650,gy="date",Hl=1199,bM="la",OX=-171652197,kU="add",OW=42558696,bH=3903731,aA=365180284,JX="save_expanded_terms",df=771172693,Hk="[]",d6=-66304437,JW="matches regexp (case insensitive)",MD="arg1",Hi="set_navigation",Hj="fecha",OV="save_ui_state",Hh="endsWith",xn="block",wr="StartsWith",Hg=815,qY="title",iK="result",a9=-512962225,y$="id2",qX="abort_all_ajax",OU="MinimumConv",Hf="novalue/",wq="IsBlank",em="endpoint",JV=122,JU="new_place",xm=" (",He="results_slides",OT=456275501,JT="\xc3\xa0",Hd=607,xl="vars",na=128,hf=">",JS=941,y_="como",xk="float",MC=287411743,ye="by",y9=1027,gn="en",rY="GET",iJ="matches",rX=" ++ ",OS=-629094236,Hc="arg2",yd="Const",wp=2147483000.,xj="In",MB=1251,Hb=1036,JR="xsd:date",qW="hierarchy",OR="Div",yc=-1067049653,fj=770676513,xi="n\xc3\xbamero",Ha=172069535,nL="e",y8=338643209,G$="p:",JQ="strends",qV="tooltip_sample",ks=206270158,kD="missing (",G_=" URIs",fk="lisql.ml",JP='",ns=" * ",k4=">=",m8=955169437,Mn="o",Mm="host",wj="list",iG=256,wi="les",xb=-977585392,OC='