-- Module:Wikidata/Chemin/parser
-- Documentation for this module can be created at Module:Wikidata/Chemin/parser/doc
local tool = require("Module:Utilitaire")
local path = require "Module:Wikidata/Chemin/Path"
local parser = require "Module:FParser"
local pparser = {}
--[[
grammar :
letter ::= "A" | "B" | "C" | "D" | "E" | "F" | "G"
| "H" | "I" | "J" | "K" | "L" | "M" | "N"
| "O" | "P" | "Q" | "R" | "S" | "T" | "U"
| "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
| "c" | "d" | "e" | "f" | "g" | "h" | "i"
| "j" | "k" | "l" | "m" | "n" | "o" | "p"
| "q" | "r" | "s" | "t" | "u" | "v" | "w"
| "x" | "y" | "z" ;
digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
space ::= " " ;
Pid ::= "P" , digit, { digit } ;
Pname ::= letter, { letter | digit | space | "'" } ;
PathFirstLevel ::= pathFirstAlternative
-- Rules that allow a path to start from a statement instead of an item at its top level; they are variants of PathAlternative and PathSequence
pathFirstAlternative ::= PathFirstSequence ( '|' PathFirstSequence )*
PathFirstSequence
::= ('>' PathQualifier | PathEltOrInverse ) ( '/' PathEltOrInverse | '^' PathElt )*
Path ::= PathAlternative
PathAlternative ::= PathSequence ( '|' PathSequence )*
PathSequence ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
PathElt ::= PathPrimary PathMod?
PathEltOrInverse ::= PathElt | '^' PathElt
PathMod ::= ( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
PathPrimary ::= ( Prop | 'a' | '(' Path ')'
| ( Prop | '!' PathNegatedPropertySet ) '>' PathQualifier
| '!' PathNegatedPropertySet )
PathQualifier ::= ( Prop | '!' PathNegatedPropertySet | PathPropertySet )
Prop ::= IRIref | Pid | Pname
rules 95 and 96 in https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#rPathNegatedPropertySet
PathNegatedPropertySet ::= PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
PathOneInPropertySet ::= iri | 'a' | '^' ( iri | 'a' )
PathPropertySet ::= '(' Path ( '|' Path )+ ')'
For information, SPARQL property path grammar :
https://www.w3.org/TR/sparql11-property-paths/#path-syntax
TriplesSameSubjectPath ::= VarOrTerm PropertyListNotEmptyPath | TriplesNode PropertyListPath
PropertyListPath ::= PropertyListNotEmpty?
PropertyListNotEmptyPath::= ( VerbPath | VerbSimple ) ObjectList ( ';' ( ( VerbPath | VerbSimple ) ObjectList )? )*
VerbPath ::= Path
VerbSimple ::= Var
Path ::= PathAlternative
PathAlternative ::= PathSequence ( '|' PathSequence )*
PathSequence ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
PathElt ::= PathPrimary PathMod?
PathEltOrInverse ::= PathElt | '^' PathElt
PathMod ::= ( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
PathPrimary ::= ( IRIref | 'a' | '(' Path ')' )
--]]
-- Short local names for the combinators exported by Module:FParser.
local lexer, chain, alternative = parser.lexer, parser.chain, parser.alternative
local plus, idop, nary_op_parser = parser.plus, parser.idop, parser.nary_op_parser
-- Short local names for the lexer primitives used by the grammar rules below.
local lex_char, parse_epsilon, lex_integer = lexer.lex_char, lexer.lex_epsilon, lexer.lex_integer
----------------------------------------------------------------------
-- grammar base lexer functions
----------------------------------------------------------------------
-- Lexes a property identifier ("P" followed by at least one digit).
-- Returns the lexer result tagged with type "Pid", or nothing when
-- lexer.lex_regex yields a falsy value.
local lex_pid = function(state)
    local token = lexer.lex_regex(state, "P[0-9]+")
    if not token then
        return
    end
    token.type = "Pid"
    return token
end
-- Lexes a SPARQL-style prefix made of lowercase letters and underscores.
-- Returns the lexer result tagged with type "prefix", or nothing when
-- lexer.lex_regex yields a falsy value.
-- NOTE(review): the expression uses "*", so it presumably also accepts an
-- empty prefix -- confirm against lexer.lex_regex.
local lex_sparql_prefix = function(state)
    local token = lexer.lex_regex(state, "[a-z_]*")
    if not token then
        return
    end
    token.type = "prefix"
    return token
end
-- Lexes a property label: a letter followed by letters, spaces, apostrophes
-- or hyphens. Returns the lexer result tagged with type "Plabel", or nothing
-- when lexer.lex_regex yields a falsy value.
-- NOTE(review): the Pname rule in the grammar comment also lists digits and
-- does not list "-"; confirm which of the two is the intended definition.
local lex_property_name = function(state)
    local token = lexer.lex_regex(state, "[a-zA-Z][a-z A-Z'-]*")
    if not token then
        return
    end
    token.type = "Plabel"
    return token
end
-------------------------------------------------------------------
-- PathElt ::= PathPrimary PathMod?
-- PathMod ::= ( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
-- Parses a primary path followed by an optional modifier and stores the
-- resulting node in the `node` field of the returned state.
--
-- Modifiers tried, in order: "*", "+", "?", "^", "{min}" / "{min,max}", and
-- finally no modifier at all (the primary node is then used as is).
-- Returns the state produced by the combinators, or nothing on failure.
-- NOTE(review): the postfix "^" alternative is not part of the PathMod rule
-- quoted above this function -- confirm it is intended.
-- NOTE(review): the "{min,}" form of the PathMod rule is not accepted here,
-- an integer is required after the comma.
function pparser.pathElt(state)
    local primary       -- node produced by pparser.pathPrimary
    local lower, upper  -- bounds read from a "{min,max}" modifier, nil otherwise
    local result_node   -- node handed back to the caller

    -- Builds a step that wraps the primary node with `node_class`. The bounds
    -- are read when the step runs, not when it is built.
    -- (idop presumably forwards the parsing state unchanged -- the callbacks
    -- given to it return nothing.)
    local function wrap_with(node_class)
        return idop(function(_)
            result_node = node_class:create(primary, lower, upper)
        end)
    end

    -- A single-character modifier mapped to a node class.
    local function modifier(symbol, node_class)
        return chain{ lex_char(symbol), wrap_with(node_class) }
    end

    local res = chain{
        pparser.pathPrimary,
        idop(function(st) primary = st.node end),
        alternative{
            modifier("*", path.StarNode),
            modifier("+", path.PlusNode),
            modifier("?", path.MaybeNode),
            modifier("^", path.InverseNode),
            chain{
                lex_char("{"),
                lex_integer,
                idop(function(st) lower = tonumber(st.lexed) end),
                alternative{
                    chain{
                        lex_char(","),
                        lex_integer,
                        idop(function(st) upper = tonumber(st.lexed) end)
                    },
                    chain{
                        parse_epsilon,
                        idop(function(_) upper = nil end)
                    }
                },
                wrap_with(path.BetweenNode),
                lex_char("}"),
            },
            -- No modifier: keep the primary node untouched.
            chain{
                parse_epsilon,
                idop(function(_) result_node = primary end)
            }
        }
    }(state)

    if not res then
        return
    end
    res.node = result_node
    return res
end
-- PathEltOrInverse ::= PathElt | '^' PathElt
-- Parses either a plain PathElt or a "^"-prefixed PathElt.
-- In the prefixed case the node of the parsed element is replaced by an
-- inverse node wrapping it.
-- Returns whatever the `alternative` combinator returns for `state`.
pparser.pathEltOrInverse = function(state)
    return alternative{
        pparser.pathElt,
        chain{
            lex_char("^"),
            pparser.pathElt,
            function(inner)
                -- Node classes of the path module are instantiated through
                -- `:create` everywhere else in this file (pathSequence,
                -- pathOneInPropertySet); the table was previously called
                -- directly here.
                inner.node = path.InverseNode:create(inner.node)
                return inner
            end
        }
    }(state)
end
--[[
Tests :
plop=p.parse("P31",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop)
yes
property=>
P31
plop=p.parse("P31>P279", p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop)
yes
property=>
P279
node=>
P31
plop=p.parse("P31{1,6}",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop)
plop=p.parse("(P31|P17>P31)",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop)
yes
nodes=>
1=>
property=>
P31
2=>
property=>
P31
node=>
P17
--]]
-- PathSequence ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
-- The parsed elements are combined by nary_op_parser and turned into a
-- path.SequenceNode by the last argument. An element introduced by "^" is
-- wrapped in a path.InverseNode first.
pparser.pathSequence = nary_op_parser(
    pparser.pathEltOrInverse,
    alternative{
        chain{
            lex_char("/"),
            pparser.pathEltOrInverse,
        },
        chain{
            -- Plain "^": the former "\^" is not a valid Lua escape sequence.
            -- Lua 5.1 reads it as "^", so the lexed character is unchanged,
            -- but Lua 5.2 and later refuse to compile it.
            lex_char("^"),
            pparser.pathElt,
            function(state)
                state.node = path.InverseNode:create(state.node)
                return state
            end
        }
    },
    function(acc) return path.SequenceNode:create(acc) end
)
--[[
Tests:
plop=p.parse("P31/P31+",p.pathSequence) ; t = require "Module:Tools" ; t.dump_to_console(plop)
yes
nodes=>
1=>
property=>
P31
2=>
node=>
property=>
P31
--]]
-- PathAlternative ::= PathSequence ( '|' PathSequence )*
-- Builds the parser for alternatives of sequences; the collected nodes are
-- turned into a path.AlternativeNode.
do
    -- NOTE(review): the separator is given as "[|]" here while the other
    -- rules of this file pass "|" to lex_char -- confirm both are equivalent
    -- for lex_char.
    local next_branch = chain{
        lex_char("[|]"),
        pparser.pathSequence
    }

    local function as_alternative(acc)
        return path.AlternativeNode:create(acc)
    end

    pparser.pathAlternative = nary_op_parser(
        pparser.pathSequence,
        next_branch,
        as_alternative
    )
end
--[[
plop=p.parse("P31|P17/P279+",p.pathAlternative) ; t = require "Module:Tools" ; t.dump_to_console(plop)
yes
nodes=>
1=>
property=>
P31
2=>
nodes=>
1=>
property=>
P17
2=>
node=>
property=>
P279
plop=p.parse("P31|P17>P31/P279+",p.pathAlternative) ; t = require "Module:Tools" ; t.dump_to_console(plop)
yes
nodes=>
1=>
property=>
P31
2=>
nodes=>
1=>
property=>
P31
node=>
P17
2=>
node=>
property=>
P279
--]]
-- PathSequence ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
-- Builds a fresh node tree for the path P31/P279*; used by the rules below
-- as the expansion of the 'a' keyword.
local instance = function()
    local p31 = path.PropertyNode:create("P31")
    local p279_star = path.StarNode:create(path.PropertyNode:create("P279"))
    return path.SequenceNode:create({ p31, p279_star })
end
-- PathPrimary ::= ( Prop | '!' NegatedPropertySet ) ( '>' ( Prop | '!' NegatedPropertySet ) ) ? | 'a' | '(' Path ')'
-- Parses a primary path element and stores the resulting node in the `node`
-- field of the returned state. Alternatives are tried in this order:
--   1. 'a' followed by a space: the instance() path;
--   2. a property or '!' negated set, optionally followed by a qualifier
--      (which yields a path.QualifiedStatementNode);
--   3. a parenthesised path;
--   4. '!' followed by a negated property set.
-- Returns the state produced by the combinators, or nothing on failure.
-- NOTE(review): alternative 2 already accepts '!' + negated set with no
-- qualifier, so alternative 4 looks unreachable if `alternative` stops at the
-- first success -- confirm.
pparser.pathPrimary = function(state)
    local result_node

    -- Step remembering the node carried by the state it receives.
    local function keep_node()
        return idop(function(st) result_node = st.node end)
    end

    local instance_keyword = chain{
        lex_char('a'),
        lex_char(' '),
        idop(function(_) result_node = instance() end)
    }

    local statement = chain{
        chain{
            alternative{
                pparser.prop,
                chain { lex_char('!'), pparser.negatedPropertySet }
            },
            keep_node()
        },
        alternative{
            chain{
                pparser.pathQualifier,
                idop(function(st)
                    -- Pair the main node parsed so far with its qualifier.
                    result_node = path.QualifiedStatementNode:create(
                        result_node,
                        st.node
                    )
                end)
            },
            parse_epsilon
        }
    }

    local parenthesised = chain{
        lexer.open_parenthesis,
        pparser.path,
        keep_node(),
        lexer.close_parenthesis
    }

    local negated_set = chain{
        lexer.lex_char('!'),
        pparser.negatedPropertySet,
        keep_node()
    }

    local res = alternative{
        instance_keyword,
        statement,
        parenthesised,
        negated_set
    }(state)

    if not res then
        return
    end
    res.node = result_node
    return res
end
--[[
Tests :
p.parse("a ", p.pathPrimary) => yes
p.parse("!P31", p.pathPrimary) => yes
p.parse("!(P31|instance of)", p.pathPrimary) => yes
--]]
-- stupid function to be eliminated soon (hum)
-- Returns a parser that reads a single property and replaces the state's node
-- by `wrapper(nodes)`, where `nodes` is a one-element list whose only entry is
-- a table holding the property node under the key `node`.
-- TODO (kept from the original author): understand why the extra `node`
-- level is needed instead of putting the node directly in the list.
local function parsePropAndWrap(wrapper)
    local function wrap_property(state)
        state.node = wrapper({ { node = state.node } })
        return state
    end

    return chain{ pparser.prop, wrap_property }
end
-- Returns a parser for a parenthesised property set:
--   '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
-- `final_node_creator` receives the list of nodes and returns the set node.
-- The inner parsers are built at parse time, so pparser.pathOneInPropertySet
-- is looked up only when the returned function runs.
pparser.pathPropertySetParser = function(final_node_creator)
    -- Fourth argument of nary_op_parser: wraps one node into a one-element
    -- list before creating the set (presumably the single-element case --
    -- confirm against Module:FParser).
    local function wrap_single(node)
        return final_node_creator({ node })
    end

    -- Empty parentheses produce an empty set (meant to mimic "any qualifier
    -- allowed", the equivalent of "*").
    local function make_empty_set(state)
        state.node = final_node_creator({})
        return state
    end

    return function(state)
        local members = nary_op_parser(
            pparser.pathOneInPropertySet,
            chain{
                lexer.lex_char("|"),
                pparser.pathOneInPropertySet
            },
            final_node_creator,
            wrap_single
        )

        local set_content = alternative{
            members,
            chain{ parse_epsilon, make_empty_set }
        }

        return chain{
            lexer.open_parenthesis,
            set_content,
            lexer.close_parenthesis
        }(state)
    end
end
-- Returns a parser accepting either a lone property or a parenthesised
-- property set; in both cases `creator` builds the resulting set node.
pparser.propOrSetParser = function(creator)
    local function make_set(nodes)
        return creator(nodes)
    end

    return function(state)
        -- A lone property (e.g. the "P31" of "!P31") is still wrapped in a set.
        local lone_property = parsePropAndWrap(creator)
        local property_set = pparser.pathPropertySetParser(make_set)
        return alternative{ lone_property, property_set }(state)
    end
end
-- '>' ( Prop | '!' NegatedPropertySet | PropertySet )
-- Qualifier parser: ">" followed by a negated ("!") or plain property/set.
-- The parsed set node is finally wrapped in a path.QualifierSnakNode.
do
    local function negated_set(nodes)
        return path.NegatedPropertySetNode:create(nodes)
    end

    local function plain_set(nodes)
        return path.PropertySetNode:create(nodes)
    end

    local function as_qualifier_snak(state)
        state.node = path.QualifierSnakNode:create(state.node)
        return state
    end

    pparser.pathQualifier = chain{
        lex_char(">"),
        alternative{
            chain{
                lex_char("!"),
                pparser.propOrSetParser(negated_set)
            },
            pparser.propOrSetParser(plain_set)
        },
        as_qualifier_snak
    }
end
--[[
=p.parse(">!(P31|P31)",p.pathQualifier)
=p.parse(">(P31|P31)",p.pathQualifier)
=p.parse(">P31",p.pathQualifier)
=p.parse(">!P31",p.pathQualifier)
--]]
-- PathNegatedPropertySet ::= PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
-- Parser for the set that follows a "!": a parenthesised property set whose
-- members are collected into a path.NegatedPropertySetNode.
-- NOTE(review): pathPropertySetParser always starts with an opening
-- parenthesis, so the bare `PathOneInPropertySet` form of the grammar rule is
-- not accepted by this parser -- confirm whether that is intended.
do
    local function negated_set(nodes)
        return path.NegatedPropertySetNode:create(nodes)
    end

    pparser.negatedPropertySet = pparser.pathPropertySetParser(negated_set)
end
--[[
Tests :
p.parse("!P31",p.negatedPropertySet)
p.parse("(P31|P32)",p.negatedPropertySet) => yes
p.parse("P31",p.negatedPropertySet) => yes
p.parse("^P31",p.negatedPropertySet) => yes
p.parse("^(P31)",p.negatedPropertySet) => nope
p.parse("(P31)",p.negatedPropertySet) => yes
p.parse("(^P31)",p.negatedPropertySet) => yes
p.parse("(^P31|a|plop)",p.negatedPropertySet) => yes
All good(?)
--]]
-- PathOneInPropertySet ::= iri | 'a' | '^' ( iri | 'a' )
-- Parses one member of a property set: a property or the 'a' keyword,
-- optionally preceded by "^".
--
-- On success the returned state carries in `node`:
--   * the property node, or the instance() path for 'a';
--   * that same node wrapped in a path.InverseNode when "^" was present.
-- Returns the (possibly nil) result of the combinators.
--
-- Fixes over the previous version:
--   * `elem` was assigned without `local`, creating a global that was never
--     read; it is now a local and is the value actually used;
--   * for 'a', the instance() path is now the result instead of whatever node
--     the incoming state happened to carry;
--   * the inverse wrapping is applied to the "^" form and not to the plain
--     form (the two branches were swapped).
pparser.pathOneInPropertySet = function(state)
    local elem  -- node of the property / 'a' element just parsed
    local node  -- final node, set by whichever branch succeeds

    local pElement = alternative{
        chain{
            lexer.lex_char('a'),
            idop(function(_) elem = instance() end)
        },
        chain{
            pparser.prop,
            idop(function(st) elem = st.node end)
        }
    }

    local res = alternative{
        chain{
            lexer.lex_char("^"),
            pElement,
            idop(function(_) node = path.InverseNode:create(elem) end)
        },
        chain{
            pElement,
            idop(function(_) node = elem end)
        }
    }(state)

    if res then res.node = node end
    return res
end
-- Prop ::= IRIref | Pid | Pname
-- Parses a property reference: either a Pid with an optional "prefix:" in
-- front of it, or a property label. On success the state's `node` is a
-- path.PropertyNode built from the state's `lexed` value.
-- Returns the state, or nothing when neither form matches.
pparser.prop = function(state)
    local optional_prefix = parser.questionmark(
        chain{
            lex_sparql_prefix,
            lex_char(":")
        }
    )
    local prefixed_pid = chain{ optional_prefix, lex_pid }

    local res = alternative{ prefixed_pid, lex_property_name }(state)
    if not res then
        return
    end
    res.node = path.PropertyNode:create(res.lexed)
    return res
end
--[[
Tests :
p.parse("a ", p.primary) => yes
p.parse("P31@", p.prop) => nope
p.parse("P31", p.prop) => nope
p.parse("P31>P279", p.prop) => nope
--]]
-- PathFirstSequence ::= '>' PathQualifier ( '/' PathEltOrInverse | '^' PathElt )*
-- Sequence that starts with a qualifier and continues with
-- "/" PathEltOrInverse elements; the collected nodes become a
-- path.SequenceNode.
-- pparser.pathQualifier already wraps its node in a path.QualifierSnakNode,
-- so no extra wrapping step is applied to the first element here.
do
    local continuation = chain{
        lex_char("/"),
        pparser.pathEltOrInverse
    }

    local function as_sequence(acc)
        return path.SequenceNode:create(acc)
    end

    pparser.pathFirstSequence = nary_op_parser(
        pparser.pathQualifier,
        continuation,
        as_sequence
    )
end
-- Path ::= PathAlternative
-- Thin wrapper: pparser.pathAlternative is looked up each time the rule runs.
function pparser.path(state)
    return pparser.pathAlternative(state)
end
-- PathFirstAlternative ::= PathFirstSequence ( '|' PathFirstSequence )* | Path
-- Top-level rule: an ordinary path is tried first, then an alternative of
-- qualifier-led sequences whose nodes become a path.AlternativeNode.
do
    local next_branch = chain{
        lex_char("|"),
        pparser.pathFirstSequence
    }

    local function as_alternative(acc)
        return path.AlternativeNode:create(acc)
    end

    pparser.pathFirstAlternative = alternative{
        pparser.path,
        nary_op_parser(
            pparser.pathFirstSequence,
            next_branch,
            as_alternative
        ),
    }
end
-- plop = p.parse_path("P31/P31/P31>P31/P31")
-- Parses `property_path` with the top-level rule and returns the result.
-- Raises an error naming the offending path when the parser returns a falsy
-- value.
pparser.parse_path = function (property_path)
    local parsed = parser.parse(property_path, pparser.pathFirstAlternative)
    local failure_message = "parsing returned a nil obj on path : «" .. property_path .. "»"
    assert(parsed, failure_message)
    return parsed
end
-- Re-exports the generic FParser entry point so that a single rule can be
-- tried from the debug console, e.g. p.parse("P31", p.pathElt).
pparser.parse = parser.parse
-- Module table returned to callers of require.
return pparser