1. my class Cursor does NQPCursorRole {
  2. has $!made; # Need it to survive re-creations of the match object.
  3. my Mu $EMPTY_LIST := nqp::list();
  4. my Mu $NO_CAPS := nqp::hash();
  5. multi method Bool(Cursor:D:) {
  6. nqp::p6bool(
  7. nqp::isge_i(
  8. nqp::getattr_i(self, Cursor, '$!pos'),
  9. nqp::getattr_i(self, Cursor, '$!from')
  10. )
  11. )
  12. }
  13. method STR() {
  14. nqp::if(
  15. nqp::istype((my $match := nqp::getattr(self,Cursor,'$!match')),Match)
  16. && nqp::isconcrete($match),
  17. $match.Str,
  18. self!MATCH.Str
  19. )
  20. }
  21. method MATCH() {
  22. nqp::if(
  23. nqp::istype((my $match := nqp::getattr(self,Cursor,'$!match')),Match)
  24. && nqp::isconcrete($match),
  25. $match,
  26. self!MATCH
  27. )
  28. }
  29. method !MATCH() {
  30. my $match := nqp::create(Match);
  31. nqp::bindattr($match, Match, '$!orig', nqp::findmethod(self, 'orig')(self));
  32. my int $from = nqp::getattr_i(self, Cursor, '$!from');
  33. my int $to = nqp::getattr_i(self, Cursor, '$!pos');
  34. nqp::bindattr_i($match, Match, '$!from', $from);
  35. nqp::bindattr_i($match, Match, '$!to', $to);
  36. nqp::bindattr($match, Match, '$!made', nqp::getattr(self, Cursor, '$!made'));
  37. nqp::bindattr($match, Match, '$!CURSOR', self);
  38. my Mu $list;
  39. my Mu $hash := nqp::hash();
  40. if nqp::isge_i($to, $from) {
  41. # For captures with lists, initialize the lists.
  42. my $caplist := $NO_CAPS;
  43. my $rxsub := nqp::getattr(self, Cursor, '$!regexsub');
  44. my str $onlyname = '';
  45. my int $namecount = 0;
  46. if nqp::not_i(nqp::isnull($rxsub)) {
  47. $caplist := nqp::can($rxsub, 'CAPS') ?? nqp::findmethod($rxsub, 'CAPS')($rxsub) !! nqp::null();
  48. if nqp::not_i(nqp::isnull($caplist)) && nqp::istrue($caplist) {
  49. my $iter := nqp::iterator($caplist);
  50. my $curcap;
  51. my str $name;
  52. while $iter {
  53. $curcap := nqp::shift($iter);
  54. $namecount = nqp::add_i($namecount, 1);
  55. if nqp::iterval($curcap) >= 2 {
  56. $name = nqp::iterkey_s($curcap);
  57. $onlyname = $name if nqp::iseq_i($namecount, 1);
  58. nqp::iscclass(nqp::const::CCLASS_NUMERIC, $name, 0)
  59. ?? nqp::bindpos(
  60. nqp::if(nqp::isconcrete($list), $list, ($list := nqp::list())),
  61. nqp::fromstr_I($name, Int), [])
  62. !! nqp::bindkey($hash, $name, []);
  63. }
  64. }
  65. }
  66. }
  67. # Walk the Cursor stack and populate the Cursor.
  68. my Mu $cs := nqp::getattr(self, Cursor, '$!cstack');
  69. if nqp::isnull($cs) || nqp::not_i(nqp::istrue($cs)) {}
  70. elsif nqp::not_i(nqp::istrue($caplist)) {}
  71. elsif nqp::iseq_i($namecount, 1) && nqp::isgt_i(nqp::chars($onlyname), 0) && nqp::eqat($onlyname, '$!', 0) {
  72. # If there's only one destination, avoid repeated hash lookups
  73. my int $cselems = nqp::elems($cs);
  74. my int $csi = -1;
  75. my Mu $dest;
  76. # numeric: <= ord("9") so positional capture
  77. $dest := nqp::islt_i(nqp::ord($onlyname),58)
  78. ?? nqp::atpos($list, $onlyname)
  79. !! nqp::atkey($hash, $onlyname);
  80. my $subcur;
  81. my str $name;
  82. while nqp::islt_i(++$csi,$cselems) {
  83. $subcur := nqp::atpos($cs, $csi);
  84. $name = nqp::getattr_s($subcur, $?CLASS, '$!name');
  85. nqp::push($dest,$subcur.MATCH())
  86. if nqp::not_i(nqp::isnull_s($name));
  87. }
  88. }
  89. else {
  90. my int $cselems = nqp::elems($cs);
  91. my int $csi = -1;
  92. my $subcur;
  93. my str $name;
  94. while nqp::islt_i(++$csi,$cselems) {
  95. $subcur := nqp::atpos($cs, $csi);
  96. $name = nqp::getattr_s($subcur, $?CLASS, '$!name');
  97. if nqp::not_i(nqp::isnull_s($name)) && nqp::isgt_i(nqp::chars($name), 0) {
  98. my Mu $submatch := $subcur.MATCH;
  99. if nqp::eqat($name, '$', 0) && (nqp::iseq_s($name, '$!from') || nqp::iseq_s($name, '$!to')) {
  100. nqp::bindattr_i($match, Match, $name, $submatch.from);
  101. }
  102. elsif nqp::islt_i(nqp::index($name, '='), 0) {
  103. my Mu $capval := nqp::atkey($caplist, $name);
  104. my int $needs_list = nqp::isconcrete($capval) && $capval >= 2;
  105. if nqp::iscclass(nqp::const::CCLASS_NUMERIC, $name, 0) {
  106. $list := nqp::list() unless nqp::isconcrete($list);
  107. $needs_list
  108. ?? nqp::atpos($list, nqp::fromstr_I($name, Int)).append($submatch)
  109. !! nqp::bindpos($list, nqp::fromstr_I($name, Int), $submatch);
  110. }
  111. else {
  112. $needs_list
  113. ?? nqp::atkey($hash, $name).append($submatch)
  114. !! nqp::bindkey($hash, $name, $submatch);
  115. }
  116. }
  117. else {
  118. my $names := nqp::split('=', $name);
  119. my $iter := nqp::iterator($names);
  120. my Mu $capval;
  121. my int $needs_list;
  122. while $iter {
  123. $name = nqp::shift($iter);
  124. $capval := nqp::atkey($caplist, $name);
  125. $needs_list = nqp::isconcrete($capval) && $capval >= 2;
  126. if nqp::iscclass(nqp::const::CCLASS_NUMERIC, $name, 0) {
  127. $list := nqp::list() unless nqp::isconcrete($list);
  128. $needs_list
  129. ?? nqp::atpos($list, nqp::fromstr_I($name, Int)).append($submatch)
  130. !! nqp::bindpos($list, nqp::fromstr_I($name, Int), $submatch);
  131. }
  132. else {
  133. $needs_list
  134. ?? nqp::atkey($hash, $name).append($submatch)
  135. !! nqp::bindkey($hash, $name, $submatch);
  136. }
  137. }
  138. }
  139. }
  140. }
  141. }
  142. }
  143. nqp::bindattr($match, Capture, '@!list', nqp::isconcrete($list) ?? $list !! $EMPTY_LIST);
  144. nqp::bindattr($match, Capture, '%!hash', $hash);
  145. nqp::bindattr(self, Cursor, '$!match', $match);
  146. # Once we've produced the captures, and if we know we're finished and
  147. # will never be backtracked into, we can release cstack and regexsub.
  148. unless nqp::defined(nqp::getattr(self, Cursor, '$!bstack')) {
  149. nqp::bindattr(self, Cursor, '$!cstack', nqp::null());
  150. nqp::bindattr(self, Cursor, '$!regexsub', nqp::null());
  151. }
  152. $match;
  153. }
  154. method CURSOR_NEXT() { # from !cursor_next in nqp
  155. nqp::if(
  156. nqp::defined($!restart),
  157. $!restart(self),
  158. nqp::stmts(
  159. (my $cur := self."!cursor_start_cur"()),
  160. $cur."!cursor_fail"(),
  161. $cur
  162. )
  163. )
  164. }
  165. method CURSOR_OVERLAP() { # adapted from !cursor_more in nqp
  166. nqp::stmts(
  167. (my $new := nqp::create(self)),
  168. nqp::bindattr( $new,$?CLASS,'$!shared',$!shared),
  169. nqp::bindattr( $new,$?CLASS,'$!braid',$!braid),
  170. nqp::bindattr_i($new,$?CLASS,'$!from',-1),
  171. nqp::bindattr_i($new,$?CLASS,'$!pos',nqp::add_i($!from,1)),
  172. $!regexsub($new)
  173. )
  174. }
  175. method CURSOR_MORE() { # adapted from !cursor_more in nqp
  176. nqp::stmts(
  177. (my $new := nqp::create(self)),
  178. nqp::bindattr( $new,$?CLASS,'$!shared',$!shared),
  179. nqp::bindattr( $new,$?CLASS,'$!braid',$!braid),
  180. nqp::bindattr_i($new,$?CLASS,'$!from',-1),
  181. nqp::bindattr_i($new,$?CLASS,'$!pos',
  182. nqp::if(
  183. nqp::isge_i($!from,$!pos),
  184. nqp::add_i($!from,1),
  185. $!pos
  186. )
  187. ),
  188. $!regexsub($new)
  189. )
  190. }
  191. # INTERPOLATE will iterate over the string $tgt beginning at position 0.
  192. # If it can't match against pattern var (or any element of var if it is an array)
  193. # it will increment $pos and try again. Therefor it is important to only match
  194. # against the current position.
  195. # $i is case insensitive flag
  196. # $s is for sequential matching instead of junctive
  197. # $a is true if we are in an assertion
  198. method INTERPOLATE(\var, int $i, int $m, int $monkey, int $s, int $a = 0, $context = PseudoStash) {
  199. if nqp::isconcrete(var) {
  200. # Call it if it is a routine. This will capture if requested.
  201. return (var)(self) if nqp::istype(var,Callable);
  202. my $maxmatch;
  203. my $cur := self.'!cursor_start_cur'();
  204. my str $tgt = $cur.target;
  205. my int $eos = nqp::chars($tgt);
  206. my int $maxlen = -1;
  207. my int $pos = nqp::getattr_i($cur, $?CLASS, '$!from');
  208. my int $start = 1;
  209. my int $nomod = !($i || $m);
  210. my Mu $order := nqp::list();
  211. # Looks something we need to loop over
  212. if nqp::istype(var, Iterable) and !nqp::iscont(var) {
  213. my $varlist := var.list;
  214. my int $elems = $varlist.elems; # reifies
  215. my $list := nqp::getattr($varlist,List,'$!reified');
  216. # Order matters for sequential matching, so no NFA involved.
  217. if $s {
  218. $order := $list;
  219. }
  220. # prepare to run the NFA if var is array-ish.
  221. else {
  222. my Mu $nfa := QRegex::NFA.new;
  223. my Mu $alts := nqp::setelems(nqp::list,$elems);
  224. my int $fate = 0;
  225. my int $j = -1;
  226. while nqp::islt_i(++$j,$elems) {
  227. my Mu $topic := nqp::atpos($list,$j);
  228. nqp::bindpos($alts,$j,$topic);
  229. # We are in a regex assertion, the strings we get will
  230. # be treated as regex rules.
  231. if $a {
  232. return $cur.'!cursor_start_cur'()
  233. if nqp::istype($topic,Associative);
  234. my $rx := MAKE_REGEX($topic,$i,$m,$monkey,$context);
  235. $nfa.mergesubstates($start,0,nqp::decont($fate),
  236. nqp::findmethod($rx,'NFA')($rx),
  237. Mu);
  238. }
  239. # A Regex already.
  240. elsif nqp::istype($topic,Regex) {
  241. $nfa.mergesubstates($start,0,nqp::decont($fate),
  242. nqp::findmethod($topic,'NFA')($topic),
  243. Mu);
  244. }
  245. # The pattern is a string.
  246. else {
  247. my Mu $lit := QAST::Regex.new(
  248. :rxtype<literal>, $topic,
  249. :subtype( $nomod
  250. ?? ''
  251. !! $m
  252. ?? $i
  253. ?? 'ignorecase+ignoremark'
  254. !! 'ignoremark'
  255. !! 'ignorecase')
  256. );
  257. my Mu $nfa2 := QRegex::NFA.new;
  258. my Mu $node := nqp::findmethod($nfa2,'addnode')($nfa2,$lit);
  259. $nfa.mergesubstates($start,0,nqp::decont($fate),
  260. nqp::findmethod($node,'save')($node,:non_empty(1)),
  261. Mu);
  262. }
  263. ++$fate;
  264. }
  265. # Now run the NFA
  266. my Mu $fates := nqp::findmethod($nfa,'run')($nfa,$tgt,$pos);
  267. my int $count = nqp::elems($fates);
  268. nqp::setelems($order,$count);
  269. $j = -1;
  270. nqp::bindpos($order,$j,
  271. nqp::atpos($alts,nqp::atpos_i($fates,$j)))
  272. while nqp::islt_i(++$j,$count);
  273. }
  274. }
  275. # Use the var as it is if it's not array-ish.
  276. else {
  277. nqp::push($order, var);
  278. }
  279. my str $topic_str;
  280. my int $omax = nqp::elems($order);
  281. my int $o = -1;
  282. while nqp::islt_i(++$o,$omax) {
  283. my Mu $topic := nqp::atpos($order,$o);
  284. my $match;
  285. my int $len;
  286. # We are in a regex assertion, the strings we get will be
  287. # treated as regex rules.
  288. if $a {
  289. return $cur.'!cursor_start_cur'()
  290. if nqp::istype($topic,Associative);
  291. my $rx := MAKE_REGEX($topic,$i,$m,$monkey,$context);
  292. $match := self.$rx;
  293. $len = $match.pos - $match.from;
  294. }
  295. # A Regex already.
  296. elsif nqp::istype($topic,Regex) {
  297. $match := self.$topic;
  298. $len = $match.pos - $match.from;
  299. }
  300. # The pattern is a string. $len and and $topic_str are used
  301. # later on if this condition does not hold.
  302. elsif nqp::iseq_i(($len = nqp::chars($topic_str = $topic.Str)),0) {
  303. $match = 1;
  304. }
  305. # no modifier, match literally
  306. elsif $nomod {
  307. $match = nqp::eqat($tgt, $topic_str, $pos)
  308. }
  309. # ignoremark(+ignorecase?)
  310. elsif $m {
  311. my int $k = -1;
  312. # ignorecase+ignoremark
  313. if $i {
  314. my str $tgt_fc = nqp::fc(nqp::substr($tgt,$pos,$len));
  315. my str $topic_fc = nqp::fc($topic_str);
  316. Nil while nqp::islt_i(++$k,$len)
  317. && nqp::iseq_i(
  318. nqp::ordbaseat($tgt_fc, nqp::add_i($pos,$k)),
  319. nqp::ordbaseat($topic_fc, $k)
  320. );
  321. }
  322. # ignoremark
  323. else {
  324. Nil while nqp::islt_i(++$k, $len)
  325. && nqp::iseq_i(
  326. nqp::ordbaseat($tgt, nqp::add_i($pos,$k)),
  327. nqp::ordbaseat($topic_str, $k)
  328. );
  329. }
  330. $match = nqp::iseq_i($k,$len); # match if completed
  331. }
  332. # ignorecase
  333. else {
  334. $match = nqp::iseq_s(
  335. nqp::fc(nqp::substr($tgt, $pos, $len)),
  336. nqp::fc($topic_str)
  337. )
  338. }
  339. if $match
  340. && nqp::isgt_i($len,$maxlen)
  341. && nqp::isle_i(nqp::add_i($pos,$len),$eos) {
  342. $maxlen = $len;
  343. $maxmatch := $match;
  344. last if $s; # stop here for sequential alternation
  345. }
  346. }
  347. nqp::istype($maxmatch,Cursor)
  348. ?? $maxmatch
  349. !! nqp::isge_i($maxlen,0)
  350. ?? $cur.'!cursor_pass'(nqp::add_i($pos,$maxlen), '')
  351. !! $cur
  352. }
  353. else {
  354. self."!cursor_start_cur"()
  355. }
  356. }
  357. method CALL_SUBRULE($rule, |c) {
  358. $rule(self, |c)
  359. }
  360. method DYNQUANT_LIMITS($mm) {
  361. nqp::istype($mm,Range)
  362. ?? $mm.min == Inf
  363. ?? die 'Range minimum in quantifier (**) cannot be +Inf'
  364. !! $mm.max == -Inf
  365. ?? die 'Range maximum in quantifier (**) cannot be -Inf'
  366. !! nqp::list_i(
  367. $mm.min < 0 ?? 0 !! $mm.min.Int,
  368. $mm.max == Inf ?? -1 !! $mm.max.Int)
  369. !! $mm == -Inf || $mm == Inf
  370. ?? Failure.new('Fixed quantifier cannot be infinite')
  371. !! nqp::list_i($mm.Int, $mm.Int)
  372. }
  373. method OTHERGRAMMAR($grammar, $name, |) {
  374. my $lang_cursor := $grammar.'!cursor_init'(self.target(), :p(self.pos()));
  375. $lang_cursor.clone_braid_from(self);
  376. $lang_cursor."$name"();
  377. }
  378. method INDMETHOD($name, |c) {
  379. self."$name"(|c);
  380. }
  381. method INDRULE($rule, |c) {
  382. $rule(self, |c)
  383. }
  384. method RECURSE() {
  385. nqp::getlexdyn('$?REGEX')(self)
  386. }
  387. sub MAKE_REGEX($arg, int $i, int $m, int $monkey, $context) {
  388. my role CachedCompiledRegex {
  389. has $.regex;
  390. }
  391. if nqp::istype($arg,Regex) {
  392. $arg
  393. }
  394. elsif nqp::istype($arg, CachedCompiledRegex) {
  395. $arg.regex
  396. }
  397. else {
  398. my $*RESTRICTED = "Prohibited regex interpolation"
  399. unless $monkey; # Comes from when regex was originally compiled.
  400. my $rx := $i
  401. ?? $m
  402. ?? EVAL("anon regex \{ :i :m $arg\}", :$context)
  403. !! EVAL("anon regex \{ :i $arg\}", :$context)
  404. !! $m
  405. ?? EVAL("anon regex \{ :m $arg\}", :$context)
  406. !! EVAL("anon regex \{ $arg\}", :$context);
  407. $arg does CachedCompiledRegex($rx);
  408. $rx
  409. }
  410. }
  411. }