re.cpp 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
#include "pocketpy/re.h"

#include <regex>
#include <string>
#include <utility>
#include <vector>
  2. namespace pkpy{
  3. struct ReMatch {
  4. PY_CLASS(ReMatch, re, Match)
  5. i64 start;
  6. i64 end;
  7. std::cmatch m;
  8. ReMatch(i64 start, i64 end, std::cmatch m) : start(start), end(end), m(m) {}
  9. static void _register(VM* vm, PyObject* mod, PyObject* type){
  10. vm->bind_notimplemented_constructor<ReMatch>(type);
  11. vm->bind_method<0>(type, "start", PK_LAMBDA(VAR(_CAST(ReMatch&, args[0]).start)));
  12. vm->bind_method<0>(type, "end", PK_LAMBDA(VAR(_CAST(ReMatch&, args[0]).end)));
  13. vm->bind_method<0>(type, "span", [](VM* vm, ArgsView args) {
  14. auto& self = _CAST(ReMatch&, args[0]);
  15. return VAR(Tuple({VAR(self.start), VAR(self.end)}));
  16. });
  17. vm->bind_method<1>(type, "group", [](VM* vm, ArgsView args) {
  18. auto& self = _CAST(ReMatch&, args[0]);
  19. int index = CAST(int, args[1]);
  20. index = vm->normalized_index(index, self.m.size());
  21. return VAR(self.m[index].str());
  22. });
  23. }
  24. };
  25. static PyObject* _regex_search(const Str& pattern, const Str& string, bool from_start, VM* vm){
  26. std::regex re(pattern.begin(), pattern.end());
  27. std::cmatch m;
  28. if(std::regex_search(string.begin(), string.end(), m, re)){
  29. if(from_start && m.position() != 0) return vm->None;
  30. i64 start = string._byte_index_to_unicode(m.position());
  31. i64 end = string._byte_index_to_unicode(m.position() + m.length());
  32. return VAR_T(ReMatch, start, end, m);
  33. }
  34. return vm->None;
  35. };
  36. void add_module_re(VM* vm){
  37. PyObject* mod = vm->new_module("re");
  38. ReMatch::register_class(vm, mod);
  39. vm->bind_func<2>(mod, "match", [](VM* vm, ArgsView args) {
  40. const Str& pattern = CAST(Str&, args[0]);
  41. const Str& string = CAST(Str&, args[1]);
  42. return _regex_search(pattern, string, true, vm);
  43. });
  44. vm->bind_func<2>(mod, "search", [](VM* vm, ArgsView args) {
  45. const Str& pattern = CAST(Str&, args[0]);
  46. const Str& string = CAST(Str&, args[1]);
  47. return _regex_search(pattern, string, false, vm);
  48. });
  49. vm->bind_func<3>(mod, "sub", [](VM* vm, ArgsView args) {
  50. const Str& pattern = CAST(Str&, args[0]);
  51. const Str& repl = CAST(Str&, args[1]);
  52. const Str& string = CAST(Str&, args[2]);
  53. std::regex re(pattern.begin(), pattern.end());
  54. return VAR(std::regex_replace(string.str(), re, repl.str()));
  55. });
  56. vm->bind_func<2>(mod, "split", [](VM* vm, ArgsView args) {
  57. const Str& pattern = CAST(Str&, args[0]);
  58. const Str& string = CAST(Str&, args[1]);
  59. std::regex re(pattern.begin(), pattern.end());
  60. std::cregex_token_iterator it(string.begin(), string.end(), re, -1);
  61. std::cregex_token_iterator end;
  62. List vec;
  63. for(; it != end; ++it){
  64. vec.push_back(VAR(it->str()));
  65. }
  66. return VAR(vec);
  67. });
  68. }
  69. } // namespace pkpy