re.h 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. #pragma once
  #include "common.h"
  #if PK_MODULE_RE
  #include "cffi.h"
  #include <string>
  #include <vector>
  5. namespace pkpy{
  6. struct ReMatch {
  7. PY_CLASS(ReMatch, re, Match)
  8. i64 start;
  9. i64 end;
  10. std::cmatch m;
  11. ReMatch(i64 start, i64 end, std::cmatch m) : start(start), end(end), m(m) {}
  12. static void _register(VM* vm, PyObject* mod, PyObject* type){
  13. vm->bind_notimplemented_constructor<ReMatch>(type);
  14. vm->bind_method<0>(type, "start", PK_LAMBDA(VAR(_CAST(ReMatch&, args[0]).start)));
  15. vm->bind_method<0>(type, "end", PK_LAMBDA(VAR(_CAST(ReMatch&, args[0]).end)));
  16. vm->bind_method<0>(type, "span", [](VM* vm, ArgsView args) {
  17. auto& self = _CAST(ReMatch&, args[0]);
  18. return VAR(Tuple({VAR(self.start), VAR(self.end)}));
  19. });
  20. vm->bind_method<1>(type, "group", [](VM* vm, ArgsView args) {
  21. auto& self = _CAST(ReMatch&, args[0]);
  22. int index = CAST(int, args[1]);
  23. index = vm->normalized_index(index, self.m.size());
  24. return VAR(self.m[index].str());
  25. });
  26. }
  27. };
  28. inline PyObject* _regex_search(const Str& pattern, const Str& string, bool from_start, VM* vm){
  29. std::regex re(pattern.begin(), pattern.end());
  30. std::cmatch m;
  31. if(std::regex_search(string.begin(), string.end(), m, re)){
  32. if(from_start && m.position() != 0) return vm->None;
  33. i64 start = string._byte_index_to_unicode(m.position());
  34. i64 end = string._byte_index_to_unicode(m.position() + m.length());
  35. return VAR_T(ReMatch, start, end, m);
  36. }
  37. return vm->None;
  38. };
  39. inline void add_module_re(VM* vm){
  40. PyObject* mod = vm->new_module("re");
  41. ReMatch::register_class(vm, mod);
  42. vm->bind_func<2>(mod, "match", [](VM* vm, ArgsView args) {
  43. const Str& pattern = CAST(Str&, args[0]);
  44. const Str& string = CAST(Str&, args[1]);
  45. return _regex_search(pattern, string, true, vm);
  46. });
  47. vm->bind_func<2>(mod, "search", [](VM* vm, ArgsView args) {
  48. const Str& pattern = CAST(Str&, args[0]);
  49. const Str& string = CAST(Str&, args[1]);
  50. return _regex_search(pattern, string, false, vm);
  51. });
  52. vm->bind_func<3>(mod, "sub", [](VM* vm, ArgsView args) {
  53. const Str& pattern = CAST(Str&, args[0]);
  54. const Str& repl = CAST(Str&, args[1]);
  55. const Str& string = CAST(Str&, args[2]);
  56. std::regex re(pattern.begin(), pattern.end());
  57. return VAR(std::regex_replace(string.str(), re, repl.str()));
  58. });
  59. vm->bind_func<2>(mod, "split", [](VM* vm, ArgsView args) {
  60. const Str& pattern = CAST(Str&, args[0]);
  61. const Str& string = CAST(Str&, args[1]);
  62. std::regex re(pattern.begin(), pattern.end());
  63. std::cregex_token_iterator it(string.begin(), string.end(), re, -1);
  64. std::cregex_token_iterator end;
  65. List vec;
  66. for(; it != end; ++it){
  67. vec.push_back(VAR(it->str()));
  68. }
  69. return VAR(vec);
  70. });
  71. }
  72. } // namespace pkpy
  73. #else
  74. ADD_MODULE_PLACEHOLDER(re)
  75. #endif