re-findall.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
#! /usr/bin/env python
## Command-line front end for Python's re.findall(): reads standard input
## line by line and prints every match of REGEX found in each line.
##
## Version: 1.1 (25-Sep-2012)
##
  6. import sys, optparse, re
  7. ##======================================================================
  8. ## main
  9. def main(argv=None):
  10. if argv is None:
  11. arv = sys.argv
  12. parser = optparse.OptionParser(usage="%prog <REGEX>" +
  13. "\nSubstitutes the re.findall() of python.", version="%prog 1.1")
  14. parser.add_option("-i", help="ignore case", dest="ignoreCase", action="store_true")
  15. parser.add_option("--ure", help="decode REGEX to unicode", dest="unicodeRE", action="store_true")
  16. parser.add_option("--uin", help="decode input to unicode", dest="unicodeIn", action="store_true")
  17. parser.add_option("--umatch", help="use unicode match", dest="unicodeMatch", action="store_true")
  18. parser.add_option("-p", help="print input", dest="prnInput", action="store_true")
  19. (opts, posArgs) = parser.parse_args()
  20. if len(posArgs) < 1:
  21. parser.error("At least 1 arguments is required")
  22. if opts.unicodeRE:
  23. vRE = posArgs[0].decode("UTF-8")
  24. else:
  25. vRE = posArgs[0]
  26. # processing input
  27. while True:
  28. vInput = sys.stdin.readline()
  29. if not vInput:
  30. break
  31. if opts.unicodeIn:
  32. vInput = vInput.decode("utf-8")
  33. if opts.ignoreCase:
  34. if opts.unicodeMatch:
  35. vPattern = re.compile(vRE, re.IGNORECASE | re.UNICODE)
  36. else:
  37. vPattern = re.compile(vRE, re.IGNORECASE)
  38. else:
  39. if opts.useUnicode:
  40. vPattern = re.compile(vRE, re.UNICODE)
  41. else:
  42. vPattern = re.compile(vRE)
  43. vlOutput = re.findall(vPattern, vInput)
  44. if vlOutput:
  45. if opts.unicodeIn:
  46. vInput = vInput.encode("utf-8")
  47. if opts.prnInput:
  48. print vInput
  49. for vOutput in vlOutput:
  50. if opts.unicodeIn:
  51. vOutput = vOutput.encode("utf-8")
  52. print vOutput
  53. ##======================================================================
  54. ## calling main
  55. if __name__ == "__main__":
  56. sys.exit(main())