#!/usr/bin/env python
"""check_images2.py -- list media files referenced in glife.txt but missing on disk.

The script reads ``glife.txt`` (UTF-16), collects every substring that starts
with ``images`` and ends with one of the extensions ``.gif``, ``.jpg``,
``.png`` or ``.mp4``, and expands ``' + rand(a, b) + '`` expressions into one
path per integer in ``a..b``.  Duplicates and paths listed in
``LinksToSkip.txt`` (UTF-8, one path per line) are dropped.  Every remaining
path that is not an existing file is printed, followed by a total count.

Paths are checked with ``os.path.isfile`` exactly as written in the text, so
relative paths are resolved against the current working directory.
"""
import io
import os
import re
import sys

# Legacy module-level settings.  Neither is referenced by the code in this
# script (IMAGE_PATTERN hard-codes the prefix and the same extensions); they
# are kept so anything that reads them from this module keeps working.
startpattern = """images/"""
imgFormats = ['jpg', 'gif', 'png', 'mp4']

# File with paths that must never be reported, one per line.
filename_false = 'LinksToSkip.txt'
# Text file that is scanned for media references.
source_filename = "glife.txt"

# A media reference: "images", then the shortest run of characters (within
# one line, since "." does not match a newline) up to a known extension.
IMAGE_PATTERN = re.compile(r"images.+?[.](gif|jpg|png|mp4)", flags=re.U)
# A rand() expression with two literal integer bounds, spliced into a quoted
# string: ' + rand(1, 5) + '
RAND_RANGE_PATTERN = re.compile(
    r"'\s*[+]\s*rand\s*[(]\s*(\d+)\s*[,]\s*(\d+)\s*[)]\s*[+]\s*'"
)
# Any spliced rand() expression, whatever its arguments are.
RAND_ANY_PATTERN = re.compile(r"'\s*[+]\s*rand\s*[(].*?[)]\s*[+]\s*'")
# Paths containing "<" or "$" are never checked on disk -- presumably they
# still hold an unexpanded variable or markup; confirm against the data.
DYNAMIC_PATTERN = re.compile(r"[<$]")


def load_skip_list(path):
    """Return the lines of the UTF-8 file *path* with trailing whitespace removed."""
    with io.open(path, 'r', encoding='UTF-8') as skip_file:
        return [line.rstrip() for line in skip_file]


def expand_rand(path):
    """Return the concrete paths that *path* can stand for.

    A path without a ``' + rand(a, b) + '`` expression is returned unchanged
    as a one-element list.  Otherwise one path is produced for every integer
    from ``a`` to ``b`` inclusive (none when ``a > b``).

    NOTE: the range is taken from the first rand() expression with integer
    bounds, and every rand() expression in the path is replaced by the same
    value.
    """
    found = RAND_RANGE_PATTERN.search(path)
    if found is None:
        return [path]
    low = int(found.group(1))
    high = int(found.group(2))
    return [
        RAND_ANY_PATTERN.sub(str(value), path)
        for value in range(low, high + 1)
    ]


def extract_image_paths(text):
    """Return every media path referenced in *text*, in order of appearance.

    rand() expressions are expanded; duplicates are kept.
    """
    paths = []
    for match in IMAGE_PATTERN.finditer(text):
        paths.extend(expand_rand(match.group()))
    return paths


def unique_unskipped(paths, skip):
    """Return *paths* without duplicates and without anything listed in *skip*.

    The first occurrence of each path is kept and the original order is
    preserved.
    """
    excluded = set(skip)
    seen = set()
    kept = []
    for path in paths:
        if path in seen or path in excluded:
            continue
        seen.add(path)
        kept.append(path)
    return kept


def find_missing(paths):
    """Return the entries of *paths* that are not existing files.

    Paths containing ``<`` or ``$`` are ignored and never reported.
    """
    return [
        path
        for path in paths
        if not DYNAMIC_PATTERN.search(path) and not os.path.isfile(path)
    ]


def main():
    """Scan the source file and print every missing media path plus a total."""
    skip = load_skip_list(filename_false)
    with io.open(source_filename, mode='rt', encoding='utf-16') as source:
        text = source.read()

    candidates = unique_unskipped(extract_image_paths(text), skip)
    missing = find_missing(candidates)
    for path in missing:
        print(path)
    # A single pre-formatted argument prints the same on Python 2 and 3.
    print("Number of images not found: %d" % len(missing))


if __name__ == "__main__":
    main()