check_images3.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. #!/usr/bin/env python
  2. import io
  3. import os
  4. import re
  5. import sys
  6. from sys import version_info
  7. #path = os.getcwd()
  8. #print path
  9. startpattern = """images/"""
  10. imgFormats = ['jpg','gif','png','mp4']
  11. images = []
  12. jmages = []
  13. numberoflinks = 0
  14. filename_false = 'LinksToSkip.txt'
  15. with open(filename_false, 'r', encoding='UTF-8') as file:
  16. linesfalse = file.readlines()
  17. linesfalse = [line.rstrip() for line in linesfalse]
  18. file.close()
  19. ifile = io.open(
  20. "glife.txt",
  21. mode='rt',
  22. encoding='utf-16'
  23. )
  24. text = ifile.read()
  25. for match in re.finditer(r"images.+?[.](gif|jpg|png|mp4)", text, flags=re.U):
  26. imgfile = match.group().encode("utf-8").decode()
  27. randmatch = re.search(r"'\s*[+]\s*rand\s*[(]\s*(\d+)\s*[,]\s*(\d+)\s*[)]\s*[+]\s*'", imgfile)
  28. if randmatch != None:
  29. for i in range(int(randmatch.group(1)), 1+int(randmatch.group(2))):
  30. images.append(re.sub(r"'\s*[+]\s*rand\s*[(].*?[)]\s*[+]\s*'", str(i), imgfile))
  31. else:
  32. images.append(imgfile)
  33. ifile.close()
  34. for image in images:
  35. ex = 0
  36. for jmage in jmages:
  37. if image.casefold() == jmage.casefold():
  38. ex = 1
  39. break
  40. if ex == 0:
  41. ex = 0
  42. for line in linesfalse:
  43. if image.casefold() == line.casefold():
  44. ex = 1
  45. break
  46. if ex == 0:
  47. jmages.append(image)
  48. for image in jmages:
  49. if not re.search(r"[<$]", image) and not os.path.isfile(image):
  50. print (image)
  51. numberoflinks += 1
  52. print ("Number of images not found: ",numberoflinks )