Browse Source

Improve the code; remove duplicates and false positives

rachels, 2 năm trước
mục cha
commit
c75b54a890
2 tập tin đã thay đổi với 74 bổ sung và 43 xóa
  1. 37 21
      tools/check_images2.py
  2. 37 22
      tools/check_images3.py

+ 37 - 21
tools/check_images2.py

@@ -12,31 +12,47 @@ startpattern = """images/"""
 imgFormats = ['jpg','gif','png','mp4']
 
 images = []
+jmages = []
 
-for name in os.listdir("locations"):
-    path = os.path.join("locations", name)
-    if os.path.isdir(path):
-        continue
-        # skip directories
+filename_false = 'LinksToSkip.txt'
+with io.open(filename_false, 'r', encoding='UTF-8') as file:
+    linesfalse = file.readlines()
+    linesfalse = [line.rstrip() for line in linesfalse]
+file.close()
+
+
+ifile = io.open(
+    "glife.txt",
+    mode='rt',
+    encoding='utf-16'
+)
+text = ifile.read()
+for match in re.finditer(r"images.+?[.](gif|jpg|png|mp4)", text, flags=re.U):
+    imgfile = match.group().encode("utf-8")
+    randmatch = re.search(r"'\s*[+]\s*rand\s*[(]\s*(\d+)\s*[,]\s*(\d+)\s*[)]\s*[+]\s*'", imgfile)
+    if randmatch != None:
+        for i in range(int(randmatch.group(1)), 1+int(randmatch.group(2))):
+            images.append(re.sub(r"'\s*[+]\s*rand\s*[(].*?[)]\s*[+]\s*'", str(i), imgfile))
     else:
-        ifile = io.open(
-            os.path.join("locations", name),
-            mode='rt',
-            encoding='utf-8'
-        )
-        text = ifile.read()
-        for match in re.finditer(r"images.+?[.](gif|jpg|png|mp4)", text, flags=re.U):
-            imgfile = match.group().encode("utf-8")
-            randmatch = re.search(r"'\s*[+]\s*rand\s*[(]\s*(\d+)\s*[,]\s*(\d+)\s*[)]\s*[+]\s*'", imgfile)
-            if randmatch != None:
-                for i in range(int(randmatch.group(1)), 1+int(randmatch.group(2))):
-                    images.append(re.sub(r"'\s*[+]\s*rand\s*[(].*?[)]\s*[+]\s*'", str(i), imgfile))
-            else:
-                images.append(imgfile)
-        
-        ifile.close()
+        images.append(imgfile)
 
+ifile.close()
 for image in images:
+    ex = 0
+    for jmage in jmages:
+        if image == jmage:
+            ex = 1
+            break
+    if ex == 0:
+        ex = 0
+        for line in linesfalse:
+            if image == line:
+                ex = 1
+                break
+        if ex == 0:
+            jmages.append(image)
+
+for image in jmages:
     if not re.search(r"[<$]", image) and not os.path.isfile(image):
         print "Image not found:", image
 

+ 37 - 22
tools/check_images3.py

@@ -13,31 +13,46 @@ startpattern = """images/"""
 imgFormats = ['jpg','gif','png','mp4']
 
 images = []
+jmages = []
 
-for name in os.listdir("locations"):
-    path = os.path.join("locations", name)
-    if os.path.isdir(path):
-        continue
-        # skip directories
+filename_false = 'LinksToSkip.txt'
+with open(filename_false, 'r', encoding='UTF-8') as file:
+    linesfalse = file.readlines()
+    linesfalse = [line.rstrip() for line in linesfalse]
+file.close()
+
+
+ifile = io.open(
+    "glife.txt",
+    mode='rt',
+    encoding='utf-16'
+)
+text = ifile.read()
+for match in re.finditer(r"images.+?[.](gif|jpg|png|mp4)", text, flags=re.U):
+    imgfile = match.group().encode("utf-8").decode()
+    randmatch = re.search(r"'\s*[+]\s*rand\s*[(]\s*(\d+)\s*[,]\s*(\d+)\s*[)]\s*[+]\s*'", imgfile)
+    if randmatch != None:
+        for i in range(int(randmatch.group(1)), 1+int(randmatch.group(2))):
+            images.append(re.sub(r"'\s*[+]\s*rand\s*[(].*?[)]\s*[+]\s*'", str(i), imgfile))
     else:
-        ifile = io.open(
-            os.path.join("locations", name),
-            mode='rt',
-            encoding='utf-8'
-        )
-        text = ifile.read()
-        for match in re.finditer(r"images.+?[.](gif|jpg|png|mp4)", text, flags=re.U):
-            imgfile = match.group().encode("utf-8").decode()
-            randmatch = re.search(r"'\s*[+]\s*rand\s*[(]\s*(\d+)\s*[,]\s*(\d+)\s*[)]\s*[+]\s*'", imgfile)
-            if randmatch != None:
-                for i in range(int(randmatch.group(1)), 1+int(randmatch.group(2))):
-                    images.append(re.sub(r"'\s*[+]\s*rand\s*[(].*?[)]\s*[+]\s*'", str(i), imgfile))
-            else:
-                images.append(imgfile)
-        
-        ifile.close()
+        images.append(imgfile)
 
+ifile.close()
 for image in images:
+    ex = 0
+    for jmage in jmages:
+        if image.casefold() == jmage.casefold():
+            ex = 1
+            break
+    if ex == 0:
+        ex = 0
+        for line in linesfalse:
+            if image.casefold() == line.casefold():
+                ex = 1
+                break
+        if ex == 0:
+            jmages.append(image)
+
+for image in jmages:
     if not re.search(r"[<$]", image) and not os.path.isfile(image):
         print ("Image not found:", image)
-