From 942122edece9ffeeaa073e209606e8ff7a6cec09 Mon Sep 17 00:00:00 2001 From: minoplhy Date: Sun, 8 Aug 2021 13:44:45 +0700 Subject: [PATCH] Fix filtering issues that cause problem --- crawler.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/crawler.py b/crawler.py index b59cca4..b8ef260 100644 --- a/crawler.py +++ b/crawler.py @@ -34,7 +34,7 @@ def filteringcon(filters_regex_one): with open(filters_regex_one) as f: file = f.read().split('\n') for i in range(len(file)): - file[i] = re.sub(';.*', '', file[i]) + file[i] = re.sub('\s\s+#.*', '', file[i]) file[i] = re.sub(' CNAME .$', '', file[i]) file[i] = re.sub(' CNAME . $', '', file[i]) with open(filters_regex_one, 'w') as f1: @@ -47,22 +47,26 @@ def filteringcon(filters_regex_one): with open(filters_regex_one, 'r') as f: for line in f: for word in a: - if word in line: + if word in line and not line.startswith('#') and line.startswith((tuple(a))): line = line.replace(word,'') + else: + line = line.replace(line, line) lst.append(line) f.close() with open(filters_regex_one, 'w') as f: for line in lst: f.write(line) - f.close() + f.close() remove_words = ['localhost','localhost.localdomain','local','broadcasthost','loopback','ip6-localnet','ip6-mcastprefix','ip6-allnodes','ip6-allrouters','ip6-allhosts','ip6-loopback'] + with open(filters_regex_one, 'r') as f: lines = f.read().splitlines() with open(filters_regex_one, 'w') as f: for line in lines: - if not any(remove_word in line for remove_word in remove_words): + if not line.endswith((tuple(remove_words))): f.write('\n'.join([line + '\n'])) + with open(filters_regex_one) as f: file = f.read().split('\n') for i in range(len(file)): @@ -78,7 +82,7 @@ def killingdup(duplicated_file): with open(duplicated_file, 'r') as f: lines = set(f.readlines()) with open(duplicated_file, 'w') as f: - f.writelines(set(lines)) + f.writelines(set(lines)) print("++ successful!") f.close() @@ -96,6 +100,14 @@ def excluded(excluded ,incoming): elif not line.strip(): f.write('\n'.join([line + '\n'])) +def blankremover(incoming): + with open(incoming ,'r') as f: + lines = f.read().split() + with open(incoming ,'w') as f: + for line in lines: + if line.strip(): + f.write('\n'.join([line + '\n'])) + def sort(incoming): with open(incoming, 'r') as f: lines = sorted(f.readlines())