From b93d4bd3632b96f141cf27498af4dee51d13061a Mon Sep 17 00:00:00 2001 From: minoplhy Date: Tue, 3 Aug 2021 18:56:08 +0700 Subject: [PATCH] Fixing crawler --- crawler.py | 13 +++++++++---- excluder.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 excluder.py diff --git a/crawler.py b/crawler.py index 5555d25..b2d47e7 100644 --- a/crawler.py +++ b/crawler.py @@ -3,8 +3,6 @@ import sys import requests import re -print('starting . . . ') - def clear_old_files(incoming): try: os.remove(incoming) @@ -27,8 +25,8 @@ def filtering(filters_welcome): lines = f.read().splitlines() with open(filters_welcome, 'w') as f: for line in lines: - if not line.startswith(('#',';','@','$',' NS','@@||','!')) and line.strip(): - f.write('\n'.join([line + '\n'])) + if not line.startswith(('#',';','@','$',' NS',' NS','@@||','!')) and line.strip(): + f.write('\n'.join([line + '\n'])) print("++ successful!") f.close() @@ -82,6 +80,13 @@ def excluded(excluded ,incoming): elif not line.strip(): f.write('\n'.join([line + '\n'])) +def sort(incoming): + with open(incoming, 'r') as f: + lines = sorted(f.readlines()) + with open(incoming, 'w') as f: + for line in lines: + f.write(line) + if __name__ == "__main__": download_filters('https://filters.kylz.nl/RPZ/someonewhocares/rpz.txt','test.txt') filtering('test.txt') diff --git a/excluder.py b/excluder.py new file mode 100644 index 0000000..13ff4aa --- /dev/null +++ b/excluder.py @@ -0,0 +1,34 @@ + +def add(incoming,input): + with open(incoming, 'r') as f: + lines = f.read().split() + with open(incoming, 'a') as f: + f.write('\n'.join([input + '\n'])) + with open(incoming, 'r') as f: + lines = set(f.readlines()) + with open(incoming, 'w') as f: + f.writelines(set(lines)) + sort(incoming) + +def add_file(incoming,excluded_in): + data= "" + with open(incoming) as fp: + data = fp.read() + with open(excluded_in) as fp: + data2 = fp.read() + data += "\n" + data += data2 + with open (incoming, 'w') as fp: + fp.write(data) + with open(incoming, 'r') as f: + lines = set(f.readlines()) + with open(incoming, 'w') as f: + f.writelines(set(lines)) + sort(incoming) + +def sort(incoming): + with open(incoming, 'r') as f: + lines = sorted(f.readlines()) + with open(incoming, 'w') as f: + for line in lines: + f.write(line) \ No newline at end of file