From 11bc15614d953ac042a38ea81abf63e6d7613a3d Mon Sep 17 00:00:00 2001 From: minoplhy Date: Mon, 2 Aug 2021 15:15:42 +0700 Subject: [PATCH] Support for AdBlock Format --- crawler.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/crawler.py b/crawler.py index b081a07..aa1ec25 100644 --- a/crawler.py +++ b/crawler.py @@ -28,7 +28,7 @@ def filtering(filters_welcome): lines = f.read().splitlines() with open(filters_welcome, 'w') as f: for line in lines: - if not line.startswith(('#',';','@','$',' NS','@@||')) and line.strip(): + if not line.startswith(('#',';','@','$',' NS','@@||','!')) and line.strip(): f.write('\n'.join([line + '\n'])) print("++ successful!") f.close() @@ -46,6 +46,20 @@ def filteringcon(filters_regex_one): print("++ successful!") f.close() + a = ['||','^','|'] + lst = [] + with open(filters_regex_one, 'r') as f: + for line in f: + for word in a: + if word in line: + line = line.replace(word,'') + lst.append(line) + f.close() + with open(filters_regex_one, 'w') as f: + for line in lst: + f.write(line) + f.close() + def killingdup(duplicated_file): print('Getting rid of duplicated line') with open(duplicated_file, 'r') as f: @@ -63,6 +77,7 @@ download_filters("https://filters.kylz.nl/RPZ/adguard/cname-original.txt") download_filters("https://filters.kylz.nl/RPZ/stevenblack/f-s.txt") download_filters("https://filters.kylz.nl/RPZ/someonewhocares/rpz.txt") download_filters("https://urlhaus.abuse.ch/downloads/rpz/") +download_filters("https://github.com/easylist/easylist/raw/master/easylist/easylist_adservers.txt") filtering(input) filteringcon(input) killingdup(input)