From 4f1d32dfcf1972c6126920d8fc4014f64d4ac453 Mon Sep 17 00:00:00 2001 From: minoplhy Date: Mon, 15 Nov 2021 17:07:48 +0700 Subject: [PATCH] crawler.filtering : add ' IN' to unwanted, crawler.excluded : remove Unwanted --- crawler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crawler.py b/crawler.py index d525fc7..a238022 100644 --- a/crawler.py +++ b/crawler.py @@ -32,7 +32,7 @@ def download_group_filters(multi_url,incoming): f.write(data) def filtering(filters_welcome): - unwanted = ['#',';','@','$',' NS',' NS','@@||','!','local-data:','-'] + unwanted = ['#',';','@','$',' NS',' NS','@@||','!','local-data:','-',' IN'] print("filtering . . .") with open(filters_welcome, 'r') as f: lines = f.read().splitlines() @@ -150,14 +150,14 @@ def killingdup(duplicated_file): f.close() def excluded(excluded ,incoming): - exline = [';','$','@',' IN'] + exline = [';','$','@',' IN','#'] with open(excluded ,'r') as f: exclude = f.read().split() with open(incoming ,'r') as f: lines = f.read().splitlines() # read lines with open(incoming ,'w') as f: for line in lines: - if line.strip() and not line in exclude and not line.startswith(';'): + if line.strip() and not line in exclude: f.write('\n'.join([line + '\n'])) elif line.startswith((tuple(exline))): f.write('\n'.join([line + '\n']))