diff --git a/crawler.py b/crawler.py
index 999cee8..111a6ab 100644
--- a/crawler.py
+++ b/crawler.py
@@ -19,12 +19,13 @@ def download_filters(url,incoming):
     return url
 
 def filtering(filters_welcome):
+    unwanted = ['#',';','@','$',' NS','@@||','!']
     print("filtering . . .")
     with open(filters_welcome, 'r') as f:
         lines = f.read().splitlines()
     with open(filters_welcome, 'w') as f:
         for line in lines:
-            if not line.startswith(('#',';','@','$',' NS',' NS','@@||','!')) and line.strip():
+            if not line.startswith(tuple(unwanted)) and line.strip():
                 f.write('\n'.join([line + '\n']))
     print("++ successful!")
     f.close()
@@ -89,6 +90,7 @@ def killingdup(duplicated_file):
     f.close()
 
 def excluded(excluded ,incoming):
+    exline = [';','$','@',' IN']
     with open(excluded ,'r') as f:
         exclude = f.read().split()
     with open(incoming ,'r') as f:
@@ -97,7 +99,7 @@ def excluded(excluded ,incoming):
         for line in lines:
             if line.strip() and not line in exclude and not line.startswith(';'):
                 f.write('\n'.join([line + '\n']))
-            elif line.startswith((';','$','@',' IN')):
+            elif line.startswith(tuple(exline)):
                 f.write('\n'.join([line + '\n']))
             elif not line.strip():
                 f.write('\n'.join([line + '\n']))
diff --git a/excluder.py b/excluder.py
index b32e7cc..f381701 100644
--- a/excluder.py
+++ b/excluder.py
@@ -13,6 +13,7 @@ def add(incoming,userinput):
     crawler.sort(incoming)
 
 def add_file(incoming,excluded_in):
+    comment_roc = ['#',';','!']
     data= ""
     with open(incoming) as fp:
         data = fp.read()
@@ -29,7 +30,7 @@ def add_file(incoming,excluded_in):
         lines = f.read().split()
     with open(incoming ,'w') as f:
         for line in lines:
-            if line.strip() and not line.startswith(tuple(comment_roc)):
+            if line.strip() and not line.startswith(tuple(comment_roc)):
                 f.write('\n'.join([line + '\n']))
     crawler.sort(incoming)
     os.remove(excluded_in)
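
Both patches apply the same refactor: the prefix literals are hoisted into a named list and passed to str.startswith(), which accepts a tuple of prefixes. A minimal sketch of the pattern, runnable on its own (the keep() helper and the sample lines are illustrative, not part of either file):

    # Prefixes that mark comment/directive lines (values borrowed from the patch).
    UNWANTED = ('#', ';', '@', '$', ' NS', '@@||', '!')

    def keep(line):
        # Keep non-empty lines that do not start with any unwanted prefix.
        return bool(line.strip()) and not line.startswith(UNWANTED)

    sample = ['# a comment', 'example.com', '', '! a filter rule']
    print([l for l in sample if keep(l)])  # -> ['example.com']

Because str.startswith() accepts a tuple directly, defining unwanted, exline, and comment_roc as tuples up front would also avoid rebuilding the tuple on every loop iteration.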