Fixing crawler

This commit is contained in:
minoplhy 2021-08-03 18:56:08 +07:00
parent f651d59dc4
commit b93d4bd363
2 changed files with 43 additions and 4 deletions

View File

@ -3,8 +3,6 @@ import sys
import requests
import re
print('starting . . . ')
def clear_old_files(incoming):
try:
os.remove(incoming)
@ -27,8 +25,8 @@ def filtering(filters_welcome):
lines = f.read().splitlines()
with open(filters_welcome, 'w') as f:
for line in lines:
if not line.startswith(('#',';','@','$',' NS','@@||','!')) and line.strip():
f.write('\n'.join([line + '\n']))
if not line.startswith(('#',';','@','$',' NS',' NS','@@||','!')) and line.strip():
f.write('\n'.join([line + '\n']))
print("++ successful!")
f.close()
@ -82,6 +80,13 @@ def excluded(excluded ,incoming):
elif not line.strip():
f.write('\n'.join([line + '\n']))
def sort(incoming):
    """Sort the lines of the file at ``incoming`` in place.

    Lines are ordered by plain string comparison and written back to the
    same path. Duplicates and blank lines are kept.

    Args:
        incoming: Path of the text file to sort.

    Raises:
        FileNotFoundError: If ``incoming`` does not exist.
    """
    with open(incoming, 'r') as f:
        lines = f.readlines()
    # Only the final line can lack a terminator. Without this, sorting could
    # move it in front of another entry and the two would be written out as
    # one merged line.
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n'
    lines.sort()
    with open(incoming, 'w') as f:
        f.writelines(lines)
if __name__ == "__main__":
download_filters('https://filters.kylz.nl/RPZ/someonewhocares/rpz.txt','test.txt')
filtering('test.txt')

34
excluder.py Normal file
View File

@ -0,0 +1,34 @@
def add(incoming, input):
    """Add one entry to the list file at ``incoming``.

    After the entry is added, the file is rewritten with duplicate lines
    removed and the remaining lines ordered by plain string comparison.

    Args:
        incoming: Path of an existing text file holding one entry per line.
        input: Entry text to add, without a trailing newline. The name
            shadows the builtin but is kept so keyword callers keep working.

    Raises:
        FileNotFoundError: If ``incoming`` does not exist. The file is not
            created in that case.
    """
    with open(incoming, 'r') as f:
        text = f.read()
    # Terminate an unfinished last line first, otherwise the new entry would
    # be appended onto the end of the previous one.
    if text and not text.endswith('\n'):
        text += '\n'
    text += input + '\n'
    # ``text`` always ends with '\n', so the final split piece is an empty
    # remainder rather than an entry; drop it.
    entries = {line + '\n' for line in text.split('\n')[:-1]}
    with open(incoming, 'w') as f:
        f.writelines(sorted(entries))
def add_file(incoming, excluded_in):
    """Merge the lines of ``excluded_in`` into the file at ``incoming``.

    The result is written back to ``incoming`` with duplicate lines removed
    and the remaining lines ordered by plain string comparison. Blank lines
    already present in either file are kept (collapsed to one by the
    de-duplication); no extra blank line is introduced by the merge.

    Args:
        incoming: Path of the existing list file to update in place.
        excluded_in: Path of the file whose lines are merged in.

    Raises:
        FileNotFoundError: If either file does not exist. ``incoming`` is
            left untouched in that case.
    """
    with open(incoming) as fp:
        lines = fp.readlines()
    with open(excluded_in) as fp:
        lines += fp.readlines()
    # The last line of each file may lack a terminator. Normalize so that
    # "entry" and "entry\n" de-duplicate, and so the sorted output never
    # writes two entries onto one line.
    merged = {line if line.endswith('\n') else line + '\n' for line in lines}
    with open(incoming, 'w') as fp:
        fp.writelines(sorted(merged))
def sort(incoming):
    """Rewrite the file at ``incoming`` with its lines in sorted order.

    Ordering is plain string comparison. Every line is kept, including
    duplicates and blank lines.

    Args:
        incoming: Path of the text file to sort in place.

    Raises:
        FileNotFoundError: If ``incoming`` does not exist.
    """
    with open(incoming, 'r') as f:
        lines = f.readlines()
    # A final line without '\n' would be glued to whichever entry follows it
    # once sorting moves it, so terminate it before ordering.
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n'
    with open(incoming, 'w') as f:
        f.writelines(sorted(lines))