diff --git a/crawler.py b/crawler.py index 9c6e1a5..026121a 100644 --- a/crawler.py +++ b/crawler.py @@ -82,6 +82,8 @@ def filteringcon(filters_regex_one): with open(filters_regex_one) as f: file = f.read().split('\n') for i in range(len(file)): + file[i] = re.sub('\|http\$..*\|..*', '', file[i]) + file[i] = re.sub('\|http\$..*', '', file[i]) file[i] = re.sub('(@@\|\|..*)|(\|\|..*(\/|\*(\-|\&|banner|..))..*)|(^\|http)|(^(_|\*|&|\-|\/|\.|:|@@|\?|\=|\;|\,|\$|\~)..*)|(..*(#|\$|\*)..*)|(^..*\$(app=|removeparam=|popup)..*)(\$..*)', '', file[i]) file[i] = re.sub('0\.0\.0\.0 0\.0\.0\.0\Z', '', file[i]) file[i] = re.sub('\A'+str(IP4)+' ', '', file[i])