From 9b2b51d7ebe8cce8b9a969f323ffecc82ef0aa33 Mon Sep 17 00:00:00 2001 From: minoplhy Date: Fri, 18 Mar 2022 01:43:04 +0700 Subject: [PATCH] crawler : Fix regex not working with |http$ --- crawler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crawler.py b/crawler.py index 9c6e1a5..026121a 100644 --- a/crawler.py +++ b/crawler.py @@ -82,6 +82,8 @@ def filteringcon(filters_regex_one): with open(filters_regex_one) as f: file = f.read().split('\n') for i in range(len(file)): + file[i] = re.sub('\|http\$..*\|..*', '', file[i]) + file[i] = re.sub('\|http\$..*', '', file[i]) file[i] = re.sub('(@@\|\|..*)|(\|\|..*(\/|\*(\-|\&|banner|..))..*)|(^\|http)|(^(_|\*|&|\-|\/|\.|:|@@|\?|\=|\;|\,|\$|\~)..*)|(..*(#|\$|\*)..*)|(^..*\$(app=|removeparam=|popup)..*)(\$..*)', '', file[i]) file[i] = re.sub('0\.0\.0\.0 0\.0\.0\.0\Z', '', file[i]) file[i] = re.sub('\A'+str(IP4)+' ', '', file[i])