From 010815e7437f7886bb5cf0b415ba3658802dc0d4 Mon Sep 17 00:00:00 2001
From: "a.chernenko"
Date: Wed, 19 Nov 2025 14:39:32 +1000
Subject: [PATCH] =?UTF-8?q?=D0=A0=D0=B0=D0=BD=D0=B4=D0=BE=D0=BC=D0=BD?=
 =?UTF-8?q?=D1=8B=D0=B9=20=D0=B7=D0=B0=D0=B3=D0=BE=D0=BB=D0=BE=D0=B2=D0=BE?=
 =?UTF-8?q?=D0=BA=20=D0=B7=D0=B0=D0=BF=D1=80=D0=BE=D1=81=D0=B0.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
NOTE(review): this copy of the patch was re-wrapped from a whitespace-collapsed
source. The indentation of the download.py context/removed lines is
reconstructed, so verify it against the tree or apply with
`git apply --ignore-whitespace`. include/http_header.py was revised during
review ("br" is advertised only when a Brotli decoder is importable), so the
blob id on its index line predates that revision.

 download.py                        | 19 ++-----
 include/http_header.py             | 98 ++++++++++++++++++++++++++++++
 net_tree.py => include/net_tree.py |  0
 3 files changed, 102 insertions(+), 15 deletions(-)
 create mode 100644 include/http_header.py
 rename net_tree.py => include/net_tree.py (100%)

diff --git a/download.py b/download.py
index 04fa010..57d20de 100755
--- a/download.py
+++ b/download.py
@@ -4,24 +4,13 @@ import re
 import os
 import sys
 import ast
-import net_tree
 import requests
+from include import net_tree
+from include.http_header import get_headers
 
 # компилируем регулярку поиска ipv4 адреса
 ipv4_find_str=re.compile(r"[^0-9.]?(25[0-5]|2[0-4][0-9]|1?[0-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1?[0-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1?[0-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1?[0-9][0-9]|[0-9])(/([0-9]{1}[0-9]*))?[^0-9.]?")
 
-# заголовок HTTP запроса
-headers = {
-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-                  "AppleWebKit/537.36 (KHTML, like Gecko) "
-                  "Chrome/124.0.0.0 Safari/537.36",
-    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
-    "Accept-Language": "en-US,en;q=0.5",
-    "Accept-Encoding": "gzip, deflate, br",
-    "Connection": "keep-alive",
-    "Upgrade-Insecure-Requests": "1",
-}
-
 # метод сбора словаря ip адресов ipv4 из текста
 def ipv4_find(strip:str, size:int):
     """
@@ -77,7 +66,7 @@ def list_ip(c_dict: dict = []):
         # бежим весь список ссылок пока не код 200
         for c_url in c_list['url']:
             try:
-                if (result:=requests.get(c_url, headers=headers)) and result.status_code == 200 and result.text:
+                if (result:=requests.get(c_url, headers=get_headers())) and result.status_code == 200 and result.text:
                     print(f"URL: {c_url}")
                     # пополняем словарь ipv4_list
                     if ipv4: ipv4_list.update(ipv4_find(result.text,ipv4))
@@ -141,7 +130,7 @@ if __name__ == "__main__":
     try:
         # если файл list ссылка, загружаем и парсим его
         with open(list_file, "r") as file:
-            if (result:=requests.get(url_list_file:=file.readline().strip(), headers=headers)) and result.status_code == 200 and result.text:
+            if (result:=requests.get(url_list_file:=file.readline().strip(), headers=get_headers())) and result.status_code == 200 and result.text:
                 ip_list = ast.literal_eval(result.text)
                 print(f"Список выгрузки по url: {url_list_file}")
     except requests.exceptions.MissingSchema:
diff --git a/include/http_header.py b/include/http_header.py
new file mode 100644
index 0000000..5c8d9c5
--- /dev/null
+++ b/include/http_header.py
@@ -0,0 +1,98 @@
+"""Randomized browser-like HTTP request headers."""
+import importlib.util
+import random
+
+# OS tokens shared by the Chrome and Firefox User-Agent templates
+_PLATFORMS = (
+    'Windows NT 10.0; Win64; x64',
+    'Windows NT 10.0; WOW64',
+    'Macintosh; Intel Mac OS X 10_15_7',
+    'X11; Linux x86_64',
+)
+
+_ACCEPT_LANGUAGES = (
+    "en-US,en;q=0.9",
+    "en-US,en;q=0.8",
+    "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7",
+    "en;q=0.8",
+)
+
+_SEC_FETCH_SITES = ("none", "same-site", "same-origin", "cross-site")
+
+
+def _brotli_available():
+    """Tell whether a Brotli decoder module can be imported."""
+    return any(
+        importlib.util.find_spec(name) is not None
+        for name in ("brotli", "brotlicffi")
+    )
+
+
+def get_headers():
+    """Build a randomized set of browser-like HTTP request headers.
+
+    A Chrome or a Firefox User-Agent is picked at random; only the Chrome
+    variant carries the sec-ch-ua-* and Sec-Fetch-* headers. "br" is put
+    into Accept-Encoding only when a Brotli decoder is importable, so the
+    server is never invited to use an encoding the client cannot decode.
+
+    Returns:
+        dict: header name -> header value, inserted in random order.
+    """
+    platform = random.choice(_PLATFORMS)
+    chrome_only = []
+
+    if random.choice((True, False)):
+        # Chrome: random major/build/patch version numbers
+        major = random.randint(120, 128)
+        build = random.randint(6000, 9999)
+        patch = random.randint(10, 200)
+        user_agent = (
+            f"Mozilla/5.0 ({platform}) "
+            "AppleWebKit/537.36 (KHTML, like Gecko) "
+            f"Chrome/{major}.0.{build}.{patch} Safari/537.36"
+        )
+        if "Windows" in platform:
+            sec_platform = '"Windows"'
+        elif "Macintosh" in platform:
+            sec_platform = '"macOS"'
+        else:
+            sec_platform = '"Linux"'
+        brands = (
+            f'"Not_A Brand";v="8", "Chromium";v="{major}", '
+            f'"Google Chrome";v="{major}"'
+        )
+        chrome_only = [
+            ("sec-ch-ua", brands),
+            ("sec-ch-ua-mobile", "?0"),
+            ("sec-ch-ua-platform", sec_platform),
+            ("Sec-Fetch-Site", random.choice(_SEC_FETCH_SITES)),
+            ("Sec-Fetch-Mode", "navigate"),
+            ("Sec-Fetch-User", "?1"),
+            ("Sec-Fetch-Dest", "document"),
+        ]
+    else:
+        # Firefox: no sec-ch-ua-* headers are added for it
+        ff_ver = random.randint(110, 125)
+        user_agent = (
+            f"Mozilla/5.0 ({platform}; rv:{ff_ver}.0) "
+            f"Gecko/20100101 Firefox/{ff_ver}.0"
+        )
+
+    # Offer Brotli only if the response can actually be decoded locally
+    encodings = "gzip, deflate, br" if _brotli_available() else "gzip, deflate"
+
+    headers_list = [
+        ("User-Agent", user_agent),
+        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"),
+        ("Accept-Language", random.choice(_ACCEPT_LANGUAGES)),
+        ("Accept-Encoding", encodings),
+        ("Connection", "keep-alive"),
+        ("Upgrade-Insecure-Requests", "1"),
+        *chrome_only,
+    ]
+
+    # Shuffle so that the header order differs from call to call
+    random.shuffle(headers_list)
+
+    return dict(headers_list)
diff --git a/net_tree.py b/include/net_tree.py
similarity index 100%
rename from net_tree.py
rename to include/net_tree.py