diff --git a/download.py b/download.py index 81ee2e7..ec7cacc 100755 --- a/download.py +++ b/download.py @@ -123,7 +123,7 @@ def list_ip(c_list: list = []): # пробегаем словарь выгрузки for c_dict in c_list: - # если есть источник ссылка + # # если есть источник ссылка if 'url' in list(c_dict): # бежим весь список ссылок пока не код 200 for c_url in c_dict['url']: @@ -150,48 +150,37 @@ def list_ip(c_list: list = []): # пополняем словарь ipv6_list ipv6_list.update(ipv6_find(str(c_dict['static6']),ipv6)) - # если ключ не сжимать - if ipv4_list and not compress: - ipv4_list:str="".join([f"route {k} blackhole;\n" for k, v in ipv4_list.items() if isinstance(v, list)]) # сжимаем подсети ipv4 - elif ipv4_list and compress: - # строим дерево - Root = net_tree.Node(net_tree.Net(0, 0), 0) - # пробегаем в цикле - for c in ipv4_list.values(): - # добавляем запись в дерево - Root.addSubnet(net_tree.Node(net_tree.Net(c[0], c[1]), 1)) - Root.finishTreeFirst() - # жесткое сжатие в размер 30000 записей - #Root.collapseRoot(Root.real_ip_records_count - 30000) - # более мягкое сжатие - Root.collapse(1,Root.real_ip_records_count) - # возвращаем результат - ipv4_list:str=Root.returnCollapsedTree('route {addr}/{masklen} blackhole;') + if ipv4_list: + # создаем дерево + Root = net_tree.Node(net_tree.Net(0, 0, 4)) + # добавляем IPv4 подсети + for ip_int, mask in sorted(ipv4_list.values(), key=lambda x: x[0]): + Root.insert(net_tree.Net(ip_int, mask, 4)) + # считаем статистику + Root.finalize() + # сжатие по CIDR, если ключ сжимать + if compress: Root.collapse() + # получаем результат + ipv4_list = Root.export('route {addr}/{masklen} blackhole;') else: ipv4_list:bool=False - # если ключ не сжимать - if ipv6_list and not compress: - ipv6_list:str="".join([f"route {k} blackhole;\n" for k, v in ipv6_list.items() if isinstance(v, list)]) # сжимаем подсети ipv6 - elif ipv6_list and compress: + if ipv6_list: # строим дерево - Root = net_tree.Node(net_tree.Net(1 << 127, 0), 0) - # пробегаем в цикле - for c in ipv6_list.values(): - # добавляем запись в дерево - Root.addSubnet(net_tree.Node(net_tree.Net(c[0], c[1]), 1)) - Root.finishTreeFirst() - # жесткое сжатие в размер 30000 записей - #Root.collapseRoot(Root.real_ip_records_count - 30000) - # более мягкое сжатие - Root.collapse(1,Root.real_ip_records_count) - # возвращаем результат - ipv6_list:str=Root.returnCollapsedTree('route {addr}/{masklen} blackhole;') + Root = net_tree.Node(net_tree.Net(1 << 127, 0, 6)) + # добавляем IPv4 подсети + for ip_int, mask in sorted(ipv6_list.values(), key=lambda x: x[0]): + Root.insert(net_tree.Net(ip_int, mask, 6)) + # считаем статистику + Root.finalize() + # сжатие по CIDR, если ключ сжимать + if compress: Root.collapse() + # получаем результат + ipv6_list = Root.export('route {addr}/{masklen} blackhole;') else: ipv6_list:bool=False - # возвращаем 2 списка маршрутов return ipv4_list, ipv6_list except Exception as e: diff --git a/include/net_tree.py b/include/net_tree.py index 44c097e..bec3d14 100644 --- a/include/net_tree.py +++ b/include/net_tree.py @@ -1,256 +1,255 @@ # -# Unified IPv4/IPv6 Network Aggregator +# CIDR AGGREGATOR (IPv4 + IPv6) # -def detect_ip_version(ip_int: int): - """Определяем IPv4 или IPv6 по величине числа""" - if ip_int <= 0xFFFFFFFF: - return 4, 32 - else: - return 6, 128 - - -def get_mask_by_mask_size(mask_size, total_bits): +def mask_to_int(mask_size, total_bits): return ((1 << total_bits) - 1) ^ ((1 << (total_bits - mask_size)) - 1) -def get_ip_volume(mask_size, total_bits): +def ip_volume(mask_size, total_bits): return 1 << (total_bits - mask_size) -def int_to_ipv4(n): - return ".".join(str((n >> (24 - 8*i)) & 0xFF) for i in range(4)) - - -def int_to_ipv6(n): - # разбор на 8 блоков по 16 бит - blocks = [(n >> (112 - 16*i)) & 0xFFFF for i in range(8)] - # убираем ведущие нули (схлопывание "::") - best_start = -1 - best_len = 0 - cur_start = -1 - cur_len = 0 - - for i in range(8): - if blocks[i] == 0: - if cur_start == -1: - cur_start = i - cur_len = 1 - else: - cur_len += 1 - else: - if cur_len > best_len: - best_len = cur_len - best_start = cur_start - cur_start = -1 - cur_len = 0 - - if cur_len > best_len: - best_len = cur_len - best_start = cur_start - - if best_len > 1: - new_blocks = [] - i = 0 - while i < 8: - if i == best_start: - new_blocks.append("") - i += best_len - else: - new_blocks.append(format(blocks[i], "x")) - i += 1 - res = ":".join(new_blocks) - # иногда получается ":::" → исправим - while ":::" in res: - res = res.replace(":::", "::") - return res - else: - return ":".join(format(b, "x") for b in blocks) - - -def int_to_ip(n, version): - return int_to_ipv4(n) if version == 4 else int_to_ipv6(n) - - class Net: - __slots__ = ['mask_size', 'net', 'mask', 'ip_volume', 'version', 'total_bits'] + __slots__ = ["version","bits","mask_size","net","mask","volume"] - def __init__(self, net: int, mask_size: int): - # Определяем IPv4/IPv6 - self.version, self.total_bits = detect_ip_version(net) + def __init__(self, net: int, mask_size: int, version: int=4): + self.version, self.bits = (4, 32) if version==4 else (6, 128) self.mask_size = mask_size - - self.mask = get_mask_by_mask_size(mask_size, self.total_bits) + self.mask = mask_to_int(mask_size, self.bits) self.net = net & self.mask - self.ip_volume = get_ip_volume(mask_size, self.total_bits) + self.volume = ip_volume(mask_size, self.bits) - def hasSubnet(self, other: 'Net'): - if other.version != self.version: - return 0 - if other.mask_size <= self.mask_size: - return 0 - return self.net == (other.net & self.mask) + # + # --- IP CONVERSION --- + # + def __int_to_ipv4(self, n): + return ".".join(str((n >> (24 - 8*i)) & 0xFF) for i in range(4)) - def isSameNet(self, other: 'Net'): - return ( - self.version == other.version and - self.mask_size == other.mask_size and - self.net == other.net - ) + def __int_to_ipv6(self, n): + blocks = [(n >> (112 - 16*i)) & 0xFFFF for i in range(8)] - def getCommonNet(self, other: 'Net', min_mask_size: int): - if self.version != other.version: - return 0 - if self.mask_size <= min_mask_size: return 0 - if other.mask_size <= min_mask_size: return 0 + best_start = -1 + best_len = 0 + cur_start = -1 + cur_len = 0 - upper = min(self.mask_size, other.mask_size) - 1 + for i in range(8): + if blocks[i] == 0: + if cur_start < 0: + cur_start = i + cur_len = 1 + else: + cur_len += 1 + else: + if cur_len > best_len: + best_len = cur_len + best_start = cur_start + cur_start = -1 + cur_len = 0 - for mask_size in range(upper, min_mask_size - 1, -1): - mask = get_mask_by_mask_size(mask_size, self.total_bits) - if (self.net & mask) == (other.net & mask): - return Net(self.net, mask_size) - return 0 + if cur_len > best_len: + best_len = cur_len + best_start = cur_start + if best_len > 1: + new = [] + i = 0 + while i < 8: + if i == best_start: + new.append('') + i += best_len + else: + new.append(format(blocks[i], 'x')) + i += 1 + res = ":".join(new) + while ":::" in res: + res = res.replace(":::", "::") + return res + + return ":".join(format(b, 'x') for b in blocks) + + def __int_to_ip(self, n): + return self.__int_to_ipv4(n) if self.version == 4 else self.__int_to_ipv6(n) + + # + # --- PUBLIC API --- + # def getAsString(self, fmt='{addr}/{masklen}'): return fmt.format( - addr=int_to_ip(self.net, self.version), + addr=self.__int_to_ip(self.net), masklen=self.mask_size ) + def is_adjacent(self, other): + if self.version != other.version: return False + if self.mask_size != other.mask_size: return False + step = 1 << (self.bits - self.mask_size) + return self.net + step == other.net or other.net + step == self.net + + def supernet(self): + if self.mask_size == 0: + return self + new_mask = self.mask_size - 1 + new_mask_int = mask_to_int(new_mask, self.bits) + new_net = self.net & new_mask_int + return Net(new_net, new_mask, self.version) + class Node: - __slots__ = ['net', 'child1', 'child2', 'is_real_net', 'real_ip_volume', - 'real_ip_records_count', 'weight', 'max_child_weight', 'added_fake_ip_volume'] + __slots__ = [ + "net", "child0", "child1", + "is_real", + "real_volume", "real_count", + "fake_volume", "weight", "max_child_weight" + ] - def __init__(self, net: Net, is_real_net: int): + def __init__(self, net: Net): self.net = net + self.child0 = None self.child1 = None - self.child2 = None - self.is_real_net = is_real_net - self.real_ip_volume = 0 - self.real_ip_records_count = 0 - self.weight = 0.0 - self.max_child_weight = 0.0 - self.added_fake_ip_volume = 0 + self.is_real = False - def getNet(self): - return self.net - - def addSubnet(self, NewNode: 'Node'): - if self.net.isSameNet(NewNode.net): - if not self.is_real_net and NewNode.is_real_net: - self.is_real_net = 1 - self.child1 = None - self.child2 = None - return 1 - - if self.is_real_net and self.net.hasSubnet(NewNode.net): - return 1 - - if not self.net.hasSubnet(NewNode.net): - return 0 - - for Child in (self.child1, self.child2): - if Child and Child.addSubnet(NewNode): - return 1 - - for child_attr in ('child1', 'child2'): - Child = getattr(self, child_attr) - if Child: - CommonNet = Child.net.getCommonNet(NewNode.net, self.net.mask_size + 1) - if CommonNet: - CommonNode = Node(CommonNet, 0) - CommonNode.addSubnet(NewNode) - CommonNode.addSubnet(Child) - setattr(self, child_attr, CommonNode) - return 1 - - if not self.child1: - self.child1 = NewNode - else: - self.child2 = NewNode - return 1 - - def finishTreeFirst(self): - if self.is_real_net: - self.real_ip_volume = self.net.ip_volume - self.real_ip_records_count = 1 - self.weight = 0 - self.max_child_weight = 0 - else: - self.real_ip_volume = 0 - self.real_ip_records_count = 0 - self.max_child_weight = 0 - for Child in (self.child1, self.child2): - if Child: - Child.finishTreeFirst() - self.real_ip_volume += Child.real_ip_volume - self.real_ip_records_count += Child.real_ip_records_count - self.max_child_weight = max(self.max_child_weight, Child.weight, Child.max_child_weight) - self.recalcWeight() - - def collapse(self, min_weight, max_net_delta): - if self.weight >= min_weight: - self.weight = 0 - self.max_child_weight = 0 - delta = (self.net.ip_volume - self.real_ip_volume) - self.added_fake_ip_volume - self.added_fake_ip_volume = self.net.ip_volume - self.real_ip_volume - return self.real_ip_records_count - 1, delta - - net_delta = 0 - fake_ip_delta = 0 + self.real_volume = 0 + self.real_count = 0 + self.fake_volume = 0 + self.weight = 0 self.max_child_weight = 0 - for Child in (self.child1, self.child2): - if Child: - if net_delta < max_net_delta and min_weight <= max(Child.weight, Child.max_child_weight): - child_net_delta, child_fake_ip_count = Child.collapse(min_weight, max_net_delta - net_delta) - net_delta += child_net_delta - fake_ip_delta += child_fake_ip_count - self.max_child_weight = max(self.max_child_weight, Child.weight, Child.max_child_weight) + # + # INSERT NETWORK INTO TRIE + # + def insert(self, new_net: Net): + return self.__insert(new_net, level=0) - if net_delta > 0: - self.added_fake_ip_volume += fake_ip_delta - self.real_ip_records_count -= net_delta - self.recalcWeight() + def __insert(self, new_net: Net, level): + # если дошли до маски сети — это лист + if level == new_net.mask_size: + if not self.is_real: + self.is_real = True + self.child0 = None + self.child1 = None + return - if self.weight >= min_weight: + # разбираем бит адреса + bit_pos = self.net.bits - 1 - level + direction = (new_net.net >> bit_pos) & 1 + + if direction == 0: + if not self.child0: + child_net = Net(new_net.net & mask_to_int(level+1, self.net.bits), level+1, self.net.version) + self.child0 = Node(child_net) + self.child0.__insert(new_net, level+1) + + else: + if not self.child1: + child_net = Net(new_net.net & mask_to_int(level+1, self.net.bits), level+1, self.net.version) + self.child1 = Node(child_net) + self.child1.__insert(new_net, level+1) + + # + # CALCULATE WEIGHTS + # + def finalize(self): + if self.is_real: + self.real_volume = self.net.volume + self.real_count = 1 + self.fake_volume = 0 self.weight = 0 self.max_child_weight = 0 - delta = (self.net.ip_volume - self.real_ip_volume) - (self.added_fake_ip_volume - fake_ip_delta) - self.added_fake_ip_volume = self.net.ip_volume - self.real_ip_volume - return self.real_ip_records_count - 1, delta + return + + self.real_volume = 0 + self.real_count = 0 + self.fake_volume = 0 + self.max_child_weight = 0 + + for ch in (self.child0, self.child1): + if ch: + ch.finalize() + self.real_volume += ch.real_volume + self.real_count += ch.real_count + self.fake_volume += ch.fake_volume + self.max_child_weight = max(self.max_child_weight, ch.weight, ch.max_child_weight) + + self.__recalc() + + def __recalc(self): + missing = self.net.volume - self.real_volume - self.fake_volume + if missing > 0: + self.weight = (self.real_count - 1) / (missing ** 0.5) else: - return net_delta, fake_ip_delta + self.weight = float('inf') - def collapseRoot(self, required_net_delta): - while required_net_delta > 0: - delta, fake_ip_volume = self.collapse(self.max_child_weight, required_net_delta) - required_net_delta -= delta + # + # COLLAPSE / AGGREGATE + # + def collapse(self, min_weight=0, max_delta=float('inf')): + if self.is_real: + return 0,0 - def returnCollapsedTree(self, fmt='{addr}/{masklen}'): - if self.is_real_net or self.weight == 0: - return self.net.getAsString(fmt) + "\n" - else: - res = "" - for Child in (self.child1, self.child2): - if Child: - res += Child.returnCollapsedTree(fmt) - return res + delta = 0 + fake = 0 - def recalcWeight(self): - fake_ip_delta = self.net.ip_volume - self.real_ip_volume - self.added_fake_ip_volume - if fake_ip_delta > 0: - self.weight = (self.real_ip_records_count - 1) / (fake_ip_delta ** 0.5) - else: - self.weight = float('Inf') + # сворачиваем детей + for ch in (self.child0, self.child1): + if ch: + d, f = ch.collapse(min_weight, max_delta - delta) + delta += d + fake += f - def getNotRealIpCount(self): - if self.is_real_net: return 0 - if self.weight == 0: return self.net.ip_volume - self.real_ip_volume - res = 0 - for Child in (self.child1, self.child2): - if Child: - res += Child.getNotRealIpCount() - return res + # попытаемся объединить + if self.child0 and self.child1: + c0 = self.child0 + c1 = self.child1 + + if (c0.is_real and c1.is_real and + c0.net.is_adjacent(c1.net)): + + super_net = c0.net.supernet() + + # превращаем текущий узел в супернет + self.net = super_net + self.is_real = True + self.child0 = None + self.child1 = None + + self.real_volume = c0.real_volume + c1.real_volume + self.fake_volume = super_net.volume - self.real_volume + self.real_count = 1 + self.weight = 0 + self.max_child_weight = 0 + + return delta + 2, fake + self.fake_volume + + # пересчитываем статистику + if not self.is_real: + self.real_volume = 0 + self.real_count = 0 + self.fake_volume = 0 + self.max_child_weight = 0 + + for ch in (self.child0, self.child1): + if ch: + self.real_volume += ch.real_volume + self.real_count += ch.real_count + self.fake_volume += ch.fake_volume + self.max_child_weight = max(self.max_child_weight, ch.weight, ch.max_child_weight) + self.__recalc() + return delta, fake + + def export(self, fmt='{addr}/{masklen}'): + result = [] + + def walk(node): + if node is None: + return + # если суперсеть реальная – дети не нужны + if node.is_real: + result.append(node.net.getAsString(fmt)) + return + walk(node.child0) + walk(node.child1) + + walk(self) + return "\n".join(result)