import random
import xml.etree.ElementTree as ET

#-------------------------------------#
#       Train on the dataset
#-------------------------------------#
import datetime
import os

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from .nets.yolo import YoloBody
from .nets.yolo_training import (ModelEMA, YOLOLoss, get_lr_scheduler,
                                set_optimizer_lr, weights_init)
from .utils.callbacks import EvalCallback, LossHistory
from .utils.dataloader import YoloDataset, yolo_dataset_collate
from .utils.utils import download_weights, get_anchors, get_classes, show_config
from .utils.utils_fit import fit_one_epoch




class Yolov7:

    
    def __init__(self, classes_path: str, VOCdevkit_path: str = 'VOCdevkit' ) -> None:
        """Remember the dataset locations and create the statistics counters.

        Args:
            classes_path: Path handed to ``get_classes`` to obtain the class names.
            VOCdevkit_path: Root folder of the VOC-style dataset.
        """
        self.classes_path = classes_path
        self.VOCdevkit_path = VOCdevkit_path

        # get_classes returns a pair; only its first item (the class names) is kept here.
        class_names, _ = get_classes(classes_path)
        self.classes = class_names

        # (year, split) pairs that voc_annotation iterates over.
        dataset_splits = [('2007', 'train'), ('2007', 'val')]
        self.VOCdevkit_sets: list = dataset_splits

        # One image counter per split and one object counter per class, all starting at zero.
        self.photo_nums = np.zeros(len(dataset_splits))
        self.nums = np.zeros(len(class_names))



    def _set_annotation_file(self, annotation_mode: int = 0, trainval_percent: float = 0.9, train_percent: float = 0.9) -> tuple:
        """Bundle the annotation settings into a single tuple.

        Args:
            annotation_mode: Step selector consumed by ``voc_annotation``
                (0 runs both steps, 1 only the ImageSets split, 2 only the
                training list files).
            trainval_percent: Fraction of all samples placed in train+val.
            train_percent: Fraction of train+val placed in train.

        Returns:
            ``(annotation_mode, trainval_percent, train_percent)``, unchanged.
        """
        settings = (annotation_mode, trainval_percent, train_percent)
        return settings


    def _convert_annotation(self, year: int, image_id: int, list_file: list) -> None:
        """Append the usable boxes of one VOC annotation file to ``list_file``.

        Reads ``<VOCdevkit_path>/VOC<year>/Annotations/<image_id>.xml``. For every
        ``object`` element whose ``name`` is listed in ``self.classes`` and whose
        ``difficult`` flag is not 1, the text `` xmin,ymin,xmax,ymax,class_index``
        (leading space, coordinates truncated to int) is written to ``list_file``
        and the matching entry of ``self.nums`` is incremented.

        Args:
            year: Value substituted into the ``VOC<year>`` folder name.
            image_id: Annotation file name without the ``.xml`` suffix.
            list_file: Destination object; only its ``write`` method is used.

        NOTE(review): the annotations on this signature (int / int / list) do not
        match how ``voc_annotation`` calls it (str, str, open text file). They are
        left untouched to keep the interface stable.
        """
        annotation_path = os.path.join(self.VOCdevkit_path, 'VOC%s/Annotations/%s.xml'%(year, image_id))

        # Parse inside a context manager so the handle is released even on a parse error.
        with open(annotation_path, encoding='utf-8') as in_file:
            root = ET.parse(in_file).getroot()

        for obj in root.iter('object'):
            difficult_node = obj.find('difficult')
            # A missing <difficult> tag is treated as "not difficult".
            difficult = 0 if difficult_node is None else difficult_node.text

            cls = obj.find('name').text
            if cls not in self.classes or int(difficult) == 1:
                continue

            cls_id = self.classes.index(cls)
            xmlbox = obj.find('bndbox')
            # Coordinates may be stored as floats in the XML; go through float before truncating.
            box = tuple(
                int(float(xmlbox.find(tag).text))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            list_file.write(" " + ",".join(str(value) for value in box) + ',' + str(cls_id))

            self.nums[cls_id] = self.nums[cls_id] + 1


    def voc_annotation(self) -> None:
        """Create the VOC2007 split lists and the per-split training list files.

        The step selection and split ratios come from ``_set_annotation_file``:

        * mode 0 or 1: list the ``.xml`` files in ``VOC2007/Annotations`` and
          write ``trainval.txt``, ``test.txt``, ``train.txt`` and ``val.txt``
          into ``VOC2007/ImageSets/Main``. The RNG is seeded with 0, so the
          split is reproducible for a given directory listing.
        * mode 0 or 2: for every ``(year, split)`` in ``self.VOCdevkit_sets``
          write ``<year>_<split>.txt`` into the current working directory, one
          line per image (absolute image path followed by its boxes), then
          print a per-class object count table and dataset-size warnings.

        Side effects: ``self.photo_nums`` receives the image count per split and
        ``self.nums`` the object count per class. ``self.nums`` is reset before
        counting so that repeated calls do not accumulate.

        Raises:
            ValueError: If the absolute dataset path contains a space.
        """
        random.seed(0)

        annotation_mode, trainval_percent, train_percent = self._set_annotation_file()

        if " " in os.path.abspath(self.VOCdevkit_path):
            raise ValueError("数据集存放的文件夹路径与图片名称中不可以存在空格，否则会影响正常的模型训练，请注意修改。")

        if annotation_mode == 0 or annotation_mode == 1:
            print("Generate txt in ImageSets.")
            xmlfilepath = os.path.join(self.VOCdevkit_path, 'VOC2007/Annotations')
            saveBasePath = os.path.join(self.VOCdevkit_path, 'VOC2007/ImageSets/Main')
            total_xml = [name for name in os.listdir(xmlfilepath) if name.endswith(".xml")]

            num = len(total_xml)
            indices = range(num)
            tv = int(num * trainval_percent)
            tr = int(tv * train_percent)
            # Keep the exact sampling calls so the seeded split stays the same;
            # the sets below only make the membership tests O(1).
            trainval = random.sample(indices, tv)
            train = random.sample(trainval, tr)
            trainval_ids = set(trainval)
            train_ids = set(train)

            print("train and val size",tv)
            print("train size",tr)

            # Written as UTF-8 because these files are read back as UTF-8 below.
            with open(os.path.join(saveBasePath, 'trainval.txt'), 'w', encoding='utf-8') as ftrainval, \
                 open(os.path.join(saveBasePath, 'test.txt'), 'w', encoding='utf-8') as ftest, \
                 open(os.path.join(saveBasePath, 'train.txt'), 'w', encoding='utf-8') as ftrain, \
                 open(os.path.join(saveBasePath, 'val.txt'), 'w', encoding='utf-8') as fval:
                for i in indices:
                    name = total_xml[i][:-4] + '\n'  # strip the ".xml" suffix
                    if i not in trainval_ids:
                        ftest.write(name)
                        continue
                    ftrainval.write(name)
                    if i in train_ids:
                        ftrain.write(name)
                    else:
                        fval.write(name)
            print("Generate txt in ImageSets done.")

        if annotation_mode == 0 or annotation_mode == 2:
            print("Generate 2007_train.txt and 2007_val.txt for train.")
            # Start the per-class statistics from zero for this run.
            self.nums.fill(0)
            dataset_root = os.path.abspath(self.VOCdevkit_path)

            for type_index, (year, image_set) in enumerate(self.VOCdevkit_sets):
                ids_path = os.path.join(self.VOCdevkit_path, 'VOC%s/ImageSets/Main/%s.txt'%(year, image_set))
                with open(ids_path, encoding='utf-8') as ids_file:
                    image_ids = ids_file.read().strip().split()

                with open('%s_%s.txt'%(year, image_set), 'w', encoding='utf-8') as list_file:
                    for image_id in image_ids:
                        list_file.write('%s/VOC%s/JPEGImages/%s.jpg'%(dataset_root, year, image_id))
                        self._convert_annotation(year, image_id, list_file)
                        list_file.write('\n')

                self.photo_nums[type_index] = len(image_ids)
            print("Generate 2007_train.txt and 2007_val.txt for train done.")

            # Two right-aligned columns: class name and object count.
            str_nums = [str(int(x)) for x in self.nums]
            columns = [self.classes, str_nums]
            col_widths = [max((len(cell) for cell in column), default=0) for column in columns]
            for row in zip(*columns):
                cells = "".join(
                    cell.rjust(width) + " | " for cell, width in zip(row, col_widths)
                )
                print("| " + cells)

            if self.photo_nums[0] <= 500:
                print("训练集数量小于500，属于较小的数据量，请注意设置较大的训练世代（Epoch）以满足足够的梯度下降次数（Step）。")

            if np.sum(self.nums) == 0:
                # The message is deliberately printed three times.
                for _ in range(3):
                    print("在数据集中并未获得任何目标，请注意修改classes_path对应自己的数据集，并且保证标签名字正确，否则训练将会没有任何效果！")
                print("（重要的事情说三遍）。")
    
    
    def train(self, cuda: bool = True, distributed: bool = False, sync_bn: bool = False,
              fp16: bool = False, anchors_path: str = 'yolovx/yolov7pytorch/model_data/yolo_anchors.txt', pretrained: bool = False,
              model_path: str = "", phi: str = "l", input_shape: list = [640, 640],
              save_dir: str = 'logs', save_period: int = 10, eval_flag: bool= True,
              eval_period: int = 10, num_workers: int = 4) -> None:
        """Build a YoloBody model and train it on ``2007_train.txt`` / ``2007_val.txt``.

        Most hyper-parameters (augmentation probabilities, epoch counts, batch
        sizes, optimizer type, learning-rate schedule) are local constants set at
        the top of the body rather than arguments. The two annotation files are
        opened relative to the current working directory.

        Args:
            cuda: When true the model is wrapped (DataParallel, or
                DistributedDataParallel if ``distributed``) and moved to the GPU;
                the flag is also forwarded to EvalCallback and fit_one_epoch.
            distributed: When true, an NCCL process group is initialised and
                ``LOCAL_RANK`` / ``RANK`` are read from the environment.
            sync_bn: Convert BatchNorm layers to SyncBatchNorm; only honoured
                when ``distributed`` is true and more than one GPU is visible.
            fp16: When true a ``torch.cuda.amp.GradScaler`` is created and
                passed to fit_one_epoch.
            anchors_path: File handed to ``get_anchors``.
            pretrained: When true ``download_weights(phi)`` is called and the
                flag is forwarded to YoloBody; when false ``weights_init`` is
                applied to the model.
            model_path: Optional checkpoint; when non-empty, entries whose key
                and shape match the model's state dict are loaded.
            phi: Forwarded to YoloBody and download_weights.
            input_shape: Forwarded to YOLOLoss, LossHistory, YoloDataset and
                EvalCallback. NOTE(review): mutable default list; not modified
                in this method — confirm the helpers do not mutate it.
            save_dir: Base directory; the loss log goes to
                ``<save_dir>/loss_<timestamp>`` and the value is forwarded to
                fit_one_epoch.
            save_period: Forwarded to fit_one_epoch.
            eval_flag: Forwarded to EvalCallback.
            eval_period: Forwarded to EvalCallback as ``period``.
            num_workers: Worker count for both DataLoaders.

        Raises:
            ValueError: If the training or validation list holds fewer samples
                than one batch.
        """
            
        anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]


        # Augmentation settings forwarded to the training YoloDataset.
        mosaic              = True
        mosaic_prob         = 0.5
        mixup               = True
        mixup_prob          = 0.5
        special_aug_ratio   = 0.7
        #------------------------------------------------------------------#
        #   label_smoothing     Label smoothing. Usually below 0.01, e.g. 0.01 or 0.005.
        #------------------------------------------------------------------#
        label_smoothing     = 0

        #----------------------------------------------------------------------------------------------------------------------------#
        #   Training runs in two phases: a frozen phase and an unfrozen phase. The frozen phase exists for machines with limited resources.
        #   Frozen training needs less GPU memory; with a very weak GPU, set Freeze_Epoch equal to UnFreeze_Epoch and Freeze_Train = True so that only frozen training is performed.
        #      
        #   Some suggested settings follow; adjust them to your own needs:
        #   (1) Starting from pretrained weights of the whole model:
        #       Adam:
        #           Init_Epoch = 0, Freeze_Epoch = 50, UnFreeze_Epoch = 100, Freeze_Train = True, optimizer_type = 'adam', Init_lr = 1e-3, weight_decay = 0. (frozen)
        #           Init_Epoch = 0, UnFreeze_Epoch = 100, Freeze_Train = False, optimizer_type = 'adam', Init_lr = 1e-3, weight_decay = 0. (not frozen)
        #       SGD:
        #           Init_Epoch = 0, Freeze_Epoch = 50, UnFreeze_Epoch = 300, Freeze_Train = True, optimizer_type = 'sgd', Init_lr = 1e-2, weight_decay = 5e-4. (frozen)
        #           Init_Epoch = 0, UnFreeze_Epoch = 300, Freeze_Train = False, optimizer_type = 'sgd', Init_lr = 1e-2, weight_decay = 5e-4. (not frozen)
        #       UnFreeze_Epoch can be tuned between 100 and 300.
        #   (2) Training from scratch:
        #       Init_Epoch = 0, UnFreeze_Epoch >= 300, Unfreeze_batch_size >= 16, Freeze_Train = False (no frozen phase)
        #       Keep UnFreeze_Epoch at 300 or more. optimizer_type = 'sgd', Init_lr = 1e-2, mosaic = True.
        #   (3) Choosing batch_size:
        #       As large as the GPU can take. Running out of memory is unrelated to dataset size; on OOM / CUDA out of memory, lower batch_size.
        #       Because of the BatchNorm layers, batch_size must be at least 2; it cannot be 1.
        #       Normally Freeze_batch_size should be 1-2x Unfreeze_batch_size. Avoid a large gap, since it affects the automatic learning-rate adjustment.
        #----------------------------------------------------------------------------------------------------------------------------#
        #------------------------------------------------------------------#
        #   Frozen-phase training parameters
        #   The backbone is frozen here, so the feature extractor does not change
        #   Uses less GPU memory; the network is only fine-tuned
        #   Init_Epoch          Epoch the model starts from; it may exceed Freeze_Epoch, e.g.:
        #                       Init_Epoch = 60, Freeze_Epoch = 50, UnFreeze_Epoch = 100
        #                       skips the frozen phase, starts at epoch 60 and adjusts the learning rate accordingly.
        #                       (used when resuming from a checkpoint)
        #   Freeze_Epoch        Number of epochs trained with the backbone frozen
        #                       (ignored when Freeze_Train=False)
        #   Freeze_batch_size   batch_size during frozen training
        #                       (ignored when Freeze_Train=False)
        #------------------------------------------------------------------#
        Init_Epoch          = 0
        Freeze_Epoch        = 50
        Freeze_batch_size   = 8
        #------------------------------------------------------------------#
        #   Unfrozen-phase training parameters
        #   The backbone is no longer frozen, so the feature extractor changes
        #   Uses more GPU memory; every parameter of the network is updated
        #   UnFreeze_Epoch          Total number of epochs to train
        #                           SGD takes longer to converge, so use a larger UnFreeze_Epoch
        #                           Adam can use a comparatively smaller UnFreeze_Epoch
        #   Unfreeze_batch_size     batch_size after unfreezing
        #------------------------------------------------------------------#
        UnFreeze_Epoch      = 300
        Unfreeze_batch_size = 4
        #------------------------------------------------------------------#
        #   Freeze_Train    Whether to run the frozen phase
        #                   By default the backbone is frozen first and unfrozen later.
        #------------------------------------------------------------------#
        Freeze_Train        = True

        #------------------------------------------------------------------#
        #   Other training parameters: learning rate, optimizer, learning-rate decay
        #------------------------------------------------------------------#
        #------------------------------------------------------------------#
        #   Init_lr         Maximum learning rate of the model
        #   Min_lr          Minimum learning rate, by default 0.01 of the maximum
        #------------------------------------------------------------------#
        Init_lr             = 1e-2
        Min_lr              = Init_lr * 0.01
        #------------------------------------------------------------------#
        #   optimizer_type  Optimizer to use: adam or sgd
        #                   With Adam, Init_lr=1e-3 is recommended
        #                   With SGD,  Init_lr=1e-2 is recommended
        #   momentum        momentum parameter used inside the optimizer
        #   weight_decay    Weight decay, helps against overfitting
        #                   adam handles weight_decay incorrectly; set it to 0 when using adam.
        #------------------------------------------------------------------#
        optimizer_type      = "sgd"
        momentum            = 0.937
        weight_decay        = 5e-4
        #------------------------------------------------------------------#
        #   lr_decay_type   Learning-rate decay schedule: step or cos
        #------------------------------------------------------------------#
        lr_decay_type       = "cos"

        # Annotation list files, resolved against the current working directory.
        train_annotation_path   = '2007_train.txt'
        val_annotation_path     = '2007_val.txt'

        #------------------------------------------------------#
        #   Select the GPU(s) to use
        #------------------------------------------------------#
        ngpus_per_node  = torch.cuda.device_count()
        if distributed:
            dist.init_process_group(backend="nccl")
            local_rank  = int(os.environ["LOCAL_RANK"])
            rank        = int(os.environ["RANK"])
            device      = torch.device("cuda", local_rank)
            if local_rank == 0:
                print(f"[{os.getpid()}] (rank = {rank}, local_rank = {local_rank}) training...")
                print("Gpu Device Count : ", ngpus_per_node)
        else:
            device          = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            local_rank      = 0
            rank            = 0

        #------------------------------------------------------#
        #   Load the classes and anchors
        #------------------------------------------------------#
        class_names, num_classes = get_classes(self.classes_path)
        anchors, num_anchors     = get_anchors(anchors_path)

        #----------------------------------------------------#
        #   Download the pretrained weights
        #----------------------------------------------------#
        if pretrained:
            if distributed:
                # Only local rank 0 downloads; the barrier makes the other ranks wait for it.
                if local_rank == 0:
                    download_weights(phi)  
                dist.barrier()
            else:
                download_weights(phi)
                
        #------------------------------------------------------#
        #   Create the yolo model
        #------------------------------------------------------#
        model = YoloBody(anchors_mask, num_classes, phi, pretrained=pretrained)
        if not pretrained:
            weights_init(model)
        if model_path != '':
            #------------------------------------------------------#
            #   See the README for the weight files (Baidu Netdisk download)
            #------------------------------------------------------#
            if local_rank == 0:
                print('Load weights {}.'.format(model_path))
            
            #------------------------------------------------------#
            #   Load by matching the checkpoint keys against the model keys
            #------------------------------------------------------#
            model_dict      = model.state_dict()
            pretrained_dict = torch.load(model_path, map_location = device)
            load_key, no_load_key, temp_dict = [], [], {}
            for k, v in pretrained_dict.items():
                # Only entries whose name and tensor shape both match are copied.
                if k in model_dict.keys() and np.shape(model_dict[k]) == np.shape(v):
                    temp_dict[k] = v
                    load_key.append(k)
                else:
                    no_load_key.append(k)
            model_dict.update(temp_dict)
            model.load_state_dict(model_dict)
            #------------------------------------------------------#
            #   Show the keys that did not match
            #------------------------------------------------------#
            if local_rank == 0:
                print("\nSuccessful Load Key:", str(load_key)[:500], "……\nSuccessful Load Key Num:", len(load_key))
                print("\nFail To Load Key:", str(no_load_key)[:500], "……\nFail To Load Key num:", len(no_load_key))
                print("\n\033[1;33;44m温馨提示，head部分没有载入是正常现象，Backbone部分没有载入是错误的。\033[0m")

        #----------------------#
        #   Build the loss function
        #----------------------#
        yolo_loss    = YOLOLoss(anchors, num_classes, input_shape, anchors_mask, label_smoothing)
        #----------------------#
        #   Record the loss
        #----------------------#
        if local_rank == 0:
            time_str        = datetime.datetime.strftime(datetime.datetime.now(),'%Y_%m_%d_%H_%M_%S')
            log_dir         = os.path.join(save_dir, "loss_" + str(time_str))
            loss_history    = LossHistory(log_dir, model, input_shape=input_shape)
        else:
            # log_dir stays undefined on other ranks; it is only used under local_rank == 0 below.
            loss_history    = None
            
        #------------------------------------------------------------------#
        #   torch 1.2 does not support amp; torch 1.7.1 or newer is recommended for correct fp16
        #   which is why torch 1.2 shows "could not be resolve" here
        #------------------------------------------------------------------#
        if fp16:
            from torch.cuda.amp import GradScaler as GradScaler
            scaler = GradScaler()
        else:
            scaler = None

        model_train = model.train()
        #----------------------------#
        #   Multi-GPU synchronized BN
        #----------------------------#
        if sync_bn and ngpus_per_node > 1 and distributed:
            model_train = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model_train)
        elif sync_bn:
            print("Sync_bn is not support in one gpu or not distributed.")

        if cuda:
            if distributed:
                #----------------------------#
                #   Run in parallel on multiple GPUs
                #----------------------------#
                model_train = model_train.cuda(local_rank)
                model_train = torch.nn.parallel.DistributedDataParallel(model_train, device_ids=[local_rank], find_unused_parameters=True)
            else:
                # NOTE(review): wraps `model`, not `model_train` — confirm this is intended.
                model_train = torch.nn.DataParallel(model)
                cudnn.benchmark = True
                model_train = model_train.cuda()
                
        #----------------------------#
        #   Weight smoothing (EMA)
        #----------------------------#
        ema = ModelEMA(model_train)
        
        #---------------------------#
        #   Read the dataset txt files
        #---------------------------#
        with open(train_annotation_path, encoding='utf-8') as f:
            train_lines = f.readlines()
        with open(val_annotation_path, encoding='utf-8') as f:
            val_lines   = f.readlines()
        num_train   = len(train_lines)
        num_val     = len(val_lines)

        if local_rank == 0:
            show_config(
                classes_path = self.classes_path, anchors_path = anchors_path, anchors_mask = anchors_mask, model_path = model_path, input_shape = input_shape, \
                Init_Epoch = Init_Epoch, Freeze_Epoch = Freeze_Epoch, UnFreeze_Epoch = UnFreeze_Epoch, Freeze_batch_size = Freeze_batch_size, Unfreeze_batch_size = Unfreeze_batch_size, Freeze_Train = Freeze_Train, \
                Init_lr = Init_lr, Min_lr = Min_lr, optimizer_type = optimizer_type, momentum = momentum, lr_decay_type = lr_decay_type, \
                save_period = save_period, save_dir = save_dir, num_workers = num_workers, num_train = num_train, num_val = num_val
            )
            #---------------------------------------------------------#
            #   Total epochs = number of full passes over the data
            #   Total steps  = total number of gradient-descent updates
            #   Each epoch contains several steps, each step performs one gradient update.
            #   Only a minimum epoch count is suggested here (no upper bound); the calculation only considers the unfrozen phase
            #----------------------------------------------------------#
            wanted_step = 5e4 if optimizer_type == "sgd" else 1.5e4
            total_step  = num_train // Unfreeze_batch_size * UnFreeze_Epoch
            if total_step <= wanted_step:
                if num_train // Unfreeze_batch_size == 0:
                    raise ValueError('数据集过小，无法进行训练，请扩充数据集。')
                wanted_epoch = wanted_step // (num_train // Unfreeze_batch_size) + 1
                print("\n\033[1;33;44m[Warning] 使用%s优化器时，建议将训练总步长设置到%d以上。\033[0m"%(optimizer_type, wanted_step))
                print("\033[1;33;44m[Warning] 本次运行的总训练数据量为%d，Unfreeze_batch_size为%d，共训练%d个Epoch，计算出总训练步长为%d。\033[0m"%(num_train, Unfreeze_batch_size, UnFreeze_Epoch, total_step))
                print("\033[1;33;44m[Warning] 由于总训练步长为%d，小于建议总步长%d，建议设置总世代为%d。\033[0m"%(total_step, wanted_step, wanted_epoch))

        #------------------------------------------------------#
        #   Backbone features are generic, so frozen training can speed training up
        #   and also protects the weights from being damaged early in training.
        #   Init_Epoch is the starting epoch
        #   Freeze_Epoch is the number of frozen-training epochs
        #   UnFreeze_Epoch is the total number of epochs
        #   On OOM / insufficient GPU memory, lower Batch_size
        #------------------------------------------------------#
        if True:
            UnFreeze_flag = False
            #------------------------------------#
            #   Freeze part of the network
            #------------------------------------#
            if Freeze_Train:
                for param in model.backbone.parameters():
                    param.requires_grad = False

            #-------------------------------------------------------------------#
            #   Without frozen training, batch_size is simply Unfreeze_batch_size
            #-------------------------------------------------------------------#
            batch_size = Freeze_batch_size if Freeze_Train else Unfreeze_batch_size

            #-------------------------------------------------------------------#
            #   Adapt the learning rate to the current batch_size
            #-------------------------------------------------------------------#
            nbs             = 64
            lr_limit_max    = 1e-3 if optimizer_type == 'adam' else 5e-2
            lr_limit_min    = 3e-4 if optimizer_type == 'adam' else 5e-4
            # Scale by batch_size / nbs, then clamp into [lr_limit_min, lr_limit_max].
            Init_lr_fit     = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max)
            Min_lr_fit      = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)

            #---------------------------------------#
            #   Pick the optimizer according to optimizer_type
            #---------------------------------------#
            # pg0: BatchNorm2d / "bn"-named weights, pg1: other weights (get weight_decay), pg2: biases.
            pg0, pg1, pg2 = [], [], []  
            for k, v in model.named_modules():
                if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
                    pg2.append(v.bias)    
                if isinstance(v, nn.BatchNorm2d) or "bn" in k:
                    pg0.append(v.weight)    
                elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
                    pg1.append(v.weight)   
            # NOTE(review): both optimizers are constructed when the dict is built; only the selected one is kept.
            optimizer = {
                'adam'  : optim.Adam(pg0, Init_lr_fit, betas = (momentum, 0.999)),
                'sgd'   : optim.SGD(pg0, Init_lr_fit, momentum = momentum, nesterov=True)
            }[optimizer_type]
            optimizer.add_param_group({"params": pg1, "weight_decay": weight_decay})
            optimizer.add_param_group({"params": pg2})

            #---------------------------------------#
            #   Get the learning-rate decay function
            #---------------------------------------#
            lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch)
            
            #---------------------------------------#
            #   Determine the length of one epoch
            #---------------------------------------#
            epoch_step      = num_train // batch_size
            epoch_step_val  = num_val // batch_size
            
            if epoch_step == 0 or epoch_step_val == 0:
                raise ValueError("数据集过小，无法继续进行训练，请扩充数据集。")

            if ema:
                ema.updates     = epoch_step * Init_Epoch
            
            #---------------------------------------#
            #   Build the dataset loaders.
            #---------------------------------------#
            train_dataset   = YoloDataset(train_lines, input_shape, num_classes, anchors, anchors_mask, epoch_length=UnFreeze_Epoch, \
                                            mosaic=mosaic, mixup=mixup, mosaic_prob=mosaic_prob, mixup_prob=mixup_prob, train=True, special_aug_ratio=special_aug_ratio)
            val_dataset     = YoloDataset(val_lines, input_shape, num_classes, anchors, anchors_mask, epoch_length=UnFreeze_Epoch, \
                                            mosaic=False, mixup=False, mosaic_prob=0, mixup_prob=0, train=False, special_aug_ratio=0)
            
            if distributed:
                train_sampler   = torch.utils.data.distributed.DistributedSampler(train_dataset, shuffle=True,)
                val_sampler     = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False,)
                # The configured batch size is split across the GPUs of this node.
                batch_size      = batch_size // ngpus_per_node
                shuffle         = False
            else:
                train_sampler   = None
                val_sampler     = None
                shuffle         = True

            gen             = DataLoader(train_dataset, shuffle = shuffle, batch_size = batch_size, num_workers = num_workers, pin_memory=True,
                                        drop_last=True, collate_fn=yolo_dataset_collate, sampler=train_sampler)
            gen_val         = DataLoader(val_dataset  , shuffle = shuffle, batch_size = batch_size, num_workers = num_workers, pin_memory=True, 
                                        drop_last=True, collate_fn=yolo_dataset_collate, sampler=val_sampler)

            #----------------------#
            #   Record the mAP curve from evaluation
            #----------------------#
            if local_rank == 0:
                eval_callback   = EvalCallback(model, input_shape, anchors, anchors_mask, class_names, num_classes, val_lines, log_dir, cuda, \
                                                eval_flag=eval_flag, period=eval_period)
            else:
                eval_callback   = None
            
            #---------------------------------------#
            #   Start training the model
            #---------------------------------------#
            for epoch in range(Init_Epoch, UnFreeze_Epoch):
                #---------------------------------------#
                #   If the model has a frozen part,
                #   unfreeze it and reconfigure the parameters
                #---------------------------------------#
                if epoch >= Freeze_Epoch and not UnFreeze_flag and Freeze_Train:
                    batch_size = Unfreeze_batch_size

                    #-------------------------------------------------------------------#
                    #   Adapt the learning rate to the current batch_size
                    #-------------------------------------------------------------------#
                    nbs             = 64
                    lr_limit_max    = 1e-3 if optimizer_type == 'adam' else 5e-2
                    lr_limit_min    = 3e-4 if optimizer_type == 'adam' else 5e-4
                    Init_lr_fit     = min(max(batch_size / nbs * Init_lr, lr_limit_min), lr_limit_max)
                    Min_lr_fit      = min(max(batch_size / nbs * Min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)
                    #---------------------------------------#
                    #   Get the learning-rate decay function
                    #---------------------------------------#
                    lr_scheduler_func = get_lr_scheduler(lr_decay_type, Init_lr_fit, Min_lr_fit, UnFreeze_Epoch)

                    for param in model.backbone.parameters():
                        param.requires_grad = True

                    epoch_step      = num_train // batch_size
                    epoch_step_val  = num_val // batch_size

                    if epoch_step == 0 or epoch_step_val == 0:
                        raise ValueError("数据集过小，无法继续进行训练，请扩充数据集。")
                        
                    if ema:
                        ema.updates     = epoch_step * epoch

                    if distributed:
                        batch_size  = batch_size // ngpus_per_node
                        
                    # Rebuild both loaders so they use the unfrozen batch size.
                    gen             = DataLoader(train_dataset, shuffle = shuffle, batch_size = batch_size, num_workers = num_workers, pin_memory=True,
                                                drop_last=True, collate_fn=yolo_dataset_collate, sampler=train_sampler)
                    gen_val         = DataLoader(val_dataset  , shuffle = shuffle, batch_size = batch_size, num_workers = num_workers, pin_memory=True, 
                                                drop_last=True, collate_fn=yolo_dataset_collate, sampler=val_sampler)

                    UnFreeze_flag   = True

                # Tell both datasets which epoch is running.
                gen.dataset.epoch_now       = epoch
                gen_val.dataset.epoch_now   = epoch

                if distributed:
                    train_sampler.set_epoch(epoch)

                set_optimizer_lr(optimizer, lr_scheduler_func, epoch)

                fit_one_epoch(model_train, model, ema, yolo_loss, loss_history, eval_callback, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, UnFreeze_Epoch, cuda, fp16, scaler, save_period, save_dir, local_rank)
                
                if distributed:
                    dist.barrier()


    def predict(self, cuda: bool = True, distributed: bool = False, sync_bn: bool = False,
              fp16: bool = False, anchors_path: str = 'yolovx/yolov7pytorch/model_data/yolo_anchors.txt', pretrained: bool = False,
              model_path: str = "", phi: str = "l", input_shape: list = [640, 640],
              save_dir: str = 'logs', save_period: int = 10, eval_flag: bool= True,
              eval_period: int = 10, num_workers: int = 4) -> None:
        """Placeholder for inference; currently does nothing.

        The parameter list mirrors ``train``; every argument is accepted and
        ignored, and the method returns ``None``.
        """
        return None
        


def main():
    """Generate the annotation files, then train, using the bundled VOC class list."""
    trainer = Yolov7(classes_path="yolovx/yolov7pytorch/model_data/voc_classes.txt")
    # The annotation step must run first: train() reads the list files it writes.
    trainer.voc_annotation()
    trainer.train()


# Script entry point.
# NOTE(review): this file uses package-relative imports (``from .nets...``), so it
# presumably has to be launched as a module of its package rather than as a bare
# script — confirm.
if __name__ == "__main__":
    main()