环境依赖:

pytorch   0.4以上

tensorboardX:   pip install tensorboardX、pip install tensorflow(安装tensorflow是为了获得tensorboard命令行工具,也可以只执行pip install tensorboard)

 

在项目代码中加入tensorboardX的记录代码,训练时会生成日志文件,之后即可在浏览器中查看可视化结果。

官方示例:

pytorch中tensorboardX进行可视化

 

默认设置是在当前工作目录(即运行脚本时所在的目录)下生成一个runs文件夹,里面存储summary的信息。

在runs的同级目录下命令行中输入:

tensorboard --logdir runs            (注意:命令是tensorboard,不是tensorboardX)

命令行会输出一个网址(默认为 http://localhost:6006),复制到浏览器中打开,即可查看loss、acc、lr等数据的变化过程。

 

举例说明pytorch中设置summary的方式:先实例化SummaryWriter对象,然后在该对象上调用add_scalar记录想要监控的指标即可。

下面的示例代码还用到了基于logging包将日志保存到log文件的方法,详见链接(https://www.cnblogs.com/ywheunji/p/14125085.html)

  1 class Trainer(object):
  2     def __init__(self, weight_path, resume, gpu_id, accumulate, fp_16):
  3         init_seeds(0)
  4         self.train_dataloader = DataLoader(
  5             self.train_dataset,
  6             batch_size=cfg.TRAIN["BATCH_SIZE"],
  7             num_workers=cfg.TRAIN["NUMBER_WORKERS"],
  8             shuffle=True,
  9             pin_memory=True,
 10         )
 11 
 12         self.yolov4 = Build_Model(weight_path=weight_path, resume=resume).to(
 13             self.device
 14         )
 15 
 16     def train(self):
 17         global writer
 18         logger.info(
 19             "Training start,img size is: {:d},batchsize is: {:d},work number is {:d}".format(
 20                 cfg.TRAIN["TRAIN_IMG_SIZE"],
 21                 cfg.TRAIN["BATCH_SIZE"],
 22                 cfg.TRAIN["NUMBER_WORKERS"],
 23             )
 24         )
 25         logger.info(self.yolov4)
 26         logger.info(
 27             "Train datasets number is : {}".format(len(self.train_dataset))
 28         )
 29 
 30         if self.fp_16:
 31             self.yolov4, self.optimizer = amp.initialize(
 32                 self.yolov4, self.optimizer, opt_level="O1", verbosity=0
 33             )
 34         logger.info("        =======  start  training   ======     ")
 35         for epoch in range(self.start_epoch, self.epochs):
 36             start = time.time()
 37             self.yolov4.train()
 38 
 39             mloss = torch.zeros(4)
 40             logger.info("===Epoch:[{}/{}]===".format(epoch, self.epochs))
 41             for i, (imgs, label_sbbox,
 42             ) in enumerate(self.train_dataloader):
 43 
 44                 loss, loss_ciou, loss_conf, loss_cls = self.criterion(p, p_d, label_sbbox)
 45 
 46                 loss.backward()
 47                 # Print batch results
 48                 if i % 10 == 0:
 49                     logger.info(
 50                         "  === Epoch:[{:3}/{}],step:[{:3}/{}],img_size:[{:3}],total_loss:{:.4f}|loss_ciou:{:.4f}|loss_conf:{:.4f}|loss_cls:{:.4f}|lr:{:.4f}".format(
 51                             epoch,
 52                             self.epochs,
 53                             i,
 54                             len(self.train_dataloader) - 1,
 55                             self.train_dataset.img_size,
 56                             mloss[3],
 57                             mloss[0],
 58                             mloss[1],
 59                             mloss[2],
 60                             self.optimizer.param_groups[0]["lr"],
 61                         )
 62                     )
 63                     writer.add_scalar(
 64                         "loss_ciou",
 65                         mloss[0],
 66                         len(self.train_dataloader)
 67                         * epoch
 68                         + i,
 69                     )
 70                     writer.add_scalar(
 71                         "train_loss",
 72                         mloss[3],
 73                         len(self.train_dataloader)
 74                         * epoch
 75                         + i,
 76                     )
 77 
 78 
 79             # eval
 80             logger.info(
 81                 "===== Validate =====".format(epoch, self.epochs)
 82             )
 83             logger.info("val img size is {}".format(cfg.VAL["TEST_IMG_SIZE"]))
 84             with torch.no_grad():
 85                 APs, inference_time = Evaluator(
 86                     self.yolov4, showatt=False
 87                 ).APs_voc()
 88                 for i in APs:
 89                     logger.info("{} --> mAP : {}".format(i, APs[i]))
 90                     mAP += APs[i]
 91                 mAP = mAP / self.train_dataset.num_classes
 92                 logger.info("mAP : {}".format(mAP))
 93                 logger.info(
 94                     "inference time: {:.2f} ms".format(inference_time)
 95                 )
 96                 writer.add_scalar("mAP", mAP, epoch)
 97                 self.__save_model_weights(epoch, mAP)
 98                 logger.info("save weights done")
 99             logger.info("  ===test mAP:{:.3f}".format(mAP))
100             
101 if __name__ == "__main__":
102     global logger, writer
103     writer = SummaryWriter(logdir=opt.log_path + "/event")
104     logger = Logger(
105         log_file_name=opt.log_path + "/log.txt",
106         log_level=logging.DEBUG,
107         logger_name="YOLOv4",
108     ).get_log()
109 
110     Trainer(
111         weight_path=opt.weight_path,
112         resume=opt.resume,
113         gpu_id=opt.gpu_id,
114         accumulate=opt.accumulate,
115         fp_16=opt.fp_16,
116     ).train()