From 5da2bc7cf1878d6c286aa04eee9ad7616a0c9604 Mon Sep 17 00:00:00 2001 From: Aries-chen <1416535234@qq.com> Date: Fri, 11 Feb 2022 11:05:32 +0800 Subject: [PATCH 1/6] simplify the description of SE --- docs/source/flowvision.layers.rst | 3 ++- flowvision/layers/attention/se.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/source/flowvision.layers.rst b/docs/source/flowvision.layers.rst index ae121569..836b23d2 100644 --- a/docs/source/flowvision.layers.rst +++ b/docs/source/flowvision.layers.rst @@ -10,5 +10,6 @@ Plug and Play Modules or Functions that are specific for Computer Vision Tasks batched_nms, box_iou, FeaturePyramidNetwork, - MultiScaleRoIAlign + MultiScaleRoIAlign, + SEModule \ No newline at end of file diff --git a/flowvision/layers/attention/se.py b/flowvision/layers/attention/se.py index 925f4af9..70f323f2 100644 --- a/flowvision/layers/attention/se.py +++ b/flowvision/layers/attention/se.py @@ -3,6 +3,19 @@ class SEModule(nn.Module): + """ + "Squeeze-and-Excitation" block adaptively recalibrates channel-wise feature responses. This is based on + `"Squeeze-and-Excitation Networks" `_. This unit is designed to improve the representational capacity of a network by enabling it to perform dynamic channel-wise feature recalibration. + + Args: + channels (int): The input channel size + reduction (int): Ratio that allows us tovary the capacity and computational cost of the SE model. Default: 16 + rd_channels (int or None): Number of reduced channels. If none, uses reduction to calculate + act_layer (flow.nn.Module): An activation layer used between two FC layers. Default: flow.nn.ReLU + gate_layer (flow.nn.Module): An activation layer used after two FC layers. Default: flow.nn.Sigmoid + mlp_bias (bool): If True, add learnable bias to the linear layers. 
Default: False + """ + def __init__( self, channels, From abac487c1572262850b5d5dd89d7321058929be6 Mon Sep 17 00:00:00 2001 From: Aries-chen <1416535234@qq.com> Date: Fri, 11 Feb 2022 11:32:21 +0800 Subject: [PATCH 2/6] improve the description again --- flowvision/layers/attention/se.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flowvision/layers/attention/se.py b/flowvision/layers/attention/se.py index 70f323f2..8b7d0ada 100644 --- a/flowvision/layers/attention/se.py +++ b/flowvision/layers/attention/se.py @@ -9,7 +9,7 @@ class SEModule(nn.Module): Args: channels (int): The input channel size - reduction (int): Ratio that allows us tovary the capacity and computational cost of the SE model. Default: 16 + reduction (int): Ratio that allows us to vary the capacity and computational cost of the SE SEModule. Default: 16 rd_channels (int or None): Number of reduced channels. If none, uses reduction to calculate act_layer (flow.nn.Module): An activation layer used between two FC layers. Default: flow.nn.ReLU gate_layer (flow.nn.Module): An activation layer used after two FC layers. Default: flow.nn.Sigmoid From 4a9648491adee1cca4a2189f40585c9b0b9be04f Mon Sep 17 00:00:00 2001 From: Aries-chen <1416535234@qq.com> Date: Fri, 11 Feb 2022 11:35:37 +0800 Subject: [PATCH 3/6] improve the description again again --- flowvision/layers/attention/se.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flowvision/layers/attention/se.py b/flowvision/layers/attention/se.py index 8b7d0ada..58713753 100644 --- a/flowvision/layers/attention/se.py +++ b/flowvision/layers/attention/se.py @@ -9,7 +9,7 @@ class SEModule(nn.Module): Args: channels (int): The input channel size - reduction (int): Ratio that allows us to vary the capacity and computational cost of the SE SEModule. Default: 16 + reduction (int): Ratio that allows us to vary the capacity and computational cost of the SE Module. 
Default: 16 rd_channels (int or None): Number of reduced channels. If none, uses reduction to calculate act_layer (flow.nn.Module): An activation layer used between two FC layers. Default: flow.nn.ReLU gate_layer (flow.nn.Module): An activation layer used after two FC layers. Default: flow.nn.Sigmoid From fa8da5bef5113c085329d8a5060aed2ab823b3eb Mon Sep 17 00:00:00 2001 From: Aries-chen <1416535234@qq.com> Date: Fri, 11 Feb 2022 11:58:23 +0800 Subject: [PATCH 4/6] improve the layers --- flowvision/layers/attention/se.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flowvision/layers/attention/se.py b/flowvision/layers/attention/se.py index 58713753..1f0c00cd 100644 --- a/flowvision/layers/attention/se.py +++ b/flowvision/layers/attention/se.py @@ -11,8 +11,8 @@ class SEModule(nn.Module): channels (int): The input channel size reduction (int): Ratio that allows us to vary the capacity and computational cost of the SE Module. Default: 16 rd_channels (int or None): Number of reduced channels. If none, uses reduction to calculate - act_layer (flow.nn.Module): An activation layer used between two FC layers. Default: flow.nn.ReLU - gate_layer (flow.nn.Module): An activation layer used after two FC layers. Default: flow.nn.Sigmoid + act_layer (flow.nn.Module): An activation layer used after the first FC layer. Default: flow.nn.ReLU + gate_layer (flow.nn.Module): An activation layer used after the second FC layer. Default: flow.nn.Sigmoid mlp_bias (bool): If True, add learnable bias to the linear layers. 
Default: False """ From 1eb58de5fda0be57be421c6447c74faa20557a1f Mon Sep 17 00:00:00 2001 From: Aries-chen <1416535234@qq.com> Date: Wed, 16 Feb 2022 12:40:42 +0800 Subject: [PATCH 5/6] mlp_bias Default = True --- flowvision/layers/attention/se.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/flowvision/layers/attention/se.py b/flowvision/layers/attention/se.py index 1f0c00cd..df51da5d 100644 --- a/flowvision/layers/attention/se.py +++ b/flowvision/layers/attention/se.py @@ -1,6 +1,8 @@ +from typing import Optional + import oneflow as flow import oneflow.nn as nn - +from oneflow.nn import ReLU, Sigmoid class SEModule(nn.Module): """ @@ -11,19 +13,19 @@ class SEModule(nn.Module): channels (int): The input channel size reduction (int): Ratio that allows us to vary the capacity and computational cost of the SE Module. Default: 16 rd_channels (int or None): Number of reduced channels. If none, uses reduction to calculate - act_layer (flow.nn.Module): An activation layer used after the first FC layer. Default: flow.nn.ReLU - gate_layer (flow.nn.Module): An activation layer used after the second FC layer. Default: flow.nn.Sigmoid - mlp_bias (bool): If True, add learnable bias to the linear layers. Default: False + act_layer (Optional[ReLU]): An activation layer used after the first FC layer. Default: flow.nn.ReLU + gate_layer (Optional[Sigmoid]): An activation layer used after the second FC layer. Default: flow.nn.Sigmoid + mlp_bias (bool): If True, add learnable bias to the linear layers. 
Default: True """ def __init__( self, - channels, - reduction=16, - rd_channels=None, - act_layer=nn.ReLU, - gate_layer=nn.Sigmoid, - mlp_bias=False, + channels: int, + reduction: int = 16, + rd_channels: int = None, + act_layer: Optional[ReLU] = nn.ReLU, + gate_layer: Optional[Sigmoid] = nn.Sigmoid, + mlp_bias=True, ): super(SEModule, self).__init__() rd_channels = channels // reduction if rd_channels is None else rd_channels From 4d2229b3d44985c7574b82b8a31eae3c9e33d699 Mon Sep 17 00:00:00 2001 From: tripleMu <865626@163.com> Date: Fri, 20 May 2022 10:03:19 +0800 Subject: [PATCH 6/6] Fix R50 config --- projects/classification/config.py | 30 +++++++--- .../configs/resnet50_default_settings.yaml | 58 +++++++++++++++++++ 2 files changed, 79 insertions(+), 9 deletions(-) create mode 100644 projects/classification/configs/resnet50_default_settings.yaml diff --git a/projects/classification/config.py b/projects/classification/config.py index 3549c476..6afb0c8f 100644 --- a/projects/classification/config.py +++ b/projects/classification/config.py @@ -20,7 +20,7 @@ # Path to dataset, could be overwritten by command line argument _C.DATA.DATA_PATH = "" # Dataset name -_C.DATA.DATASET = "cifar100" +_C.DATA.DATASET = "imagenet" # Input image size _C.DATA.IMG_SIZE = 224 # Interpolation to resize image (random, bilinear, bicubic) @@ -40,7 +40,7 @@ # ----------------------------------------------------------------------------- _C.MODEL = CN() # Model arch -_C.MODEL.ARCH = "swin_tiny_patch4_window7_224" +_C.MODEL.ARCH = "resnet50" # Pretrained weight from checkpoint _C.MODEL.PRETRAINED = False # Path to a specific weights to load, e.g., "./checkpoints/swin_tiny_pretrained_model" @@ -90,13 +90,15 @@ # Optimizer _C.TRAIN.OPTIMIZER = CN() -_C.TRAIN.OPTIMIZER.NAME = "adamw" -# Optimizer Epsilon -_C.TRAIN.OPTIMIZER.EPS = 1e-8 -# Optimizer Betas -_C.TRAIN.OPTIMIZER.BETAS = (0.9, 0.999) +_C.TRAIN.OPTIMIZER.NAME = "sgd" +# # Optimizer Epsilon +# _C.TRAIN.OPTIMIZER.EPS = 1e-8 +# # 
Optimizer Betas # _C.TRAIN.OPTIMIZER.BETAS = (0.9, 0.999) # SGD momentum _C.TRAIN.OPTIMIZER.MOMENTUM = 0.9 +# # NESTEROV +_C.TRAIN.OPTIMIZER.NESTEROV = True # ----------------------------------------------------------------------------- # Augmentation settings # ----------------------------------------------------------------------------- _C.AUG = CN() # Color jitter factor _C.AUG.COLOR_JITTER = 0.4 # Use AutoAugment policy. "v0" or "original" _C.AUG.AUTO_AUGMENT = "rand-m9-mstd0.5-inc1" # Random erase prob _C.AUG.REPROB = 0.25 # Random erase mode _C.AUG.REMODE = "pixel" +# Scale +_C.AUG.SCALE = [0.08, 1.0] +# Ratio +_C.AUG.RATIO = [0.75, 1.0+1/3] +# Hflip +_C.AUG.HFLIP = 0.5 +# Vflip +_C.AUG.VFLIP = 0.0 +# Interpolation +_C.AUG.INTERPOLATION = 'random' # Random erase count _C.AUG.RECOUNT = 1 # Mixup alpha, mixup enabled if > 0 -_C.AUG.MIXUP = 0.8 +_C.AUG.MIXUP = 0.0 # Cutmix alpha, cutmix enabled if > 0 -_C.AUG.CUTMIX = 1.0 +_C.AUG.CUTMIX = 0.0 # Cutmix min/max ratio, overrides alpha and enables cutmix if set _C.AUG.CUTMIX_MINMAX = None # Probability of performing mixup or cutmix when either/both is enabled diff --git a/projects/classification/configs/resnet50_default_settings.yaml b/projects/classification/configs/resnet50_default_settings.yaml new file mode 100644 index 00000000..27161aec --- /dev/null +++ b/projects/classification/configs/resnet50_default_settings.yaml @@ -0,0 +1,58 @@ +DATA: + BATCH_SIZE: 256 + DATASET: imagenet + DATA_PATH: /home/ubuntu/work/oneflow/datasets + IMG_SIZE: 224 + INTERPOLATION: bicubic + ZIP_MODE: False + CACHE_MODE: "part" + PIN_MEMORY: True + NUM_WORKERS: 8 + +MODEL: + PRETRAINED: False + RESUME: "" + LABEL_SMOOTHING: 0.1 + +TRAIN: + START_EPOCH: 0 + EPOCHS: 300 + WARMUP_EPOCHS: 3 + WARMUP_LR: 0.0001 + MIN_LR: 1.0e-06 + WEIGHT_DECAY: 2.0e-05 + BASE_LR: 0.01 + CLIP_GRAD: None + AUTO_RESUME: True + ACCUMULATION_STEPS: 0 + + LR_SCHEDULER: + NAME: cosine + MILESTONES: None + + OPTIMIZER: + NAME: sgd + MOMENTUM: 0.9 + NESTEROV: True + + +AUG: + COLOR_JITTER: 0.4 + AUTO_AUGMENT: rand-m9-mstd0.5-inc1 + REPROB: 0.6 + REMODE: pixel + RECOUNT: 1 + MIXUP: 0. + CUTMIX: 0. 
+ CUTMIX_MINMAX: None + +TEST: + CROP: True + SEQUENTIAL: False + +TAG: default +SAVE_FREQ: 1 +PRINT_FREQ: 10 +SEED: 42 +EVAL_MODE: False +THROUGHPUT_MODE: False