@inproceedings{shen_wu_2023a,
  title={Learning Spatially-Adaptive Squeeze-Excitation Networks for Few Shot Image Synthesis},
  DOI={10.1109/ICIP49359.2023.10222248},
  abstractNote={Learning light-weight yet expressive deep networks for image synthesis is a challenging problem. Inspired by the recent observation that it is data specificity that makes the multi-head self-attention (MHSA) in the Transformer model so powerful, this paper proposes to extend the widely adopted light-weight Squeeze-Excitation (SE) module to be spatially adaptive, reinforcing its data specificity as a convolutional alternative to MHSA while retaining the efficiency of SE and the inductive bias of convolution. It proposes a spatially-adaptive squeeze-excitation (SASE) module for the image synthesis task. SASE is tested on the low-shot image generative learning task and shows better performance than prior art.},
  booktitle={2023 IEEE International Conference on Image Processing (ICIP)},
  author={Shen, Jianghao and Wu, Tianfu},
  year={2023},
  pages={2855--2859}
}

@inproceedings{shen_wu_2023b,
  title={Learning Spatially-Adaptive Style-Modulation Networks for Single Image Synthesis},
  DOI={10.1109/ICIP49359.2023.10222483},
  abstractNote={Recently there has been growing interest in learning generative models from a single image. This task is important because in many real-world applications collecting a large dataset is not feasible. Existing work such as SinGAN can synthesize novel images that resemble the patch distribution of the training image. However, SinGAN cannot learn the high-level semantics of the image, and thus its synthesized samples tend to have unrealistic spatial layouts. To address this issue, this paper proposes a spatially-adaptive style-modulation (SASM) module that learns to preserve the realistic spatial configuration of images. Specifically, it extracts a style vector (in the form of channel-wise attention) and a latent spatial mask (in the form of spatial attention) separately from a coarse-level feature. The style vector and spatial mask are then aggregated to modulate features of deeper layers. The disentangled modulation of spatial and style attributes enables the model to preserve the spatial structure of the image without overfitting. Experimental results show that the proposed module learns to generate samples with better fidelity than prior works.},
  booktitle={2023 IEEE International Conference on Image Processing (ICIP)},
  author={Shen, Jianghao and Wu, Tianfu},
  year={2023},
  pages={1455--1459}
}