% papers.bib — publication list (al-folio Jekyll theme).
% Entries use theme-specific fields (abbr, pdf, video, website, preview,
% selected, abstract) alongside standard BibTeX fields; unknown field
% names are silently ignored by BibTeX, so these are safe.
@inproceedings{Saunders2023READ,
  author    = {Saunders, Jack and Namboodiri, Vinay P.},
  title     = {{READ} Avatars: Realistic Emotion-controllable Audio Driven Avatars},
  booktitle = {Proceedings of the British Machine Vision Conference ({BMVC})},
  year      = {2023},
  abbr      = {BMVC},
  arxiv     = {2303.00744},
  pdf       = {https://arxiv.org/pdf/2303.00744.pdf},
  video     = {https://www.youtube.com/watch?v=QSyMl3vV0pA},
  website   = {https://readavatars.github.io/},
  preview   = {READ_Avatars_RepresentativeImage.jpg},
  selected  = {true},
  abstract  = {We present READ Avatars, a 3D-based approach for generating 2D avatars that are driven by audio input with direct and granular control over the emotion. Previous methods are unable to achieve realistic animation due to the many-to-many nature of audio to expression mappings. We alleviate this issue by introducing an adversarial loss in the audio-to-expression generation process. This removes the smoothing effect of regression-based models and helps to improve the realism and expressiveness of the generated avatars.},
}
@inproceedings{Saunders2023FACTS,
  author    = {Saunders, Jack and Caulkin, Steven and Namboodiri, Vinay P.},
  title     = {{FACTS}: Facial Animation Creation using the Transfer of Styles},
  booktitle = {Eurographics 2024 (Short Papers)},
  year      = {2024},
  abbr      = {EG},
  arxiv     = {2307.09480},
  pdf       = {https://arxiv.org/pdf/2307.09480},
  preview   = {FACTS.png},
  abstract  = {We present FACTS, a method for modifying existing 3D facial animations by transferring style characteristics across identities and emotions. FACTS employs StarGAN to enable the conversion of 3D facial animations into different emotions and person-specific idiosyncratic styles, while preserving lip-sync through a novel viseme-preserving loss.},
}
@inproceedings{Saunders2024TalkLoRA,
  author    = {Saunders, Jack and Namboodiri, Vinay P.},
  title     = {{TalkLoRA}: Low-Rank Adaptation for Speech-Driven Animation},
  booktitle = {Proceedings of the British Machine Vision Conference ({BMVC})},
  year      = {2024},
  abbr      = {BMVC},
  arxiv     = {2408.13714},
  pdf       = {https://arxiv.org/pdf/2408.13714},
  preview   = {talklora_overview.png},
  abstract  = {Transformer-based speech-driven facial animation models are difficult to adapt to new personalised speaking styles and suffer from computational inefficiency with long sentences. TalkLoRA applies Low-Rank Adaptation to learn subject-specific adaptors with minimal data, and introduces a chunking strategy that reduces transformer complexity by an order of magnitude without quality loss.},
}
@inproceedings{Saunders2025DEAD,
  author    = {Saunders, Jack and Namboodiri, Vinay P.},
  title     = {{DEAD}: Data-Efficient Audiovisual Dubbing using Neural Rendering Priors},
  booktitle = {Proceedings of the British Machine Vision Conference ({BMVC})},
  year      = {2025},
  abbr      = {BMVC},
  arxiv     = {2401.06126},
  pdf       = {https://arxiv.org/pdf/2401.06126},
  video     = {https://www.youtube.com/watch?v=mnlWVLLoeiY},
  preview   = {dead_teaser.jpg},
  abstract  = {Visual dubbing generates lip motions of an actor to synchronise with given audio, enabling video media to reach global audiences. We train a large multi-person prior network that can be rapidly adapted to new users with just a few seconds of data, enabling high-quality dubbing for any actor. We achieve state-of-the-art visual quality and recognisability, outperforming baselines under limited data conditions.},
}
@inproceedings{saunders2025gasp,
  author    = {Saunders, Jack and Hewitt, Charlie and Jian, Yanan and Kowalski, Marek and Baltru{\v{s}}aitis, Tadas and Chen, Yiye and Cosker, Darren and Estellers, Virginia and Gyd{\'e}, Nicholas and Namboodiri, Vinay P. and others},
  title     = {{GASP}: Gaussian Avatars with Synthetic Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  pages     = {271--280},
  month     = jun,
  year      = {2025},
  abbr      = {CVPR},
  arxiv     = {2412.07739},
  pdf       = {https://arxiv.org/pdf/2412.07739},
  video     = {https://youtu.be/3oWB7-UJUYE},
  website   = {https://microsoft.github.io/GASP/},
  html      = {https://openaccess.thecvf.com/content/CVPR2025/html/Saunders_GASP_Gaussian_Avatars_with_Synthetic_Priors_CVPR_2025_paper.html},
  preview   = {gasp_teaser.jpg},
  selected  = {true},
  abstract  = {We propose GASP: Gaussian Avatars with Synthetic Priors. By exploiting pixel-perfect synthetic data to train a Gaussian Avatar prior, we obtain high-quality, 360-degree renderable avatars from a single photo or short monocular video. The prior is only needed for fitting, not inference, enabling real-time rendering at 70fps on commercial hardware.},
}