
Commit 3c838a9

publication images
1 parent f99d9d0 commit 3c838a9

21 files changed: +75 -21 lines

_bibliography/references.bib (+12 -4)
@@ -9,6 +9,7 @@ @inproceedings{
   year={2025},
   url={https://openreview.net/forum?id=U47ymTS3ut},
   pdf={https://openreview.net/pdf?id=U47ymTS3ut},
+  img={mask_in_the_mirror.png},
   abstract={Continuous sparsification strategies are among the most effective methods for reducing the inference costs and memory demands of large-scale neural networks. A key factor in their success is the implicit L1 regularization induced by jointly learning both mask and weight variables, which has been shown experimentally to outperform explicit L1 regularization. We provide a theoretical explanation for this observation by analyzing the learning dynamics, revealing that early continuous sparsification is governed by an implicit L2 regularization that gradually transitions to an L1 penalty over time. Leveraging this insight, we propose a method to dynamically control the strength of this implicit bias. Through an extension of the mirror flow framework, we establish convergence and optimality guarantees in the context of underdetermined linear regression. Our theoretical findings may be of independent interest, as we demonstrate how to enter the rich regime and show that the implicit bias can be controlled via a time-dependent Bregman potential. To validate these insights, we introduce PILoT, a continuous sparsification approach with novel initialization and dynamic regularization, which consistently outperforms baselines in standard experiments.},
 }

@@ -22,6 +23,7 @@ @inproceedings{
   pdf={https://openreview.net/pdf?id=g6v09VxgFw},
   img={gnns-getting-comfy.png},
   abstract={Maximizing the spectral gap through graph rewiring has been proposed to enhance the performance of message-passing graph neural networks (GNNs) by addressing over-squashing. However, as we show, minimizing the spectral gap can also improve generalization. To explain this, we analyze how rewiring can benefit GNNs within the context of stochastic block models. Since spectral gap optimization primarily influences community strength, it improves performance when the community structure aligns with node labels. Building on this insight, we propose three distinct rewiring strategies that explicitly target community structure, node labels, and their alignment: (a) community structure-based rewiring (ComMa), a more computationally efficient alternative to spectral gap optimization that achieves similar goals; (b) feature similarity-based rewiring (FeaSt), which focuses on maximizing global homophily; and (c) a hybrid approach (ComFy), which enhances local feature similarity while preserving community structure to optimize label-community alignment. Extensive experiments confirm the effectiveness of these strategies and support our theoretical insights.},
+  code={https://github.com/RelationalML/ComFy}
 }

 @inproceedings{
@@ -58,7 +60,8 @@ @inproceedings{
   year={2024},
   url={https://openreview.net/forum?id=FNtsZLwkGr},
   pdf={https://openreview.net/pdf?id=FNtsZLwkGr},
-  abstract={The practical utility of machine learning models in the sciences often hinges on their interpretability. It is common to assess a model's merit for scientific discovery, and thus novel insights, by how well it aligns with already available domain knowledge - a dimension that is currently largely disregarded in the comparison of neural network models. While pruning can simplify deep neural network architectures and excels in identifying sparse models, as we show in the context of gene regulatory network inference, state-of-the-art techniques struggle with biologically meaningful structure learning. To address this issue, we propose DASH, a generalizable framework that guides network pruning by using domain-specific structural information in model fitting and leads to sparser, better interpretable models that are more robust to noise. Using both synthetic data with ground truth information, as well as real-world gene expression data, we show that DASH, using knowledge about gene interaction partners within the putative regulatory network, outperforms general pruning methods by a large margin and yields deeper insights into the biological systems being studied.}
+  abstract={The practical utility of machine learning models in the sciences often hinges on their interpretability. It is common to assess a model's merit for scientific discovery, and thus novel insights, by how well it aligns with already available domain knowledge - a dimension that is currently largely disregarded in the comparison of neural network models. While pruning can simplify deep neural network architectures and excels in identifying sparse models, as we show in the context of gene regulatory network inference, state-of-the-art techniques struggle with biologically meaningful structure learning. To address this issue, we propose DASH, a generalizable framework that guides network pruning by using domain-specific structural information in model fitting and leads to sparser, better interpretable models that are more robust to noise. Using both synthetic data with ground truth information, as well as real-world gene expression data, we show that DASH, using knowledge about gene interaction partners within the putative regulatory network, outperforms general pruning methods by a large margin and yields deeper insights into the biological systems being studied.},
+  img={GRN_pruning.png}
 }

 @article{hossain2024biologically,
@@ -69,7 +72,8 @@ @article{hossain2024biologically
   month={May},
   volume={25},
   url={https://doi.org/10.1186/s13059-024-03264-0},
-  abstract={Gene regulatory network (GRN) models that are formulated as ordinary differential equations (ODEs) can accurately explain temporal gene expression patterns and promise to yield new insights into important cellular processes, disease progression, and intervention design. Learning such gene regulatory ODEs is challenging, since we want to predict the evolution of gene expression in a way that accurately encodes the underlying GRN governing the dynamics and the nonlinear functional relationships between genes. Most widely used ODE estimation methods either impose too many parametric restrictions or are not guided by meaningful biological insights, both of which impede either scalability, explainability, or both.}
+  abstract={Gene regulatory network (GRN) models that are formulated as ordinary differential equations (ODEs) can accurately explain temporal gene expression patterns and promise to yield new insights into important cellular processes, disease progression, and intervention design. Learning such gene regulatory ODEs is challenging, since we want to predict the evolution of gene expression in a way that accurately encodes the underlying GRN governing the dynamics and the nonlinear functional relationships between genes. Most widely used ODE estimation methods either impose too many parametric restrictions or are not guided by meaningful biological insights, both of which impede either scalability, explainability, or both.},
+  img={GNR_Phoenix.png}
 }

 @inproceedings{
@@ -107,6 +111,7 @@ @inproceedings{
   url={https://openreview.net/forum?id=wOSYMHfENq},
   pdf={https://openreview.net/pdf?id=wOSYMHfENq},
   abstract={Layer normalization, for which Batch Normalization (BN) is a popular choice, is an integral part of many deep learning architectures and contributes significantly to the learning success. We provide a partial explanation for this phenomenon by proving that training normalization layers alone is already sufficient for universal function approximation if the number of available, potentially random features matches or exceeds the weight parameters of the target networks that can be expressed. Our bound on the number of required features does not only improve on a recent result for fully-connected feed-forward architectures but also applies to CNNs with and without residual connections and almost arbitrary activation functions (which include ReLUs). Our explicit construction of a given target network solves a depth-width trade-off that is driven by architectural constraints and can explain why switching off entire neurons can have representational benefits, as has been observed empirically. To validate our theory, we explicitly match target networks that outperform experimentally obtained networks with trained BN parameters by utilizing a sufficient number of random features.},
+  img={two_layer_construct.png}
 }

 @inproceedings{
@@ -136,6 +141,7 @@ @InProceedings{pmlr-v202-gadhikar23a
   url = {https://proceedings.mlr.press/v202/gadhikar23a.html},
   abstract = {Random masks define surprisingly effective sparse neural network models, as has been shown empirically. The resulting sparse networks can often compete with dense architectures and state-of-the-art lottery ticket pruning algorithms, even though they do not rely on computationally expensive prune-train iterations and can be drawn initially without significant computational overhead. We offer a theoretical explanation of how random masks can approximate arbitrary target networks if they are wider by a logarithmic factor in the inverse sparsity $1 / \log(1/\text{sparsity})$. This overparameterization factor is necessary at least for 3-layer random networks, which elucidates the observed degrading performance of random networks at higher sparsity. At moderate to high sparsity levels, however, our results imply that sparser networks are contained within random source networks so that any dense-to-sparse training scheme can be turned into a computationally more efficient sparse-to-sparse one by constraining the search to a fixed random mask. We demonstrate the feasibility of this approach in experiments for different pruning methods and propose particularly effective choices of initial layer-wise sparsity ratios of the random source network. As a special case, we show theoretically and experimentally that random source networks also contain strong lottery tickets.},
   code = {https://github.com/RelationalML/sparse_to_sparse},
+  img={why-random-pruning.png},
 }
 @inproceedings{NEURIPS2022_76bf7786,
   author = {Burkholz, Rebekka},
@@ -149,7 +155,8 @@ @inproceedings{NEURIPS2022_76bf7786
   pdf = {https://proceedings.neurips.cc/paper_files/paper/2022/file/76bf7786d311217077bc8bb021946cd9-Paper-Conference.pdf},
   volume = {35},
   code = {https://github.com/RelationalML/LT-existence},
-  year = {2022}
+  year = {2022},
+  img={subsetsumVis.png},
 }

 @InProceedings{pmlr-v162-burkholz22a,
@@ -165,5 +172,6 @@ @InProceedings{pmlr-v162-burkholz22a
   pdf = {https://proceedings.mlr.press/v162/burkholz22a/burkholz22a.pdf},
   url = {https://proceedings.mlr.press/v162/burkholz22a.html},
   abstract = {The Lottery Ticket Hypothesis continues to have a profound practical impact on the quest for small scale deep neural networks that solve modern deep learning tasks at competitive performance. These lottery tickets are identified by pruning large randomly initialized neural networks with architectures that are as diverse as their applications. Yet, theoretical insights that attest their existence have been mostly focused on deep fully-connected feed forward networks with ReLU activation functions. We prove that also modern architectures consisting of convolutional and residual layers that can be equipped with almost arbitrary activation functions can contain lottery tickets with high probability.},
-  spotlight={true}
+  spotlight={true},
+  img={proof_network_overview.png},
 }

_data/team_members.yml (+2)
@@ -64,13 +64,15 @@
       start_date: Jun 2024

       url: https://cispa.de/en/people/c01rani
+      description: "I am developing algorithms to reduce the size of neural networks by increasing parameter sparsity and decreasing the storage required for each parameter. My current focus is on sparse topologies that enhance the performance of sparse networks. Additionally, I am working on efficient quantization techniques to minimize the effective size of large language models (LLMs)."

     - name: Dong Sun
       last_name: Sun
       photo: c01dosu.jpg
       start_date: Jul 2024

       url: https://cispa.de/en/people/c01dosu
+      description: "My current research focuses on theoretically elucidating the superior performance of Mixture of Experts models, with an emphasis on their generalization performance, sample complexity, training dynamics, and robustness to adversarial noises."

   - role: Research engineers
     members:

_site/css/main.css (+2 -2; generated file, diff not rendered)

_site/css/main.css.map (+1 -1; generated file, diff not rendered)
[Binary publication image files added by this commit; previews not rendered in the diff view.]

_site/publications/index.html (+45 -9)
@@ -87,7 +87,12 @@ <h1 id="publications">Publications</h1>


       <div class="row" style="margin-top: 30px;" id="bib-jacobs2025mask" authors="JacobsBurkholz">
-        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/default.png" /></div>
+        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/mask_in_the_mirror.png" />
+        <!-- include figure.html
+          path=entry_path
+          class="preview z-depth-1 rounded"
+          zoomable=false
+          alt=entry.img --></div>
         <!-- Entry bib key -->
         <div id="jacobs2025mask" class="col-sm-10">
           <!-- Title -->
@@ -168,6 +173,7 @@ <h1 id="publications">Publications</h1>
       <div class="links"><a class="conf btn btn-sm z-depth-0">ICLR</a><a class="bibtex btn btn-sm z-depth-0" role="button">Bib</a>
         <a class="abstract btn btn-sm z-depth-0" role="button">Abs</a>
         <!-- <a href="https://openreview.net/pdf?id=g6v09VxgFw" class="btn btn-sm z-depth-0" role="button">PDF</a> -->
+        <a href="https://github.com/RelationalML/ComFy" class="btn btn-sm z-depth-0" role="button">Code</a>
       </div>


@@ -300,7 +306,12 @@ <h1 id="publications">Publications</h1>


       <div class="row" style="margin-top: 30px;" id="bib-hossain2024pruning" authors="HossainFischerBurkholzQuackenbush">
-        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/default.png" /></div>
+        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/GRN_pruning.png" />
+        <!-- include figure.html
+          path=entry_path
+          class="preview z-depth-1 rounded"
+          zoomable=false
+          alt=entry.img --></div>
         <!-- Entry bib key -->
         <div id="hossain2024pruning" class="col-sm-10">
           <!-- Title -->
@@ -349,7 +360,12 @@ <h1 id="publications">Publications</h1>


       <div class="row" style="margin-top: 30px;" id="bib-hossain2024biologically" authors="HossainFanfaniFischerQuackenbushBurkholz">
-        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/default.png" /></div>
+        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/GNR_Phoenix.png" />
+        <!-- include figure.html
+          path=entry_path
+          class="preview z-depth-1 rounded"
+          zoomable=false
+          alt=entry.img --></div>
         <!-- Entry bib key -->
         <div id="hossain2024biologically" class="col-sm-10">
           <!-- Title -->
@@ -390,7 +406,7 @@ <h1 id="publications">Publications</h1>
   <span class="na">year</span> <span class="p">=</span> <span class="s">{2024}</span><span class="p">,</span>
   <span class="na">month</span> <span class="p">=</span> <span class="nv">may</span><span class="p">,</span>
   <span class="na">volume</span> <span class="p">=</span> <span class="s">{25}</span><span class="p">,</span>
-  <span class="na">url</span> <span class="p">=</span> <span class="s">{https://doi.org/10.1186/s13059-024-03264-0}</span>
+  <span class="na">url</span> <span class="p">=</span> <span class="s">{https://doi.org/10.1186/s13059-024-03264-0}</span><span class="p">,</span>
   <span class="p">}</span></code></pre></figure>
   </div>
 </div>
@@ -508,7 +524,12 @@ <h1 id="publications">Publications</h1>


       <div class="row" style="margin-top: 30px;" id="bib-burkholz2024batch" authors="Burkholz">
-        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/default.png" /></div>
+        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/two_layer_construct.png" />
+        <!-- include figure.html
+          path=entry_path
+          class="preview z-depth-1 rounded"
+          zoomable=false
+          alt=entry.img --></div>
         <!-- Entry bib key -->
         <div id="burkholz2024batch" class="col-sm-10">
           <!-- Title -->
@@ -612,7 +633,12 @@ <h1 id="publications">Publications</h1>


       <div class="row" style="margin-top: 30px;" id="bib-pmlr-v202-gadhikar23a" authors="GadhikarMukherjeeBurkholz">
-        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/default.png" /></div>
+        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/why-random-pruning.png" />
+        <!-- include figure.html
+          path=entry_path
+          class="preview z-depth-1 rounded"
+          zoomable=false
+          alt=entry.img --></div>
         <!-- Entry bib key -->
         <div id="pmlr-v202-gadhikar23a" class="col-sm-10">
           <!-- Title -->
@@ -667,7 +693,12 @@ <h1 id="publications">Publications</h1>


       <div class="row" style="margin-top: 30px;" id="bib-NEURIPS2022_76bf7786" authors="Burkholz">
-        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/default.png" /></div>
+        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/subsetsumVis.png" />
+        <!-- include figure.html
+          path=entry_path
+          class="preview z-depth-1 rounded"
+          zoomable=false
+          alt=entry.img --></div>
         <!-- Entry bib key -->
         <div id="NEURIPS2022_76bf7786" class="col-sm-10">
           <!-- Title -->
@@ -712,7 +743,7 @@ <h1 id="publications">Publications</h1>
   <span class="na">title</span> <span class="p">=</span> <span class="s">{Most Activation Functions Can Win the Lottery Without Excessive Depth}</span><span class="p">,</span>
   <span class="na">url</span> <span class="p">=</span> <span class="s">{https://papers.nips.cc/paper_files/paper/2022/hash/76bf7786d311217077bc8bb021946cd9-Abstract-Conference.html}</span><span class="p">,</span>
   <span class="na">volume</span> <span class="p">=</span> <span class="s">{35}</span><span class="p">,</span>
-  <span class="na">year</span> <span class="p">=</span> <span class="s">{2022}</span>
+  <span class="na">year</span> <span class="p">=</span> <span class="s">{2022}</span><span class="p">,</span>
   <span class="p">}</span></code></pre></figure>
   </div>
 </div>
@@ -721,7 +752,12 @@ <h1 id="publications">Publications</h1>


       <div class="row" style="margin-top: 30px;" id="bib-pmlr-v162-burkholz22a" authors="Burkholz">
-        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/default.png" /></div>
+        <div class="preview"><img class="preview z-depth-1 rounded" src="/images/publications/proof_network_overview.png" />
+        <!-- include figure.html
+          path=entry_path
+          class="preview z-depth-1 rounded"
+          zoomable=false
+          alt=entry.img --></div>
         <!-- Entry bib key -->
         <div id="pmlr-v162-burkholz22a" class="col-sm-10">
           <!-- Title -->
