_bibliography/references.bib (+12 -4)
@@ -9,6 +9,7 @@ @inproceedings{
year={2025},
url={https://openreview.net/forum?id=U47ymTS3ut},
pdf={https://openreview.net/pdf?id=U47ymTS3ut},
+ img={mask_in_the_mirror.png},
abstract={Continuous sparsification strategies are among the most effective methods for reducing the inference costs and memory demands of large-scale neural networks. A key factor in their success is the implicit L1 regularization induced by jointly learning both mask and weight variables, which has been shown experimentally to outperform explicit L1 regularization. We provide a theoretical explanation for this observation by analyzing the learning dynamics, revealing that early continuous sparsification is governed by an implicit L2 regularization that gradually transitions to an L1 penalty over time. Leveraging this insight, we propose a method to dynamically control the strength of this implicit bias. Through an extension of the mirror flow framework, we establish convergence and optimality guarantees in the context of underdetermined linear regression. Our theoretical findings may be of independent interest, as we demonstrate how to enter the rich regime and show that the implicit bias can be controlled via a time-dependent Bregman potential. To validate these insights, we introduce PILoT, a continuous sparsification approach with novel initialization and dynamic regularization, which consistently outperforms baselines in standard experiments.},
}

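A minimal NumPy sketch of the mask-times-weight reparameterization that the abstract above analyzes; this is a generic illustration of the implicit sparsity bias in underdetermined linear regression, not the PILoT method itself (the data, step size, and initialization scale are arbitrary choices for the example):

import numpy as np

rng = np.random.default_rng(0)
n, d = 20, 50                                    # fewer observations than features
X = rng.normal(size=(n, d))
w_true = np.zeros(d)
w_true[:3] = [2.0, -1.5, 1.0]                    # sparse ground-truth coefficients
y = X @ w_true

# Reparameterize w = m * v ("mask" times weight) and run plain gradient descent.
# The small initialization scale pushes the dynamics toward the sparsity-inducing regime.
m = np.full(d, 0.1)
v = np.zeros(d)
lr = 1e-2
for _ in range(20000):
    grad = X.T @ (X @ (m * v) - y) / n           # gradient of the squared loss w.r.t. w
    m, v = m - lr * grad * v, v - lr * grad * m  # chain rule through w = m * v

w_mv = m * v
w_l2 = np.linalg.pinv(X) @ y                     # minimum-L2-norm interpolator, for contrast
print("mass on 3 largest |w| entries (m*v):  ", np.sort(np.abs(w_mv))[-3:].sum() / np.abs(w_mv).sum())
print("mass on 3 largest |w| entries (min-L2):", np.sort(np.abs(w_l2))[-3:].sum() / np.abs(w_l2).sum())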
@@ -22,6 +23,7 @@ @inproceedings{
pdf={https://openreview.net/pdf?id=g6v09VxgFw},
img={gnns-getting-comfy.png},
abstract={Maximizing the spectral gap through graph rewiring has been proposed to enhance the performance of message-passing graph neural networks (GNNs) by addressing over-squashing. However, as we show, minimizing the spectral gap can also improve generalization. To explain this, we analyze how rewiring can benefit GNNs within the context of stochastic block models. Since spectral gap optimization primarily influences community strength, it improves performance when the community structure aligns with node labels. Building on this insight, we propose three distinct rewiring strategies that explicitly target community structure, node labels, and their alignment: (a) community structure-based rewiring (ComMa), a more computationally efficient alternative to spectral gap optimization that achieves similar goals; (b) feature similarity-based rewiring (FeaSt), which focuses on maximizing global homophily; and (c) a hybrid approach (ComFy), which enhances local feature similarity while preserving community structure to optimize label-community alignment. Extensive experiments confirm the effectiveness of these strategies and support our theoretical insights.},
+ code={https://github.com/RelationalML/ComFy}
}

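The released implementation is linked in the entry above; purely as an illustration of the feature-similarity idea (FeaSt) described in the abstract, here is a small NetworkX sketch that adds edges between the most similar currently unconnected node pairs (the function name and hyperparameters are invented for this sketch):

import itertools
import networkx as nx
import numpy as np

def rewire_by_feature_similarity(G: nx.Graph, features: np.ndarray, n_new_edges: int) -> nx.Graph:
    """Add n_new_edges edges between the non-adjacent pairs with the highest cosine similarity.

    Assumes nodes are labeled 0..n-1 so they can index the feature matrix directly.
    """
    normed = features / np.linalg.norm(features, axis=1, keepdims=True)
    candidates = []
    for u, v in itertools.combinations(G.nodes, 2):
        if not G.has_edge(u, v):
            candidates.append((float(normed[u] @ normed[v]), u, v))
    candidates.sort(reverse=True)                 # most similar pairs first
    H = G.copy()
    H.add_edges_from((u, v) for _, u, v in candidates[:n_new_edges])
    return H

# Toy usage: a random graph with random node features.
rng = np.random.default_rng(0)
G = nx.erdos_renyi_graph(30, 0.1, seed=0)
X = rng.normal(size=(30, 8))
H = rewire_by_feature_similarity(G, X, n_new_edges=10)
print(G.number_of_edges(), "->", H.number_of_edges())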
@inproceedings{
@@ -58,7 +60,8 @@ @inproceedings{
year={2024},
url={https://openreview.net/forum?id=FNtsZLwkGr},
pdf={https://openreview.net/pdf?id=FNtsZLwkGr},
- abstract={The practical utility of machine learning models in the sciences often hinges on their interpretability. It is common to assess a model's merit for scientific discovery, and thus novel insights, by how well it aligns with already available domain knowledge - a dimension that is currently largely disregarded in the comparison of neural network models. While pruning can simplify deep neural network architectures and excels in identifying sparse models, as we show in the context of gene regulatory network inference, state-of-the-art techniques struggle with biologically meaningful structure learning. To address this issue, we propose DASH, a generalizable framework that guides network pruning by using domain-specific structural information in model fitting and leads to sparser, better interpretable models that are more robust to noise. Using both synthetic data with ground truth information, as well as real-world gene expression data, we show that DASH, using knowledge about gene interaction partners within the putative regulatory network, outperforms general pruning methods by a large margin and yields deeper insights into the biological systems being studied.}
+ abstract={The practical utility of machine learning models in the sciences often hinges on their interpretability. It is common to assess a model's merit for scientific discovery, and thus novel insights, by how well it aligns with already available domain knowledge - a dimension that is currently largely disregarded in the comparison of neural network models. While pruning can simplify deep neural network architectures and excels in identifying sparse models, as we show in the context of gene regulatory network inference, state-of-the-art techniques struggle with biologically meaningful structure learning. To address this issue, we propose DASH, a generalizable framework that guides network pruning by using domain-specific structural information in model fitting and leads to sparser, better interpretable models that are more robust to noise. Using both synthetic data with ground truth information, as well as real-world gene expression data, we show that DASH, using knowledge about gene interaction partners within the putative regulatory network, outperforms general pruning methods by a large margin and yields deeper insights into the biological systems being studied.},

- abstract={Gene regulatory network (GRN) models that are formulated as ordinary differential equations (ODEs) can accurately explain temporal gene expression patterns and promise to yield new insights into important cellular processes, disease progression, and intervention design. Learning such gene regulatory ODEs is challenging, since we want to predict the evolution of gene expression in a way that accurately encodes the underlying GRN governing the dynamics and the nonlinear functional relationships between genes. Most widely used ODE estimation methods either impose too many parametric restrictions or are not guided by meaningful biological insights, both of which impede either scalability, explainability, or both.}
+ abstract={Gene regulatory network (GRN) models that are formulated as ordinary differential equations (ODEs) can accurately explain temporal gene expression patterns and promise to yield new insights into important cellular processes, disease progression, and intervention design. Learning such gene regulatory ODEs is challenging, since we want to predict the evolution of gene expression in a way that accurately encodes the underlying GRN governing the dynamics and the nonlinear functional relationships between genes. Most widely used ODE estimation methods either impose too many parametric restrictions or are not guided by meaningful biological insights, both of which impede either scalability, explainability, or both.},
+ img={GNR_Phoenix.png}
}

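As a rough illustration of the kind of prior-guided pruning the DASH abstract describes (this is not the authors' implementation; the scoring rule and the interpolation weight alpha are invented for this sketch):

import numpy as np

def prior_guided_mask(W: np.ndarray, prior: np.ndarray, sparsity: float, alpha: float = 0.5) -> np.ndarray:
    """Keep the (1 - sparsity) fraction of entries with the highest combined score.

    W      -- learned weight matrix
    prior  -- same-shaped matrix in [0, 1], 1 where domain knowledge supports an interaction
    alpha  -- interpolation between pure magnitude scoring (0) and pure prior scoring (1)
    """
    magnitude = np.abs(W) / (np.abs(W).max() + 1e-12)
    score = (1 - alpha) * magnitude + alpha * prior
    threshold = np.quantile(score, sparsity)
    return (score > threshold).astype(W.dtype)

rng = np.random.default_rng(0)
W = rng.normal(size=(20, 20))
prior = (rng.random((20, 20)) < 0.1).astype(float)   # hypothetical sparse interaction prior
mask = prior_guided_mask(W, prior, sparsity=0.9)
print("kept weights:", int(mask.sum()), "of", W.size)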
@inproceedings{
@@ -107,6 +111,7 @@ @inproceedings{
url={https://openreview.net/forum?id=wOSYMHfENq},
pdf={https://openreview.net/pdf?id=wOSYMHfENq},
abstract={Layer normalization, for which Batch Normalization (BN) is a popular choice, is an integral part of many deep learning architectures and contributes significantly to the learning success. We provide a partial explanation for this phenomenon by proving that training normalization layers alone is already sufficient for universal function approximation if the number of available, potentially random features matches or exceeds the weight parameters of the target networks that can be expressed. Our bound on the number of required features does not only improve on a recent result for fully-connected feed-forward architectures but also applies to CNNs with and without residual connections and almost arbitrary activation functions (which include ReLUs). Our explicit construction of a given target network solves a depth-width trade-off that is driven by architectural constraints and can explain why switching off entire neurons can have representational benefits, as has been observed empirically. To validate our theory, we explicitly match target networks that outperform experimentally obtained networks with trained BN parameters by utilizing a sufficient number of random features.},
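A minimal PyTorch sketch of the setting in the abstract above: all convolution and linear weights are frozen at their random initialization and only the BatchNorm affine parameters are trained (the architecture and hyperparameters are arbitrary, chosen only to show the mechanism):

import torch
import torch.nn as nn

model = nn.Sequential(
    nn.Conv2d(3, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
    nn.Conv2d(64, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(64, 10),
)

# Freeze everything except the BatchNorm scale/shift parameters.
for module in model.modules():
    is_bn = isinstance(module, nn.BatchNorm2d)
    for param in module.parameters(recurse=False):
        param.requires_grad = is_bn

optimizer = torch.optim.SGD((p for p in model.parameters() if p.requires_grad), lr=0.1)

# One toy training step on random data.
x, y = torch.randn(8, 3, 32, 32), torch.randint(0, 10, (8,))
loss = nn.functional.cross_entropy(model(x), y)
loss.backward()
optimizer.step()
print(sum(p.numel() for p in model.parameters() if p.requires_grad), "trainable parameters")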
abstract = {Random masks define surprisingly effective sparse neural network models, as has been shown empirically. The resulting sparse networks can often compete with dense architectures and state-of-the-art lottery ticket pruning algorithms, even though they do not rely on computationally expensive prune-train iterations and can be drawn initially without significant computational overhead. We offer a theoretical explanation of how random masks can approximate arbitrary target networks if they are wider by a logarithmic factor in the inverse sparsity $1 / \log(1/\text{sparsity})$. This overparameterization factor is necessary at least for 3-layer random networks, which elucidates the observed degrading performance of random networks at higher sparsity. At moderate to high sparsity levels, however, our results imply that sparser networks are contained within random source networks so that any dense-to-sparse training scheme can be turned into a computationally more efficient sparse-to-sparse one by constraining the search to a fixed random mask. We demonstrate the feasibility of this approach in experiments for different pruning methods and propose particularly effective choices of initial layer-wise sparsity ratios of the random source network. As a special case, we show theoretically and experimentally that random source networks also contain strong lottery tickets.},
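A NumPy sketch of the sparse-to-sparse idea from this abstract, constraining training to a fixed random mask for a single linear layer (the layer sizes, sparsity level, and toy regression objective are assumptions made for this example):

import numpy as np

rng = np.random.default_rng(0)
d_in, d_out, sparsity = 128, 64, 0.9
mask = (rng.random((d_out, d_in)) > sparsity).astype(np.float64)          # fixed random mask
W = rng.normal(scale=1.0 / np.sqrt(d_in), size=(d_out, d_in)) * mask      # sparse at initialization

def masked_sgd_step(W, grad, lr=0.1):
    # Gradients outside the mask are discarded, so the support never changes.
    return W - lr * grad * mask

# One toy regression step on random data.
x = rng.normal(size=(32, d_in))
y = rng.normal(size=(32, d_out))
grad = (x.T @ (x @ W.T - y)).T / len(x)                                   # gradient of 0.5*MSE w.r.t. W
W = masked_sgd_step(W, grad)
print("nonzero fraction:", float((W != 0).mean()))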
abstract = {The Lottery Ticket Hypothesis continues to have a profound practical impact on the quest for small scale deep neural networks that solve modern deep learning tasks at competitive performance. These lottery tickets are identified by pruning large randomly initialized neural networks with architectures that are as diverse as their applications. Yet, theoretical insights that attest their existence have been mostly focused on deep fully-connected feed-forward networks with ReLU activation functions. We prove that also modern architectures consisting of convolutional and residual layers that can be equipped with almost arbitrary activation functions can contain lottery tickets with high probability.},
description: "I am developing algorithms to reduce the size of neural networks by increasing parameter sparsity and decreasing the storage required for each parameter. My current focus is on sparse topologies that enhance the performance of sparse networks. Additionally, I am working on efficient quantization techniques to minimize the effective size of large language models (LLMs)."
description: "My current research focuses on theoretically elucidating the superior performance of Mixture of Experts models, with an emphasis on their generalization performance, sample complexity, training dynamics, and robustness to adversarial noises."
title={Most Activation Functions Can Win the Lottery Without Excessive Depth},