 import jax.numpy as jnp
 import optax

+
 from .base import BaseEstimator


@@ -13,18 +14,28 @@ class MaximumLikelihoodEstimator(BaseEstimator):
     Base class for Maximum Likelihood Estimators using Optax.
     """

-    def __init__(self, optimizer: Optional[optax.GradientTransformation] = None, maxiter: int = 5000, tol: float = 1e-4):
+    def __init__(
+        self,
+        optimizer: Optional[optax.GradientTransformation] = None,
+        maxiter: int = 5000,
+        tol: float = 1e-4,
+    ):
         super().__init__()
-        self.optimizer = optimizer if optimizer is not None else optax.adam(learning_rate=1e-3)
+        self.optimizer = optimizer if optimizer is not None else optax.lbfgs()
         self.maxiter = maxiter
         # Tol is not directly used by basic optax loops for stopping but can be a reference
         # or used if a convergence check is manually added.
         self.tol = tol
-        self.params: Dict[str, jnp.ndarray] = {} # Initialize params
-        self.history: Dict[str, list] = {"loss": []} # To store loss history
+        self.params: Dict[str, jnp.ndarray] = {}  # Initialize params
+        self.history: Dict[str, list] = {"loss": []}  # To store loss history

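Note on the new default: any optax.GradientTransformation can still be injected at construction time, but the fit loop below leans on the L-BFGS optimizer state (via optax.value_and_grad_from_state). A minimal construction sketch, assuming the LogisticRegression subclass defined later in this file; the memory_size argument to optax.lbfgs is shown purely as an illustration of tuning the default solver:

import optax

model = LogisticRegression()  # default solver: optax.lbfgs()
model_tuned = LogisticRegression(
    optimizer=optax.lbfgs(memory_size=20),  # keep a longer curvature history
    maxiter=1000,
    tol=1e-6,
)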
     @abstractmethod
-    def _negative_log_likelihood(self, params: jnp.ndarray, X: jnp.ndarray, y: jnp.ndarray) -> float:
+    def _negative_log_likelihood(
+        self,
+        params: jnp.ndarray,
+        X: jnp.ndarray,
+        y: jnp.ndarray,
+    ) -> float:
         """
         Computes the negative log-likelihood for the model.
         Must be implemented by subclasses.
@@ -37,7 +48,13 @@ def _negative_log_likelihood(self, params: jnp.ndarray, X: jnp.ndarray, y: jnp.ndarray) -> float:
         """
         raise NotImplementedError

-    def fit(self, X: jnp.ndarray, y: jnp.ndarray, init_params: Optional[jnp.ndarray] = None) -> "MaximumLikelihoodEstimator":
+    def fit(
+        self,
+        X: jnp.ndarray,
+        y: jnp.ndarray,
+        init_params: Optional[jnp.ndarray] = None,
+        verbose: bool = False,
+    ) -> "MaximumLikelihoodEstimator":
         """
         Fit the model using the specified Optax optimizer.

@@ -53,10 +70,10 @@ def fit(self, X: jnp.ndarray, y: jnp.ndarray, init_params: Optional[jnp.ndarray] = None) -> "MaximumLikelihoodEstimator":
         """
         n_features = X.shape[1]
         if init_params is None:
-            try: # Try to use a key for initialization for better starting points
-                key = jax.random.PRNGKey(0) # Simple fixed key for reproducibility
+            try:  # Try to use a key for initialization for better starting points
+                key = jax.random.PRNGKey(0)  # Simple fixed key for reproducibility
                 init_params_val = jax.random.normal(key, (n_features,)) * 0.01
-            except Exception: # Fallback if key generation fails or not in context
+            except Exception:  # Fallback if key generation fails or not in context
                 init_params_val = jnp.zeros(n_features)
         else:
             init_params_val = init_params
@@ -67,31 +84,41 @@ def loss_fn(params_lg):
             return self._negative_log_likelihood(params_lg, X, y)

         # Get the gradient function
-        value_and_grad_fn = jax.value_and_grad(loss_fn)
+        value_and_grad_fn = optax.value_and_grad_from_state(loss_fn)

         # Initialize optimizer state
         opt_state = self.optimizer.init(init_params_val)

         current_params = init_params_val
-        self.history["loss"] = [] # Reset loss history
+        self.history["loss"] = []  # Reset loss history

         # Optimization loop
         for i in range(self.maxiter):
-            loss_val, grads = value_and_grad_fn(current_params)
-            updates, opt_state = self.optimizer.update(grads, opt_state, current_params)
-            current_params = optax.apply_updates(current_params, updates)
+            loss_val, grads = value_and_grad_fn(current_params, state=opt_state)
+            updates, opt_state = self.optimizer.update(
+                grads,
+                opt_state,
+                current_params,
+                value=loss_val,
+                grad=grads,
+                value_fn=loss_fn,
+            )
+            current_params = optax.apply_updates(
+                current_params,
+                updates,
+            )
             self.history["loss"].append(loss_val)
-
-            # Basic convergence check (optional, and might need adjustment)
-            # This is a simple check on loss improvement. More robust checks might look at gradient norms or param changes.
             if i > 10 and self.tol > 0:
-                loss_change = abs(self.history["loss"][-2] - self.history["loss"][-1]) / (abs(self.history["loss"][-2]) + 1e-8)
+                loss_change = abs(
+                    self.history["loss"][-2] - self.history["loss"][-1]
+                ) / (abs(self.history["loss"][-2]) + 1e-8)
                 if loss_change < self.tol:
-                    # print(f"Convergence tolerance {self.tol} met at iteration {i}.")
+                    if verbose:
+                        print(f"Convergence tolerance {self.tol} met at iteration {i}.")
                     break

         self.params = {"coef": current_params}
-        self.iterations_run = i + 1 # Store how many iterations actually ran
+        self.iterations_run = i + 1  # Store how many iterations actually ran

         return self

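For reference, the optimizer plumbing in this loop follows optax's documented L-BFGS recipe: optax.value_and_grad_from_state reuses the value and gradient cached in the L-BFGS state (so they are not recomputed after the line search), and update() receives value, grad, and value_fn so the line search can evaluate trial points. A minimal standalone sketch on a toy quadratic, independent of this class:

import jax.numpy as jnp
import optax

def f(x):
    return jnp.sum((x - 3.0) ** 2)  # toy objective with minimum at x = 3

solver = optax.lbfgs()
params = jnp.zeros(4)
opt_state = solver.init(params)
value_and_grad = optax.value_and_grad_from_state(f)

for _ in range(10):
    value, grad = value_and_grad(params, state=opt_state)
    updates, opt_state = solver.update(
        grad, opt_state, params, value=value, grad=grad, value_fn=f
    )
    params = optax.apply_updates(params, updates)

print(params)  # expected to approach [3., 3., 3., 3.]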
@@ -104,21 +131,28 @@ def summary(self) -> None:
         print(f"{self.__class__.__name__} Results")
         print("=" * 30)
         print(f"Optimizer: {self.optimizer}")
-        if hasattr(self, 'iterations_run'):
-            print(f"Optimization ran for {self.iterations_run}/{self.maxiter} iterations.")
+        if hasattr(self, "iterations_run"):
+            print(
+                f"Optimization ran for {self.iterations_run}/{self.maxiter} iterations."
+            )
         if self.history["loss"]:
             print(f"Final Loss: {self.history['loss'][-1]:.4e}")

         print(f"Coefficients: {self.params['coef']}")
         print("=" * 30)


-class Logit(MaximumLikelihoodEstimator):
+class LogisticRegression(MaximumLikelihoodEstimator):
     """
     Logistic Regression model.
     """

-    def _negative_log_likelihood(self, params: jnp.ndarray, X: jnp.ndarray, y: jnp.ndarray) -> float:
+    def _negative_log_likelihood(
+        self,
+        params: jnp.ndarray,
+        X: jnp.ndarray,
+        y: jnp.ndarray,
+    ) -> float:
         """
         Computes the negative log-likelihood for logistic regression.
         NLL = -Σ [y_i * log(p_i) + (1 - y_i) * log(1 - p_i)]
@@ -128,15 +162,10 @@ def _negative_log_likelihood(self, params: jnp.ndarray, X: jnp.ndarray, y: jnp.ndarray) -> float:
         log(1-p_i) = log_sigmoid(-(X_i @ β))
         """
         logits = X @ params
-        # Using jax.nn.log_sigmoid for log(σ(z)) and log(1-σ(z)) = log(σ(-z))
-        log_p = jax.nn.log_sigmoid(logits)
-        log_one_minus_p = jax.nn.log_sigmoid(-logits) # log(1 - sigmoid(x)) = log(sigmoid(-x))
-
-        # Sum over samples
-        nll = -jnp.sum(y * log_p + (1 - y) * log_one_minus_p)
-        # Return mean NLL for more stable optimization across batch sizes,
-        # though sum is also common. Your example used sum. Let's stick to sum for now.
-        return nll # / X.shape[0] if averaging
+        # alt: Using jax.nn.log_sigmoid for log(σ(z)) and log(1-σ(z)) = log(σ(-z))
+        h = jax.scipy.special.expit(logits)
+        nll = -jnp.sum(y * jnp.log(h) + (1 - y) * jnp.log1p(-h))
+        return nll  # / X.shape[0] if averaging

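The expit form above and the log_sigmoid form mentioned in the comment are algebraically identical; the log_sigmoid variant is the safer choice when logits saturate, since log(expit(z)) hits log(0) for large |z| in finite precision. A quick equivalence check on small synthetic inputs (all names below are illustrative):

import jax
import jax.numpy as jnp

def nll_expit(params, X, y):
    logits = X @ params
    h = jax.scipy.special.expit(logits)
    return -jnp.sum(y * jnp.log(h) + (1 - y) * jnp.log1p(-h))

def nll_logsigmoid(params, X, y):
    logits = X @ params
    # log(p) = log_sigmoid(z), log(1 - p) = log_sigmoid(-z)
    return -jnp.sum(y * jax.nn.log_sigmoid(logits) + (1 - y) * jax.nn.log_sigmoid(-logits))

key = jax.random.PRNGKey(0)
X = jax.random.normal(key, (8, 3))
params = jnp.array([0.5, -1.0, 2.0])
y = (X @ params > 0).astype(jnp.float32)
print(nll_expit(params, X, y), nll_logsigmoid(params, X, y))  # agree to float precision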
     def predict_proba(self, X: jnp.ndarray) -> jnp.ndarray:
         """
@@ -150,7 +179,7 @@ def predict_proba(self, X: jnp.ndarray) -> jnp.ndarray:
             raise ValueError("Model has not been fitted yet.")

         logits = X @ self.params["coef"]
-        return jax.nn.sigmoid(logits) # jax.scipy.special.expit is equivalent
+        return jax.nn.sigmoid(logits)  # jax.scipy.special.expit is equivalent

     def predict(self, X: jnp.ndarray, threshold: float = 0.5) -> jnp.ndarray:
         """
@@ -170,18 +199,23 @@ class PoissonRegression(MaximumLikelihoodEstimator):
     Poisson Regression model.
     """

-    def _negative_log_likelihood(self, params: jnp.ndarray, X: jnp.ndarray, y: jnp.ndarray) -> float:
+    def _negative_log_likelihood(
+        self,
+        params: jnp.ndarray,
+        X: jnp.ndarray,
+        y: jnp.ndarray,
+    ) -> float:
         """
         Computes the negative log-likelihood for Poisson regression.
         The log(y_i!) term is constant w.r.t params, so ignored for optimization.
         NLL = Σ [exp(X_i @ β) - y_i * (X_i @ β)]
         """
         linear_predictor = X @ params
-        lambda_i = jnp.exp(linear_predictor) # Predicted rates
+        lambda_i = jnp.exp(linear_predictor)  # Predicted rates

         # Sum over samples
         nll = jnp.sum(lambda_i - y * linear_predictor)
-        return nll # / X.shape[0] if averaging
+        return nll  # / X.shape[0] if averaging

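Dropping log(y_i!) leaves the minimizer unchanged because that term does not depend on β; differentiating what remains gives the familiar GLM score X^T(λ - y). A small check with jax.grad, on made-up inputs:

import jax
import jax.numpy as jnp

def poisson_nll(params, X, y):
    eta = X @ params
    return jnp.sum(jnp.exp(eta) - y * eta)  # log(y!) omitted: constant in params

key = jax.random.PRNGKey(1)
X = jax.random.normal(key, (10, 2))
params = jnp.array([0.3, -0.2])
y = jnp.array([0.0, 1.0, 2.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0])

auto_grad = jax.grad(poisson_nll)(params, X, y)
score = X.T @ (jnp.exp(X @ params) - y)  # closed-form gradient
print(jnp.allclose(auto_grad, score, atol=1e-5))  # expected: True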
     def predict(self, X: jnp.ndarray) -> jnp.ndarray:
         """
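Putting the pieces together, a rough end-to-end usage sketch on synthetic data (the import path is hypothetical; adjust it to wherever this module lives in the package):

import jax
import jax.numpy as jnp

from mypackage.estimators import LogisticRegression  # hypothetical path

key_x, key_u = jax.random.split(jax.random.PRNGKey(42))
X = jax.random.normal(key_x, (200, 3))
true_beta = jnp.array([1.5, -2.0, 0.5])
y = (jax.random.uniform(key_u, (200,)) < jax.nn.sigmoid(X @ true_beta)).astype(jnp.float32)

model = LogisticRegression(maxiter=200, tol=1e-6)
model.fit(X, y, verbose=True)
model.summary()

proba = model.predict_proba(X)            # P(y = 1 | X)
labels = model.predict(X, threshold=0.5)  # hard 0/1 labels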