diff --git a/.coverage-tmp.html b/.coverage-tmp.html
new file mode 100644
index 00000000..cff3f814
--- /dev/null
+++ b/.coverage-tmp.html
@@ -0,0 +1,49 @@
+ Coverage report - stimulus-py
+ Coverage report
+ [rest of the 49-line generated coverage page markup stripped]
\ No newline at end of file
diff --git a/404.html b/404.html
index c2e439f0..1630a29c 100644
--- a/404.html
+++ b/404.html
@@ -1,1905 +1 @@
- [removed 1905-line 404.html markup stripped; visible text: "stimulus-py", "404 - Not found"]
\ No newline at end of file
+ stimulus-py
\ No newline at end of file diff --git a/assets/javascripts/bundle.83f73b43.min.js b/assets/javascripts/bundle.f13b1293.min.js similarity index 65% rename from assets/javascripts/bundle.83f73b43.min.js rename to assets/javascripts/bundle.f13b1293.min.js index 43d8b70f..7fbc8fc8 100644 --- a/assets/javascripts/bundle.83f73b43.min.js +++ b/assets/javascripts/bundle.f13b1293.min.js @@ -1,4 +1,4 @@ -"use strict";(()=>{var Wi=Object.create;var gr=Object.defineProperty;var Di=Object.getOwnPropertyDescriptor;var Vi=Object.getOwnPropertyNames,Vt=Object.getOwnPropertySymbols,Ni=Object.getPrototypeOf,yr=Object.prototype.hasOwnProperty,ao=Object.prototype.propertyIsEnumerable;var io=(e,t,r)=>t in e?gr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,$=(e,t)=>{for(var r in t||(t={}))yr.call(t,r)&&io(e,r,t[r]);if(Vt)for(var r of Vt(t))ao.call(t,r)&&io(e,r,t[r]);return e};var so=(e,t)=>{var r={};for(var o in e)yr.call(e,o)&&t.indexOf(o)<0&&(r[o]=e[o]);if(e!=null&&Vt)for(var o of Vt(e))t.indexOf(o)<0&&ao.call(e,o)&&(r[o]=e[o]);return r};var xr=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var zi=(e,t,r,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of Vi(t))!yr.call(e,n)&&n!==r&&gr(e,n,{get:()=>t[n],enumerable:!(o=Di(t,n))||o.enumerable});return e};var Mt=(e,t,r)=>(r=e!=null?Wi(Ni(e)):{},zi(t||!e||!e.__esModule?gr(r,"default",{value:e,enumerable:!0}):r,e));var co=(e,t,r)=>new Promise((o,n)=>{var i=p=>{try{s(r.next(p))}catch(c){n(c)}},a=p=>{try{s(r.throw(p))}catch(c){n(c)}},s=p=>p.done?o(p.value):Promise.resolve(p.value).then(i,a);s((r=r.apply(e,t)).next())});var lo=xr((Er,po)=>{(function(e,t){typeof Er=="object"&&typeof po!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(Er,function(){"use strict";function e(r){var o=!0,n=!1,i=null,a={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function s(k){return!!(k&&k!==document&&k.nodeName!=="HTML"&&k.nodeName!=="BODY"&&"classList"in k&&"contains"in k.classList)}function p(k){var ft=k.type,qe=k.tagName;return!!(qe==="INPUT"&&a[ft]&&!k.readOnly||qe==="TEXTAREA"&&!k.readOnly||k.isContentEditable)}function c(k){k.classList.contains("focus-visible")||(k.classList.add("focus-visible"),k.setAttribute("data-focus-visible-added",""))}function l(k){k.hasAttribute("data-focus-visible-added")&&(k.classList.remove("focus-visible"),k.removeAttribute("data-focus-visible-added"))}function f(k){k.metaKey||k.altKey||k.ctrlKey||(s(r.activeElement)&&c(r.activeElement),o=!0)}function u(k){o=!1}function d(k){s(k.target)&&(o||p(k.target))&&c(k.target)}function y(k){s(k.target)&&(k.target.classList.contains("focus-visible")||k.target.hasAttribute("data-focus-visible-added"))&&(n=!0,window.clearTimeout(i),i=window.setTimeout(function(){n=!1},100),l(k.target))}function L(k){document.visibilityState==="hidden"&&(n&&(o=!0),X())}function X(){document.addEventListener("mousemove",J),document.addEventListener("mousedown",J),document.addEventListener("mouseup",J),document.addEventListener("pointermove",J),document.addEventListener("pointerdown",J),document.addEventListener("pointerup",J),document.addEventListener("touchmove",J),document.addEventListener("touchstart",J),document.addEventListener("touchend",J)}function 
te(){document.removeEventListener("mousemove",J),document.removeEventListener("mousedown",J),document.removeEventListener("mouseup",J),document.removeEventListener("pointermove",J),document.removeEventListener("pointerdown",J),document.removeEventListener("pointerup",J),document.removeEventListener("touchmove",J),document.removeEventListener("touchstart",J),document.removeEventListener("touchend",J)}function J(k){k.target.nodeName&&k.target.nodeName.toLowerCase()==="html"||(o=!1,te())}document.addEventListener("keydown",f,!0),document.addEventListener("mousedown",u,!0),document.addEventListener("pointerdown",u,!0),document.addEventListener("touchstart",u,!0),document.addEventListener("visibilitychange",L,!0),X(),r.addEventListener("focus",d,!0),r.addEventListener("blur",y,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof document!="undefined"&&e(document)})});var qr=xr((hy,On)=>{"use strict";/*! +"use strict";(()=>{var Wi=Object.create;var gr=Object.defineProperty;var Di=Object.getOwnPropertyDescriptor;var Vi=Object.getOwnPropertyNames,Vt=Object.getOwnPropertySymbols,Ni=Object.getPrototypeOf,yr=Object.prototype.hasOwnProperty,ao=Object.prototype.propertyIsEnumerable;var io=(e,t,r)=>t in e?gr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,$=(e,t)=>{for(var r in t||(t={}))yr.call(t,r)&&io(e,r,t[r]);if(Vt)for(var r of Vt(t))ao.call(t,r)&&io(e,r,t[r]);return e};var so=(e,t)=>{var r={};for(var o in e)yr.call(e,o)&&t.indexOf(o)<0&&(r[o]=e[o]);if(e!=null&&Vt)for(var o of Vt(e))t.indexOf(o)<0&&ao.call(e,o)&&(r[o]=e[o]);return r};var xr=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var zi=(e,t,r,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of Vi(t))!yr.call(e,n)&&n!==r&&gr(e,n,{get:()=>t[n],enumerable:!(o=Di(t,n))||o.enumerable});return e};var Mt=(e,t,r)=>(r=e!=null?Wi(Ni(e)):{},zi(t||!e||!e.__esModule?gr(r,"default",{value:e,enumerable:!0}):r,e));var co=(e,t,r)=>new Promise((o,n)=>{var i=p=>{try{s(r.next(p))}catch(c){n(c)}},a=p=>{try{s(r.throw(p))}catch(c){n(c)}},s=p=>p.done?o(p.value):Promise.resolve(p.value).then(i,a);s((r=r.apply(e,t)).next())});var lo=xr((Er,po)=>{(function(e,t){typeof Er=="object"&&typeof po!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(Er,function(){"use strict";function e(r){var o=!0,n=!1,i=null,a={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function s(k){return!!(k&&k!==document&&k.nodeName!=="HTML"&&k.nodeName!=="BODY"&&"classList"in k&&"contains"in k.classList)}function p(k){var ft=k.type,qe=k.tagName;return!!(qe==="INPUT"&&a[ft]&&!k.readOnly||qe==="TEXTAREA"&&!k.readOnly||k.isContentEditable)}function c(k){k.classList.contains("focus-visible")||(k.classList.add("focus-visible"),k.setAttribute("data-focus-visible-added",""))}function l(k){k.hasAttribute("data-focus-visible-added")&&(k.classList.remove("focus-visible"),k.removeAttribute("data-focus-visible-added"))}function 
f(k){k.metaKey||k.altKey||k.ctrlKey||(s(r.activeElement)&&c(r.activeElement),o=!0)}function u(k){o=!1}function d(k){s(k.target)&&(o||p(k.target))&&c(k.target)}function y(k){s(k.target)&&(k.target.classList.contains("focus-visible")||k.target.hasAttribute("data-focus-visible-added"))&&(n=!0,window.clearTimeout(i),i=window.setTimeout(function(){n=!1},100),l(k.target))}function L(k){document.visibilityState==="hidden"&&(n&&(o=!0),X())}function X(){document.addEventListener("mousemove",J),document.addEventListener("mousedown",J),document.addEventListener("mouseup",J),document.addEventListener("pointermove",J),document.addEventListener("pointerdown",J),document.addEventListener("pointerup",J),document.addEventListener("touchmove",J),document.addEventListener("touchstart",J),document.addEventListener("touchend",J)}function ee(){document.removeEventListener("mousemove",J),document.removeEventListener("mousedown",J),document.removeEventListener("mouseup",J),document.removeEventListener("pointermove",J),document.removeEventListener("pointerdown",J),document.removeEventListener("pointerup",J),document.removeEventListener("touchmove",J),document.removeEventListener("touchstart",J),document.removeEventListener("touchend",J)}function J(k){k.target.nodeName&&k.target.nodeName.toLowerCase()==="html"||(o=!1,ee())}document.addEventListener("keydown",f,!0),document.addEventListener("mousedown",u,!0),document.addEventListener("pointerdown",u,!0),document.addEventListener("touchstart",u,!0),document.addEventListener("visibilitychange",L,!0),X(),r.addEventListener("focus",d,!0),r.addEventListener("blur",y,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof document!="undefined"&&e(document)})});var qr=xr((hy,On)=>{"use strict";/*! 
* escape-html * Copyright(c) 2012-2013 TJ Holowaychuk * Copyright(c) 2015 Andreas Lubbe @@ -9,8 +9,8 @@ * https://clipboardjs.com/ * * Licensed MIT © Zeno Rocha - */(function(t,r){typeof It=="object"&&typeof Yr=="object"?Yr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof It=="object"?It.ClipboardJS=r():t.ClipboardJS=r()})(It,function(){return function(){var e={686:function(o,n,i){"use strict";i.d(n,{default:function(){return Ui}});var a=i(279),s=i.n(a),p=i(370),c=i.n(p),l=i(817),f=i.n(l);function u(V){try{return document.execCommand(V)}catch(A){return!1}}var d=function(A){var M=f()(A);return u("cut"),M},y=d;function L(V){var A=document.documentElement.getAttribute("dir")==="rtl",M=document.createElement("textarea");M.style.fontSize="12pt",M.style.border="0",M.style.padding="0",M.style.margin="0",M.style.position="absolute",M.style[A?"right":"left"]="-9999px";var F=window.pageYOffset||document.documentElement.scrollTop;return M.style.top="".concat(F,"px"),M.setAttribute("readonly",""),M.value=V,M}var X=function(A,M){var F=L(A);M.container.appendChild(F);var D=f()(F);return u("copy"),F.remove(),D},te=function(A){var M=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},F="";return typeof A=="string"?F=X(A,M):A instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(A==null?void 0:A.type)?F=X(A.value,M):(F=f()(A),u("copy")),F},J=te;function k(V){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?k=function(M){return typeof M}:k=function(M){return M&&typeof Symbol=="function"&&M.constructor===Symbol&&M!==Symbol.prototype?"symbol":typeof M},k(V)}var ft=function(){var A=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},M=A.action,F=M===void 0?"copy":M,D=A.container,Y=A.target,$e=A.text;if(F!=="copy"&&F!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(Y!==void 0)if(Y&&k(Y)==="object"&&Y.nodeType===1){if(F==="copy"&&Y.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. Please use "readonly" instead of "disabled" attribute');if(F==="cut"&&(Y.hasAttribute("readonly")||Y.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. 
You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if($e)return J($e,{container:D});if(Y)return F==="cut"?y(Y):J(Y,{container:D})},qe=ft;function Fe(V){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?Fe=function(M){return typeof M}:Fe=function(M){return M&&typeof Symbol=="function"&&M.constructor===Symbol&&M!==Symbol.prototype?"symbol":typeof M},Fe(V)}function ki(V,A){if(!(V instanceof A))throw new TypeError("Cannot call a class as a function")}function no(V,A){for(var M=0;M0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof D.action=="function"?D.action:this.defaultAction,this.target=typeof D.target=="function"?D.target:this.defaultTarget,this.text=typeof D.text=="function"?D.text:this.defaultText,this.container=Fe(D.container)==="object"?D.container:document.body}},{key:"listenClick",value:function(D){var Y=this;this.listener=c()(D,"click",function($e){return Y.onClick($e)})}},{key:"onClick",value:function(D){var Y=D.delegateTarget||D.currentTarget,$e=this.action(Y)||"copy",Dt=qe({action:$e,container:this.container,target:this.target(Y),text:this.text(Y)});this.emit(Dt?"success":"error",{action:$e,text:Dt,trigger:Y,clearSelection:function(){Y&&Y.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(D){return vr("action",D)}},{key:"defaultTarget",value:function(D){var Y=vr("target",D);if(Y)return document.querySelector(Y)}},{key:"defaultText",value:function(D){return vr("text",D)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(D){var Y=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return J(D,Y)}},{key:"cut",value:function(D){return y(D)}},{key:"isSupported",value:function(){var D=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],Y=typeof D=="string"?[D]:D,$e=!!document.queryCommandSupported;return Y.forEach(function(Dt){$e=$e&&!!document.queryCommandSupported(Dt)}),$e}}]),M}(s()),Ui=Fi},828:function(o){var n=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function a(s,p){for(;s&&s.nodeType!==n;){if(typeof s.matches=="function"&&s.matches(p))return s;s=s.parentNode}}o.exports=a},438:function(o,n,i){var a=i(828);function s(l,f,u,d,y){var L=c.apply(this,arguments);return l.addEventListener(u,L,y),{destroy:function(){l.removeEventListener(u,L,y)}}}function p(l,f,u,d,y){return typeof l.addEventListener=="function"?s.apply(null,arguments):typeof u=="function"?s.bind(null,document).apply(null,arguments):(typeof l=="string"&&(l=document.querySelectorAll(l)),Array.prototype.map.call(l,function(L){return s(L,f,u,d,y)}))}function c(l,f,u,d){return function(y){y.delegateTarget=a(y.target,f),y.delegateTarget&&d.call(l,y)}}o.exports=p},879:function(o,n){n.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},n.nodeList=function(i){var a=Object.prototype.toString.call(i);return i!==void 0&&(a==="[object NodeList]"||a==="[object HTMLCollection]")&&"length"in i&&(i.length===0||n.node(i[0]))},n.string=function(i){return typeof i=="string"||i instanceof String},n.fn=function(i){var a=Object.prototype.toString.call(i);return a==="[object Function]"}},370:function(o,n,i){var a=i(879),s=i(438);function p(u,d,y){if(!u&&!d&&!y)throw new Error("Missing required 
arguments");if(!a.string(d))throw new TypeError("Second argument must be a String");if(!a.fn(y))throw new TypeError("Third argument must be a Function");if(a.node(u))return c(u,d,y);if(a.nodeList(u))return l(u,d,y);if(a.string(u))return f(u,d,y);throw new TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function c(u,d,y){return u.addEventListener(d,y),{destroy:function(){u.removeEventListener(d,y)}}}function l(u,d,y){return Array.prototype.forEach.call(u,function(L){L.addEventListener(d,y)}),{destroy:function(){Array.prototype.forEach.call(u,function(L){L.removeEventListener(d,y)})}}}function f(u,d,y){return s(document.body,u,d,y)}o.exports=p},817:function(o){function n(i){var a;if(i.nodeName==="SELECT")i.focus(),a=i.value;else if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var s=i.hasAttribute("readonly");s||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),s||i.removeAttribute("readonly"),a=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var p=window.getSelection(),c=document.createRange();c.selectNodeContents(i),p.removeAllRanges(),p.addRange(c),a=p.toString()}return a}o.exports=n},279:function(o){function n(){}n.prototype={on:function(i,a,s){var p=this.e||(this.e={});return(p[i]||(p[i]=[])).push({fn:a,ctx:s}),this},once:function(i,a,s){var p=this;function c(){p.off(i,c),a.apply(s,arguments)}return c._=a,this.on(i,c,s)},emit:function(i){var a=[].slice.call(arguments,1),s=((this.e||(this.e={}))[i]||[]).slice(),p=0,c=s.length;for(p;p0&&i[i.length-1])&&(c[0]===6||c[0]===2)){r=0;continue}if(c[0]===3&&(!i||c[1]>i[0]&&c[1]=e.length&&(e=void 0),{value:e&&e[o++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function N(e,t){var r=typeof Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var o=r.call(e),n,i=[],a;try{for(;(t===void 0||t-- >0)&&!(n=o.next()).done;)i.push(n.value)}catch(s){a={error:s}}finally{try{n&&!n.done&&(r=o.return)&&r.call(o)}finally{if(a)throw a.error}}return i}function q(e,t,r){if(r||arguments.length===2)for(var o=0,n=t.length,i;o1||p(d,L)})},y&&(n[d]=y(n[d])))}function p(d,y){try{c(o[d](y))}catch(L){u(i[0][3],L)}}function c(d){d.value instanceof nt?Promise.resolve(d.value.v).then(l,f):u(i[0][2],d)}function l(d){p("next",d)}function f(d){p("throw",d)}function u(d,y){d(y),i.shift(),i.length&&p(i[0][0],i[0][1])}}function uo(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof he=="function"?he(e):e[Symbol.iterator](),r={},o("next"),o("throw"),o("return"),r[Symbol.asyncIterator]=function(){return this},r);function o(i){r[i]=e[i]&&function(a){return new Promise(function(s,p){a=e[i](a),n(s,p,a.done,a.value)})}}function n(i,a,s,p){Promise.resolve(p).then(function(c){i({value:c,done:s})},a)}}function H(e){return typeof e=="function"}function ut(e){var t=function(o){Error.call(o),o.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var zt=ut(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: + */(function(t,r){typeof It=="object"&&typeof Yr=="object"?Yr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof It=="object"?It.ClipboardJS=r():t.ClipboardJS=r()})(It,function(){return function(){var e={686:function(o,n,i){"use strict";i.d(n,{default:function(){return Ui}});var a=i(279),s=i.n(a),p=i(370),c=i.n(p),l=i(817),f=i.n(l);function 
u(V){try{return document.execCommand(V)}catch(A){return!1}}var d=function(A){var M=f()(A);return u("cut"),M},y=d;function L(V){var A=document.documentElement.getAttribute("dir")==="rtl",M=document.createElement("textarea");M.style.fontSize="12pt",M.style.border="0",M.style.padding="0",M.style.margin="0",M.style.position="absolute",M.style[A?"right":"left"]="-9999px";var F=window.pageYOffset||document.documentElement.scrollTop;return M.style.top="".concat(F,"px"),M.setAttribute("readonly",""),M.value=V,M}var X=function(A,M){var F=L(A);M.container.appendChild(F);var D=f()(F);return u("copy"),F.remove(),D},ee=function(A){var M=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},F="";return typeof A=="string"?F=X(A,M):A instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(A==null?void 0:A.type)?F=X(A.value,M):(F=f()(A),u("copy")),F},J=ee;function k(V){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?k=function(M){return typeof M}:k=function(M){return M&&typeof Symbol=="function"&&M.constructor===Symbol&&M!==Symbol.prototype?"symbol":typeof M},k(V)}var ft=function(){var A=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},M=A.action,F=M===void 0?"copy":M,D=A.container,Y=A.target,$e=A.text;if(F!=="copy"&&F!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(Y!==void 0)if(Y&&k(Y)==="object"&&Y.nodeType===1){if(F==="copy"&&Y.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. Please use "readonly" instead of "disabled" attribute');if(F==="cut"&&(Y.hasAttribute("readonly")||Y.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if($e)return J($e,{container:D});if(Y)return F==="cut"?y(Y):J(Y,{container:D})},qe=ft;function Fe(V){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?Fe=function(M){return typeof M}:Fe=function(M){return M&&typeof Symbol=="function"&&M.constructor===Symbol&&M!==Symbol.prototype?"symbol":typeof M},Fe(V)}function ki(V,A){if(!(V instanceof A))throw new TypeError("Cannot call a class as a function")}function no(V,A){for(var M=0;M0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof D.action=="function"?D.action:this.defaultAction,this.target=typeof D.target=="function"?D.target:this.defaultTarget,this.text=typeof D.text=="function"?D.text:this.defaultText,this.container=Fe(D.container)==="object"?D.container:document.body}},{key:"listenClick",value:function(D){var Y=this;this.listener=c()(D,"click",function($e){return Y.onClick($e)})}},{key:"onClick",value:function(D){var Y=D.delegateTarget||D.currentTarget,$e=this.action(Y)||"copy",Dt=qe({action:$e,container:this.container,target:this.target(Y),text:this.text(Y)});this.emit(Dt?"success":"error",{action:$e,text:Dt,trigger:Y,clearSelection:function(){Y&&Y.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(D){return vr("action",D)}},{key:"defaultTarget",value:function(D){var Y=vr("target",D);if(Y)return document.querySelector(Y)}},{key:"defaultText",value:function(D){return vr("text",D)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(D){var Y=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return J(D,Y)}},{key:"cut",value:function(D){return 
y(D)}},{key:"isSupported",value:function(){var D=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],Y=typeof D=="string"?[D]:D,$e=!!document.queryCommandSupported;return Y.forEach(function(Dt){$e=$e&&!!document.queryCommandSupported(Dt)}),$e}}]),M}(s()),Ui=Fi},828:function(o){var n=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function a(s,p){for(;s&&s.nodeType!==n;){if(typeof s.matches=="function"&&s.matches(p))return s;s=s.parentNode}}o.exports=a},438:function(o,n,i){var a=i(828);function s(l,f,u,d,y){var L=c.apply(this,arguments);return l.addEventListener(u,L,y),{destroy:function(){l.removeEventListener(u,L,y)}}}function p(l,f,u,d,y){return typeof l.addEventListener=="function"?s.apply(null,arguments):typeof u=="function"?s.bind(null,document).apply(null,arguments):(typeof l=="string"&&(l=document.querySelectorAll(l)),Array.prototype.map.call(l,function(L){return s(L,f,u,d,y)}))}function c(l,f,u,d){return function(y){y.delegateTarget=a(y.target,f),y.delegateTarget&&d.call(l,y)}}o.exports=p},879:function(o,n){n.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},n.nodeList=function(i){var a=Object.prototype.toString.call(i);return i!==void 0&&(a==="[object NodeList]"||a==="[object HTMLCollection]")&&"length"in i&&(i.length===0||n.node(i[0]))},n.string=function(i){return typeof i=="string"||i instanceof String},n.fn=function(i){var a=Object.prototype.toString.call(i);return a==="[object Function]"}},370:function(o,n,i){var a=i(879),s=i(438);function p(u,d,y){if(!u&&!d&&!y)throw new Error("Missing required arguments");if(!a.string(d))throw new TypeError("Second argument must be a String");if(!a.fn(y))throw new TypeError("Third argument must be a Function");if(a.node(u))return c(u,d,y);if(a.nodeList(u))return l(u,d,y);if(a.string(u))return f(u,d,y);throw new TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function c(u,d,y){return u.addEventListener(d,y),{destroy:function(){u.removeEventListener(d,y)}}}function l(u,d,y){return Array.prototype.forEach.call(u,function(L){L.addEventListener(d,y)}),{destroy:function(){Array.prototype.forEach.call(u,function(L){L.removeEventListener(d,y)})}}}function f(u,d,y){return s(document.body,u,d,y)}o.exports=p},817:function(o){function n(i){var a;if(i.nodeName==="SELECT")i.focus(),a=i.value;else if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var s=i.hasAttribute("readonly");s||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),s||i.removeAttribute("readonly"),a=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var p=window.getSelection(),c=document.createRange();c.selectNodeContents(i),p.removeAllRanges(),p.addRange(c),a=p.toString()}return a}o.exports=n},279:function(o){function n(){}n.prototype={on:function(i,a,s){var p=this.e||(this.e={});return(p[i]||(p[i]=[])).push({fn:a,ctx:s}),this},once:function(i,a,s){var p=this;function c(){p.off(i,c),a.apply(s,arguments)}return c._=a,this.on(i,c,s)},emit:function(i){var a=[].slice.call(arguments,1),s=((this.e||(this.e={}))[i]||[]).slice(),p=0,c=s.length;for(p;p0&&i[i.length-1])&&(c[0]===6||c[0]===2)){r=0;continue}if(c[0]===3&&(!i||c[1]>i[0]&&c[1]=e.length&&(e=void 0),{value:e&&e[o++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function N(e,t){var r=typeof 
Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var o=r.call(e),n,i=[],a;try{for(;(t===void 0||t-- >0)&&!(n=o.next()).done;)i.push(n.value)}catch(s){a={error:s}}finally{try{n&&!n.done&&(r=o.return)&&r.call(o)}finally{if(a)throw a.error}}return i}function q(e,t,r){if(r||arguments.length===2)for(var o=0,n=t.length,i;o1||p(d,L)})},y&&(n[d]=y(n[d])))}function p(d,y){try{c(o[d](y))}catch(L){u(i[0][3],L)}}function c(d){d.value instanceof nt?Promise.resolve(d.value.v).then(l,f):u(i[0][2],d)}function l(d){p("next",d)}function f(d){p("throw",d)}function u(d,y){d(y),i.shift(),i.length&&p(i[0][0],i[0][1])}}function uo(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof he=="function"?he(e):e[Symbol.iterator](),r={},o("next"),o("throw"),o("return"),r[Symbol.asyncIterator]=function(){return this},r);function o(i){r[i]=e[i]&&function(a){return new Promise(function(s,p){a=e[i](a),n(s,p,a.done,a.value)})}}function n(i,a,s,p){Promise.resolve(p).then(function(c){i({value:c,done:s})},a)}}function H(e){return typeof e=="function"}function ut(e){var t=function(o){Error.call(o),o.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var zt=ut(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: `+r.map(function(o,n){return n+1+") "+o.toString()}).join(` - `):"",this.name="UnsubscriptionError",this.errors=r}});function Qe(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var Ue=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var t,r,o,n,i;if(!this.closed){this.closed=!0;var a=this._parentage;if(a)if(this._parentage=null,Array.isArray(a))try{for(var s=he(a),p=s.next();!p.done;p=s.next()){var c=p.value;c.remove(this)}}catch(L){t={error:L}}finally{try{p&&!p.done&&(r=s.return)&&r.call(s)}finally{if(t)throw t.error}}else a.remove(this);var l=this.initialTeardown;if(H(l))try{l()}catch(L){i=L instanceof zt?L.errors:[L]}var f=this._finalizers;if(f){this._finalizers=null;try{for(var u=he(f),d=u.next();!d.done;d=u.next()){var y=d.value;try{ho(y)}catch(L){i=i!=null?i:[],L instanceof zt?i=q(q([],N(i)),N(L.errors)):i.push(L)}}}catch(L){o={error:L}}finally{try{d&&!d.done&&(n=u.return)&&n.call(u)}finally{if(o)throw o.error}}}if(i)throw new zt(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)ho(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&Qe(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&Qe(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Tr=Ue.EMPTY;function qt(e){return e instanceof Ue||e&&"closed"in e&&H(e.remove)&&H(e.add)&&H(e.unsubscribe)}function ho(e){H(e)?e():e.unsubscribe()}var Pe={onUnhandledError:null,onStoppedNotification:null,Promise:void 0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var dt={setTimeout:function(e,t){for(var 
r=[],o=2;o0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var o=this,n=this,i=n.hasError,a=n.isStopped,s=n.observers;return i||a?Tr:(this.currentObservers=null,s.push(r),new Ue(function(){o.currentObservers=null,Qe(s,r)}))},t.prototype._checkFinalizedStatuses=function(r){var o=this,n=o.hasError,i=o.thrownError,a=o.isStopped;n?r.error(i):a&&r.complete()},t.prototype.asObservable=function(){var r=new j;return r.source=this,r},t.create=function(r,o){return new To(r,o)},t}(j);var To=function(e){oe(t,e);function t(r,o){var n=e.call(this)||this;return n.destination=r,n.source=o,n}return t.prototype.next=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.next)===null||n===void 0||n.call(o,r)},t.prototype.error=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.error)===null||n===void 0||n.call(o,r)},t.prototype.complete=function(){var r,o;(o=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||o===void 0||o.call(r)},t.prototype._subscribe=function(r){var o,n;return(n=(o=this.source)===null||o===void 0?void 0:o.subscribe(r))!==null&&n!==void 0?n:Tr},t}(g);var _r=function(e){oe(t,e);function t(r){var o=e.call(this)||this;return o._value=r,o}return Object.defineProperty(t.prototype,"value",{get:function(){return this.getValue()},enumerable:!1,configurable:!0}),t.prototype._subscribe=function(r){var o=e.prototype._subscribe.call(this,r);return!o.closed&&r.next(this._value),o},t.prototype.getValue=function(){var r=this,o=r.hasError,n=r.thrownError,i=r._value;if(o)throw n;return this._throwIfClosed(),i},t.prototype.next=function(r){e.prototype.next.call(this,this._value=r)},t}(g);var At={now:function(){return(At.delegate||Date).now()},delegate:void 0};var Ct=function(e){oe(t,e);function t(r,o,n){r===void 0&&(r=1/0),o===void 0&&(o=1/0),n===void 0&&(n=At);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=o,i._timestampProvider=n,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=o===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,o),i}return t.prototype.next=function(r){var o=this,n=o.isStopped,i=o._buffer,a=o._infiniteTimeWindow,s=o._timestampProvider,p=o._windowTime;n||(i.push(r),!a&&i.push(s.now()+p)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var o=this._innerSubscribe(r),n=this,i=n._infiniteTimeWindow,a=n._buffer,s=a.slice(),p=0;p0?e.prototype.schedule.call(this,r,o):(this.delay=o,this.state=r,this.scheduler.flush(this),this)},t.prototype.execute=function(r,o){return o>0||this.closed?e.prototype.execute.call(this,r,o):this._execute(r,o)},t.prototype.requestAsyncId=function(r,o,n){return n===void 0&&(n=0),n!=null&&n>0||n==null&&this.delay>0?e.prototype.requestAsyncId.call(this,r,o,n):(r.flush(this),0)},t}(gt);var Lo=function(e){oe(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t}(yt);var kr=new Lo(Oo);var Mo=function(e){oe(t,e);function t(r,o){var n=e.call(this,r,o)||this;return n.scheduler=r,n.work=o,n}return t.prototype.requestAsyncId=function(r,o,n){return n===void 0&&(n=0),n!==null&&n>0?e.prototype.requestAsyncId.call(this,r,o,n):(r.actions.push(this),r._scheduled||(r._scheduled=vt.requestAnimationFrame(function(){return r.flush(void 
0)})))},t.prototype.recycleAsyncId=function(r,o,n){var i;if(n===void 0&&(n=0),n!=null?n>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,o,n);var a=r.actions;o!=null&&((i=a[a.length-1])===null||i===void 0?void 0:i.id)!==o&&(vt.cancelAnimationFrame(o),r._scheduled=void 0)},t}(gt);var _o=function(e){oe(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var o=this._scheduled;this._scheduled=void 0;var n=this.actions,i;r=r||n.shift();do if(i=r.execute(r.state,r.delay))break;while((r=n[0])&&r.id===o&&n.shift());if(this._active=!1,i){for(;(r=n[0])&&r.id===o&&n.shift();)r.unsubscribe();throw i}},t}(yt);var me=new _o(Mo);var S=new j(function(e){return e.complete()});function Yt(e){return e&&H(e.schedule)}function Hr(e){return e[e.length-1]}function Xe(e){return H(Hr(e))?e.pop():void 0}function ke(e){return Yt(Hr(e))?e.pop():void 0}function Bt(e,t){return typeof Hr(e)=="number"?e.pop():t}var xt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function Gt(e){return H(e==null?void 0:e.then)}function Jt(e){return H(e[bt])}function Xt(e){return Symbol.asyncIterator&&H(e==null?void 0:e[Symbol.asyncIterator])}function Zt(e){return new TypeError("You provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function Zi(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var er=Zi();function tr(e){return H(e==null?void 0:e[er])}function rr(e){return fo(this,arguments,function(){var r,o,n,i;return Nt(this,function(a){switch(a.label){case 0:r=e.getReader(),a.label=1;case 1:a.trys.push([1,,9,10]),a.label=2;case 2:return[4,nt(r.read())];case 3:return o=a.sent(),n=o.value,i=o.done,i?[4,nt(void 0)]:[3,5];case 4:return[2,a.sent()];case 5:return[4,nt(n)];case 6:return[4,a.sent()];case 7:return a.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function or(e){return H(e==null?void 0:e.getReader)}function U(e){if(e instanceof j)return e;if(e!=null){if(Jt(e))return ea(e);if(xt(e))return ta(e);if(Gt(e))return ra(e);if(Xt(e))return Ao(e);if(tr(e))return oa(e);if(or(e))return na(e)}throw Zt(e)}function ea(e){return new j(function(t){var r=e[bt]();if(H(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function ta(e){return new j(function(t){for(var r=0;r=2;return function(o){return o.pipe(e?b(function(n,i){return e(n,i,o)}):le,Te(1),r?De(t):Qo(function(){return new ir}))}}function jr(e){return e<=0?function(){return S}:E(function(t,r){var o=[];t.subscribe(T(r,function(n){o.push(n),e=2,!0))}function pe(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new g}:t,o=e.resetOnError,n=o===void 0?!0:o,i=e.resetOnComplete,a=i===void 0?!0:i,s=e.resetOnRefCountZero,p=s===void 0?!0:s;return function(c){var l,f,u,d=0,y=!1,L=!1,X=function(){f==null||f.unsubscribe(),f=void 0},te=function(){X(),l=u=void 0,y=L=!1},J=function(){var k=l;te(),k==null||k.unsubscribe()};return E(function(k,ft){d++,!L&&!y&&X();var qe=u=u!=null?u:r();ft.add(function(){d--,d===0&&!L&&!y&&(f=Ur(J,p))}),qe.subscribe(ft),!l&&d>0&&(l=new at({next:function(Fe){return qe.next(Fe)},error:function(Fe){L=!0,X(),f=Ur(te,n,Fe),qe.error(Fe)},complete:function(){y=!0,X(),f=Ur(te,a),qe.complete()}}),U(k).subscribe(l))})(c)}}function Ur(e,t){for(var 
r=[],o=2;oe.next(document)),e}function P(e,t=document){return Array.from(t.querySelectorAll(e))}function R(e,t=document){let r=fe(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function fe(e,t=document){return t.querySelector(e)||void 0}function Ie(){var e,t,r,o;return(o=(r=(t=(e=document.activeElement)==null?void 0:e.shadowRoot)==null?void 0:t.activeElement)!=null?r:document.activeElement)!=null?o:void 0}var wa=O(h(document.body,"focusin"),h(document.body,"focusout")).pipe(_e(1),Q(void 0),m(()=>Ie()||document.body),G(1));function et(e){return wa.pipe(m(t=>e.contains(t)),K())}function $t(e,t){return C(()=>O(h(e,"mouseenter").pipe(m(()=>!0)),h(e,"mouseleave").pipe(m(()=>!1))).pipe(t?Ht(r=>Le(+!r*t)):le,Q(e.matches(":hover"))))}function Jo(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)Jo(e,r)}function x(e,t,...r){let o=document.createElement(e);if(t)for(let n of Object.keys(t))typeof t[n]!="undefined"&&(typeof t[n]!="boolean"?o.setAttribute(n,t[n]):o.setAttribute(n,""));for(let n of r)Jo(o,n);return o}function sr(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function Tt(e){let t=x("script",{src:e});return C(()=>(document.head.appendChild(t),O(h(t,"load"),h(t,"error").pipe(v(()=>$r(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(m(()=>{}),_(()=>document.head.removeChild(t)),Te(1))))}var Xo=new g,Ta=C(()=>typeof ResizeObserver=="undefined"?Tt("https://unpkg.com/resize-observer-polyfill"):I(void 0)).pipe(m(()=>new ResizeObserver(e=>e.forEach(t=>Xo.next(t)))),v(e=>O(Ye,I(e)).pipe(_(()=>e.disconnect()))),G(1));function ce(e){return{width:e.offsetWidth,height:e.offsetHeight}}function ge(e){let t=e;for(;t.clientWidth===0&&t.parentElement;)t=t.parentElement;return Ta.pipe(w(r=>r.observe(t)),v(r=>Xo.pipe(b(o=>o.target===t),_(()=>r.unobserve(t)))),m(()=>ce(e)),Q(ce(e)))}function St(e){return{width:e.scrollWidth,height:e.scrollHeight}}function cr(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}function Zo(e){let t=[],r=e.parentElement;for(;r;)(e.clientWidth>r.clientWidth||e.clientHeight>r.clientHeight)&&t.push(r),r=(e=r).parentElement;return t.length===0&&t.push(document.documentElement),t}function Ve(e){return{x:e.offsetLeft,y:e.offsetTop}}function en(e){let t=e.getBoundingClientRect();return{x:t.x+window.scrollX,y:t.y+window.scrollY}}function tn(e){return O(h(window,"load"),h(window,"resize")).pipe(Me(0,me),m(()=>Ve(e)),Q(Ve(e)))}function pr(e){return{x:e.scrollLeft,y:e.scrollTop}}function Ne(e){return O(h(e,"scroll"),h(window,"scroll"),h(window,"resize")).pipe(Me(0,me),m(()=>pr(e)),Q(pr(e)))}var rn=new g,Sa=C(()=>I(new IntersectionObserver(e=>{for(let t of e)rn.next(t)},{threshold:0}))).pipe(v(e=>O(Ye,I(e)).pipe(_(()=>e.disconnect()))),G(1));function tt(e){return Sa.pipe(w(t=>t.observe(e)),v(t=>rn.pipe(b(({target:r})=>r===e),_(()=>t.unobserve(e)),m(({isIntersecting:r})=>r))))}function on(e,t=16){return Ne(e).pipe(m(({y:r})=>{let o=ce(e),n=St(e);return r>=n.height-o.height-t}),K())}var lr={drawer:R("[data-md-toggle=drawer]"),search:R("[data-md-toggle=search]")};function nn(e){return lr[e].checked}function Je(e,t){lr[e].checked!==t&&lr[e].click()}function ze(e){let t=lr[e];return h(t,"change").pipe(m(()=>t.checked),Q(t.checked))}function Oa(e,t){switch(e.constructor){case 
HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function La(){return O(h(window,"compositionstart").pipe(m(()=>!0)),h(window,"compositionend").pipe(m(()=>!1))).pipe(Q(!1))}function an(){let e=h(window,"keydown").pipe(b(t=>!(t.metaKey||t.ctrlKey)),m(t=>({mode:nn("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),b(({mode:t,type:r})=>{if(t==="global"){let o=Ie();if(typeof o!="undefined")return!Oa(o,r)}return!0}),pe());return La().pipe(v(t=>t?S:e))}function ye(){return new URL(location.href)}function lt(e,t=!1){if(B("navigation.instant")&&!t){let r=x("a",{href:e.href});document.body.appendChild(r),r.click(),r.remove()}else location.href=e.href}function sn(){return new g}function cn(){return location.hash.slice(1)}function pn(e){let t=x("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function Ma(e){return O(h(window,"hashchange"),e).pipe(m(cn),Q(cn()),b(t=>t.length>0),G(1))}function ln(e){return Ma(e).pipe(m(t=>fe(`[id="${t}"]`)),b(t=>typeof t!="undefined"))}function Pt(e){let t=matchMedia(e);return ar(r=>t.addListener(()=>r(t.matches))).pipe(Q(t.matches))}function mn(){let e=matchMedia("print");return O(h(window,"beforeprint").pipe(m(()=>!0)),h(window,"afterprint").pipe(m(()=>!1))).pipe(Q(e.matches))}function Nr(e,t){return e.pipe(v(r=>r?t():S))}function zr(e,t){return new j(r=>{let o=new XMLHttpRequest;return o.open("GET",`${e}`),o.responseType="blob",o.addEventListener("load",()=>{o.status>=200&&o.status<300?(r.next(o.response),r.complete()):r.error(new Error(o.statusText))}),o.addEventListener("error",()=>{r.error(new Error("Network error"))}),o.addEventListener("abort",()=>{r.complete()}),typeof(t==null?void 0:t.progress$)!="undefined"&&(o.addEventListener("progress",n=>{var i;if(n.lengthComputable)t.progress$.next(n.loaded/n.total*100);else{let a=(i=o.getResponseHeader("Content-Length"))!=null?i:0;t.progress$.next(n.loaded/+a*100)}}),t.progress$.next(5)),o.send(),()=>o.abort()})}function je(e,t){return zr(e,t).pipe(v(r=>r.text()),m(r=>JSON.parse(r)),G(1))}function fn(e,t){let r=new DOMParser;return zr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/html")),G(1))}function un(e,t){let r=new DOMParser;return zr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/xml")),G(1))}function dn(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function hn(){return O(h(window,"scroll",{passive:!0}),h(window,"resize",{passive:!0})).pipe(m(dn),Q(dn()))}function bn(){return{width:innerWidth,height:innerHeight}}function vn(){return h(window,"resize",{passive:!0}).pipe(m(bn),Q(bn()))}function gn(){return z([hn(),vn()]).pipe(m(([e,t])=>({offset:e,size:t})),G(1))}function mr(e,{viewport$:t,header$:r}){let o=t.pipe(ee("size")),n=z([o,r]).pipe(m(()=>Ve(e)));return z([r,t,n]).pipe(m(([{height:i},{offset:a,size:s},{x:p,y:c}])=>({offset:{x:a.x-p,y:a.y-c+i},size:s})))}function _a(e){return h(e,"message",t=>t.data)}function Aa(e){let t=new g;return t.subscribe(r=>e.postMessage(r)),t}function yn(e,t=new Worker(e)){let r=_a(t),o=Aa(t),n=new g;n.subscribe(o);let i=o.pipe(Z(),ie(!0));return n.pipe(Z(),Re(r.pipe(W(i))),pe())}var Ca=R("#__config"),Ot=JSON.parse(Ca.textContent);Ot.base=`${new URL(Ot.base,ye())}`;function xe(){return Ot}function B(e){return Ot.features.includes(e)}function Ee(e,t){return typeof t!="undefined"?Ot.translations[e].replace("#",t.toString()):Ot.translations[e]}function Se(e,t=document){return 
R(`[data-md-component=${e}]`,t)}function ae(e,t=document){return P(`[data-md-component=${e}]`,t)}function ka(e){let t=R(".md-typeset > :first-child",e);return h(t,"click",{once:!0}).pipe(m(()=>R(".md-typeset",e)),m(r=>({hash:__md_hash(r.innerHTML)})))}function xn(e){if(!B("announce.dismiss")||!e.childElementCount)return S;if(!e.hidden){let t=R(".md-typeset",e);__md_hash(t.innerHTML)===__md_get("__announce")&&(e.hidden=!0)}return C(()=>{let t=new g;return t.subscribe(({hash:r})=>{e.hidden=!0,__md_set("__announce",r)}),ka(e).pipe(w(r=>t.next(r)),_(()=>t.complete()),m(r=>$({ref:e},r)))})}function Ha(e,{target$:t}){return t.pipe(m(r=>({hidden:r!==e})))}function En(e,t){let r=new g;return r.subscribe(({hidden:o})=>{e.hidden=o}),Ha(e,t).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))}function Rt(e,t){return t==="inline"?x("div",{class:"md-tooltip md-tooltip--inline",id:e,role:"tooltip"},x("div",{class:"md-tooltip__inner md-typeset"})):x("div",{class:"md-tooltip",id:e,role:"tooltip"},x("div",{class:"md-tooltip__inner md-typeset"}))}function wn(...e){return x("div",{class:"md-tooltip2",role:"tooltip"},x("div",{class:"md-tooltip2__inner md-typeset"},e))}function Tn(e,t){if(t=t?`${t}_annotation_${e}`:void 0,t){let r=t?`#${t}`:void 0;return x("aside",{class:"md-annotation",tabIndex:0},Rt(t),x("a",{href:r,class:"md-annotation__index",tabIndex:-1},x("span",{"data-md-annotation-id":e})))}else return x("aside",{class:"md-annotation",tabIndex:0},Rt(t),x("span",{class:"md-annotation__index",tabIndex:-1},x("span",{"data-md-annotation-id":e})))}function Sn(e){return x("button",{class:"md-clipboard md-icon",title:Ee("clipboard.copy"),"data-clipboard-target":`#${e} > code`})}var Ln=Mt(qr());function Qr(e,t){let r=t&2,o=t&1,n=Object.keys(e.terms).filter(p=>!e.terms[p]).reduce((p,c)=>[...p,x("del",null,(0,Ln.default)(c))," "],[]).slice(0,-1),i=xe(),a=new URL(e.location,i.base);B("search.highlight")&&a.searchParams.set("h",Object.entries(e.terms).filter(([,p])=>p).reduce((p,[c])=>`${p} ${c}`.trim(),""));let{tags:s}=xe();return x("a",{href:`${a}`,class:"md-search-result__link",tabIndex:-1},x("article",{class:"md-search-result__article md-typeset","data-md-score":e.score.toFixed(2)},r>0&&x("div",{class:"md-search-result__icon md-icon"}),r>0&&x("h1",null,e.title),r<=0&&x("h2",null,e.title),o>0&&e.text.length>0&&e.text,e.tags&&x("nav",{class:"md-tags"},e.tags.map(p=>{let c=s?p in s?`md-tag-icon md-tag--${s[p]}`:"md-tag-icon":"";return x("span",{class:`md-tag ${c}`},p)})),o>0&&n.length>0&&x("p",{class:"md-search-result__terms"},Ee("search.result.term.missing"),": ",...n)))}function Mn(e){let t=e[0].score,r=[...e],o=xe(),n=r.findIndex(l=>!`${new URL(l.location,o.base)}`.includes("#")),[i]=r.splice(n,1),a=r.findIndex(l=>l.scoreQr(l,1)),...p.length?[x("details",{class:"md-search-result__more"},x("summary",{tabIndex:-1},x("div",null,p.length>0&&p.length===1?Ee("search.result.more.one"):Ee("search.result.more.other",p.length))),...p.map(l=>Qr(l,1)))]:[]];return x("li",{class:"md-search-result__item"},c)}function _n(e){return x("ul",{class:"md-source__facts"},Object.entries(e).map(([t,r])=>x("li",{class:`md-source__fact md-source__fact--${t}`},typeof r=="number"?sr(r):r)))}function Kr(e){let t=`tabbed-control tabbed-control--${e}`;return x("div",{class:t,hidden:!0},x("button",{class:"tabbed-button",tabIndex:-1,"aria-hidden":"true"}))}function An(e){return x("div",{class:"md-typeset__scrollwrap"},x("div",{class:"md-typeset__table"},e))}function Ra(e){var o;let t=xe(),r=new URL(`../${e.version}/`,t.base);return 
x("li",{class:"md-version__item"},x("a",{href:`${r}`,class:"md-version__link"},e.title,((o=t.version)==null?void 0:o.alias)&&e.aliases.length>0&&x("span",{class:"md-version__alias"},e.aliases[0])))}function Cn(e,t){var o;let r=xe();return e=e.filter(n=>{var i;return!((i=n.properties)!=null&&i.hidden)}),x("div",{class:"md-version"},x("button",{class:"md-version__current","aria-label":Ee("select.version")},t.title,((o=r.version)==null?void 0:o.alias)&&t.aliases.length>0&&x("span",{class:"md-version__alias"},t.aliases[0])),x("ul",{class:"md-version__list"},e.map(Ra)))}var Ia=0;function ja(e){let t=z([et(e),$t(e)]).pipe(m(([o,n])=>o||n),K()),r=C(()=>Zo(e)).pipe(ne(Ne),pt(1),He(t),m(()=>en(e)));return t.pipe(Ae(o=>o),v(()=>z([t,r])),m(([o,n])=>({active:o,offset:n})),pe())}function Fa(e,t){let{content$:r,viewport$:o}=t,n=`__tooltip2_${Ia++}`;return C(()=>{let i=new g,a=new _r(!1);i.pipe(Z(),ie(!1)).subscribe(a);let s=a.pipe(Ht(c=>Le(+!c*250,kr)),K(),v(c=>c?r:S),w(c=>c.id=n),pe());z([i.pipe(m(({active:c})=>c)),s.pipe(v(c=>$t(c,250)),Q(!1))]).pipe(m(c=>c.some(l=>l))).subscribe(a);let p=a.pipe(b(c=>c),re(s,o),m(([c,l,{size:f}])=>{let u=e.getBoundingClientRect(),d=u.width/2;if(l.role==="tooltip")return{x:d,y:8+u.height};if(u.y>=f.height/2){let{height:y}=ce(l);return{x:d,y:-16-y}}else return{x:d,y:16+u.height}}));return z([s,i,p]).subscribe(([c,{offset:l},f])=>{c.style.setProperty("--md-tooltip-host-x",`${l.x}px`),c.style.setProperty("--md-tooltip-host-y",`${l.y}px`),c.style.setProperty("--md-tooltip-x",`${f.x}px`),c.style.setProperty("--md-tooltip-y",`${f.y}px`),c.classList.toggle("md-tooltip2--top",f.y<0),c.classList.toggle("md-tooltip2--bottom",f.y>=0)}),a.pipe(b(c=>c),re(s,(c,l)=>l),b(c=>c.role==="tooltip")).subscribe(c=>{let l=ce(R(":scope > *",c));c.style.setProperty("--md-tooltip-width",`${l.width}px`),c.style.setProperty("--md-tooltip-tail","0px")}),a.pipe(K(),ve(me),re(s)).subscribe(([c,l])=>{l.classList.toggle("md-tooltip2--active",c)}),z([a.pipe(b(c=>c)),s]).subscribe(([c,l])=>{l.role==="dialog"?(e.setAttribute("aria-controls",n),e.setAttribute("aria-haspopup","dialog")):e.setAttribute("aria-describedby",n)}),a.pipe(b(c=>!c)).subscribe(()=>{e.removeAttribute("aria-controls"),e.removeAttribute("aria-describedby"),e.removeAttribute("aria-haspopup")}),ja(e).pipe(w(c=>i.next(c)),_(()=>i.complete()),m(c=>$({ref:e},c)))})}function mt(e,{viewport$:t},r=document.body){return Fa(e,{content$:new j(o=>{let n=e.title,i=wn(n);return o.next(i),e.removeAttribute("title"),r.append(i),()=>{i.remove(),e.setAttribute("title",n)}}),viewport$:t})}function Ua(e,t){let r=C(()=>z([tn(e),Ne(t)])).pipe(m(([{x:o,y:n},i])=>{let{width:a,height:s}=ce(e);return{x:o-i.x+a/2,y:n-i.y+s/2}}));return et(e).pipe(v(o=>r.pipe(m(n=>({active:o,offset:n})),Te(+!o||1/0))))}function kn(e,t,{target$:r}){let[o,n]=Array.from(e.children);return C(()=>{let i=new g,a=i.pipe(Z(),ie(!0));return 
i.subscribe({next({offset:s}){e.style.setProperty("--md-tooltip-x",`${s.x}px`),e.style.setProperty("--md-tooltip-y",`${s.y}px`)},complete(){e.style.removeProperty("--md-tooltip-x"),e.style.removeProperty("--md-tooltip-y")}}),tt(e).pipe(W(a)).subscribe(s=>{e.toggleAttribute("data-md-visible",s)}),O(i.pipe(b(({active:s})=>s)),i.pipe(_e(250),b(({active:s})=>!s))).subscribe({next({active:s}){s?e.prepend(o):o.remove()},complete(){e.prepend(o)}}),i.pipe(Me(16,me)).subscribe(({active:s})=>{o.classList.toggle("md-tooltip--active",s)}),i.pipe(pt(125,me),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:s})=>s)).subscribe({next(s){s?e.style.setProperty("--md-tooltip-0",`${-s}px`):e.style.removeProperty("--md-tooltip-0")},complete(){e.style.removeProperty("--md-tooltip-0")}}),h(n,"click").pipe(W(a),b(s=>!(s.metaKey||s.ctrlKey))).subscribe(s=>{s.stopPropagation(),s.preventDefault()}),h(n,"mousedown").pipe(W(a),re(i)).subscribe(([s,{active:p}])=>{var c;if(s.button!==0||s.metaKey||s.ctrlKey)s.preventDefault();else if(p){s.preventDefault();let l=e.parentElement.closest(".md-annotation");l instanceof HTMLElement?l.focus():(c=Ie())==null||c.blur()}}),r.pipe(W(a),b(s=>s===o),Ge(125)).subscribe(()=>e.focus()),Ua(e,t).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))})}function Wa(e){return e.tagName==="CODE"?P(".c, .c1, .cm",e):[e]}function Da(e){let t=[];for(let r of Wa(e)){let o=[],n=document.createNodeIterator(r,NodeFilter.SHOW_TEXT);for(let i=n.nextNode();i;i=n.nextNode())o.push(i);for(let i of o){let a;for(;a=/(\(\d+\))(!)?/.exec(i.textContent);){let[,s,p]=a;if(typeof p=="undefined"){let c=i.splitText(a.index);i=c.splitText(s.length),t.push(c)}else{i.textContent=s,t.push(i);break}}}}return t}function Hn(e,t){t.append(...Array.from(e.childNodes))}function fr(e,t,{target$:r,print$:o}){let n=t.closest("[id]"),i=n==null?void 0:n.id,a=new Map;for(let s of Da(t)){let[,p]=s.textContent.match(/\((\d+)\)/);fe(`:scope > li:nth-child(${p})`,e)&&(a.set(p,Tn(p,i)),s.replaceWith(a.get(p)))}return a.size===0?S:C(()=>{let s=new g,p=s.pipe(Z(),ie(!0)),c=[];for(let[l,f]of a)c.push([R(".md-typeset",f),R(`:scope > li:nth-child(${l})`,e)]);return o.pipe(W(p)).subscribe(l=>{e.hidden=!l,e.classList.toggle("md-annotation-list",l);for(let[f,u]of c)l?Hn(f,u):Hn(u,f)}),O(...[...a].map(([,l])=>kn(l,t,{target$:r}))).pipe(_(()=>s.complete()),pe())})}function $n(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return $n(t)}}function Pn(e,t){return C(()=>{let r=$n(e);return typeof r!="undefined"?fr(r,e,t):S})}var Rn=Mt(Br());var Va=0;function In(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return In(t)}}function Na(e){return ge(e).pipe(m(({width:t})=>({scrollable:St(e).width>t})),ee("scrollable"))}function jn(e,t){let{matches:r}=matchMedia("(hover)"),o=C(()=>{let n=new g,i=n.pipe(jr(1));n.subscribe(({scrollable:c})=>{c&&r?e.setAttribute("tabindex","0"):e.removeAttribute("tabindex")});let a=[];if(Rn.default.isSupported()&&(e.closest(".copy")||B("content.code.copy")&&!e.closest(".no-copy"))){let c=e.closest("pre");c.id=`__code_${Va++}`;let l=Sn(c.id);c.insertBefore(l,e),B("content.tooltips")&&a.push(mt(l,{viewport$}))}let s=e.closest(".highlight");if(s instanceof HTMLElement){let c=In(s);if(typeof c!="undefined"&&(s.classList.contains("annotate")||B("content.code.annotate"))){let 
l=fr(c,e,t);a.push(ge(s).pipe(W(i),m(({width:f,height:u})=>f&&u),K(),v(f=>f?l:S)))}}return P(":scope > span[id]",e).length&&e.classList.add("md-code__content"),Na(e).pipe(w(c=>n.next(c)),_(()=>n.complete()),m(c=>$({ref:e},c)),Re(...a))});return B("content.lazy")?tt(e).pipe(b(n=>n),Te(1),v(()=>o)):o}function za(e,{target$:t,print$:r}){let o=!0;return O(t.pipe(m(n=>n.closest("details:not([open])")),b(n=>e===n),m(()=>({action:"open",reveal:!0}))),r.pipe(b(n=>n||!o),w(()=>o=e.open),m(n=>({action:n?"open":"close"}))))}function Fn(e,t){return C(()=>{let r=new g;return r.subscribe(({action:o,reveal:n})=>{e.toggleAttribute("open",o==="open"),n&&e.scrollIntoView()}),za(e,t).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}var Un=".node circle,.node ellipse,.node path,.node polygon,.node rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}marker{fill:var(--md-mermaid-edge-color)!important}.edgeLabel .label rect{fill:#0000}.label{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.label foreignObject{line-height:normal;overflow:visible}.label div .edgeLabel{color:var(--md-mermaid-label-fg-color)}.edgeLabel,.edgeLabel p,.label div .edgeLabel{background-color:var(--md-mermaid-label-bg-color)}.edgeLabel,.edgeLabel p{fill:var(--md-mermaid-label-bg-color);color:var(--md-mermaid-edge-color)}.edgePath .path,.flowchart-link{stroke:var(--md-mermaid-edge-color);stroke-width:.05rem}.edgePath .arrowheadPath{fill:var(--md-mermaid-edge-color);stroke:none}.cluster rect{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}.cluster span{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}g #flowchart-circleEnd,g #flowchart-circleStart,g #flowchart-crossEnd,g #flowchart-crossStart,g #flowchart-pointEnd,g #flowchart-pointStart{stroke:none}g.classGroup line,g.classGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.classGroup text{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.classLabel .box{fill:var(--md-mermaid-label-bg-color);background-color:var(--md-mermaid-label-bg-color);opacity:1}.classLabel .label{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.node .divider{stroke:var(--md-mermaid-node-fg-color)}.relation{stroke:var(--md-mermaid-edge-color)}.cardinality{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.cardinality text{fill:inherit!important}defs #classDiagram-compositionEnd,defs #classDiagram-compositionStart,defs #classDiagram-dependencyEnd,defs #classDiagram-dependencyStart,defs #classDiagram-extensionEnd,defs #classDiagram-extensionStart{fill:var(--md-mermaid-edge-color)!important;stroke:var(--md-mermaid-edge-color)!important}defs #classDiagram-aggregationEnd,defs #classDiagram-aggregationStart{fill:var(--md-mermaid-label-bg-color)!important;stroke:var(--md-mermaid-edge-color)!important}g.stateGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.stateGroup .state-title{fill:var(--md-mermaid-label-fg-color)!important;font-family:var(--md-mermaid-font-family)}g.stateGroup .composit{fill:var(--md-mermaid-label-bg-color)}.nodeLabel,.nodeLabel p{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}a .nodeLabel{text-decoration:underline}.node circle.state-end,.node 
circle.state-start,.start-state{fill:var(--md-mermaid-edge-color);stroke:none}.end-state-inner,.end-state-outer{fill:var(--md-mermaid-edge-color)}.end-state-inner,.node circle.state-end{stroke:var(--md-mermaid-label-bg-color)}.transition{stroke:var(--md-mermaid-edge-color)}[id^=state-fork] rect,[id^=state-join] rect{fill:var(--md-mermaid-edge-color)!important;stroke:none!important}.statediagram-cluster.statediagram-cluster .inner{fill:var(--md-default-bg-color)}.statediagram-cluster rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.statediagram-state rect.divider{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}defs #statediagram-barbEnd{stroke:var(--md-mermaid-edge-color)}.attributeBoxEven,.attributeBoxOdd{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityBox{fill:var(--md-mermaid-label-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityLabel{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.relationshipLabelBox{fill:var(--md-mermaid-label-bg-color);fill-opacity:1;background-color:var(--md-mermaid-label-bg-color);opacity:1}.relationshipLabel{fill:var(--md-mermaid-label-fg-color)}.relationshipLine{stroke:var(--md-mermaid-edge-color)}defs #ONE_OR_MORE_END *,defs #ONE_OR_MORE_START *,defs #ONLY_ONE_END *,defs #ONLY_ONE_START *,defs #ZERO_OR_MORE_END *,defs #ZERO_OR_MORE_START *,defs #ZERO_OR_ONE_END *,defs #ZERO_OR_ONE_START *{stroke:var(--md-mermaid-edge-color)!important}defs #ZERO_OR_MORE_END circle,defs #ZERO_OR_MORE_START circle{fill:var(--md-mermaid-label-bg-color)}.actor{fill:var(--md-mermaid-sequence-actor-bg-color);stroke:var(--md-mermaid-sequence-actor-border-color)}text.actor>tspan{fill:var(--md-mermaid-sequence-actor-fg-color);font-family:var(--md-mermaid-font-family)}line{stroke:var(--md-mermaid-sequence-actor-line-color)}.actor-man circle,.actor-man line{fill:var(--md-mermaid-sequence-actorman-bg-color);stroke:var(--md-mermaid-sequence-actorman-line-color)}.messageLine0,.messageLine1{stroke:var(--md-mermaid-sequence-message-line-color)}.note{fill:var(--md-mermaid-sequence-note-bg-color);stroke:var(--md-mermaid-sequence-note-border-color)}.loopText,.loopText>tspan,.messageText,.noteText>tspan{stroke:none;font-family:var(--md-mermaid-font-family)!important}.messageText{fill:var(--md-mermaid-sequence-message-fg-color)}.loopText,.loopText>tspan{fill:var(--md-mermaid-sequence-loop-fg-color)}.noteText>tspan{fill:var(--md-mermaid-sequence-note-fg-color)}#arrowhead path{fill:var(--md-mermaid-sequence-message-line-color);stroke:none}.loopLine{fill:var(--md-mermaid-sequence-loop-bg-color);stroke:var(--md-mermaid-sequence-loop-border-color)}.labelBox{fill:var(--md-mermaid-sequence-label-bg-color);stroke:none}.labelText,.labelText>span{fill:var(--md-mermaid-sequence-label-fg-color);font-family:var(--md-mermaid-font-family)}.sequenceNumber{fill:var(--md-mermaid-sequence-number-fg-color)}rect.rect{fill:var(--md-mermaid-sequence-box-bg-color);stroke:none}rect.rect+text.text{fill:var(--md-mermaid-sequence-box-fg-color)}defs #sequencenumber{fill:var(--md-mermaid-sequence-number-bg-color)!important}";var Gr,Qa=0;function Ka(){return typeof mermaid=="undefined"||mermaid instanceof Element?Tt("https://unpkg.com/mermaid@11/dist/mermaid.min.js"):I(void 0)}function Wn(e){return 
e.classList.remove("mermaid"),Gr||(Gr=Ka().pipe(w(()=>mermaid.initialize({startOnLoad:!1,themeCSS:Un,sequence:{actorFontSize:"16px",messageFontSize:"16px",noteFontSize:"16px"}})),m(()=>{}),G(1))),Gr.subscribe(()=>co(this,null,function*(){e.classList.add("mermaid");let t=`__mermaid_${Qa++}`,r=x("div",{class:"mermaid"}),o=e.textContent,{svg:n,fn:i}=yield mermaid.render(t,o),a=r.attachShadow({mode:"closed"});a.innerHTML=n,e.replaceWith(r),i==null||i(a)})),Gr.pipe(m(()=>({ref:e})))}var Dn=x("table");function Vn(e){return e.replaceWith(Dn),Dn.replaceWith(An(e)),I({ref:e})}function Ya(e){let t=e.find(r=>r.checked)||e[0];return O(...e.map(r=>h(r,"change").pipe(m(()=>R(`label[for="${r.id}"]`))))).pipe(Q(R(`label[for="${t.id}"]`)),m(r=>({active:r})))}function Nn(e,{viewport$:t,target$:r}){let o=R(".tabbed-labels",e),n=P(":scope > input",e),i=Kr("prev");e.append(i);let a=Kr("next");return e.append(a),C(()=>{let s=new g,p=s.pipe(Z(),ie(!0));z([s,ge(e),tt(e)]).pipe(W(p),Me(1,me)).subscribe({next([{active:c},l]){let f=Ve(c),{width:u}=ce(c);e.style.setProperty("--md-indicator-x",`${f.x}px`),e.style.setProperty("--md-indicator-width",`${u}px`);let d=pr(o);(f.xd.x+l.width)&&o.scrollTo({left:Math.max(0,f.x-16),behavior:"smooth"})},complete(){e.style.removeProperty("--md-indicator-x"),e.style.removeProperty("--md-indicator-width")}}),z([Ne(o),ge(o)]).pipe(W(p)).subscribe(([c,l])=>{let f=St(o);i.hidden=c.x<16,a.hidden=c.x>f.width-l.width-16}),O(h(i,"click").pipe(m(()=>-1)),h(a,"click").pipe(m(()=>1))).pipe(W(p)).subscribe(c=>{let{width:l}=ce(o);o.scrollBy({left:l*c,behavior:"smooth"})}),r.pipe(W(p),b(c=>n.includes(c))).subscribe(c=>c.click()),o.classList.add("tabbed-labels--linked");for(let c of n){let l=R(`label[for="${c.id}"]`);l.replaceChildren(x("a",{href:`#${l.htmlFor}`,tabIndex:-1},...Array.from(l.childNodes))),h(l.firstElementChild,"click").pipe(W(p),b(f=>!(f.metaKey||f.ctrlKey)),w(f=>{f.preventDefault(),f.stopPropagation()})).subscribe(()=>{history.replaceState({},"",`#${l.htmlFor}`),l.click()})}return B("content.tabs.link")&&s.pipe(Ce(1),re(t)).subscribe(([{active:c},{offset:l}])=>{let f=c.innerText.trim();if(c.hasAttribute("data-md-switching"))c.removeAttribute("data-md-switching");else{let u=e.offsetTop-l.y;for(let y of P("[data-tabs]"))for(let L of P(":scope > input",y)){let X=R(`label[for="${L.id}"]`);if(X!==c&&X.innerText.trim()===f){X.setAttribute("data-md-switching",""),L.click();break}}window.scrollTo({top:e.offsetTop-u});let d=__md_get("__tabs")||[];__md_set("__tabs",[...new Set([f,...d])])}}),s.pipe(W(p)).subscribe(()=>{for(let c of P("audio, video",e))c.pause()}),Ya(n).pipe(w(c=>s.next(c)),_(()=>s.complete()),m(c=>$({ref:e},c)))}).pipe(Ke(se))}function zn(e,{viewport$:t,target$:r,print$:o}){return O(...P(".annotate:not(.highlight)",e).map(n=>Pn(n,{target$:r,print$:o})),...P("pre:not(.mermaid) > code",e).map(n=>jn(n,{target$:r,print$:o})),...P("pre.mermaid",e).map(n=>Wn(n)),...P("table:not([class])",e).map(n=>Vn(n)),...P("details",e).map(n=>Fn(n,{target$:r,print$:o})),...P("[data-tabs]",e).map(n=>Nn(n,{viewport$:t,target$:r})),...P("[title]",e).filter(()=>B("content.tooltips")).map(n=>mt(n,{viewport$:t})))}function Ba(e,{alert$:t}){return t.pipe(v(r=>O(I(!0),I(!1).pipe(Ge(2e3))).pipe(m(o=>({message:r,active:o})))))}function qn(e,t){let r=R(".md-typeset",e);return C(()=>{let o=new g;return o.subscribe(({message:n,active:i})=>{e.classList.toggle("md-dialog--active",i),r.textContent=n}),Ba(e,t).pipe(w(n=>o.next(n)),_(()=>o.complete()),m(n=>$({ref:e},n)))})}var Ga=0;function 
Ja(e,t){document.body.append(e);let{width:r}=ce(e);e.style.setProperty("--md-tooltip-width",`${r}px`),e.remove();let o=cr(t),n=typeof o!="undefined"?Ne(o):I({x:0,y:0}),i=O(et(t),$t(t)).pipe(K());return z([i,n]).pipe(m(([a,s])=>{let{x:p,y:c}=Ve(t),l=ce(t),f=t.closest("table");return f&&t.parentElement&&(p+=f.offsetLeft+t.parentElement.offsetLeft,c+=f.offsetTop+t.parentElement.offsetTop),{active:a,offset:{x:p-s.x+l.width/2-r/2,y:c-s.y+l.height+8}}}))}function Qn(e){let t=e.title;if(!t.length)return S;let r=`__tooltip_${Ga++}`,o=Rt(r,"inline"),n=R(".md-typeset",o);return n.innerHTML=t,C(()=>{let i=new g;return i.subscribe({next({offset:a}){o.style.setProperty("--md-tooltip-x",`${a.x}px`),o.style.setProperty("--md-tooltip-y",`${a.y}px`)},complete(){o.style.removeProperty("--md-tooltip-x"),o.style.removeProperty("--md-tooltip-y")}}),O(i.pipe(b(({active:a})=>a)),i.pipe(_e(250),b(({active:a})=>!a))).subscribe({next({active:a}){a?(e.insertAdjacentElement("afterend",o),e.setAttribute("aria-describedby",r),e.removeAttribute("title")):(o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t))},complete(){o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t)}}),i.pipe(Me(16,me)).subscribe(({active:a})=>{o.classList.toggle("md-tooltip--active",a)}),i.pipe(pt(125,me),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:a})=>a)).subscribe({next(a){a?o.style.setProperty("--md-tooltip-0",`${-a}px`):o.style.removeProperty("--md-tooltip-0")},complete(){o.style.removeProperty("--md-tooltip-0")}}),Ja(o,e).pipe(w(a=>i.next(a)),_(()=>i.complete()),m(a=>$({ref:e},a)))}).pipe(Ke(se))}function Xa({viewport$:e}){if(!B("header.autohide"))return I(!1);let t=e.pipe(m(({offset:{y:n}})=>n),Be(2,1),m(([n,i])=>[nMath.abs(i-n.y)>100),m(([,[n]])=>n),K()),o=ze("search");return z([e,o]).pipe(m(([{offset:n},i])=>n.y>400&&!i),K(),v(n=>n?r:I(!1)),Q(!1))}function Kn(e,t){return C(()=>z([ge(e),Xa(t)])).pipe(m(([{height:r},o])=>({height:r,hidden:o})),K((r,o)=>r.height===o.height&&r.hidden===o.hidden),G(1))}function Yn(e,{header$:t,main$:r}){return C(()=>{let o=new g,n=o.pipe(Z(),ie(!0));o.pipe(ee("active"),He(t)).subscribe(([{active:a},{hidden:s}])=>{e.classList.toggle("md-header--shadow",a&&!s),e.hidden=s});let i=ue(P("[title]",e)).pipe(b(()=>B("content.tooltips")),ne(a=>Qn(a)));return r.subscribe(o),t.pipe(W(n),m(a=>$({ref:e},a)),Re(i.pipe(W(n))))})}function Za(e,{viewport$:t,header$:r}){return mr(e,{viewport$:t,header$:r}).pipe(m(({offset:{y:o}})=>{let{height:n}=ce(e);return{active:o>=n}}),ee("active"))}function Bn(e,t){return C(()=>{let r=new g;r.subscribe({next({active:n}){e.classList.toggle("md-header__title--active",n)},complete(){e.classList.remove("md-header__title--active")}});let o=fe(".md-content h1");return typeof o=="undefined"?S:Za(o,t).pipe(w(n=>r.next(n)),_(()=>r.complete()),m(n=>$({ref:e},n)))})}function Gn(e,{viewport$:t,header$:r}){let o=r.pipe(m(({height:i})=>i),K()),n=o.pipe(v(()=>ge(e).pipe(m(({height:i})=>({top:e.offsetTop,bottom:e.offsetTop+i})),ee("bottom"))));return z([o,n,t]).pipe(m(([i,{top:a,bottom:s},{offset:{y:p},size:{height:c}}])=>(c=Math.max(0,c-Math.max(0,a-p,i)-Math.max(0,c+p-s)),{offset:a-i,height:c,active:a-i<=p})),K((i,a)=>i.offset===a.offset&&i.height===a.height&&i.active===a.active))}function es(e){let t=__md_get("__palette")||{index:e.findIndex(o=>matchMedia(o.getAttribute("data-md-color-media")).matches)},r=Math.max(0,Math.min(t.index,e.length-1));return 
I(...e).pipe(ne(o=>h(o,"change").pipe(m(()=>o))),Q(e[r]),m(o=>({index:e.indexOf(o),color:{media:o.getAttribute("data-md-color-media"),scheme:o.getAttribute("data-md-color-scheme"),primary:o.getAttribute("data-md-color-primary"),accent:o.getAttribute("data-md-color-accent")}})),G(1))}function Jn(e){let t=P("input",e),r=x("meta",{name:"theme-color"});document.head.appendChild(r);let o=x("meta",{name:"color-scheme"});document.head.appendChild(o);let n=Pt("(prefers-color-scheme: light)");return C(()=>{let i=new g;return i.subscribe(a=>{if(document.body.setAttribute("data-md-color-switching",""),a.color.media==="(prefers-color-scheme)"){let s=matchMedia("(prefers-color-scheme: light)"),p=document.querySelector(s.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");a.color.scheme=p.getAttribute("data-md-color-scheme"),a.color.primary=p.getAttribute("data-md-color-primary"),a.color.accent=p.getAttribute("data-md-color-accent")}for(let[s,p]of Object.entries(a.color))document.body.setAttribute(`data-md-color-${s}`,p);for(let s=0;sa.key==="Enter"),re(i,(a,s)=>s)).subscribe(({index:a})=>{a=(a+1)%t.length,t[a].click(),t[a].focus()}),i.pipe(m(()=>{let a=Se("header"),s=window.getComputedStyle(a);return o.content=s.colorScheme,s.backgroundColor.match(/\d+/g).map(p=>(+p).toString(16).padStart(2,"0")).join("")})).subscribe(a=>r.content=`#${a}`),i.pipe(ve(se)).subscribe(()=>{document.body.removeAttribute("data-md-color-switching")}),es(t).pipe(W(n.pipe(Ce(1))),ct(),w(a=>i.next(a)),_(()=>i.complete()),m(a=>$({ref:e},a)))})}function Xn(e,{progress$:t}){return C(()=>{let r=new g;return r.subscribe(({value:o})=>{e.style.setProperty("--md-progress-value",`${o}`)}),t.pipe(w(o=>r.next({value:o})),_(()=>r.complete()),m(o=>({ref:e,value:o})))})}var Jr=Mt(Br());function ts(e){e.setAttribute("data-md-copying","");let t=e.closest("[data-copy]"),r=t?t.getAttribute("data-copy"):e.innerText;return e.removeAttribute("data-md-copying"),r.trimEnd()}function Zn({alert$:e}){Jr.default.isSupported()&&new j(t=>{new Jr.default("[data-clipboard-target], [data-clipboard-text]",{text:r=>r.getAttribute("data-clipboard-text")||ts(R(r.getAttribute("data-clipboard-target")))}).on("success",r=>t.next(r))}).pipe(w(t=>{t.trigger.focus()}),m(()=>Ee("clipboard.copied"))).subscribe(e)}function ei(e,t){return e.protocol=t.protocol,e.hostname=t.hostname,e}function rs(e,t){let r=new Map;for(let o of P("url",e)){let n=R("loc",o),i=[ei(new URL(n.textContent),t)];r.set(`${i[0]}`,i);for(let a of P("[rel=alternate]",o)){let s=a.getAttribute("href");s!=null&&i.push(ei(new URL(s),t))}}return r}function ur(e){return un(new URL("sitemap.xml",e)).pipe(m(t=>rs(t,new URL(e))),de(()=>I(new Map)))}function os(e,t){if(!(e.target instanceof Element))return S;let r=e.target.closest("a");if(r===null)return S;if(r.target||e.metaKey||e.ctrlKey)return S;let o=new URL(r.href);return o.search=o.hash="",t.has(`${o}`)?(e.preventDefault(),I(new URL(r.href))):S}function ti(e){let t=new Map;for(let r of P(":scope > *",e.head))t.set(r.outerHTML,r);return t}function ri(e){for(let t of P("[href], [src]",e))for(let r of["href","src"]){let o=t.getAttribute(r);if(o&&!/^(?:[a-z]+:)?\/\//i.test(o)){t[r]=t[r];break}}return I(e)}function ns(e){for(let o of["[data-md-component=announce]","[data-md-component=container]","[data-md-component=header-topic]","[data-md-component=outdated]","[data-md-component=logo]","[data-md-component=skip]",...B("navigation.tabs.sticky")?["[data-md-component=tabs]"]:[]]){let 
n=fe(o),i=fe(o,e);typeof n!="undefined"&&typeof i!="undefined"&&n.replaceWith(i)}let t=ti(document);for(let[o,n]of ti(e))t.has(o)?t.delete(o):document.head.appendChild(n);for(let o of t.values()){let n=o.getAttribute("name");n!=="theme-color"&&n!=="color-scheme"&&o.remove()}let r=Se("container");return We(P("script",r)).pipe(v(o=>{let n=e.createElement("script");if(o.src){for(let i of o.getAttributeNames())n.setAttribute(i,o.getAttribute(i));return o.replaceWith(n),new j(i=>{n.onload=()=>i.complete()})}else return n.textContent=o.textContent,o.replaceWith(n),S}),Z(),ie(document))}function oi({location$:e,viewport$:t,progress$:r}){let o=xe();if(location.protocol==="file:")return S;let n=ur(o.base);I(document).subscribe(ri);let i=h(document.body,"click").pipe(He(n),v(([p,c])=>os(p,c)),pe()),a=h(window,"popstate").pipe(m(ye),pe());i.pipe(re(t)).subscribe(([p,{offset:c}])=>{history.replaceState(c,""),history.pushState(null,"",p)}),O(i,a).subscribe(e);let s=e.pipe(ee("pathname"),v(p=>fn(p,{progress$:r}).pipe(de(()=>(lt(p,!0),S)))),v(ri),v(ns),pe());return O(s.pipe(re(e,(p,c)=>c)),s.pipe(v(()=>e),ee("pathname"),v(()=>e),ee("hash")),e.pipe(K((p,c)=>p.pathname===c.pathname&&p.hash===c.hash),v(()=>i),w(()=>history.back()))).subscribe(p=>{var c,l;history.state!==null||!p.hash?window.scrollTo(0,(l=(c=history.state)==null?void 0:c.y)!=null?l:0):(history.scrollRestoration="auto",pn(p.hash),history.scrollRestoration="manual")}),e.subscribe(()=>{history.scrollRestoration="manual"}),h(window,"beforeunload").subscribe(()=>{history.scrollRestoration="auto"}),t.pipe(ee("offset"),_e(100)).subscribe(({offset:p})=>{history.replaceState(p,"")}),s}var ni=Mt(qr());function ii(e){let t=e.separator.split("|").map(n=>n.replace(/(\(\?[!=<][^)]+\))/g,"").length===0?"\uFFFD":n).join("|"),r=new RegExp(t,"img"),o=(n,i,a)=>`${i}${a}`;return n=>{n=n.replace(/[\s*+\-:~^]+/g," ").trim();let i=new RegExp(`(^|${e.separator}|)(${n.replace(/[|\\{}()[\]^$+*?.-]/g,"\\$&").replace(r,"|")})`,"img");return a=>(0,ni.default)(a).replace(i,o).replace(/<\/mark>(\s+)]*>/img,"$1")}}function jt(e){return e.type===1}function dr(e){return e.type===3}function ai(e,t){let r=yn(e);return O(I(location.protocol!=="file:"),ze("search")).pipe(Ae(o=>o),v(()=>t)).subscribe(({config:o,docs:n})=>r.next({type:0,data:{config:o,docs:n,options:{suggest:B("search.suggest")}}})),r}function si(e){var l;let{selectedVersionSitemap:t,selectedVersionBaseURL:r,currentLocation:o,currentBaseURL:n}=e,i=(l=Xr(n))==null?void 0:l.pathname;if(i===void 0)return;let a=ss(o.pathname,i);if(a===void 0)return;let s=ps(t.keys());if(!t.has(s))return;let p=Xr(a,s);if(!p||!t.has(p.href))return;let c=Xr(a,r);if(c)return c.hash=o.hash,c.search=o.search,c}function Xr(e,t){try{return new URL(e,t)}catch(r){return}}function ss(e,t){if(e.startsWith(t))return e.slice(t.length)}function cs(e,t){let r=Math.min(e.length,t.length),o;for(o=0;oS)),o=r.pipe(m(n=>{let[,i]=t.base.match(/([^/]+)\/?$/);return n.find(({version:a,aliases:s})=>a===i||s.includes(i))||n[0]}));r.pipe(m(n=>new Map(n.map(i=>[`${new URL(`../${i.version}/`,t.base)}`,i]))),v(n=>h(document.body,"click").pipe(b(i=>!i.metaKey&&!i.ctrlKey),re(o),v(([i,a])=>{if(i.target instanceof Element){let s=i.target.closest("a");if(s&&!s.target&&n.has(s.href)){let p=s.href;return!i.target.closest(".md-version")&&n.get(p)===a?S:(i.preventDefault(),I(new URL(p)))}}return S}),v(i=>ur(i).pipe(m(a=>{var 
s;return(s=si({selectedVersionSitemap:a,selectedVersionBaseURL:i,currentLocation:ye(),currentBaseURL:t.base}))!=null?s:i})))))).subscribe(n=>lt(n,!0)),z([r,o]).subscribe(([n,i])=>{R(".md-header__topic").appendChild(Cn(n,i))}),e.pipe(v(()=>o)).subscribe(n=>{var a;let i=__md_get("__outdated",sessionStorage);if(i===null){i=!0;let s=((a=t.version)==null?void 0:a.default)||"latest";Array.isArray(s)||(s=[s]);e:for(let p of s)for(let c of n.aliases.concat(n.version))if(new RegExp(p,"i").test(c)){i=!1;break e}__md_set("__outdated",i,sessionStorage)}if(i)for(let s of ae("outdated"))s.hidden=!1})}function ls(e,{worker$:t}){let{searchParams:r}=ye();r.has("q")&&(Je("search",!0),e.value=r.get("q"),e.focus(),ze("search").pipe(Ae(i=>!i)).subscribe(()=>{let i=ye();i.searchParams.delete("q"),history.replaceState({},"",`${i}`)}));let o=et(e),n=O(t.pipe(Ae(jt)),h(e,"keyup"),o).pipe(m(()=>e.value),K());return z([n,o]).pipe(m(([i,a])=>({value:i,focus:a})),G(1))}function pi(e,{worker$:t}){let r=new g,o=r.pipe(Z(),ie(!0));z([t.pipe(Ae(jt)),r],(i,a)=>a).pipe(ee("value")).subscribe(({value:i})=>t.next({type:2,data:i})),r.pipe(ee("focus")).subscribe(({focus:i})=>{i&&Je("search",i)}),h(e.form,"reset").pipe(W(o)).subscribe(()=>e.focus());let n=R("header [for=__search]");return h(n,"click").subscribe(()=>e.focus()),ls(e,{worker$:t}).pipe(w(i=>r.next(i)),_(()=>r.complete()),m(i=>$({ref:e},i)),G(1))}function li(e,{worker$:t,query$:r}){let o=new g,n=on(e.parentElement).pipe(b(Boolean)),i=e.parentElement,a=R(":scope > :first-child",e),s=R(":scope > :last-child",e);ze("search").subscribe(l=>s.setAttribute("role",l?"list":"presentation")),o.pipe(re(r),Wr(t.pipe(Ae(jt)))).subscribe(([{items:l},{value:f}])=>{switch(l.length){case 0:a.textContent=f.length?Ee("search.result.none"):Ee("search.result.placeholder");break;case 1:a.textContent=Ee("search.result.one");break;default:let u=sr(l.length);a.textContent=Ee("search.result.other",u)}});let p=o.pipe(w(()=>s.innerHTML=""),v(({items:l})=>O(I(...l.slice(0,10)),I(...l.slice(10)).pipe(Be(4),Vr(n),v(([f])=>f)))),m(Mn),pe());return p.subscribe(l=>s.appendChild(l)),p.pipe(ne(l=>{let f=fe("details",l);return typeof f=="undefined"?S:h(f,"toggle").pipe(W(o),m(()=>f))})).subscribe(l=>{l.open===!1&&l.offsetTop<=i.scrollTop&&i.scrollTo({top:l.offsetTop})}),t.pipe(b(dr),m(({data:l})=>l)).pipe(w(l=>o.next(l)),_(()=>o.complete()),m(l=>$({ref:e},l)))}function ms(e,{query$:t}){return t.pipe(m(({value:r})=>{let o=ye();return o.hash="",r=r.replace(/\s+/g,"+").replace(/&/g,"%26").replace(/=/g,"%3D"),o.search=`q=${r}`,{url:o}}))}function mi(e,t){let r=new g,o=r.pipe(Z(),ie(!0));return r.subscribe(({url:n})=>{e.setAttribute("data-clipboard-text",e.href),e.href=`${n}`}),h(e,"click").pipe(W(o)).subscribe(n=>n.preventDefault()),ms(e,t).pipe(w(n=>r.next(n)),_(()=>r.complete()),m(n=>$({ref:e},n)))}function fi(e,{worker$:t,keyboard$:r}){let o=new g,n=Se("search-query"),i=O(h(n,"keydown"),h(n,"focus")).pipe(ve(se),m(()=>n.value),K());return o.pipe(He(i),m(([{suggest:s},p])=>{let c=p.split(/([\s-]+)/);if(s!=null&&s.length&&c[c.length-1]){let l=s[s.length-1];l.startsWith(c[c.length-1])&&(c[c.length-1]=l)}else c.length=0;return c})).subscribe(s=>e.innerHTML=s.join("").replace(/\s/g," ")),r.pipe(b(({mode:s})=>s==="search")).subscribe(s=>{switch(s.type){case"ArrowRight":e.innerText.length&&n.selectionStart===n.value.length&&(n.value=e.innerText);break}}),t.pipe(b(dr),m(({data:s})=>s)).pipe(w(s=>o.next(s)),_(()=>o.complete()),m(()=>({ref:e})))}function ui(e,{index$:t,keyboard$:r}){let o=xe();try{let 
n=ai(o.search,t),i=Se("search-query",e),a=Se("search-result",e);h(e,"click").pipe(b(({target:p})=>p instanceof Element&&!!p.closest("a"))).subscribe(()=>Je("search",!1)),r.pipe(b(({mode:p})=>p==="search")).subscribe(p=>{let c=Ie();switch(p.type){case"Enter":if(c===i){let l=new Map;for(let f of P(":first-child [href]",a)){let u=f.firstElementChild;l.set(f,parseFloat(u.getAttribute("data-md-score")))}if(l.size){let[[f]]=[...l].sort(([,u],[,d])=>d-u);f.click()}p.claim()}break;case"Escape":case"Tab":Je("search",!1),i.blur();break;case"ArrowUp":case"ArrowDown":if(typeof c=="undefined")i.focus();else{let l=[i,...P(":not(details) > [href], summary, details[open] [href]",a)],f=Math.max(0,(Math.max(0,l.indexOf(c))+l.length+(p.type==="ArrowUp"?-1:1))%l.length);l[f].focus()}p.claim();break;default:i!==Ie()&&i.focus()}}),r.pipe(b(({mode:p})=>p==="global")).subscribe(p=>{switch(p.type){case"f":case"s":case"/":i.focus(),i.select(),p.claim();break}});let s=pi(i,{worker$:n});return O(s,li(a,{worker$:n,query$:s})).pipe(Re(...ae("search-share",e).map(p=>mi(p,{query$:s})),...ae("search-suggest",e).map(p=>fi(p,{worker$:n,keyboard$:r}))))}catch(n){return e.hidden=!0,Ye}}function di(e,{index$:t,location$:r}){return z([t,r.pipe(Q(ye()),b(o=>!!o.searchParams.get("h")))]).pipe(m(([o,n])=>ii(o.config)(n.searchParams.get("h"))),m(o=>{var a;let n=new Map,i=document.createNodeIterator(e,NodeFilter.SHOW_TEXT);for(let s=i.nextNode();s;s=i.nextNode())if((a=s.parentElement)!=null&&a.offsetHeight){let p=s.textContent,c=o(p);c.length>p.length&&n.set(s,c)}for(let[s,p]of n){let{childNodes:c}=x("span",null,p);s.replaceWith(...Array.from(c))}return{ref:e,nodes:n}}))}function fs(e,{viewport$:t,main$:r}){let o=e.closest(".md-grid"),n=o.offsetTop-o.parentElement.offsetTop;return z([r,t]).pipe(m(([{offset:i,height:a},{offset:{y:s}}])=>(a=a+Math.min(n,Math.max(0,s-i))-n,{height:a,locked:s>=i+n})),K((i,a)=>i.height===a.height&&i.locked===a.locked))}function Zr(e,o){var n=o,{header$:t}=n,r=so(n,["header$"]);let i=R(".md-sidebar__scrollwrap",e),{y:a}=Ve(i);return C(()=>{let s=new g,p=s.pipe(Z(),ie(!0)),c=s.pipe(Me(0,me));return c.pipe(re(t)).subscribe({next([{height:l},{height:f}]){i.style.height=`${l-2*a}px`,e.style.top=`${f}px`},complete(){i.style.height="",e.style.top=""}}),c.pipe(Ae()).subscribe(()=>{for(let l of P(".md-nav__link--active[href]",e)){if(!l.clientHeight)continue;let f=l.closest(".md-sidebar__scrollwrap");if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:d}=ce(f);f.scrollTo({top:u-d/2})}}}),ue(P("label[tabindex]",e)).pipe(ne(l=>h(l,"click").pipe(ve(se),m(()=>l),W(p)))).subscribe(l=>{let f=R(`[id="${l.htmlFor}"]`);R(`[aria-labelledby="${l.id}"]`).setAttribute("aria-expanded",`${f.checked}`)}),fs(e,r).pipe(w(l=>s.next(l)),_(()=>s.complete()),m(l=>$({ref:e},l)))})}function hi(e,t){if(typeof t!="undefined"){let r=`https://api.github.com/repos/${e}/${t}`;return st(je(`${r}/releases/latest`).pipe(de(()=>S),m(o=>({version:o.tag_name})),De({})),je(r).pipe(de(()=>S),m(o=>({stars:o.stargazers_count,forks:o.forks_count})),De({}))).pipe(m(([o,n])=>$($({},o),n)))}else{let r=`https://api.github.com/users/${e}`;return je(r).pipe(m(o=>({repositories:o.public_repos})),De({}))}}function bi(e,t){let r=`https://${e}/api/v4/projects/${encodeURIComponent(t)}`;return st(je(`${r}/releases/permalink/latest`).pipe(de(()=>S),m(({tag_name:o})=>({version:o})),De({})),je(r).pipe(de(()=>S),m(({star_count:o,forks_count:n})=>({stars:o,forks:n})),De({}))).pipe(m(([o,n])=>$($({},o),n)))}function vi(e){let 
t=e.match(/^.+github\.com\/([^/]+)\/?([^/]+)?/i);if(t){let[,r,o]=t;return hi(r,o)}if(t=e.match(/^.+?([^/]*gitlab[^/]+)\/(.+?)\/?$/i),t){let[,r,o]=t;return bi(r,o)}return S}var us;function ds(e){return us||(us=C(()=>{let t=__md_get("__source",sessionStorage);if(t)return I(t);if(ae("consent").length){let o=__md_get("__consent");if(!(o&&o.github))return S}return vi(e.href).pipe(w(o=>__md_set("__source",o,sessionStorage)))}).pipe(de(()=>S),b(t=>Object.keys(t).length>0),m(t=>({facts:t})),G(1)))}function gi(e){let t=R(":scope > :last-child",e);return C(()=>{let r=new g;return r.subscribe(({facts:o})=>{t.appendChild(_n(o)),t.classList.add("md-source__repository--active")}),ds(e).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}function hs(e,{viewport$:t,header$:r}){return ge(document.body).pipe(v(()=>mr(e,{header$:r,viewport$:t})),m(({offset:{y:o}})=>({hidden:o>=10})),ee("hidden"))}function yi(e,t){return C(()=>{let r=new g;return r.subscribe({next({hidden:o}){e.hidden=o},complete(){e.hidden=!1}}),(B("navigation.tabs.sticky")?I({hidden:!1}):hs(e,t)).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}function bs(e,{viewport$:t,header$:r}){let o=new Map,n=P(".md-nav__link",e);for(let s of n){let p=decodeURIComponent(s.hash.substring(1)),c=fe(`[id="${p}"]`);typeof c!="undefined"&&o.set(s,c)}let i=r.pipe(ee("height"),m(({height:s})=>{let p=Se("main"),c=R(":scope > :first-child",p);return s+.8*(c.offsetTop-p.offsetTop)}),pe());return ge(document.body).pipe(ee("height"),v(s=>C(()=>{let p=[];return I([...o].reduce((c,[l,f])=>{for(;p.length&&o.get(p[p.length-1]).tagName>=f.tagName;)p.pop();let u=f.offsetTop;for(;!u&&f.parentElement;)f=f.parentElement,u=f.offsetTop;let d=f.offsetParent;for(;d;d=d.offsetParent)u+=d.offsetTop;return c.set([...p=[...p,l]].reverse(),u)},new Map))}).pipe(m(p=>new Map([...p].sort(([,c],[,l])=>c-l))),He(i),v(([p,c])=>t.pipe(Fr(([l,f],{offset:{y:u},size:d})=>{let y=u+d.height>=Math.floor(s.height);for(;f.length;){let[,L]=f[0];if(L-c=u&&!y)f=[l.pop(),...f];else break}return[l,f]},[[],[...p]]),K((l,f)=>l[0]===f[0]&&l[1]===f[1])))))).pipe(m(([s,p])=>({prev:s.map(([c])=>c),next:p.map(([c])=>c)})),Q({prev:[],next:[]}),Be(2,1),m(([s,p])=>s.prev.length{let i=new g,a=i.pipe(Z(),ie(!0));if(i.subscribe(({prev:s,next:p})=>{for(let[c]of p)c.classList.remove("md-nav__link--passed"),c.classList.remove("md-nav__link--active");for(let[c,[l]]of s.entries())l.classList.add("md-nav__link--passed"),l.classList.toggle("md-nav__link--active",c===s.length-1)}),B("toc.follow")){let s=O(t.pipe(_e(1),m(()=>{})),t.pipe(_e(250),m(()=>"smooth")));i.pipe(b(({prev:p})=>p.length>0),He(o.pipe(ve(se))),re(s)).subscribe(([[{prev:p}],c])=>{let[l]=p[p.length-1];if(l.offsetHeight){let f=cr(l);if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:d}=ce(f);f.scrollTo({top:u-d/2,behavior:c})}}})}return B("navigation.tracking")&&t.pipe(W(a),ee("offset"),_e(250),Ce(1),W(n.pipe(Ce(1))),ct({delay:250}),re(i)).subscribe(([,{prev:s}])=>{let p=ye(),c=s[s.length-1];if(c&&c.length){let[l]=c,{hash:f}=new URL(l.href);p.hash!==f&&(p.hash=f,history.replaceState({},"",`${p}`))}else p.hash="",history.replaceState({},"",`${p}`)}),bs(e,{viewport$:t,header$:r}).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))})}function vs(e,{viewport$:t,main$:r,target$:o}){let n=t.pipe(m(({offset:{y:a}})=>a),Be(2,1),m(([a,s])=>a>s&&s>0),K()),i=r.pipe(m(({active:a})=>a));return z([i,n]).pipe(m(([a,s])=>!(a&&s)),K(),W(o.pipe(Ce(1))),ie(!0),ct({delay:250}),m(a=>({hidden:a})))}function 
Ei(e,{viewport$:t,header$:r,main$:o,target$:n}){let i=new g,a=i.pipe(Z(),ie(!0));return i.subscribe({next({hidden:s}){e.hidden=s,s?(e.setAttribute("tabindex","-1"),e.blur()):e.removeAttribute("tabindex")},complete(){e.style.top="",e.hidden=!0,e.removeAttribute("tabindex")}}),r.pipe(W(a),ee("height")).subscribe(({height:s})=>{e.style.top=`${s+16}px`}),h(e,"click").subscribe(s=>{s.preventDefault(),window.scrollTo({top:0})}),vs(e,{viewport$:t,main$:o,target$:n}).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))}function wi({document$:e,viewport$:t}){e.pipe(v(()=>P(".md-ellipsis")),ne(r=>tt(r).pipe(W(e.pipe(Ce(1))),b(o=>o),m(()=>r),Te(1))),b(r=>r.offsetWidth{let o=r.innerText,n=r.closest("a")||r;return n.title=o,B("content.tooltips")?mt(n,{viewport$:t}).pipe(W(e.pipe(Ce(1))),_(()=>n.removeAttribute("title"))):S})).subscribe(),B("content.tooltips")&&e.pipe(v(()=>P(".md-status")),ne(r=>mt(r,{viewport$:t}))).subscribe()}function Ti({document$:e,tablet$:t}){e.pipe(v(()=>P(".md-toggle--indeterminate")),w(r=>{r.indeterminate=!0,r.checked=!1}),ne(r=>h(r,"change").pipe(Dr(()=>r.classList.contains("md-toggle--indeterminate")),m(()=>r))),re(t)).subscribe(([r,o])=>{r.classList.remove("md-toggle--indeterminate"),o&&(r.checked=!1)})}function gs(){return/(iPad|iPhone|iPod)/.test(navigator.userAgent)}function Si({document$:e}){e.pipe(v(()=>P("[data-md-scrollfix]")),w(t=>t.removeAttribute("data-md-scrollfix")),b(gs),ne(t=>h(t,"touchstart").pipe(m(()=>t)))).subscribe(t=>{let r=t.scrollTop;r===0?t.scrollTop=1:r+t.offsetHeight===t.scrollHeight&&(t.scrollTop=r-1)})}function Oi({viewport$:e,tablet$:t}){z([ze("search"),t]).pipe(m(([r,o])=>r&&!o),v(r=>I(r).pipe(Ge(r?400:100))),re(e)).subscribe(([r,{offset:{y:o}}])=>{if(r)document.body.setAttribute("data-md-scrolllock",""),document.body.style.top=`-${o}px`;else{let n=-1*parseInt(document.body.style.top,10);document.body.removeAttribute("data-md-scrolllock"),document.body.style.top="",n&&window.scrollTo(0,n)}})}Object.entries||(Object.entries=function(e){let t=[];for(let r of Object.keys(e))t.push([r,e[r]]);return t});Object.values||(Object.values=function(e){let t=[];for(let r of Object.keys(e))t.push(e[r]);return t});typeof Element!="undefined"&&(Element.prototype.scrollTo||(Element.prototype.scrollTo=function(e,t){typeof e=="object"?(this.scrollLeft=e.left,this.scrollTop=e.top):(this.scrollLeft=e,this.scrollTop=t)}),Element.prototype.replaceWith||(Element.prototype.replaceWith=function(...e){let t=this.parentNode;if(t){e.length===0&&t.removeChild(this);for(let r=e.length-1;r>=0;r--){let o=e[r];typeof o=="string"?o=document.createTextNode(o):o.parentNode&&o.parentNode.removeChild(o),r?t.insertBefore(this.previousSibling,o):t.replaceChild(o,this)}}}));function ys(){return location.protocol==="file:"?Tt(`${new URL("search/search_index.js",eo.base)}`).pipe(m(()=>__index),G(1)):je(new URL("search/search_index.json",eo.base))}document.documentElement.classList.remove("no-js");document.documentElement.classList.add("js");var ot=Go(),Ut=sn(),Lt=ln(Ut),to=an(),Oe=gn(),hr=Pt("(min-width: 960px)"),Mi=Pt("(min-width: 1220px)"),_i=mn(),eo=xe(),Ai=document.forms.namedItem("search")?ys():Ye,ro=new g;Zn({alert$:ro});var oo=new g;B("navigation.instant")&&oi({location$:Ut,viewport$:Oe,progress$:oo}).subscribe(ot);var Li;((Li=eo.version)==null?void 0:Li.provider)==="mike"&&ci({document$:ot});O(Ut,Lt).pipe(Ge(125)).subscribe(()=>{Je("drawer",!1),Je("search",!1)});to.pipe(b(({mode:e})=>e==="global")).subscribe(e=>{switch(e.type){case"p":case",":let 
t=fe("link[rel=prev]");typeof t!="undefined"&<(t);break;case"n":case".":let r=fe("link[rel=next]");typeof r!="undefined"&<(r);break;case"Enter":let o=Ie();o instanceof HTMLLabelElement&&o.click()}});wi({viewport$:Oe,document$:ot});Ti({document$:ot,tablet$:hr});Si({document$:ot});Oi({viewport$:Oe,tablet$:hr});var rt=Kn(Se("header"),{viewport$:Oe}),Ft=ot.pipe(m(()=>Se("main")),v(e=>Gn(e,{viewport$:Oe,header$:rt})),G(1)),xs=O(...ae("consent").map(e=>En(e,{target$:Lt})),...ae("dialog").map(e=>qn(e,{alert$:ro})),...ae("palette").map(e=>Jn(e)),...ae("progress").map(e=>Xn(e,{progress$:oo})),...ae("search").map(e=>ui(e,{index$:Ai,keyboard$:to})),...ae("source").map(e=>gi(e))),Es=C(()=>O(...ae("announce").map(e=>xn(e)),...ae("content").map(e=>zn(e,{viewport$:Oe,target$:Lt,print$:_i})),...ae("content").map(e=>B("search.highlight")?di(e,{index$:Ai,location$:Ut}):S),...ae("header").map(e=>Yn(e,{viewport$:Oe,header$:rt,main$:Ft})),...ae("header-title").map(e=>Bn(e,{viewport$:Oe,header$:rt})),...ae("sidebar").map(e=>e.getAttribute("data-md-type")==="navigation"?Nr(Mi,()=>Zr(e,{viewport$:Oe,header$:rt,main$:Ft})):Nr(hr,()=>Zr(e,{viewport$:Oe,header$:rt,main$:Ft}))),...ae("tabs").map(e=>yi(e,{viewport$:Oe,header$:rt})),...ae("toc").map(e=>xi(e,{viewport$:Oe,header$:rt,main$:Ft,target$:Lt})),...ae("top").map(e=>Ei(e,{viewport$:Oe,header$:rt,main$:Ft,target$:Lt})))),Ci=ot.pipe(v(()=>Es),Re(xs),G(1));Ci.subscribe();window.document$=ot;window.location$=Ut;window.target$=Lt;window.keyboard$=to;window.viewport$=Oe;window.tablet$=hr;window.screen$=Mi;window.print$=_i;window.alert$=ro;window.progress$=oo;window.component$=Ci;})(); -//# sourceMappingURL=bundle.83f73b43.min.js.map + `):"",this.name="UnsubscriptionError",this.errors=r}});function Qe(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var Ue=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var t,r,o,n,i;if(!this.closed){this.closed=!0;var a=this._parentage;if(a)if(this._parentage=null,Array.isArray(a))try{for(var s=he(a),p=s.next();!p.done;p=s.next()){var c=p.value;c.remove(this)}}catch(L){t={error:L}}finally{try{p&&!p.done&&(r=s.return)&&r.call(s)}finally{if(t)throw t.error}}else a.remove(this);var l=this.initialTeardown;if(H(l))try{l()}catch(L){i=L instanceof zt?L.errors:[L]}var f=this._finalizers;if(f){this._finalizers=null;try{for(var u=he(f),d=u.next();!d.done;d=u.next()){var y=d.value;try{ho(y)}catch(L){i=i!=null?i:[],L instanceof zt?i=q(q([],N(i)),N(L.errors)):i.push(L)}}}catch(L){o={error:L}}finally{try{d&&!d.done&&(n=u.return)&&n.call(u)}finally{if(o)throw o.error}}}if(i)throw new zt(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)ho(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&Qe(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&Qe(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Tr=Ue.EMPTY;function qt(e){return e instanceof Ue||e&&"closed"in e&&H(e.remove)&&H(e.add)&&H(e.unsubscribe)}function 
ho(e){H(e)?e():e.unsubscribe()}var Pe={onUnhandledError:null,onStoppedNotification:null,Promise:void 0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var dt={setTimeout:function(e,t){for(var r=[],o=2;o0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var o=this,n=this,i=n.hasError,a=n.isStopped,s=n.observers;return i||a?Tr:(this.currentObservers=null,s.push(r),new Ue(function(){o.currentObservers=null,Qe(s,r)}))},t.prototype._checkFinalizedStatuses=function(r){var o=this,n=o.hasError,i=o.thrownError,a=o.isStopped;n?r.error(i):a&&r.complete()},t.prototype.asObservable=function(){var r=new j;return r.source=this,r},t.create=function(r,o){return new To(r,o)},t}(j);var To=function(e){oe(t,e);function t(r,o){var n=e.call(this)||this;return n.destination=r,n.source=o,n}return t.prototype.next=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.next)===null||n===void 0||n.call(o,r)},t.prototype.error=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.error)===null||n===void 0||n.call(o,r)},t.prototype.complete=function(){var r,o;(o=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||o===void 0||o.call(r)},t.prototype._subscribe=function(r){var o,n;return(n=(o=this.source)===null||o===void 0?void 0:o.subscribe(r))!==null&&n!==void 0?n:Tr},t}(g);var _r=function(e){oe(t,e);function t(r){var o=e.call(this)||this;return o._value=r,o}return Object.defineProperty(t.prototype,"value",{get:function(){return this.getValue()},enumerable:!1,configurable:!0}),t.prototype._subscribe=function(r){var o=e.prototype._subscribe.call(this,r);return!o.closed&&r.next(this._value),o},t.prototype.getValue=function(){var r=this,o=r.hasError,n=r.thrownError,i=r._value;if(o)throw n;return this._throwIfClosed(),i},t.prototype.next=function(r){e.prototype.next.call(this,this._value=r)},t}(g);var At={now:function(){return(At.delegate||Date).now()},delegate:void 0};var Ct=function(e){oe(t,e);function t(r,o,n){r===void 0&&(r=1/0),o===void 0&&(o=1/0),n===void 0&&(n=At);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=o,i._timestampProvider=n,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=o===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,o),i}return t.prototype.next=function(r){var o=this,n=o.isStopped,i=o._buffer,a=o._infiniteTimeWindow,s=o._timestampProvider,p=o._windowTime;n||(i.push(r),!a&&i.push(s.now()+p)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var o=this._innerSubscribe(r),n=this,i=n._infiniteTimeWindow,a=n._buffer,s=a.slice(),p=0;p0?e.prototype.schedule.call(this,r,o):(this.delay=o,this.state=r,this.scheduler.flush(this),this)},t.prototype.execute=function(r,o){return o>0||this.closed?e.prototype.execute.call(this,r,o):this._execute(r,o)},t.prototype.requestAsyncId=function(r,o,n){return n===void 0&&(n=0),n!=null&&n>0||n==null&&this.delay>0?e.prototype.requestAsyncId.call(this,r,o,n):(r.flush(this),0)},t}(gt);var Lo=function(e){oe(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t}(yt);var kr=new Lo(Oo);var Mo=function(e){oe(t,e);function t(r,o){var n=e.call(this,r,o)||this;return n.scheduler=r,n.work=o,n}return 
t.prototype.requestAsyncId=function(r,o,n){return n===void 0&&(n=0),n!==null&&n>0?e.prototype.requestAsyncId.call(this,r,o,n):(r.actions.push(this),r._scheduled||(r._scheduled=vt.requestAnimationFrame(function(){return r.flush(void 0)})))},t.prototype.recycleAsyncId=function(r,o,n){var i;if(n===void 0&&(n=0),n!=null?n>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,o,n);var a=r.actions;o!=null&&((i=a[a.length-1])===null||i===void 0?void 0:i.id)!==o&&(vt.cancelAnimationFrame(o),r._scheduled=void 0)},t}(gt);var _o=function(e){oe(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var o=this._scheduled;this._scheduled=void 0;var n=this.actions,i;r=r||n.shift();do if(i=r.execute(r.state,r.delay))break;while((r=n[0])&&r.id===o&&n.shift());if(this._active=!1,i){for(;(r=n[0])&&r.id===o&&n.shift();)r.unsubscribe();throw i}},t}(yt);var me=new _o(Mo);var S=new j(function(e){return e.complete()});function Yt(e){return e&&H(e.schedule)}function Hr(e){return e[e.length-1]}function Xe(e){return H(Hr(e))?e.pop():void 0}function ke(e){return Yt(Hr(e))?e.pop():void 0}function Bt(e,t){return typeof Hr(e)=="number"?e.pop():t}var xt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function Gt(e){return H(e==null?void 0:e.then)}function Jt(e){return H(e[bt])}function Xt(e){return Symbol.asyncIterator&&H(e==null?void 0:e[Symbol.asyncIterator])}function Zt(e){return new TypeError("You provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function Zi(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var er=Zi();function tr(e){return H(e==null?void 0:e[er])}function rr(e){return fo(this,arguments,function(){var r,o,n,i;return Nt(this,function(a){switch(a.label){case 0:r=e.getReader(),a.label=1;case 1:a.trys.push([1,,9,10]),a.label=2;case 2:return[4,nt(r.read())];case 3:return o=a.sent(),n=o.value,i=o.done,i?[4,nt(void 0)]:[3,5];case 4:return[2,a.sent()];case 5:return[4,nt(n)];case 6:return[4,a.sent()];case 7:return a.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function or(e){return H(e==null?void 0:e.getReader)}function U(e){if(e instanceof j)return e;if(e!=null){if(Jt(e))return ea(e);if(xt(e))return ta(e);if(Gt(e))return ra(e);if(Xt(e))return Ao(e);if(tr(e))return oa(e);if(or(e))return na(e)}throw Zt(e)}function ea(e){return new j(function(t){var r=e[bt]();if(H(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function ta(e){return new j(function(t){for(var r=0;r=2;return function(o){return o.pipe(e?b(function(n,i){return e(n,i,o)}):le,Te(1),r?De(t):Qo(function(){return new ir}))}}function jr(e){return e<=0?function(){return S}:E(function(t,r){var o=[];t.subscribe(T(r,function(n){o.push(n),e=2,!0))}function pe(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new g}:t,o=e.resetOnError,n=o===void 0?!0:o,i=e.resetOnComplete,a=i===void 0?!0:i,s=e.resetOnRefCountZero,p=s===void 0?!0:s;return function(c){var l,f,u,d=0,y=!1,L=!1,X=function(){f==null||f.unsubscribe(),f=void 0},ee=function(){X(),l=u=void 0,y=L=!1},J=function(){var k=l;ee(),k==null||k.unsubscribe()};return E(function(k,ft){d++,!L&&!y&&X();var qe=u=u!=null?u:r();ft.add(function(){d--,d===0&&!L&&!y&&(f=Ur(J,p))}),qe.subscribe(ft),!l&&d>0&&(l=new 
at({next:function(Fe){return qe.next(Fe)},error:function(Fe){L=!0,X(),f=Ur(ee,n,Fe),qe.error(Fe)},complete:function(){y=!0,X(),f=Ur(ee,a),qe.complete()}}),U(k).subscribe(l))})(c)}}function Ur(e,t){for(var r=[],o=2;oe.next(document)),e}function P(e,t=document){return Array.from(t.querySelectorAll(e))}function R(e,t=document){let r=fe(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function fe(e,t=document){return t.querySelector(e)||void 0}function Ie(){var e,t,r,o;return(o=(r=(t=(e=document.activeElement)==null?void 0:e.shadowRoot)==null?void 0:t.activeElement)!=null?r:document.activeElement)!=null?o:void 0}var wa=O(h(document.body,"focusin"),h(document.body,"focusout")).pipe(_e(1),Q(void 0),m(()=>Ie()||document.body),G(1));function et(e){return wa.pipe(m(t=>e.contains(t)),K())}function $t(e,t){return C(()=>O(h(e,"mouseenter").pipe(m(()=>!0)),h(e,"mouseleave").pipe(m(()=>!1))).pipe(t?Ht(r=>Le(+!r*t)):le,Q(e.matches(":hover"))))}function Jo(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)Jo(e,r)}function x(e,t,...r){let o=document.createElement(e);if(t)for(let n of Object.keys(t))typeof t[n]!="undefined"&&(typeof t[n]!="boolean"?o.setAttribute(n,t[n]):o.setAttribute(n,""));for(let n of r)Jo(o,n);return o}function sr(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function Tt(e){let t=x("script",{src:e});return C(()=>(document.head.appendChild(t),O(h(t,"load"),h(t,"error").pipe(v(()=>$r(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(m(()=>{}),_(()=>document.head.removeChild(t)),Te(1))))}var Xo=new g,Ta=C(()=>typeof ResizeObserver=="undefined"?Tt("https://unpkg.com/resize-observer-polyfill"):I(void 0)).pipe(m(()=>new ResizeObserver(e=>e.forEach(t=>Xo.next(t)))),v(e=>O(Ye,I(e)).pipe(_(()=>e.disconnect()))),G(1));function ce(e){return{width:e.offsetWidth,height:e.offsetHeight}}function ge(e){let t=e;for(;t.clientWidth===0&&t.parentElement;)t=t.parentElement;return Ta.pipe(w(r=>r.observe(t)),v(r=>Xo.pipe(b(o=>o.target===t),_(()=>r.unobserve(t)))),m(()=>ce(e)),Q(ce(e)))}function St(e){return{width:e.scrollWidth,height:e.scrollHeight}}function cr(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}function Zo(e){let t=[],r=e.parentElement;for(;r;)(e.clientWidth>r.clientWidth||e.clientHeight>r.clientHeight)&&t.push(r),r=(e=r).parentElement;return t.length===0&&t.push(document.documentElement),t}function Ve(e){return{x:e.offsetLeft,y:e.offsetTop}}function en(e){let t=e.getBoundingClientRect();return{x:t.x+window.scrollX,y:t.y+window.scrollY}}function tn(e){return O(h(window,"load"),h(window,"resize")).pipe(Me(0,me),m(()=>Ve(e)),Q(Ve(e)))}function pr(e){return{x:e.scrollLeft,y:e.scrollTop}}function Ne(e){return O(h(e,"scroll"),h(window,"scroll"),h(window,"resize")).pipe(Me(0,me),m(()=>pr(e)),Q(pr(e)))}var rn=new g,Sa=C(()=>I(new IntersectionObserver(e=>{for(let t of e)rn.next(t)},{threshold:0}))).pipe(v(e=>O(Ye,I(e)).pipe(_(()=>e.disconnect()))),G(1));function tt(e){return Sa.pipe(w(t=>t.observe(e)),v(t=>rn.pipe(b(({target:r})=>r===e),_(()=>t.unobserve(e)),m(({isIntersecting:r})=>r))))}function on(e,t=16){return Ne(e).pipe(m(({y:r})=>{let o=ce(e),n=St(e);return r>=n.height-o.height-t}),K())}var lr={drawer:R("[data-md-toggle=drawer]"),search:R("[data-md-toggle=search]")};function 
nn(e){return lr[e].checked}function Je(e,t){lr[e].checked!==t&&lr[e].click()}function ze(e){let t=lr[e];return h(t,"change").pipe(m(()=>t.checked),Q(t.checked))}function Oa(e,t){switch(e.constructor){case HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function La(){return O(h(window,"compositionstart").pipe(m(()=>!0)),h(window,"compositionend").pipe(m(()=>!1))).pipe(Q(!1))}function an(){let e=h(window,"keydown").pipe(b(t=>!(t.metaKey||t.ctrlKey)),m(t=>({mode:nn("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),b(({mode:t,type:r})=>{if(t==="global"){let o=Ie();if(typeof o!="undefined")return!Oa(o,r)}return!0}),pe());return La().pipe(v(t=>t?S:e))}function ye(){return new URL(location.href)}function lt(e,t=!1){if(B("navigation.instant")&&!t){let r=x("a",{href:e.href});document.body.appendChild(r),r.click(),r.remove()}else location.href=e.href}function sn(){return new g}function cn(){return location.hash.slice(1)}function pn(e){let t=x("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function Ma(e){return O(h(window,"hashchange"),e).pipe(m(cn),Q(cn()),b(t=>t.length>0),G(1))}function ln(e){return Ma(e).pipe(m(t=>fe(`[id="${t}"]`)),b(t=>typeof t!="undefined"))}function Pt(e){let t=matchMedia(e);return ar(r=>t.addListener(()=>r(t.matches))).pipe(Q(t.matches))}function mn(){let e=matchMedia("print");return O(h(window,"beforeprint").pipe(m(()=>!0)),h(window,"afterprint").pipe(m(()=>!1))).pipe(Q(e.matches))}function Nr(e,t){return e.pipe(v(r=>r?t():S))}function zr(e,t){return new j(r=>{let o=new XMLHttpRequest;return o.open("GET",`${e}`),o.responseType="blob",o.addEventListener("load",()=>{o.status>=200&&o.status<300?(r.next(o.response),r.complete()):r.error(new Error(o.statusText))}),o.addEventListener("error",()=>{r.error(new Error("Network error"))}),o.addEventListener("abort",()=>{r.complete()}),typeof(t==null?void 0:t.progress$)!="undefined"&&(o.addEventListener("progress",n=>{var i;if(n.lengthComputable)t.progress$.next(n.loaded/n.total*100);else{let a=(i=o.getResponseHeader("Content-Length"))!=null?i:0;t.progress$.next(n.loaded/+a*100)}}),t.progress$.next(5)),o.send(),()=>o.abort()})}function je(e,t){return zr(e,t).pipe(v(r=>r.text()),m(r=>JSON.parse(r)),G(1))}function fn(e,t){let r=new DOMParser;return zr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/html")),G(1))}function un(e,t){let r=new DOMParser;return zr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/xml")),G(1))}function dn(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function hn(){return O(h(window,"scroll",{passive:!0}),h(window,"resize",{passive:!0})).pipe(m(dn),Q(dn()))}function bn(){return{width:innerWidth,height:innerHeight}}function vn(){return h(window,"resize",{passive:!0}).pipe(m(bn),Q(bn()))}function gn(){return z([hn(),vn()]).pipe(m(([e,t])=>({offset:e,size:t})),G(1))}function mr(e,{viewport$:t,header$:r}){let o=t.pipe(te("size")),n=z([o,r]).pipe(m(()=>Ve(e)));return z([r,t,n]).pipe(m(([{height:i},{offset:a,size:s},{x:p,y:c}])=>({offset:{x:a.x-p,y:a.y-c+i},size:s})))}function _a(e){return h(e,"message",t=>t.data)}function Aa(e){let t=new g;return t.subscribe(r=>e.postMessage(r)),t}function yn(e,t=new Worker(e)){let r=_a(t),o=Aa(t),n=new g;n.subscribe(o);let i=o.pipe(Z(),ie(!0));return n.pipe(Z(),Re(r.pipe(W(i))),pe())}var Ca=R("#__config"),Ot=JSON.parse(Ca.textContent);Ot.base=`${new URL(Ot.base,ye())}`;function xe(){return 
Ot}function B(e){return Ot.features.includes(e)}function Ee(e,t){return typeof t!="undefined"?Ot.translations[e].replace("#",t.toString()):Ot.translations[e]}function Se(e,t=document){return R(`[data-md-component=${e}]`,t)}function ae(e,t=document){return P(`[data-md-component=${e}]`,t)}function ka(e){let t=R(".md-typeset > :first-child",e);return h(t,"click",{once:!0}).pipe(m(()=>R(".md-typeset",e)),m(r=>({hash:__md_hash(r.innerHTML)})))}function xn(e){if(!B("announce.dismiss")||!e.childElementCount)return S;if(!e.hidden){let t=R(".md-typeset",e);__md_hash(t.innerHTML)===__md_get("__announce")&&(e.hidden=!0)}return C(()=>{let t=new g;return t.subscribe(({hash:r})=>{e.hidden=!0,__md_set("__announce",r)}),ka(e).pipe(w(r=>t.next(r)),_(()=>t.complete()),m(r=>$({ref:e},r)))})}function Ha(e,{target$:t}){return t.pipe(m(r=>({hidden:r!==e})))}function En(e,t){let r=new g;return r.subscribe(({hidden:o})=>{e.hidden=o}),Ha(e,t).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))}function Rt(e,t){return t==="inline"?x("div",{class:"md-tooltip md-tooltip--inline",id:e,role:"tooltip"},x("div",{class:"md-tooltip__inner md-typeset"})):x("div",{class:"md-tooltip",id:e,role:"tooltip"},x("div",{class:"md-tooltip__inner md-typeset"}))}function wn(...e){return x("div",{class:"md-tooltip2",role:"tooltip"},x("div",{class:"md-tooltip2__inner md-typeset"},e))}function Tn(e,t){if(t=t?`${t}_annotation_${e}`:void 0,t){let r=t?`#${t}`:void 0;return x("aside",{class:"md-annotation",tabIndex:0},Rt(t),x("a",{href:r,class:"md-annotation__index",tabIndex:-1},x("span",{"data-md-annotation-id":e})))}else return x("aside",{class:"md-annotation",tabIndex:0},Rt(t),x("span",{class:"md-annotation__index",tabIndex:-1},x("span",{"data-md-annotation-id":e})))}function Sn(e){return x("button",{class:"md-clipboard md-icon",title:Ee("clipboard.copy"),"data-clipboard-target":`#${e} > code`})}var Ln=Mt(qr());function Qr(e,t){let r=t&2,o=t&1,n=Object.keys(e.terms).filter(p=>!e.terms[p]).reduce((p,c)=>[...p,x("del",null,(0,Ln.default)(c))," "],[]).slice(0,-1),i=xe(),a=new URL(e.location,i.base);B("search.highlight")&&a.searchParams.set("h",Object.entries(e.terms).filter(([,p])=>p).reduce((p,[c])=>`${p} ${c}`.trim(),""));let{tags:s}=xe();return x("a",{href:`${a}`,class:"md-search-result__link",tabIndex:-1},x("article",{class:"md-search-result__article md-typeset","data-md-score":e.score.toFixed(2)},r>0&&x("div",{class:"md-search-result__icon md-icon"}),r>0&&x("h1",null,e.title),r<=0&&x("h2",null,e.title),o>0&&e.text.length>0&&e.text,e.tags&&x("nav",{class:"md-tags"},e.tags.map(p=>{let c=s?p in s?`md-tag-icon md-tag--${s[p]}`:"md-tag-icon":"";return x("span",{class:`md-tag ${c}`},p)})),o>0&&n.length>0&&x("p",{class:"md-search-result__terms"},Ee("search.result.term.missing"),": ",...n)))}function Mn(e){let t=e[0].score,r=[...e],o=xe(),n=r.findIndex(l=>!`${new URL(l.location,o.base)}`.includes("#")),[i]=r.splice(n,1),a=r.findIndex(l=>l.scoreQr(l,1)),...p.length?[x("details",{class:"md-search-result__more"},x("summary",{tabIndex:-1},x("div",null,p.length>0&&p.length===1?Ee("search.result.more.one"):Ee("search.result.more.other",p.length))),...p.map(l=>Qr(l,1)))]:[]];return x("li",{class:"md-search-result__item"},c)}function _n(e){return x("ul",{class:"md-source__facts"},Object.entries(e).map(([t,r])=>x("li",{class:`md-source__fact md-source__fact--${t}`},typeof r=="number"?sr(r):r)))}function Kr(e){let t=`tabbed-control tabbed-control--${e}`;return 
x("div",{class:t,hidden:!0},x("button",{class:"tabbed-button",tabIndex:-1,"aria-hidden":"true"}))}function An(e){return x("div",{class:"md-typeset__scrollwrap"},x("div",{class:"md-typeset__table"},e))}function Ra(e){var o;let t=xe(),r=new URL(`../${e.version}/`,t.base);return x("li",{class:"md-version__item"},x("a",{href:`${r}`,class:"md-version__link"},e.title,((o=t.version)==null?void 0:o.alias)&&e.aliases.length>0&&x("span",{class:"md-version__alias"},e.aliases[0])))}function Cn(e,t){var o;let r=xe();return e=e.filter(n=>{var i;return!((i=n.properties)!=null&&i.hidden)}),x("div",{class:"md-version"},x("button",{class:"md-version__current","aria-label":Ee("select.version")},t.title,((o=r.version)==null?void 0:o.alias)&&t.aliases.length>0&&x("span",{class:"md-version__alias"},t.aliases[0])),x("ul",{class:"md-version__list"},e.map(Ra)))}var Ia=0;function ja(e){let t=z([et(e),$t(e)]).pipe(m(([o,n])=>o||n),K()),r=C(()=>Zo(e)).pipe(ne(Ne),pt(1),He(t),m(()=>en(e)));return t.pipe(Ae(o=>o),v(()=>z([t,r])),m(([o,n])=>({active:o,offset:n})),pe())}function Fa(e,t){let{content$:r,viewport$:o}=t,n=`__tooltip2_${Ia++}`;return C(()=>{let i=new g,a=new _r(!1);i.pipe(Z(),ie(!1)).subscribe(a);let s=a.pipe(Ht(c=>Le(+!c*250,kr)),K(),v(c=>c?r:S),w(c=>c.id=n),pe());z([i.pipe(m(({active:c})=>c)),s.pipe(v(c=>$t(c,250)),Q(!1))]).pipe(m(c=>c.some(l=>l))).subscribe(a);let p=a.pipe(b(c=>c),re(s,o),m(([c,l,{size:f}])=>{let u=e.getBoundingClientRect(),d=u.width/2;if(l.role==="tooltip")return{x:d,y:8+u.height};if(u.y>=f.height/2){let{height:y}=ce(l);return{x:d,y:-16-y}}else return{x:d,y:16+u.height}}));return z([s,i,p]).subscribe(([c,{offset:l},f])=>{c.style.setProperty("--md-tooltip-host-x",`${l.x}px`),c.style.setProperty("--md-tooltip-host-y",`${l.y}px`),c.style.setProperty("--md-tooltip-x",`${f.x}px`),c.style.setProperty("--md-tooltip-y",`${f.y}px`),c.classList.toggle("md-tooltip2--top",f.y<0),c.classList.toggle("md-tooltip2--bottom",f.y>=0)}),a.pipe(b(c=>c),re(s,(c,l)=>l),b(c=>c.role==="tooltip")).subscribe(c=>{let l=ce(R(":scope > *",c));c.style.setProperty("--md-tooltip-width",`${l.width}px`),c.style.setProperty("--md-tooltip-tail","0px")}),a.pipe(K(),ve(me),re(s)).subscribe(([c,l])=>{l.classList.toggle("md-tooltip2--active",c)}),z([a.pipe(b(c=>c)),s]).subscribe(([c,l])=>{l.role==="dialog"?(e.setAttribute("aria-controls",n),e.setAttribute("aria-haspopup","dialog")):e.setAttribute("aria-describedby",n)}),a.pipe(b(c=>!c)).subscribe(()=>{e.removeAttribute("aria-controls"),e.removeAttribute("aria-describedby"),e.removeAttribute("aria-haspopup")}),ja(e).pipe(w(c=>i.next(c)),_(()=>i.complete()),m(c=>$({ref:e},c)))})}function mt(e,{viewport$:t},r=document.body){return Fa(e,{content$:new j(o=>{let n=e.title,i=wn(n);return o.next(i),e.removeAttribute("title"),r.append(i),()=>{i.remove(),e.setAttribute("title",n)}}),viewport$:t})}function Ua(e,t){let r=C(()=>z([tn(e),Ne(t)])).pipe(m(([{x:o,y:n},i])=>{let{width:a,height:s}=ce(e);return{x:o-i.x+a/2,y:n-i.y+s/2}}));return et(e).pipe(v(o=>r.pipe(m(n=>({active:o,offset:n})),Te(+!o||1/0))))}function kn(e,t,{target$:r}){let[o,n]=Array.from(e.children);return C(()=>{let i=new g,a=i.pipe(Z(),ie(!0));return 
i.subscribe({next({offset:s}){e.style.setProperty("--md-tooltip-x",`${s.x}px`),e.style.setProperty("--md-tooltip-y",`${s.y}px`)},complete(){e.style.removeProperty("--md-tooltip-x"),e.style.removeProperty("--md-tooltip-y")}}),tt(e).pipe(W(a)).subscribe(s=>{e.toggleAttribute("data-md-visible",s)}),O(i.pipe(b(({active:s})=>s)),i.pipe(_e(250),b(({active:s})=>!s))).subscribe({next({active:s}){s?e.prepend(o):o.remove()},complete(){e.prepend(o)}}),i.pipe(Me(16,me)).subscribe(({active:s})=>{o.classList.toggle("md-tooltip--active",s)}),i.pipe(pt(125,me),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:s})=>s)).subscribe({next(s){s?e.style.setProperty("--md-tooltip-0",`${-s}px`):e.style.removeProperty("--md-tooltip-0")},complete(){e.style.removeProperty("--md-tooltip-0")}}),h(n,"click").pipe(W(a),b(s=>!(s.metaKey||s.ctrlKey))).subscribe(s=>{s.stopPropagation(),s.preventDefault()}),h(n,"mousedown").pipe(W(a),re(i)).subscribe(([s,{active:p}])=>{var c;if(s.button!==0||s.metaKey||s.ctrlKey)s.preventDefault();else if(p){s.preventDefault();let l=e.parentElement.closest(".md-annotation");l instanceof HTMLElement?l.focus():(c=Ie())==null||c.blur()}}),r.pipe(W(a),b(s=>s===o),Ge(125)).subscribe(()=>e.focus()),Ua(e,t).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))})}function Wa(e){return e.tagName==="CODE"?P(".c, .c1, .cm",e):[e]}function Da(e){let t=[];for(let r of Wa(e)){let o=[],n=document.createNodeIterator(r,NodeFilter.SHOW_TEXT);for(let i=n.nextNode();i;i=n.nextNode())o.push(i);for(let i of o){let a;for(;a=/(\(\d+\))(!)?/.exec(i.textContent);){let[,s,p]=a;if(typeof p=="undefined"){let c=i.splitText(a.index);i=c.splitText(s.length),t.push(c)}else{i.textContent=s,t.push(i);break}}}}return t}function Hn(e,t){t.append(...Array.from(e.childNodes))}function fr(e,t,{target$:r,print$:o}){let n=t.closest("[id]"),i=n==null?void 0:n.id,a=new Map;for(let s of Da(t)){let[,p]=s.textContent.match(/\((\d+)\)/);fe(`:scope > li:nth-child(${p})`,e)&&(a.set(p,Tn(p,i)),s.replaceWith(a.get(p)))}return a.size===0?S:C(()=>{let s=new g,p=s.pipe(Z(),ie(!0)),c=[];for(let[l,f]of a)c.push([R(".md-typeset",f),R(`:scope > li:nth-child(${l})`,e)]);return o.pipe(W(p)).subscribe(l=>{e.hidden=!l,e.classList.toggle("md-annotation-list",l);for(let[f,u]of c)l?Hn(f,u):Hn(u,f)}),O(...[...a].map(([,l])=>kn(l,t,{target$:r}))).pipe(_(()=>s.complete()),pe())})}function $n(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return $n(t)}}function Pn(e,t){return C(()=>{let r=$n(e);return typeof r!="undefined"?fr(r,e,t):S})}var Rn=Mt(Br());var Va=0;function In(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return In(t)}}function Na(e){return ge(e).pipe(m(({width:t})=>({scrollable:St(e).width>t})),te("scrollable"))}function jn(e,t){let{matches:r}=matchMedia("(hover)"),o=C(()=>{let n=new g,i=n.pipe(jr(1));n.subscribe(({scrollable:c})=>{c&&r?e.setAttribute("tabindex","0"):e.removeAttribute("tabindex")});let a=[];if(Rn.default.isSupported()&&(e.closest(".copy")||B("content.code.copy")&&!e.closest(".no-copy"))){let c=e.closest("pre");c.id=`__code_${Va++}`;let l=Sn(c.id);c.insertBefore(l,e),B("content.tooltips")&&a.push(mt(l,{viewport$}))}let s=e.closest(".highlight");if(s instanceof HTMLElement){let c=In(s);if(typeof c!="undefined"&&(s.classList.contains("annotate")||B("content.code.annotate"))){let 
l=fr(c,e,t);a.push(ge(s).pipe(W(i),m(({width:f,height:u})=>f&&u),K(),v(f=>f?l:S)))}}return P(":scope > span[id]",e).length&&e.classList.add("md-code__content"),Na(e).pipe(w(c=>n.next(c)),_(()=>n.complete()),m(c=>$({ref:e},c)),Re(...a))});return B("content.lazy")?tt(e).pipe(b(n=>n),Te(1),v(()=>o)):o}function za(e,{target$:t,print$:r}){let o=!0;return O(t.pipe(m(n=>n.closest("details:not([open])")),b(n=>e===n),m(()=>({action:"open",reveal:!0}))),r.pipe(b(n=>n||!o),w(()=>o=e.open),m(n=>({action:n?"open":"close"}))))}function Fn(e,t){return C(()=>{let r=new g;return r.subscribe(({action:o,reveal:n})=>{e.toggleAttribute("open",o==="open"),n&&e.scrollIntoView()}),za(e,t).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}var Un=".node circle,.node ellipse,.node path,.node polygon,.node rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}marker{fill:var(--md-mermaid-edge-color)!important}.edgeLabel .label rect{fill:#0000}.flowchartTitleText{fill:var(--md-mermaid-label-fg-color)}.label{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.label foreignObject{line-height:normal;overflow:visible}.label div .edgeLabel{color:var(--md-mermaid-label-fg-color)}.edgeLabel,.edgeLabel p,.label div .edgeLabel{background-color:var(--md-mermaid-label-bg-color)}.edgeLabel,.edgeLabel p{fill:var(--md-mermaid-label-bg-color);color:var(--md-mermaid-edge-color)}.edgePath .path,.flowchart-link{stroke:var(--md-mermaid-edge-color);stroke-width:.05rem}.edgePath .arrowheadPath{fill:var(--md-mermaid-edge-color);stroke:none}.cluster rect{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}.cluster span{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}g #flowchart-circleEnd,g #flowchart-circleStart,g #flowchart-crossEnd,g #flowchart-crossStart,g #flowchart-pointEnd,g #flowchart-pointStart{stroke:none}.classDiagramTitleText{fill:var(--md-mermaid-label-fg-color)}g.classGroup line,g.classGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.classGroup text{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.classLabel .box{fill:var(--md-mermaid-label-bg-color);background-color:var(--md-mermaid-label-bg-color);opacity:1}.classLabel .label{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.node .divider{stroke:var(--md-mermaid-node-fg-color)}.relation{stroke:var(--md-mermaid-edge-color)}.cardinality{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.cardinality text{fill:inherit!important}defs #classDiagram-compositionEnd,defs #classDiagram-compositionStart,defs #classDiagram-dependencyEnd,defs #classDiagram-dependencyStart,defs #classDiagram-extensionEnd,defs #classDiagram-extensionStart{fill:var(--md-mermaid-edge-color)!important;stroke:var(--md-mermaid-edge-color)!important}defs #classDiagram-aggregationEnd,defs #classDiagram-aggregationStart{fill:var(--md-mermaid-label-bg-color)!important;stroke:var(--md-mermaid-edge-color)!important}.statediagramTitleText{fill:var(--md-mermaid-label-fg-color)}g.stateGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.stateGroup .state-title{fill:var(--md-mermaid-label-fg-color)!important;font-family:var(--md-mermaid-font-family)}g.stateGroup .composit{fill:var(--md-mermaid-label-bg-color)}.nodeLabel,.nodeLabel p{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}a 
.nodeLabel{text-decoration:underline}.node circle.state-end,.node circle.state-start,.start-state{fill:var(--md-mermaid-edge-color);stroke:none}.end-state-inner,.end-state-outer{fill:var(--md-mermaid-edge-color)}.end-state-inner,.node circle.state-end{stroke:var(--md-mermaid-label-bg-color)}.transition{stroke:var(--md-mermaid-edge-color)}[id^=state-fork] rect,[id^=state-join] rect{fill:var(--md-mermaid-edge-color)!important;stroke:none!important}.statediagram-cluster.statediagram-cluster .inner{fill:var(--md-default-bg-color)}.statediagram-cluster rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.statediagram-state rect.divider{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}defs #statediagram-barbEnd{stroke:var(--md-mermaid-edge-color)}.entityTitleText{fill:var(--md-mermaid-label-fg-color)}.attributeBoxEven,.attributeBoxOdd{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityBox{fill:var(--md-mermaid-label-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityLabel{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.relationshipLabelBox{fill:var(--md-mermaid-label-bg-color);fill-opacity:1;background-color:var(--md-mermaid-label-bg-color);opacity:1}.relationshipLabel{fill:var(--md-mermaid-label-fg-color)}.relationshipLine{stroke:var(--md-mermaid-edge-color)}defs #ONE_OR_MORE_END *,defs #ONE_OR_MORE_START *,defs #ONLY_ONE_END *,defs #ONLY_ONE_START *,defs #ZERO_OR_MORE_END *,defs #ZERO_OR_MORE_START *,defs #ZERO_OR_ONE_END *,defs #ZERO_OR_ONE_START *{stroke:var(--md-mermaid-edge-color)!important}defs #ZERO_OR_MORE_END circle,defs #ZERO_OR_MORE_START circle{fill:var(--md-mermaid-label-bg-color)}text:not([class]):last-child{fill:var(--md-mermaid-label-fg-color)}.actor{fill:var(--md-mermaid-sequence-actor-bg-color);stroke:var(--md-mermaid-sequence-actor-border-color)}text.actor>tspan{fill:var(--md-mermaid-sequence-actor-fg-color);font-family:var(--md-mermaid-font-family)}line{stroke:var(--md-mermaid-sequence-actor-line-color)}.actor-man circle,.actor-man line{fill:var(--md-mermaid-sequence-actorman-bg-color);stroke:var(--md-mermaid-sequence-actorman-line-color)}.messageLine0,.messageLine1{stroke:var(--md-mermaid-sequence-message-line-color)}.note{fill:var(--md-mermaid-sequence-note-bg-color);stroke:var(--md-mermaid-sequence-note-border-color)}.loopText,.loopText>tspan,.messageText,.noteText>tspan{stroke:none;font-family:var(--md-mermaid-font-family)!important}.messageText{fill:var(--md-mermaid-sequence-message-fg-color)}.loopText,.loopText>tspan{fill:var(--md-mermaid-sequence-loop-fg-color)}.noteText>tspan{fill:var(--md-mermaid-sequence-note-fg-color)}#arrowhead path{fill:var(--md-mermaid-sequence-message-line-color);stroke:none}.loopLine{fill:var(--md-mermaid-sequence-loop-bg-color);stroke:var(--md-mermaid-sequence-loop-border-color)}.labelBox{fill:var(--md-mermaid-sequence-label-bg-color);stroke:none}.labelText,.labelText>span{fill:var(--md-mermaid-sequence-label-fg-color);font-family:var(--md-mermaid-font-family)}.sequenceNumber{fill:var(--md-mermaid-sequence-number-fg-color)}rect.rect{fill:var(--md-mermaid-sequence-box-bg-color);stroke:none}rect.rect+text.text{fill:var(--md-mermaid-sequence-box-fg-color)}defs #sequencenumber{fill:var(--md-mermaid-sequence-number-bg-color)!important}";var Gr,Qa=0;function Ka(){return typeof mermaid=="undefined"||mermaid instanceof Element?Tt("https://unpkg.com/mermaid@11/dist/mermaid.min.js"):I(void 0)}function Wn(e){return 
e.classList.remove("mermaid"),Gr||(Gr=Ka().pipe(w(()=>mermaid.initialize({startOnLoad:!1,themeCSS:Un,sequence:{actorFontSize:"16px",messageFontSize:"16px",noteFontSize:"16px"}})),m(()=>{}),G(1))),Gr.subscribe(()=>co(this,null,function*(){e.classList.add("mermaid");let t=`__mermaid_${Qa++}`,r=x("div",{class:"mermaid"}),o=e.textContent,{svg:n,fn:i}=yield mermaid.render(t,o),a=r.attachShadow({mode:"closed"});a.innerHTML=n,e.replaceWith(r),i==null||i(a)})),Gr.pipe(m(()=>({ref:e})))}var Dn=x("table");function Vn(e){return e.replaceWith(Dn),Dn.replaceWith(An(e)),I({ref:e})}function Ya(e){let t=e.find(r=>r.checked)||e[0];return O(...e.map(r=>h(r,"change").pipe(m(()=>R(`label[for="${r.id}"]`))))).pipe(Q(R(`label[for="${t.id}"]`)),m(r=>({active:r})))}function Nn(e,{viewport$:t,target$:r}){let o=R(".tabbed-labels",e),n=P(":scope > input",e),i=Kr("prev");e.append(i);let a=Kr("next");return e.append(a),C(()=>{let s=new g,p=s.pipe(Z(),ie(!0));z([s,ge(e),tt(e)]).pipe(W(p),Me(1,me)).subscribe({next([{active:c},l]){let f=Ve(c),{width:u}=ce(c);e.style.setProperty("--md-indicator-x",`${f.x}px`),e.style.setProperty("--md-indicator-width",`${u}px`);let d=pr(o);(f.xd.x+l.width)&&o.scrollTo({left:Math.max(0,f.x-16),behavior:"smooth"})},complete(){e.style.removeProperty("--md-indicator-x"),e.style.removeProperty("--md-indicator-width")}}),z([Ne(o),ge(o)]).pipe(W(p)).subscribe(([c,l])=>{let f=St(o);i.hidden=c.x<16,a.hidden=c.x>f.width-l.width-16}),O(h(i,"click").pipe(m(()=>-1)),h(a,"click").pipe(m(()=>1))).pipe(W(p)).subscribe(c=>{let{width:l}=ce(o);o.scrollBy({left:l*c,behavior:"smooth"})}),r.pipe(W(p),b(c=>n.includes(c))).subscribe(c=>c.click()),o.classList.add("tabbed-labels--linked");for(let c of n){let l=R(`label[for="${c.id}"]`);l.replaceChildren(x("a",{href:`#${l.htmlFor}`,tabIndex:-1},...Array.from(l.childNodes))),h(l.firstElementChild,"click").pipe(W(p),b(f=>!(f.metaKey||f.ctrlKey)),w(f=>{f.preventDefault(),f.stopPropagation()})).subscribe(()=>{history.replaceState({},"",`#${l.htmlFor}`),l.click()})}return B("content.tabs.link")&&s.pipe(Ce(1),re(t)).subscribe(([{active:c},{offset:l}])=>{let f=c.innerText.trim();if(c.hasAttribute("data-md-switching"))c.removeAttribute("data-md-switching");else{let u=e.offsetTop-l.y;for(let y of P("[data-tabs]"))for(let L of P(":scope > input",y)){let X=R(`label[for="${L.id}"]`);if(X!==c&&X.innerText.trim()===f){X.setAttribute("data-md-switching",""),L.click();break}}window.scrollTo({top:e.offsetTop-u});let d=__md_get("__tabs")||[];__md_set("__tabs",[...new Set([f,...d])])}}),s.pipe(W(p)).subscribe(()=>{for(let c of P("audio, video",e))c.pause()}),Ya(n).pipe(w(c=>s.next(c)),_(()=>s.complete()),m(c=>$({ref:e},c)))}).pipe(Ke(se))}function zn(e,{viewport$:t,target$:r,print$:o}){return O(...P(".annotate:not(.highlight)",e).map(n=>Pn(n,{target$:r,print$:o})),...P("pre:not(.mermaid) > code",e).map(n=>jn(n,{target$:r,print$:o})),...P("pre.mermaid",e).map(n=>Wn(n)),...P("table:not([class])",e).map(n=>Vn(n)),...P("details",e).map(n=>Fn(n,{target$:r,print$:o})),...P("[data-tabs]",e).map(n=>Nn(n,{viewport$:t,target$:r})),...P("[title]",e).filter(()=>B("content.tooltips")).map(n=>mt(n,{viewport$:t})))}function Ba(e,{alert$:t}){return t.pipe(v(r=>O(I(!0),I(!1).pipe(Ge(2e3))).pipe(m(o=>({message:r,active:o})))))}function qn(e,t){let r=R(".md-typeset",e);return C(()=>{let o=new g;return o.subscribe(({message:n,active:i})=>{e.classList.toggle("md-dialog--active",i),r.textContent=n}),Ba(e,t).pipe(w(n=>o.next(n)),_(()=>o.complete()),m(n=>$({ref:e},n)))})}var Ga=0;function 
Ja(e,t){document.body.append(e);let{width:r}=ce(e);e.style.setProperty("--md-tooltip-width",`${r}px`),e.remove();let o=cr(t),n=typeof o!="undefined"?Ne(o):I({x:0,y:0}),i=O(et(t),$t(t)).pipe(K());return z([i,n]).pipe(m(([a,s])=>{let{x:p,y:c}=Ve(t),l=ce(t),f=t.closest("table");return f&&t.parentElement&&(p+=f.offsetLeft+t.parentElement.offsetLeft,c+=f.offsetTop+t.parentElement.offsetTop),{active:a,offset:{x:p-s.x+l.width/2-r/2,y:c-s.y+l.height+8}}}))}function Qn(e){let t=e.title;if(!t.length)return S;let r=`__tooltip_${Ga++}`,o=Rt(r,"inline"),n=R(".md-typeset",o);return n.innerHTML=t,C(()=>{let i=new g;return i.subscribe({next({offset:a}){o.style.setProperty("--md-tooltip-x",`${a.x}px`),o.style.setProperty("--md-tooltip-y",`${a.y}px`)},complete(){o.style.removeProperty("--md-tooltip-x"),o.style.removeProperty("--md-tooltip-y")}}),O(i.pipe(b(({active:a})=>a)),i.pipe(_e(250),b(({active:a})=>!a))).subscribe({next({active:a}){a?(e.insertAdjacentElement("afterend",o),e.setAttribute("aria-describedby",r),e.removeAttribute("title")):(o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t))},complete(){o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t)}}),i.pipe(Me(16,me)).subscribe(({active:a})=>{o.classList.toggle("md-tooltip--active",a)}),i.pipe(pt(125,me),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:a})=>a)).subscribe({next(a){a?o.style.setProperty("--md-tooltip-0",`${-a}px`):o.style.removeProperty("--md-tooltip-0")},complete(){o.style.removeProperty("--md-tooltip-0")}}),Ja(o,e).pipe(w(a=>i.next(a)),_(()=>i.complete()),m(a=>$({ref:e},a)))}).pipe(Ke(se))}function Xa({viewport$:e}){if(!B("header.autohide"))return I(!1);let t=e.pipe(m(({offset:{y:n}})=>n),Be(2,1),m(([n,i])=>[nMath.abs(i-n.y)>100),m(([,[n]])=>n),K()),o=ze("search");return z([e,o]).pipe(m(([{offset:n},i])=>n.y>400&&!i),K(),v(n=>n?r:I(!1)),Q(!1))}function Kn(e,t){return C(()=>z([ge(e),Xa(t)])).pipe(m(([{height:r},o])=>({height:r,hidden:o})),K((r,o)=>r.height===o.height&&r.hidden===o.hidden),G(1))}function Yn(e,{header$:t,main$:r}){return C(()=>{let o=new g,n=o.pipe(Z(),ie(!0));o.pipe(te("active"),He(t)).subscribe(([{active:a},{hidden:s}])=>{e.classList.toggle("md-header--shadow",a&&!s),e.hidden=s});let i=ue(P("[title]",e)).pipe(b(()=>B("content.tooltips")),ne(a=>Qn(a)));return r.subscribe(o),t.pipe(W(n),m(a=>$({ref:e},a)),Re(i.pipe(W(n))))})}function Za(e,{viewport$:t,header$:r}){return mr(e,{viewport$:t,header$:r}).pipe(m(({offset:{y:o}})=>{let{height:n}=ce(e);return{active:o>=n}}),te("active"))}function Bn(e,t){return C(()=>{let r=new g;r.subscribe({next({active:n}){e.classList.toggle("md-header__title--active",n)},complete(){e.classList.remove("md-header__title--active")}});let o=fe(".md-content h1");return typeof o=="undefined"?S:Za(o,t).pipe(w(n=>r.next(n)),_(()=>r.complete()),m(n=>$({ref:e},n)))})}function Gn(e,{viewport$:t,header$:r}){let o=r.pipe(m(({height:i})=>i),K()),n=o.pipe(v(()=>ge(e).pipe(m(({height:i})=>({top:e.offsetTop,bottom:e.offsetTop+i})),te("bottom"))));return z([o,n,t]).pipe(m(([i,{top:a,bottom:s},{offset:{y:p},size:{height:c}}])=>(c=Math.max(0,c-Math.max(0,a-p,i)-Math.max(0,c+p-s)),{offset:a-i,height:c,active:a-i<=p})),K((i,a)=>i.offset===a.offset&&i.height===a.height&&i.active===a.active))}function es(e){let t=__md_get("__palette")||{index:e.findIndex(o=>matchMedia(o.getAttribute("data-md-color-media")).matches)},r=Math.max(0,Math.min(t.index,e.length-1));return 
I(...e).pipe(ne(o=>h(o,"change").pipe(m(()=>o))),Q(e[r]),m(o=>({index:e.indexOf(o),color:{media:o.getAttribute("data-md-color-media"),scheme:o.getAttribute("data-md-color-scheme"),primary:o.getAttribute("data-md-color-primary"),accent:o.getAttribute("data-md-color-accent")}})),G(1))}function Jn(e){let t=P("input",e),r=x("meta",{name:"theme-color"});document.head.appendChild(r);let o=x("meta",{name:"color-scheme"});document.head.appendChild(o);let n=Pt("(prefers-color-scheme: light)");return C(()=>{let i=new g;return i.subscribe(a=>{if(document.body.setAttribute("data-md-color-switching",""),a.color.media==="(prefers-color-scheme)"){let s=matchMedia("(prefers-color-scheme: light)"),p=document.querySelector(s.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");a.color.scheme=p.getAttribute("data-md-color-scheme"),a.color.primary=p.getAttribute("data-md-color-primary"),a.color.accent=p.getAttribute("data-md-color-accent")}for(let[s,p]of Object.entries(a.color))document.body.setAttribute(`data-md-color-${s}`,p);for(let s=0;sa.key==="Enter"),re(i,(a,s)=>s)).subscribe(({index:a})=>{a=(a+1)%t.length,t[a].click(),t[a].focus()}),i.pipe(m(()=>{let a=Se("header"),s=window.getComputedStyle(a);return o.content=s.colorScheme,s.backgroundColor.match(/\d+/g).map(p=>(+p).toString(16).padStart(2,"0")).join("")})).subscribe(a=>r.content=`#${a}`),i.pipe(ve(se)).subscribe(()=>{document.body.removeAttribute("data-md-color-switching")}),es(t).pipe(W(n.pipe(Ce(1))),ct(),w(a=>i.next(a)),_(()=>i.complete()),m(a=>$({ref:e},a)))})}function Xn(e,{progress$:t}){return C(()=>{let r=new g;return r.subscribe(({value:o})=>{e.style.setProperty("--md-progress-value",`${o}`)}),t.pipe(w(o=>r.next({value:o})),_(()=>r.complete()),m(o=>({ref:e,value:o})))})}var Jr=Mt(Br());function ts(e){e.setAttribute("data-md-copying","");let t=e.closest("[data-copy]"),r=t?t.getAttribute("data-copy"):e.innerText;return e.removeAttribute("data-md-copying"),r.trimEnd()}function Zn({alert$:e}){Jr.default.isSupported()&&new j(t=>{new Jr.default("[data-clipboard-target], [data-clipboard-text]",{text:r=>r.getAttribute("data-clipboard-text")||ts(R(r.getAttribute("data-clipboard-target")))}).on("success",r=>t.next(r))}).pipe(w(t=>{t.trigger.focus()}),m(()=>Ee("clipboard.copied"))).subscribe(e)}function ei(e,t){return e.protocol=t.protocol,e.hostname=t.hostname,e}function rs(e,t){let r=new Map;for(let o of P("url",e)){let n=R("loc",o),i=[ei(new URL(n.textContent),t)];r.set(`${i[0]}`,i);for(let a of P("[rel=alternate]",o)){let s=a.getAttribute("href");s!=null&&i.push(ei(new URL(s),t))}}return r}function ur(e){return un(new URL("sitemap.xml",e)).pipe(m(t=>rs(t,new URL(e))),de(()=>I(new Map)))}function os(e,t){if(!(e.target instanceof Element))return S;let r=e.target.closest("a");if(r===null)return S;if(r.target||e.metaKey||e.ctrlKey)return S;let o=new URL(r.href);return o.search=o.hash="",t.has(`${o}`)?(e.preventDefault(),I(new URL(r.href))):S}function ti(e){let t=new Map;for(let r of P(":scope > *",e.head))t.set(r.outerHTML,r);return t}function ri(e){for(let t of P("[href], [src]",e))for(let r of["href","src"]){let o=t.getAttribute(r);if(o&&!/^(?:[a-z]+:)?\/\//i.test(o)){t[r]=t[r];break}}return I(e)}function ns(e){for(let o of["[data-md-component=announce]","[data-md-component=container]","[data-md-component=header-topic]","[data-md-component=outdated]","[data-md-component=logo]","[data-md-component=skip]",...B("navigation.tabs.sticky")?["[data-md-component=tabs]"]:[]]){let 
n=fe(o),i=fe(o,e);typeof n!="undefined"&&typeof i!="undefined"&&n.replaceWith(i)}let t=ti(document);for(let[o,n]of ti(e))t.has(o)?t.delete(o):document.head.appendChild(n);for(let o of t.values()){let n=o.getAttribute("name");n!=="theme-color"&&n!=="color-scheme"&&o.remove()}let r=Se("container");return We(P("script",r)).pipe(v(o=>{let n=e.createElement("script");if(o.src){for(let i of o.getAttributeNames())n.setAttribute(i,o.getAttribute(i));return o.replaceWith(n),new j(i=>{n.onload=()=>i.complete()})}else return n.textContent=o.textContent,o.replaceWith(n),S}),Z(),ie(document))}function oi({location$:e,viewport$:t,progress$:r}){let o=xe();if(location.protocol==="file:")return S;let n=ur(o.base);I(document).subscribe(ri);let i=h(document.body,"click").pipe(He(n),v(([p,c])=>os(p,c)),pe()),a=h(window,"popstate").pipe(m(ye),pe());i.pipe(re(t)).subscribe(([p,{offset:c}])=>{history.replaceState(c,""),history.pushState(null,"",p)}),O(i,a).subscribe(e);let s=e.pipe(te("pathname"),v(p=>fn(p,{progress$:r}).pipe(de(()=>(lt(p,!0),S)))),v(ri),v(ns),pe());return O(s.pipe(re(e,(p,c)=>c)),s.pipe(v(()=>e),te("hash")),e.pipe(K((p,c)=>p.pathname===c.pathname&&p.hash===c.hash),v(()=>i),w(()=>history.back()))).subscribe(p=>{var c,l;history.state!==null||!p.hash?window.scrollTo(0,(l=(c=history.state)==null?void 0:c.y)!=null?l:0):(history.scrollRestoration="auto",pn(p.hash),history.scrollRestoration="manual")}),e.subscribe(()=>{history.scrollRestoration="manual"}),h(window,"beforeunload").subscribe(()=>{history.scrollRestoration="auto"}),t.pipe(te("offset"),_e(100)).subscribe(({offset:p})=>{history.replaceState(p,"")}),s}var ni=Mt(qr());function ii(e){let t=e.separator.split("|").map(n=>n.replace(/(\(\?[!=<][^)]+\))/g,"").length===0?"\uFFFD":n).join("|"),r=new RegExp(t,"img"),o=(n,i,a)=>`${i}${a}`;return n=>{n=n.replace(/[\s*+\-:~^]+/g," ").trim();let i=new RegExp(`(^|${e.separator}|)(${n.replace(/[|\\{}()[\]^$+*?.-]/g,"\\$&").replace(r,"|")})`,"img");return a=>(0,ni.default)(a).replace(i,o).replace(/<\/mark>(\s+)]*>/img,"$1")}}function jt(e){return e.type===1}function dr(e){return e.type===3}function ai(e,t){let r=yn(e);return O(I(location.protocol!=="file:"),ze("search")).pipe(Ae(o=>o),v(()=>t)).subscribe(({config:o,docs:n})=>r.next({type:0,data:{config:o,docs:n,options:{suggest:B("search.suggest")}}})),r}function si(e){var l;let{selectedVersionSitemap:t,selectedVersionBaseURL:r,currentLocation:o,currentBaseURL:n}=e,i=(l=Xr(n))==null?void 0:l.pathname;if(i===void 0)return;let a=ss(o.pathname,i);if(a===void 0)return;let s=ps(t.keys());if(!t.has(s))return;let p=Xr(a,s);if(!p||!t.has(p.href))return;let c=Xr(a,r);if(c)return c.hash=o.hash,c.search=o.search,c}function Xr(e,t){try{return new URL(e,t)}catch(r){return}}function ss(e,t){if(e.startsWith(t))return e.slice(t.length)}function cs(e,t){let r=Math.min(e.length,t.length),o;for(o=0;oS)),o=r.pipe(m(n=>{let[,i]=t.base.match(/([^/]+)\/?$/);return n.find(({version:a,aliases:s})=>a===i||s.includes(i))||n[0]}));r.pipe(m(n=>new Map(n.map(i=>[`${new URL(`../${i.version}/`,t.base)}`,i]))),v(n=>h(document.body,"click").pipe(b(i=>!i.metaKey&&!i.ctrlKey),re(o),v(([i,a])=>{if(i.target instanceof Element){let s=i.target.closest("a");if(s&&!s.target&&n.has(s.href)){let p=s.href;return!i.target.closest(".md-version")&&n.get(p)===a?S:(i.preventDefault(),I(new URL(p)))}}return S}),v(i=>ur(i).pipe(m(a=>{var 
s;return(s=si({selectedVersionSitemap:a,selectedVersionBaseURL:i,currentLocation:ye(),currentBaseURL:t.base}))!=null?s:i})))))).subscribe(n=>lt(n,!0)),z([r,o]).subscribe(([n,i])=>{R(".md-header__topic").appendChild(Cn(n,i))}),e.pipe(v(()=>o)).subscribe(n=>{var s;let i=new URL(t.base),a=__md_get("__outdated",sessionStorage,i);if(a===null){a=!0;let p=((s=t.version)==null?void 0:s.default)||"latest";Array.isArray(p)||(p=[p]);e:for(let c of p)for(let l of n.aliases.concat(n.version))if(new RegExp(c,"i").test(l)){a=!1;break e}__md_set("__outdated",a,sessionStorage,i)}if(a)for(let p of ae("outdated"))p.hidden=!1})}function ls(e,{worker$:t}){let{searchParams:r}=ye();r.has("q")&&(Je("search",!0),e.value=r.get("q"),e.focus(),ze("search").pipe(Ae(i=>!i)).subscribe(()=>{let i=ye();i.searchParams.delete("q"),history.replaceState({},"",`${i}`)}));let o=et(e),n=O(t.pipe(Ae(jt)),h(e,"keyup"),o).pipe(m(()=>e.value),K());return z([n,o]).pipe(m(([i,a])=>({value:i,focus:a})),G(1))}function pi(e,{worker$:t}){let r=new g,o=r.pipe(Z(),ie(!0));z([t.pipe(Ae(jt)),r],(i,a)=>a).pipe(te("value")).subscribe(({value:i})=>t.next({type:2,data:i})),r.pipe(te("focus")).subscribe(({focus:i})=>{i&&Je("search",i)}),h(e.form,"reset").pipe(W(o)).subscribe(()=>e.focus());let n=R("header [for=__search]");return h(n,"click").subscribe(()=>e.focus()),ls(e,{worker$:t}).pipe(w(i=>r.next(i)),_(()=>r.complete()),m(i=>$({ref:e},i)),G(1))}function li(e,{worker$:t,query$:r}){let o=new g,n=on(e.parentElement).pipe(b(Boolean)),i=e.parentElement,a=R(":scope > :first-child",e),s=R(":scope > :last-child",e);ze("search").subscribe(l=>s.setAttribute("role",l?"list":"presentation")),o.pipe(re(r),Wr(t.pipe(Ae(jt)))).subscribe(([{items:l},{value:f}])=>{switch(l.length){case 0:a.textContent=f.length?Ee("search.result.none"):Ee("search.result.placeholder");break;case 1:a.textContent=Ee("search.result.one");break;default:let u=sr(l.length);a.textContent=Ee("search.result.other",u)}});let p=o.pipe(w(()=>s.innerHTML=""),v(({items:l})=>O(I(...l.slice(0,10)),I(...l.slice(10)).pipe(Be(4),Vr(n),v(([f])=>f)))),m(Mn),pe());return p.subscribe(l=>s.appendChild(l)),p.pipe(ne(l=>{let f=fe("details",l);return typeof f=="undefined"?S:h(f,"toggle").pipe(W(o),m(()=>f))})).subscribe(l=>{l.open===!1&&l.offsetTop<=i.scrollTop&&i.scrollTo({top:l.offsetTop})}),t.pipe(b(dr),m(({data:l})=>l)).pipe(w(l=>o.next(l)),_(()=>o.complete()),m(l=>$({ref:e},l)))}function ms(e,{query$:t}){return t.pipe(m(({value:r})=>{let o=ye();return o.hash="",r=r.replace(/\s+/g,"+").replace(/&/g,"%26").replace(/=/g,"%3D"),o.search=`q=${r}`,{url:o}}))}function mi(e,t){let r=new g,o=r.pipe(Z(),ie(!0));return r.subscribe(({url:n})=>{e.setAttribute("data-clipboard-text",e.href),e.href=`${n}`}),h(e,"click").pipe(W(o)).subscribe(n=>n.preventDefault()),ms(e,t).pipe(w(n=>r.next(n)),_(()=>r.complete()),m(n=>$({ref:e},n)))}function fi(e,{worker$:t,keyboard$:r}){let o=new g,n=Se("search-query"),i=O(h(n,"keydown"),h(n,"focus")).pipe(ve(se),m(()=>n.value),K());return o.pipe(He(i),m(([{suggest:s},p])=>{let c=p.split(/([\s-]+)/);if(s!=null&&s.length&&c[c.length-1]){let l=s[s.length-1];l.startsWith(c[c.length-1])&&(c[c.length-1]=l)}else c.length=0;return c})).subscribe(s=>e.innerHTML=s.join("").replace(/\s/g," ")),r.pipe(b(({mode:s})=>s==="search")).subscribe(s=>{switch(s.type){case"ArrowRight":e.innerText.length&&n.selectionStart===n.value.length&&(n.value=e.innerText);break}}),t.pipe(b(dr),m(({data:s})=>s)).pipe(w(s=>o.next(s)),_(()=>o.complete()),m(()=>({ref:e})))}function ui(e,{index$:t,keyboard$:r}){let 
o=xe();try{let n=ai(o.search,t),i=Se("search-query",e),a=Se("search-result",e);h(e,"click").pipe(b(({target:p})=>p instanceof Element&&!!p.closest("a"))).subscribe(()=>Je("search",!1)),r.pipe(b(({mode:p})=>p==="search")).subscribe(p=>{let c=Ie();switch(p.type){case"Enter":if(c===i){let l=new Map;for(let f of P(":first-child [href]",a)){let u=f.firstElementChild;l.set(f,parseFloat(u.getAttribute("data-md-score")))}if(l.size){let[[f]]=[...l].sort(([,u],[,d])=>d-u);f.click()}p.claim()}break;case"Escape":case"Tab":Je("search",!1),i.blur();break;case"ArrowUp":case"ArrowDown":if(typeof c=="undefined")i.focus();else{let l=[i,...P(":not(details) > [href], summary, details[open] [href]",a)],f=Math.max(0,(Math.max(0,l.indexOf(c))+l.length+(p.type==="ArrowUp"?-1:1))%l.length);l[f].focus()}p.claim();break;default:i!==Ie()&&i.focus()}}),r.pipe(b(({mode:p})=>p==="global")).subscribe(p=>{switch(p.type){case"f":case"s":case"/":i.focus(),i.select(),p.claim();break}});let s=pi(i,{worker$:n});return O(s,li(a,{worker$:n,query$:s})).pipe(Re(...ae("search-share",e).map(p=>mi(p,{query$:s})),...ae("search-suggest",e).map(p=>fi(p,{worker$:n,keyboard$:r}))))}catch(n){return e.hidden=!0,Ye}}function di(e,{index$:t,location$:r}){return z([t,r.pipe(Q(ye()),b(o=>!!o.searchParams.get("h")))]).pipe(m(([o,n])=>ii(o.config)(n.searchParams.get("h"))),m(o=>{var a;let n=new Map,i=document.createNodeIterator(e,NodeFilter.SHOW_TEXT);for(let s=i.nextNode();s;s=i.nextNode())if((a=s.parentElement)!=null&&a.offsetHeight){let p=s.textContent,c=o(p);c.length>p.length&&n.set(s,c)}for(let[s,p]of n){let{childNodes:c}=x("span",null,p);s.replaceWith(...Array.from(c))}return{ref:e,nodes:n}}))}function fs(e,{viewport$:t,main$:r}){let o=e.closest(".md-grid"),n=o.offsetTop-o.parentElement.offsetTop;return z([r,t]).pipe(m(([{offset:i,height:a},{offset:{y:s}}])=>(a=a+Math.min(n,Math.max(0,s-i))-n,{height:a,locked:s>=i+n})),K((i,a)=>i.height===a.height&&i.locked===a.locked))}function Zr(e,o){var n=o,{header$:t}=n,r=so(n,["header$"]);let i=R(".md-sidebar__scrollwrap",e),{y:a}=Ve(i);return C(()=>{let s=new g,p=s.pipe(Z(),ie(!0)),c=s.pipe(Me(0,me));return c.pipe(re(t)).subscribe({next([{height:l},{height:f}]){i.style.height=`${l-2*a}px`,e.style.top=`${f}px`},complete(){i.style.height="",e.style.top=""}}),c.pipe(Ae()).subscribe(()=>{for(let l of P(".md-nav__link--active[href]",e)){if(!l.clientHeight)continue;let f=l.closest(".md-sidebar__scrollwrap");if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:d}=ce(f);f.scrollTo({top:u-d/2})}}}),ue(P("label[tabindex]",e)).pipe(ne(l=>h(l,"click").pipe(ve(se),m(()=>l),W(p)))).subscribe(l=>{let f=R(`[id="${l.htmlFor}"]`);R(`[aria-labelledby="${l.id}"]`).setAttribute("aria-expanded",`${f.checked}`)}),fs(e,r).pipe(w(l=>s.next(l)),_(()=>s.complete()),m(l=>$({ref:e},l)))})}function hi(e,t){if(typeof t!="undefined"){let r=`https://api.github.com/repos/${e}/${t}`;return st(je(`${r}/releases/latest`).pipe(de(()=>S),m(o=>({version:o.tag_name})),De({})),je(r).pipe(de(()=>S),m(o=>({stars:o.stargazers_count,forks:o.forks_count})),De({}))).pipe(m(([o,n])=>$($({},o),n)))}else{let r=`https://api.github.com/users/${e}`;return je(r).pipe(m(o=>({repositories:o.public_repos})),De({}))}}function bi(e,t){let r=`https://${e}/api/v4/projects/${encodeURIComponent(t)}`;return st(je(`${r}/releases/permalink/latest`).pipe(de(()=>S),m(({tag_name:o})=>({version:o})),De({})),je(r).pipe(de(()=>S),m(({star_count:o,forks_count:n})=>({stars:o,forks:n})),De({}))).pipe(m(([o,n])=>$($({},o),n)))}function vi(e){let 
t=e.match(/^.+github\.com\/([^/]+)\/?([^/]+)?/i);if(t){let[,r,o]=t;return hi(r,o)}if(t=e.match(/^.+?([^/]*gitlab[^/]+)\/(.+?)\/?$/i),t){let[,r,o]=t;return bi(r,o)}return S}var us;function ds(e){return us||(us=C(()=>{let t=__md_get("__source",sessionStorage);if(t)return I(t);if(ae("consent").length){let o=__md_get("__consent");if(!(o&&o.github))return S}return vi(e.href).pipe(w(o=>__md_set("__source",o,sessionStorage)))}).pipe(de(()=>S),b(t=>Object.keys(t).length>0),m(t=>({facts:t})),G(1)))}function gi(e){let t=R(":scope > :last-child",e);return C(()=>{let r=new g;return r.subscribe(({facts:o})=>{t.appendChild(_n(o)),t.classList.add("md-source__repository--active")}),ds(e).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}function hs(e,{viewport$:t,header$:r}){return ge(document.body).pipe(v(()=>mr(e,{header$:r,viewport$:t})),m(({offset:{y:o}})=>({hidden:o>=10})),te("hidden"))}function yi(e,t){return C(()=>{let r=new g;return r.subscribe({next({hidden:o}){e.hidden=o},complete(){e.hidden=!1}}),(B("navigation.tabs.sticky")?I({hidden:!1}):hs(e,t)).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}function bs(e,{viewport$:t,header$:r}){let o=new Map,n=P(".md-nav__link",e);for(let s of n){let p=decodeURIComponent(s.hash.substring(1)),c=fe(`[id="${p}"]`);typeof c!="undefined"&&o.set(s,c)}let i=r.pipe(te("height"),m(({height:s})=>{let p=Se("main"),c=R(":scope > :first-child",p);return s+.8*(c.offsetTop-p.offsetTop)}),pe());return ge(document.body).pipe(te("height"),v(s=>C(()=>{let p=[];return I([...o].reduce((c,[l,f])=>{for(;p.length&&o.get(p[p.length-1]).tagName>=f.tagName;)p.pop();let u=f.offsetTop;for(;!u&&f.parentElement;)f=f.parentElement,u=f.offsetTop;let d=f.offsetParent;for(;d;d=d.offsetParent)u+=d.offsetTop;return c.set([...p=[...p,l]].reverse(),u)},new Map))}).pipe(m(p=>new Map([...p].sort(([,c],[,l])=>c-l))),He(i),v(([p,c])=>t.pipe(Fr(([l,f],{offset:{y:u},size:d})=>{let y=u+d.height>=Math.floor(s.height);for(;f.length;){let[,L]=f[0];if(L-c=u&&!y)f=[l.pop(),...f];else break}return[l,f]},[[],[...p]]),K((l,f)=>l[0]===f[0]&&l[1]===f[1])))))).pipe(m(([s,p])=>({prev:s.map(([c])=>c),next:p.map(([c])=>c)})),Q({prev:[],next:[]}),Be(2,1),m(([s,p])=>s.prev.length{let i=new g,a=i.pipe(Z(),ie(!0));if(i.subscribe(({prev:s,next:p})=>{for(let[c]of p)c.classList.remove("md-nav__link--passed"),c.classList.remove("md-nav__link--active");for(let[c,[l]]of s.entries())l.classList.add("md-nav__link--passed"),l.classList.toggle("md-nav__link--active",c===s.length-1)}),B("toc.follow")){let s=O(t.pipe(_e(1),m(()=>{})),t.pipe(_e(250),m(()=>"smooth")));i.pipe(b(({prev:p})=>p.length>0),He(o.pipe(ve(se))),re(s)).subscribe(([[{prev:p}],c])=>{let[l]=p[p.length-1];if(l.offsetHeight){let f=cr(l);if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:d}=ce(f);f.scrollTo({top:u-d/2,behavior:c})}}})}return B("navigation.tracking")&&t.pipe(W(a),te("offset"),_e(250),Ce(1),W(n.pipe(Ce(1))),ct({delay:250}),re(i)).subscribe(([,{prev:s}])=>{let p=ye(),c=s[s.length-1];if(c&&c.length){let[l]=c,{hash:f}=new URL(l.href);p.hash!==f&&(p.hash=f,history.replaceState({},"",`${p}`))}else p.hash="",history.replaceState({},"",`${p}`)}),bs(e,{viewport$:t,header$:r}).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))})}function vs(e,{viewport$:t,main$:r,target$:o}){let n=t.pipe(m(({offset:{y:a}})=>a),Be(2,1),m(([a,s])=>a>s&&s>0),K()),i=r.pipe(m(({active:a})=>a));return z([i,n]).pipe(m(([a,s])=>!(a&&s)),K(),W(o.pipe(Ce(1))),ie(!0),ct({delay:250}),m(a=>({hidden:a})))}function 
Ei(e,{viewport$:t,header$:r,main$:o,target$:n}){let i=new g,a=i.pipe(Z(),ie(!0));return i.subscribe({next({hidden:s}){e.hidden=s,s?(e.setAttribute("tabindex","-1"),e.blur()):e.removeAttribute("tabindex")},complete(){e.style.top="",e.hidden=!0,e.removeAttribute("tabindex")}}),r.pipe(W(a),te("height")).subscribe(({height:s})=>{e.style.top=`${s+16}px`}),h(e,"click").subscribe(s=>{s.preventDefault(),window.scrollTo({top:0})}),vs(e,{viewport$:t,main$:o,target$:n}).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))}function wi({document$:e,viewport$:t}){e.pipe(v(()=>P(".md-ellipsis")),ne(r=>tt(r).pipe(W(e.pipe(Ce(1))),b(o=>o),m(()=>r),Te(1))),b(r=>r.offsetWidth{let o=r.innerText,n=r.closest("a")||r;return n.title=o,B("content.tooltips")?mt(n,{viewport$:t}).pipe(W(e.pipe(Ce(1))),_(()=>n.removeAttribute("title"))):S})).subscribe(),B("content.tooltips")&&e.pipe(v(()=>P(".md-status")),ne(r=>mt(r,{viewport$:t}))).subscribe()}function Ti({document$:e,tablet$:t}){e.pipe(v(()=>P(".md-toggle--indeterminate")),w(r=>{r.indeterminate=!0,r.checked=!1}),ne(r=>h(r,"change").pipe(Dr(()=>r.classList.contains("md-toggle--indeterminate")),m(()=>r))),re(t)).subscribe(([r,o])=>{r.classList.remove("md-toggle--indeterminate"),o&&(r.checked=!1)})}function gs(){return/(iPad|iPhone|iPod)/.test(navigator.userAgent)}function Si({document$:e}){e.pipe(v(()=>P("[data-md-scrollfix]")),w(t=>t.removeAttribute("data-md-scrollfix")),b(gs),ne(t=>h(t,"touchstart").pipe(m(()=>t)))).subscribe(t=>{let r=t.scrollTop;r===0?t.scrollTop=1:r+t.offsetHeight===t.scrollHeight&&(t.scrollTop=r-1)})}function Oi({viewport$:e,tablet$:t}){z([ze("search"),t]).pipe(m(([r,o])=>r&&!o),v(r=>I(r).pipe(Ge(r?400:100))),re(e)).subscribe(([r,{offset:{y:o}}])=>{if(r)document.body.setAttribute("data-md-scrolllock",""),document.body.style.top=`-${o}px`;else{let n=-1*parseInt(document.body.style.top,10);document.body.removeAttribute("data-md-scrolllock"),document.body.style.top="",n&&window.scrollTo(0,n)}})}Object.entries||(Object.entries=function(e){let t=[];for(let r of Object.keys(e))t.push([r,e[r]]);return t});Object.values||(Object.values=function(e){let t=[];for(let r of Object.keys(e))t.push(e[r]);return t});typeof Element!="undefined"&&(Element.prototype.scrollTo||(Element.prototype.scrollTo=function(e,t){typeof e=="object"?(this.scrollLeft=e.left,this.scrollTop=e.top):(this.scrollLeft=e,this.scrollTop=t)}),Element.prototype.replaceWith||(Element.prototype.replaceWith=function(...e){let t=this.parentNode;if(t){e.length===0&&t.removeChild(this);for(let r=e.length-1;r>=0;r--){let o=e[r];typeof o=="string"?o=document.createTextNode(o):o.parentNode&&o.parentNode.removeChild(o),r?t.insertBefore(this.previousSibling,o):t.replaceChild(o,this)}}}));function ys(){return location.protocol==="file:"?Tt(`${new URL("search/search_index.js",eo.base)}`).pipe(m(()=>__index),G(1)):je(new URL("search/search_index.json",eo.base))}document.documentElement.classList.remove("no-js");document.documentElement.classList.add("js");var ot=Go(),Ut=sn(),Lt=ln(Ut),to=an(),Oe=gn(),hr=Pt("(min-width: 960px)"),Mi=Pt("(min-width: 1220px)"),_i=mn(),eo=xe(),Ai=document.forms.namedItem("search")?ys():Ye,ro=new g;Zn({alert$:ro});var oo=new g;B("navigation.instant")&&oi({location$:Ut,viewport$:Oe,progress$:oo}).subscribe(ot);var Li;((Li=eo.version)==null?void 0:Li.provider)==="mike"&&ci({document$:ot});O(Ut,Lt).pipe(Ge(125)).subscribe(()=>{Je("drawer",!1),Je("search",!1)});to.pipe(b(({mode:e})=>e==="global")).subscribe(e=>{switch(e.type){case"p":case",":let 
t=fe("link[rel=prev]");typeof t!="undefined"&<(t);break;case"n":case".":let r=fe("link[rel=next]");typeof r!="undefined"&<(r);break;case"Enter":let o=Ie();o instanceof HTMLLabelElement&&o.click()}});wi({viewport$:Oe,document$:ot});Ti({document$:ot,tablet$:hr});Si({document$:ot});Oi({viewport$:Oe,tablet$:hr});var rt=Kn(Se("header"),{viewport$:Oe}),Ft=ot.pipe(m(()=>Se("main")),v(e=>Gn(e,{viewport$:Oe,header$:rt})),G(1)),xs=O(...ae("consent").map(e=>En(e,{target$:Lt})),...ae("dialog").map(e=>qn(e,{alert$:ro})),...ae("palette").map(e=>Jn(e)),...ae("progress").map(e=>Xn(e,{progress$:oo})),...ae("search").map(e=>ui(e,{index$:Ai,keyboard$:to})),...ae("source").map(e=>gi(e))),Es=C(()=>O(...ae("announce").map(e=>xn(e)),...ae("content").map(e=>zn(e,{viewport$:Oe,target$:Lt,print$:_i})),...ae("content").map(e=>B("search.highlight")?di(e,{index$:Ai,location$:Ut}):S),...ae("header").map(e=>Yn(e,{viewport$:Oe,header$:rt,main$:Ft})),...ae("header-title").map(e=>Bn(e,{viewport$:Oe,header$:rt})),...ae("sidebar").map(e=>e.getAttribute("data-md-type")==="navigation"?Nr(Mi,()=>Zr(e,{viewport$:Oe,header$:rt,main$:Ft})):Nr(hr,()=>Zr(e,{viewport$:Oe,header$:rt,main$:Ft}))),...ae("tabs").map(e=>yi(e,{viewport$:Oe,header$:rt})),...ae("toc").map(e=>xi(e,{viewport$:Oe,header$:rt,main$:Ft,target$:Lt})),...ae("top").map(e=>Ei(e,{viewport$:Oe,header$:rt,main$:Ft,target$:Lt})))),Ci=ot.pipe(v(()=>Es),Re(xs),G(1));Ci.subscribe();window.document$=ot;window.location$=Ut;window.target$=Lt;window.keyboard$=to;window.viewport$=Oe;window.tablet$=hr;window.screen$=Mi;window.print$=_i;window.alert$=ro;window.progress$=oo;window.component$=Ci;})(); +//# sourceMappingURL=bundle.f13b1293.min.js.map diff --git a/assets/javascripts/bundle.83f73b43.min.js.map b/assets/javascripts/bundle.f13b1293.min.js.map similarity index 79% rename from assets/javascripts/bundle.83f73b43.min.js.map rename to assets/javascripts/bundle.f13b1293.min.js.map index fe920b7d..8bc6fb8d 100644 --- a/assets/javascripts/bundle.83f73b43.min.js.map +++ b/assets/javascripts/bundle.f13b1293.min.js.map @@ -1,7 +1,7 @@ { "version": 3, "sources": ["node_modules/focus-visible/dist/focus-visible.js", "node_modules/escape-html/index.js", "node_modules/clipboard/dist/clipboard.js", "src/templates/assets/javascripts/bundle.ts", "node_modules/tslib/tslib.es6.mjs", "node_modules/rxjs/src/internal/util/isFunction.ts", "node_modules/rxjs/src/internal/util/createErrorClass.ts", "node_modules/rxjs/src/internal/util/UnsubscriptionError.ts", "node_modules/rxjs/src/internal/util/arrRemove.ts", "node_modules/rxjs/src/internal/Subscription.ts", "node_modules/rxjs/src/internal/config.ts", "node_modules/rxjs/src/internal/scheduler/timeoutProvider.ts", "node_modules/rxjs/src/internal/util/reportUnhandledError.ts", "node_modules/rxjs/src/internal/util/noop.ts", "node_modules/rxjs/src/internal/NotificationFactories.ts", "node_modules/rxjs/src/internal/util/errorContext.ts", "node_modules/rxjs/src/internal/Subscriber.ts", "node_modules/rxjs/src/internal/symbol/observable.ts", "node_modules/rxjs/src/internal/util/identity.ts", "node_modules/rxjs/src/internal/util/pipe.ts", "node_modules/rxjs/src/internal/Observable.ts", "node_modules/rxjs/src/internal/util/lift.ts", "node_modules/rxjs/src/internal/operators/OperatorSubscriber.ts", "node_modules/rxjs/src/internal/scheduler/animationFrameProvider.ts", "node_modules/rxjs/src/internal/util/ObjectUnsubscribedError.ts", "node_modules/rxjs/src/internal/Subject.ts", "node_modules/rxjs/src/internal/BehaviorSubject.ts", 
"node_modules/rxjs/src/internal/scheduler/dateTimestampProvider.ts", "node_modules/rxjs/src/internal/ReplaySubject.ts", "node_modules/rxjs/src/internal/scheduler/Action.ts", "node_modules/rxjs/src/internal/scheduler/intervalProvider.ts", "node_modules/rxjs/src/internal/scheduler/AsyncAction.ts", "node_modules/rxjs/src/internal/Scheduler.ts", "node_modules/rxjs/src/internal/scheduler/AsyncScheduler.ts", "node_modules/rxjs/src/internal/scheduler/async.ts", "node_modules/rxjs/src/internal/scheduler/QueueAction.ts", "node_modules/rxjs/src/internal/scheduler/QueueScheduler.ts", "node_modules/rxjs/src/internal/scheduler/queue.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameAction.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameScheduler.ts", "node_modules/rxjs/src/internal/scheduler/animationFrame.ts", "node_modules/rxjs/src/internal/observable/empty.ts", "node_modules/rxjs/src/internal/util/isScheduler.ts", "node_modules/rxjs/src/internal/util/args.ts", "node_modules/rxjs/src/internal/util/isArrayLike.ts", "node_modules/rxjs/src/internal/util/isPromise.ts", "node_modules/rxjs/src/internal/util/isInteropObservable.ts", "node_modules/rxjs/src/internal/util/isAsyncIterable.ts", "node_modules/rxjs/src/internal/util/throwUnobservableError.ts", "node_modules/rxjs/src/internal/symbol/iterator.ts", "node_modules/rxjs/src/internal/util/isIterable.ts", "node_modules/rxjs/src/internal/util/isReadableStreamLike.ts", "node_modules/rxjs/src/internal/observable/innerFrom.ts", "node_modules/rxjs/src/internal/util/executeSchedule.ts", "node_modules/rxjs/src/internal/operators/observeOn.ts", "node_modules/rxjs/src/internal/operators/subscribeOn.ts", "node_modules/rxjs/src/internal/scheduled/scheduleObservable.ts", "node_modules/rxjs/src/internal/scheduled/schedulePromise.ts", "node_modules/rxjs/src/internal/scheduled/scheduleArray.ts", "node_modules/rxjs/src/internal/scheduled/scheduleIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleAsyncIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleReadableStreamLike.ts", "node_modules/rxjs/src/internal/scheduled/scheduled.ts", "node_modules/rxjs/src/internal/observable/from.ts", "node_modules/rxjs/src/internal/observable/of.ts", "node_modules/rxjs/src/internal/observable/throwError.ts", "node_modules/rxjs/src/internal/util/EmptyError.ts", "node_modules/rxjs/src/internal/util/isDate.ts", "node_modules/rxjs/src/internal/operators/map.ts", "node_modules/rxjs/src/internal/util/mapOneOrManyArgs.ts", "node_modules/rxjs/src/internal/util/argsArgArrayOrObject.ts", "node_modules/rxjs/src/internal/util/createObject.ts", "node_modules/rxjs/src/internal/observable/combineLatest.ts", "node_modules/rxjs/src/internal/operators/mergeInternals.ts", "node_modules/rxjs/src/internal/operators/mergeMap.ts", "node_modules/rxjs/src/internal/operators/mergeAll.ts", "node_modules/rxjs/src/internal/operators/concatAll.ts", "node_modules/rxjs/src/internal/observable/concat.ts", "node_modules/rxjs/src/internal/observable/defer.ts", "node_modules/rxjs/src/internal/observable/fromEvent.ts", "node_modules/rxjs/src/internal/observable/fromEventPattern.ts", "node_modules/rxjs/src/internal/observable/timer.ts", "node_modules/rxjs/src/internal/observable/merge.ts", "node_modules/rxjs/src/internal/observable/never.ts", "node_modules/rxjs/src/internal/util/argsOrArgArray.ts", "node_modules/rxjs/src/internal/operators/filter.ts", "node_modules/rxjs/src/internal/observable/zip.ts", "node_modules/rxjs/src/internal/operators/audit.ts", 
"node_modules/rxjs/src/internal/operators/auditTime.ts", "node_modules/rxjs/src/internal/operators/bufferCount.ts", "node_modules/rxjs/src/internal/operators/catchError.ts", "node_modules/rxjs/src/internal/operators/scanInternals.ts", "node_modules/rxjs/src/internal/operators/combineLatest.ts", "node_modules/rxjs/src/internal/operators/combineLatestWith.ts", "node_modules/rxjs/src/internal/operators/debounce.ts", "node_modules/rxjs/src/internal/operators/debounceTime.ts", "node_modules/rxjs/src/internal/operators/defaultIfEmpty.ts", "node_modules/rxjs/src/internal/operators/take.ts", "node_modules/rxjs/src/internal/operators/ignoreElements.ts", "node_modules/rxjs/src/internal/operators/mapTo.ts", "node_modules/rxjs/src/internal/operators/delayWhen.ts", "node_modules/rxjs/src/internal/operators/delay.ts", "node_modules/rxjs/src/internal/operators/distinctUntilChanged.ts", "node_modules/rxjs/src/internal/operators/distinctUntilKeyChanged.ts", "node_modules/rxjs/src/internal/operators/throwIfEmpty.ts", "node_modules/rxjs/src/internal/operators/endWith.ts", "node_modules/rxjs/src/internal/operators/finalize.ts", "node_modules/rxjs/src/internal/operators/first.ts", "node_modules/rxjs/src/internal/operators/takeLast.ts", "node_modules/rxjs/src/internal/operators/merge.ts", "node_modules/rxjs/src/internal/operators/mergeWith.ts", "node_modules/rxjs/src/internal/operators/repeat.ts", "node_modules/rxjs/src/internal/operators/scan.ts", "node_modules/rxjs/src/internal/operators/share.ts", "node_modules/rxjs/src/internal/operators/shareReplay.ts", "node_modules/rxjs/src/internal/operators/skip.ts", "node_modules/rxjs/src/internal/operators/skipUntil.ts", "node_modules/rxjs/src/internal/operators/startWith.ts", "node_modules/rxjs/src/internal/operators/switchMap.ts", "node_modules/rxjs/src/internal/operators/takeUntil.ts", "node_modules/rxjs/src/internal/operators/takeWhile.ts", "node_modules/rxjs/src/internal/operators/tap.ts", "node_modules/rxjs/src/internal/operators/throttle.ts", "node_modules/rxjs/src/internal/operators/throttleTime.ts", "node_modules/rxjs/src/internal/operators/withLatestFrom.ts", "node_modules/rxjs/src/internal/operators/zip.ts", "node_modules/rxjs/src/internal/operators/zipWith.ts", "src/templates/assets/javascripts/browser/document/index.ts", "src/templates/assets/javascripts/browser/element/_/index.ts", "src/templates/assets/javascripts/browser/element/focus/index.ts", "src/templates/assets/javascripts/browser/element/hover/index.ts", "src/templates/assets/javascripts/utilities/h/index.ts", "src/templates/assets/javascripts/utilities/round/index.ts", "src/templates/assets/javascripts/browser/script/index.ts", "src/templates/assets/javascripts/browser/element/size/_/index.ts", "src/templates/assets/javascripts/browser/element/size/content/index.ts", "src/templates/assets/javascripts/browser/element/offset/_/index.ts", "src/templates/assets/javascripts/browser/element/offset/content/index.ts", "src/templates/assets/javascripts/browser/element/visibility/index.ts", "src/templates/assets/javascripts/browser/toggle/index.ts", "src/templates/assets/javascripts/browser/keyboard/index.ts", "src/templates/assets/javascripts/browser/location/_/index.ts", "src/templates/assets/javascripts/browser/location/hash/index.ts", "src/templates/assets/javascripts/browser/media/index.ts", "src/templates/assets/javascripts/browser/request/index.ts", "src/templates/assets/javascripts/browser/viewport/offset/index.ts", "src/templates/assets/javascripts/browser/viewport/size/index.ts", 
"src/templates/assets/javascripts/browser/viewport/_/index.ts", "src/templates/assets/javascripts/browser/viewport/at/index.ts", "src/templates/assets/javascripts/browser/worker/index.ts", "src/templates/assets/javascripts/_/index.ts", "src/templates/assets/javascripts/components/_/index.ts", "src/templates/assets/javascripts/components/announce/index.ts", "src/templates/assets/javascripts/components/consent/index.ts", "src/templates/assets/javascripts/templates/tooltip/index.tsx", "src/templates/assets/javascripts/templates/annotation/index.tsx", "src/templates/assets/javascripts/templates/clipboard/index.tsx", "src/templates/assets/javascripts/templates/search/index.tsx", "src/templates/assets/javascripts/templates/source/index.tsx", "src/templates/assets/javascripts/templates/tabbed/index.tsx", "src/templates/assets/javascripts/templates/table/index.tsx", "src/templates/assets/javascripts/templates/version/index.tsx", "src/templates/assets/javascripts/components/tooltip2/index.ts", "src/templates/assets/javascripts/components/content/annotation/_/index.ts", "src/templates/assets/javascripts/components/content/annotation/list/index.ts", "src/templates/assets/javascripts/components/content/annotation/block/index.ts", "src/templates/assets/javascripts/components/content/code/_/index.ts", "src/templates/assets/javascripts/components/content/details/index.ts", "src/templates/assets/javascripts/components/content/mermaid/index.css", "src/templates/assets/javascripts/components/content/mermaid/index.ts", "src/templates/assets/javascripts/components/content/table/index.ts", "src/templates/assets/javascripts/components/content/tabs/index.ts", "src/templates/assets/javascripts/components/content/_/index.ts", "src/templates/assets/javascripts/components/dialog/index.ts", "src/templates/assets/javascripts/components/tooltip/index.ts", "src/templates/assets/javascripts/components/header/_/index.ts", "src/templates/assets/javascripts/components/header/title/index.ts", "src/templates/assets/javascripts/components/main/index.ts", "src/templates/assets/javascripts/components/palette/index.ts", "src/templates/assets/javascripts/components/progress/index.ts", "src/templates/assets/javascripts/integrations/clipboard/index.ts", "src/templates/assets/javascripts/integrations/sitemap/index.ts", "src/templates/assets/javascripts/integrations/instant/index.ts", "src/templates/assets/javascripts/integrations/search/highlighter/index.ts", "src/templates/assets/javascripts/integrations/search/worker/message/index.ts", "src/templates/assets/javascripts/integrations/search/worker/_/index.ts", "src/templates/assets/javascripts/integrations/version/findurl/index.ts", "src/templates/assets/javascripts/integrations/version/index.ts", "src/templates/assets/javascripts/components/search/query/index.ts", "src/templates/assets/javascripts/components/search/result/index.ts", "src/templates/assets/javascripts/components/search/share/index.ts", "src/templates/assets/javascripts/components/search/suggest/index.ts", "src/templates/assets/javascripts/components/search/_/index.ts", "src/templates/assets/javascripts/components/search/highlight/index.ts", "src/templates/assets/javascripts/components/sidebar/index.ts", "src/templates/assets/javascripts/components/source/facts/github/index.ts", "src/templates/assets/javascripts/components/source/facts/gitlab/index.ts", "src/templates/assets/javascripts/components/source/facts/_/index.ts", "src/templates/assets/javascripts/components/source/_/index.ts", 
"src/templates/assets/javascripts/components/tabs/index.ts", "src/templates/assets/javascripts/components/toc/index.ts", "src/templates/assets/javascripts/components/top/index.ts", "src/templates/assets/javascripts/patches/ellipsis/index.ts", "src/templates/assets/javascripts/patches/indeterminate/index.ts", "src/templates/assets/javascripts/patches/scrollfix/index.ts", "src/templates/assets/javascripts/patches/scrolllock/index.ts", "src/templates/assets/javascripts/polyfills/index.ts"], - "sourcesContent": ["(function (global, factory) {\n typeof exports === 'object' && typeof module !== 'undefined' ? factory() :\n typeof define === 'function' && define.amd ? define(factory) :\n (factory());\n}(this, (function () { 'use strict';\n\n /**\n * Applies the :focus-visible polyfill at the given scope.\n * A scope in this case is either the top-level Document or a Shadow Root.\n *\n * @param {(Document|ShadowRoot)} scope\n * @see https://github.com/WICG/focus-visible\n */\n function applyFocusVisiblePolyfill(scope) {\n var hadKeyboardEvent = true;\n var hadFocusVisibleRecently = false;\n var hadFocusVisibleRecentlyTimeout = null;\n\n var inputTypesAllowlist = {\n text: true,\n search: true,\n url: true,\n tel: true,\n email: true,\n password: true,\n number: true,\n date: true,\n month: true,\n week: true,\n time: true,\n datetime: true,\n 'datetime-local': true\n };\n\n /**\n * Helper function for legacy browsers and iframes which sometimes focus\n * elements like document, body, and non-interactive SVG.\n * @param {Element} el\n */\n function isValidFocusTarget(el) {\n if (\n el &&\n el !== document &&\n el.nodeName !== 'HTML' &&\n el.nodeName !== 'BODY' &&\n 'classList' in el &&\n 'contains' in el.classList\n ) {\n return true;\n }\n return false;\n }\n\n /**\n * Computes whether the given element should automatically trigger the\n * `focus-visible` class being added, i.e. whether it should always match\n * `:focus-visible` when focused.\n * @param {Element} el\n * @return {boolean}\n */\n function focusTriggersKeyboardModality(el) {\n var type = el.type;\n var tagName = el.tagName;\n\n if (tagName === 'INPUT' && inputTypesAllowlist[type] && !el.readOnly) {\n return true;\n }\n\n if (tagName === 'TEXTAREA' && !el.readOnly) {\n return true;\n }\n\n if (el.isContentEditable) {\n return true;\n }\n\n return false;\n }\n\n /**\n * Add the `focus-visible` class to the given element if it was not added by\n * the author.\n * @param {Element} el\n */\n function addFocusVisibleClass(el) {\n if (el.classList.contains('focus-visible')) {\n return;\n }\n el.classList.add('focus-visible');\n el.setAttribute('data-focus-visible-added', '');\n }\n\n /**\n * Remove the `focus-visible` class from the given element if it was not\n * originally added by the author.\n * @param {Element} el\n */\n function removeFocusVisibleClass(el) {\n if (!el.hasAttribute('data-focus-visible-added')) {\n return;\n }\n el.classList.remove('focus-visible');\n el.removeAttribute('data-focus-visible-added');\n }\n\n /**\n * If the most recent user interaction was via the keyboard;\n * and the key press did not include a meta, alt/option, or control key;\n * then the modality is keyboard. 
Otherwise, the modality is not keyboard.\n * Apply `focus-visible` to any current active element and keep track\n * of our keyboard modality state with `hadKeyboardEvent`.\n * @param {KeyboardEvent} e\n */\n function onKeyDown(e) {\n if (e.metaKey || e.altKey || e.ctrlKey) {\n return;\n }\n\n if (isValidFocusTarget(scope.activeElement)) {\n addFocusVisibleClass(scope.activeElement);\n }\n\n hadKeyboardEvent = true;\n }\n\n /**\n * If at any point a user clicks with a pointing device, ensure that we change\n * the modality away from keyboard.\n * This avoids the situation where a user presses a key on an already focused\n * element, and then clicks on a different element, focusing it with a\n * pointing device, while we still think we're in keyboard modality.\n * @param {Event} e\n */\n function onPointerDown(e) {\n hadKeyboardEvent = false;\n }\n\n /**\n * On `focus`, add the `focus-visible` class to the target if:\n * - the target received focus as a result of keyboard navigation, or\n * - the event target is an element that will likely require interaction\n * via the keyboard (e.g. a text box)\n * @param {Event} e\n */\n function onFocus(e) {\n // Prevent IE from focusing the document or HTML element.\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (hadKeyboardEvent || focusTriggersKeyboardModality(e.target)) {\n addFocusVisibleClass(e.target);\n }\n }\n\n /**\n * On `blur`, remove the `focus-visible` class from the target.\n * @param {Event} e\n */\n function onBlur(e) {\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (\n e.target.classList.contains('focus-visible') ||\n e.target.hasAttribute('data-focus-visible-added')\n ) {\n // To detect a tab/window switch, we look for a blur event followed\n // rapidly by a visibility change.\n // If we don't see a visibility change within 100ms, it's probably a\n // regular focus change.\n hadFocusVisibleRecently = true;\n window.clearTimeout(hadFocusVisibleRecentlyTimeout);\n hadFocusVisibleRecentlyTimeout = window.setTimeout(function() {\n hadFocusVisibleRecently = false;\n }, 100);\n removeFocusVisibleClass(e.target);\n }\n }\n\n /**\n * If the user changes tabs, keep track of whether or not the previously\n * focused element had .focus-visible.\n * @param {Event} e\n */\n function onVisibilityChange(e) {\n if (document.visibilityState === 'hidden') {\n // If the tab becomes active again, the browser will handle calling focus\n // on the element (Safari actually calls it twice).\n // If this tab change caused a blur on an element with focus-visible,\n // re-apply the class when the user switches back to the tab.\n if (hadFocusVisibleRecently) {\n hadKeyboardEvent = true;\n }\n addInitialPointerMoveListeners();\n }\n }\n\n /**\n * Add a group of listeners to detect usage of any pointing devices.\n * These listeners will be added when the polyfill first loads, and anytime\n * the window is blurred, so that they are active when the window regains\n * focus.\n */\n function addInitialPointerMoveListeners() {\n document.addEventListener('mousemove', onInitialPointerMove);\n document.addEventListener('mousedown', onInitialPointerMove);\n document.addEventListener('mouseup', onInitialPointerMove);\n document.addEventListener('pointermove', onInitialPointerMove);\n document.addEventListener('pointerdown', onInitialPointerMove);\n document.addEventListener('pointerup', onInitialPointerMove);\n document.addEventListener('touchmove', onInitialPointerMove);\n document.addEventListener('touchstart', onInitialPointerMove);\n 
document.addEventListener('touchend', onInitialPointerMove);\n }\n\n function removeInitialPointerMoveListeners() {\n document.removeEventListener('mousemove', onInitialPointerMove);\n document.removeEventListener('mousedown', onInitialPointerMove);\n document.removeEventListener('mouseup', onInitialPointerMove);\n document.removeEventListener('pointermove', onInitialPointerMove);\n document.removeEventListener('pointerdown', onInitialPointerMove);\n document.removeEventListener('pointerup', onInitialPointerMove);\n document.removeEventListener('touchmove', onInitialPointerMove);\n document.removeEventListener('touchstart', onInitialPointerMove);\n document.removeEventListener('touchend', onInitialPointerMove);\n }\n\n /**\n * When the polfyill first loads, assume the user is in keyboard modality.\n * If any event is received from a pointing device (e.g. mouse, pointer,\n * touch), turn off keyboard modality.\n * This accounts for situations where focus enters the page from the URL bar.\n * @param {Event} e\n */\n function onInitialPointerMove(e) {\n // Work around a Safari quirk that fires a mousemove on whenever the\n // window blurs, even if you're tabbing out of the page. \u00AF\\_(\u30C4)_/\u00AF\n if (e.target.nodeName && e.target.nodeName.toLowerCase() === 'html') {\n return;\n }\n\n hadKeyboardEvent = false;\n removeInitialPointerMoveListeners();\n }\n\n // For some kinds of state, we are interested in changes at the global scope\n // only. For example, global pointer input, global key presses and global\n // visibility change should affect the state at every scope:\n document.addEventListener('keydown', onKeyDown, true);\n document.addEventListener('mousedown', onPointerDown, true);\n document.addEventListener('pointerdown', onPointerDown, true);\n document.addEventListener('touchstart', onPointerDown, true);\n document.addEventListener('visibilitychange', onVisibilityChange, true);\n\n addInitialPointerMoveListeners();\n\n // For focus and blur, we specifically care about state changes in the local\n // scope. This is because focus / blur events that originate from within a\n // shadow root are not re-dispatched from the host element if it was already\n // the active element in its own scope:\n scope.addEventListener('focus', onFocus, true);\n scope.addEventListener('blur', onBlur, true);\n\n // We detect that a node is a ShadowRoot by ensuring that it is a\n // DocumentFragment and also has a host property. This check covers native\n // implementation and polyfill implementation transparently. If we only cared\n // about the native implementation, we could just check if the scope was\n // an instance of a ShadowRoot.\n if (scope.nodeType === Node.DOCUMENT_FRAGMENT_NODE && scope.host) {\n // Since a ShadowRoot is a special kind of DocumentFragment, it does not\n // have a root element to add a class to. So, we add this attribute to the\n // host element instead:\n scope.host.setAttribute('data-js-focus-visible', '');\n } else if (scope.nodeType === Node.DOCUMENT_NODE) {\n document.documentElement.classList.add('js-focus-visible');\n document.documentElement.setAttribute('data-js-focus-visible', '');\n }\n }\n\n // It is important to wrap all references to global window and document in\n // these checks to support server-side rendering use cases\n // @see https://github.com/WICG/focus-visible/issues/199\n if (typeof window !== 'undefined' && typeof document !== 'undefined') {\n // Make the polyfill helper globally available. 
This can be used as a signal\n // to interested libraries that wish to coordinate with the polyfill for e.g.,\n // applying the polyfill to a shadow root:\n window.applyFocusVisiblePolyfill = applyFocusVisiblePolyfill;\n\n // Notify interested libraries of the polyfill's presence, in case the\n // polyfill was loaded lazily:\n var event;\n\n try {\n event = new CustomEvent('focus-visible-polyfill-ready');\n } catch (error) {\n // IE11 does not support using CustomEvent as a constructor directly:\n event = document.createEvent('CustomEvent');\n event.initCustomEvent('focus-visible-polyfill-ready', false, false, {});\n }\n\n window.dispatchEvent(event);\n }\n\n if (typeof document !== 'undefined') {\n // Apply the polyfill to the global document, so that no JavaScript\n // coordination is required to use the polyfill in the top-level document:\n applyFocusVisiblePolyfill(document);\n }\n\n})));\n", "/*!\n * escape-html\n * Copyright(c) 2012-2013 TJ Holowaychuk\n * Copyright(c) 2015 Andreas Lubbe\n * Copyright(c) 2015 Tiancheng \"Timothy\" Gu\n * MIT Licensed\n */\n\n'use strict';\n\n/**\n * Module variables.\n * @private\n */\n\nvar matchHtmlRegExp = /[\"'&<>]/;\n\n/**\n * Module exports.\n * @public\n */\n\nmodule.exports = escapeHtml;\n\n/**\n * Escape special characters in the given string of html.\n *\n * @param {string} string The string to escape for inserting into HTML\n * @return {string}\n * @public\n */\n\nfunction escapeHtml(string) {\n var str = '' + string;\n var match = matchHtmlRegExp.exec(str);\n\n if (!match) {\n return str;\n }\n\n var escape;\n var html = '';\n var index = 0;\n var lastIndex = 0;\n\n for (index = match.index; index < str.length; index++) {\n switch (str.charCodeAt(index)) {\n case 34: // \"\n escape = '"';\n break;\n case 38: // &\n escape = '&';\n break;\n case 39: // '\n escape = ''';\n break;\n case 60: // <\n escape = '<';\n break;\n case 62: // >\n escape = '>';\n break;\n default:\n continue;\n }\n\n if (lastIndex !== index) {\n html += str.substring(lastIndex, index);\n }\n\n lastIndex = index + 1;\n html += escape;\n }\n\n return lastIndex !== index\n ? 
html + str.substring(lastIndex, index)\n : html;\n}\n", "/*!\n * clipboard.js v2.0.11\n * https://clipboardjs.com/\n *\n * Licensed MIT \u00A9 Zeno Rocha\n */\n(function webpackUniversalModuleDefinition(root, factory) {\n\tif(typeof exports === 'object' && typeof module === 'object')\n\t\tmodule.exports = factory();\n\telse if(typeof define === 'function' && define.amd)\n\t\tdefine([], factory);\n\telse if(typeof exports === 'object')\n\t\texports[\"ClipboardJS\"] = factory();\n\telse\n\t\troot[\"ClipboardJS\"] = factory();\n})(this, function() {\nreturn /******/ (function() { // webpackBootstrap\n/******/ \tvar __webpack_modules__ = ({\n\n/***/ 686:\n/***/ (function(__unused_webpack_module, __webpack_exports__, __webpack_require__) {\n\n\"use strict\";\n\n// EXPORTS\n__webpack_require__.d(__webpack_exports__, {\n \"default\": function() { return /* binding */ clipboard; }\n});\n\n// EXTERNAL MODULE: ./node_modules/tiny-emitter/index.js\nvar tiny_emitter = __webpack_require__(279);\nvar tiny_emitter_default = /*#__PURE__*/__webpack_require__.n(tiny_emitter);\n// EXTERNAL MODULE: ./node_modules/good-listener/src/listen.js\nvar listen = __webpack_require__(370);\nvar listen_default = /*#__PURE__*/__webpack_require__.n(listen);\n// EXTERNAL MODULE: ./node_modules/select/src/select.js\nvar src_select = __webpack_require__(817);\nvar select_default = /*#__PURE__*/__webpack_require__.n(src_select);\n;// CONCATENATED MODULE: ./src/common/command.js\n/**\n * Executes a given operation type.\n * @param {String} type\n * @return {Boolean}\n */\nfunction command(type) {\n try {\n return document.execCommand(type);\n } catch (err) {\n return false;\n }\n}\n;// CONCATENATED MODULE: ./src/actions/cut.js\n\n\n/**\n * Cut action wrapper.\n * @param {String|HTMLElement} target\n * @return {String}\n */\n\nvar ClipboardActionCut = function ClipboardActionCut(target) {\n var selectedText = select_default()(target);\n command('cut');\n return selectedText;\n};\n\n/* harmony default export */ var actions_cut = (ClipboardActionCut);\n;// CONCATENATED MODULE: ./src/common/create-fake-element.js\n/**\n * Creates a fake textarea element with a value.\n * @param {String} value\n * @return {HTMLElement}\n */\nfunction createFakeElement(value) {\n var isRTL = document.documentElement.getAttribute('dir') === 'rtl';\n var fakeElement = document.createElement('textarea'); // Prevent zooming on iOS\n\n fakeElement.style.fontSize = '12pt'; // Reset box model\n\n fakeElement.style.border = '0';\n fakeElement.style.padding = '0';\n fakeElement.style.margin = '0'; // Move element out of screen horizontally\n\n fakeElement.style.position = 'absolute';\n fakeElement.style[isRTL ? 
'right' : 'left'] = '-9999px'; // Move element to the same position vertically\n\n var yPosition = window.pageYOffset || document.documentElement.scrollTop;\n fakeElement.style.top = \"\".concat(yPosition, \"px\");\n fakeElement.setAttribute('readonly', '');\n fakeElement.value = value;\n return fakeElement;\n}\n;// CONCATENATED MODULE: ./src/actions/copy.js\n\n\n\n/**\n * Create fake copy action wrapper using a fake element.\n * @param {String} target\n * @param {Object} options\n * @return {String}\n */\n\nvar fakeCopyAction = function fakeCopyAction(value, options) {\n var fakeElement = createFakeElement(value);\n options.container.appendChild(fakeElement);\n var selectedText = select_default()(fakeElement);\n command('copy');\n fakeElement.remove();\n return selectedText;\n};\n/**\n * Copy action wrapper.\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @return {String}\n */\n\n\nvar ClipboardActionCopy = function ClipboardActionCopy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n var selectedText = '';\n\n if (typeof target === 'string') {\n selectedText = fakeCopyAction(target, options);\n } else if (target instanceof HTMLInputElement && !['text', 'search', 'url', 'tel', 'password'].includes(target === null || target === void 0 ? void 0 : target.type)) {\n // If input type doesn't support `setSelectionRange`. Simulate it. https://developer.mozilla.org/en-US/docs/Web/API/HTMLInputElement/setSelectionRange\n selectedText = fakeCopyAction(target.value, options);\n } else {\n selectedText = select_default()(target);\n command('copy');\n }\n\n return selectedText;\n};\n\n/* harmony default export */ var actions_copy = (ClipboardActionCopy);\n;// CONCATENATED MODULE: ./src/actions/default.js\nfunction _typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { _typeof = function _typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return _typeof(obj); }\n\n\n\n/**\n * Inner function which performs selection from either `text` or `target`\n * properties and then executes copy or cut operations.\n * @param {Object} options\n */\n\nvar ClipboardActionDefault = function ClipboardActionDefault() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n // Defines base properties passed from constructor.\n var _options$action = options.action,\n action = _options$action === void 0 ? 'copy' : _options$action,\n container = options.container,\n target = options.target,\n text = options.text; // Sets the `action` to be performed which can be either 'copy' or 'cut'.\n\n if (action !== 'copy' && action !== 'cut') {\n throw new Error('Invalid \"action\" value, use either \"copy\" or \"cut\"');\n } // Sets the `target` property using an element that will be have its content copied.\n\n\n if (target !== undefined) {\n if (target && _typeof(target) === 'object' && target.nodeType === 1) {\n if (action === 'copy' && target.hasAttribute('disabled')) {\n throw new Error('Invalid \"target\" attribute. Please use \"readonly\" instead of \"disabled\" attribute');\n }\n\n if (action === 'cut' && (target.hasAttribute('readonly') || target.hasAttribute('disabled'))) {\n throw new Error('Invalid \"target\" attribute. 
You can\\'t cut text from elements with \"readonly\" or \"disabled\" attributes');\n }\n } else {\n throw new Error('Invalid \"target\" value, use a valid Element');\n }\n } // Define selection strategy based on `text` property.\n\n\n if (text) {\n return actions_copy(text, {\n container: container\n });\n } // Defines which selection strategy based on `target` property.\n\n\n if (target) {\n return action === 'cut' ? actions_cut(target) : actions_copy(target, {\n container: container\n });\n }\n};\n\n/* harmony default export */ var actions_default = (ClipboardActionDefault);\n;// CONCATENATED MODULE: ./src/clipboard.js\nfunction clipboard_typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { clipboard_typeof = function _typeof(obj) { return typeof obj; }; } else { clipboard_typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return clipboard_typeof(obj); }\n\nfunction _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError(\"Cannot call a class as a function\"); } }\n\nfunction _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if (\"value\" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }\n\nfunction _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; }\n\nfunction _inherits(subClass, superClass) { if (typeof superClass !== \"function\" && superClass !== null) { throw new TypeError(\"Super expression must either be null or a function\"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); if (superClass) _setPrototypeOf(subClass, superClass); }\n\nfunction _setPrototypeOf(o, p) { _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { o.__proto__ = p; return o; }; return _setPrototypeOf(o, p); }\n\nfunction _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = _getPrototypeOf(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = _getPrototypeOf(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return _possibleConstructorReturn(this, result); }; }\n\nfunction _possibleConstructorReturn(self, call) { if (call && (clipboard_typeof(call) === \"object\" || typeof call === \"function\")) { return call; } return _assertThisInitialized(self); }\n\nfunction _assertThisInitialized(self) { if (self === void 0) { throw new ReferenceError(\"this hasn't been initialised - super() hasn't been called\"); } return self; }\n\nfunction _isNativeReflectConstruct() { if (typeof Reflect === \"undefined\" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === \"function\") return true; try { Date.prototype.toString.call(Reflect.construct(Date, [], function () {})); return true; } catch (e) { return false; } }\n\nfunction _getPrototypeOf(o) { _getPrototypeOf = Object.setPrototypeOf ? 
Object.getPrototypeOf : function _getPrototypeOf(o) { return o.__proto__ || Object.getPrototypeOf(o); }; return _getPrototypeOf(o); }\n\n\n\n\n\n\n/**\n * Helper function to retrieve attribute value.\n * @param {String} suffix\n * @param {Element} element\n */\n\nfunction getAttributeValue(suffix, element) {\n var attribute = \"data-clipboard-\".concat(suffix);\n\n if (!element.hasAttribute(attribute)) {\n return;\n }\n\n return element.getAttribute(attribute);\n}\n/**\n * Base class which takes one or more elements, adds event listeners to them,\n * and instantiates a new `ClipboardAction` on each click.\n */\n\n\nvar Clipboard = /*#__PURE__*/function (_Emitter) {\n _inherits(Clipboard, _Emitter);\n\n var _super = _createSuper(Clipboard);\n\n /**\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n * @param {Object} options\n */\n function Clipboard(trigger, options) {\n var _this;\n\n _classCallCheck(this, Clipboard);\n\n _this = _super.call(this);\n\n _this.resolveOptions(options);\n\n _this.listenClick(trigger);\n\n return _this;\n }\n /**\n * Defines if attributes would be resolved using internal setter functions\n * or custom functions that were passed in the constructor.\n * @param {Object} options\n */\n\n\n _createClass(Clipboard, [{\n key: \"resolveOptions\",\n value: function resolveOptions() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n this.action = typeof options.action === 'function' ? options.action : this.defaultAction;\n this.target = typeof options.target === 'function' ? options.target : this.defaultTarget;\n this.text = typeof options.text === 'function' ? options.text : this.defaultText;\n this.container = clipboard_typeof(options.container) === 'object' ? options.container : document.body;\n }\n /**\n * Adds a click event listener to the passed trigger.\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n */\n\n }, {\n key: \"listenClick\",\n value: function listenClick(trigger) {\n var _this2 = this;\n\n this.listener = listen_default()(trigger, 'click', function (e) {\n return _this2.onClick(e);\n });\n }\n /**\n * Defines a new `ClipboardAction` on each click event.\n * @param {Event} e\n */\n\n }, {\n key: \"onClick\",\n value: function onClick(e) {\n var trigger = e.delegateTarget || e.currentTarget;\n var action = this.action(trigger) || 'copy';\n var text = actions_default({\n action: action,\n container: this.container,\n target: this.target(trigger),\n text: this.text(trigger)\n }); // Fires an event based on the copy operation result.\n\n this.emit(text ? 
'success' : 'error', {\n action: action,\n text: text,\n trigger: trigger,\n clearSelection: function clearSelection() {\n if (trigger) {\n trigger.focus();\n }\n\n window.getSelection().removeAllRanges();\n }\n });\n }\n /**\n * Default `action` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultAction\",\n value: function defaultAction(trigger) {\n return getAttributeValue('action', trigger);\n }\n /**\n * Default `target` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultTarget\",\n value: function defaultTarget(trigger) {\n var selector = getAttributeValue('target', trigger);\n\n if (selector) {\n return document.querySelector(selector);\n }\n }\n /**\n * Allow fire programmatically a copy action\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @returns Text copied.\n */\n\n }, {\n key: \"defaultText\",\n\n /**\n * Default `text` lookup function.\n * @param {Element} trigger\n */\n value: function defaultText(trigger) {\n return getAttributeValue('text', trigger);\n }\n /**\n * Destroy lifecycle.\n */\n\n }, {\n key: \"destroy\",\n value: function destroy() {\n this.listener.destroy();\n }\n }], [{\n key: \"copy\",\n value: function copy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n return actions_copy(target, options);\n }\n /**\n * Allow fire programmatically a cut action\n * @param {String|HTMLElement} target\n * @returns Text cutted.\n */\n\n }, {\n key: \"cut\",\n value: function cut(target) {\n return actions_cut(target);\n }\n /**\n * Returns the support of the given action, or all actions if no action is\n * given.\n * @param {String} [action]\n */\n\n }, {\n key: \"isSupported\",\n value: function isSupported() {\n var action = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ['copy', 'cut'];\n var actions = typeof action === 'string' ? 
[action] : action;\n var support = !!document.queryCommandSupported;\n actions.forEach(function (action) {\n support = support && !!document.queryCommandSupported(action);\n });\n return support;\n }\n }]);\n\n return Clipboard;\n}((tiny_emitter_default()));\n\n/* harmony default export */ var clipboard = (Clipboard);\n\n/***/ }),\n\n/***/ 828:\n/***/ (function(module) {\n\nvar DOCUMENT_NODE_TYPE = 9;\n\n/**\n * A polyfill for Element.matches()\n */\nif (typeof Element !== 'undefined' && !Element.prototype.matches) {\n var proto = Element.prototype;\n\n proto.matches = proto.matchesSelector ||\n proto.mozMatchesSelector ||\n proto.msMatchesSelector ||\n proto.oMatchesSelector ||\n proto.webkitMatchesSelector;\n}\n\n/**\n * Finds the closest parent that matches a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @return {Function}\n */\nfunction closest (element, selector) {\n while (element && element.nodeType !== DOCUMENT_NODE_TYPE) {\n if (typeof element.matches === 'function' &&\n element.matches(selector)) {\n return element;\n }\n element = element.parentNode;\n }\n}\n\nmodule.exports = closest;\n\n\n/***/ }),\n\n/***/ 438:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar closest = __webpack_require__(828);\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction _delegate(element, selector, type, callback, useCapture) {\n var listenerFn = listener.apply(this, arguments);\n\n element.addEventListener(type, listenerFn, useCapture);\n\n return {\n destroy: function() {\n element.removeEventListener(type, listenerFn, useCapture);\n }\n }\n}\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element|String|Array} [elements]\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction delegate(elements, selector, type, callback, useCapture) {\n // Handle the regular Element usage\n if (typeof elements.addEventListener === 'function') {\n return _delegate.apply(null, arguments);\n }\n\n // Handle Element-less usage, it defaults to global delegation\n if (typeof type === 'function') {\n // Use `document` as the first parameter, then apply arguments\n // This is a short way to .unshift `arguments` without running into deoptimizations\n return _delegate.bind(null, document).apply(null, arguments);\n }\n\n // Handle Selector-based usage\n if (typeof elements === 'string') {\n elements = document.querySelectorAll(elements);\n }\n\n // Handle Array-like based usage\n return Array.prototype.map.call(elements, function (element) {\n return _delegate(element, selector, type, callback, useCapture);\n });\n}\n\n/**\n * Finds closest match and invokes callback.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Function}\n */\nfunction listener(element, selector, type, callback) {\n return function(e) {\n e.delegateTarget = closest(e.target, selector);\n\n if (e.delegateTarget) {\n callback.call(element, e);\n }\n }\n}\n\nmodule.exports = delegate;\n\n\n/***/ }),\n\n/***/ 879:\n/***/ (function(__unused_webpack_module, exports) {\n\n/**\n * Check if argument is a HTML element.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.node = function(value) {\n return value !== undefined\n && 
value instanceof HTMLElement\n && value.nodeType === 1;\n};\n\n/**\n * Check if argument is a list of HTML elements.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.nodeList = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return value !== undefined\n && (type === '[object NodeList]' || type === '[object HTMLCollection]')\n && ('length' in value)\n && (value.length === 0 || exports.node(value[0]));\n};\n\n/**\n * Check if argument is a string.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.string = function(value) {\n return typeof value === 'string'\n || value instanceof String;\n};\n\n/**\n * Check if argument is a function.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.fn = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return type === '[object Function]';\n};\n\n\n/***/ }),\n\n/***/ 370:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar is = __webpack_require__(879);\nvar delegate = __webpack_require__(438);\n\n/**\n * Validates all params and calls the right\n * listener function based on its target type.\n *\n * @param {String|HTMLElement|HTMLCollection|NodeList} target\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listen(target, type, callback) {\n if (!target && !type && !callback) {\n throw new Error('Missing required arguments');\n }\n\n if (!is.string(type)) {\n throw new TypeError('Second argument must be a String');\n }\n\n if (!is.fn(callback)) {\n throw new TypeError('Third argument must be a Function');\n }\n\n if (is.node(target)) {\n return listenNode(target, type, callback);\n }\n else if (is.nodeList(target)) {\n return listenNodeList(target, type, callback);\n }\n else if (is.string(target)) {\n return listenSelector(target, type, callback);\n }\n else {\n throw new TypeError('First argument must be a String, HTMLElement, HTMLCollection, or NodeList');\n }\n}\n\n/**\n * Adds an event listener to a HTML element\n * and returns a remove listener function.\n *\n * @param {HTMLElement} node\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNode(node, type, callback) {\n node.addEventListener(type, callback);\n\n return {\n destroy: function() {\n node.removeEventListener(type, callback);\n }\n }\n}\n\n/**\n * Add an event listener to a list of HTML elements\n * and returns a remove listener function.\n *\n * @param {NodeList|HTMLCollection} nodeList\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNodeList(nodeList, type, callback) {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.addEventListener(type, callback);\n });\n\n return {\n destroy: function() {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.removeEventListener(type, callback);\n });\n }\n }\n}\n\n/**\n * Add an event listener to a selector\n * and returns a remove listener function.\n *\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenSelector(selector, type, callback) {\n return delegate(document.body, selector, type, callback);\n}\n\nmodule.exports = listen;\n\n\n/***/ }),\n\n/***/ 817:\n/***/ (function(module) {\n\nfunction select(element) {\n var selectedText;\n\n if (element.nodeName === 'SELECT') {\n element.focus();\n\n selectedText = element.value;\n }\n else if (element.nodeName === 'INPUT' || element.nodeName 
=== 'TEXTAREA') {\n var isReadOnly = element.hasAttribute('readonly');\n\n if (!isReadOnly) {\n element.setAttribute('readonly', '');\n }\n\n element.select();\n element.setSelectionRange(0, element.value.length);\n\n if (!isReadOnly) {\n element.removeAttribute('readonly');\n }\n\n selectedText = element.value;\n }\n else {\n if (element.hasAttribute('contenteditable')) {\n element.focus();\n }\n\n var selection = window.getSelection();\n var range = document.createRange();\n\n range.selectNodeContents(element);\n selection.removeAllRanges();\n selection.addRange(range);\n\n selectedText = selection.toString();\n }\n\n return selectedText;\n}\n\nmodule.exports = select;\n\n\n/***/ }),\n\n/***/ 279:\n/***/ (function(module) {\n\nfunction E () {\n // Keep this empty so it's easier to inherit from\n // (via https://github.com/lipsmack from https://github.com/scottcorgan/tiny-emitter/issues/3)\n}\n\nE.prototype = {\n on: function (name, callback, ctx) {\n var e = this.e || (this.e = {});\n\n (e[name] || (e[name] = [])).push({\n fn: callback,\n ctx: ctx\n });\n\n return this;\n },\n\n once: function (name, callback, ctx) {\n var self = this;\n function listener () {\n self.off(name, listener);\n callback.apply(ctx, arguments);\n };\n\n listener._ = callback\n return this.on(name, listener, ctx);\n },\n\n emit: function (name) {\n var data = [].slice.call(arguments, 1);\n var evtArr = ((this.e || (this.e = {}))[name] || []).slice();\n var i = 0;\n var len = evtArr.length;\n\n for (i; i < len; i++) {\n evtArr[i].fn.apply(evtArr[i].ctx, data);\n }\n\n return this;\n },\n\n off: function (name, callback) {\n var e = this.e || (this.e = {});\n var evts = e[name];\n var liveEvents = [];\n\n if (evts && callback) {\n for (var i = 0, len = evts.length; i < len; i++) {\n if (evts[i].fn !== callback && evts[i].fn._ !== callback)\n liveEvents.push(evts[i]);\n }\n }\n\n // Remove event from queue to prevent memory leak\n // Suggested by https://github.com/lazd\n // Ref: https://github.com/scottcorgan/tiny-emitter/commit/c6ebfaa9bc973b33d110a84a307742b7cf94c953#commitcomment-5024910\n\n (liveEvents.length)\n ? 
e[name] = liveEvents\n : delete e[name];\n\n return this;\n }\n};\n\nmodule.exports = E;\nmodule.exports.TinyEmitter = E;\n\n\n/***/ })\n\n/******/ \t});\n/************************************************************************/\n/******/ \t// The module cache\n/******/ \tvar __webpack_module_cache__ = {};\n/******/ \t\n/******/ \t// The require function\n/******/ \tfunction __webpack_require__(moduleId) {\n/******/ \t\t// Check if module is in cache\n/******/ \t\tif(__webpack_module_cache__[moduleId]) {\n/******/ \t\t\treturn __webpack_module_cache__[moduleId].exports;\n/******/ \t\t}\n/******/ \t\t// Create a new module (and put it into the cache)\n/******/ \t\tvar module = __webpack_module_cache__[moduleId] = {\n/******/ \t\t\t// no module.id needed\n/******/ \t\t\t// no module.loaded needed\n/******/ \t\t\texports: {}\n/******/ \t\t};\n/******/ \t\n/******/ \t\t// Execute the module function\n/******/ \t\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n/******/ \t\n/******/ \t\t// Return the exports of the module\n/******/ \t\treturn module.exports;\n/******/ \t}\n/******/ \t\n/************************************************************************/\n/******/ \t/* webpack/runtime/compat get default export */\n/******/ \t!function() {\n/******/ \t\t// getDefaultExport function for compatibility with non-harmony modules\n/******/ \t\t__webpack_require__.n = function(module) {\n/******/ \t\t\tvar getter = module && module.__esModule ?\n/******/ \t\t\t\tfunction() { return module['default']; } :\n/******/ \t\t\t\tfunction() { return module; };\n/******/ \t\t\t__webpack_require__.d(getter, { a: getter });\n/******/ \t\t\treturn getter;\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/define property getters */\n/******/ \t!function() {\n/******/ \t\t// define getter functions for harmony exports\n/******/ \t\t__webpack_require__.d = function(exports, definition) {\n/******/ \t\t\tfor(var key in definition) {\n/******/ \t\t\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n/******/ \t\t\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n/******/ \t\t\t\t}\n/******/ \t\t\t}\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/hasOwnProperty shorthand */\n/******/ \t!function() {\n/******/ \t\t__webpack_require__.o = function(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }\n/******/ \t}();\n/******/ \t\n/************************************************************************/\n/******/ \t// module exports must be returned from runtime so entry inlining is disabled\n/******/ \t// startup\n/******/ \t// Load entry module and return exports\n/******/ \treturn __webpack_require__(686);\n/******/ })()\n.default;\n});", "/*\n * Copyright (c) 2016-2024 Martin Donath \n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to\n * deal in the Software without restriction, including without limitation the\n * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n * sell copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF 
ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n * IN THE SOFTWARE.\n */\n\nimport \"focus-visible\"\n\nimport {\n EMPTY,\n NEVER,\n Observable,\n Subject,\n defer,\n delay,\n filter,\n map,\n merge,\n mergeWith,\n shareReplay,\n switchMap\n} from \"rxjs\"\n\nimport { configuration, feature } from \"./_\"\nimport {\n at,\n getActiveElement,\n getOptionalElement,\n requestJSON,\n setLocation,\n setToggle,\n watchDocument,\n watchKeyboard,\n watchLocation,\n watchLocationTarget,\n watchMedia,\n watchPrint,\n watchScript,\n watchViewport\n} from \"./browser\"\nimport {\n getComponentElement,\n getComponentElements,\n mountAnnounce,\n mountBackToTop,\n mountConsent,\n mountContent,\n mountDialog,\n mountHeader,\n mountHeaderTitle,\n mountPalette,\n mountProgress,\n mountSearch,\n mountSearchHiglight,\n mountSidebar,\n mountSource,\n mountTableOfContents,\n mountTabs,\n watchHeader,\n watchMain\n} from \"./components\"\nimport {\n SearchIndex,\n setupClipboardJS,\n setupInstantNavigation,\n setupVersionSelector\n} from \"./integrations\"\nimport {\n patchEllipsis,\n patchIndeterminate,\n patchScrollfix,\n patchScrolllock\n} from \"./patches\"\nimport \"./polyfills\"\n\n/* ----------------------------------------------------------------------------\n * Functions - @todo refactor\n * ------------------------------------------------------------------------- */\n\n/**\n * Fetch search index\n *\n * @returns Search index observable\n */\nfunction fetchSearchIndex(): Observable {\n if (location.protocol === \"file:\") {\n return watchScript(\n `${new URL(\"search/search_index.js\", config.base)}`\n )\n .pipe(\n // @ts-ignore - @todo fix typings\n map(() => __index),\n shareReplay(1)\n )\n } else {\n return requestJSON(\n new URL(\"search/search_index.json\", config.base)\n )\n }\n}\n\n/* ----------------------------------------------------------------------------\n * Application\n * ------------------------------------------------------------------------- */\n\n/* Yay, JavaScript is available */\ndocument.documentElement.classList.remove(\"no-js\")\ndocument.documentElement.classList.add(\"js\")\n\n/* Set up navigation observables and subjects */\nconst document$ = watchDocument()\nconst location$ = watchLocation()\nconst target$ = watchLocationTarget(location$)\nconst keyboard$ = watchKeyboard()\n\n/* Set up media observables */\nconst viewport$ = watchViewport()\nconst tablet$ = watchMedia(\"(min-width: 960px)\")\nconst screen$ = watchMedia(\"(min-width: 1220px)\")\nconst print$ = watchPrint()\n\n/* Retrieve search index, if search is enabled */\nconst config = configuration()\nconst index$ = document.forms.namedItem(\"search\")\n ? 
fetchSearchIndex()\n : NEVER\n\n/* Set up Clipboard.js integration */\nconst alert$ = new Subject()\nsetupClipboardJS({ alert$ })\n\n/* Set up progress indicator */\nconst progress$ = new Subject()\n\n/* Set up instant navigation, if enabled */\nif (feature(\"navigation.instant\"))\n setupInstantNavigation({ location$, viewport$, progress$ })\n .subscribe(document$)\n\n/* Set up version selector */\nif (config.version?.provider === \"mike\")\n setupVersionSelector({ document$ })\n\n/* Always close drawer and search on navigation */\nmerge(location$, target$)\n .pipe(\n delay(125)\n )\n .subscribe(() => {\n setToggle(\"drawer\", false)\n setToggle(\"search\", false)\n })\n\n/* Set up global keyboard handlers */\nkeyboard$\n .pipe(\n filter(({ mode }) => mode === \"global\")\n )\n .subscribe(key => {\n switch (key.type) {\n\n /* Go to previous page */\n case \"p\":\n case \",\":\n const prev = getOptionalElement(\"link[rel=prev]\")\n if (typeof prev !== \"undefined\")\n setLocation(prev)\n break\n\n /* Go to next page */\n case \"n\":\n case \".\":\n const next = getOptionalElement(\"link[rel=next]\")\n if (typeof next !== \"undefined\")\n setLocation(next)\n break\n\n /* Expand navigation, see https://bit.ly/3ZjG5io */\n case \"Enter\":\n const active = getActiveElement()\n if (active instanceof HTMLLabelElement)\n active.click()\n }\n })\n\n/* Set up patches */\npatchEllipsis({ viewport$, document$ })\npatchIndeterminate({ document$, tablet$ })\npatchScrollfix({ document$ })\npatchScrolllock({ viewport$, tablet$ })\n\n/* Set up header and main area observable */\nconst header$ = watchHeader(getComponentElement(\"header\"), { viewport$ })\nconst main$ = document$\n .pipe(\n map(() => getComponentElement(\"main\")),\n switchMap(el => watchMain(el, { viewport$, header$ })),\n shareReplay(1)\n )\n\n/* Set up control component observables */\nconst control$ = merge(\n\n /* Consent */\n ...getComponentElements(\"consent\")\n .map(el => mountConsent(el, { target$ })),\n\n /* Dialog */\n ...getComponentElements(\"dialog\")\n .map(el => mountDialog(el, { alert$ })),\n\n /* Color palette */\n ...getComponentElements(\"palette\")\n .map(el => mountPalette(el)),\n\n /* Progress bar */\n ...getComponentElements(\"progress\")\n .map(el => mountProgress(el, { progress$ })),\n\n /* Search */\n ...getComponentElements(\"search\")\n .map(el => mountSearch(el, { index$, keyboard$ })),\n\n /* Repository information */\n ...getComponentElements(\"source\")\n .map(el => mountSource(el))\n)\n\n/* Set up content component observables */\nconst content$ = defer(() => merge(\n\n /* Announcement bar */\n ...getComponentElements(\"announce\")\n .map(el => mountAnnounce(el)),\n\n /* Content */\n ...getComponentElements(\"content\")\n .map(el => mountContent(el, { viewport$, target$, print$ })),\n\n /* Search highlighting */\n ...getComponentElements(\"content\")\n .map(el => feature(\"search.highlight\")\n ? mountSearchHiglight(el, { index$, location$ })\n : EMPTY\n ),\n\n /* Header */\n ...getComponentElements(\"header\")\n .map(el => mountHeader(el, { viewport$, header$, main$ })),\n\n /* Header title */\n ...getComponentElements(\"header-title\")\n .map(el => mountHeaderTitle(el, { viewport$, header$ })),\n\n /* Sidebar */\n ...getComponentElements(\"sidebar\")\n .map(el => el.getAttribute(\"data-md-type\") === \"navigation\"\n ? 
at(screen$, () => mountSidebar(el, { viewport$, header$, main$ }))\n : at(tablet$, () => mountSidebar(el, { viewport$, header$, main$ }))\n ),\n\n /* Navigation tabs */\n ...getComponentElements(\"tabs\")\n .map(el => mountTabs(el, { viewport$, header$ })),\n\n /* Table of contents */\n ...getComponentElements(\"toc\")\n .map(el => mountTableOfContents(el, {\n viewport$, header$, main$, target$\n })),\n\n /* Back-to-top button */\n ...getComponentElements(\"top\")\n .map(el => mountBackToTop(el, { viewport$, header$, main$, target$ }))\n))\n\n/* Set up component observables */\nconst component$ = document$\n .pipe(\n switchMap(() => content$),\n mergeWith(control$),\n shareReplay(1)\n )\n\n/* Subscribe to all components */\ncomponent$.subscribe()\n\n/* ----------------------------------------------------------------------------\n * Exports\n * ------------------------------------------------------------------------- */\n\nwindow.document$ = document$ /* Document observable */\nwindow.location$ = location$ /* Location subject */\nwindow.target$ = target$ /* Location target observable */\nwindow.keyboard$ = keyboard$ /* Keyboard observable */\nwindow.viewport$ = viewport$ /* Viewport observable */\nwindow.tablet$ = tablet$ /* Media tablet observable */\nwindow.screen$ = screen$ /* Media screen observable */\nwindow.print$ = print$ /* Media print observable */\nwindow.alert$ = alert$ /* Alert subject */\nwindow.progress$ = progress$ /* Progress indicator subject */\nwindow.component$ = component$ /* Component observable */\n", "/******************************************************************************\nCopyright (c) Microsoft Corporation.\n\nPermission to use, copy, modify, and/or distribute this software for any\npurpose with or without fee is hereby granted.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH\nREGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY\nAND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,\nINDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM\nLOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR\nOTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR\nPERFORMANCE OF THIS SOFTWARE.\n***************************************************************************** */\n/* global Reflect, Promise, SuppressedError, Symbol, Iterator */\n\nvar extendStatics = function(d, b) {\n extendStatics = Object.setPrototypeOf ||\n ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||\n function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };\n return extendStatics(d, b);\n};\n\nexport function __extends(d, b) {\n if (typeof b !== \"function\" && b !== null)\n throw new TypeError(\"Class extends value \" + String(b) + \" is not a constructor or null\");\n extendStatics(d, b);\n function __() { this.constructor = d; }\n d.prototype = b === null ? 
Object.create(b) : (__.prototype = b.prototype, new __());\n}\n\nexport var __assign = function() {\n __assign = Object.assign || function __assign(t) {\n for (var s, i = 1, n = arguments.length; i < n; i++) {\n s = arguments[i];\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p];\n }\n return t;\n }\n return __assign.apply(this, arguments);\n}\n\nexport function __rest(s, e) {\n var t = {};\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p) && e.indexOf(p) < 0)\n t[p] = s[p];\n if (s != null && typeof Object.getOwnPropertySymbols === \"function\")\n for (var i = 0, p = Object.getOwnPropertySymbols(s); i < p.length; i++) {\n if (e.indexOf(p[i]) < 0 && Object.prototype.propertyIsEnumerable.call(s, p[i]))\n t[p[i]] = s[p[i]];\n }\n return t;\n}\n\nexport function __decorate(decorators, target, key, desc) {\n var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;\n if (typeof Reflect === \"object\" && typeof Reflect.decorate === \"function\") r = Reflect.decorate(decorators, target, key, desc);\n else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;\n return c > 3 && r && Object.defineProperty(target, key, r), r;\n}\n\nexport function __param(paramIndex, decorator) {\n return function (target, key) { decorator(target, key, paramIndex); }\n}\n\nexport function __esDecorate(ctor, descriptorIn, decorators, contextIn, initializers, extraInitializers) {\n function accept(f) { if (f !== void 0 && typeof f !== \"function\") throw new TypeError(\"Function expected\"); return f; }\n var kind = contextIn.kind, key = kind === \"getter\" ? \"get\" : kind === \"setter\" ? \"set\" : \"value\";\n var target = !descriptorIn && ctor ? contextIn[\"static\"] ? ctor : ctor.prototype : null;\n var descriptor = descriptorIn || (target ? Object.getOwnPropertyDescriptor(target, contextIn.name) : {});\n var _, done = false;\n for (var i = decorators.length - 1; i >= 0; i--) {\n var context = {};\n for (var p in contextIn) context[p] = p === \"access\" ? {} : contextIn[p];\n for (var p in contextIn.access) context.access[p] = contextIn.access[p];\n context.addInitializer = function (f) { if (done) throw new TypeError(\"Cannot add initializers after decoration has completed\"); extraInitializers.push(accept(f || null)); };\n var result = (0, decorators[i])(kind === \"accessor\" ? { get: descriptor.get, set: descriptor.set } : descriptor[key], context);\n if (kind === \"accessor\") {\n if (result === void 0) continue;\n if (result === null || typeof result !== \"object\") throw new TypeError(\"Object expected\");\n if (_ = accept(result.get)) descriptor.get = _;\n if (_ = accept(result.set)) descriptor.set = _;\n if (_ = accept(result.init)) initializers.unshift(_);\n }\n else if (_ = accept(result)) {\n if (kind === \"field\") initializers.unshift(_);\n else descriptor[key] = _;\n }\n }\n if (target) Object.defineProperty(target, contextIn.name, descriptor);\n done = true;\n};\n\nexport function __runInitializers(thisArg, initializers, value) {\n var useValue = arguments.length > 2;\n for (var i = 0; i < initializers.length; i++) {\n value = useValue ? initializers[i].call(thisArg, value) : initializers[i].call(thisArg);\n }\n return useValue ? value : void 0;\n};\n\nexport function __propKey(x) {\n return typeof x === \"symbol\" ? 
x : \"\".concat(x);\n};\n\nexport function __setFunctionName(f, name, prefix) {\n if (typeof name === \"symbol\") name = name.description ? \"[\".concat(name.description, \"]\") : \"\";\n return Object.defineProperty(f, \"name\", { configurable: true, value: prefix ? \"\".concat(prefix, \" \", name) : name });\n};\n\nexport function __metadata(metadataKey, metadataValue) {\n if (typeof Reflect === \"object\" && typeof Reflect.metadata === \"function\") return Reflect.metadata(metadataKey, metadataValue);\n}\n\nexport function __awaiter(thisArg, _arguments, P, generator) {\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\n return new (P || (P = Promise))(function (resolve, reject) {\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\n function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\n step((generator = generator.apply(thisArg, _arguments || [])).next());\n });\n}\n\nexport function __generator(thisArg, body) {\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === \"function\" ? Iterator : Object).prototype);\n return g.next = verb(0), g[\"throw\"] = verb(1), g[\"return\"] = verb(2), typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\n function verb(n) { return function (v) { return step([n, v]); }; }\n function step(op) {\n if (f) throw new TypeError(\"Generator is already executing.\");\n while (g && (g = 0, op[0] && (_ = 0)), _) try {\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\n if (y = 0, t) op = [op[0] & 2, t.value];\n switch (op[0]) {\n case 0: case 1: t = op; break;\n case 4: _.label++; return { value: op[1], done: false };\n case 5: _.label++; y = op[1]; op = [0]; continue;\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\n default:\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\n if (t[2]) _.ops.pop();\n _.trys.pop(); continue;\n }\n op = body.call(thisArg, _);\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\n if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };\n }\n}\n\nexport var __createBinding = Object.create ? (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n var desc = Object.getOwnPropertyDescriptor(m, k);\n if (!desc || (\"get\" in desc ? 
!m.__esModule : desc.writable || desc.configurable)) {\n desc = { enumerable: true, get: function() { return m[k]; } };\n }\n Object.defineProperty(o, k2, desc);\n}) : (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n o[k2] = m[k];\n});\n\nexport function __exportStar(m, o) {\n for (var p in m) if (p !== \"default\" && !Object.prototype.hasOwnProperty.call(o, p)) __createBinding(o, m, p);\n}\n\nexport function __values(o) {\n var s = typeof Symbol === \"function\" && Symbol.iterator, m = s && o[s], i = 0;\n if (m) return m.call(o);\n if (o && typeof o.length === \"number\") return {\n next: function () {\n if (o && i >= o.length) o = void 0;\n return { value: o && o[i++], done: !o };\n }\n };\n throw new TypeError(s ? \"Object is not iterable.\" : \"Symbol.iterator is not defined.\");\n}\n\nexport function __read(o, n) {\n var m = typeof Symbol === \"function\" && o[Symbol.iterator];\n if (!m) return o;\n var i = m.call(o), r, ar = [], e;\n try {\n while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);\n }\n catch (error) { e = { error: error }; }\n finally {\n try {\n if (r && !r.done && (m = i[\"return\"])) m.call(i);\n }\n finally { if (e) throw e.error; }\n }\n return ar;\n}\n\n/** @deprecated */\nexport function __spread() {\n for (var ar = [], i = 0; i < arguments.length; i++)\n ar = ar.concat(__read(arguments[i]));\n return ar;\n}\n\n/** @deprecated */\nexport function __spreadArrays() {\n for (var s = 0, i = 0, il = arguments.length; i < il; i++) s += arguments[i].length;\n for (var r = Array(s), k = 0, i = 0; i < il; i++)\n for (var a = arguments[i], j = 0, jl = a.length; j < jl; j++, k++)\n r[k] = a[j];\n return r;\n}\n\nexport function __spreadArray(to, from, pack) {\n if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {\n if (ar || !(i in from)) {\n if (!ar) ar = Array.prototype.slice.call(from, 0, i);\n ar[i] = from[i];\n }\n }\n return to.concat(ar || Array.prototype.slice.call(from));\n}\n\nexport function __await(v) {\n return this instanceof __await ? (this.v = v, this) : new __await(v);\n}\n\nexport function __asyncGenerator(thisArg, _arguments, generator) {\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\n var g = generator.apply(thisArg, _arguments || []), i, q = [];\n return i = Object.create((typeof AsyncIterator === \"function\" ? AsyncIterator : Object).prototype), verb(\"next\"), verb(\"throw\"), verb(\"return\", awaitReturn), i[Symbol.asyncIterator] = function () { return this; }, i;\n function awaitReturn(f) { return function (v) { return Promise.resolve(v).then(f, reject); }; }\n function verb(n, f) { if (g[n]) { i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; if (f) i[n] = f(i[n]); } }\n function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }\n function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }\n function fulfill(value) { resume(\"next\", value); }\n function reject(value) { resume(\"throw\", value); }\n function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }\n}\n\nexport function __asyncDelegator(o) {\n var i, p;\n return i = {}, verb(\"next\"), verb(\"throw\", function (e) { throw e; }), verb(\"return\"), i[Symbol.iterator] = function () { return this; }, i;\n function verb(n, f) { i[n] = o[n] ? function (v) { return (p = !p) ? 
{ value: __await(o[n](v)), done: false } : f ? f(v) : v; } : f; }\n}\n\nexport function __asyncValues(o) {\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\n var m = o[Symbol.asyncIterator], i;\n return m ? m.call(o) : (o = typeof __values === \"function\" ? __values(o) : o[Symbol.iterator](), i = {}, verb(\"next\"), verb(\"throw\"), verb(\"return\"), i[Symbol.asyncIterator] = function () { return this; }, i);\n function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }\n function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }\n}\n\nexport function __makeTemplateObject(cooked, raw) {\n if (Object.defineProperty) { Object.defineProperty(cooked, \"raw\", { value: raw }); } else { cooked.raw = raw; }\n return cooked;\n};\n\nvar __setModuleDefault = Object.create ? (function(o, v) {\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\n}) : function(o, v) {\n o[\"default\"] = v;\n};\n\nexport function __importStar(mod) {\n if (mod && mod.__esModule) return mod;\n var result = {};\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\n __setModuleDefault(result, mod);\n return result;\n}\n\nexport function __importDefault(mod) {\n return (mod && mod.__esModule) ? mod : { default: mod };\n}\n\nexport function __classPrivateFieldGet(receiver, state, kind, f) {\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a getter\");\n if (typeof state === \"function\" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot read private member from an object whose class did not declare it\");\n return kind === \"m\" ? f : kind === \"a\" ? f.call(receiver) : f ? f.value : state.get(receiver);\n}\n\nexport function __classPrivateFieldSet(receiver, state, value, kind, f) {\n if (kind === \"m\") throw new TypeError(\"Private method is not writable\");\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a setter\");\n if (typeof state === \"function\" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot write private member to an object whose class did not declare it\");\n return (kind === \"a\" ? f.call(receiver, value) : f ? f.value = value : state.set(receiver, value)), value;\n}\n\nexport function __classPrivateFieldIn(state, receiver) {\n if (receiver === null || (typeof receiver !== \"object\" && typeof receiver !== \"function\")) throw new TypeError(\"Cannot use 'in' operator on non-object\");\n return typeof state === \"function\" ? 
receiver === state : state.has(receiver);\n}\n\nexport function __addDisposableResource(env, value, async) {\n if (value !== null && value !== void 0) {\n if (typeof value !== \"object\" && typeof value !== \"function\") throw new TypeError(\"Object expected.\");\n var dispose, inner;\n if (async) {\n if (!Symbol.asyncDispose) throw new TypeError(\"Symbol.asyncDispose is not defined.\");\n dispose = value[Symbol.asyncDispose];\n }\n if (dispose === void 0) {\n if (!Symbol.dispose) throw new TypeError(\"Symbol.dispose is not defined.\");\n dispose = value[Symbol.dispose];\n if (async) inner = dispose;\n }\n if (typeof dispose !== \"function\") throw new TypeError(\"Object not disposable.\");\n if (inner) dispose = function() { try { inner.call(this); } catch (e) { return Promise.reject(e); } };\n env.stack.push({ value: value, dispose: dispose, async: async });\n }\n else if (async) {\n env.stack.push({ async: true });\n }\n return value;\n}\n\nvar _SuppressedError = typeof SuppressedError === \"function\" ? SuppressedError : function (error, suppressed, message) {\n var e = new Error(message);\n return e.name = \"SuppressedError\", e.error = error, e.suppressed = suppressed, e;\n};\n\nexport function __disposeResources(env) {\n function fail(e) {\n env.error = env.hasError ? new _SuppressedError(e, env.error, \"An error was suppressed during disposal.\") : e;\n env.hasError = true;\n }\n var r, s = 0;\n function next() {\n while (r = env.stack.pop()) {\n try {\n if (!r.async && s === 1) return s = 0, env.stack.push(r), Promise.resolve().then(next);\n if (r.dispose) {\n var result = r.dispose.call(r.value);\n if (r.async) return s |= 2, Promise.resolve(result).then(next, function(e) { fail(e); return next(); });\n }\n else s |= 1;\n }\n catch (e) {\n fail(e);\n }\n }\n if (s === 1) return env.hasError ? Promise.reject(env.error) : Promise.resolve();\n if (env.hasError) throw env.error;\n }\n return next();\n}\n\nexport default {\n __extends,\n __assign,\n __rest,\n __decorate,\n __param,\n __metadata,\n __awaiter,\n __generator,\n __createBinding,\n __exportStar,\n __values,\n __read,\n __spread,\n __spreadArrays,\n __spreadArray,\n __await,\n __asyncGenerator,\n __asyncDelegator,\n __asyncValues,\n __makeTemplateObject,\n __importStar,\n __importDefault,\n __classPrivateFieldGet,\n __classPrivateFieldSet,\n __classPrivateFieldIn,\n __addDisposableResource,\n __disposeResources,\n};\n", "/**\n * Returns true if the object is a function.\n * @param value The value to check\n */\nexport function isFunction(value: any): value is (...args: any[]) => any {\n return typeof value === 'function';\n}\n", "/**\n * Used to create Error subclasses until the community moves away from ES5.\n *\n * This is because compiling from TypeScript down to ES5 has issues with subclassing Errors\n * as well as other built-in types: https://github.com/Microsoft/TypeScript/issues/12123\n *\n * @param createImpl A factory function to create the actual constructor implementation. 
The returned\n * function should be a named function that calls `_super` internally.\n */\nexport function createErrorClass(createImpl: (_super: any) => any): T {\n const _super = (instance: any) => {\n Error.call(instance);\n instance.stack = new Error().stack;\n };\n\n const ctorFunc = createImpl(_super);\n ctorFunc.prototype = Object.create(Error.prototype);\n ctorFunc.prototype.constructor = ctorFunc;\n return ctorFunc;\n}\n", "import { createErrorClass } from './createErrorClass';\n\nexport interface UnsubscriptionError extends Error {\n readonly errors: any[];\n}\n\nexport interface UnsubscriptionErrorCtor {\n /**\n * @deprecated Internal implementation detail. Do not construct error instances.\n * Cannot be tagged as internal: https://github.com/ReactiveX/rxjs/issues/6269\n */\n new (errors: any[]): UnsubscriptionError;\n}\n\n/**\n * An error thrown when one or more errors have occurred during the\n * `unsubscribe` of a {@link Subscription}.\n */\nexport const UnsubscriptionError: UnsubscriptionErrorCtor = createErrorClass(\n (_super) =>\n function UnsubscriptionErrorImpl(this: any, errors: (Error | string)[]) {\n _super(this);\n this.message = errors\n ? `${errors.length} errors occurred during unsubscription:\n${errors.map((err, i) => `${i + 1}) ${err.toString()}`).join('\\n ')}`\n : '';\n this.name = 'UnsubscriptionError';\n this.errors = errors;\n }\n);\n", "/**\n * Removes an item from an array, mutating it.\n * @param arr The array to remove the item from\n * @param item The item to remove\n */\nexport function arrRemove(arr: T[] | undefined | null, item: T) {\n if (arr) {\n const index = arr.indexOf(item);\n 0 <= index && arr.splice(index, 1);\n }\n}\n", "import { isFunction } from './util/isFunction';\nimport { UnsubscriptionError } from './util/UnsubscriptionError';\nimport { SubscriptionLike, TeardownLogic, Unsubscribable } from './types';\nimport { arrRemove } from './util/arrRemove';\n\n/**\n * Represents a disposable resource, such as the execution of an Observable. A\n * Subscription has one important method, `unsubscribe`, that takes no argument\n * and just disposes the resource held by the subscription.\n *\n * Additionally, subscriptions may be grouped together through the `add()`\n * method, which will attach a child Subscription to the current Subscription.\n * When a Subscription is unsubscribed, all its children (and its grandchildren)\n * will be unsubscribed as well.\n *\n * @class Subscription\n */\nexport class Subscription implements SubscriptionLike {\n /** @nocollapse */\n public static EMPTY = (() => {\n const empty = new Subscription();\n empty.closed = true;\n return empty;\n })();\n\n /**\n * A flag to indicate whether this Subscription has already been unsubscribed.\n */\n public closed = false;\n\n private _parentage: Subscription[] | Subscription | null = null;\n\n /**\n * The list of registered finalizers to execute upon unsubscription. Adding and removing from this\n * list occurs in the {@link #add} and {@link #remove} methods.\n */\n private _finalizers: Exclude[] | null = null;\n\n /**\n * @param initialTeardown A function executed first as part of the finalization\n * process that is kicked off when {@link #unsubscribe} is called.\n */\n constructor(private initialTeardown?: () => void) {}\n\n /**\n * Disposes the resources held by the subscription. 
May, for instance, cancel\n * an ongoing Observable execution or cancel any other type of work that\n * started when the Subscription was created.\n * @return {void}\n */\n unsubscribe(): void {\n let errors: any[] | undefined;\n\n if (!this.closed) {\n this.closed = true;\n\n // Remove this from it's parents.\n const { _parentage } = this;\n if (_parentage) {\n this._parentage = null;\n if (Array.isArray(_parentage)) {\n for (const parent of _parentage) {\n parent.remove(this);\n }\n } else {\n _parentage.remove(this);\n }\n }\n\n const { initialTeardown: initialFinalizer } = this;\n if (isFunction(initialFinalizer)) {\n try {\n initialFinalizer();\n } catch (e) {\n errors = e instanceof UnsubscriptionError ? e.errors : [e];\n }\n }\n\n const { _finalizers } = this;\n if (_finalizers) {\n this._finalizers = null;\n for (const finalizer of _finalizers) {\n try {\n execFinalizer(finalizer);\n } catch (err) {\n errors = errors ?? [];\n if (err instanceof UnsubscriptionError) {\n errors = [...errors, ...err.errors];\n } else {\n errors.push(err);\n }\n }\n }\n }\n\n if (errors) {\n throw new UnsubscriptionError(errors);\n }\n }\n }\n\n /**\n * Adds a finalizer to this subscription, so that finalization will be unsubscribed/called\n * when this subscription is unsubscribed. If this subscription is already {@link #closed},\n * because it has already been unsubscribed, then whatever finalizer is passed to it\n * will automatically be executed (unless the finalizer itself is also a closed subscription).\n *\n * Closed Subscriptions cannot be added as finalizers to any subscription. Adding a closed\n * subscription to a any subscription will result in no operation. (A noop).\n *\n * Adding a subscription to itself, or adding `null` or `undefined` will not perform any\n * operation at all. (A noop).\n *\n * `Subscription` instances that are added to this instance will automatically remove themselves\n * if they are unsubscribed. Functions and {@link Unsubscribable} objects that you wish to remove\n * will need to be removed manually with {@link #remove}\n *\n * @param teardown The finalization logic to add to this subscription.\n */\n add(teardown: TeardownLogic): void {\n // Only add the finalizer if it's not undefined\n // and don't add a subscription to itself.\n if (teardown && teardown !== this) {\n if (this.closed) {\n // If this subscription is already closed,\n // execute whatever finalizer is handed to it automatically.\n execFinalizer(teardown);\n } else {\n if (teardown instanceof Subscription) {\n // We don't add closed subscriptions, and we don't add the same subscription\n // twice. Subscription unsubscribe is idempotent.\n if (teardown.closed || teardown._hasParent(this)) {\n return;\n }\n teardown._addParent(this);\n }\n (this._finalizers = this._finalizers ?? 
[]).push(teardown);\n }\n }\n }\n\n /**\n * Checks to see if a this subscription already has a particular parent.\n * This will signal that this subscription has already been added to the parent in question.\n * @param parent the parent to check for\n */\n private _hasParent(parent: Subscription) {\n const { _parentage } = this;\n return _parentage === parent || (Array.isArray(_parentage) && _parentage.includes(parent));\n }\n\n /**\n * Adds a parent to this subscription so it can be removed from the parent if it\n * unsubscribes on it's own.\n *\n * NOTE: THIS ASSUMES THAT {@link _hasParent} HAS ALREADY BEEN CHECKED.\n * @param parent The parent subscription to add\n */\n private _addParent(parent: Subscription) {\n const { _parentage } = this;\n this._parentage = Array.isArray(_parentage) ? (_parentage.push(parent), _parentage) : _parentage ? [_parentage, parent] : parent;\n }\n\n /**\n * Called on a child when it is removed via {@link #remove}.\n * @param parent The parent to remove\n */\n private _removeParent(parent: Subscription) {\n const { _parentage } = this;\n if (_parentage === parent) {\n this._parentage = null;\n } else if (Array.isArray(_parentage)) {\n arrRemove(_parentage, parent);\n }\n }\n\n /**\n * Removes a finalizer from this subscription that was previously added with the {@link #add} method.\n *\n * Note that `Subscription` instances, when unsubscribed, will automatically remove themselves\n * from every other `Subscription` they have been added to. This means that using the `remove` method\n * is not a common thing and should be used thoughtfully.\n *\n * If you add the same finalizer instance of a function or an unsubscribable object to a `Subscription` instance\n * more than once, you will need to call `remove` the same number of times to remove all instances.\n *\n * All finalizer instances are removed to free up memory upon unsubscription.\n *\n * @param teardown The finalizer to remove from this subscription\n */\n remove(teardown: Exclude): void {\n const { _finalizers } = this;\n _finalizers && arrRemove(_finalizers, teardown);\n\n if (teardown instanceof Subscription) {\n teardown._removeParent(this);\n }\n }\n}\n\nexport const EMPTY_SUBSCRIPTION = Subscription.EMPTY;\n\nexport function isSubscription(value: any): value is Subscription {\n return (\n value instanceof Subscription ||\n (value && 'closed' in value && isFunction(value.remove) && isFunction(value.add) && isFunction(value.unsubscribe))\n );\n}\n\nfunction execFinalizer(finalizer: Unsubscribable | (() => void)) {\n if (isFunction(finalizer)) {\n finalizer();\n } else {\n finalizer.unsubscribe();\n }\n}\n", "import { Subscriber } from './Subscriber';\nimport { ObservableNotification } from './types';\n\n/**\n * The {@link GlobalConfig} object for RxJS. It is used to configure things\n * like how to react on unhandled errors.\n */\nexport const config: GlobalConfig = {\n onUnhandledError: null,\n onStoppedNotification: null,\n Promise: undefined,\n useDeprecatedSynchronousErrorHandling: false,\n useDeprecatedNextContext: false,\n};\n\n/**\n * The global configuration object for RxJS, used to configure things\n * like how to react on unhandled errors. Accessible via {@link config}\n * object.\n */\nexport interface GlobalConfig {\n /**\n * A registration point for unhandled errors from RxJS. These are errors that\n * cannot were not handled by consuming code in the usual subscription path. 
For\n * example, if you have this configured, and you subscribe to an observable without\n * providing an error handler, errors from that subscription will end up here. This\n * will _always_ be called asynchronously on another job in the runtime. This is because\n * we do not want errors thrown in this user-configured handler to interfere with the\n * behavior of the library.\n */\n onUnhandledError: ((err: any) => void) | null;\n\n /**\n * A registration point for notifications that cannot be sent to subscribers because they\n * have completed, errored or have been explicitly unsubscribed. By default, next, complete\n * and error notifications sent to stopped subscribers are noops. However, sometimes callers\n * might want a different behavior. For example, with sources that attempt to report errors\n * to stopped subscribers, a caller can configure RxJS to throw an unhandled error instead.\n * This will _always_ be called asynchronously on another job in the runtime. This is because\n * we do not want errors thrown in this user-configured handler to interfere with the\n * behavior of the library.\n */\n onStoppedNotification: ((notification: ObservableNotification, subscriber: Subscriber) => void) | null;\n\n /**\n * The promise constructor used by default for {@link Observable#toPromise toPromise} and {@link Observable#forEach forEach}\n * methods.\n *\n * @deprecated As of version 8, RxJS will no longer support this sort of injection of a\n * Promise constructor. If you need a Promise implementation other than native promises,\n * please polyfill/patch Promise as you see appropriate. Will be removed in v8.\n */\n Promise?: PromiseConstructorLike;\n\n /**\n * If true, turns on synchronous error rethrowing, which is a deprecated behavior\n * in v6 and higher. This behavior enables bad patterns like wrapping a subscribe\n * call in a try/catch block. It also enables producer interference, a nasty bug\n * where a multicast can be broken for all observers by a downstream consumer with\n * an unhandled error. DO NOT USE THIS FLAG UNLESS IT'S NEEDED TO BUY TIME\n * FOR MIGRATION REASONS.\n *\n * @deprecated As of version 8, RxJS will no longer support synchronous throwing\n * of unhandled errors. All errors will be thrown on a separate call stack to prevent bad\n * behaviors described above. Will be removed in v8.\n */\n useDeprecatedSynchronousErrorHandling: boolean;\n\n /**\n * If true, enables an as-of-yet undocumented feature from v5: The ability to access\n * `unsubscribe()` via `this` context in `next` functions created in observers passed\n * to `subscribe`.\n *\n * This is being removed because the performance was severely problematic, and it could also cause\n * issues when types other than POJOs are passed to subscribe as subscribers, as they will likely have\n * their `this` context overwritten.\n *\n * @deprecated As of version 8, RxJS will no longer support altering the\n * context of next functions provided as part of an observer to Subscribe. Instead,\n * you will have access to a subscription or a signal or token that will allow you to do things like\n * unsubscribe and test closed status. 
Will be removed in v8.\n */\n useDeprecatedNextContext: boolean;\n}\n", "import type { TimerHandle } from './timerHandle';\ntype SetTimeoutFunction = (handler: () => void, timeout?: number, ...args: any[]) => TimerHandle;\ntype ClearTimeoutFunction = (handle: TimerHandle) => void;\n\ninterface TimeoutProvider {\n setTimeout: SetTimeoutFunction;\n clearTimeout: ClearTimeoutFunction;\n delegate:\n | {\n setTimeout: SetTimeoutFunction;\n clearTimeout: ClearTimeoutFunction;\n }\n | undefined;\n}\n\nexport const timeoutProvider: TimeoutProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n setTimeout(handler: () => void, timeout?: number, ...args) {\n const { delegate } = timeoutProvider;\n if (delegate?.setTimeout) {\n return delegate.setTimeout(handler, timeout, ...args);\n }\n return setTimeout(handler, timeout, ...args);\n },\n clearTimeout(handle) {\n const { delegate } = timeoutProvider;\n return (delegate?.clearTimeout || clearTimeout)(handle as any);\n },\n delegate: undefined,\n};\n", "import { config } from '../config';\nimport { timeoutProvider } from '../scheduler/timeoutProvider';\n\n/**\n * Handles an error on another job either with the user-configured {@link onUnhandledError},\n * or by throwing it on that new job so it can be picked up by `window.onerror`, `process.on('error')`, etc.\n *\n * This should be called whenever there is an error that is out-of-band with the subscription\n * or when an error hits a terminal boundary of the subscription and no error handler was provided.\n *\n * @param err the error to report\n */\nexport function reportUnhandledError(err: any) {\n timeoutProvider.setTimeout(() => {\n const { onUnhandledError } = config;\n if (onUnhandledError) {\n // Execute the user-configured error handler.\n onUnhandledError(err);\n } else {\n // Throw so it is picked up by the runtime's uncaught error mechanism.\n throw err;\n }\n });\n}\n", "/* tslint:disable:no-empty */\nexport function noop() { }\n", "import { CompleteNotification, NextNotification, ErrorNotification } from './types';\n\n/**\n * A completion object optimized for memory use and created to be the\n * same \"shape\" as other notifications in v8.\n * @internal\n */\nexport const COMPLETE_NOTIFICATION = (() => createNotification('C', undefined, undefined) as CompleteNotification)();\n\n/**\n * Internal use only. Creates an optimized error notification that is the same \"shape\"\n * as other notifications.\n * @internal\n */\nexport function errorNotification(error: any): ErrorNotification {\n return createNotification('E', undefined, error) as any;\n}\n\n/**\n * Internal use only. Creates an optimized next notification that is the same \"shape\"\n * as other notifications.\n * @internal\n */\nexport function nextNotification(value: T) {\n return createNotification('N', value, undefined) as NextNotification;\n}\n\n/**\n * Ensures that all notifications created internally have the same \"shape\" in v8.\n *\n * TODO: This is only exported to support a crazy legacy test in `groupBy`.\n * @internal\n */\nexport function createNotification(kind: 'N' | 'E' | 'C', value: any, error: any) {\n return {\n kind,\n value,\n error,\n };\n}\n", "import { config } from '../config';\n\nlet context: { errorThrown: boolean; error: any } | null = null;\n\n/**\n * Handles dealing with errors for super-gross mode. 
Creates a context, in which\n * any synchronously thrown errors will be passed to {@link captureError}. Which\n * will record the error such that it will be rethrown after the call back is complete.\n * TODO: Remove in v8\n * @param cb An immediately executed function.\n */\nexport function errorContext(cb: () => void) {\n if (config.useDeprecatedSynchronousErrorHandling) {\n const isRoot = !context;\n if (isRoot) {\n context = { errorThrown: false, error: null };\n }\n cb();\n if (isRoot) {\n const { errorThrown, error } = context!;\n context = null;\n if (errorThrown) {\n throw error;\n }\n }\n } else {\n // This is the general non-deprecated path for everyone that\n // isn't crazy enough to use super-gross mode (useDeprecatedSynchronousErrorHandling)\n cb();\n }\n}\n\n/**\n * Captures errors only in super-gross mode.\n * @param err the error to capture\n */\nexport function captureError(err: any) {\n if (config.useDeprecatedSynchronousErrorHandling && context) {\n context.errorThrown = true;\n context.error = err;\n }\n}\n", "import { isFunction } from './util/isFunction';\nimport { Observer, ObservableNotification } from './types';\nimport { isSubscription, Subscription } from './Subscription';\nimport { config } from './config';\nimport { reportUnhandledError } from './util/reportUnhandledError';\nimport { noop } from './util/noop';\nimport { nextNotification, errorNotification, COMPLETE_NOTIFICATION } from './NotificationFactories';\nimport { timeoutProvider } from './scheduler/timeoutProvider';\nimport { captureError } from './util/errorContext';\n\n/**\n * Implements the {@link Observer} interface and extends the\n * {@link Subscription} class. While the {@link Observer} is the public API for\n * consuming the values of an {@link Observable}, all Observers get converted to\n * a Subscriber, in order to provide Subscription-like capabilities such as\n * `unsubscribe`. Subscriber is a common type in RxJS, and crucial for\n * implementing operators, but it is rarely used as a public API.\n *\n * @class Subscriber\n */\nexport class Subscriber extends Subscription implements Observer {\n /**\n * A static factory for a Subscriber, given a (potentially partial) definition\n * of an Observer.\n * @param next The `next` callback of an Observer.\n * @param error The `error` callback of an\n * Observer.\n * @param complete The `complete` callback of an\n * Observer.\n * @return A Subscriber wrapping the (partially defined)\n * Observer represented by the given arguments.\n * @nocollapse\n * @deprecated Do not use. Will be removed in v8. There is no replacement for this\n * method, and there is no reason to be creating instances of `Subscriber` directly.\n * If you have a specific use case, please file an issue.\n */\n static create(next?: (x?: T) => void, error?: (e?: any) => void, complete?: () => void): Subscriber {\n return new SafeSubscriber(next, error, complete);\n }\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n protected isStopped: boolean = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n protected destination: Subscriber | Observer; // this `any` is the escape hatch to erase extra type param (e.g. R)\n\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n * There is no reason to directly create an instance of Subscriber. 
This type is exported for typings reasons.\n */\n constructor(destination?: Subscriber | Observer) {\n super();\n if (destination) {\n this.destination = destination;\n // Automatically chain subscriptions together here.\n // if destination is a Subscription, then it is a Subscriber.\n if (isSubscription(destination)) {\n destination.add(this);\n }\n } else {\n this.destination = EMPTY_OBSERVER;\n }\n }\n\n /**\n * The {@link Observer} callback to receive notifications of type `next` from\n * the Observable, with a value. The Observable may call this method 0 or more\n * times.\n * @param {T} [value] The `next` value.\n * @return {void}\n */\n next(value?: T): void {\n if (this.isStopped) {\n handleStoppedNotification(nextNotification(value), this);\n } else {\n this._next(value!);\n }\n }\n\n /**\n * The {@link Observer} callback to receive notifications of type `error` from\n * the Observable, with an attached `Error`. Notifies the Observer that\n * the Observable has experienced an error condition.\n * @param {any} [err] The `error` exception.\n * @return {void}\n */\n error(err?: any): void {\n if (this.isStopped) {\n handleStoppedNotification(errorNotification(err), this);\n } else {\n this.isStopped = true;\n this._error(err);\n }\n }\n\n /**\n * The {@link Observer} callback to receive a valueless notification of type\n * `complete` from the Observable. Notifies the Observer that the Observable\n * has finished sending push-based notifications.\n * @return {void}\n */\n complete(): void {\n if (this.isStopped) {\n handleStoppedNotification(COMPLETE_NOTIFICATION, this);\n } else {\n this.isStopped = true;\n this._complete();\n }\n }\n\n unsubscribe(): void {\n if (!this.closed) {\n this.isStopped = true;\n super.unsubscribe();\n this.destination = null!;\n }\n }\n\n protected _next(value: T): void {\n this.destination.next(value);\n }\n\n protected _error(err: any): void {\n try {\n this.destination.error(err);\n } finally {\n this.unsubscribe();\n }\n }\n\n protected _complete(): void {\n try {\n this.destination.complete();\n } finally {\n this.unsubscribe();\n }\n }\n}\n\n/**\n * This bind is captured here because we want to be able to have\n * compatibility with monoid libraries that tend to use a method named\n * `bind`. 
In particular, a library called Monio requires this.\n */\nconst _bind = Function.prototype.bind;\n\nfunction bind any>(fn: Fn, thisArg: any): Fn {\n return _bind.call(fn, thisArg);\n}\n\n/**\n * Internal optimization only, DO NOT EXPOSE.\n * @internal\n */\nclass ConsumerObserver implements Observer {\n constructor(private partialObserver: Partial>) {}\n\n next(value: T): void {\n const { partialObserver } = this;\n if (partialObserver.next) {\n try {\n partialObserver.next(value);\n } catch (error) {\n handleUnhandledError(error);\n }\n }\n }\n\n error(err: any): void {\n const { partialObserver } = this;\n if (partialObserver.error) {\n try {\n partialObserver.error(err);\n } catch (error) {\n handleUnhandledError(error);\n }\n } else {\n handleUnhandledError(err);\n }\n }\n\n complete(): void {\n const { partialObserver } = this;\n if (partialObserver.complete) {\n try {\n partialObserver.complete();\n } catch (error) {\n handleUnhandledError(error);\n }\n }\n }\n}\n\nexport class SafeSubscriber extends Subscriber {\n constructor(\n observerOrNext?: Partial> | ((value: T) => void) | null,\n error?: ((e?: any) => void) | null,\n complete?: (() => void) | null\n ) {\n super();\n\n let partialObserver: Partial>;\n if (isFunction(observerOrNext) || !observerOrNext) {\n // The first argument is a function, not an observer. The next\n // two arguments *could* be observers, or they could be empty.\n partialObserver = {\n next: (observerOrNext ?? undefined) as (((value: T) => void) | undefined),\n error: error ?? undefined,\n complete: complete ?? undefined,\n };\n } else {\n // The first argument is a partial observer.\n let context: any;\n if (this && config.useDeprecatedNextContext) {\n // This is a deprecated path that made `this.unsubscribe()` available in\n // next handler functions passed to subscribe. This only exists behind a flag\n // now, as it is *very* slow.\n context = Object.create(observerOrNext);\n context.unsubscribe = () => this.unsubscribe();\n partialObserver = {\n next: observerOrNext.next && bind(observerOrNext.next, context),\n error: observerOrNext.error && bind(observerOrNext.error, context),\n complete: observerOrNext.complete && bind(observerOrNext.complete, context),\n };\n } else {\n // The \"normal\" path. 
Just use the partial observer directly.\n partialObserver = observerOrNext;\n }\n }\n\n // Wrap the partial observer to ensure it's a full observer, and\n // make sure proper error handling is accounted for.\n this.destination = new ConsumerObserver(partialObserver);\n }\n}\n\nfunction handleUnhandledError(error: any) {\n if (config.useDeprecatedSynchronousErrorHandling) {\n captureError(error);\n } else {\n // Ideal path, we report this as an unhandled error,\n // which is thrown on a new call stack.\n reportUnhandledError(error);\n }\n}\n\n/**\n * An error handler used when no error handler was supplied\n * to the SafeSubscriber -- meaning no error handler was supplied\n * do the `subscribe` call on our observable.\n * @param err The error to handle\n */\nfunction defaultErrorHandler(err: any) {\n throw err;\n}\n\n/**\n * A handler for notifications that cannot be sent to a stopped subscriber.\n * @param notification The notification being sent\n * @param subscriber The stopped subscriber\n */\nfunction handleStoppedNotification(notification: ObservableNotification, subscriber: Subscriber) {\n const { onStoppedNotification } = config;\n onStoppedNotification && timeoutProvider.setTimeout(() => onStoppedNotification(notification, subscriber));\n}\n\n/**\n * The observer used as a stub for subscriptions where the user did not\n * pass any arguments to `subscribe`. Comes with the default error handling\n * behavior.\n */\nexport const EMPTY_OBSERVER: Readonly> & { closed: true } = {\n closed: true,\n next: noop,\n error: defaultErrorHandler,\n complete: noop,\n};\n", "/**\n * Symbol.observable or a string \"@@observable\". Used for interop\n *\n * @deprecated We will no longer be exporting this symbol in upcoming versions of RxJS.\n * Instead polyfill and use Symbol.observable directly *or* use https://www.npmjs.com/package/symbol-observable\n */\nexport const observable: string | symbol = (() => (typeof Symbol === 'function' && Symbol.observable) || '@@observable')();\n", "/**\n * This function takes one parameter and just returns it. Simply put,\n * this is like `(x: T): T => x`.\n *\n * ## Examples\n *\n * This is useful in some cases when using things like `mergeMap`\n *\n * ```ts\n * import { interval, take, map, range, mergeMap, identity } from 'rxjs';\n *\n * const source$ = interval(1000).pipe(take(5));\n *\n * const result$ = source$.pipe(\n * map(i => range(i)),\n * mergeMap(identity) // same as mergeMap(x => x)\n * );\n *\n * result$.subscribe({\n * next: console.log\n * });\n * ```\n *\n * Or when you want to selectively apply an operator\n *\n * ```ts\n * import { interval, take, identity } from 'rxjs';\n *\n * const shouldLimit = () => Math.random() < 0.5;\n *\n * const source$ = interval(1000);\n *\n * const result$ = source$.pipe(shouldLimit() ? 
take(5) : identity);\n *\n * result$.subscribe({\n * next: console.log\n * });\n * ```\n *\n * @param x Any value that is returned by this function\n * @returns The value passed as the first parameter to this function\n */\nexport function identity(x: T): T {\n return x;\n}\n", "import { identity } from './identity';\nimport { UnaryFunction } from '../types';\n\nexport function pipe(): typeof identity;\nexport function pipe(fn1: UnaryFunction): UnaryFunction;\nexport function pipe(fn1: UnaryFunction, fn2: UnaryFunction): UnaryFunction;\nexport function pipe(fn1: UnaryFunction, fn2: UnaryFunction, fn3: UnaryFunction): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction,\n fn9: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction,\n fn9: UnaryFunction,\n ...fns: UnaryFunction[]\n): UnaryFunction;\n\n/**\n * pipe() can be called on one or more functions, each of which can take one argument (\"UnaryFunction\")\n * and uses it to return a value.\n * It returns a function that takes one argument, passes it to the first UnaryFunction, and then\n * passes the result to the next one, passes that result to the next one, and so on. \n */\nexport function pipe(...fns: Array>): UnaryFunction {\n return pipeFromArray(fns);\n}\n\n/** @internal */\nexport function pipeFromArray(fns: Array>): UnaryFunction {\n if (fns.length === 0) {\n return identity as UnaryFunction;\n }\n\n if (fns.length === 1) {\n return fns[0];\n }\n\n return function piped(input: T): R {\n return fns.reduce((prev: any, fn: UnaryFunction) => fn(prev), input as any);\n };\n}\n", "import { Operator } from './Operator';\nimport { SafeSubscriber, Subscriber } from './Subscriber';\nimport { isSubscription, Subscription } from './Subscription';\nimport { TeardownLogic, OperatorFunction, Subscribable, Observer } from './types';\nimport { observable as Symbol_observable } from './symbol/observable';\nimport { pipeFromArray } from './util/pipe';\nimport { config } from './config';\nimport { isFunction } from './util/isFunction';\nimport { errorContext } from './util/errorContext';\n\n/**\n * A representation of any set of values over any amount of time. This is the most basic building block\n * of RxJS.\n *\n * @class Observable\n */\nexport class Observable implements Subscribable {\n /**\n * @deprecated Internal implementation detail, do not use directly. 
Will be made internal in v8.\n */\n source: Observable | undefined;\n\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n */\n operator: Operator | undefined;\n\n /**\n * @constructor\n * @param {Function} subscribe the function that is called when the Observable is\n * initially subscribed to. This function is given a Subscriber, to which new values\n * can be `next`ed, or an `error` method can be called to raise an error, or\n * `complete` can be called to notify of a successful completion.\n */\n constructor(subscribe?: (this: Observable, subscriber: Subscriber) => TeardownLogic) {\n if (subscribe) {\n this._subscribe = subscribe;\n }\n }\n\n // HACK: Since TypeScript inherits static properties too, we have to\n // fight against TypeScript here so Subject can have a different static create signature\n /**\n * Creates a new Observable by calling the Observable constructor\n * @owner Observable\n * @method create\n * @param {Function} subscribe? the subscriber function to be passed to the Observable constructor\n * @return {Observable} a new observable\n * @nocollapse\n * @deprecated Use `new Observable()` instead. Will be removed in v8.\n */\n static create: (...args: any[]) => any = (subscribe?: (subscriber: Subscriber) => TeardownLogic) => {\n return new Observable(subscribe);\n };\n\n /**\n * Creates a new Observable, with this Observable instance as the source, and the passed\n * operator defined as the new observable's operator.\n * @method lift\n * @param operator the operator defining the operation to take on the observable\n * @return a new observable with the Operator applied\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n * If you have implemented an operator using `lift`, it is recommended that you create an\n * operator by simply returning `new Observable()` directly. See \"Creating new operators from\n * scratch\" section here: https://rxjs.dev/guide/operators\n */\n lift(operator?: Operator): Observable {\n const observable = new Observable();\n observable.source = this;\n observable.operator = operator;\n return observable;\n }\n\n subscribe(observerOrNext?: Partial> | ((value: T) => void)): Subscription;\n /** @deprecated Instead of passing separate callback arguments, use an observer argument. Signatures taking separate callback arguments will be removed in v8. Details: https://rxjs.dev/deprecations/subscribe-arguments */\n subscribe(next?: ((value: T) => void) | null, error?: ((error: any) => void) | null, complete?: (() => void) | null): Subscription;\n /**\n * Invokes an execution of an Observable and registers Observer handlers for notifications it will emit.\n *\n * Use it when you have all these Observables, but still nothing is happening.\n *\n * `subscribe` is not a regular operator, but a method that calls Observable's internal `subscribe` function. It\n * might be for example a function that you passed to Observable's constructor, but most of the time it is\n * a library implementation, which defines what will be emitted by an Observable, and when it be will emitted. This means\n * that calling `subscribe` is actually the moment when Observable starts its work, not when it is created, as it is often\n * the thought.\n *\n * Apart from starting the execution of an Observable, this method allows you to listen for values\n * that an Observable emits, as well as for when it completes or errors. 
You can achieve this in two\n * of the following ways.\n *\n * The first way is creating an object that implements {@link Observer} interface. It should have methods\n * defined by that interface, but note that it should be just a regular JavaScript object, which you can create\n * yourself in any way you want (ES6 class, classic function constructor, object literal etc.). In particular, do\n * not attempt to use any RxJS implementation details to create Observers - you don't need them. Remember also\n * that your object does not have to implement all methods. If you find yourself creating a method that doesn't\n * do anything, you can simply omit it. Note however, if the `error` method is not provided and an error happens,\n * it will be thrown asynchronously. Errors thrown asynchronously cannot be caught using `try`/`catch`. Instead,\n * use the {@link onUnhandledError} configuration option or use a runtime handler (like `window.onerror` or\n * `process.on('error)`) to be notified of unhandled errors. Because of this, it's recommended that you provide\n * an `error` method to avoid missing thrown errors.\n *\n * The second way is to give up on Observer object altogether and simply provide callback functions in place of its methods.\n * This means you can provide three functions as arguments to `subscribe`, where the first function is equivalent\n * of a `next` method, the second of an `error` method and the third of a `complete` method. Just as in case of an Observer,\n * if you do not need to listen for something, you can omit a function by passing `undefined` or `null`,\n * since `subscribe` recognizes these functions by where they were placed in function call. When it comes\n * to the `error` function, as with an Observer, if not provided, errors emitted by an Observable will be thrown asynchronously.\n *\n * You can, however, subscribe with no parameters at all. This may be the case where you're not interested in terminal events\n * and you also handled emissions internally by using operators (e.g. using `tap`).\n *\n * Whichever style of calling `subscribe` you use, in both cases it returns a Subscription object.\n * This object allows you to call `unsubscribe` on it, which in turn will stop the work that an Observable does and will clean\n * up all resources that an Observable used. Note that cancelling a subscription will not call `complete` callback\n * provided to `subscribe` function, which is reserved for a regular completion signal that comes from an Observable.\n *\n * Remember that callbacks provided to `subscribe` are not guaranteed to be called asynchronously.\n * It is an Observable itself that decides when these functions will be called. For example {@link of}\n * by default emits all its values synchronously. 
Always check documentation for how given Observable\n * will behave when subscribed and if its default behavior can be modified with a `scheduler`.\n *\n * #### Examples\n *\n * Subscribe with an {@link guide/observer Observer}\n *\n * ```ts\n * import { of } from 'rxjs';\n *\n * const sumObserver = {\n * sum: 0,\n * next(value) {\n * console.log('Adding: ' + value);\n * this.sum = this.sum + value;\n * },\n * error() {\n * // We actually could just remove this method,\n * // since we do not really care about errors right now.\n * },\n * complete() {\n * console.log('Sum equals: ' + this.sum);\n * }\n * };\n *\n * of(1, 2, 3) // Synchronously emits 1, 2, 3 and then completes.\n * .subscribe(sumObserver);\n *\n * // Logs:\n * // 'Adding: 1'\n * // 'Adding: 2'\n * // 'Adding: 3'\n * // 'Sum equals: 6'\n * ```\n *\n * Subscribe with functions ({@link deprecations/subscribe-arguments deprecated})\n *\n * ```ts\n * import { of } from 'rxjs'\n *\n * let sum = 0;\n *\n * of(1, 2, 3).subscribe(\n * value => {\n * console.log('Adding: ' + value);\n * sum = sum + value;\n * },\n * undefined,\n * () => console.log('Sum equals: ' + sum)\n * );\n *\n * // Logs:\n * // 'Adding: 1'\n * // 'Adding: 2'\n * // 'Adding: 3'\n * // 'Sum equals: 6'\n * ```\n *\n * Cancel a subscription\n *\n * ```ts\n * import { interval } from 'rxjs';\n *\n * const subscription = interval(1000).subscribe({\n * next(num) {\n * console.log(num)\n * },\n * complete() {\n * // Will not be called, even when cancelling subscription.\n * console.log('completed!');\n * }\n * });\n *\n * setTimeout(() => {\n * subscription.unsubscribe();\n * console.log('unsubscribed!');\n * }, 2500);\n *\n * // Logs:\n * // 0 after 1s\n * // 1 after 2s\n * // 'unsubscribed!' after 2.5s\n * ```\n *\n * @param {Observer|Function} observerOrNext (optional) Either an observer with methods to be called,\n * or the first of three possible handlers, which is the handler for each value emitted from the subscribed\n * Observable.\n * @param {Function} error (optional) A handler for a terminal event resulting from an error. If no error handler is provided,\n * the error will be thrown asynchronously as unhandled.\n * @param {Function} complete (optional) A handler for a terminal event resulting from successful completion.\n * @return {Subscription} a subscription reference to the registered handlers\n * @method subscribe\n */\n subscribe(\n observerOrNext?: Partial> | ((value: T) => void) | null,\n error?: ((error: any) => void) | null,\n complete?: (() => void) | null\n ): Subscription {\n const subscriber = isSubscriber(observerOrNext) ? observerOrNext : new SafeSubscriber(observerOrNext, error, complete);\n\n errorContext(() => {\n const { operator, source } = this;\n subscriber.add(\n operator\n ? // We're dealing with a subscription in the\n // operator chain to one of our lifted operators.\n operator.call(subscriber, source)\n : source\n ? // If `source` has a value, but `operator` does not, something that\n // had intimate knowledge of our API, like our `Subject`, must have\n // set it. 
We're going to just call `_subscribe` directly.\n this._subscribe(subscriber)\n : // In all other cases, we're likely wrapping a user-provided initializer\n // function, so we need to catch errors and handle them appropriately.\n this._trySubscribe(subscriber)\n );\n });\n\n return subscriber;\n }\n\n /** @internal */\n protected _trySubscribe(sink: Subscriber): TeardownLogic {\n try {\n return this._subscribe(sink);\n } catch (err) {\n // We don't need to return anything in this case,\n // because it's just going to try to `add()` to a subscription\n // above.\n sink.error(err);\n }\n }\n\n /**\n * Used as a NON-CANCELLABLE means of subscribing to an observable, for use with\n * APIs that expect promises, like `async/await`. You cannot unsubscribe from this.\n *\n * **WARNING**: Only use this with observables you *know* will complete. If the source\n * observable does not complete, you will end up with a promise that is hung up, and\n * potentially all of the state of an async function hanging out in memory. To avoid\n * this situation, look into adding something like {@link timeout}, {@link take},\n * {@link takeWhile}, or {@link takeUntil} amongst others.\n *\n * #### Example\n *\n * ```ts\n * import { interval, take } from 'rxjs';\n *\n * const source$ = interval(1000).pipe(take(4));\n *\n * async function getTotal() {\n * let total = 0;\n *\n * await source$.forEach(value => {\n * total += value;\n * console.log('observable -> ' + value);\n * });\n *\n * return total;\n * }\n *\n * getTotal().then(\n * total => console.log('Total: ' + total)\n * );\n *\n * // Expected:\n * // 'observable -> 0'\n * // 'observable -> 1'\n * // 'observable -> 2'\n * // 'observable -> 3'\n * // 'Total: 6'\n * ```\n *\n * @param next a handler for each value emitted by the observable\n * @return a promise that either resolves on observable completion or\n * rejects with the handled error\n */\n forEach(next: (value: T) => void): Promise;\n\n /**\n * @param next a handler for each value emitted by the observable\n * @param promiseCtor a constructor function used to instantiate the Promise\n * @return a promise that either resolves on observable completion or\n * rejects with the handled error\n * @deprecated Passing a Promise constructor will no longer be available\n * in upcoming versions of RxJS. This is because it adds weight to the library, for very\n * little benefit. If you need this functionality, it is recommended that you either\n * polyfill Promise, or you create an adapter to convert the returned native promise\n * to whatever promise implementation you wanted. 
Will be removed in v8.\n */\n forEach(next: (value: T) => void, promiseCtor: PromiseConstructorLike): Promise;\n\n forEach(next: (value: T) => void, promiseCtor?: PromiseConstructorLike): Promise {\n promiseCtor = getPromiseCtor(promiseCtor);\n\n return new promiseCtor((resolve, reject) => {\n const subscriber = new SafeSubscriber({\n next: (value) => {\n try {\n next(value);\n } catch (err) {\n reject(err);\n subscriber.unsubscribe();\n }\n },\n error: reject,\n complete: resolve,\n });\n this.subscribe(subscriber);\n }) as Promise;\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): TeardownLogic {\n return this.source?.subscribe(subscriber);\n }\n\n /**\n * An interop point defined by the es7-observable spec https://github.com/zenparsing/es-observable\n * @method Symbol.observable\n * @return {Observable} this instance of the observable\n */\n [Symbol_observable]() {\n return this;\n }\n\n /* tslint:disable:max-line-length */\n pipe(): Observable;\n pipe(op1: OperatorFunction): Observable;\n pipe(op1: OperatorFunction, op2: OperatorFunction): Observable;\n pipe(op1: OperatorFunction, op2: OperatorFunction, op3: OperatorFunction): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction,\n op9: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction,\n op9: OperatorFunction,\n ...operations: OperatorFunction[]\n ): Observable;\n /* tslint:enable:max-line-length */\n\n /**\n * Used to stitch together functional operators into a chain.\n * @method pipe\n * @return {Observable} the Observable result of all of the operators having\n * been called in the order they were passed in.\n *\n * ## Example\n *\n * ```ts\n * import { interval, filter, map, scan } from 'rxjs';\n *\n * interval(1000)\n * .pipe(\n * filter(x => x % 2 === 0),\n * map(x => x + x),\n * scan((acc, x) => acc + x)\n * )\n * .subscribe(x => console.log(x));\n * ```\n */\n pipe(...operations: OperatorFunction[]): Observable {\n return pipeFromArray(operations)(this);\n }\n\n /* tslint:disable:max-line-length */\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(): Promise;\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. 
Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(PromiseCtor: typeof Promise): Promise;\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(PromiseCtor: PromiseConstructorLike): Promise;\n /* tslint:enable:max-line-length */\n\n /**\n * Subscribe to this Observable and get a Promise resolving on\n * `complete` with the last emission (if any).\n *\n * **WARNING**: Only use this with observables you *know* will complete. If the source\n * observable does not complete, you will end up with a promise that is hung up, and\n * potentially all of the state of an async function hanging out in memory. To avoid\n * this situation, look into adding something like {@link timeout}, {@link take},\n * {@link takeWhile}, or {@link takeUntil} amongst others.\n *\n * @method toPromise\n * @param [promiseCtor] a constructor function used to instantiate\n * the Promise\n * @return A Promise that resolves with the last value emit, or\n * rejects on an error. If there were no emissions, Promise\n * resolves with undefined.\n * @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise\n */\n toPromise(promiseCtor?: PromiseConstructorLike): Promise {\n promiseCtor = getPromiseCtor(promiseCtor);\n\n return new promiseCtor((resolve, reject) => {\n let value: T | undefined;\n this.subscribe(\n (x: T) => (value = x),\n (err: any) => reject(err),\n () => resolve(value)\n );\n }) as Promise;\n }\n}\n\n/**\n * Decides between a passed promise constructor from consuming code,\n * A default configured promise constructor, and the native promise\n * constructor and returns it. If nothing can be found, it will throw\n * an error.\n * @param promiseCtor The optional promise constructor to passed by consuming code\n */\nfunction getPromiseCtor(promiseCtor: PromiseConstructorLike | undefined) {\n return promiseCtor ?? config.Promise ?? Promise;\n}\n\nfunction isObserver(value: any): value is Observer {\n return value && isFunction(value.next) && isFunction(value.error) && isFunction(value.complete);\n}\n\nfunction isSubscriber(value: any): value is Subscriber {\n return (value && value instanceof Subscriber) || (isObserver(value) && isSubscription(value));\n}\n", "import { Observable } from '../Observable';\nimport { Subscriber } from '../Subscriber';\nimport { OperatorFunction } from '../types';\nimport { isFunction } from './isFunction';\n\n/**\n * Used to determine if an object is an Observable with a lift function.\n */\nexport function hasLift(source: any): source is { lift: InstanceType['lift'] } {\n return isFunction(source?.lift);\n}\n\n/**\n * Creates an `OperatorFunction`. 
Used to define operators throughout the library in a concise way.\n * @param init The logic to connect the liftedSource to the subscriber at the moment of subscription.\n */\nexport function operate(\n init: (liftedSource: Observable, subscriber: Subscriber) => (() => void) | void\n): OperatorFunction {\n return (source: Observable) => {\n if (hasLift(source)) {\n return source.lift(function (this: Subscriber, liftedSource: Observable) {\n try {\n return init(liftedSource, this);\n } catch (err) {\n this.error(err);\n }\n });\n }\n throw new TypeError('Unable to lift unknown Observable type');\n };\n}\n", "import { Subscriber } from '../Subscriber';\n\n/**\n * Creates an instance of an `OperatorSubscriber`.\n * @param destination The downstream subscriber.\n * @param onNext Handles next values, only called if this subscriber is not stopped or closed. Any\n * error that occurs in this function is caught and sent to the `error` method of this subscriber.\n * @param onError Handles errors from the subscription, any errors that occur in this handler are caught\n * and send to the `destination` error handler.\n * @param onComplete Handles completion notification from the subscription. Any errors that occur in\n * this handler are sent to the `destination` error handler.\n * @param onFinalize Additional teardown logic here. This will only be called on teardown if the\n * subscriber itself is not already closed. This is called after all other teardown logic is executed.\n */\nexport function createOperatorSubscriber(\n destination: Subscriber,\n onNext?: (value: T) => void,\n onComplete?: () => void,\n onError?: (err: any) => void,\n onFinalize?: () => void\n): Subscriber {\n return new OperatorSubscriber(destination, onNext, onComplete, onError, onFinalize);\n}\n\n/**\n * A generic helper for allowing operators to be created with a Subscriber and\n * use closures to capture necessary state from the operator function itself.\n */\nexport class OperatorSubscriber extends Subscriber {\n /**\n * Creates an instance of an `OperatorSubscriber`.\n * @param destination The downstream subscriber.\n * @param onNext Handles next values, only called if this subscriber is not stopped or closed. Any\n * error that occurs in this function is caught and sent to the `error` method of this subscriber.\n * @param onError Handles errors from the subscription, any errors that occur in this handler are caught\n * and send to the `destination` error handler.\n * @param onComplete Handles completion notification from the subscription. Any errors that occur in\n * this handler are sent to the `destination` error handler.\n * @param onFinalize Additional finalization logic here. This will only be called on finalization if the\n * subscriber itself is not already closed. This is called after all other finalization logic is executed.\n * @param shouldUnsubscribe An optional check to see if an unsubscribe call should truly unsubscribe.\n * NOTE: This currently **ONLY** exists to support the strange behavior of {@link groupBy}, where unsubscription\n * to the resulting observable does not actually disconnect from the source if there are active subscriptions\n * to any grouped observable. 
(DO NOT EXPOSE OR USE EXTERNALLY!!!)\n */\n constructor(\n destination: Subscriber,\n onNext?: (value: T) => void,\n onComplete?: () => void,\n onError?: (err: any) => void,\n private onFinalize?: () => void,\n private shouldUnsubscribe?: () => boolean\n ) {\n // It's important - for performance reasons - that all of this class's\n // members are initialized and that they are always initialized in the same\n // order. This will ensure that all OperatorSubscriber instances have the\n // same hidden class in V8. This, in turn, will help keep the number of\n // hidden classes involved in property accesses within the base class as\n // low as possible. If the number of hidden classes involved exceeds four,\n // the property accesses will become megamorphic and performance penalties\n // will be incurred - i.e. inline caches won't be used.\n //\n // The reasons for ensuring all instances have the same hidden class are\n // further discussed in this blog post from Benedikt Meurer:\n // https://benediktmeurer.de/2018/03/23/impact-of-polymorphism-on-component-based-frameworks-like-react/\n super(destination);\n this._next = onNext\n ? function (this: OperatorSubscriber, value: T) {\n try {\n onNext(value);\n } catch (err) {\n destination.error(err);\n }\n }\n : super._next;\n this._error = onError\n ? function (this: OperatorSubscriber, err: any) {\n try {\n onError(err);\n } catch (err) {\n // Send any errors that occur down stream.\n destination.error(err);\n } finally {\n // Ensure finalization.\n this.unsubscribe();\n }\n }\n : super._error;\n this._complete = onComplete\n ? function (this: OperatorSubscriber) {\n try {\n onComplete();\n } catch (err) {\n // Send any errors that occur down stream.\n destination.error(err);\n } finally {\n // Ensure finalization.\n this.unsubscribe();\n }\n }\n : super._complete;\n }\n\n unsubscribe() {\n if (!this.shouldUnsubscribe || this.shouldUnsubscribe()) {\n const { closed } = this;\n super.unsubscribe();\n // Execute additional teardown if we have any and we didn't already do so.\n !closed && this.onFinalize?.();\n }\n }\n}\n", "import { Subscription } from '../Subscription';\n\ninterface AnimationFrameProvider {\n schedule(callback: FrameRequestCallback): Subscription;\n requestAnimationFrame: typeof requestAnimationFrame;\n cancelAnimationFrame: typeof cancelAnimationFrame;\n delegate:\n | {\n requestAnimationFrame: typeof requestAnimationFrame;\n cancelAnimationFrame: typeof cancelAnimationFrame;\n }\n | undefined;\n}\n\nexport const animationFrameProvider: AnimationFrameProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n schedule(callback) {\n let request = requestAnimationFrame;\n let cancel: typeof cancelAnimationFrame | undefined = cancelAnimationFrame;\n const { delegate } = animationFrameProvider;\n if (delegate) {\n request = delegate.requestAnimationFrame;\n cancel = delegate.cancelAnimationFrame;\n }\n const handle = request((timestamp) => {\n // Clear the cancel function. 
The request has been fulfilled, so\n // attempting to cancel the request upon unsubscription would be\n // pointless.\n cancel = undefined;\n callback(timestamp);\n });\n return new Subscription(() => cancel?.(handle));\n },\n requestAnimationFrame(...args) {\n const { delegate } = animationFrameProvider;\n return (delegate?.requestAnimationFrame || requestAnimationFrame)(...args);\n },\n cancelAnimationFrame(...args) {\n const { delegate } = animationFrameProvider;\n return (delegate?.cancelAnimationFrame || cancelAnimationFrame)(...args);\n },\n delegate: undefined,\n};\n", "import { createErrorClass } from './createErrorClass';\n\nexport interface ObjectUnsubscribedError extends Error {}\n\nexport interface ObjectUnsubscribedErrorCtor {\n /**\n * @deprecated Internal implementation detail. Do not construct error instances.\n * Cannot be tagged as internal: https://github.com/ReactiveX/rxjs/issues/6269\n */\n new (): ObjectUnsubscribedError;\n}\n\n/**\n * An error thrown when an action is invalid because the object has been\n * unsubscribed.\n *\n * @see {@link Subject}\n * @see {@link BehaviorSubject}\n *\n * @class ObjectUnsubscribedError\n */\nexport const ObjectUnsubscribedError: ObjectUnsubscribedErrorCtor = createErrorClass(\n (_super) =>\n function ObjectUnsubscribedErrorImpl(this: any) {\n _super(this);\n this.name = 'ObjectUnsubscribedError';\n this.message = 'object unsubscribed';\n }\n);\n", "import { Operator } from './Operator';\nimport { Observable } from './Observable';\nimport { Subscriber } from './Subscriber';\nimport { Subscription, EMPTY_SUBSCRIPTION } from './Subscription';\nimport { Observer, SubscriptionLike, TeardownLogic } from './types';\nimport { ObjectUnsubscribedError } from './util/ObjectUnsubscribedError';\nimport { arrRemove } from './util/arrRemove';\nimport { errorContext } from './util/errorContext';\n\n/**\n * A Subject is a special type of Observable that allows values to be\n * multicasted to many Observers. Subjects are like EventEmitters.\n *\n * Every Subject is an Observable and an Observer. You can subscribe to a\n * Subject, and you can call next to feed values as well as error and complete.\n */\nexport class Subject extends Observable implements SubscriptionLike {\n closed = false;\n\n private currentObservers: Observer[] | null = null;\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n observers: Observer[] = [];\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n isStopped = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n hasError = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n thrownError: any = null;\n\n /**\n * Creates a \"subject\" by basically gluing an observer to an observable.\n *\n * @nocollapse\n * @deprecated Recommended you do not use. Will be removed at some point in the future. Plans for replacement still under discussion.\n */\n static create: (...args: any[]) => any = (destination: Observer, source: Observable): AnonymousSubject => {\n return new AnonymousSubject(destination, source);\n };\n\n constructor() {\n // NOTE: This must be here to obscure Observable's constructor.\n super();\n }\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. 
*/\n lift(operator: Operator): Observable {\n const subject = new AnonymousSubject(this, this);\n subject.operator = operator as any;\n return subject as any;\n }\n\n /** @internal */\n protected _throwIfClosed() {\n if (this.closed) {\n throw new ObjectUnsubscribedError();\n }\n }\n\n next(value: T) {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n if (!this.currentObservers) {\n this.currentObservers = Array.from(this.observers);\n }\n for (const observer of this.currentObservers) {\n observer.next(value);\n }\n }\n });\n }\n\n error(err: any) {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n this.hasError = this.isStopped = true;\n this.thrownError = err;\n const { observers } = this;\n while (observers.length) {\n observers.shift()!.error(err);\n }\n }\n });\n }\n\n complete() {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n this.isStopped = true;\n const { observers } = this;\n while (observers.length) {\n observers.shift()!.complete();\n }\n }\n });\n }\n\n unsubscribe() {\n this.isStopped = this.closed = true;\n this.observers = this.currentObservers = null!;\n }\n\n get observed() {\n return this.observers?.length > 0;\n }\n\n /** @internal */\n protected _trySubscribe(subscriber: Subscriber): TeardownLogic {\n this._throwIfClosed();\n return super._trySubscribe(subscriber);\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n this._throwIfClosed();\n this._checkFinalizedStatuses(subscriber);\n return this._innerSubscribe(subscriber);\n }\n\n /** @internal */\n protected _innerSubscribe(subscriber: Subscriber) {\n const { hasError, isStopped, observers } = this;\n if (hasError || isStopped) {\n return EMPTY_SUBSCRIPTION;\n }\n this.currentObservers = null;\n observers.push(subscriber);\n return new Subscription(() => {\n this.currentObservers = null;\n arrRemove(observers, subscriber);\n });\n }\n\n /** @internal */\n protected _checkFinalizedStatuses(subscriber: Subscriber) {\n const { hasError, thrownError, isStopped } = this;\n if (hasError) {\n subscriber.error(thrownError);\n } else if (isStopped) {\n subscriber.complete();\n }\n }\n\n /**\n * Creates a new Observable with this Subject as the source. You can do this\n * to create custom Observer-side logic of the Subject and conceal it from\n * code that uses the Observable.\n * @return {Observable} Observable that the Subject casts to\n */\n asObservable(): Observable {\n const observable: any = new Observable();\n observable.source = this;\n return observable;\n }\n}\n\n/**\n * @class AnonymousSubject\n */\nexport class AnonymousSubject extends Subject {\n constructor(\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n public destination?: Observer,\n source?: Observable\n ) {\n super();\n this.source = source;\n }\n\n next(value: T) {\n this.destination?.next?.(value);\n }\n\n error(err: any) {\n this.destination?.error?.(err);\n }\n\n complete() {\n this.destination?.complete?.();\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n return this.source?.subscribe(subscriber) ?? 
EMPTY_SUBSCRIPTION;\n }\n}\n", "import { Subject } from './Subject';\nimport { Subscriber } from './Subscriber';\nimport { Subscription } from './Subscription';\n\n/**\n * A variant of Subject that requires an initial value and emits its current\n * value whenever it is subscribed to.\n *\n * @class BehaviorSubject\n */\nexport class BehaviorSubject extends Subject {\n constructor(private _value: T) {\n super();\n }\n\n get value(): T {\n return this.getValue();\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n const subscription = super._subscribe(subscriber);\n !subscription.closed && subscriber.next(this._value);\n return subscription;\n }\n\n getValue(): T {\n const { hasError, thrownError, _value } = this;\n if (hasError) {\n throw thrownError;\n }\n this._throwIfClosed();\n return _value;\n }\n\n next(value: T): void {\n super.next((this._value = value));\n }\n}\n", "import { TimestampProvider } from '../types';\n\ninterface DateTimestampProvider extends TimestampProvider {\n delegate: TimestampProvider | undefined;\n}\n\nexport const dateTimestampProvider: DateTimestampProvider = {\n now() {\n // Use the variable rather than `this` so that the function can be called\n // without being bound to the provider.\n return (dateTimestampProvider.delegate || Date).now();\n },\n delegate: undefined,\n};\n", "import { Subject } from './Subject';\nimport { TimestampProvider } from './types';\nimport { Subscriber } from './Subscriber';\nimport { Subscription } from './Subscription';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * A variant of {@link Subject} that \"replays\" old values to new subscribers by emitting them when they first subscribe.\n *\n * `ReplaySubject` has an internal buffer that will store a specified number of values that it has observed. Like `Subject`,\n * `ReplaySubject` \"observes\" values by having them passed to its `next` method. When it observes a value, it will store that\n * value for a time determined by the configuration of the `ReplaySubject`, as passed to its constructor.\n *\n * When a new subscriber subscribes to the `ReplaySubject` instance, it will synchronously emit all values in its buffer in\n * a First-In-First-Out (FIFO) manner. The `ReplaySubject` will also complete, if it has observed completion; and it will\n * error if it has observed an error.\n *\n * There are two main configuration items to be concerned with:\n *\n * 1. `bufferSize` - This will determine how many items are stored in the buffer, defaults to infinite.\n * 2. `windowTime` - The amount of time to hold a value in the buffer before removing it from the buffer.\n *\n * Both configurations may exist simultaneously. So if you would like to buffer a maximum of 3 values, as long as the values\n * are less than 2 seconds old, you could do so with a `new ReplaySubject(3, 2000)`.\n *\n * ### Differences with BehaviorSubject\n *\n * `BehaviorSubject` is similar to `new ReplaySubject(1)`, with a couple of exceptions:\n *\n * 1. `BehaviorSubject` comes \"primed\" with a single value upon construction.\n * 2. 
`ReplaySubject` will replay values, even after observing an error, where `BehaviorSubject` will not.\n *\n * @see {@link Subject}\n * @see {@link BehaviorSubject}\n * @see {@link shareReplay}\n */\nexport class ReplaySubject extends Subject {\n private _buffer: (T | number)[] = [];\n private _infiniteTimeWindow = true;\n\n /**\n * @param bufferSize The size of the buffer to replay on subscription\n * @param windowTime The amount of time the buffered items will stay buffered\n * @param timestampProvider An object with a `now()` method that provides the current timestamp. This is used to\n * calculate the amount of time something has been buffered.\n */\n constructor(\n private _bufferSize = Infinity,\n private _windowTime = Infinity,\n private _timestampProvider: TimestampProvider = dateTimestampProvider\n ) {\n super();\n this._infiniteTimeWindow = _windowTime === Infinity;\n this._bufferSize = Math.max(1, _bufferSize);\n this._windowTime = Math.max(1, _windowTime);\n }\n\n next(value: T): void {\n const { isStopped, _buffer, _infiniteTimeWindow, _timestampProvider, _windowTime } = this;\n if (!isStopped) {\n _buffer.push(value);\n !_infiniteTimeWindow && _buffer.push(_timestampProvider.now() + _windowTime);\n }\n this._trimBuffer();\n super.next(value);\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n this._throwIfClosed();\n this._trimBuffer();\n\n const subscription = this._innerSubscribe(subscriber);\n\n const { _infiniteTimeWindow, _buffer } = this;\n // We use a copy here, so reentrant code does not mutate our array while we're\n // emitting it to a new subscriber.\n const copy = _buffer.slice();\n for (let i = 0; i < copy.length && !subscriber.closed; i += _infiniteTimeWindow ? 1 : 2) {\n subscriber.next(copy[i] as T);\n }\n\n this._checkFinalizedStatuses(subscriber);\n\n return subscription;\n }\n\n private _trimBuffer() {\n const { _bufferSize, _timestampProvider, _buffer, _infiniteTimeWindow } = this;\n // If we don't have an infinite buffer size, and we're over the length,\n // use splice to truncate the old buffer values off. Note that we have to\n // double the size for instances where we're not using an infinite time window\n // because we're storing the values and the timestamps in the same array.\n const adjustedBufferSize = (_infiniteTimeWindow ? 1 : 2) * _bufferSize;\n _bufferSize < Infinity && adjustedBufferSize < _buffer.length && _buffer.splice(0, _buffer.length - adjustedBufferSize);\n\n // Now, if we're not in an infinite time window, remove all values where the time is\n // older than what is allowed.\n if (!_infiniteTimeWindow) {\n const now = _timestampProvider.now();\n let last = 0;\n // Search the array for the first timestamp that isn't expired and\n // truncate the buffer up to that point.\n for (let i = 1; i < _buffer.length && (_buffer[i] as number) <= now; i += 2) {\n last = i;\n }\n last && _buffer.splice(0, last + 1);\n }\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Subscription } from '../Subscription';\nimport { SchedulerAction } from '../types';\n\n/**\n * A unit of work to be executed in a `scheduler`. 
An action is typically\n * created from within a {@link SchedulerLike} and an RxJS user does not need to concern\n * themselves about creating and manipulating an Action.\n *\n * ```ts\n * class Action extends Subscription {\n * new (scheduler: Scheduler, work: (state?: T) => void);\n * schedule(state?: T, delay: number = 0): Subscription;\n * }\n * ```\n *\n * @class Action\n */\nexport class Action extends Subscription {\n constructor(scheduler: Scheduler, work: (this: SchedulerAction, state?: T) => void) {\n super();\n }\n /**\n * Schedules this action on its parent {@link SchedulerLike} for execution. May be passed\n * some context object, `state`. May happen at some point in the future,\n * according to the `delay` parameter, if specified.\n * @param {T} [state] Some contextual data that the `work` function uses when\n * called by the Scheduler.\n * @param {number} [delay] Time to wait before executing the work, where the\n * time unit is implicit and defined by the Scheduler.\n * @return {void}\n */\n public schedule(state?: T, delay: number = 0): Subscription {\n return this;\n }\n}\n", "import type { TimerHandle } from './timerHandle';\ntype SetIntervalFunction = (handler: () => void, timeout?: number, ...args: any[]) => TimerHandle;\ntype ClearIntervalFunction = (handle: TimerHandle) => void;\n\ninterface IntervalProvider {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n delegate:\n | {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n }\n | undefined;\n}\n\nexport const intervalProvider: IntervalProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n setInterval(handler: () => void, timeout?: number, ...args) {\n const { delegate } = intervalProvider;\n if (delegate?.setInterval) {\n return delegate.setInterval(handler, timeout, ...args);\n }\n return setInterval(handler, timeout, ...args);\n },\n clearInterval(handle) {\n const { delegate } = intervalProvider;\n return (delegate?.clearInterval || clearInterval)(handle as any);\n },\n delegate: undefined,\n};\n", "import { Action } from './Action';\nimport { SchedulerAction } from '../types';\nimport { Subscription } from '../Subscription';\nimport { AsyncScheduler } from './AsyncScheduler';\nimport { intervalProvider } from './intervalProvider';\nimport { arrRemove } from '../util/arrRemove';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncAction extends Action {\n public id: TimerHandle | undefined;\n public state?: T;\n // @ts-ignore: Property has no initializer and is not definitely assigned\n public delay: number;\n protected pending: boolean = false;\n\n constructor(protected scheduler: AsyncScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n public schedule(state?: T, delay: number = 0): Subscription {\n if (this.closed) {\n return this;\n }\n\n // Always replace the current state with the new state.\n this.state = state;\n\n const id = this.id;\n const scheduler = this.scheduler;\n\n //\n // Important implementation note:\n //\n // Actions only execute once by default, unless rescheduled from within the\n // scheduled callback. 
This allows us to implement single and repeat\n // actions via the same code path, without adding API surface area, as well\n // as mimic traditional recursion but across asynchronous boundaries.\n //\n // However, JS runtimes and timers distinguish between intervals achieved by\n // serial `setTimeout` calls vs. a single `setInterval` call. An interval of\n // serial `setTimeout` calls can be individually delayed, which delays\n // scheduling the next `setTimeout`, and so on. `setInterval` attempts to\n // guarantee the interval callback will be invoked more precisely to the\n // interval period, regardless of load.\n //\n // Therefore, we use `setInterval` to schedule single and repeat actions.\n // If the action reschedules itself with the same delay, the interval is not\n // canceled. If the action doesn't reschedule, or reschedules with a\n // different delay, the interval will be canceled after scheduled callback\n // execution.\n //\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, delay);\n }\n\n // Set the pending flag indicating that this action has been scheduled, or\n // has recursively rescheduled itself.\n this.pending = true;\n\n this.delay = delay;\n // If this action has already an async Id, don't request a new one.\n this.id = this.id ?? this.requestAsyncId(scheduler, this.id, delay);\n\n return this;\n }\n\n protected requestAsyncId(scheduler: AsyncScheduler, _id?: TimerHandle, delay: number = 0): TimerHandle {\n return intervalProvider.setInterval(scheduler.flush.bind(scheduler, this), delay);\n }\n\n protected recycleAsyncId(_scheduler: AsyncScheduler, id?: TimerHandle, delay: number | null = 0): TimerHandle | undefined {\n // If this action is rescheduled with the same delay time, don't clear the interval id.\n if (delay != null && this.delay === delay && this.pending === false) {\n return id;\n }\n // Otherwise, if the action's delay time is different from the current delay,\n // or the action has been rescheduled before it's executed, clear the interval id\n if (id != null) {\n intervalProvider.clearInterval(id);\n }\n\n return undefined;\n }\n\n /**\n * Immediately executes this action and the `work` it contains.\n * @return {any}\n */\n public execute(state: T, delay: number): any {\n if (this.closed) {\n return new Error('executing a cancelled action');\n }\n\n this.pending = false;\n const error = this._execute(state, delay);\n if (error) {\n return error;\n } else if (this.pending === false && this.id != null) {\n // Dequeue if the action didn't reschedule itself. Don't call\n // unsubscribe(), because the action could reschedule later.\n // For example:\n // ```\n // scheduler.schedule(function doWork(counter) {\n // /* ... I'm a busy worker bee ... */\n // var originalAction = this;\n // /* wait 100ms before rescheduling the action */\n // setTimeout(function () {\n // originalAction.schedule(counter + 1);\n // }, 100);\n // }, 1000);\n // ```\n this.id = this.recycleAsyncId(this.scheduler, this.id, null);\n }\n }\n\n protected _execute(state: T, _delay: number): any {\n let errored: boolean = false;\n let errorValue: any;\n try {\n this.work(state);\n } catch (e) {\n errored = true;\n // HACK: Since code elsewhere is relying on the \"truthiness\" of the\n // return here, we can't have it return \"\" or 0 or false.\n // TODO: Clean this up when we refactor schedulers mid-version-8 or so.\n errorValue = e ? 
e : new Error('Scheduled action threw falsy error');\n }\n if (errored) {\n this.unsubscribe();\n return errorValue;\n }\n }\n\n unsubscribe() {\n if (!this.closed) {\n const { id, scheduler } = this;\n const { actions } = scheduler;\n\n this.work = this.state = this.scheduler = null!;\n this.pending = false;\n\n arrRemove(actions, this);\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, null);\n }\n\n this.delay = null!;\n super.unsubscribe();\n }\n }\n}\n", "import { Action } from './scheduler/Action';\nimport { Subscription } from './Subscription';\nimport { SchedulerLike, SchedulerAction } from './types';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * An execution context and a data structure to order tasks and schedule their\n * execution. Provides a notion of (potentially virtual) time, through the\n * `now()` getter method.\n *\n * Each unit of work in a Scheduler is called an `Action`.\n *\n * ```ts\n * class Scheduler {\n * now(): number;\n * schedule(work, delay?, state?): Subscription;\n * }\n * ```\n *\n * @class Scheduler\n * @deprecated Scheduler is an internal implementation detail of RxJS, and\n * should not be used directly. Rather, create your own class and implement\n * {@link SchedulerLike}. Will be made internal in v8.\n */\nexport class Scheduler implements SchedulerLike {\n public static now: () => number = dateTimestampProvider.now;\n\n constructor(private schedulerActionCtor: typeof Action, now: () => number = Scheduler.now) {\n this.now = now;\n }\n\n /**\n * A getter method that returns a number representing the current time\n * (at the time this function was called) according to the scheduler's own\n * internal clock.\n * @return {number} A number that represents the current time. May or may not\n * have a relation to wall-clock time. May or may not refer to a time unit\n * (e.g. milliseconds).\n */\n public now: () => number;\n\n /**\n * Schedules a function, `work`, for execution. May happen at some point in\n * the future, according to the `delay` parameter, if specified. 
May be passed\n * some context object, `state`, which will be passed to the `work` function.\n *\n * The given arguments will be processed an stored as an Action object in a\n * queue of actions.\n *\n * @param {function(state: ?T): ?Subscription} work A function representing a\n * task, or some unit of work to be executed by the Scheduler.\n * @param {number} [delay] Time to wait before executing the work, where the\n * time unit is implicit and defined by the Scheduler itself.\n * @param {T} [state] Some contextual data that the `work` function uses when\n * called by the Scheduler.\n * @return {Subscription} A subscription in order to be able to unsubscribe\n * the scheduled work.\n */\n public schedule(work: (this: SchedulerAction, state?: T) => void, delay: number = 0, state?: T): Subscription {\n return new this.schedulerActionCtor(this, work).schedule(state, delay);\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Action } from './Action';\nimport { AsyncAction } from './AsyncAction';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncScheduler extends Scheduler {\n public actions: Array> = [];\n /**\n * A flag to indicate whether the Scheduler is currently executing a batch of\n * queued actions.\n * @type {boolean}\n * @internal\n */\n public _active: boolean = false;\n /**\n * An internal ID used to track the latest asynchronous task such as those\n * coming from `setTimeout`, `setInterval`, `requestAnimationFrame`, and\n * others.\n * @type {any}\n * @internal\n */\n public _scheduled: TimerHandle | undefined;\n\n constructor(SchedulerAction: typeof Action, now: () => number = Scheduler.now) {\n super(SchedulerAction, now);\n }\n\n public flush(action: AsyncAction): void {\n const { actions } = this;\n\n if (this._active) {\n actions.push(action);\n return;\n }\n\n let error: any;\n this._active = true;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions.shift()!)); // exhaust the scheduler queue\n\n this._active = false;\n\n if (error) {\n while ((action = actions.shift()!)) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\n/**\n *\n * Async Scheduler\n *\n * Schedule task as if you used setTimeout(task, duration)\n *\n * `async` scheduler schedules tasks asynchronously, by putting them on the JavaScript\n * event loop queue. 
It is best used to delay tasks in time or to schedule tasks repeating\n * in intervals.\n *\n * If you just want to \"defer\" task, that is to perform it right after currently\n * executing synchronous code ends (commonly achieved by `setTimeout(deferredTask, 0)`),\n * better choice will be the {@link asapScheduler} scheduler.\n *\n * ## Examples\n * Use async scheduler to delay task\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * const task = () => console.log('it works!');\n *\n * asyncScheduler.schedule(task, 2000);\n *\n * // After 2 seconds logs:\n * // \"it works!\"\n * ```\n *\n * Use async scheduler to repeat task in intervals\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * function task(state) {\n * console.log(state);\n * this.schedule(state + 1, 1000); // `this` references currently executing Action,\n * // which we reschedule with new state and delay\n * }\n *\n * asyncScheduler.schedule(task, 3000, 0);\n *\n * // Logs:\n * // 0 after 3s\n * // 1 after 4s\n * // 2 after 5s\n * // 3 after 6s\n * ```\n */\n\nexport const asyncScheduler = new AsyncScheduler(AsyncAction);\n\n/**\n * @deprecated Renamed to {@link asyncScheduler}. Will be removed in v8.\n */\nexport const async = asyncScheduler;\n", "import { AsyncAction } from './AsyncAction';\nimport { Subscription } from '../Subscription';\nimport { QueueScheduler } from './QueueScheduler';\nimport { SchedulerAction } from '../types';\nimport { TimerHandle } from './timerHandle';\n\nexport class QueueAction extends AsyncAction {\n constructor(protected scheduler: QueueScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n public schedule(state?: T, delay: number = 0): Subscription {\n if (delay > 0) {\n return super.schedule(state, delay);\n }\n this.delay = delay;\n this.state = state;\n this.scheduler.flush(this);\n return this;\n }\n\n public execute(state: T, delay: number): any {\n return delay > 0 || this.closed ? super.execute(state, delay) : this._execute(state, delay);\n }\n\n protected requestAsyncId(scheduler: QueueScheduler, id?: TimerHandle, delay: number = 0): TimerHandle {\n // If delay exists and is greater than 0, or if the delay is null (the\n // action wasn't rescheduled) but was originally scheduled as an async\n // action, then recycle as an async action.\n\n if ((delay != null && delay > 0) || (delay == null && this.delay > 0)) {\n return super.requestAsyncId(scheduler, id, delay);\n }\n\n // Otherwise flush the scheduler starting with this action.\n scheduler.flush(this);\n\n // HACK: In the past, this was returning `void`. However, `void` isn't a valid\n // `TimerHandle`, and generally the return value here isn't really used. So the\n // compromise is to return `0` which is both \"falsy\" and a valid `TimerHandle`,\n // as opposed to refactoring every other instanceo of `requestAsyncId`.\n return 0;\n }\n}\n", "import { AsyncScheduler } from './AsyncScheduler';\n\nexport class QueueScheduler extends AsyncScheduler {\n}\n", "import { QueueAction } from './QueueAction';\nimport { QueueScheduler } from './QueueScheduler';\n\n/**\n *\n * Queue Scheduler\n *\n * Put every next task on a queue, instead of executing it immediately\n *\n * `queue` scheduler, when used with delay, behaves the same as {@link asyncScheduler} scheduler.\n *\n * When used without delay, it schedules given task synchronously - executes it right when\n * it is scheduled. 
However when called recursively, that is when inside the scheduled task,\n * another task is scheduled with queue scheduler, instead of executing immediately as well,\n * that task will be put on a queue and wait for current one to finish.\n *\n * This means that when you execute task with `queue` scheduler, you are sure it will end\n * before any other task scheduled with that scheduler will start.\n *\n * ## Examples\n * Schedule recursively first, then do something\n * ```ts\n * import { queueScheduler } from 'rxjs';\n *\n * queueScheduler.schedule(() => {\n * queueScheduler.schedule(() => console.log('second')); // will not happen now, but will be put on a queue\n *\n * console.log('first');\n * });\n *\n * // Logs:\n * // \"first\"\n * // \"second\"\n * ```\n *\n * Reschedule itself recursively\n * ```ts\n * import { queueScheduler } from 'rxjs';\n *\n * queueScheduler.schedule(function(state) {\n * if (state !== 0) {\n * console.log('before', state);\n * this.schedule(state - 1); // `this` references currently executing Action,\n * // which we reschedule with new state\n * console.log('after', state);\n * }\n * }, 0, 3);\n *\n * // In scheduler that runs recursively, you would expect:\n * // \"before\", 3\n * // \"before\", 2\n * // \"before\", 1\n * // \"after\", 1\n * // \"after\", 2\n * // \"after\", 3\n *\n * // But with queue it logs:\n * // \"before\", 3\n * // \"after\", 3\n * // \"before\", 2\n * // \"after\", 2\n * // \"before\", 1\n * // \"after\", 1\n * ```\n */\n\nexport const queueScheduler = new QueueScheduler(QueueAction);\n\n/**\n * @deprecated Renamed to {@link queueScheduler}. Will be removed in v8.\n */\nexport const queue = queueScheduler;\n", "import { AsyncAction } from './AsyncAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\nimport { SchedulerAction } from '../types';\nimport { animationFrameProvider } from './animationFrameProvider';\nimport { TimerHandle } from './timerHandle';\n\nexport class AnimationFrameAction extends AsyncAction {\n constructor(protected scheduler: AnimationFrameScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n protected requestAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle {\n // If delay is greater than 0, request as an async action.\n if (delay !== null && delay > 0) {\n return super.requestAsyncId(scheduler, id, delay);\n }\n // Push the action to the end of the scheduler queue.\n scheduler.actions.push(this);\n // If an animation frame has already been requested, don't request another\n // one. If an animation frame hasn't been requested yet, request one. Return\n // the current animation frame request id.\n return scheduler._scheduled || (scheduler._scheduled = animationFrameProvider.requestAnimationFrame(() => scheduler.flush(undefined)));\n }\n\n protected recycleAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle | undefined {\n // If delay exists and is greater than 0, or if the delay is null (the\n // action wasn't rescheduled) but was originally scheduled as an async\n // action, then recycle as an async action.\n if (delay != null ? 
delay > 0 : this.delay > 0) {\n return super.recycleAsyncId(scheduler, id, delay);\n }\n // If the scheduler queue has no remaining actions with the same async id,\n // cancel the requested animation frame and set the scheduled flag to\n // undefined so the next AnimationFrameAction will request its own.\n const { actions } = scheduler;\n if (id != null && actions[actions.length - 1]?.id !== id) {\n animationFrameProvider.cancelAnimationFrame(id as number);\n scheduler._scheduled = undefined;\n }\n // Return undefined so the action knows to request a new async id if it's rescheduled.\n return undefined;\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\nexport class AnimationFrameScheduler extends AsyncScheduler {\n public flush(action?: AsyncAction): void {\n this._active = true;\n // The async id that effects a call to flush is stored in _scheduled.\n // Before executing an action, it's necessary to check the action's async\n // id to determine whether it's supposed to be executed in the current\n // flush.\n // Previous implementations of this method used a count to determine this,\n // but that was unsound, as actions that are unsubscribed - i.e. cancelled -\n // are removed from the actions array and that can shift actions that are\n // scheduled to be executed in a subsequent flush into positions at which\n // they are executed within the current flush.\n const flushId = this._scheduled;\n this._scheduled = undefined;\n\n const { actions } = this;\n let error: any;\n action = action || actions.shift()!;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions[0]) && action.id === flushId && actions.shift());\n\n this._active = false;\n\n if (error) {\n while ((action = actions[0]) && action.id === flushId && actions.shift()) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AnimationFrameAction } from './AnimationFrameAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\n\n/**\n *\n * Animation Frame Scheduler\n *\n * Perform task when `window.requestAnimationFrame` would fire\n *\n * When `animationFrame` scheduler is used with delay, it will fall back to {@link asyncScheduler} scheduler\n * behaviour.\n *\n * Without delay, `animationFrame` scheduler can be used to create smooth browser animations.\n * It makes sure scheduled task will happen just before next browser content repaint,\n * thus performing animations as efficiently as possible.\n *\n * ## Example\n * Schedule div height animation\n * ```ts\n * // html:
\n * import { animationFrameScheduler } from 'rxjs';\n *\n * const div = document.querySelector('div');\n *\n * animationFrameScheduler.schedule(function(height) {\n * div.style.height = height + \"px\";\n *\n * this.schedule(height + 1); // `this` references currently executing Action,\n * // which we reschedule with new state\n * }, 0, 0);\n *\n * // You will see a div element growing in height\n * ```\n */\n\nexport const animationFrameScheduler = new AnimationFrameScheduler(AnimationFrameAction);\n\n/**\n * @deprecated Renamed to {@link animationFrameScheduler}. Will be removed in v8.\n */\nexport const animationFrame = animationFrameScheduler;\n", "import { Observable } from '../Observable';\nimport { SchedulerLike } from '../types';\n\n/**\n * A simple Observable that emits no items to the Observer and immediately\n * emits a complete notification.\n *\n * Just emits 'complete', and nothing else.\n *\n * ![](empty.png)\n *\n * A simple Observable that only emits the complete notification. It can be used\n * for composing with other Observables, such as in a {@link mergeMap}.\n *\n * ## Examples\n *\n * Log complete notification\n *\n * ```ts\n * import { EMPTY } from 'rxjs';\n *\n * EMPTY.subscribe({\n * next: () => console.log('Next'),\n * complete: () => console.log('Complete!')\n * });\n *\n * // Outputs\n * // Complete!\n * ```\n *\n * Emit the number 7, then complete\n *\n * ```ts\n * import { EMPTY, startWith } from 'rxjs';\n *\n * const result = EMPTY.pipe(startWith(7));\n * result.subscribe(x => console.log(x));\n *\n * // Outputs\n * // 7\n * ```\n *\n * Map and flatten only odd numbers to the sequence `'a'`, `'b'`, `'c'`\n *\n * ```ts\n * import { interval, mergeMap, of, EMPTY } from 'rxjs';\n *\n * const interval$ = interval(1000);\n * const result = interval$.pipe(\n * mergeMap(x => x % 2 === 1 ? of('a', 'b', 'c') : EMPTY),\n * );\n * result.subscribe(x => console.log(x));\n *\n * // Results in the following to the console:\n * // x is equal to the count on the interval, e.g. (0, 1, 2, 3, ...)\n * // x will occur every 1000ms\n * // if x % 2 is equal to 1, print a, b, c (each on its own)\n * // if x % 2 is not equal to 1, nothing will be output\n * ```\n *\n * @see {@link Observable}\n * @see {@link NEVER}\n * @see {@link of}\n * @see {@link throwError}\n */\nexport const EMPTY = new Observable((subscriber) => subscriber.complete());\n\n/**\n * @param scheduler A {@link SchedulerLike} to use for scheduling\n * the emission of the complete notification.\n * @deprecated Replaced with the {@link EMPTY} constant or {@link scheduled} (e.g. `scheduled([], scheduler)`). Will be removed in v8.\n */\nexport function empty(scheduler?: SchedulerLike) {\n return scheduler ? emptyScheduled(scheduler) : EMPTY;\n}\n\nfunction emptyScheduled(scheduler: SchedulerLike) {\n return new Observable((subscriber) => scheduler.schedule(() => subscriber.complete()));\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport function isScheduler(value: any): value is SchedulerLike {\n return value && isFunction(value.schedule);\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\nimport { isScheduler } from './isScheduler';\n\nfunction last(arr: T[]): T | undefined {\n return arr[arr.length - 1];\n}\n\nexport function popResultSelector(args: any[]): ((...args: unknown[]) => unknown) | undefined {\n return isFunction(last(args)) ? 
args.pop() : undefined;\n}\n\nexport function popScheduler(args: any[]): SchedulerLike | undefined {\n return isScheduler(last(args)) ? args.pop() : undefined;\n}\n\nexport function popNumber(args: any[], defaultValue: number): number {\n return typeof last(args) === 'number' ? args.pop()! : defaultValue;\n}\n", "export const isArrayLike = ((x: any): x is ArrayLike => x && typeof x.length === 'number' && typeof x !== 'function');", "import { isFunction } from \"./isFunction\";\n\n/**\n * Tests to see if the object is \"thennable\".\n * @param value the object to test\n */\nexport function isPromise(value: any): value is PromiseLike {\n return isFunction(value?.then);\n}\n", "import { InteropObservable } from '../types';\nimport { observable as Symbol_observable } from '../symbol/observable';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being Observable (but not necessary an Rx Observable) */\nexport function isInteropObservable(input: any): input is InteropObservable {\n return isFunction(input[Symbol_observable]);\n}\n", "import { isFunction } from './isFunction';\n\nexport function isAsyncIterable(obj: any): obj is AsyncIterable {\n return Symbol.asyncIterator && isFunction(obj?.[Symbol.asyncIterator]);\n}\n", "/**\n * Creates the TypeError to throw if an invalid object is passed to `from` or `scheduled`.\n * @param input The object that was passed.\n */\nexport function createInvalidObservableTypeError(input: any) {\n // TODO: We should create error codes that can be looked up, so this can be less verbose.\n return new TypeError(\n `You provided ${\n input !== null && typeof input === 'object' ? 'an invalid object' : `'${input}'`\n } where a stream was expected. You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.`\n );\n}\n", "export function getSymbolIterator(): symbol {\n if (typeof Symbol !== 'function' || !Symbol.iterator) {\n return '@@iterator' as any;\n }\n\n return Symbol.iterator;\n}\n\nexport const iterator = getSymbolIterator();\n", "import { iterator as Symbol_iterator } from '../symbol/iterator';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being an Iterable */\nexport function isIterable(input: any): input is Iterable {\n return isFunction(input?.[Symbol_iterator]);\n}\n", "import { ReadableStreamLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport async function* readableStreamLikeToAsyncGenerator(readableStream: ReadableStreamLike): AsyncGenerator {\n const reader = readableStream.getReader();\n try {\n while (true) {\n const { value, done } = await reader.read();\n if (done) {\n return;\n }\n yield value!;\n }\n } finally {\n reader.releaseLock();\n }\n}\n\nexport function isReadableStreamLike(obj: any): obj is ReadableStreamLike {\n // We don't want to use instanceof checks because they would return\n // false for instances from another Realm, like an -

- - - - - - - - - - - - - - \ No newline at end of file diff --git a/coverage/keybd_closed_cb_ce680311.png b/coverage/keybd_closed_cb_ce680311.png deleted file mode 100644 index ba119c47..00000000 Binary files a/coverage/keybd_closed_cb_ce680311.png and /dev/null differ diff --git a/coverage/status.json b/coverage/status.json deleted file mode 100644 index 1df3ae71..00000000 --- a/coverage/status.json +++ /dev/null @@ -1 +0,0 @@ -{"note":"This file is an internal implementation detail to speed up HTML report generation. Its format can change at any time. You might be looking for the JSON report: https://coverage.rtfd.io/cmd.html#cmd-json","format":5,"version":"7.6.4","globals":"610348b069b0ea8c34f23df79bc26b12","files":{"z_f33f92589633b86b___init___py":{"hash":"64b006efd1e1f77cbf606eed651041fc","index":{"url":"z_f33f92589633b86b___init___py.html","file":"src/stimulus/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":2,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_8855e5c0f7f22643___init___py":{"hash":"2ebf2fdcb9c437fd1051d4f6b4aee6f1","index":{"url":"z_8855e5c0f7f22643___init___py.html","file":"src/stimulus/analysis/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":0,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_8855e5c0f7f22643_analysis_default_py":{"hash":"ceecc3d3bf62fd7c5f72edcd6e2aa83f","index":{"url":"z_8855e5c0f7f22643_analysis_default_py.html","file":"src/stimulus/analysis/analysis_default.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":152,"n_excluded":0,"n_missing":152,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3382268cc3ca4be5___init___py":{"hash":"81a9c05db5f60e82b35c479eee21d9e3","index":{"url":"z_3382268cc3ca4be5___init___py.html","file":"src/stimulus/cli/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":0,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3382268cc3ca4be5_analysis_default_py":{"hash":"165531f686495c999fcd1fdd1851d87f","index":{"url":"z_3382268cc3ca4be5_analysis_default_py.html","file":"src/stimulus/cli/analysis_default.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":60,"n_excluded":0,"n_missing":60,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3382268cc3ca4be5_check_model_py":{"hash":"c9769efbae99ec7ee934f196ceda7cfb","index":{"url":"z_3382268cc3ca4be5_check_model_py.html","file":"src/stimulus/cli/check_model.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":62,"n_excluded":0,"n_missing":62,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3382268cc3ca4be5_predict_py":{"hash":"df077d9881dda4c7e2717f1393a8d37d","index":{"url":"z_3382268cc3ca4be5_predict_py.html","file":"src/stimulus/cli/predict.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":73,"n_excluded":0,"n_missing":73,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3382268cc3ca4be5_shuffle_csv_py":{"hash":"260227c1be062adfdc1c41f0d9464d7d","index":{"url":"z_3382268cc3ca4be5_shuffle_csv_py.html","file":"src/stimulus/cli/shuffle_csv.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":29,"n_excluded":0,"n_missing":29,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3382268cc3ca4be5_split_csv_py":{"hash":"2efba93f338a93dfd936f2760a3ee3fc","index":{"url":"z_3382268cc3ca4be5_split_cs
v_py.html","file":"src/stimulus/cli/split_csv.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":33,"n_excluded":0,"n_missing":33,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3382268cc3ca4be5_split_yaml_py":{"hash":"403868c635981aa4d9bf898813b9b5e0","index":{"url":"z_3382268cc3ca4be5_split_yaml_py.html","file":"src/stimulus/cli/split_yaml.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":19,"n_excluded":0,"n_missing":6,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3382268cc3ca4be5_transform_csv_py":{"hash":"72f731dce4d85ef0948e5d282e2b0477","index":{"url":"z_3382268cc3ca4be5_transform_csv_py.html","file":"src/stimulus/cli/transform_csv.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":24,"n_excluded":0,"n_missing":24,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3382268cc3ca4be5_tuning_py":{"hash":"a987cf80130baab09d9aab56ff134061","index":{"url":"z_3382268cc3ca4be5_tuning_py.html","file":"src/stimulus/cli/tuning.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":66,"n_excluded":0,"n_missing":66,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3a7c4543ac712e3a___init___py":{"hash":"213b2f09849f0021a6e54c4b957e3a34","index":{"url":"z_3a7c4543ac712e3a___init___py.html","file":"src/stimulus/data/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":0,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3a7c4543ac712e3a_csv_py":{"hash":"a7d52f2f894f520f48089575b8f3f34e","index":{"url":"z_3a7c4543ac712e3a_csv_py.html","file":"src/stimulus/data/csv.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":129,"n_excluded":0,"n_missing":16,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_77b10b442f9d4059___init___py":{"hash":"aed9c283ea6ff92eaeb739ce6425c935","index":{"url":"z_77b10b442f9d4059___init___py.html","file":"src/stimulus/data/encoding/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":0,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_77b10b442f9d4059_encoders_py":{"hash":"b4cbc483f19acc7b1fbc82749814157d","index":{"url":"z_77b10b442f9d4059_encoders_py.html","file":"src/stimulus/data/encoding/encoders.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":145,"n_excluded":0,"n_missing":10,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3a7c4543ac712e3a_experiments_py":{"hash":"417a579a6547b88f079027ffccba0979","index":{"url":"z_3a7c4543ac712e3a_experiments_py.html","file":"src/stimulus/data/experiments.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":78,"n_excluded":0,"n_missing":20,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_3a7c4543ac712e3a_handlertorch_py":{"hash":"477d8aef1f10172c4dbfefe108352e05","index":{"url":"z_3a7c4543ac712e3a_handlertorch_py.html","file":"src/stimulus/data/handlertorch.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":10,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_eeca319ae6b94751___init___py":{"hash":"39de0db774f519ac43f8c928e5ac1470","index":{"url":"z_eeca319ae6b94751___init___py.html","file":"src/stimulus/data/splitters/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":2,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branche
s":0}}},"z_eeca319ae6b94751_splitters_py":{"hash":"5f8845d55849ca0d93462d25d91cafb1","index":{"url":"z_eeca319ae6b94751_splitters_py.html","file":"src/stimulus/data/splitters/splitters.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":38,"n_excluded":0,"n_missing":6,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_e019996b82b92b6e___init___py":{"hash":"410b32919b1317cef941f404d9f467af","index":{"url":"z_e019996b82b92b6e___init___py.html","file":"src/stimulus/data/transform/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":0,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_e019996b82b92b6e_data_transformation_generators_py":{"hash":"7fc2de686cc532ec15d50391b1dc8ce8","index":{"url":"z_e019996b82b92b6e_data_transformation_generators_py.html","file":"src/stimulus/data/transform/data_transformation_generators.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":84,"n_excluded":0,"n_missing":12,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_f33f92589633b86b_debug_py":{"hash":"ef890092067b6b7e0adb633205ca1e68","index":{"url":"z_f33f92589633b86b_debug_py.html","file":"src/stimulus/debug.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":63,"n_excluded":0,"n_missing":63,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_156de03ec865cdf8___init___py":{"hash":"cbb32b2fc411b7fc389014eeef1db981","index":{"url":"z_156de03ec865cdf8___init___py.html","file":"src/stimulus/learner/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":0,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_156de03ec865cdf8_predict_py":{"hash":"74aeb183ce8cee23050e065f55d24a77","index":{"url":"z_156de03ec865cdf8_predict_py.html","file":"src/stimulus/learner/predict.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":53,"n_excluded":0,"n_missing":53,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_156de03ec865cdf8_raytune_learner_py":{"hash":"52f4160bac8e98ec12760cfcf13cd248","index":{"url":"z_156de03ec865cdf8_raytune_learner_py.html","file":"src/stimulus/learner/raytune_learner.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":119,"n_excluded":0,"n_missing":119,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_156de03ec865cdf8_raytune_parser_py":{"hash":"9ee560ce78d194073a40128575bcb00f","index":{"url":"z_156de03ec865cdf8_raytune_parser_py.html","file":"src/stimulus/learner/raytune_parser.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":39,"n_excluded":0,"n_missing":39,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_218b1cef00a0b898___init___py":{"hash":"9e3d7f9d05cc06e84310be57da611338","index":{"url":"z_218b1cef00a0b898___init___py.html","file":"src/stimulus/utils/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":0,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_218b1cef00a0b898_generic_utils_py":{"hash":"813d5007330e33c45d967a3f71661e8d","index":{"url":"z_218b1cef00a0b898_generic_utils_py.html","file":"src/stimulus/utils/generic_utils.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":13,"n_excluded":0,"n_missing":13,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_218b1cef00a0b898_launch_utils_py":{"hash":"3802a8a7cab9e0f61e8e69b5d20198e8","index":{"ur
l":"z_218b1cef00a0b898_launch_utils_py.html","file":"src/stimulus/utils/launch_utils.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":38,"n_excluded":0,"n_missing":38,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_218b1cef00a0b898_performance_py":{"hash":"8d4b9ee8810096a68b8f62bd30c27d41","index":{"url":"z_218b1cef00a0b898_performance_py.html","file":"src/stimulus/utils/performance.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":51,"n_excluded":0,"n_missing":51,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_218b1cef00a0b898_yaml_data_py":{"hash":"4f1fc033c3c497bb52481378b3d3d2e9","index":{"url":"z_218b1cef00a0b898_yaml_data_py.html","file":"src/stimulus/utils/yaml_data.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":150,"n_excluded":0,"n_missing":8,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_218b1cef00a0b898_yaml_model_schema_py":{"hash":"42b7246bfd7888fd9d205334eb7b6cb1","index":{"url":"z_218b1cef00a0b898_yaml_model_schema_py.html","file":"src/stimulus/utils/yaml_model_schema.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":50,"n_excluded":0,"n_missing":50,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}}}} \ No newline at end of file diff --git a/coverage/style_cb_8e611ae1.css b/coverage/style_cb_8e611ae1.css deleted file mode 100644 index 3cdaf05a..00000000 --- a/coverage/style_cb_8e611ae1.css +++ /dev/null @@ -1,337 +0,0 @@ -@charset "UTF-8"; -/* Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 */ -/* For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt */ -/* Don't edit this .css file. Edit the .scss file instead! */ -html, body, h1, h2, h3, p, table, td, th { margin: 0; padding: 0; border: 0; font-weight: inherit; font-style: inherit; font-size: 100%; font-family: inherit; vertical-align: baseline; } - -body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; font-size: 1em; background: #fff; color: #000; } - -@media (prefers-color-scheme: dark) { body { background: #1e1e1e; } } - -@media (prefers-color-scheme: dark) { body { color: #eee; } } - -html > body { font-size: 16px; } - -a:active, a:focus { outline: 2px dashed #007acc; } - -p { font-size: .875em; line-height: 1.4em; } - -table { border-collapse: collapse; } - -td { vertical-align: top; } - -table tr.hidden { display: none !important; } - -p#no_rows { display: none; font-size: 1.15em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; } - -a.nav { text-decoration: none; color: inherit; } - -a.nav:hover { text-decoration: underline; color: inherit; } - -.hidden { display: none; } - -header { background: #f8f8f8; width: 100%; z-index: 2; border-bottom: 1px solid #ccc; } - -@media (prefers-color-scheme: dark) { header { background: black; } } - -@media (prefers-color-scheme: dark) { header { border-color: #333; } } - -header .content { padding: 1rem 3.5rem; } - -header h2 { margin-top: .5em; font-size: 1em; } - -header h2 a.button { font-family: inherit; font-size: inherit; border: 1px solid; border-radius: .2em; background: #eee; color: inherit; text-decoration: none; padding: .1em .5em; margin: 1px calc(.1em + 1px); cursor: pointer; border-color: #ccc; } - -@media (prefers-color-scheme: dark) { header h2 a.button { background: #333; } } - -@media (prefers-color-scheme: dark) { header h2 
a.button { border-color: #444; } } - -header h2 a.button.current { border: 2px solid; background: #fff; border-color: #999; cursor: default; } - -@media (prefers-color-scheme: dark) { header h2 a.button.current { background: #1e1e1e; } } - -@media (prefers-color-scheme: dark) { header h2 a.button.current { border-color: #777; } } - -header p.text { margin: .5em 0 -.5em; color: #666; font-style: italic; } - -@media (prefers-color-scheme: dark) { header p.text { color: #aaa; } } - -header.sticky { position: fixed; left: 0; right: 0; height: 2.5em; } - -header.sticky .text { display: none; } - -header.sticky h1, header.sticky h2 { font-size: 1em; margin-top: 0; display: inline-block; } - -header.sticky .content { padding: 0.5rem 3.5rem; } - -header.sticky .content p { font-size: 1em; } - -header.sticky ~ #source { padding-top: 6.5em; } - -main { position: relative; z-index: 1; } - -footer { margin: 1rem 3.5rem; } - -footer .content { padding: 0; color: #666; font-style: italic; } - -@media (prefers-color-scheme: dark) { footer .content { color: #aaa; } } - -#index { margin: 1rem 0 0 3.5rem; } - -h1 { font-size: 1.25em; display: inline-block; } - -#filter_container { float: right; margin: 0 2em 0 0; line-height: 1.66em; } - -#filter_container #filter { width: 10em; padding: 0.2em 0.5em; border: 2px solid #ccc; background: #fff; color: #000; } - -@media (prefers-color-scheme: dark) { #filter_container #filter { border-color: #444; } } - -@media (prefers-color-scheme: dark) { #filter_container #filter { background: #1e1e1e; } } - -@media (prefers-color-scheme: dark) { #filter_container #filter { color: #eee; } } - -#filter_container #filter:focus { border-color: #007acc; } - -#filter_container :disabled ~ label { color: #ccc; } - -@media (prefers-color-scheme: dark) { #filter_container :disabled ~ label { color: #444; } } - -#filter_container label { font-size: .875em; color: #666; } - -@media (prefers-color-scheme: dark) { #filter_container label { color: #aaa; } } - -header button { font-family: inherit; font-size: inherit; border: 1px solid; border-radius: .2em; background: #eee; color: inherit; text-decoration: none; padding: .1em .5em; margin: 1px calc(.1em + 1px); cursor: pointer; border-color: #ccc; } - -@media (prefers-color-scheme: dark) { header button { background: #333; } } - -@media (prefers-color-scheme: dark) { header button { border-color: #444; } } - -header button:active, header button:focus { outline: 2px dashed #007acc; } - -header button.run { background: #eeffee; } - -@media (prefers-color-scheme: dark) { header button.run { background: #373d29; } } - -header button.run.show_run { background: #dfd; border: 2px solid #00dd00; margin: 0 .1em; } - -@media (prefers-color-scheme: dark) { header button.run.show_run { background: #373d29; } } - -header button.mis { background: #ffeeee; } - -@media (prefers-color-scheme: dark) { header button.mis { background: #4b1818; } } - -header button.mis.show_mis { background: #fdd; border: 2px solid #ff0000; margin: 0 .1em; } - -@media (prefers-color-scheme: dark) { header button.mis.show_mis { background: #4b1818; } } - -header button.exc { background: #f7f7f7; } - -@media (prefers-color-scheme: dark) { header button.exc { background: #333; } } - -header button.exc.show_exc { background: #eee; border: 2px solid #808080; margin: 0 .1em; } - -@media (prefers-color-scheme: dark) { header button.exc.show_exc { background: #333; } } - -header button.par { background: #ffffd5; } - -@media (prefers-color-scheme: dark) { header button.par { 
background: #650; } } - -header button.par.show_par { background: #ffa; border: 2px solid #bbbb00; margin: 0 .1em; } - -@media (prefers-color-scheme: dark) { header button.par.show_par { background: #650; } } - -#help_panel, #source p .annotate.long { display: none; position: absolute; z-index: 999; background: #ffffcc; border: 1px solid #888; border-radius: .2em; color: #333; padding: .25em .5em; } - -#source p .annotate.long { white-space: normal; float: right; top: 1.75em; right: 1em; height: auto; } - -#help_panel_wrapper { float: right; position: relative; } - -#keyboard_icon { margin: 5px; } - -#help_panel_state { display: none; } - -#help_panel { top: 25px; right: 0; padding: .75em; border: 1px solid #883; color: #333; } - -#help_panel .keyhelp p { margin-top: .75em; } - -#help_panel .legend { font-style: italic; margin-bottom: 1em; } - -.indexfile #help_panel { width: 25em; } - -.pyfile #help_panel { width: 18em; } - -#help_panel_state:checked ~ #help_panel { display: block; } - -kbd { border: 1px solid black; border-color: #888 #333 #333 #888; padding: .1em .35em; font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-weight: bold; background: #eee; border-radius: 3px; } - -#source { padding: 1em 0 1em 3.5rem; font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; } - -#source p { position: relative; white-space: pre; } - -#source p * { box-sizing: border-box; } - -#source p .n { float: left; text-align: right; width: 3.5rem; box-sizing: border-box; margin-left: -3.5rem; padding-right: 1em; color: #999; user-select: none; } - -@media (prefers-color-scheme: dark) { #source p .n { color: #777; } } - -#source p .n.highlight { background: #ffdd00; } - -#source p .n a { scroll-margin-top: 6em; text-decoration: none; color: #999; } - -@media (prefers-color-scheme: dark) { #source p .n a { color: #777; } } - -#source p .n a:hover { text-decoration: underline; color: #999; } - -@media (prefers-color-scheme: dark) { #source p .n a:hover { color: #777; } } - -#source p .t { display: inline-block; width: 100%; box-sizing: border-box; margin-left: -.5em; padding-left: 0.3em; border-left: 0.2em solid #fff; } - -@media (prefers-color-scheme: dark) { #source p .t { border-color: #1e1e1e; } } - -#source p .t:hover { background: #f2f2f2; } - -@media (prefers-color-scheme: dark) { #source p .t:hover { background: #282828; } } - -#source p .t:hover ~ .r .annotate.long { display: block; } - -#source p .t .com { color: #008000; font-style: italic; line-height: 1px; } - -@media (prefers-color-scheme: dark) { #source p .t .com { color: #6a9955; } } - -#source p .t .key { font-weight: bold; line-height: 1px; } - -#source p .t .str { color: #0451a5; } - -@media (prefers-color-scheme: dark) { #source p .t .str { color: #9cdcfe; } } - -#source p.mis .t { border-left: 0.2em solid #ff0000; } - -#source p.mis.show_mis .t { background: #fdd; } - -@media (prefers-color-scheme: dark) { #source p.mis.show_mis .t { background: #4b1818; } } - -#source p.mis.show_mis .t:hover { background: #f2d2d2; } - -@media (prefers-color-scheme: dark) { #source p.mis.show_mis .t:hover { background: #532323; } } - -#source p.run .t { border-left: 0.2em solid #00dd00; } - -#source p.run.show_run .t { background: #dfd; } - -@media (prefers-color-scheme: dark) { #source p.run.show_run .t { background: #373d29; } } - -#source p.run.show_run .t:hover { background: #d2f2d2; } - -@media (prefers-color-scheme: dark) { #source p.run.show_run .t:hover { background: #404633; } } - -#source p.exc .t { border-left: 
0.2em solid #808080; } - -#source p.exc.show_exc .t { background: #eee; } - -@media (prefers-color-scheme: dark) { #source p.exc.show_exc .t { background: #333; } } - -#source p.exc.show_exc .t:hover { background: #e2e2e2; } - -@media (prefers-color-scheme: dark) { #source p.exc.show_exc .t:hover { background: #3c3c3c; } } - -#source p.par .t { border-left: 0.2em solid #bbbb00; } - -#source p.par.show_par .t { background: #ffa; } - -@media (prefers-color-scheme: dark) { #source p.par.show_par .t { background: #650; } } - -#source p.par.show_par .t:hover { background: #f2f2a2; } - -@media (prefers-color-scheme: dark) { #source p.par.show_par .t:hover { background: #6d5d0c; } } - -#source p .r { position: absolute; top: 0; right: 2.5em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; } - -#source p .annotate { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; color: #666; padding-right: .5em; } - -@media (prefers-color-scheme: dark) { #source p .annotate { color: #ddd; } } - -#source p .annotate.short:hover ~ .long { display: block; } - -#source p .annotate.long { width: 30em; right: 2.5em; } - -#source p input { display: none; } - -#source p input ~ .r label.ctx { cursor: pointer; border-radius: .25em; } - -#source p input ~ .r label.ctx::before { content: "▶ "; } - -#source p input ~ .r label.ctx:hover { background: #e8f4ff; color: #666; } - -@media (prefers-color-scheme: dark) { #source p input ~ .r label.ctx:hover { background: #0f3a42; } } - -@media (prefers-color-scheme: dark) { #source p input ~ .r label.ctx:hover { color: #aaa; } } - -#source p input:checked ~ .r label.ctx { background: #d0e8ff; color: #666; border-radius: .75em .75em 0 0; padding: 0 .5em; margin: -.25em 0; } - -@media (prefers-color-scheme: dark) { #source p input:checked ~ .r label.ctx { background: #056; } } - -@media (prefers-color-scheme: dark) { #source p input:checked ~ .r label.ctx { color: #aaa; } } - -#source p input:checked ~ .r label.ctx::before { content: "▼ "; } - -#source p input:checked ~ .ctxs { padding: .25em .5em; overflow-y: scroll; max-height: 10.5em; } - -#source p label.ctx { color: #999; display: inline-block; padding: 0 .5em; font-size: .8333em; } - -@media (prefers-color-scheme: dark) { #source p label.ctx { color: #777; } } - -#source p .ctxs { display: block; max-height: 0; overflow-y: hidden; transition: all .2s; padding: 0 .5em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; white-space: nowrap; background: #d0e8ff; border-radius: .25em; margin-right: 1.75em; text-align: right; } - -@media (prefers-color-scheme: dark) { #source p .ctxs { background: #056; } } - -#index { font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-size: 0.875em; } - -#index table.index { margin-left: -.5em; } - -#index td, #index th { text-align: right; padding: .25em .5em; border-bottom: 1px solid #eee; } - -@media (prefers-color-scheme: dark) { #index td, #index th { border-color: #333; } } - -#index td.name, #index th.name { text-align: left; width: auto; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; min-width: 15em; } - -#index th { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; font-style: italic; color: #333; cursor: pointer; } - -@media 
(prefers-color-scheme: dark) { #index th { color: #ddd; } } - -#index th:hover { background: #eee; } - -@media (prefers-color-scheme: dark) { #index th:hover { background: #333; } } - -#index th .arrows { color: #666; font-size: 85%; font-family: sans-serif; font-style: normal; pointer-events: none; } - -#index th[aria-sort="ascending"], #index th[aria-sort="descending"] { white-space: nowrap; background: #eee; padding-left: .5em; } - -@media (prefers-color-scheme: dark) { #index th[aria-sort="ascending"], #index th[aria-sort="descending"] { background: #333; } } - -#index th[aria-sort="ascending"] .arrows::after { content: " ▲"; } - -#index th[aria-sort="descending"] .arrows::after { content: " ▼"; } - -#index td.name { font-size: 1.15em; } - -#index td.name a { text-decoration: none; color: inherit; } - -#index td.name .no-noun { font-style: italic; } - -#index tr.total td, #index tr.total_dynamic td { font-weight: bold; border-top: 1px solid #ccc; border-bottom: none; } - -#index tr.region:hover { background: #eee; } - -@media (prefers-color-scheme: dark) { #index tr.region:hover { background: #333; } } - -#index tr.region:hover td.name { text-decoration: underline; color: inherit; } - -#scroll_marker { position: fixed; z-index: 3; right: 0; top: 0; width: 16px; height: 100%; background: #fff; border-left: 1px solid #eee; will-change: transform; } - -@media (prefers-color-scheme: dark) { #scroll_marker { background: #1e1e1e; } } - -@media (prefers-color-scheme: dark) { #scroll_marker { border-color: #333; } } - -#scroll_marker .marker { background: #ccc; position: absolute; min-height: 3px; width: 100%; } - -@media (prefers-color-scheme: dark) { #scroll_marker .marker { background: #444; } } diff --git a/coverage/z_156de03ec865cdf8___init___py.html b/coverage/z_156de03ec865cdf8___init___py.html deleted file mode 100644 index 8a1298a9..00000000 --- a/coverage/z_156de03ec865cdf8___init___py.html +++ /dev/null @@ -1,98 +0,0 @@ - - - - - Coverage for src/stimulus/learner/__init__.py: 100% - - - - - -
-
-

- Coverage for src/stimulus/learner/__init__.py: - 100% -

- -

- 0 statements   - - - -

-

coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

- -
-
-
-

1"""Learner package for model training and evaluation.""" 

-
- - - diff --git a/coverage/z_156de03ec865cdf8_predict_py.html b/coverage/z_156de03ec865cdf8_predict_py.html deleted file mode 100644 index dfc54ee1..00000000 --- a/coverage/z_156de03ec865cdf8_predict_py.html +++ /dev/null @@ -1,215 +0,0 @@ - - - - - Coverage for src/stimulus/learner/predict.py: 0% - - - - - -
-
-

- Coverage for src/stimulus/learner/predict.py: - 0% -

- -

- 53 statements   - - - -

-

coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

- -
-
-
-

1"""A module for making predictions with PyTorch models using DataLoaders.""" 

-

2 

-

3from typing import Any, Optional 

-

4 

-

5import torch 

-

6 

-

7from stimulus.utils.generic_utils import ensure_at_least_1d 

-

8from stimulus.utils.performance import Performance 

-

9 

-

10 

-

11class PredictWrapper: 

-

12 """A wrapper to predict the output of a model on a datset loaded into a torch DataLoader. 

-

13 

-

14 It also provides the functionalities to measure the performance of the model. 

-

15 """ 

-

16 

-

17 def __init__(self, model: object, dataloader: object, loss_dict: Optional[dict[str, Any]] = None) -> None: 

-

18 """Initialize the PredictWrapper. 

-

19 

-

20 Args: 

-

21 model: The PyTorch model to make predictions with 

-

22 dataloader: DataLoader containing the evaluation data 

-

23 loss_dict: Optional dictionary of loss functions 

-

24 """ 

-

25 self.model = model 

-

26 self.dataloader = dataloader 

-

27 self.loss_dict = loss_dict 

-

28 try: 

-

29 self.model.eval() 

-

30 except RuntimeError as e: 

-

31 # Using logging instead of print 

-

32 import logging 

-

33 

-

34 logging.warning("Not able to run model.eval: %s", str(e)) 

-

35 

-

36 def predict(self, *, return_labels: bool = False) -> dict[str, torch.Tensor]: 

-

37 """Get the model predictions. 

-

38 

-

39 Basically, it runs a forward pass on the model for each batch, 

-

40 gets the predictions and concatenates them across all batches. 

-

41 Since the returned `current_predictions` are formed by tensors computed for one batch, 

-

42 the final `predictions` are obtained by concatenating them. 

-

43 

-

44 At the end it returns `predictions` as a dictionary of tensors with the same keys as `y`. 

-

45 

-

46 If return_labels is True, the `labels` are returned as well, also as a dictionary of tensors. 

-

47 

-

48 Args: 

-

49 return_labels: Whether to also return the labels 

-

50 

-

51 Returns: 

-

52 Dictionary of predictions, and optionally labels 

-

53 """ 

-

54 # create empty dictionaries with the column names 

-

55 first_batch = next(iter(self.dataloader)) 

-

56 keys = first_batch[1].keys() 

-

57 predictions = {k: [] for k in keys} 

-

58 labels = {k: [] for k in keys} 

-

59 

-

60 # get the predictions (and labels) for each batch 

-

61 with torch.no_grad(): 

-

62 for x, y, _ in self.dataloader: 

-

63 current_predictions = self.model(**x) 

-

64 current_predictions = self.handle_predictions(current_predictions, y) 

-

65 for k in keys: 

-

66 # the batch might consist of a single element, in which case torch.cat would fail; ensure_at_least_1d is called to guard against this. 

-

67 predictions[k].append(ensure_at_least_1d(current_predictions[k])) 

-

68 if return_labels: 

-

69 labels[k].append(ensure_at_least_1d(y[k])) 

-

70 

-

71 # return the predictions (and labels) as a dictionary of tensors for the entire dataset. 

-

72 if not return_labels: 

-

73 return {k: torch.cat(v) for k, v in predictions.items()} 

-

74 return {k: torch.cat(v) for k, v in predictions.items()}, {k: torch.cat(v) for k, v in labels.items()} 

-

75 

-

76 def handle_predictions(self, predictions: Any, y: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: 

-

77 """Handle the model outputs from forward pass, into a dictionary of tensors, just like y.""" 

-

78 if len(y) == 1: 

-

79 return {next(iter(y.keys())): predictions} 

-

80 return dict(zip(y.keys(), predictions)) 

-

81 

-

82 def compute_metrics(self, metrics: list[str]) -> dict[str, float]: 

-

83 """Wrapper to compute the performance metrics.""" 

-

84 return {m: self.compute_metric(m) for m in metrics} 

-

85 

-

86 def compute_metric(self, metric: str = "loss") -> float: 

-

87 """Wrapper to compute the performance metric.""" 

-

88 if metric == "loss": 

-

89 return self.compute_loss() 

-

90 return self.compute_other_metric(metric) 

-

91 

-

92 def compute_loss(self) -> float: 

-

93 """Compute the loss. 

-

94 

-

95 The current implementation computes the loss for each batch and then averages them. 

-

96 TODO we could potentially summarize the loss across batches in a different way. 

-

97 Or we may even have more than one loss. 

-

98 """ 

-

99 if self.loss_dict is None: 

-

100 raise ValueError("Loss function is not provided.") 

-

101 loss = 0.0 

-

102 with torch.no_grad(): 

-

103 for x, y, _ in self.dataloader: 

-

104 # the loss_dict could be unpacked with ** and handled in the function declaration as **kwargs; to be decided, but this reads cleaner and more understandable. 

-

105 current_loss = self.model.batch(x=x, y=y, **self.loss_dict)[0] 

-

106 loss += current_loss.item() 

-

107 return loss / len(self.dataloader) 

-

108 

-

109 def compute_other_metric(self, metric: str) -> float: 

-

110 """Compute the performance metric. 

-

111 

-

112 # TODO currently we compute the average performance metric across the target y, but in the future we may want something different 

-

113 """ 

-

114 if (not hasattr(self, "predictions")) or (not hasattr(self, "labels")): 

-

115 self.predictions, self.labels = self.predict(return_labels=True) 

-

116 return sum( 

-

117 Performance(labels=self.labels[k], predictions=self.predictions[k], metric=metric).val for k in self.labels 

-

118 ) / len(self.labels) 

-
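A minimal usage sketch of the PredictWrapper above (added here, not part of the original file): the toy model, the "features"/"y" column names, and the hand-built batch list are assumptions for illustration only; any DataLoader yielding (x, y, meta) batches works the same way.

import torch
from torch import nn

from stimulus.learner.predict import PredictWrapper


class ToyModel(nn.Module):
    """Hypothetical model whose forward accepts the unpacked `x` dict."""

    def __init__(self) -> None:
        super().__init__()
        self.linear = nn.Linear(4, 1)

    def forward(self, features: torch.Tensor) -> torch.Tensor:
        return self.linear(features).squeeze(-1)


# a plain list of (x, y, meta) triples stands in for a torch DataLoader here
batches = [({"features": torch.randn(8, 4)}, {"y": torch.randn(8)}, None) for _ in range(3)]

wrapper = PredictWrapper(ToyModel(), batches)
predictions = wrapper.predict()  # {"y": tensor of shape (24,)}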
- - - diff --git a/coverage/z_156de03ec865cdf8_raytune_learner_py.html b/coverage/z_156de03ec865cdf8_raytune_learner_py.html deleted file mode 100644 index 70fd4193..00000000 --- a/coverage/z_156de03ec865cdf8_raytune_learner_py.html +++ /dev/null @@ -1,404 +0,0 @@ - - - - - Coverage for src/stimulus/learner/raytune_learner.py: 0% - - - - - -
-
-

- Coverage for src/stimulus/learner/raytune_learner.py: - 0% -

- -

- 119 statements   - - - -

-

coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

- -
-
-
-

1"""Ray Tune wrapper and trainable model classes for hyperparameter optimization.""" 

-

2 

-

3import datetime 

-

4import logging 

-

5import os 

-

6import random 

-

7from typing import Optional, tuple 

-

8 

-

9import numpy as np 

-

10import torch 

-

11from ray import cluster_resources, init, is_initialized, shutdown, train, tune 

-

12from ray.tune import Trainable, schedulers 

-

13from safetensors.torch import load_model as safe_load_model 

-

14from safetensors.torch import save_model as safe_save_model 

-

15from torch import nn, optim 

-

16from torch.utils.data import DataLoader 

-

17 

-

18from stimulus.data.handlertorch import TorchDataset 

-

19from stimulus.learner.predict import PredictWrapper 

-

20from stimulus.utils.generic_utils import set_general_seeds 

-

21from stimulus.utils.yaml_model_schema import YamlRayConfigLoader 

-

22 

-

23 

-

24class TuneWrapper: 

-

25 """Wrapper class for Ray Tune hyperparameter optimization.""" 

-

26 

-

27 def __init__( 

-

28 self, 

-

29 config_path: str, 

-

30 model_class: nn.Module, 

-

31 data_path: str, 

-

32 experiment_object: object, 

-

33 max_gpus: Optional[int] = None, 

-

34 max_cpus: Optional[int] = None, 

-

35 max_object_store_mem: Optional[float] = None, 

-

36 max_mem: Optional[float] = None, 

-

37 ray_results_dir: Optional[str] = None, 

-

38 tune_run_name: Optional[str] = None, 

-

39 *, # Force debug to be keyword-only 

-

40 debug: bool = False, 

-

41 ) -> None: 

-

42 """Initialize the TuneWrapper with the paths to the config, model, and data.""" 

-

43 self.config = YamlRayConfigLoader(config_path).get_config() 

-

44 

-

45 # set all general seeds: python, numpy and torch. 

-

46 set_general_seeds(self.config["seed"]) 

-

47 

-

48 self.config["model"] = model_class 

-

49 self.config["experiment"] = experiment_object 

-

50 

-

51 # add the ray method for number generation to the config so it can be passed to the trainable class, which will in turn set per-worker seeds in a reproducible manner. 

-

52 self.config["ray_worker_seed"] = tune.randint(0, 1000) 

-

53 

-

54 # add the data path to the config so it knows where the data is during tuning 

-

55 if not os.path.exists(data_path): 

-

56 raise ValueError("Data path does not exist. Given path:" + data_path) 

-

57 self.config["data_path"] = os.path.abspath(data_path) 

-

58 

-

59 # build the tune config 

-

60 self.config["tune"]["tune_params"]["scheduler"] = getattr(schedulers, self.config["tune"]["scheduler"]["name"])( 

-

61 **self.config["tune"]["scheduler"]["params"], 

-

62 ) 

-

63 self.tune_config = tune.TuneConfig(**self.config["tune"]["tune_params"]) 

-

64 

-

65 # set ray cluster total resources (max) 

-

66 self.max_gpus = max_gpus 

-

67 self.max_cpus = max_cpus 

-

68 self.max_object_store_mem = max_object_store_mem # a special subset of the total usable memory that ray needs for its internal work; by default it is set to 30% of the total usable memory 

-

69 self.max_mem = max_mem 

-

70 

-

71 # build the run config 

-

72 self.checkpoint_config = train.CheckpointConfig(checkpoint_at_end=True) # TODO implement checkpointing 

-

73 # in case a custom tune_run_name was not given, build it the way ray would, so it can later be passed to the worker for the debug section. 

-

74 if tune_run_name is None: 

-

75 tune_run_name = "TuneModel_" + datetime.datetime.now(tz=datetime.timezone.utc).strftime("%Y-%m-%d_%H-%M-%S") 

-

76 self.run_config = train.RunConfig( 

-

77 name=tune_run_name, 

-

78 storage_path=ray_results_dir, 

-

79 checkpoint_config=self.checkpoint_config, 

-

80 **self.config["tune"]["run_params"], 

-

81 ) # TODO maybe put the name into the config if it becomes possible to retrieve the result subdir name from tune 

-

82 

-

83 # working towards the path for the tune_run directory. if ray_results_dir is None, ray puts it under HOME, so we do the same here. 

-

84 if ray_results_dir is None: 

-

85 ray_results_dir = os.environ.get("HOME") 

-

86 # then we can pass the full, correct tune_run path to the trainable function, so it can place the debug dir under it if needed. 

-

87 self.config["tune_run_path"] = os.path.join(ray_results_dir, tune_run_name) 

-

88 

-

89 # pass the debug flag to the config taken from tune so it can be used inside the setup of the trainable 

-

90 self.config["_debug"] = False 

-

91 if debug: 

-

92 self.config["_debug"] = True 

-

93 

-

94 self.tuner = self.tuner_initialization() 

-

95 

-

96 def tuner_initialization(self) -> tune.Tuner: 

-

97 """Prepare the tuner with the configs.""" 

-

98 # in ray 3.0.0 the following issue is fixed: sometimes ray reports that it is already initialized, in which case shut it down and start anew. TODO update to ray 3.0.0 

-

99 if is_initialized(): 

-

100 shutdown() 

-

101 

-

102 # initialize the ray cluster with the limiter on CPUs, GPUs or memory if needed, otherwise everything that is available. None is what ray uses to get all resources available for either CPU, GPU or memory. 

-

103 # memory is split in two for ray. read more at ray.init documentation. 

-

104 init( 

-

105 num_cpus=self.max_cpus, 

-

106 num_gpus=self.max_gpus, 

-

107 object_store_memory=self.max_object_store_mem, 

-

108 _memory=self.max_mem, 

-

109 ) 

-

110 

-

111 logging.info(f"CLUSTER resources -> {cluster_resources()}") 

-

112 

-

113 # check that per-trial resources do not exceed the maximum resources. trial = a single set/combination of hyperparameters (the maximum resources of a parallel actor, in ray tune jargon). 

-

114 self.gpu_per_trial = self._chek_per_trial_resources("gpu_per_trial", cluster_resources(), "GPU") 

-

115 self.cpu_per_trial = self._chek_per_trial_resources("cpu_per_trial", cluster_resources(), "CPU") 

-

116 

-

117 logging.info(f"PER_TRIAL resources -> GPU: {self.gpu_per_trial} CPU: {self.cpu_per_trial}") 

-

118 

-

119 # wrap the trainable with the allowed resources per trial 

-

120 # also provide the training and validation data to the trainable through with_parameters 

-

121 # this is a wrapper that passes the data as a object reference (pointer) 

-

122 trainable = tune.with_resources(TuneModel, resources={"cpu": self.cpu_per_trial, "gpu": self.gpu_per_trial}) 

-

123 trainable = tune.with_parameters( 

-

124 trainable, 

-

125 training=TorchDataset(self.config["data_path"], self.config["experiment"], split=0), 

-

126 validation=TorchDataset(self.config["data_path"], self.config["experiment"], split=1), 

-

127 ) 

-

128 

-

129 return tune.Tuner(trainable, tune_config=self.tune_config, param_space=self.config, run_config=self.run_config) 

-

130 

-

131 def tune(self) -> None: 

-

132 """Run the tuning process.""" 

-

133 return self.tuner.fit() 

-

134 

-

135 def _chek_per_trial_resources( 

-

136 self, 

-

137 resurce_key: str, 

-

138 cluster_max_resources: dict, 

-

139 resource_type: str, 

-

140 ) -> float: 

-

141 """Helper function that check that user requested per trial resources are not exceeding the available resources for the ray cluster. 

-

142 

-

143 If per-trial resources are not requested, they are set to a reasonable default amount. 

-

144 

-

145 resurce_key: str object the key used to look into the self.config["tune"] 

-

146 cluster_max_resources: dict object, the output of the ray.cluster_resources() function. It holds what ray has found to be the available resources for CPU, GPU and memory 

-

147 resource_type: str object, the key used to look into the cluster_resources dict 

-

148 """ 

-

149 if resource_type == "GPU" and resource_type not in cluster_resources(): 

-

150 # ray does not expose a GPU field when GPUs were set to zero, so trial GPU resources have to be set to zero. 

-

151 if self.max_gpus == 0: 

-

152 return 0.0 

-

153 # if GPUs are not detected, raise an error. This sometimes happens when max_gpus stays None and ray.init does not find the GPU by itself. Not setting max_gpus (None) means using all available ones. TODO make ray see the GPU on a None value. 

-

154 raise SystemError( 

-

155 "#### ray did not detect any GPU, if you do not want to use GPU set max_gpus=0, or in nextflow --max_gpus 0.", 

-

156 ) 

-

157 

-

158 per_trial_resource = None 

-

159 # if everything is alright, leave the value as it is. 

-

160 if ( 

-

161 resurce_key in self.config["tune"] 

-

162 and self.config["tune"][resurce_key] <= cluster_max_resources[resource_type] 

-

163 ): 

-

164 per_trial_resource = self.config["tune"][resurce_key] 

-

165 

-

166 # if per_trial_resource is more than what is available to ray, set it to what is available and warn the user 

-

167 elif ( 

-

168 resurce_key in self.config["tune"] 

-

169 and self.config["tune"][resurce_key] > cluster_max_resources[resource_type] 

-

170 ): 

-

171 # TODO write a better warning 

-

172 logging.warning( 

-

173 f"\n\n#### WARNING - {resource_type} per trial are more than what is available. " 

-

174 f"{resource_type} per trial: {self.config['tune'][resurce_key]} " 

-

175 f"available: {cluster_max_resources[resource_type]} " 

-

176 "overwriting value to max available", 

-

177 ) 

-

178 per_trial_resource = cluster_max_resources[resource_type] 

-

179 

-

180 # if per_trial_resource has not been asked and there is none available set them to zero 

-

181 elif resurce_key not in self.config["tune"] and cluster_max_resources[resource_type] == 0.0: 

-

182 per_trial_resource = 0 

-

183 

-

184 # if per_trial_resource has not been asked and the resource is available set the value to either 1 or number_available resource / num_samples 

-

185 elif resurce_key not in self.config["tune"] and cluster_max_resources[resource_type] != 0.0: 

-

186 # TODO maybe set the default to 0.5 instead of 1 ? fractional use in case of GPU? Should this be a mandatory parameter? 

-

187 per_trial_resource = max( 

-

188 1, 

-

189 (cluster_max_resources[resource_type] // self.config["tune"]["tune_params"]["num_samples"]), 

-

190 ) 

-

191 

-

192 return per_trial_resource 

-

193 

-

194 

-
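A small illustration of the fallback branch above (a sketch added here, not present in the original file): when a per-trial value is absent from the config and the resource is available, the default allocation is max(1, available // num_samples).

available_cpus = 8
num_samples = 16
cpu_per_trial = max(1, available_cpus // num_samples)  # -> 1 CPU per trial

available_cpus, num_samples = 32, 4
cpu_per_trial = max(1, available_cpus // num_samples)  # -> 8 CPUs per trial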

195class TuneModel(Trainable): 

-

196 """Trainable model class for Ray Tune.""" 

-

197 

-

198 def setup(self, config: dict, training: object, validation: object) -> None: 

-

199 """Get the model, loss function(s), optimizer, train and test data from the config.""" 

-

200 # set the seeds a second time (the first was in TuneWrapper initialization). This makes every important seed worker-specific. 

-

201 set_general_seeds(self.config["ray_worker_seed"]) 

-

202 

-

203 # Initialize model with the config params 

-

204 self.model = config["model"](**config["model_params"]) 

-

205 

-

206 # Add data path 

-

207 self.data_path = config["data_path"] 

-

208 

-

209 # Get the loss function(s) from the config model params 

-

210 # Note that the loss function(s) are stored in a dictionary, 

-

211 # where the keys are the keys of loss_params in the yaml config file and the values are the loss functions associated with those keys. 

-

212 self.loss_dict = config["loss_params"] 

-

213 for key, loss_fn in self.loss_dict.items(): 

-

214 try: 

-

215 self.loss_dict[key] = getattr(nn, loss_fn)() 

-

216 except AttributeError as err: 

-

217 raise ValueError( 

-

218 f"Invalid loss function: {loss_fn}, check PyTorch for documentation on available loss functions", 

-

219 ) from err 

-

220 

-

221 # get the optimizer parameters 

-

222 optimizer_lr = config["optimizer_params"]["lr"] 

-

223 

-

224 # get the optimizer from PyTorch 

-

225 self.optimizer = getattr(optim, config["optimizer_params"]["method"])(self.model.parameters(), lr=optimizer_lr) 

-

226 

-

227 # get step size from the config 

-

228 self.step_size = config["tune"]["step_size"] 

-

229 

-

230 # use dataloader on training/validation data 

-

231 self.batch_size = config["data_params"]["batch_size"] 

-

232 self.training = DataLoader( 

-

233 training, 

-

234 batch_size=self.batch_size, 

-

235 shuffle=True, 

-

236 ) # TODO need to check the reproducibility of this shuffling 

-

237 self.validation = DataLoader(validation, batch_size=self.batch_size, shuffle=True) 

-

238 

-

239 # debug section, first create a dedicated directory for each worker inside Ray_results/<tune_model_run_specific_dir> location 

-

240 debug_dir = os.path.join( 

-

241 config["tune_run_path"], 

-

242 "debug", 

-

243 ("worker_with_seed_" + str(self.config["ray_worker_seed"])), 

-

244 ) 

-

245 if config["_debug"]: 

-

246 # creating a special directory for it one that is worker/trial/experiment specific 

-

247 os.makedirs(debug_dir) 

-

248 seed_filename = os.path.join(debug_dir, "seeds.txt") 

-

249 

-

250 # save the initialized model weights 

-

251 self.export_model(export_dir=debug_dir) 

-

252 

-

253 # save the seeds 

-

254 with open(seed_filename, "a") as seed_f: 

-

255 # the seed cannot be retrieved once set, nor can the current seed, for python, numpy or torch, so we draw five random numbers instead. If this is the first draw, the numbers are always the same for a given seed. 

-

256 python_values = random.sample(range(100), 5) 

-

257 numpy_values = list(np.random.randint(0, 100, size=5)) 

-

258 torch_values = torch.randint(0, 100, (5,)).tolist() 

-

259 seed_f.write( 

-

260 f"python drawn numbers : {python_values}\nnumpy drawn numbers : {numpy_values}\ntorch drawn numbers : {torch_values}\n", 

-

261 ) 

-

262 

-

263 def step(self) -> dict: 

-

264 """For each batch in the training data, calculate the loss and update the model parameters. 

-

265 

-

266 This calculation is performed based on the model's batch function. 

-

267 At the end, return the objective metric(s) for the tuning process. 

-

268 """ 

-

269 for _step_size in range(self.step_size): 

-

270 for x, y, _meta in self.training: 

-

271 # the loss dict could be unpacked with ** and handled in the function declaration as **kwargs; to be decided, but this reads cleaner and more understandable. 

-

272 self.model.batch(x=x, y=y, optimizer=self.optimizer, **self.loss_dict) 

-

273 return self.objective() 

-

274 

-

275 def objective(self) -> dict: 

-

276 """Compute the objective metric(s) for the tuning process.""" 

-

277 metrics = [ 

-

278 "loss", 

-

279 "rocauc", 

-

280 "prauc", 

-

281 "mcc", 

-

282 "f1score", 

-

283 "precision", 

-

284 "recall", 

-

285 "spearmanr", 

-

286 ] # TODO maybe we report only a subset of metrics, given certain criteria (eg. if classification or regression) 

-

287 predict_val = PredictWrapper(self.model, self.validation, loss_dict=self.loss_dict) 

-

288 predict_train = PredictWrapper(self.model, self.training, loss_dict=self.loss_dict) 

-

289 return { 

-

290 **{"val_" + metric: value for metric, value in predict_val.compute_metrics(metrics).items()}, 

-

291 **{"train_" + metric: value for metric, value in predict_train.compute_metrics(metrics).items()}, 

-

292 } 

-

293 

-

294 def export_model(self, export_dir: str) -> None: 

-

295 """Export model to safetensors format.""" 

-

296 safe_save_model(self.model, os.path.join(export_dir, "model.safetensors")) 

-

297 

-

298 def load_checkpoint(self, checkpoint_dir: str) -> None: 

-

299 """Load model and optimizer state from checkpoint.""" 

-

300 self.model = safe_load_model(self.model, os.path.join(checkpoint_dir, "model.safetensors")) 

-

301 self.optimizer.load_state_dict(torch.load(os.path.join(checkpoint_dir, "optimizer.pt"))) 

-

302 

-

303 def save_checkpoint(self, checkpoint_dir: str) -> dict | None: 

-

304 """Save model and optimizer state to checkpoint.""" 

-

305 safe_save_model(self.model, os.path.join(checkpoint_dir, "model.safetensors")) 

-

306 torch.save(self.optimizer.state_dict(), os.path.join(checkpoint_dir, "optimizer.pt")) 

-

307 return checkpoint_dir 

-
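A hedged end-to-end sketch of driving TuneWrapper (not part of the original file): the config and data paths, the MyModel placeholder class, and the experiment object are hypothetical stand-ins to be supplied by the caller.

from torch import nn

from stimulus.learner.raytune_learner import TuneWrapper


class MyModel(nn.Module):  # placeholder model class referenced by the config's model_params
    pass


experiment = ...  # placeholder: an experiment instance understood by TorchDataset

wrapper = TuneWrapper(
    config_path="model_config.yaml",  # hypothetical path
    model_class=MyModel,
    data_path="data.csv",             # hypothetical path
    experiment_object=experiment,
    max_gpus=0,                       # CPU-only run
)
results = wrapper.tune()  # the ray.tune ResultGrid returned by Tuner.fit()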
- - - diff --git a/coverage/z_156de03ec865cdf8_raytune_parser_py.html b/coverage/z_156de03ec865cdf8_raytune_parser_py.html deleted file mode 100644 index 1bbec3ad..00000000 --- a/coverage/z_156de03ec865cdf8_raytune_parser_py.html +++ /dev/null @@ -1,174 +0,0 @@ - - - - - Coverage for src/stimulus/learner/raytune_parser.py: 0% - - - - - -
-
-

- Coverage for src/stimulus/learner/raytune_parser.py: - 0% -

- -

- 39 statements   - - - -

-

coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

- -
-
-
-

1"""Ray Tune results parser for extracting and saving best model configurations and weights.""" 

-

2 

-

3import json 

-

4import os 

-

5 

-

6import torch 

-

7from safetensors.torch import load_file as safe_load_file 

-

8from safetensors.torch import save_file as safe_save_file 

-

9 

-

10 

-

11class TuneParser: 

-

12 """Parser class for Ray Tune results to extract best configurations and model weights.""" 

-

13 

-

14 def __init__(self, results: object) -> None: 

-

15 """`results` is the output of ray.tune.""" 

-

16 self.results = results 

-

17 

-

18 def get_best_config(self) -> dict: 

-

19 """Get the best config from the results.""" 

-

20 return self.results.get_best_result().config 

-

21 

-

22 def save_best_config(self, output: str) -> None: 

-

23 """Save the best config to a file. 

-

24 

-

25 TODO maybe only save the relevant config values. 

-

26 """ 

-

27 config = self.get_best_config() 

-

28 config = self.fix_config_values(config) 

-

29 with open(output, "w") as f: 

-

30 json.dump(config, f, indent=4) 

-

31 

-

32 def fix_config_values(self, config: dict) -> dict: 

-

33 """Correct config values. 

-

34 

-

35 Args: 

-

36 config: Configuration dictionary to fix 

-

37 

-

38 Returns: 

-

39 Fixed configuration dictionary 

-

40 """ 

-

41 # fix the model and experiment values to avoid problems with serialization 

-

42 # TODO this is a quick fix to avoid the problem with serializing class objects. maybe there is a better way. 

-

43 config["model"] = config["model"].__name__ 

-

44 config["experiment"] = config["experiment"].__class__.__name__ 

-

45 if "tune" in config and "tune_params" in config["tune"]: 

-

46 del config["tune"]["tune_params"]["scheduler"] 

-

47 # delete miscellaneous keys, used only during debug mode for example 

-

48 del config["_debug"], config["tune_run_path"] 

-

49 

-

50 return config 

-

51 

-

52 def save_best_metrics_dataframe(self, output: str) -> None: 

-

53 """Save the dataframe with the metrics at each iteration of the best sample to a file.""" 

-

54 df = self.results.get_best_result().metrics_dataframe 

-

55 columns = [col for col in df.columns if "config" not in col] 

-

56 df = df[columns] 

-

57 df.to_csv(output, index=False) 

-

58 

-

59 def get_best_model(self) -> dict: 

-

60 """Get the best model weights from the results.""" 

-

61 checkpoint = self.results.get_best_result().checkpoint.to_directory() 

-

62 checkpoint = os.path.join(checkpoint, "model.safetensors") 

-

63 return safe_load_file(checkpoint) 

-

64 

-

65 def save_best_model(self, output: str) -> None: 

-

66 """Save the best model weights to a file.""" 

-

67 safe_save_file(self.get_best_model(), output) 

-

68 

-

69 def get_best_optimizer(self) -> dict: 

-

70 """Get the best optimizer state from the results.""" 

-

71 checkpoint = self.results.get_best_result().checkpoint.to_directory() 

-

72 checkpoint = os.path.join(checkpoint, "optimizer.pt") 

-

73 return torch.load(checkpoint) 

-

74 

-

75 def save_best_optimizer(self, output: str) -> None: 

-

76 """Save the best optimizer state to a file.""" 

-

77 torch.save(self.get_best_optimizer(), output) 

-
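A short usage sketch for TuneParser (not in the original file): `results` is assumed to be the ray.tune ResultGrid produced by a finished tuning run, and the output file names are arbitrary.

from stimulus.learner.raytune_parser import TuneParser

parser = TuneParser(results)  # `results` comes from TuneWrapper.tune()
parser.save_best_config("best_config.json")
parser.save_best_metrics_dataframe("best_metrics.csv")
parser.save_best_model("best_model.safetensors")
parser.save_best_optimizer("best_optimizer.pt")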
- - - diff --git a/coverage/z_218b1cef00a0b898___init___py.html b/coverage/z_218b1cef00a0b898___init___py.html deleted file mode 100644 index c2884b09..00000000 --- a/coverage/z_218b1cef00a0b898___init___py.html +++ /dev/null @@ -1,98 +0,0 @@ - - - - - Coverage for src/stimulus/utils/__init__.py: 100% - - - - - -
-
-

- Coverage for src/stimulus/utils/__init__.py: - 100% -

- -

- 0 statements   - - - -

-

coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

- -
-
-
-

1"""Utility functions package.""" 

-
- - - diff --git a/coverage/z_218b1cef00a0b898_generic_utils_py.html b/coverage/z_218b1cef00a0b898_generic_utils_py.html deleted file mode 100644 index ffda1ecc..00000000 --- a/coverage/z_218b1cef00a0b898_generic_utils_py.html +++ /dev/null @@ -1,127 +0,0 @@ - - - - - Coverage for src/stimulus/utils/generic_utils.py: 0% - - - - - -
-
-

- Coverage for src/stimulus/utils/generic_utils.py: - 0% -

- -

- 13 statements   - - - -

-

coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

- -
-
-
-

1"""Utility functions for general purpose operations like seed setting and tensor manipulation.""" 

-

2 

-

3import random 

-

4from typing import Union 

-

5 

-

6import numpy as np 

-

7import torch 

-

8 

-

9 

-

10def ensure_at_least_1d(tensor: torch.Tensor) -> torch.Tensor: 

-

11 """Function to make sure tensors given are not zero dimensional. if they are add one dimension.""" 

-

12 if tensor.dim() == 0: 

-

13 tensor = tensor.unsqueeze(0) 

-

14 return tensor 

-

15 

-

16 

-

17def set_general_seeds(seed_value: Union[int, None]) -> None: 

-

18 """Set all relevant random seeds to a given value. 

-

19 

-

20 Especially useful in case of ray.tune. Ray does not have a "generic" seed as far as ray 2.23. 

-

21 """ 

-

22 # Set python seed 

-

23 random.seed(seed_value) 

-

24 

-

25 # set numpy seed 

-

26 np.random.seed(seed_value) 

-

27 

-

28 # set torch seed; differently from the two above, torch cannot take None as an input value, so it is not called in that case. 

-

29 if seed_value is not None: 

-

30 torch.manual_seed(seed_value) 

-
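A minimal sketch of the two helpers above (added here, not part of the original file).

import torch

from stimulus.utils.generic_utils import ensure_at_least_1d, set_general_seeds

set_general_seeds(42)                # seeds python, numpy and torch at once

scalar = torch.tensor(3.0)           # 0-dimensional tensor
vector = ensure_at_least_1d(scalar)  # promoted to shape (1,)
torch.cat([vector, vector])          # concatenation now works even for single-element batches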
- - - diff --git a/coverage/z_218b1cef00a0b898_launch_utils_py.html b/coverage/z_218b1cef00a0b898_launch_utils_py.html deleted file mode 100644 index b34c18cb..00000000 --- a/coverage/z_218b1cef00a0b898_launch_utils_py.html +++ /dev/null @@ -1,196 +0,0 @@ - - - - - Coverage for src/stimulus/utils/launch_utils.py: 0% - - - - - -
-
-

- Coverage for src/stimulus/utils/launch_utils.py: - 0% -

- -

- 38 statements   - - - -

-

coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

- -
-
-
-

1"""Utility functions for launching and configuring experiments and ray tuning.""" 

-

2 

-

3import importlib.util 

-

4import math 

-

5import os 

-

6from typing import Union 

-

7 

-

8import stimulus.data.experiments as exp 

-

9 

-

10 

-

11def import_class_from_file(file_path: str) -> type: 

-

12 """Import and return the Model class from a specified Python file. 

-

13 

-

14 Args: 

-

15 file_path (str): Path to the Python file containing the Model class. 

-

16 

-

17 Returns: 

-

18 type: The Model class found in the file. 

-

19 

-

20 Raises: 

-

21 ImportError: If no class starting with 'Model' is found in the file. 

-

22 """ 

-

23 # Extract directory path and file name 

-

24 directory, file_name = os.path.split(file_path) 

-

25 module_name = os.path.splitext(file_name)[0] # Remove extension to get module name 

-

26 

-

27 # Create a module from the file path 

-

28 # In summary, these three lines of code are responsible for creating a module specification based on a file location, creating a module object from that specification, and then executing the module's code to populate the module object with the definitions from the Python file. 

-

29 spec = importlib.util.spec_from_file_location(module_name, file_path) 

-

30 module = importlib.util.module_from_spec(spec) 

-

31 spec.loader.exec_module(module) 

-

32 

-

33 # Find the class dynamically 

-

34 for name in dir(module): 

-

35 model_class = getattr(module, name) 

-

36 if isinstance(model_class, type) and name.startswith("Model"): 

-

37 return model_class 

-

38 

-

39 # Class not found 

-

40 raise ImportError("No class starting with 'Model' found in the file.") 

-

41 

-

42 

-

43def get_experiment(experiment_name: str) -> object: 

-

44 """Get an experiment instance by name. 

-

45 

-

46 Args: 

-

47 experiment_name (str): Name of the experiment class to instantiate. 

-

48 

-

49 Returns: 

-

50 object: An instance of the requested experiment class. 

-

51 """ 

-

52 return getattr(exp, experiment_name)() 

-

53 

-

54 

-

55def memory_split_for_ray_init(memory_str: Union[str, None]) -> tuple[float, float]: 

-

56 """Process the input memory value into the right unit and allocates 30% for overhead and 70% for tuning. 

-

57 

-

58 Useful in case ray detects them wrongly. Memory is split in two for ray: the object store memory 

-

59 and the other actual memory for tuning. The following function takes the total possible 

-

60 usable/allocated memory as a string parameter and returns in bytes the values for store_memory 

-

61 (30% as default in ray) and memory (70%). 

-

62 

-

63 Args: 

-

64 memory_str (Union[str, None]): Memory string in format like "8G", "16GB", etc. 

-

65 

-

66 Returns: 

-

67 tuple[float, float]: A tuple containing (store_memory, memory) in bytes. 

-

68 """ 

-

69 if memory_str is None: 

-

70 return None, None 

-

71 

-

72 units = {"B": 1, "K": 2**10, "M": 2**20, "G": 2**30, "T": 2**40, "P": 2**50} 

-

73 

-

74 # Extract the numerical part and the unit 

-

75 value_str = "" 

-

76 unit = "" 

-

77 

-

78 for char in memory_str: 

-

79 if char.isdigit() or char == ".": 

-

80 value_str += char 

-

81 elif char.isalpha(): 

-

82 unit += char.upper() 

-

83 

-

84 value = float(value_str) 

-

85 

-

86 # Normalize the unit (to handle cases like Gi, GB, Mi, etc.) 

-

87 if unit.endswith(("I", "i", "B", "b")): 

-

88 unit = unit[:-1] 

-

89 

-

90 if unit not in units: 

-

91 raise ValueError(f"Unknown unit: {unit}") 

-

92 

-

93 bytes_value = value * units[unit] 

-

94 

-

95 # Calculate 30% and 70% 

-

96 thirty_percent = math.floor(bytes_value * 0.30) 

-

97 seventy_percent = math.floor(bytes_value * 0.70) 

-

98 

-

99 return float(thirty_percent), float(seventy_percent) 

-
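A quick illustration of the split above (a sketch, not part of the original file): an "8G" budget becomes 8 * 2**30 bytes, 30% of which goes to the object store and 70% to tuning.

from stimulus.utils.launch_utils import memory_split_for_ray_init

store_mem, tune_mem = memory_split_for_ray_init("8G")
print(store_mem, tune_mem)  # roughly 2.58e9 and 6.01e9 bytes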
- - - diff --git a/coverage/z_218b1cef00a0b898_performance_py.html b/coverage/z_218b1cef00a0b898_performance_py.html deleted file mode 100644 index 23d66a04..00000000 --- a/coverage/z_218b1cef00a0b898_performance_py.html +++ /dev/null @@ -1,228 +0,0 @@ - - - - - Coverage for src/stimulus/utils/performance.py: 0% - - - - - -
-
-

- Coverage for src/stimulus/utils/performance.py: - 0% -

- -

- 51 statements   - - - -

-

coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

- -
-
-
-

1"""Utility module for computing various performance metrics for machine learning models.""" 

-

2 

-

3from typing import Any 

-

4 

-

5import numpy as np 

-

6import torch 

-

7from scipy.stats import spearmanr 

-

8from sklearn.metrics import ( 

-

9 average_precision_score, 

-

10 f1_score, 

-

11 matthews_corrcoef, 

-

12 precision_score, 

-

13 recall_score, 

-

14 roc_auc_score, 

-

15) 

-

16 

-

17# Constants for threshold and number of classes 

-

18BINARY_THRESHOLD = 0.5 

-

19BINARY_CLASS_COUNT = 2 

-

20 

-

21 

-

22class Performance: 

-

23 """Returns the value of a given metric. 

-

24 

-

25 Parameters 

-

26 ---------- 

-

27 labels (np.array) : labels 

-

28 predictions (np.array) : predictions 

-

29 metric (str) : the metric to compute 

-

30 

-

31 Returns: 

-

32 ------- 

-

33 value (float) : the value of the metric 

-

34 

-

35 TODO we can add more metrics here 

-

36 

-

37 TODO currently for classification metrics like precision, recall, f1score and mcc, 

-

38 we are using a threshold of 0.5 to convert the probabilities to binary predictions. 

-

39 However for models with imbalanced predictions, where the meaningful threshold is not 

-

40 located at 0.5, one can end up with predictions that are all 0s or all 1s, and thus meaningless performance 

-

41 metrics. 

-

42 """ 

-

43 

-

44 def __init__(self, labels: Any, predictions: Any, metric: str = "rocauc") -> None: 

-

45 """Initialize Performance class with labels, predictions and metric type. 

-

46 

-

47 Args: 

-

48 labels: Ground truth labels 

-

49 predictions: Model predictions 

-

50 metric: Type of metric to compute (default: "rocauc") 

-

51 """ 

-

52 labels = self.data2array(labels) 

-

53 predictions = self.data2array(predictions) 

-

54 labels, predictions = self.handle_multiclass(labels, predictions) 

-

55 if labels.shape != predictions.shape: 

-

56 raise ValueError( 

-

57 f"The labels have shape {labels.shape} whereas predictions have shape {predictions.shape}.", 

-

58 ) 

-

59 function = getattr(self, metric) 

-

60 self.val = function(labels, predictions) 

-

61 

-

62 def data2array(self, data: Any) -> np.array: 

-

63 """Convert input data to numpy array. 

-

64 

-

65 Args: 

-

66 data: Input data in various formats 

-

67 

-

68 Returns: 

-

69 np.array: Converted numpy array 

-

70 

-

71 Raises: 

-

72 ValueError: If input data type is not supported 

-

73 """ 

-

74 if isinstance(data, list): 

-

75 return np.array(data) 

-

76 if isinstance(data, np.ndarray): 

-

77 return data 

-

78 if isinstance(data, torch.Tensor): 

-

79 return data.detach().cpu().numpy() 

-

80 if isinstance(data, (int, float)): 

-

81 return np.array([data]) 

-

82 raise ValueError(f"The data must be a list, np.array, torch.Tensor, int or float. Instead it is {type(data)}") 

-

83 

-

84 def handle_multiclass(self, labels: np.array, predictions: np.array) -> tuple[np.array, np.array]: 

-

85 """Handle the case of multiclass classification. 

-

86 

-

87 TODO currently only two class predictions are handled. Needs to handle the other scenarios. 

-

88 """ 

-

89 # if only one columns for labels and predictions 

-

90 if (len(labels.shape) == 1) and (len(predictions.shape) == 1): 

-

91 return labels, predictions 

-

92 

-

93 # if one columns for labels, but two columns for predictions 

-

94 if (len(labels.shape) == 1) and (predictions.shape[1] == BINARY_CLASS_COUNT): 

-

95 predictions = predictions[:, 1] # assumes the second column is the positive class 

-

96 return labels, predictions 

-

97 

-

98 # other scenarios not implemented yet 

-

99 raise ValueError(f"Labels have shape {labels.shape} and predictions have shape {predictions.shape}.") 

-

100 

-

101 def rocauc(self, labels: np.array, predictions: np.array) -> float: 

-

102 """Compute ROC AUC score.""" 

-

103 return roc_auc_score(labels, predictions) 

-

104 

-

105 def prauc(self, labels: np.array, predictions: np.array) -> float: 

-

106 """Compute PR AUC score.""" 

-

107 return average_precision_score(labels, predictions) 

-

108 

-

109 def mcc(self, labels: np.array, predictions: np.array) -> float: 

-

110 """Compute Matthews Correlation Coefficient.""" 

-

111 predictions = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions]) 

-

112 return matthews_corrcoef(labels, predictions) 

-

113 

-

114 def f1score(self, labels: np.array, predictions: np.array) -> float: 

-

115 """Compute F1 score.""" 

-

116 predictions = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions]) 

-

117 return f1_score(labels, predictions) 

-

118 

-

119 def precision(self, labels: np.array, predictions: np.array) -> float: 

-

120 """Compute precision score.""" 

-

121 predictions = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions]) 

-

122 return precision_score(labels, predictions) 

-

123 

-

124 def recall(self, labels: np.array, predictions: np.array) -> float: 

-

125 """Compute recall score.""" 

-

126 predictions = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions]) 

-

127 return recall_score(labels, predictions) 

-

128 

-

129 def spearmanr(self, labels: np.array, predictions: np.array) -> float: 

-

130 """Compute Spearman correlation coefficient.""" 

-

131 return spearmanr(labels, predictions)[0] 

-
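A minimal usage sketch of the Performance class above (not part of the original file); the labels and probabilities are toy values.

from stimulus.utils.performance import Performance

labels = [0, 0, 1, 1]
probabilities = [0.1, 0.4, 0.35, 0.8]

Performance(labels, probabilities, metric="rocauc").val   # 0.75
Performance(labels, probabilities, metric="f1score").val  # ~0.67 after thresholding at 0.5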
- - - diff --git a/coverage/z_218b1cef00a0b898_yaml_data_py.html b/coverage/z_218b1cef00a0b898_yaml_data_py.html deleted file mode 100644 index 17ab8463..00000000 --- a/coverage/z_218b1cef00a0b898_yaml_data_py.html +++ /dev/null @@ -1,453 +0,0 @@ - - - - - Coverage for src/stimulus/utils/yaml_data.py: 95% - - - - - -
-
-

- Coverage for src/stimulus/utils/yaml_data.py: - 95% -

- -

- 150 statements   - - - -

-

coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

- -
-
-
-

1"""Utility module for handling YAML configuration files and their validation.""" 

-

2 

-

3from typing import Any, Optional, Union 

-

4 

-

5import yaml 

-

6from pydantic import BaseModel, ValidationError, field_validator 

-

7 

-

8 

-

9class YamlGlobalParams(BaseModel): 

-

10 """Model for global parameters in YAML configuration.""" 

-

11 

-

12 seed: int 

-

13 

-

14 

-

15class YamlColumnsEncoder(BaseModel): 

-

16 """Model for column encoder configuration.""" 

-

17 

-

18 name: str 

-

19 params: Optional[dict[str, Union[str, list]]] # Allow both string and list values 

-

20 

-

21 

-

22class YamlColumns(BaseModel): 

-

23 """Model for column configuration.""" 

-

24 

-

25 column_name: str 

-

26 column_type: str 

-

27 data_type: str 

-

28 encoder: list[YamlColumnsEncoder] 

-

29 

-

30 

-

31class YamlTransformColumnsTransformation(BaseModel): 

-

32 """Model for column transformation configuration.""" 

-

33 

-

34 name: str 

-

35 params: Optional[dict[str, Union[list, float]]] # Allow both list and float values 

-

36 

-

37 

-

38class YamlTransformColumns(BaseModel): 

-

39 """Model for transform columns configuration.""" 

-

40 

-

41 column_name: str 

-

42 transformations: list[YamlTransformColumnsTransformation] 

-

43 

-

44 

-

45class YamlTransform(BaseModel): 

-

46 """Model for transform configuration.""" 

-

47 

-

48 transformation_name: str 

-

49 columns: list[YamlTransformColumns] 

-

50 

-

51 @field_validator("columns") 

-

52 @classmethod 

-

53 def validate_param_lists_across_columns(cls, columns: list[YamlTransformColumns]) -> list[YamlTransformColumns]: 

-

54 """Validate that parameter lists across columns have consistent lengths. 

-

55 

-

56 Args: 

-

57 columns: List of transform columns to validate 

-

58 

-

59 Returns: 

-

60 The validated columns list 

-

61 """ 

-

62 # Get all parameter list lengths across all columns and transformations 

-

63 all_list_lengths = set() 

-

64 

-

65 for column in columns: 

-

66 for transformation in column.transformations: 

-

67 if transformation.params and any( 

-

68 isinstance(param_value, list) and len(param_value) > 0 

-

69 for param_value in transformation.params.values() 

-

70 ): 

-

71 all_list_lengths.update( 

-

72 len(param_value) 

-

73 for param_value in transformation.params.values() 

-

74 if isinstance(param_value, list) and len(param_value) > 0 

-

75 ) 

-

76 

-

77 # Skip validation if no lists found 

-

78 if not all_list_lengths: 

-

79 return columns 

-

80 

-

81 # Check if all lists either have length 1, or all have the same length 

-

82 all_list_lengths.discard(1) # Remove length 1 as it's always valid 

-

83 if len(all_list_lengths) > 1: # Multiple different lengths found 

-

84 raise ValueError( 

-

85 "All parameter lists across columns must either contain one element or have the same length", 

-

86 ) 

-

87 

-

88 return columns 

-

89 

-

90 

-

91class YamlSplit(BaseModel): 

-

92 """Model for split configuration.""" 

-

93 

-

94 split_method: str 

-

95 params: dict[str, list[float]] # More specific type for split parameters 

-

96 split_input_columns: list[str] 

-

97 

-

98 

-

99class YamlConfigDict(BaseModel): 

-

100 """Model for main YAML configuration.""" 

-

101 

-

102 global_params: YamlGlobalParams 

-

103 columns: list[YamlColumns] 

-

104 transforms: list[YamlTransform] 

-

105 split: list[YamlSplit] 

-

106 

-

107 

-

108class YamlSubConfigDict(BaseModel): 

-

109 """Model for sub-configuration generated from main config.""" 

-

110 

-

111 global_params: YamlGlobalParams 

-

112 columns: list[YamlColumns] 

-

113 transforms: YamlTransform 

-

114 split: YamlSplit 

-

115 

-

116 

-

117class YamlSchema(BaseModel): 

-

118 """Model for validating YAML schema.""" 

-

119 

-

120 yaml_conf: YamlConfigDict 

-

121 

-

122 

-

123def extract_transform_parameters_at_index(transform: YamlTransform, index: int = 0) -> YamlTransform: 

-

124 """Get a transform with parameters at the specified index. 

-

125 

-

126 Args: 

-

127 transform: The original transform containing parameter lists 

-

128 index: Index to extract parameters from (default 0) 

-

129 

-

130 Returns: 

-

131 A new transform with single parameter values at the specified index 

-

132 """ 

-

133 # Create a copy of the transform 

-

134 new_transform = YamlTransform(**transform.model_dump()) 

-

135 

-

136 # Process each column and transformation 

-

137 for column in new_transform.columns: 

-

138 for transformation in column.transformations: 

-

139 if transformation.params: 

-

140 # Convert each parameter list to single value at index 

-

141 new_params = {} 

-

142 for param_name, param_value in transformation.params.items(): 

-

143 if isinstance(param_value, list): 

-

144 new_params[param_name] = param_value[index] 

-

145 else: 

-

146 new_params[param_name] = param_value 

-

147 transformation.params = new_params 

-

148 

-

149 return new_transform 

-

150 

-

151 

-

152def expand_transform_parameter_combinations(transform: YamlTransform) -> list[YamlTransform]: 

-

153 """Get all possible transforms by extracting parameters at each valid index. 

-

154 

-

155 For a transform with parameter lists, creates multiple new transforms, each containing 

-

156 single parameter values from the corresponding indices of the parameter lists. 

-

157 

-

158 Args: 

-

159 transform: The original transform containing parameter lists 

-

160 

-

161 Returns: 

-

162 A list of transforms, each with single parameter values from sequential indices 

-

163 """ 

-

164 # Find the length of parameter lists - we only need to check the first list we find 

-

165 # since all lists must have the same length (enforced by pydantic validator) 

-

166 max_length = 1 

-

167 for column in transform.columns: 

-

168 for transformation in column.transformations: 

-

169 if transformation.params: 

-

170 list_lengths = [len(v) for v in transformation.params.values() if isinstance(v, list) and len(v) > 1] 

-

171 if list_lengths: 

-

172 max_length = list_lengths[0] # All lists have same length due to validator 

-

173 break 

-

174 

-

175 # Generate a transform for each index 

-

176 transforms = [] 

-

177 for i in range(max_length): 

-

178 transforms.append(extract_transform_parameters_at_index(transform, i)) 

-

179 

-

180 return transforms 

-

181 

-

182 

-

183def expand_transform_list_combinations(transform_list: list[YamlTransform]) -> list[YamlTransform]: 

-

184 """Expands a list of transforms into all possible parameter combinations. 

-

185 

-

186 Takes a list of transforms where each transform may contain parameter lists, 

-

187 and expands them into separate transforms with single parameter values. 

-

188 For example, if a transform has parameters [0.1, 0.2] and [1, 2], this will 

-

189 create two transforms: one with 0.1/1 and another with 0.2/2. 

-

190 

-

191 Args: 

-

192 transform_list: A list of YamlTransform objects containing parameter lists 

-

193 that need to be expanded into individual transforms. 

-

194 

-

195 Returns: 

-

196 list[YamlTransform]: A flattened list of transforms where each transform 

-

197 has single parameter values instead of parameter lists. The length of 

-

198 the returned list will be the sum of the number of parameter combinations 

-

199 for each input transform. 

-

200 """ 

-

201 sub_transforms = [] 

-

202 for transform in transform_list: 

-

203 sub_transforms.extend(expand_transform_parameter_combinations(transform)) 

-

204 return sub_transforms 

-

205 

-

206 

-

207def generate_data_configs(yaml_config: YamlConfigDict) -> list[YamlSubConfigDict]: 

-

208 """Generates all possible data configurations from a YAML config. 

-

209 

-

210 Takes a YAML configuration that may contain parameter lists and splits, 

-

211 and generates all possible combinations of parameters and splits into 

-

212 separate data configurations. 

-

213 

-

214 For example, if the config has: 

-

215 - A transform with parameters [0.1, 0.2] 

-

216 - Two splits [0.7/0.3] and [0.8/0.2] 

-

217 This will generate 4 configs, 2 for each split. 

-

218 

-

219 Args: 

-

220 yaml_config: The source YAML configuration containing transforms with 

-

221 parameter lists and multiple splits. 

-

222 

-

223 Returns: 

-

224 list[YamlSubConfigDict]: A list of data configurations, where each 

-

225 config has single parameter values and one split configuration. The 

-

226 length will be the product of the number of parameter combinations 

-

227 and the number of splits. 

-

228 """ 

-

229 if isinstance(yaml_config, dict) and not isinstance(yaml_config, YamlConfigDict): 

-

230 raise TypeError("Input must be a YamlConfigDict object") 

-

231 

-

232 sub_transforms = expand_transform_list_combinations(yaml_config.transforms) 

-

233 sub_splits = yaml_config.split 

-

234 sub_configs = [] 

-

235 for split in sub_splits: 

-

236 for transform in sub_transforms: 

-

237 sub_configs.append( 

-

238 YamlSubConfigDict( 

-

239 global_params=yaml_config.global_params, 

-

240 columns=yaml_config.columns, 

-

241 transforms=transform, 

-

242 split=split, 

-

243 ), 

-

244 ) 

-

245 return sub_configs 

-

246 

-

247 

-
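A small illustration of the expansion logic above (a sketch, not in the original file): the transformation name "GaussianNoise" and the "std" parameter are placeholders; a two-element parameter list expands into two single-valued transforms, and generate_data_configs then multiplies that by the number of splits.

from stimulus.utils.yaml_data import (
    YamlTransform,
    YamlTransformColumns,
    YamlTransformColumnsTransformation,
    expand_transform_parameter_combinations,
)

transform = YamlTransform(
    transformation_name="noise",
    columns=[
        YamlTransformColumns(
            column_name="col_a",
            transformations=[
                YamlTransformColumnsTransformation(name="GaussianNoise", params={"std": [0.1, 0.2]}),
            ],
        ),
    ],
)

expanded = expand_transform_parameter_combinations(transform)
# len(expanded) == 2; their params are {"std": 0.1} and {"std": 0.2}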

248def dump_yaml_list_into_files( 

-

249 yaml_list: list[YamlSubConfigDict], 

-

250 directory_path: str, 

-

251 base_name: str, 

-

252) -> None: 

-

253 """Dumps a list of YAML configurations into separate files with custom formatting.""" 

-

254 # Disable YAML aliases to prevent reference-style output 

-

255 yaml.Dumper.ignore_aliases = lambda *args: True 

-

256 

-

257 def represent_none(dumper: yaml.Dumper, _: Any) -> yaml.Node: 

-

258 """Custom representer to format None values as empty strings in YAML output.""" 

-

259 return dumper.represent_scalar("tag:yaml.org,2002:null", "") 

-

260 

-

261 def custom_representer(dumper: yaml.Dumper, data: Any) -> yaml.Node: 

-

262 """Custom representer to handle different types of lists with appropriate formatting.""" 

-

263 if isinstance(data, list): 

-

264 if len(data) == 0: 

-

265 return dumper.represent_scalar("tag:yaml.org,2002:null", "") 

-

266 if isinstance(data[0], dict): 

-

267 return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=False) 

-

268 if isinstance(data[0], list): 

-

269 return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True) 

-

270 return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True) 

-

271 

-

272 class CustomDumper(yaml.Dumper): 

-

273 """Custom YAML dumper that adds extra formatting controls.""" 

-

274 

-

275 def write_line_break(self, data: Any = None) -> None: 

-

276 """Add extra newline after root-level elements.""" 

-

277 super().write_line_break(data) 

-

278 if len(self.indents) <= 1: # At root level 

-

279 super().write_line_break(data) 

-

280 

-

281 def increase_indent(self, *, flow: bool = False, indentless: bool = False) -> bool: 

-

282 """Ensure consistent indentation by preventing indentless sequences.""" 

-

283 return super().increase_indent(flow=flow, indentless=indentless) 

-

284 

-

285 # Register the custom representers with our dumper 

-

286 yaml.add_representer(type(None), represent_none, Dumper=CustomDumper) 

-

287 yaml.add_representer(list, custom_representer, Dumper=CustomDumper) 

-

288 

-

289 for i, yaml_dict in enumerate(yaml_list): 

-

290 dict_data = yaml_dict.model_dump(exclude_none=True) 

-

291 

-

292 def fix_params(input_dict: dict[str, Any]) -> dict[str, Any]: 

-

293 """Recursively process dictionary to properly handle params fields.""" 

-

294 if isinstance(input_dict, dict): 

-

295 processed_dict = {} 

-

296 for key, value in input_dict.items(): 

-

297 if key == "encoder" and isinstance(value, list): 

-

298 processed_dict[key] = [] 

-

299 for encoder in value: 

-

300 processed_encoder = dict(encoder) 

-

301 if "params" not in processed_encoder or not processed_encoder["params"]: 

-

302 processed_encoder["params"] = {} 

-

303 processed_dict[key].append(processed_encoder) 

-

304 elif key == "transformations" and isinstance(value, list): 

-

305 processed_dict[key] = [] 

-

306 for transformation in value: 

-

307 processed_transformation = dict(transformation) 

-

308 if "params" not in processed_transformation or not processed_transformation["params"]: 

-

309 processed_transformation["params"] = {} 

-

310 processed_dict[key].append(processed_transformation) 

-

311 elif isinstance(value, dict): 

-

312 processed_dict[key] = fix_params(value) 

-

313 elif isinstance(value, list): 

-

314 processed_dict[key] = [ 

-

315 fix_params(list_item) if isinstance(list_item, dict) else list_item for list_item in value 

-

316 ] 

-

317 else: 

-

318 processed_dict[key] = value 

-

319 return processed_dict 

-

320 return input_dict 

-

321 

-

322 dict_data = fix_params(dict_data) 

-

323 

-

324 with open(f"{directory_path}/{base_name}_{i}.yaml", "w") as f: 

-

325 yaml.dump( 

-

326 dict_data, 

-

327 f, 

-

328 Dumper=CustomDumper, 

-

329 sort_keys=False, 

-

330 default_flow_style=False, 

-

331 indent=2, 

-

332 width=float("inf"), # Prevent line wrapping 

-

333 ) 

-

334 

-

335 

-

336def check_yaml_schema(config_yaml: str) -> str: 

-

337 """Validate YAML configuration fields have correct types. 

-

338 

-

339 If a child field is specific to a parent, the child field's class is nested inside the parent field's class. 

-

340 If any field is not the right type, the function raises a ValueError explaining the problem. 

-

341 

-

342 Args: 

-

343 config_yaml (dict): The dict containing the fields of the yaml configuration file 

-

344 

-

345 Returns: 

-

346 str: Empty string if validation succeeds 

-

347 

-

348 Raises: 

-

349 ValueError: If validation fails 

-

350 """ 

-

351 try: 

-

352 YamlSchema(yaml_conf=config_yaml) 

-

353 except ValidationError as e: 

-

354 # Use logging instead of print for error handling 

-

355 raise ValueError("Wrong type on a field, see the pydantic report above") from e 

-

356 return "" 

-
- - - diff --git a/coverage/z_218b1cef00a0b898_yaml_model_schema_py.html b/coverage/z_218b1cef00a0b898_yaml_model_schema_py.html deleted file mode 100644 index 4d3107a2..00000000 --- a/coverage/z_218b1cef00a0b898_yaml_model_schema_py.html +++ /dev/null @@ -1,251 +0,0 @@ - - - - - Coverage for src/stimulus/utils/yaml_model_schema.py: 0% - - - - - -
-
-

- Coverage for src/stimulus/utils/yaml_model_schema.py: - 0% -

- -

- 50 statements   - - - -

-

coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

- -
-
-
-

1"""Module for handling YAML configuration files and converting them to Ray Tune format.""" 

-

2 

-

3import random 

-

4from collections.abc import Callable 

-

5from copy import deepcopy 

-

6 

-

7import yaml 

-

8from ray import tune 

-

9 

-

10 

-

11class YamlRayConfigLoader: 

-

12 """Load and convert YAML configurations to Ray Tune format. 

-

13 

-

14 This class handles loading YAML configuration files and converting them into 

-

15 formats compatible with Ray Tune's hyperparameter search spaces. 

-

16 """ 

-

17 

-

18 def __init__(self, config_path: str) -> None: 

-

19 """Initialize the config loader with a YAML file path. 

-

20 

-

21 Args: 

-

22 config_path: Path to the YAML configuration file 

-

23 """ 

-

24 with open(config_path) as f: 

-

25 self.config = yaml.safe_load(f) 

-

26 self.config = self.convert_config_to_ray(self.config) 

-

27 

-

28 def raytune_space_selector(self, mode: Callable, space: list) -> Callable: 

-

29 """Convert space parameters to Ray Tune format based on the mode. 

-

30 

-

31 Args: 

-

32 mode: Ray Tune search space function (e.g., tune.choice, tune.uniform) 

-

33 space: List of parameters defining the search space 

-

34 

-

35 Returns: 

-

36 Configured Ray Tune search space 

-

37 

-

38 Raises: 

-

39 NotImplementedError: If the mode is not supported 

-

40 """ 

-

41 if mode.__name__ == "choice": 

-

42 return mode(space) 

-

43 

-

44 if mode.__name__ in ["uniform", "loguniform", "quniform", "qloguniform", "qnormal", "randint"]: 

-

45 return mode(*tuple(space)) 

-

46 

-

47 raise NotImplementedError(f"Mode {mode.__name__} not implemented yet") 

-

48 

-

49 def raytune_sample_from(self, mode: Callable, param: dict) -> Callable: 

-

50 """Apply tune.sample_from to a given custom sampling function. 

-

51 

-

52 Args: 

-

53 mode: Ray Tune sampling function 

-

54 param: Dictionary containing sampling parameters 

-

55 

-

56 Returns: 

-

57 Configured sampling function 

-

58 

-

59 Raises: 

-

60 NotImplementedError: If the sampling function is not supported 

-

61 """ 

-

62 if param["function"] == "sampint": 

-

63 return mode(lambda _: self.sampint(param["sample_space"], param["n_space"])) 

-

64 

-

65 raise NotImplementedError(f"Function {param['function']} not implemented yet") 

-

66 

-

67 def convert_raytune(self, param: dict) -> dict: 

-

68 """Convert parameter configuration to Ray Tune format. 

-

69 

-

70 Args: 

-

71 param: Parameter configuration dictionary 

-

72 

-

73 Returns: 

-

74 Ray Tune compatible parameter configuration 

-

75 

-

76 Raises: 

-

77 AttributeError: If the mode is not recognized in Ray Tune 

-

78 """ 

-

79 try: 

-

80 mode = getattr(tune, param["mode"]) 

-

81 except AttributeError as err: 

-

82 raise AttributeError( 

-

83 f"Mode {param['mode']} not recognized, check the ray.tune documentation at https://docs.ray.io/en/master/tune/api_docs/suggestion.html", 

-

84 ) from err 

-

85 

-

86 if param["mode"] != "sample_from": 

-

87 return self.raytune_space_selector(mode, param["space"]) 

-

88 return self.raytune_sample_from(mode, param) 

-

89 
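A standalone sketch of the mapping performed by convert_raytune and raytune_space_selector, using made-up parameter blocks: the "mode" string names a ray.tune function and "space" supplies its arguments, unpacked for interval modes and passed whole for choice.

from ray import tune

interval_param = {"mode": "loguniform", "space": [1e-5, 1e-2]}
mode = getattr(tune, interval_param["mode"])          # -> tune.loguniform
lr_space = mode(*tuple(interval_param["space"]))      # -> tune.loguniform(1e-5, 1e-2)

choice_param = {"mode": "choice", "space": [[64, 64], [128, 64, 32]]}
layer_space = getattr(tune, choice_param["mode"])(choice_param["space"])  # list passed whole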

-

90 def convert_config_to_ray(self, config: dict) -> dict: 

-

91 """Convert YAML configuration to Ray Tune format. 

-

92 

-

93 Converts parameters in model_params, loss_params, optimizer_params, and data_params 

-

94 to Ray Tune search spaces when a mode is specified. 

-

95 

-

96 Args: 

-

97 config: Raw configuration dictionary from YAML 

-

98 

-

99 Returns: 

-

100 Ray Tune compatible configuration dictionary 

-

101 """ 

-

102 new_config = deepcopy(config) 

-

103 for key in ["model_params", "loss_params", "optimizer_params", "data_params"]: 

-

104 for sub_key in config[key]: 

-

105 if "mode" in config[key][sub_key]: 

-

106 new_config[key][sub_key] = self.convert_raytune(config[key][sub_key]) 

-

107 

-

108 return new_config 

-

109 

-

110 def get_config_instance(self) -> dict: 

-

111 """Generate a configuration instance with sampled values. 

-

112 

-

113 Returns: 

-

114 Configuration dictionary with concrete sampled values 

-

115 """ 

-

116 config_instance = deepcopy(self.config) 

-

117 for key in ["model_params", "loss_params", "optimizer_params", "data_params"]: 

-

118 config_instance[key] = {} 

-

119 for sub_key in self.config[key]: 

-

120 config_instance[key][sub_key] = self.config[key][sub_key].sample() 

-

121 

-

122 return config_instance 

-

123 

-

124 def get_config(self) -> dict: 

-

125 """Return the current configuration. 

-

126 

-

127 Returns: 

-

128 Current configuration dictionary 

-

129 """ 

-

130 return self.config 

-

131 

-

132 @staticmethod 

-

133 def sampint(sample_space: list, n_space: list) -> list: 

-

134 """Return a list of n random samples from the sample_space. 

-

135 

-

136 This function is useful for sampling different numbers of layers, 

-

137 each with different numbers of neurons. 

-

138 

-

139 Args: 

-

140 sample_space: List [min, max] defining range of values to sample from 

-

141 n_space: List [min, max] defining range for number of samples 

-

142 

-

143 Returns: 

-

144 List of randomly sampled integers 

-

145 

-

146 Note: 

-

147 Uses Python's random module which is not cryptographically secure. 

-

148 This is acceptable for hyperparameter sampling but should not be 

-

149 used for security-critical purposes (S311 fails when linting). 

-

150 """ 

-

151 sample_space = range(sample_space[0], sample_space[1] + 1) 

-

152 n_space = range(n_space[0], n_space[1] + 1) 

-

153 n = random.choice(tuple(n_space)) # noqa: S311 

-

154 return random.sample(tuple(sample_space), n) 
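A worked example of sampint with invented bounds: one call first picks how many values to draw (from n_space) and then draws that many distinct integers from sample_space; because random.sample draws without replacement, the width of sample_space must be at least the largest allowed n.

from stimulus.utils.yaml_model_schema import YamlRayConfigLoader

# e.g. sample 1-3 hidden layers, each with 16-64 neurons
layer_sizes = YamlRayConfigLoader.sampint(sample_space=[16, 64], n_space=[1, 3])
print(layer_sizes)  # one possible outcome: [32, 57]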

-
diff --git a/coverage/z_3382268cc3ca4be5___init___py.html b/coverage/z_3382268cc3ca4be5___init___py.html
deleted file mode 100644
index cc98b232..00000000
--- a/coverage/z_3382268cc3ca4be5___init___py.html
+++ /dev/null
@@ -1,98 +0,0 @@
- Coverage for src/stimulus/cli/__init__.py: 100% (0 statements), coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

1"""Command line interface package for the stimulus library.""" 

-
diff --git a/coverage/z_3382268cc3ca4be5_analysis_default_py.html b/coverage/z_3382268cc3ca4be5_analysis_default_py.html
deleted file mode 100644
index eb9db2f9..00000000
--- a/coverage/z_3382268cc3ca4be5_analysis_default_py.html
+++ /dev/null
@@ -1,324 +0,0 @@
- Coverage for src/stimulus/cli/analysis_default.py: 0% (60 statements), coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

1#!/usr/bin/env python3 

-

2"""Analysis default module for running model analysis and performance evaluation.""" 

-

3 

-

4import argparse 

-

5import json 

-

6import os 

-

7from typing import Any 

-

8 

-

9from safetensors.torch import load_model as safe_load 

-

10 

-

11from stimulus.analysis.analysis_default import AnalysisPerformanceTune, AnalysisRobustness 

-

12from stimulus.utils.launch_utils import get_experiment, import_class_from_file 

-

13 

-

14 

-

15def get_args() -> argparse.Namespace: 

-

16 """Get the arguments when using from the commandline. 

-

17 

-

18 Returns: 

-

19 Parsed command line arguments. 

-

20 """ 

-

21 parser = argparse.ArgumentParser(description="") 

-

22 parser.add_argument("-m", "--model", type=str, required=True, metavar="FILE", help="The model .py file") 

-

23 parser.add_argument( 

-

24 "-w", 

-

25 "--weight", 

-

26 type=str, 

-

27 required=True, 

-

28 nargs="+", 

-

29 metavar="FILE", 

-

30 help="Model weights .pt file", 

-

31 ) 

-

32 parser.add_argument( 

-

33 "-me", 

-

34 "--metrics", 

-

35 type=str, 

-

36 required=True, 

-

37 nargs="+", 

-

38 metavar="FILE", 

-

39 help="The file path for the metrics file obtained during tuning", 

-

40 ) 

-

41 parser.add_argument( 

-

42 "-ec", 

-

43 "--experiment_config", 

-

44 type=str, 

-

45 required=True, 

-

46 nargs="+", 

-

47 metavar="FILE", 

-

48 help="The experiment config used to modify the data.", 

-

49 ) 

-

50 parser.add_argument( 

-

51 "-mc", 

-

52 "--model_config", 

-

53 type=str, 

-

54 required=True, 

-

55 nargs="+", 

-

56 metavar="FILE", 

-

57 help="The tune config file.", 

-

58 ) 

-

59 parser.add_argument( 

-

60 "-d", 

-

61 "--data", 

-

62 type=str, 

-

63 required=True, 

-

64 nargs="+", 

-

65 metavar="FILE", 

-

66 help="List of data files to be used for the analysis.", 

-

67 ) 

-

68 parser.add_argument("-o", "--outdir", type=str, required=True, help="output directory") 

-

69 

-

70 return parser.parse_args() 

-

71 

-

72 

-

73def main( 

-

74 model_path: str, 

-

75 weight_list: list[str], 

-

76 mconfig_list: list[str], 

-

77 metrics_list: list[str], 

-

78 econfig_list: list[str], 

-

79 data_list: list[str], 

-

80 outdir: str, 

-

81) -> None: 

-

82 """Run the main analysis pipeline. 

-

83 

-

84 Args: 

-

85 model_path: Path to model file 

-

86 weight_list: List of model weight paths 

-

87 mconfig_list: List of model config paths 

-

88 metrics_list: List of metric file paths 

-

89 econfig_list: List of experiment config paths 

-

90 data_list: List of data file paths 

-

91 outdir: Output directory path 

-

92 """ 

-

93 metrics = ["rocauc", "prauc", "mcc", "f1score", "precision", "recall"] 

-

94 

-

95 # Plot the performance during tuning/training 

-

96 run_analysis_performance_tune( 

-

97 metrics_list, 

-

98 [*metrics, "loss"], # Use list unpacking instead of concatenation 

-

99 os.path.join(outdir, "performance_tune_train"), 

-

100 ) 

-

101 

-

102 # Run robustness analysis 

-

103 run_analysis_performance_model( 

-

104 metrics, 

-

105 model_path, 

-

106 weight_list, 

-

107 mconfig_list, 

-

108 econfig_list, 

-

109 data_list, 

-

110 os.path.join(outdir, "performance_robustness"), 

-

111 ) 

-

112 
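A hypothetical programmatic call of main() (all file paths below are placeholders): two tuned models are evaluated on two datasets and the plots and tables are written under analysis_out/.

main(
    model_path="model.py",
    weight_list=["model_a-weights.pt", "model_b-weights.pt"],
    mconfig_list=["model_a-config.json", "model_b-config.json"],
    metrics_list=["model_a-metrics.csv", "model_b-metrics.csv"],
    econfig_list=["experiment.json"],
    data_list=["test_set_1.csv", "test_set_2.csv"],
    outdir="analysis_out",
)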

-

113 

-

114def run_analysis_performance_tune(metrics_list: list[str], metrics: list[str], outdir: str) -> None: 

-

115 """Run performance analysis during tuning/training. 

-

116 

-

117 Each model has a metrics file obtained during tuning/training, 

-

118 check the performance there and plot it. 

-

119 This is to track the model performance per training iteration. 

-

120 

-

121 Args: 

-

122 metrics_list: List of metric file paths 

-

123 metrics: List of metrics to analyze 

-

124 outdir: Output directory path 

-

125 """ 

-

126 if not os.path.exists(outdir): 

-

127 os.makedirs(outdir) 

-

128 

-

129 for metrics_path in metrics_list: 

-

130 AnalysisPerformanceTune(metrics_path).plot_metric_vs_iteration( 

-

131 metrics=metrics, 

-

132 output=os.path.join(outdir, metrics_path.replace("-metrics.csv", "") + "-metric_vs_iteration.png"), 

-

133 ) 

-

134 

-

135 

-

136def run_analysis_performance_model( 

-

137 metrics: list[str], 

-

138 model_path: str, 

-

139 weight_list: list[str], 

-

140 mconfig_list: list[str], 

-

141 econfig_list: list[str], 

-

142 data_list: list[str], 

-

143 outdir: str, 

-

144) -> None: 

-

145 """Run analysis to report model robustness. 

-

146 

-

147 This block will compute the predictions of each model for each dataset. 

-

148 This information will be parsed and plots will be generated to report the model robustness. 

-

149 

-

150 Args: 

-

151 metrics: List of metrics to analyze 

-

152 model_path: Path to model file 

-

153 weight_list: List of model weight paths 

-

154 mconfig_list: List of model config paths 

-

155 econfig_list: List of experiment config paths 

-

156 data_list: List of data file paths 

-

157 outdir: Output directory path 

-

158 """ 

-

159 if not os.path.exists(outdir): 

-

160 os.makedirs(outdir) 

-

161 

-

162 # Load all the models weights into a list 

-

163 model_names = [] 

-

164 model_list = [] 

-

165 model_class = import_class_from_file(model_path) 

-

166 for weight_path, mconfig_path in zip(weight_list, mconfig_list): 

-

167 model = load_model(model_class, weight_path, mconfig_path) 

-

168 model_names.append(mconfig_path.split("/")[-1].replace("-config.json", "")) 

-

169 model_list.append(model) 

-

170 

-

171 # Read experiment config and initialize experiment class 

-

172 with open(econfig_list[0]) as in_json: 

-

173 experiment_name = json.load(in_json)["experiment"] 

-

174 initialized_experiment_class = get_experiment(experiment_name) 

-

175 

-

176 # Initialize analysis 

-

177 analysis = AnalysisRobustness(metrics, initialized_experiment_class, batch_size=256) 

-

178 

-

179 # Compute performance metrics 

-

180 df = analysis.get_performance_table(model_names, model_list, data_list) 

-

181 df.to_csv(os.path.join(outdir, "performance_table.csv"), index=False) 

-

182 

-

183 # Get average performance 

-

184 tmp = analysis.get_average_performance_table(df) 

-

185 tmp.to_csv(os.path.join(outdir, "average_performance_table.csv"), index=False) 

-

186 

-

187 # Plot heatmap 

-

188 analysis.plot_performance_heatmap(df, output=os.path.join(outdir, "performance_heatmap.png")) 

-

189 

-

190 # Plot delta performance 

-

191 outdir2 = os.path.join(outdir, "delta_performance_vs_data") 

-

192 if not os.path.exists(outdir2): 

-

193 os.makedirs(outdir2) 

-

194 for metric in metrics: 

-

195 analysis.plot_delta_performance( 

-

196 metric, 

-

197 df, 

-

198 output=os.path.join(outdir2, f"delta_performance_{metric}.png"), 

-

199 ) 

-

200 

-

201 

-

202def load_model(model_class: Any, weight_path: str, mconfig_path: str) -> Any: 

-

203 """Load the model with its config and weights. 

-

204 

-

205 Args: 

-

206 model_class: Model class to instantiate 

-

207 weight_path: Path to model weights 

-

208 mconfig_path: Path to model config 

-

209 

-

210 Returns: 

-

211 Loaded model instance 

-

212 """ 

-

213 with open(mconfig_path) as in_json: 

-

214 mconfig = json.load(in_json)["model_params"] 

-

215 

-

216 model = model_class(**mconfig) 

-

217 return safe_load(model, weight_path, strict=True) 

-

218 

-

219 

-

220def run() -> None: 

-

221 """Run the analysis script.""" 

-

222 args = get_args() 

-

223 main(args.model, args.weight, args.model_config, args.metrics, args.experiment_config, args.data, args.outdir) 

-

224 

-

225 

-

226if __name__ == "__main__": 

-

227 run() 

-
diff --git a/coverage/z_3382268cc3ca4be5_check_model_py.html b/coverage/z_3382268cc3ca4be5_check_model_py.html
deleted file mode 100644
index db41dcee..00000000
--- a/coverage/z_3382268cc3ca4be5_check_model_py.html
+++ /dev/null
@@ -1,324 +0,0 @@
- Coverage for src/stimulus/cli/check_model.py: 0% (62 statements), coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

1#!/usr/bin/env python3 

-

2"""CLI module for checking model configuration and running initial tests.""" 

-

3 

-

4import argparse 

-

5import json 

-

6import logging 

-

7import os 

-

8 

-

9import yaml 

-

10 

-

11from stimulus.data.csv import CsvProcessing 

-

12from stimulus.learner.raytune_learner import TuneWrapper as StimulusTuneWrapper 

-

13from stimulus.utils.json_schema import JsonSchema 

-

14from stimulus.utils.launch_utils import get_experiment, import_class_from_file, memory_split_for_ray_init 

-

15 

-

16 

-

17def get_args() -> argparse.Namespace: 

-

18 """Get the arguments when using from the commandline. 

-

19 

-

20 Returns: 

-

21 Parsed command line arguments. 

-

22 """ 

-

23 parser = argparse.ArgumentParser(description="Launch check_model.") 

-

24 parser.add_argument("-d", "--data", type=str, required=True, metavar="FILE", help="Path to input csv file.") 

-

25 parser.add_argument("-m", "--model", type=str, required=True, metavar="FILE", help="Path to model file.") 

-

26 parser.add_argument( 

-

27 "-e", 

-

28 "--experiment", 

-

29 type=str, 

-

30 required=True, 

-

31 metavar="FILE", 

-

32 help="Experiment config file. From this the experiment class name is extracted.", 

-

33 ) 

-

34 parser.add_argument( 

-

35 "-c", 

-

36 "--config", 

-

37 type=str, 

-

38 required=True, 

-

39 metavar="FILE", 

-

40 help="Path to yaml config training file.", 

-

41 ) 

-

42 parser.add_argument( 

-

43 "-w", 

-

44 "--initial_weights", 

-

45 type=str, 

-

46 required=False, 

-

47 nargs="?", 

-

48 const=None, 

-

49 default=None, 

-

50 metavar="FILE", 

-

51 help="The path to the initial weights. These can be used by the model instead of the random initialization.", 

-

52 ) 

-

53 parser.add_argument( 

-

54 "--gpus", 

-

55 type=int, 

-

56 required=False, 

-

57 nargs="?", 

-

58 const=None, 

-

59 default=None, 

-

60 metavar="NUM_OF_MAX_GPU", 

-

61 help="Use to limit the number of GPUs ray can use. This might be useful on many occasions, especially in a cluster system.", 

-

62 ) 

-

63 parser.add_argument( 

-

64 "--cpus", 

-

65 type=int, 

-

66 required=False, 

-

67 nargs="?", 

-

68 const=None, 

-

69 default=None, 

-

70 metavar="NUM_OF_MAX_CPU", 

-

71 help="Use to limit the number of CPUs ray can use. This might be useful on many occasions, especially in a cluster system.", 

-

72 ) 

-

73 parser.add_argument( 

-

74 "--memory", 

-

75 type=str, 

-

76 required=False, 

-

77 nargs="?", 

-

78 const=None, 

-

79 default=None, 

-

80 metavar="MAX_MEMORY", 

-

81 help="Ray can have a limiter on the total memory it can use. This might be useful on many occasions, especially in a cluster system.", 

-

82 ) 

-

83 parser.add_argument( 

-

84 "-n", 

-

85 "--num_samples", 

-

86 type=int, 

-

87 required=False, 

-

88 nargs="?", 

-

89 const=3, 

-

90 default=3, 

-

91 metavar="NUM_SAMPLES", 

-

92 help="Number of samples for tuning. Overwrites tune.tune_params.num_samples in config.", 

-

93 ) 

-

94 parser.add_argument( 

-

95 "--ray_results_dirpath", 

-

96 type=str, 

-

97 required=False, 

-

98 nargs="?", 

-

99 const=None, 

-

100 default=None, 

-

101 metavar="DIR_PATH", 

-

102 help="Location where ray_results output dir should be written. If None, uses ~/ray_results.", 

-

103 ) 

-

104 parser.add_argument( 

-

105 "--debug_mode", 

-

106 action="store_true", 

-

107 help="Activate debug mode for tuning. Default false, no debug.", 

-

108 ) 

-

109 

-

110 return parser.parse_args() 

-

111 

-

112 

-

113def main( 

-

114 data_path: str, 

-

115 model_path: str, 

-

116 experiment_config: str, 

-

117 config_path: str, 

-

118 initial_weights_path: str | None = None, 

-

119 gpus: int | None = None, 

-

120 cpus: int | None = None, 

-

121 memory: str | None = None, 

-

122 num_samples: int = 3, 

-

123 ray_results_dirpath: str | None = None, 

-

124 *, 

-

125 debug_mode: bool = False, 

-

126) -> None: 

-

127 """Run the main model checking pipeline. 

-

128 

-

129 Args: 

-

130 data_path: Path to input data file. 

-

131 model_path: Path to model file. 

-

132 experiment_config: Path to experiment config. 

-

133 config_path: Path to training config. 

-

134 initial_weights_path: Optional path to initial weights. 

-

135 gpus: Maximum number of GPUs to use. 

-

136 cpus: Maximum number of CPUs to use. 

-

137 memory: Maximum memory to use. 

-

138 num_samples: Number of samples for tuning. 

-

139 ray_results_dirpath: Directory for ray results. 

-

140 debug_mode: Whether to run in debug mode. 

-

141 """ 

-

142 # Load experiment config 

-

143 with open(experiment_config) as in_json: 

-

144 exp_config = json.load(in_json) 

-

145 

-

146 # Initialize json schema and experiment class 

-

147 schema = JsonSchema(exp_config) 

-

148 initialized_experiment_class = get_experiment(schema.experiment) 

-

149 model_class = import_class_from_file(model_path) 

-

150 

-

151 # Update tune config 

-

152 updated_tune_conf = "check_model_modified_tune_config.yaml" 

-

153 with open(config_path) as conf_file, open(updated_tune_conf, "w") as new_conf: 

-

154 user_tune_config = yaml.safe_load(conf_file) 

-

155 user_tune_config["tune"]["tune_params"]["num_samples"] = num_samples 

-

156 

-

157 if user_tune_config["tune"]["scheduler"]["name"] == "ASHAScheduler": 

-

158 user_tune_config["tune"]["scheduler"]["params"]["max_t"] = 1 

-

159 user_tune_config["tune"]["scheduler"]["params"]["grace_period"] = 1 

-

160 user_tune_config["tune"]["step_size"] = 1 

-

161 elif user_tune_config["tune"]["scheduler"]["name"] == "FIFOScheduler": 

-

162 user_tune_config["tune"]["run_params"]["stop"]["training_iteration"] = 1 

-

163 

-

164 if initial_weights_path is not None: 

-

165 user_tune_config["model_params"]["initial_weights"] = os.path.abspath(initial_weights_path) 

-

166 

-

167 yaml.dump(user_tune_config, new_conf) 

-

168 

-

169 # Process CSV data 

-

170 csv_obj = CsvProcessing(initialized_experiment_class, data_path) 

-

171 downsampled_csv = "downsampled.csv" 

-

172 

-

173 if "split" not in csv_obj.check_and_get_categories(): 

-

174 config_default = {"name": "RandomSplitter", "params": {"split": [0.5, 0.5, 0.0]}} 

-

175 csv_obj.add_split(config_default) 

-

176 

-

177 csv_obj.save(downsampled_csv) 

-

178 

-

179 # Initialize ray 

-

180 object_store_mem, mem = memory_split_for_ray_init(memory) 

-

181 ray_results_dirpath = None if ray_results_dirpath is None else os.path.abspath(ray_results_dirpath) 

-

182 

-

183 # Create and run learner 

-

184 learner = StimulusTuneWrapper( 

-

185 updated_tune_conf, 

-

186 model_class, 

-

187 downsampled_csv, 

-

188 initialized_experiment_class, 

-

189 max_gpus=gpus, 

-

190 max_cpus=cpus, 

-

191 max_object_store_mem=object_store_mem, 

-

192 max_mem=mem, 

-

193 ray_results_dir=ray_results_dirpath, 

-

194 _debug=debug_mode, 

-

195 ) 

-

196 

-

197 grid_results = learner.tune() 

-

198 

-

199 # Check results 

-

200 logger = logging.getLogger(__name__) 

-

201 for i, result in enumerate(grid_results): 

-

202 if not result.error: 

-

203 logger.info("Trial %d finished successfully with metrics %s.", i, result.metrics) 

-

204 else: 

-

205 raise TypeError(f"Trial {i} failed with error {result.error}.") 

-

206 
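A sketch of the config override applied near the top of main(), shown on an invented user config: a full search space is clamped to a single cheap iteration so the model and pipeline can be smoke-tested quickly.

user_tune_config = {
    "tune": {
        "tune_params": {"num_samples": 50},
        "scheduler": {"name": "ASHAScheduler", "params": {"max_t": 100, "grace_period": 10}},
        "step_size": 5,
    },
}
user_tune_config["tune"]["tune_params"]["num_samples"] = 3   # CLI default for --num_samples
user_tune_config["tune"]["scheduler"]["params"]["max_t"] = 1
user_tune_config["tune"]["scheduler"]["params"]["grace_period"] = 1
user_tune_config["tune"]["step_size"] = 1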

-

207 

-

208def run() -> None: 

-

209 """Run the model checking script.""" 

-

210 args = get_args() 

-

211 main( 

-

212 args.data, 

-

213 args.model, 

-

214 args.experiment, 

-

215 args.config, 

-

216 args.initial_weights, 

-

217 args.gpus, 

-

218 args.cpus, 

-

219 args.memory, 

-

220 args.num_samples, 

-

221 args.ray_results_dirpath, 

-

222 debug_mode=args.debug_mode, 

-

223 ) 

-

224 

-

225 

-

226if __name__ == "__main__": 

-

227 run() 

-
diff --git a/coverage/z_3382268cc3ca4be5_predict_py.html b/coverage/z_3382268cc3ca4be5_predict_py.html
deleted file mode 100644
index 2514e960..00000000
--- a/coverage/z_3382268cc3ca4be5_predict_py.html
+++ /dev/null
@@ -1,303 +0,0 @@
- Coverage for src/stimulus/cli/predict.py: 0% (73 statements), coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

1#!/usr/bin/env python3 

-

2"""CLI module for model prediction on datasets.""" 

-

3 

-

4import argparse 

-

5import json 

-

6from collections.abc import Sequence 

-

7from typing import Any 

-

8 

-

9import polars as pl 

-

10import torch 

-

11from torch.utils.data import DataLoader 

-

12 

-

13from stimulus.data.handlertorch import TorchDataset 

-

14from stimulus.learner.predict import PredictWrapper 

-

15from stimulus.utils.launch_utils import get_experiment, import_class_from_file 

-

16 

-

17 

-

18def get_args() -> argparse.Namespace: 

-

19 """Parse command line arguments. 

-

20 

-

21 Returns: 

-

22 Parsed command line arguments. 

-

23 """ 

-

24 parser = argparse.ArgumentParser(description="Predict model outputs on a dataset.") 

-

25 parser.add_argument("-m", "--model", type=str, required=True, metavar="FILE", help="Path to model .py file.") 

-

26 parser.add_argument("-w", "--weight", type=str, required=True, metavar="FILE", help="Path to model weights file.") 

-

27 parser.add_argument( 

-

28 "-mc", 

-

29 "--model_config", 

-

30 type=str, 

-

31 required=True, 

-

32 metavar="FILE", 

-

33 help="Path to tune config file with model hyperparameters.", 

-

34 ) 

-

35 parser.add_argument( 

-

36 "-ec", 

-

37 "--experiment_config", 

-

38 type=str, 

-

39 required=True, 

-

40 metavar="FILE", 

-

41 help="Path to experiment config for data modification.", 

-

42 ) 

-

43 parser.add_argument("-d", "--data", type=str, required=True, metavar="FILE", help="Path to input data.") 

-

44 parser.add_argument("-o", "--output", type=str, required=True, metavar="FILE", help="Path for output predictions.") 

-

45 parser.add_argument("--split", type=int, help="Data split to use (default: None).") 

-

46 parser.add_argument("--return_labels", action="store_true", help="Include labels with predictions.") 

-

47 

-

48 return parser.parse_args() 

-

49 

-

50 

-

51def load_model(model_class: Any, weight_path: str, mconfig: dict[str, Any]) -> Any: 

-

52 """Load model with hyperparameters and weights. 

-

53 

-

54 Args: 

-

55 model_class: Model class to instantiate. 

-

56 weight_path: Path to model weights. 

-

57 mconfig: Model configuration dictionary. 

-

58 

-

59 Returns: 

-

60 Loaded model instance. 

-

61 """ 

-

62 hyperparameters = mconfig["model_params"] 

-

63 model = model_class(**hyperparameters) 

-

64 model.load_state_dict(torch.load(weight_path)) 

-

65 return model 

-

66 

-

67 

-

68def get_batch_size(mconfig: dict[str, Any]) -> int: 

-

69 """Get batch size from model config. 

-

70 

-

71 Args: 

-

72 mconfig: Model configuration dictionary. 

-

73 

-

74 Returns: 

-

75 Batch size to use for predictions. 

-

76 """ 

-

77 default_batch_size = 256 

-

78 if "data_params" in mconfig and "batch_size" in mconfig["data_params"]: 

-

79 return mconfig["data_params"]["batch_size"] 

-

80 return default_batch_size 

-

81 

-

82 

-

83def parse_y_keys(y: dict[str, Any], data: pl.DataFrame, y_type: str = "pred") -> dict[str, Any]: 

-

84 """Parse dictionary keys to match input data format. 

-

85 

-

86 Args: 

-

87 y: Dictionary of predictions or labels. 

-

88 data: Input DataFrame. 

-

89 y_type: Type of values ('pred' or 'label'). 

-

90 

-

91 Returns: 

-

92 Dictionary with updated keys. 

-

93 """ 

-

94 if not y: 

-

95 return y 

-

96 

-

97 parsed_y = {} 

-

98 for k1 in y: 

-

99 for k2 in data.columns: 

-

100 if k1 == k2.split(":")[0]: 

-

101 new_key = f"{k1}:{y_type}:{k2.split(':')[2]}" 

-

102 parsed_y[new_key] = y[k1] 

-

103 

-

104 return parsed_y 

-

105 
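A worked example of parse_y_keys with invented column names: prediction keys are rewritten to the name:pred:dtype form so they line up with the name:category:dtype convention used by the CSV columns.

import polars as pl

data = pl.DataFrame({"survived:label:int": [0, 1], "age:input:float": [22.0, 38.0]})
y_pred = {"survived": [0.12, 0.93]}
print(parse_y_keys(y_pred, data, y_type="pred"))
# {'survived:pred:int': [0.12, 0.93]}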

-

106 

-

107def add_meta_info(data: pl.DataFrame, y: dict[str, Any]) -> dict[str, Any]: 

-

108 """Add metadata columns to predictions/labels dictionary. 

-

109 

-

110 Args: 

-

111 data: Input DataFrame with metadata. 

-

112 y: Dictionary of predictions/labels. 

-

113 

-

114 Returns: 

-

115 Updated dictionary with metadata. 

-

116 """ 

-

117 keys = get_meta_keys(data.columns) 

-

118 for key in keys: 

-

119 y[key] = data[key].to_list() 

-

120 return y 

-

121 

-

122 

-

123def get_meta_keys(names: Sequence[str]) -> list[str]: 

-

124 """Extract metadata column keys. 

-

125 

-

126 Args: 

-

127 names: List of column names. 

-

128 

-

129 Returns: 

-

130 List of metadata column keys. 

-

131 """ 

-

132 return [name for name in names if name.split(":")[1] == "meta"] 

-

133 

-

134 

-

135def main( 

-

136 model_path: str, 

-

137 weight_path: str, 

-

138 mconfig_path: str, 

-

139 econfig_path: str, 

-

140 data_path: str, 

-

141 output: str, 

-

142 *, 

-

143 return_labels: bool, 

-

144 split: int | None, 

-

145) -> None: 

-

146 """Run model prediction pipeline. 

-

147 

-

148 Args: 

-

149 model_path: Path to model file. 

-

150 weight_path: Path to model weights. 

-

151 mconfig_path: Path to model config. 

-

152 econfig_path: Path to experiment config. 

-

153 data_path: Path to input data. 

-

154 output: Path for output predictions. 

-

155 return_labels: Whether to include labels. 

-

156 split: Data split to use. 

-

157 """ 

-

158 with open(mconfig_path) as in_json: 

-

159 mconfig = json.load(in_json) 

-

160 

-

161 model_class = import_class_from_file(model_path) 

-

162 model = load_model(model_class, weight_path, mconfig) 

-

163 

-

164 with open(econfig_path) as in_json: 

-

165 experiment_name = json.load(in_json)["experiment"] 

-

166 initialized_experiment_class = get_experiment(experiment_name) 

-

167 

-

168 dataloader = DataLoader( 

-

169 TorchDataset(data_path, initialized_experiment_class, split=split), 

-

170 batch_size=get_batch_size(mconfig), 

-

171 shuffle=False, 

-

172 ) 

-

173 

-

174 out = PredictWrapper(model, dataloader).predict(return_labels=return_labels) 

-

175 y_pred, y_true = out if return_labels else (out, {}) 

-

176 

-

177 y_pred = {k: v.tolist() for k, v in y_pred.items()} 

-

178 y_true = {k: v.tolist() for k, v in y_true.items()} 

-

179 

-

180 data = pl.read_csv(data_path) 

-

181 y_pred = parse_y_keys(y_pred, data, y_type="pred") 

-

182 y_true = parse_y_keys(y_true, data, y_type="label") 

-

183 

-

184 y = {**y_pred, **y_true} 

-

185 y = add_meta_info(data, y) 

-

186 df = pl.from_dict(y) 

-

187 df.write_csv(output) 

-

188 

-

189 

-

190def run() -> None: 

-

191 """Execute model prediction pipeline.""" 

-

192 args = get_args() 

-

193 main( 

-

194 args.model, 

-

195 args.weight, 

-

196 args.model_config, 

-

197 args.experiment_config, 

-

198 args.data, 

-

199 args.output, 

-

200 return_labels=args.return_labels, 

-

201 split=args.split, 

-

202 ) 

-

203 

-

204 

-

205if __name__ == "__main__": 

-

206 run() 

-
diff --git a/coverage/z_3382268cc3ca4be5_shuffle_csv_py.html b/coverage/z_3382268cc3ca4be5_shuffle_csv_py.html
deleted file mode 100644
index b83939d4..00000000
--- a/coverage/z_3382268cc3ca4be5_shuffle_csv_py.html
+++ /dev/null
@@ -1,187 +0,0 @@
- Coverage for src/stimulus/cli/shuffle_csv.py: 0% (29 statements), coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

1#!/usr/bin/env python3 

-

2"""CLI module for shuffling CSV data files.""" 

-

3 

-

4import argparse 

-

5import json 

-

6import os 

-

7 

-

8from stimulus.data.csv import CsvProcessing 

-

9from stimulus.utils.launch_utils import get_experiment 

-

10 

-

11 

-

12def get_args() -> argparse.Namespace: 

-

13 """Get the arguments when using from the commandline. 

-

14 

-

15 Returns: 

-

16 Parsed command line arguments. 

-

17 """ 

-

18 parser = argparse.ArgumentParser(description="Shuffle rows in a CSV data file.") 

-

19 parser.add_argument( 

-

20 "-c", 

-

21 "--csv", 

-

22 type=str, 

-

23 required=True, 

-

24 metavar="FILE", 

-

25 help="The file path for the csv containing all data", 

-

26 ) 

-

27 parser.add_argument( 

-

28 "-j", 

-

29 "--json", 

-

30 type=str, 

-

31 required=True, 

-

32 metavar="FILE", 

-

33 help="The json config file that hold all parameter info", 

-

34 ) 

-

35 parser.add_argument( 

-

36 "-o", 

-

37 "--output", 

-

38 type=str, 

-

39 required=True, 

-

40 metavar="FILE", 

-

41 help="The output file path to write the noised csv", 

-

42 ) 

-

43 

-

44 return parser.parse_args() 

-

45 

-

46 

-

47def main(data_csv: str, config_json: str, out_path: str) -> None: 

-

48 """Shuffle the data and split it according to the default split method. 

-

49 

-

50 Args: 

-

51 data_csv: Path to input CSV file. 

-

52 config_json: Path to config JSON file. 

-

53 out_path: Path to output shuffled CSV. 

-

54 

-

55 TODO major changes needed when this selects a given shuffle method and integrates with split. 

-

56 """ 

-

57 # open and read the JSON just to extract the experiment name; all other fields are discarded 

-

58 config = None 

-

59 with open(config_json) as in_json: 

-

60 tmp = json.load(in_json) 

-

61 config = tmp 

-

62 # add fake transform information 

-

63 config["transform"] = "shuffle (special case)" 

-

64 

-

65 # write the modified config; it will be associated with the shuffled data. TODO find a better solution than renaming like this 

-

66 modified_json = os.path.splitext(os.path.basename(data_csv))[0].split("-split")[0] + "-shuffled-experiment.json" 

-

67 with open(modified_json, "w") as out_json: 

-

68 json.dump(config, out_json) 

-

69 

-

70 # initialize the experiment class 

-

71 exp_obj = get_experiment(config["experiment"]) 

-

72 

-

73 # initialize the csv processing class; it opens and reads the csv automatically 

-

74 csv_obj = CsvProcessing(exp_obj, data_csv) 

-

75 

-

76 # shuffle the data with a default seed. TODO get the seed from the config if and when it is set there. 

-

77 csv_obj.shuffle_labels(seed=42) 

-

78 

-

79 # save the modified csv 

-

80 csv_obj.save(out_path) 

-

81 
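A worked example (invented filename) of the renaming done in main() above: the CSV basename loses its extension and anything from "-split" onwards, then gets the shuffled-experiment suffix.

import os

data_csv = "/work/titanic-split-0.csv"  # hypothetical input
modified_json = os.path.splitext(os.path.basename(data_csv))[0].split("-split")[0] + "-shuffled-experiment.json"
assert modified_json == "titanic-shuffled-experiment.json"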

-

82 

-

83def run() -> None: 

-

84 """Run the CSV shuffling script.""" 

-

85 args = get_args() 

-

86 main(args.csv, args.json, args.output) 

-

87 

-

88 

-

89if __name__ == "__main__": 

-

90 run() 

-
diff --git a/coverage/z_3382268cc3ca4be5_split_csv_py.html b/coverage/z_3382268cc3ca4be5_split_csv_py.html
deleted file mode 100644
index 9283c3a4..00000000
--- a/coverage/z_3382268cc3ca4be5_split_csv_py.html
+++ /dev/null
@@ -1,194 +0,0 @@
- Coverage for src/stimulus/cli/split_csv.py: 0% (33 statements), coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

1#!/usr/bin/env python3 

-

2"""CLI module for splitting CSV data files.""" 

-

3 

-

4import argparse 

-

5import json 

-

6import logging 

-

7 

-

8from stimulus.data.csv import CsvProcessing 

-

9from stimulus.utils.launch_utils import get_experiment 

-

10 

-

11 

-

12def get_args() -> argparse.Namespace: 

-

13 """Get the arguments when using from the commandline.""" 

-

14 parser = argparse.ArgumentParser(description="Split a CSV data file.") 

-

15 parser.add_argument( 

-

16 "-c", 

-

17 "--csv", 

-

18 type=str, 

-

19 required=True, 

-

20 metavar="FILE", 

-

21 help="The file path for the csv containing all data", 

-

22 ) 

-

23 parser.add_argument( 

-

24 "-j", 

-

25 "--json", 

-

26 type=str, 

-

27 required=True, 

-

28 metavar="FILE", 

-

29 help="The json config file that hold all parameter info", 

-

30 ) 

-

31 parser.add_argument( 

-

32 "-o", 

-

33 "--output", 

-

34 type=str, 

-

35 required=True, 

-

36 metavar="FILE", 

-

37 help="The output file path to write the noised csv", 

-

38 ) 

-

39 

-

40 return parser.parse_args() 

-

41 

-

42 

-

43def main(data_csv: str, config_json: str, out_path: str) -> None: 

-

44 """Connect CSV and JSON configuration and handle sanity checks. 

-

45 

-

46 Args: 

-

47 data_csv: Path to input CSV file. 

-

48 config_json: Path to config JSON file. 

-

49 out_path: Path to output split CSV. 

-

50 

-

51 TODO what happens when the user writes their own experiment class? How should they do it? How does it integrate here? 

-

52 """ 

-

53 # open and read Json 

-

54 config = {} 

-

55 with open(config_json) as in_json: 

-

56 config = json.load(in_json) 

-

57 

-

58 # initialize the experiment class 

-

59 exp_obj = get_experiment(config["experiment"]) 

-

60 

-

61 # initialize the csv processing class; it opens and reads the csv automatically 

-

62 csv_obj = CsvProcessing(exp_obj, data_csv) 

-

63 

-

64 # CASE 1: SPLIT in csv, not in json --> keep the split from the csv 

-

65 if "split" in csv_obj.check_and_get_categories() and config["split"] is None: 

-

66 pass 

-

67 

-

68 # CASE 2: SPLIT in csv and in json --> use the split from the json 

-

69 # TODO change this behaviour to do both, maybe 

-

70 elif "split" in csv_obj.check_and_get_categories() and config["split"]: 

-

71 logging.info("SPLIT present in both csv and json --> use the split from the json") 

-

72 csv_obj.add_split(config["split"], force=True) 

-

73 

-

74 # CASE 3: SPLIT neither in csv nor in json --> use the default RandomSplitter 

-

75 elif "split" not in csv_obj.check_and_get_categories() and config["split"] is None: 

-

76 # In case no split is provided, we use the default RandomSplitter 

-

77 logging.warning("SPLIT neither in csv nor in json --> use the default RandomSplitter") 

-

78 # if the user config is None then set to default splitter -> RandomSplitter. 

-

79 config_default = {"name": "RandomSplitter", "params": {}} 

-

80 csv_obj.add_split(config_default) 

-

81 

-

82 # CASE 4: SPLIT in json, not in csv --> use the split from the json 

-

83 else: 

-

84 csv_obj.add_split(config["split"], force=True) 

-

85 

-

86 # save the modified csv 

-

87 csv_obj.save(out_path) 

-

88 
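The four cases above condensed into one illustrative helper (not part of the module): which split specification wins depends only on whether the CSV already carries a split column and whether the JSON config provides one.

def choose_split_source(split_in_csv: bool, split_in_json: bool) -> str:
    if split_in_csv and not split_in_json:
        return "keep the split from the csv"            # CASE 1
    if split_in_csv and split_in_json:
        return "overwrite with the json split (force)"  # CASE 2
    if not split_in_csv and not split_in_json:
        return "default RandomSplitter"                 # CASE 3
    return "apply the json split (force)"               # CASE 4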

-

89 

-

90def run() -> None: 

-

91 """Run the CSV splitting script.""" 

-

92 args = get_args() 

-

93 main(args.csv, args.json, args.output) 

-

94 

-

95 

-

96if __name__ == "__main__": 

-

97 run() 

-
diff --git a/coverage/z_3382268cc3ca4be5_split_yaml_py.html b/coverage/z_3382268cc3ca4be5_split_yaml_py.html
deleted file mode 100644
index ed43e718..00000000
--- a/coverage/z_3382268cc3ca4be5_split_yaml_py.html
+++ /dev/null
@@ -1,175 +0,0 @@
- Coverage for src/stimulus/cli/split_yaml.py: 68% (19 statements), coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

1#!/usr/bin/env python3 

-

2"""CLI module for splitting YAML configuration files. 

-

3 

-

4This module provides functionality to split a single YAML configuration file into multiple 

-

5YAML files, each containing a specific combination of data transformations and splits. 

-

6The resulting YAML files can be used as input configurations for the stimulus package. 

-

7""" 

-

8 

-

9import argparse 

-

10 

-

11import yaml 

-

12 

-

13from src.stimulus.utils.yaml_data import ( 

-

14 YamlConfigDict, 

-

15 check_yaml_schema, 

-

16 dump_yaml_list_into_files, 

-

17 generate_data_configs, 

-

18) 

-

19 

-

20 

-

21def get_args() -> argparse.Namespace: 

-

22 """Get the arguments when using from the command line.""" 

-

23 parser = argparse.ArgumentParser(description="") 

-

24 parser.add_argument( 

-

25 "-j", 

-

26 "--yaml", 

-

27 type=str, 

-

28 required=True, 

-

29 metavar="FILE", 

-

30 help="The YAML config file that hold all transform - split - parameter info", 

-

31 ) 

-

32 parser.add_argument( 

-

33 "-d", 

-

34 "--out_dir", 

-

35 type=str, 

-

36 required=False, 

-

37 nargs="?", 

-

38 const="./", 

-

39 default="./", 

-

40 metavar="DIR", 

-

41 help="The output dir where all the YAMLs are written to. Output YAML will be called split-#[number].yaml transform-#[number].yaml. Default -> ./", 

-

42 ) 

-

43 

-

44 return parser.parse_args() 

-

45 

-

46 

-

47def main(config_yaml: str, out_dir_path: str) -> str: 

-

48 """Reads a YAML config file and generates all possible data configurations. 

-

49 

-

50 This script reads a YAML with a defined structure and creates all the YAML files ready to be passed to 

-

51 the stimulus package. 

-

52 

-

53 The structure of the YAML is described here -> TODO paste here link to documentation. 

-

54 This YAML and its structure summarize how to generate all the transform - split and respective parameter combinations. 

-

55 Each resulting YAML will hold only one combination of the above three things. 

-

56 

-

57 This script will always generate at least one YAML file that represent the combination that does not touch the data (no transform) 

-

58 and uses the default split behavior. 

-

59 """ 

-

60 # read the yaml experiment config and load it to dictionary 

-

61 yaml_config = {} 

-

62 with open(config_yaml) as conf_file: 

-

63 yaml_config = yaml.safe_load(conf_file) 

-

64 

-

65 # check if the yaml schema is correct 

-

66 check_yaml_schema(yaml_config) 

-

67 

-

68 # generate all the YAML configs 

-

69 config_dict = YamlConfigDict(**yaml_config) 

-

70 data_configs = generate_data_configs(config_dict) 

-

71 

-

72 # dump all the YAML configs into files 

-

73 dump_yaml_list_into_files(data_configs, out_dir_path, "test") 

-

74 
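A hypothetical programmatic equivalent of this CLI (paths are placeholders): one master YAML is expanded into one file per transform/split combination.

main("experiment_master.yaml", "./configs/")
# dump_yaml_list_into_files names the results test_0.yaml, test_1.yaml, ... inside ./configs/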

-

75 

-

76if __name__ == "__main__": 

-

77 args = get_args() 

-

78 main(args.yaml, args.out_dir) 

-
diff --git a/coverage/z_3382268cc3ca4be5_transform_csv_py.html b/coverage/z_3382268cc3ca4be5_transform_csv_py.html
deleted file mode 100644
index 3341308e..00000000
--- a/coverage/z_3382268cc3ca4be5_transform_csv_py.html
+++ /dev/null
@@ -1,172 +0,0 @@
- Coverage for src/stimulus/cli/transform_csv.py: 0% (24 statements), coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

1#!/usr/bin/env python3 

-

2"""CLI module for transforming CSV data files.""" 

-

3 

-

4import argparse 

-

5import json 

-

6 

-

7from stimulus.data.csv import CsvProcessing 

-

8from stimulus.utils.launch_utils import get_experiment 

-

9 

-

10 

-

11def get_args() -> argparse.Namespace: 

-

12 """Get the arguments when using from the commandline.""" 

-

13 parser = argparse.ArgumentParser(description="") 

-

14 parser.add_argument( 

-

15 "-c", 

-

16 "--csv", 

-

17 type=str, 

-

18 required=True, 

-

19 metavar="FILE", 

-

20 help="The file path for the csv containing all data", 

-

21 ) 

-

22 parser.add_argument( 

-

23 "-j", 

-

24 "--json", 

-

25 type=str, 

-

26 required=True, 

-

27 metavar="FILE", 

-

28 help="The json config file that hold all parameter info", 

-

29 ) 

-

30 parser.add_argument( 

-

31 "-o", 

-

32 "--output", 

-

33 type=str, 

-

34 required=True, 

-

35 metavar="FILE", 

-

36 help="The output file path to write the noised csv", 

-

37 ) 

-

38 

-

39 return parser.parse_args() 

-

40 

-

41 

-

42def main(data_csv: str, config_json: str, out_path: str) -> None: 

-

43 """Connect CSV and JSON configuration and handle sanity checks. 

-

44 

-

45 This launcher will be the connection between the csv and one json configuration. 

-

46 It should also handle some sanity checks. 

-

47 """ 

-

48 # open and read Json 

-

49 config = {} 

-

50 with open(config_json) as in_json: 

-

51 config = json.load(in_json) 

-

52 

-

53 # initialize the experiment class 

-

54 exp_obj = get_experiment(config["experiment"]) 

-

55 

-

56 # initialize the csv processing class; it opens and reads the csv automatically 

-

57 csv_obj = CsvProcessing(exp_obj, data_csv) 

-

58 

-

59 # Transform the data according to what is defined in the experiment class and the user's specifications in the Json 

-

60 # if no transformation is specified, i.e. the config has "augmentation": None, just save a copy of the original csv file 

-

61 if config.get("transform") is not None: 

-

62 csv_obj.transform(config["transform"]) 

-

63 

-

64 # save the modified csv 

-

65 csv_obj.save(out_path) 

-

66 

-

67 

-

68def run() -> None: 

-

69 """Run the CSV transformation script.""" 

-

70 args = get_args() 

-

71 main(args.csv, args.json, args.output) 

-

72 

-

73 

-

74if __name__ == "__main__": 

-

75 run() 

-
diff --git a/coverage/z_3382268cc3ca4be5_tuning_py.html b/coverage/z_3382268cc3ca4be5_tuning_py.html
deleted file mode 100644
index 7d08be1b..00000000
--- a/coverage/z_3382268cc3ca4be5_tuning_py.html
+++ /dev/null
@@ -1,381 +0,0 @@
- Coverage for src/stimulus/cli/tuning.py: 0% (66 statements), coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

1#!/usr/bin/env python3 

-

2"""CLI module for tuning model hyperparameters using Ray Tune. 

-

3 

-

4This module provides functionality to tune hyperparameters of machine learning models 

-

5using Ray Tune. It supports configuring resources like GPUs/CPUs, saving best models 

-

6and metrics, and debugging capabilities. 

-

7""" 

-

8 

-

9import argparse 

-

10import json 

-

11import os 

-

12from typing import Optional 

-

13 

-

14import yaml 

-

15from torch.utils.data import DataLoader 

-

16 

-

17from stimulus.data.handlertorch import TorchDataset 

-

18from stimulus.learner.predict import PredictWrapper 

-

19from stimulus.learner.raytune_learner import TuneWrapper as StimulusTuneWrapper 

-

20from stimulus.learner.raytune_parser import TuneParser as StimulusTuneParser 

-

21from stimulus.utils.launch_utils import get_experiment, import_class_from_file, memory_split_for_ray_init 

-

22 

-

23 

-

24def get_args() -> argparse.Namespace: 

-

25 """Get the arguments when using from the commandline.""" 

-

26 parser = argparse.ArgumentParser(description="") 

-

27 parser.add_argument( 

-

28 "-c", 

-

29 "--config", 

-

30 type=str, 

-

31 required=True, 

-

32 metavar="FILE", 

-

33 help="The file path for the config file", 

-

34 ) 

-

35 parser.add_argument("-m", "--model", type=str, required=True, metavar="FILE", help="The model file") 

-

36 parser.add_argument("-d", "--data", type=str, required=True, metavar="FILE", help="The data file") 

-

37 parser.add_argument( 

-

38 "-e", 

-

39 "--experiment_config", 

-

40 type=str, 

-

41 required=True, 

-

42 metavar="FILE", 

-

43 help="The json used to modify the data. Inside it has the experiment name as specified in the experimets.py, this will then be dinamically imported during training. It is necessary to recover how the user specified the encoding of the data. Data is encoded on the fly.", 

-

44 ) 

-

45 parser.add_argument( 

-

46 "-o", 

-

47 "--output", 

-

48 type=str, 

-

49 required=False, 

-

50 nargs="?", 

-

51 const="best_model.pt", 

-

52 default="best_model.pt", 

-

53 metavar="FILE", 

-

54 help="The output file path to write the trained model to", 

-

55 ) 

-

56 parser.add_argument( 

-

57 "-bc", 

-

58 "--best_config", 

-

59 type=str, 

-

60 required=False, 

-

61 nargs="?", 

-

62 const="best_config.json", 

-

63 default="best_config.json", 

-

64 metavar="FILE", 

-

65 help="The path to write the best config to", 

-

66 ) 

-

67 parser.add_argument( 

-

68 "-bm", 

-

69 "--best_metrics", 

-

70 type=str, 

-

71 required=False, 

-

72 nargs="?", 

-

73 const="best_metrics.csv", 

-

74 default="best_metrics.csv", 

-

75 metavar="FILE", 

-

76 help="The path to write the best metrics to", 

-

77 ) 

-

78 parser.add_argument( 

-

79 "-bo", 

-

80 "--best_optimizer", 

-

81 type=str, 

-

82 required=False, 

-

83 nargs="?", 

-

84 const="best_optimizer.pt", 

-

85 default="best_optimizer.pt", 

-

86 metavar="FILE", 

-

87 help="The path to write the best optimizer to", 

-

88 ) 

-

89 parser.add_argument( 

-

90 "-w", 

-

91 "--initial_weights", 

-

92 type=str, 

-

93 required=False, 

-

94 nargs="?", 

-

95 const=None, 

-

96 default=None, 

-

97 metavar="FILE", 

-

98 help="The path to the initial weights. These can be used by the model instead of the random initialization", 

-

99 ) 

-

100 parser.add_argument( 

-

101 "--gpus", 

-

102 type=int, 

-

103 required=False, 

-

104 nargs="?", 

-

105 const=None, 

-

106 default=None, 

-

107 metavar="NUM_OF_MAX_GPU", 

-

108 help="Use to limit the number of GPUs ray can use. This might be useful on many occasions, especially in a cluster system. The default value is None meaning ray will use all GPUs available. It can be set to 0 to use only CPUs.", 

-

109 ) 

-

110 parser.add_argument( 

-

111 "--cpus", 

-

112 type=int, 

-

113 required=False, 

-

114 nargs="?", 

-

115 const=None, 

-

116 default=None, 

-

117 metavar="NUM_OF_MAX_CPU", 

-

118 help="Use to limit the number of CPUs ray can use. This might be useful on many occasions, especially in a cluster system. The default value is None meaning ray will use all CPUs available. It can be set to 0 to use only GPUs.", 

-

119 ) 

-

120 parser.add_argument( 

-

121 "--memory", 

-

122 type=str, 

-

123 required=False, 

-

124 nargs="?", 

-

125 const=None, 

-

126 default=None, 

-

127 metavar="MAX_MEMORY", 

-

128 help="ray can have a limiter on the total memory it can use. This might be useful on many occasions, especially in a cluster system. The default value is None meaning ray will use all memory available.", 

-

129 ) 

-

130 parser.add_argument( 

-

131 "--ray_results_dirpath", 

-

132 type=str, 

-

133 required=False, 

-

134 nargs="?", 

-

135 const=None, 

-

136 default=None, 

-

137 metavar="DIR_PATH", 

-

138 help="the location where ray_results output dir should be written. if set to None (default) ray will be place it in ~/ray_results ", 

-

139 ) 

-

140 parser.add_argument( 

-

141 "--tune_run_name", 

-

142 type=str, 

-

143 required=False, 

-

144 nargs="?", 

-

145 const=None, 

-

146 default=None, 

-

147 metavar="CUSTOM_RUN_NAME", 

-

148 help="tells ray tune what that the 'experiment_name' aka the given tune_run name should be. This is controlled be the variable name in the RunConfig class of tune. This has two behaviuors: 1 if set the subdir of ray_results is going to be named with this value, 2 the subdir of the above mentioned will also have this value as prefix for the single train dir name. Default None, meaning ray will generate such a name on its own.", 

-

149 ) 

-

150 parser.add_argument( 

-

151 "--debug_mode", 

-

152 type=str, 

-

153 required=False, 

-

154 nargs="?", 

-

155 const=False, 

-

156 default=False, 

-

157 metavar="DEV", 

-

158 help="activate debug mode for tuning. default false, no debug.", 

-

159 ) 

-

160 

-

161 return parser.parse_args() 

-

162 

-

163 

-

164def main( 

-

165 config_path: str, 

-

166 model_path: str, 

-

167 data_path: str, 

-

168 experiment_config: str, 

-

169 output: str, 

-

170 best_config_path: str, 

-

171 best_metrics_path: str, 

-

172 best_optimizer_path: str, 

-

173 initial_weights_path: Optional[str] = None, 

-

174 gpus: Optional[int] = None, 

-

175 cpus: Optional[int] = None, 

-

176 memory: Optional[str] = None, 

-

177 ray_results_dirpath: Optional[str] = None, 

-

178 tune_run_name: Optional[str] = None, 

-

179 *, 

-

180 debug_mode: bool = False, 

-

181) -> None: 

-

182 """This launcher use ray tune to find the best hyperparameters for a given model.""" 

-

183 # TODO update to yaml the experiment config 

-

184 # load json into dictionary 

-

185 exp_config = {} 

-

186 with open(experiment_config) as in_json: 

-

187 exp_config = json.load(in_json) 

-

188 

-

189 # initialize the experiment class 

-

190 initialized_experiment_class = get_experiment(exp_config["experiment"]) 

-

191 

-

192 # import the model correctly but do not initialize it yet, ray_tune does that itself 

-

193 model_class = import_class_from_file(model_path) 

-

194 

-

195 # Update the tune config file: if cpu and gpu resources are specified they are overwritten with what nextflow provides, otherwise the field is created 

-

196 updated_tune_conf = "check_model_modified_tune_config.yaml" 

-

197 with open(config_path) as conf_file, open(updated_tune_conf, "w") as new_conf: 

-

198 user_tune_config = yaml.safe_load(conf_file) 

-

199 

-

200 # add initial weights to the config, when provided 

-

201 if initial_weights_path is not None: 

-

202 user_tune_config["model_params"]["initial_weights"] = os.path.abspath(initial_weights_path) 

-

203 

-

204 # save to file the new dictionary because StimulusTuneWrapper only takes paths 

-

205 yaml.dump(user_tune_config, new_conf) 

-

206 

-

207 # compute the memory requirements for ray init. Useful in case ray detects them wrongly. Memory is split in two for ray: object-store memory and the actual memory for tuning. The following function takes the total usable/allocated memory as a string parameter and returns, in bytes, the values for store_memory (30% by default in ray) and memory (70%). 

-

208 object_store_mem, mem = memory_split_for_ray_init(memory) 

-

209 

-

210 # set the ray_results dir location. TODO this version of pytorch does not support relative paths, in future maybe good to remove abspath. 

-

211 ray_results_dirpath = None if ray_results_dirpath is None else os.path.abspath(ray_results_dirpath) 

-

212 

-

213 # Create the learner 

-

214 learner = StimulusTuneWrapper( 

-

215 updated_tune_conf, 

-

216 model_class, 

-

217 data_path, 

-

218 initialized_experiment_class, 

-

219 max_gpus=gpus, 

-

220 max_cpus=cpus, 

-

221 max_object_store_mem=object_store_mem, 

-

222 max_mem=mem, 

-

223 ray_results_dir=ray_results_dirpath, 

-

224 tune_run_name=tune_run_name, 

-

225 _debug=debug_mode, 

-

226 ) 

-

227 

-

228 # Tune the model and get the tuning results 

-

229 grid_results = learner.tune() 

-

230 

-

231 # parse raytune results 

-

232 results = StimulusTuneParser(grid_results) 

-

233 results.save_best_model(output) 

-

234 results.save_best_config(best_config_path) 

-

235 results.save_best_metrics_dataframe(best_metrics_path) 

-

236 results.save_best_optimizer(best_optimizer_path) 

-

237 

-

238 # debug section. predict the validation data using the best model. 

-

239 if debug_mode: 

-

240 # initialize the model class with the respective tune parameters from the associated config 

-

241 best_tune_config = results.get_best_config() 

-

242 best_model = model_class(**best_tune_config["model_params"]) 

-

243 # get the weights associated to the best model and load them onto the model class 

-

244 best_model.load_state_dict(results.get_best_model()) 

-

245 # load the data in a dataloader and predict in an ordered manner, i.e. no shuffle. 

-

246 validation_set = DataLoader( 

-

247 TorchDataset(data_path, initialized_experiment_class, split=1), 

-

248 batch_size=learner.config["data_params"]["batch_size"].sample(), 

-

249 shuffle=False, 

-

250 ) 

-

251 predictions = PredictWrapper(best_model, validation_set).predict() 

-

252 # write the predictions to a file in the tune-run-specific ray results folder. 

-

253 pred_filename = os.path.join(learner.config["tune_run_path"], "debug", "best_model_val_pred.txt") 

-

254 # record which model was the best one found; the easiest way is to save its seed 

-

255 best_model_seed = os.path.join(learner.config["tune_run_path"], "debug", "best_model_seed.txt") 

-

256 with open(pred_filename, "w") as pred_f, open(best_model_seed, "w") as seed_f: 

-

257 pred_f.write(str(predictions)) 

-

258 seed_f.write(str(best_tune_config["ray_worker_seed"])) 

-

259 
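An illustrative calculation of the 30/70 memory split described in the comment at line 207 above, for a hypothetical 8 GB cap (memory_split_for_ray_init's exact string parsing is assumed, not shown here):

total_bytes = 8 * 1024**3                   # e.g. --memory "8G", expressed in bytes
object_store_mem = int(total_bytes * 0.3)   # share reserved for Ray's object store
mem = int(total_bytes * 0.7)                # remaining memory for the tuning workers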

-

260 

-

261def run() -> None: 

-

262 """Run the model tuning script.""" 

-

263 args = get_args() 

-

264 main( 

-

265 args.config, 

-

266 args.model, 

-

267 args.data, 

-

268 args.experiment_config, 

-

269 args.output, 

-

270 args.best_config, 

-

271 args.best_metrics, 

-

272 args.best_optimizer, 

-

273 args.initial_weights, 

-

274 args.gpus, 

-

275 args.cpus, 

-

276 args.memory, 

-

277 args.ray_results_dirpath, 

-

278 args.tune_run_name, 

-

279 debug_mode=args.debug_mode, 

-

280 ) 

-

281 

-

282 

-

283if __name__ == "__main__": 

-

284 run() 

-
diff --git a/coverage/z_3a7c4543ac712e3a___init___py.html b/coverage/z_3a7c4543ac712e3a___init___py.html
deleted file mode 100644
index 4fa7677e..00000000
--- a/coverage/z_3a7c4543ac712e3a___init___py.html
+++ /dev/null
@@ -1,107 +0,0 @@
- Coverage for src/stimulus/data/__init__.py: 100% (0 statements), coverage.py v7.6.4, created at 2025-01-22 16:46 +0100

1"""Data handling and processing module. 

-

2 

-

3This module provides functionality for loading, transforming, and managing data 

-

4in various formats like CSV. It includes classes and utilities for: 

-

5 

-

6- Loading and processing CSV data files 

-

7- Applying data transformations and augmentations 

-

8- Splitting data into train/validation/test sets 

-

9- Converting data into PyTorch datasets 

-

10""" 

-
diff --git a/coverage/z_3a7c4543ac712e3a_csv_py.html b/coverage/z_3a7c4543ac712e3a_csv_py.html
deleted file mode 100644
index cf746033..00000000
--- a/coverage/z_3a7c4543ac712e3a_csv_py.html
+++ /dev/null
[coverage.py v7.6.4 report, created at 2025-01-22 16:46 +0100 — src/stimulus/data/csv.py: 88% coverage, 129 statements. The page rendered the full source of the csv module, which handles CSV data files in the STIMULUS format: a raw CSV plus a YAML configuration defining column roles (input/label/meta), per-column encoders, transformations (noise, augmentation, ...), and the train/val/test split (see titanic.yaml in tests/test_data/titanic/ for an example). The module defines DatasetManager (loads the YAML and categorizes columns by type), EncodeManager (encodes columns or whole dataframes with the configured encoders), TransformManager (applies a named transformation to a column and reports whether rows were added), SplitManager (returns train/validation/test indices), DatasetHandler (base class that reads the CSV header and saves data), DatasetProcessor (adds the split column, applies transformation groups, and can shuffle labels), and DatasetLoader (optionally loads a single split, encodes input/label columns on access, and returns (input, label, meta) dictionaries for model training).]
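For orientation, a hedged sketch of how the classes described above fit together, based on the docstrings in the deleted report (the file paths are made up; constructor and method names follow the report but the exact flow is an assumption, not the project's documented usage):

```python
# Illustrative sketch only.
import yaml
from stimulus.data import csv, experiments
from stimulus.utils import yaml_data

with open("titanic.yaml") as f:  # illustrative path
    config = yaml_data.YamlSubConfigDict(**yaml.safe_load(f))

# Encoders are built from the "columns" section of the dataset YAML.
encoder_loader = experiments.EncoderLoader()
encoder_loader.initialize_column_encoders_from_config(config.columns)

# DatasetLoader reads the CSV, keeps only the requested split (0 = train),
# and encodes input/label columns on access.
loader = csv.DatasetLoader("titanic.yaml", "titanic.csv", encoder_loader=encoder_loader, split=0)
inputs, labels, meta = loader.get_all_items()
```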
diff --git a/coverage/z_3a7c4543ac712e3a_experiments_py.html b/coverage/z_3a7c4543ac712e3a_experiments_py.html
deleted file mode 100644
index 00526e0f..00000000
--- a/coverage/z_3a7c4543ac712e3a_experiments_py.html
+++ /dev/null
[coverage.py v7.6.4 report, created at 2025-01-22 16:46 +0100 — src/stimulus/data/experiments.py: 74% coverage, 78 statements. The page rendered the full source of the experiments module, whose loaders serve as interfaces between the CSV master class and custom methods: EncoderLoader (builds encoders from the "columns" config and exposes each column's encode_all function), TransformLoader (builds data transformers such as noisers and augmenters per column), and SplitLoader (builds the splitter and exposes its split function). Loaders are built from a YAML config whose format is described in the documentation; an example lives at tests/test_data/dna_experiment/dna_experiment_config_template.yaml, and its transforms section looks like:]

```yaml
transforms:
  transformation_name: noise
  columns:
    - column_name: age
      transformations:
        - name: GaussianNoise
          params:
            std: 0.1
    - column_name: fare
      transformations:
        - name: GaussianNoise
          params:
            std: 0.1
```
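A hedged sketch of how a loader is built from that kind of config (method names follow the deleted report's docstrings; the path, seed, and overall flow are illustrative assumptions):

```python
import yaml
from stimulus.data import experiments
from stimulus.utils import yaml_data

with open("dna_experiment_config_template.yaml") as f:  # illustrative path
    config = yaml_data.YamlSubConfigDict(**yaml.safe_load(f))

# One transformer instance per (column, transformation) pair, e.g. GaussianNoise on "age".
transform_loader = experiments.TransformLoader(seed=42)
transform_loader.initialize_column_data_transformers_from_config(config.transforms)

# Splitter built from the "split" section, then exposed as a single split function.
split_loader = experiments.SplitLoader(seed=42)
split_loader.initialize_splitter_from_config(config.split)
split_fn = split_loader.get_function_split()
```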
diff --git a/coverage/z_3a7c4543ac712e3a_handlertorch_py.html b/coverage/z_3a7c4543ac712e3a_handlertorch_py.html
deleted file mode 100644
index 1c524d97..00000000
--- a/coverage/z_3a7c4543ac712e3a_handlertorch_py.html
+++ /dev/null
[coverage.py v7.6.4 report, created at 2025-01-22 16:46 +0100 — src/stimulus/data/handlertorch.py: 100% coverage, 10 statements. The rendered source:]

"""This file provides the class API for handling the data in pytorch using the Dataset and Dataloader classes."""

from typing import Optional

from torch.utils.data import Dataset

from src.stimulus.data import csv, experiments


class TorchDataset(Dataset):
    """Class for creating a torch dataset."""

    def __init__(
        self,
        config_path: str,
        csv_path: str,
        encoder_loader: experiments.EncoderLoader,
        split: Optional[tuple[None, int]] = None,
    ) -> None:
        """Initialize the TorchDataset.

        Args:
            config_path: Path to the configuration file
            csv_path: Path to the CSV data file
            encoder_loader: Encoder loader instance
            split: Optional tuple containing split information
        """
        self.loader = csv.DatasetLoader(
            config_path=config_path,
            csv_path=csv_path,
            encoder_loader=encoder_loader,
            split=split,
        )

    def __len__(self) -> int:
        return len(self.loader)

    def __getitem__(self, idx: int) -> tuple[dict, dict, dict]:
        return self.loader[idx]
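A hedged usage sketch for the TorchDataset above (the paths, batch size, and the prepared encoder loader are illustrative assumptions):

```python
from torch.utils.data import DataLoader

from stimulus.data import experiments
from stimulus.data.handlertorch import TorchDataset

encoder_loader = experiments.EncoderLoader()  # assumed to be initialized from the dataset YAML beforehand

dataset = TorchDataset(
    config_path="dataset_config.yaml",  # illustrative path
    csv_path="dataset.csv",             # illustrative path
    encoder_loader=encoder_loader,
    split=0,                            # 0 = train, 1 = validation, 2 = test
)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

for inputs, labels, meta in train_loader:
    break  # each batch is the (input dict, label dict, meta dict) triple from __getitem__
```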
diff --git a/coverage/z_77b10b442f9d4059___init___py.html b/coverage/z_77b10b442f9d4059___init___py.html
deleted file mode 100644
index 050d168d..00000000
--- a/coverage/z_77b10b442f9d4059___init___py.html
+++ /dev/null
[coverage.py v7.6.4 report, created at 2025-01-22 16:46 +0100 — src/stimulus/data/encoding/__init__.py: 100% coverage, 0 statements. The rendered source is the single docstring line:]

"""Encoding package for data transformation."""
diff --git a/coverage/z_77b10b442f9d4059_encoders_py.html b/coverage/z_77b10b442f9d4059_encoders_py.html
deleted file mode 100644
index 1d3fba13..00000000
--- a/coverage/z_77b10b442f9d4059_encoders_py.html
+++ /dev/null
[coverage.py v7.6.4 report, created at 2025-01-22 16:46 +0100 — src/stimulus/data/encoding/encoders.py: 93% coverage, 145 statements. The page rendered the full source of the encoders module: AbstractEncoder (abstract base with encode, encode_all, decode, and a multiprocessing helper encode_multiprocess), TextOneHotEncoder (one-hot encoding over a configurable alphabet via scikit-learn's OneHotEncoder, with optional lowercasing and zero-padding; characters outside the alphabet become all-zero rows, and decoding maps them back to "-"), NumericEncoder (casts float/int values to a torch tensor of a configurable dtype, warning when floats are cast to int types), StrClassificationEncoder (scikit-learn LabelEncoder over a list of strings, optionally scaled to [0, 1]), and NumericRankEncoder (encodes numeric values by their rank, optionally scaled to [0, 1]).]
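A short usage sketch of the encoders described above, reproducing the behaviour documented in the deleted report's docstring examples (the inputs are illustrative):

```python
import torch

from stimulus.data.encoding.encoders import NumericEncoder, TextOneHotEncoder

# One-hot encode DNA sequences over the "acgt" alphabet; unknown characters
# such as "n" become all-zero rows, and padding equalizes sequence lengths.
dna_encoder = TextOneHotEncoder(alphabet="acgt", padding=True)
batch = dna_encoder.encode_all(["acgt", "acgtn"])
print(batch.shape)  # torch.Size([2, 5, 4]), as in the docstring example

# Numeric labels become a float32 tensor by default.
label_encoder = NumericEncoder(dtype=torch.float32)
labels = label_encoder.encode_all([0.0, 1.0, 1.0])
```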
diff --git a/coverage/z_8855e5c0f7f22643___init___py.html b/coverage/z_8855e5c0f7f22643___init___py.html
deleted file mode 100644
index fff292a0..00000000
--- a/coverage/z_8855e5c0f7f22643___init___py.html
+++ /dev/null
[coverage.py v7.6.4 report, created at 2025-01-22 16:46 +0100 — src/stimulus/analysis/__init__.py: 100% coverage, 0 statements. The rendered source is the single docstring line:]

"""Analysis package for stimulus, analysis_default is to be refactored, see git issues."""
- - - diff --git a/coverage/z_8855e5c0f7f22643_analysis_default_py.html b/coverage/z_8855e5c0f7f22643_analysis_default_py.html deleted file mode 100644 index 1ddad1c1..00000000 --- a/coverage/z_8855e5c0f7f22643_analysis_default_py.html +++ /dev/null @@ -1,473 +0,0 @@ - - - - - Coverage for src/stimulus/analysis/analysis_default.py: 0% - - - - - -
-
-

- Coverage for src/stimulus/analysis/analysis_default.py: - 0% -

- -

- 152 statements   - - - -

-

- « prev     - ^ index     - » next -       - coverage.py v7.6.4, - created at 2025-01-22 16:46 +0100 -

- -
-
-
-

1"""Default analysis module for stimulus package.""" 

-

2 

-

3import math 

-

4from typing import Any 

-

5 

-

6import matplotlib as mpl 

-

7import numpy as np 

-

8import pandas as pd 

-

9from matplotlib import pyplot as plt 

-

10from torch.utils.data import DataLoader 

-

11 

-

12from stimulus.data.handlertorch import TorchDataset 

-

13from stimulus.learner.predict import PredictWrapper 

-

14 

-

15 

-

16class Analysis: 

-

17 """General functions for analysis and plotting. 

-

18 

-

19 TODO automatically set up proper figsize depends on the number of subplots, etc 

-

20 """ 

-

21 

-

22 @staticmethod 

-

23 def get_grid_shape(n: int) -> tuple[int, int]: 

-

24 """Calculates rows and columns for a rectangle layout (flexible).""" 

-

25 rows = int(math.ceil(math.sqrt(n))) # Round up the square root for rows 

-

26 cols = int(math.ceil(n / rows)) # Calculate columns based on rows 

-

27 return rows, cols 
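For example, with five subplots the layout logic above gives a 3 x 2 grid (the spare axis is later turned off by the plotting methods):

    import math
    n = 5
    rows = math.ceil(math.sqrt(n))   # 3
    cols = math.ceil(n / rows)       # 2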

-

28 

-

29 @staticmethod 

-

30 def heatmap( 

-

31 data: np.ndarray, 

-

32 row_labels: list[str], 

-

33 col_labels: list[str], 

-

34 ax: Any | None = None, 

-

35 cbar_kw: dict | None = None, 

-

36 cbarlabel: str = "", 

-

37 **kwargs: Any, 

-

38 ) -> tuple[Any, Any]: 

-

39 """Create a heatmap from a numpy array and two lists of labels. 

-

40 

-

41 Parameters 

-

42 ---------- 

-

43 data 

-

44 A 2D numpy array of shape (M, N). 

-

45 row_labels 

-

46 A list or array of length M with the labels for the rows. 

-

47 col_labels 

-

48 A list or array of length N with the labels for the columns. 

-

49 ax 

-

50 A `matplotlib.axes.Axes` instance to which the heatmap is plotted. If 

-

51 not provided, use current axes or create a new one. Optional. 

-

52 cbar_kw 

-

53 A dictionary with arguments to `matplotlib.Figure.colorbar`. Optional. 

-

54 cbarlabel 

-

55 The label for the colorbar. Optional. 

-

56 **kwargs 

-

57 All other arguments are forwarded to `imshow`. 

-

58 """ 

-

59 if ax is None: 

-

60 ax = plt.gca() 

-

61 

-

62 if cbar_kw is None: 

-

63 cbar_kw = {} 

-

64 

-

65 # Plot the heatmap 

-

66 im = ax.imshow(data, **kwargs) 

-

67 

-

68 # Create colorbar 

-

69 cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw) 

-

70 cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom") 

-

71 

-

72 # Show all ticks and label them with the respective list entries. 

-

73 ax.set_xticks(np.arange(data.shape[1]), labels=col_labels) 

-

74 ax.set_yticks(np.arange(data.shape[0]), labels=row_labels) 

-

75 

-

76 # Let the horizontal axes labeling appear on top. 

-

77 ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False) 

-

78 

-

79 # Rotate the tick labels and set their alignment. 

-

80 plt.setp(ax.get_xticklabels(), rotation=-30, ha="right", rotation_mode="anchor") 

-

81 

-

82 # Turn spines off and create white grid. 

-

83 ax.spines[:].set_visible(False) 

-

84 

-

85 ax.set_xticks(np.arange(data.shape[1] + 1) - 0.5, minor=True) 

-

86 ax.set_yticks(np.arange(data.shape[0] + 1) - 0.5, minor=True) 

-

87 ax.grid(which="minor", color="w", linestyle="-", linewidth=3) 

-

88 ax.tick_params(which="minor", bottom=False, left=False) 

-

89 

-

90 return im, cbar 

-

91 

-

92 @staticmethod 

-

93 def annotate_heatmap( 

-

94 im: Any, 

-

95 data: np.ndarray | None = None, 

-

96 valfmt: str = "{x:.2f}", 

-

97 textcolors: tuple[str, str] = ("black", "white"), 

-

98 threshold: float | None = None, 

-

99 **textkw: Any, 

-

100 ) -> list[Any]: 

-

101 """A function to annotate a heatmap. 

-

102 

-

103 Parameters 

-

104 ---------- 

-

105 im 

-

106 The AxesImage to be labeled. 

-

107 data 

-

108 Data used to annotate. If None, the image's data is used. Optional. 

-

109 valfmt 

-

110 The format of the annotations inside the heatmap. This should either 

-

111 use the string format method, e.g. "$ {x:.2f}", or be a 

-

112 `matplotlib.ticker.Formatter`. Optional. 

-

113 textcolors 

-

114 A pair of colors. The first is used for values below a threshold, 

-

115 the second for those above. Optional. 

-

116 threshold 

-

117 Value in data units according to which the colors from textcolors are 

-

118 applied. If None (the default) uses the middle of the colormap as 

-

119 separation. Optional. 

-

120 **kwargs 

-

121 All other arguments are forwarded to each call to `text` used to create 

-

122 the text labels. 

-

123 """ 

-

124 if not isinstance(data, (list, np.ndarray)): 

-

125 data = im.get_array() 

-

126 

-

127 # Normalize the threshold to the images color range. 

-

128 threshold = im.norm(threshold) if threshold is not None else im.norm(data.max()) / 2.0 

-

129 

-

130 # Set default alignment to center, but allow it to be 

-

131 # overwritten by textkw. 

-

132 kw = {"horizontalalignment": "center", "verticalalignment": "center"} 

-

133 kw.update(textkw) 

-

134 

-

135 # Get the formatter in case a string is supplied 

-

136 if isinstance(valfmt, str): 

-

137 valfmt = mpl.ticker.StrMethodFormatter(valfmt) 

-

138 

-

139 # Loop over the data and create a `Text` for each "pixel". 

-

140 # Change the text's color depending on the data. 

-

141 texts = [] 

-

142 for i in range(data.shape[0]): 

-

143 for j in range(data.shape[1]): 

-

144 kw.update(color=textcolors[int(im.norm(data[i, j]) > threshold)]) 

-

145 text = im.axes.text(j, i, valfmt(data[i, j], None), **kw) 

-

146 texts.append(text) 

-

147 

-

148 return texts 
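A hedged usage sketch for the two static helpers above; the import path is assumed from the coverage file location (src/stimulus/analysis/analysis_default.py), and "rocauc" is just a hypothetical metric name.

    import numpy as np
    from matplotlib import pyplot as plt
    from stimulus.analysis.analysis_default import Analysis   # assumed import path

    scores = np.array([[0.91, 0.85], [0.78, 0.88]])            # toy model-by-dataset matrix
    fig, ax = plt.subplots()
    im, cbar = Analysis.heatmap(scores, ["model_a", "model_b"], ["data_a", "data_b"],
                                ax=ax, cmap="YlGn", cbarlabel="rocauc")
    Analysis.annotate_heatmap(im, valfmt="{x:.2f}")            # writes the values on each cell
    plt.show()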

-

149 

-

150 

-

151class AnalysisPerformanceTune(Analysis): 

-

152 """Report the performance during tuning. 

-

153 

-

154 TODO maybe instead of reporting one pdf for one model with all metrics, 

-

155 report one pdf for all models with one metric. 

-

156 TODO or maybe one pdf for all models with all metrics, colored by model. One for train, one for val. 

-

157 """ 

-

158 

-

159 def __init__(self, results_path: str) -> None: 

-

160 """Initialize the AnalysisPerformanceTune class.""" 

-

161 super().__init__() 

-

162 self.results = pd.read_csv(results_path) 

-

163 

-

164 def plot_metric_vs_iteration( 

-

165 self, 

-

166 metrics: list, 

-

167 figsize: tuple = (10, 10), 

-

168 output: str | None = None, 

-

169 ) -> None: 

-

170 """Plot metrics vs iteration for training and validation.""" 

-

171 # create figure 

-

172 rows, cols = self.get_grid_shape(len(metrics)) 

-

173 fig, axs = plt.subplots(rows, cols, figsize=figsize) 

-

174 

-

175 # plot each metric 

-

176 for i, ax in enumerate(axs.flat): 

-

177 if i >= len(metrics): 

-

178 ax.axis("off") 

-

179 continue 

-

180 self.plot_metric_vs_iteration_per_metric(axs.flat[i], metrics[i]) 

-

181 

-

182 # add legend 

-

183 # axs.flat[0].legend() 

-

184 handles, labels = axs[0, 0].get_legend_handles_labels() # Get handles and labels from one subplot 

-

185 plt.legend(handles, labels, loc="upper left") # Adjust location as needed 

-

186 

-

187 # save plot 

-

188 plt.tight_layout() 

-

189 if output: 

-

190 plt.savefig(output) 

-

191 plt.show() 

-

192 

-

193 def plot_metric_vs_iteration_per_metric(self, ax: Any, metric: str) -> Any: 

-

194 """Plot the metric vs the iteration.""" 

-

195 # plot training performance 

-

196 ax.plot( 

-

197 self.results.training_iteration, 

-

198 self.results["train_" + metric], 

-

199 c="blue", 

-

200 label="train", 

-

201 ) 

-

202 

-

203 # plot validation performance 

-

204 ax.plot( 

-

205 self.results.training_iteration, 

-

206 self.results["val_" + metric], 

-

207 c="orange", 

-

208 label="val", 

-

209 ) 

-

210 

-

211 # TODO set x-axis labels into integer 

-

212 # plt.xticks(range(min(self.results.training_iteration), max(self.results.training_iteration))) 

-

213 

-

214 # add labels 

-

215 ax.set_xlabel("epoch") 

-

216 ax.set_ylabel(metric) 

-

217 

-

218 return ax 

-

219 

-

220 

-

221class AnalysisRobustness(Analysis): 

-

222 """Report the robustness of the models.""" 

-

223 

-

224 def __init__(self, metrics: list, experiment: object, batch_size: int) -> None: 

-

225 """Initialize the AnalysisRobustness class.""" 

-

226 super().__init__() 

-

227 self.metrics = metrics 

-

228 self.experiment = experiment 

-

229 self.batch_size = batch_size 

-

230 

-

231 def get_performance_table(self, names: list, model_list: dict, data_list: list) -> pd.DataFrame: 

-

232 """Compute the performance metrics of each model on each dataset. 

-

233 

-

234 Args: 

-

235 names: List of names that identifies each model. 

-

236 model_list: Dictionary of models in same order as data_list. 

-

237 data_list: List of datasets used for training. 

-

238 

-

239 Returns: 

-

240 DataFrame containing performance metrics. 

-

241 """ 

-

242 # check same length 

-

243 if (len(names) != len(model_list)) or (len(names) != len(data_list)):  # a length mismatch with either list is an error

-

244 raise ValueError("The length of the names, model_list and data_list should be the same.") 

-

245 

-

246 # initialize 

-

247 df = pd.DataFrame() 

-

248 model_names = [] 

-

249 

-

250 # for each model, get the performance table, and concat 

-

251 for i, model in enumerate(model_list): 

-

252 df = pd.concat([df, self.get_performance_table_for_one_model(names, model, data_list)]) 

-

253 model_names += [names[i]] * len(data_list) 

-

254 df["model"] = model_names 

-

255 

-

256 return df 

-

257 

-

258 def get_performance_table_for_one_model(self, names: list, model: object, data_list: list) -> pd.DataFrame: 

-

259 """Compute the performance table of one model on each dataset.""" 

-

260 df = pd.DataFrame() 

-

261 for data_path in data_list: # for each data, get the performance metrics, and concat 

-

262 # initialize the dataframe keeping the original order, aka no shuffle 

-

263 dataloader = DataLoader( 

-

264 TorchDataset(data_path, self.experiment, split=2), 

-

265 batch_size=self.batch_size, 

-

266 shuffle=False, 

-

267 ) 

-

268 metric_values = PredictWrapper(model, dataloader).compute_metrics(self.metrics) 

-

269 df = pd.concat([df, pd.DataFrame(metric_values, index=[0])]) 

-

270 df["data"] = names 

-

271 return df 

-

272 

-

273 def get_average_performance_table(self, df: pd.DataFrame) -> pd.DataFrame: 

-

274 """Compute the average performance of each model on each dataset. 

-

275 

-

276 Args: 

-

277 df: DataFrame containing the performance table. 

-

278 

-

279 Returns: 

-

280 DataFrame with averaged metrics. 

-

281 """ 

-

282 df = df[[*self.metrics, "model"]] # Use list unpacking instead of concatenation 

-

283 return df.groupby(["model"]).mean().reset_index() 

-

284 

-

285 def plot_performance_heatmap(self, df: pd.DataFrame, figsize: tuple = (10, 10), output: str | None = None) -> None: 

-

286 """Plot the performance of each model on each dataset.""" 

-

287 # create figure 

-

288 rows, cols = self.get_grid_shape(len(self.metrics)) 

-

289 fig, axs = plt.subplots(rows, cols, figsize=figsize) 

-

290 

-

291 # if there is only one metric, plt.subplots returns a single Axes object rather than an array; for more than one it returns an np.ndarray. The two cases need to be unified, which the following line does

-

292 if not isinstance(axs, np.ndarray): 

-

293 axs = np.array([axs]) 

-

294 

-

295 for i, ax in enumerate(axs.flat): 

-

296 if i >= len(self.metrics): 

-

297 ax.axis("off") 

-

298 continue 

-

299 

-

300 # reshape the data frame into the matrix for one metric 

-

301 mat = df[["model", "data", self.metrics[i]]] 

-

302 mat = mat.pivot(index="model", columns="data", values=self.metrics[i]) 

-

303 

-

304 # plot heatmap 

-

305 im, cbar = self.heatmap(mat, mat.index, mat.columns, ax=ax, cmap="YlGn", cbarlabel=self.metrics[i]) 

-

306 self.annotate_heatmap(im, valfmt="{x:.2f}") # Don't assign to unused variable 

-

307 

-

308 # save plot 

-

309 plt.tight_layout() 

-

310 if output: 

-

311 plt.savefig(output) 

-

312 plt.show() 

-

313 

-

314 def plot_delta_performance( 

-

315 self, 

-

316 metric: str, 

-

317 df: pd.DataFrame, 

-

318 figsize: tuple = (10, 10), 

-

319 output: str | None = None, 

-

320 ) -> None: 

-

321 """Plot the delta performance of each model on each dataset.""" 

-

322 # create figure 

-

323 rows, cols = self.get_grid_shape(len(df["model"].unique())) 

-

324 fig, axs = plt.subplots(rows, cols, figsize=figsize) 

-

325 

-

326 # if there is only one plot, plt.subplots returns a single matplotlib.axes.Axes object, while for more than one it returns an np.ndarray. The two cases need to be unified, which the following line does

-

327 if not isinstance(axs, np.ndarray): 

-

328 axs = np.array([axs]) 

-

329 

-

330 # plot each model 

-

331 for i, ax in enumerate(axs.flat): 

-

332 if i >= len(df["model"].unique()): 

-

333 ax.axis("off") 

-

334 continue 

-

335 self.plot_delta_performance_for_one_model(ax, metric, df, df["model"].unique()[i]) 

-

336 

-

337 # set common y limits 

-

338 ymin = min([ax.get_ylim()[0] for ax in axs.flat]) 

-

339 ymax = max([ax.get_ylim()[1] for ax in axs.flat]) 

-

340 for ax in axs.flat: 

-

341 spacer = abs(ymin - ymax) 

-

342 spacer = spacer * 0.01 

-

343 ax.set_ylim(ymin - spacer, ymax + spacer) 

-

344 

-

345 # save plot 

-

346 plt.tight_layout() 

-

347 if output: 

-

348 plt.savefig(output) 

-

349 plt.show() 

-

350 

-

351 def plot_delta_performance_for_one_model(self, ax: Any, metric: str, df: pd.DataFrame, model_name: str) -> Any: 

-

352 """Plot the delta performance of one model.""" 

-

353 df = self.parse_delta_performance_for_one_model(metric, df, model_name) 

-

354 

-

355 # plot a barplot with positive negative values for each row 

-

356 # TODO use different colors for positive and negative values 

-

357 df = df.set_index("data") 

-

358 df.plot(kind="bar", ax=ax, stacked=True) 

-

359 

-

360 ax.set_xlabel("") 

-

361 ax.get_legend().remove() 

-

362 ax.set_title(model_name) 

-

363 

-

364 return ax 

-

365 

-

366 def parse_delta_performance_for_one_model(self, metric: str, df: pd.DataFrame, model_name: str) -> pd.DataFrame: 

-

367 """Compute the delta performance of one model.""" 

-

368 # filter data frame 

-

369 df = df[["data", "model", metric]] 

-

370 df = df[df["model"] == model_name] 

-

371 

-

372 # compute the delta performance between each row vs the reference 

-

373 reference_row = df.loc[df["data"] == model_name] 

-

374 df[metric] = -df[metric].sub(reference_row[metric]) 

-

375 

-

376 return df 
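A hedged end-to-end sketch of AnalysisPerformanceTune; the CSV path and metric names are hypothetical, and the file is assumed to contain a training_iteration column plus matching train_<metric> and val_<metric> columns (four metrics are used so the subplot grid is 2 x 2, which the legend lookup above expects).

    from stimulus.analysis.analysis_default import AnalysisPerformanceTune   # assumed import path

    report = AnalysisPerformanceTune("tune_progress.csv")   # hypothetical Ray Tune progress file
    report.plot_metric_vs_iteration(
        metrics=["loss", "accuracy", "precision", "recall"],  # hypothetical metric names
        figsize=(10, 10),
        output="tune_report.pdf",
    )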

-
- - - diff --git a/coverage/z_e019996b82b92b6e___init___py.html b/coverage/z_e019996b82b92b6e___init___py.html deleted file mode 100644 index 3687c7c1..00000000 --- a/coverage/z_e019996b82b92b6e___init___py.html +++ /dev/null @@ -1,98 +0,0 @@ - - - - - Coverage for src/stimulus/data/transform/__init__.py: 100% - - - - - -
-
-

- Coverage for src/stimulus/data/transform/__init__.py: 100% (0 statements; coverage.py v7.6.4, created at 2025-01-22 16:46 +0100)
-
-
-

1"""Transform package for data manipulation.""" 

-
- - - diff --git a/coverage/z_e019996b82b92b6e_data_transformation_generators_py.html b/coverage/z_e019996b82b92b6e_data_transformation_generators_py.html deleted file mode 100644 index 27d72ef1..00000000 --- a/coverage/z_e019996b82b92b6e_data_transformation_generators_py.html +++ /dev/null @@ -1,415 +0,0 @@ - - - - - Coverage for src/stimulus/data/transform/data_transformation_generators.py: 86% - - - - - -
-
-

- Coverage for src/stimulus/data/transform/data_transformation_generators.py: 86% (84 statements; coverage.py v7.6.4, created at 2025-01-22 16:46 +0100)
-
-
-

1"""This file contains noise generators classes for generating various types of noise.""" 

-

2 

-

3import multiprocessing as mp 

-

4from abc import ABC, abstractmethod 

-

5from typing import Any 

-

6 

-

7import numpy as np 

-

8 

-

9 

-

10class AbstractDataTransformer(ABC): 

-

11 """Abstract class for data transformers. 

-

12 

-

13 Data transformers implement in_place or augmentation transformations. 

-

14 Whether it is in_place or augmentation is specified in the "add_row" attribute (should be True or False and set in the child class constructor)

-

15 

-

16 Child classes should override the `transform` and `transform_all` methods. 

-

17 

-

18 `transform_all` should always return a list 

-

19 

-

20 Both methods should take an optional `seed` argument set to `None` by default to be compliant with stimulus' core principle of reproducibility. 

-

21 Seed should be initialized through `np.random.seed(seed)` in the method implementation. 

-

22 

-

23 Attributes: 

-

24 add_row (bool): whether the transformer adds rows to the data 

-

25 

-

26 Methods: 

-

27 transform: transforms a data point 

-

28 transform_all: transforms a list of data points 

-

29 """ 

-

30 

-

31 def __init__(self) -> None: 

-

32 """Initialize the data transformer.""" 

-

33 self.add_row = None 

-

34 self.seed = 42 

-

35 

-

36 @abstractmethod 

-

37 def transform(self, data: Any) -> Any: 

-

38 """Transforms a single data point. 

-

39 

-

40 This is an abstract method that should be implemented by the child class. 

-

41 

-

42 Args: 

-

43 data (Any): the data to be transformed 

-

44 

-

45 Returns: 

-

46 transformed_data (Any): the transformed data 

-

47 """ 

-

48 # np.random.seed(self.seed) 

-

49 raise NotImplementedError 

-

50 

-

51 @abstractmethod 

-

52 def transform_all(self, data: list) -> list: 

-

53 """Transforms a list of data points. 

-

54 

-

55 This is an abstract method that should be implemented by the child class. 

-

56 

-

57 Args: 

-

58 data (list): the data to be transformed 

-

59 

-

60 Returns: 

-

61 transformed_data (list): the transformed data 

-

62 """ 

-

63 # np.random.seed(self.seed) 

-

64 raise NotImplementedError 

-

65 

-

66 

-

67class AbstractNoiseGenerator(AbstractDataTransformer): 

-

68 """Abstract class for noise generators. 

-

69 

-

70 All noise functions should have the seed in them. This is because multiprocessing could otherwise unset the seed.

-

71 """ 

-

72 

-

73 def __init__(self) -> None: 

-

74 """Initialize the noise generator.""" 

-

75 super().__init__() 

-

76 self.add_row = False 

-

77 

-

78 

-

79class AbstractAugmentationGenerator(AbstractDataTransformer): 

-

80 """Abstract class for augmentation generators. 

-

81 

-

82 All augmentation functions should have the seed in them. This is because multiprocessing could otherwise unset the seed.

-

83 """ 

-

84 

-

85 def __init__(self) -> None: 

-

86 """Initialize the augmentation generator.""" 

-

87 super().__init__() 

-

88 self.add_row = True 

-

89 

-

90 

-

91class UniformTextMasker(AbstractNoiseGenerator): 

-

92 """Mask characters in text. 

-

93 

-

94 This noise generator replaces characters with a masking character with a given probability.

-

95 

-

96 Methods: 

-

97 transform: adds character masking to a single data point 

-

98 transform_all: adds character masking to a list of data points 

-

99 """ 

-

100 

-

101 def __init__(self, probability: float = 0.1, mask: str = "*", seed: float = 42) -> None: 

-

102 """Initialize the text masker. 

-

103 

-

104 Args: 

-

105 probability: Probability of masking each character 

-

106 mask: Character to use for masking 

-

107 seed: Random seed for reproducibility 

-

108 """ 

-

109 super().__init__() 

-

110 self.probability = probability 

-

111 self.mask = mask 

-

112 self.seed = seed 

-

113 

-

114 def transform(self, data: str) -> str: 

-

115 """Adds character masking to the data. 

-

116 

-

117 Args: 

-

118 data (str): the data to be transformed 

-

119 

-

120 Returns: 

-

121 transformed_data (str): the transformed data point 

-

122 """ 

-

123 np.random.seed(self.seed) 

-

124 return "".join([c if np.random.rand() > self.probability else self.mask for c in data]) 

-

125 

-

126 def transform_all(self, data: list) -> list: 

-

127 """Adds character masking to multiple data points using multiprocessing. 

-

128 

-

129 Args: 

-

130 data (list): the data to be transformed 

-

131 

-

132 

-

133 Returns: 

-

134 transformed_data (list): the transformed data points 

-

135 """ 

-

136 with mp.Pool(mp.cpu_count()) as pool: 

-

137 function_specific_input = list(data) 

-

138 return pool.map(self.transform, function_specific_input)  # map rather than starmap: transform takes a single sequence argument
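A small usage sketch, assuming the module is importable under the path shown in the coverage header (stimulus.data.transform.data_transformation_generators); with a fixed seed the masked positions are reproducible.

    from stimulus.data.transform.data_transformation_generators import UniformTextMasker  # assumed path

    masker = UniformTextMasker(probability=0.3, mask="*", seed=42)
    print(masker.transform("ACGTACGTACGT"))   # same seed -> same masked positions on every run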

-

139 

-

140 

-

141class GaussianNoise(AbstractNoiseGenerator): 

-

142 """Add Gaussian noise to data. 

-

143 

-

144 This noise generator adds Gaussian noise to float values. 

-

145 

-

146 Methods: 

-

147 transform: adds noise to a single data point 

-

148 transform_all: adds noise to a list of data points 

-

149 """ 

-

150 

-

151 def __init__(self, mean: float = 0, std: float = 1, seed: float = 42) -> None: 

-

152 """Initialize the Gaussian noise generator. 

-

153 

-

154 Args: 

-

155 mean: Mean of the Gaussian noise 

-

156 std: Standard deviation of the Gaussian noise 

-

157 seed: Random seed for reproducibility 

-

158 """ 

-

159 super().__init__() 

-

160 self.mean = mean 

-

161 self.std = std 

-

162 self.seed = seed 

-

163 

-

164 def transform(self, data: float) -> float: 

-

165 """Adds Gaussian noise to a single point of data. 

-

166 

-

167 Args: 

-

168 data (float): the data to be transformed 

-

169 

-

170 Returns: 

-

171 transformed_data (float): the transformed data point 

-

172 """ 

-

173 np.random.seed(self.seed) 

-

174 return data + np.random.normal(self.mean, self.std) 

-

175 

-

176 def transform_all(self, data: list) -> np.array: 

-

177 """Adds Gaussian noise to a list of data points. 

-

178 

-

179 Args: 

-

180 data (list): the data to be transformed 

-

181 

-

182 Returns: 

-

183 transformed_data (np.array): the transformed data points 

-

184 """ 

-

185 np.random.seed(self.seed) 

-

186 return np.array(np.array(data) + np.random.normal(self.mean, self.std, len(data))) 
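And the numeric counterpart, again with an assumed import path; the same seed makes the perturbation reproducible.

    from stimulus.data.transform.data_transformation_generators import GaussianNoise  # assumed path

    noiser = GaussianNoise(mean=0.0, std=0.1, seed=42)
    print(noiser.transform(1.0))                   # a single perturbed float
    print(noiser.transform_all([1.0, 2.0, 3.0]))   # numpy array of perturbed values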

-

187 

-

188 

-

189class ReverseComplement(AbstractAugmentationGenerator): 

-

190 """Reverse complement biological sequences. 

-

191 

-

192 This augmentation strategy reverse complements the input nucleotide sequences. 

-

193 

-

194 Methods: 

-

195 transform: reverse complements a single data point 

-

196 transform_all: reverse complements a list of data points 

-

197 

-

198 Raises: 

-

199 ValueError: if the type of the sequence is not DNA or RNA 

-

200 """ 

-

201 

-

202 def __init__(self, sequence_type: str = "DNA") -> None: 

-

203 """Initialize the reverse complement generator. 

-

204 

-

205 Args: 

-

206 sequence_type: Type of sequence ('DNA' or 'RNA') 

-

207 """ 

-

208 super().__init__() 

-

209 if sequence_type not in ("DNA", "RNA"): 

-

210 raise ValueError( 

-

211 "Currently only DNA and RNA sequences are supported. Update the class ReverseComplement to support other types.", 

-

212 ) 

-

213 if sequence_type == "DNA": 

-

214 self.complement_mapping = str.maketrans("ATCG", "TAGC") 

-

215 elif sequence_type == "RNA": 

-

216 self.complement_mapping = str.maketrans("AUCG", "UAGC") 

-

217 

-

218 def transform(self, data: str) -> str: 

-

219 """Returns the reverse complement of a list of string data using the complement_mapping. 

-

220 

-

221 Args: 

-

222 data (str): the sequence to be transformed 

-

223 

-

224 Returns: 

-

225 transformed_data (str): the reverse complement of the sequence 

-

226 """ 

-

227 return data.translate(self.complement_mapping)[::-1] 

-

228 

-

229 def transform_all(self, data: list) -> list: 

-

230 """Reverse complement multiple data points using multiprocessing. 

-

231 

-

232 Args: 

-

233 data (list): the sequences to be transformed 

-

234 

-

235 Returns: 

-

236 transformed_data (list): the reverse complement of the sequences 

-

237 """ 

-

238 with mp.Pool(mp.cpu_count()) as pool: 

-

239 function_specific_input = list(data) 

-

240 return pool.map(self.transform, function_specific_input) 
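The translation-table trick above is easy to check in isolation: for DNA, str.maketrans("ATCG", "TAGC") followed by a string reversal yields the reverse complement.

    from stimulus.data.transform.data_transformation_generators import ReverseComplement  # assumed path

    rc = ReverseComplement(sequence_type="DNA")
    print(rc.transform("ATTGC"))   # "GCAAT": complement "TAACG", then reversed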

-

241 

-

242 

-

243class GaussianChunk(AbstractAugmentationGenerator): 

-

244 """Subset data around a random midpoint. 

-

245 

-

246 This augmentation strategy chunks the input sequences, for which the middle positions are obtained through a gaussian distribution. 

-

247 

-

248 Concretely, it changes the middle position (i.e. the peak summit) to another position. This position is chosen based on a gaussian distribution, so regions close to the middle point are more likely to be chosen than the rest.

-

249 Then a chunk with size `chunk_size` around the new middle point is returned. 

-

250 This process will be repeated for each sequence with `transform_all`. 

-

251 

-

252 Methods: 

-

253 transform: chunk a single list 

-

254 transform_all: chunks multiple lists 

-

255 """ 

-

256 

-

257 def __init__(self, chunk_size: int, seed: float = 42, std: float = 1) -> None: 

-

258 """Initialize the Gaussian chunk generator. 

-

259 

-

260 Args: 

-

261 chunk_size: Size of chunks to extract 

-

262 seed: Random seed for reproducibility 

-

263 std: Standard deviation for the Gaussian distribution 

-

264 """ 

-

265 super().__init__() 

-

266 self.chunk_size = chunk_size 

-

267 self.seed = seed 

-

268 self.std = std 

-

269 

-

270 def transform(self, data: str) -> str: 

-

271 """Chunks a sequence of size chunk_size from the middle position +/- a value obtained through a gaussian distribution. 

-

272 

-

273 Args: 

-

274 data (str): the sequence to be transformed 

-

275 

-

276 Returns: 

-

277 transformed_data (str): the chunk of the sequence 

-

278 

-

279 Raises: 

-

280 ValueError: if the input data is not longer than the chunk size

-

281 """ 

-

282 np.random.seed(self.seed) 

-

283 

-

284 # make sure that the data is longer than chunk_size otherwise raise an error 

-

285 if len(data) <= self.chunk_size: 

-

286 raise ValueError("The input data is shorter than the chunk size") 

-

287 

-

288 # Get the middle position of the input sequence 

-

289 middle_position = len(data) // 2 

-

290 

-

291 # Change the middle position by a value obtained through a gaussian distribution 

-

292 new_middle_position = int(middle_position + np.random.normal(0, self.std)) 

-

293 

-

294 # Get the start and end position of the chunk 

-

295 start_position = new_middle_position - self.chunk_size // 2 

-

296 end_position = new_middle_position + self.chunk_size // 2 

-

297 

-

298 # if the start position is negative, set it to 0 

-

299 start_position = max(start_position, 0) 

-

300 

-

301 # Get the chunk of size chunk_size from the start position if the end position is smaller than the length of the data 

-

302 if end_position < len(data): 

-

303 return data[start_position : start_position + self.chunk_size] 

-

304 # Otherwise return the chunk of the sequence from the end of the sequence of size chunk_size 

-

305 return data[-self.chunk_size :] 

-

306 

-

307 def transform_all(self, data: list) -> list: 

-

308 """Adds chunks to multiple lists using multiprocessing. 

-

309 

-

310 Args: 

-

311 data (list): the sequences to be transformed 

-

312 

-

313 Returns: 

-

314 transformed_data (list): the transformed sequences 

-

315 """ 

-

316 with mp.Pool(mp.cpu_count()) as pool: 

-

317 function_specific_input = list(data) 

-

318 return pool.map(self.transform, function_specific_input)  # map rather than starmap: transform takes a single sequence argument
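A quick sketch of the chunking behaviour described above (the input sequence is made up for illustration); whatever jitter the gaussian draw produces, the returned window always has exactly chunk_size characters as long as the input is longer than chunk_size.

    from stimulus.data.transform.data_transformation_generators import GaussianChunk  # assumed path

    chunker = GaussianChunk(chunk_size=10, seed=42, std=2)
    sequence = "A" * 20 + "CGTA" + "T" * 20      # 44 bases, "peak" roughly in the middle
    chunk = chunker.transform(sequence)
    print(len(chunk))                            # 10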

-
- - - diff --git a/coverage/z_eeca319ae6b94751___init___py.html b/coverage/z_eeca319ae6b94751___init___py.html deleted file mode 100644 index b8aa4c6e..00000000 --- a/coverage/z_eeca319ae6b94751___init___py.html +++ /dev/null @@ -1,102 +0,0 @@ - - - - - Coverage for src/stimulus/data/splitters/__init__.py: 100% - - - - - -
-
-

- Coverage for src/stimulus/data/splitters/__init__.py: 100% (2 statements; coverage.py v7.6.4, created at 2025-01-22 16:46 +0100)
-
-
-

1"""This package provides splitter classes for splitting data into train, validation, and test sets.""" 

-

2 

-

3from src.stimulus.data.splitters.splitters import AbstractSplitter, RandomSplit 

-

4 

-

5__all__ = ["AbstractSplitter", "RandomSplit"] 

-
- - - diff --git a/coverage/z_eeca319ae6b94751_splitters_py.html b/coverage/z_eeca319ae6b94751_splitters_py.html deleted file mode 100644 index 650aa0b3..00000000 --- a/coverage/z_eeca319ae6b94751_splitters_py.html +++ /dev/null @@ -1,231 +0,0 @@ - - - - - Coverage for src/stimulus/data/splitters/splitters.py: 84% - - - - - -
-
-

- Coverage for src/stimulus/data/splitters/splitters.py: 84% (38 statements; coverage.py v7.6.4, created at 2025-01-22 16:46 +0100)
-
-
-

1"""This file contains the splitter classes for splitting data accordingly.""" 

-

2 

-

3from abc import ABC, abstractmethod 

-

4from typing import Any, Optional 

-

5 

-

6import numpy as np 

-

7import polars as pl 

-

8 

-

9# Constants 

-

10SPLIT_SIZE = 3 # Number of splits (train/val/test) 

-

11 

-

12 

-

13class AbstractSplitter(ABC): 

-

14 """Abstract class for splitters. 

-

15 

-

16 A splitter splits the data into train, validation, and test sets. 

-

17 

-

18 Methods: 

-

19 get_split_indexes: calculates split indices for the data 

-

20 distance: calculates the distance between two elements of the data 

-

21 """ 

-

22 

-

23 def __init__(self, seed: float = 42) -> None: 

-

24 """Initialize the splitter. 

-

25 

-

26 Args: 

-

27 seed: Random seed for reproducibility 

-

28 """ 

-

29 self.seed = seed 

-

30 

-

31 @abstractmethod 

-

32 def get_split_indexes(self, data: pl.DataFrame) -> list: 

-

33 """Splits the data. Always return indices mapping to the original list. 

-

34 

-

35 This is an abstract method that should be implemented by the child class. 

-

36 

-

37 Args: 

-

38 data (pl.DataFrame): the data to be split 

-

39 

-

40 Returns: 

-

41 split_indices (list): the indices for train, validation, and test sets 

-

42 """ 

-

43 raise NotImplementedError 

-

44 

-

45 @abstractmethod 

-

46 def distance(self, data_one: Any, data_two: Any) -> float: 

-

47 """Calculates the distance between two elements of the data. 

-

48 

-

49 This is an abstract method that should be implemented by the child class. 

-

50 

-

51 Args: 

-

52 data_one (Any): the first data point 

-

53 data_two (Any): the second data point 

-

54 

-

55 Returns: 

-

56 distance (float): the distance between the two data points 

-

57 """ 

-

58 raise NotImplementedError 

-

59 

-

60 

-

61class RandomSplit(AbstractSplitter): 

-

62 """This splitter randomly splits the data.""" 

-

63 

-

64 def __init__(self, split: Optional[list] = None, seed: Optional[float] = None) -> None: 

-

65 """Initialize the random splitter. 

-

66 

-

67 Args: 

-

68 split: List of proportions for train/val/test splits 

-

69 seed: Random seed for reproducibility 

-

70 """ 

-

71 super().__init__() 

-

72 self.split = [0.7, 0.2, 0.1] if split is None else split 

-

73 self.seed = seed 

-

74 if len(self.split) != SPLIT_SIZE: 

-

75 raise ValueError( 

-

76 "The split argument should be a list with length 3 that contains the proportions for [train, validation, test] splits.", 

-

77 ) 

-

78 

-

79 def get_split_indexes( 

-

80 self, 

-

81 data: dict, 

-

82 ) -> tuple[list, list, list]: 

-

83 """Splits the data indices into train, validation, and test sets. 

-

84 

-

85 One can use these lists of indices to parse the data afterwards. 

-

86 

-

87 Args: 

-

88 data (dict): Dictionary mapping column names to lists of data values. 

-

89 

-

90 Returns: 

-

91 train (list): The indices for the training set. 

-

92 validation (list): The indices for the validation set. 

-

93 test (list): The indices for the test set. 

-

94 

-

95 Raises: 

-

96 ValueError: If the split argument is not a list with length 3. 

-

97 ValueError: If the sum of the split proportions is not 1. 

-

98 """ 

-

99 # Use round to avoid errors due to floating point imprecisions 

-

100 if round(sum(self.split), 3) != 1.0:  # the proportions must sum to exactly 1

-

101 raise ValueError(f"The sum of the split proportions should be 1. Instead, it is {sum(self.split)}.") 

-

102 

-

103 if not data: 

-

104 raise ValueError("No data provided for splitting") 

-

105 # Get length from first column's data list 

-

106 length_of_data = len(next(iter(data.values()))) 

-

107 

-

108 # Generate a list of indices and shuffle it 

-

109 indices = np.arange(length_of_data) 

-

110 np.random.seed(self.seed) 

-

111 np.random.shuffle(indices) 

-

112 

-

113 # Calculate the sizes of the train, validation, and test sets 

-

114 train_size = int(self.split[0] * length_of_data) 

-

115 validation_size = int(self.split[1] * length_of_data) 

-

116 

-

117 # Split the shuffled indices according to the calculated sizes 

-

118 train = indices[:train_size].tolist() 

-

119 validation = indices[train_size : train_size + validation_size].tolist() 

-

120 test = indices[train_size + validation_size :].tolist() 

-

121 

-

122 return train, validation, test 

-

123 

-

124 def distance(self, data_one: Any, data_two: Any) -> float: 

-

125 """Calculate distance between two data points. 

-

126 

-

127 Args: 

-

128 data_one: First data point 

-

129 data_two: Second data point 

-

130 

-

131 Returns: 

-

132 Distance between the points 

-

133 """ 

-

134 raise NotImplementedError 
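A hedged usage sketch of RandomSplit; the import path is assumed from the package layout above, and the toy column dictionary stands in for a real parsed dataset.

    from stimulus.data.splitters.splitters import RandomSplit   # assumed import path

    splitter = RandomSplit(split=[0.7, 0.2, 0.1], seed=42)
    columns = {"sequence": list("ACGTACGTAC")}                  # any dict of equal-length columns
    train, val, test = splitter.get_split_indexes(columns)
    print(len(train), len(val), len(test))                      # 7 2 1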

-
- - - diff --git a/coverage/z_f33f92589633b86b___init___py.html b/coverage/z_f33f92589633b86b___init___py.html deleted file mode 100644 index cac7dbff..00000000 --- a/coverage/z_f33f92589633b86b___init___py.html +++ /dev/null @@ -1,102 +0,0 @@ - - - - - Coverage for src/stimulus/__init__.py: 100% - - - - - -
-
-

- Coverage for src/stimulus/__init__.py: 100% (2 statements; coverage.py v7.6.4, created at 2025-01-22 16:46 +0100)
-
-
-

1"""stimulus-py package.""" 

-

2 

-

3from __future__ import annotations 

-

4 

-

5__all__: list[str] = [] 

-
- - - diff --git a/coverage/z_f33f92589633b86b_debug_py.html b/coverage/z_f33f92589633b86b_debug_py.html deleted file mode 100644 index 0fb9ec3e..00000000 --- a/coverage/z_f33f92589633b86b_debug_py.html +++ /dev/null @@ -1,206 +0,0 @@ - - - - - Coverage for src/stimulus/debug.py: 0% - - - - - -
-
-

- Coverage for src/stimulus/debug.py: 0% (63 statements; coverage.py v7.6.4, created at 2025-01-22 16:46 +0100)
-
-
-

1"""Debugging utilities.""" 

-

2 

-

3from __future__ import annotations 

-

4 

-

5import os 

-

6import platform 

-

7import sys 

-

8from dataclasses import dataclass 

-

9from importlib import metadata 

-

10 

-

11 

-

12@dataclass 

-

13class Variable: 

-

14 """Dataclass describing an environment variable.""" 

-

15 

-

16 name: str 

-

17 """Variable name.""" 

-

18 value: str 

-

19 """Variable value.""" 

-

20 

-

21 

-

22@dataclass 

-

23class Package: 

-

24 """Dataclass describing a Python package.""" 

-

25 

-

26 name: str 

-

27 """Package name.""" 

-

28 version: str 

-

29 """Package version.""" 

-

30 

-

31 

-

32@dataclass 

-

33class Environment: 

-

34 """Dataclass to store environment information.""" 

-

35 

-

36 interpreter_name: str 

-

37 """Python interpreter name.""" 

-

38 interpreter_version: str 

-

39 """Python interpreter version.""" 

-

40 interpreter_path: str 

-

41 """Path to Python executable.""" 

-

42 platform: str 

-

43 """Operating System.""" 

-

44 packages: list[Package] 

-

45 """Installed packages.""" 

-

46 variables: list[Variable] 

-

47 """Environment variables.""" 

-

48 

-

49 

-

50def _interpreter_name_version() -> tuple[str, str]: 

-

51 if hasattr(sys, "implementation"): 

-

52 impl = sys.implementation.version 

-

53 version = f"{impl.major}.{impl.minor}.{impl.micro}" 

-

54 kind = impl.releaselevel 

-

55 if kind != "final": 

-

56 version += kind[0] + str(impl.serial) 

-

57 return sys.implementation.name, version 

-

58 return "", "0.0.0" 

-

59 

-

60 

-

61def get_version(dist: str = "stimulus-py") -> str: 

-

62 """Get version of the given distribution. 

-

63 

-

64 Parameters: 

-

65 dist: A distribution name. 

-

66 

-

67 Returns: 

-

68 A version number. 

-

69 """ 

-

70 try: 

-

71 return metadata.version(dist) 

-

72 except metadata.PackageNotFoundError: 

-

73 return "0.0.0" 

-

74 

-

75 

-

76def get_debug_info() -> Environment: 

-

77 """Get debug/environment information. 

-

78 

-

79 Returns: 

-

80 Environment information. 

-

81 """ 

-

82 py_name, py_version = _interpreter_name_version() 

-

83 packages = ["stimulus-py"] 

-

84 variables = ["PYTHONPATH", *[var for var in os.environ if var.startswith("STIMULUS_PY")]] 

-

85 return Environment( 

-

86 interpreter_name=py_name, 

-

87 interpreter_version=py_version, 

-

88 interpreter_path=sys.executable, 

-

89 platform=platform.platform(), 

-

90 variables=[Variable(var, val) for var in variables if (val := os.getenv(var))], 

-

91 packages=[Package(pkg, get_version(pkg)) for pkg in packages], 

-

92 ) 

-

93 

-

94 

-

95def print_debug_info() -> None: 

-

96 """Print debug/environment information.""" 

-

97 info = get_debug_info() 

-

98 print(f"- __System__: {info.platform}") 

-

99 print(f"- __Python__: {info.interpreter_name} {info.interpreter_version} ({info.interpreter_path})") 

-

100 print("- __Environment variables__:") 

-

101 for var in info.variables: 

-

102 print(f" - `{var.name}`: `{var.value}`") 

-

103 print("- __Installed packages__:") 

-

104 for pkg in info.packages: 

-

105 print(f" - `{pkg.name}` v{pkg.version}") 

-

106 

-

107 

-

108if __name__ == "__main__": 

-

109 print_debug_info() 
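A minimal sketch of how the report above can be produced, assuming the installed module path mirrors src/stimulus/debug.py.

    from stimulus.debug import print_debug_info   # assumed import path

    print_debug_info()   # prints platform, interpreter, STIMULUS_PY_* variables and package versions
    # or, from a shell: python -m stimulus.debug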

-
- - - diff --git a/credits/index.html b/credits/index.html index d01c29aa..c78030b0 100644 --- a/credits/index.html +++ b/credits/index.html @@ -1,3316 +1,5 @@ - - - - - - - - - - - - - - - - - - - - - - - - - Credits - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - -
- - - - - - - -
- -
- - - - -
-
- - - - - -
-
-
- - - - - - - -
-
-
- - - - - - - - - -
-
- - - - - - - - - - - - - - - - - - - - - -

Credits

-

These projects were used to build stimulus-py. Thank you!

-

Python | -uv | -copier-uv

-

Runtime dependencies

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Project | Summary | Version (accepted) | Version (last resolved) | License
aiohappyeyeballsHappy Eyeballs for asyncio>=2.3.02.4.3PSF-2.0
aiohttpAsync http client/server framework (asyncio)>=3.73.11.0Apache 2
aiohttp-corsCORS support for aiohttp0.7.0Apache License, Version 2.0
aiosignalaiosignal: a list of registered asynchronous callbacks1.3.1Apache 2.0
annotated-typesReusable constraint types to use with typing.Annotated>=0.6.00.7.0MIT License
attrsClasses Without Boilerplate>=17.3.024.2.0MIT
cachetoolsExtensible memoizing collections and decorators>=2.0.0, <6.05.5.0MIT
certifiPython package for providing Mozilla's CA Bundle.>=2017.4.172024.8.30MPL-2.0
charset-normalizerThe Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.>=2, <43.4.0MIT
clickComposable command line interface toolkit>=7.08.1.7BSD-3-Clause
coloramaCross-platform colored terminal text.>=0.40.4.6BSD License
colorfulTerminal string styling done right, in Python.0.5.6MIT License
contourpyPython library for calculating contours of 2D quadrilateral grids>=1.0.11.3.1BSD License
cyclerComposable style cycles>=0.100.12.1BSD License
dillserialize all of Python>=0.3.90.3.9BSD-3-Clause
distlibDistribution utilities>=0.3.7, <10.3.9PSF-2.0
filelockA platform independent file lock.3.16.1Unlicense
fonttoolsTools to manipulate font files>=4.22.04.54.1MIT
frozenlistA list-like structure which implements collections.abc.MutableSequence1.5.0Apache 2
fsspecFile-system specification2024.10.0BSD License
google-api-coreGoogle API client core library>=1.0.0, <2.0.02.23.0Apache 2.0
google-authGoogle Authentication Library>=2.14.1, <3.0.dev02.36.0Apache 2.0
googleapis-common-protosCommon protobufs used in Google APIs>=1.56.2, <2.0.dev01.66.0Apache-2.0
grpcioHTTP/2-based RPC framework>=1.32.01.67.1Apache License 2.0
idnaInternationalized Domain Names in Applications (IDNA)>=2.5, <43.10BSD License
importlib_metadataRead metadata from Python packages>=6.6, >=4.48.5.0Apache Software License
iniconfigbrain-dead simple config-ini parsing2.0.0MIT
Jinja2A very fast and expressive template engine.>=2.11.13.1.4BSD License
joblibLightweight pipelining with Python functions>=1.2.01.4.2BSD 3-Clause
jsonschemaAn implementation of JSON Schema validation for Python4.23.0MIT
jsonschema-specificationsThe JSON Schema meta-schemas and vocabularies, exposed as a Registry>=2023.03.62024.10.1MIT License
kiwisolverA fast implementation of the Cassowary constraint solver>=1.3.11.4.7BSD License
markdown-it-pyPython port of markdown-it. Markdown parsing, done right!>=2.2.0, >=2.1.03.0.0MIT License
MarkupSafeSafely add untrusted strings to HTML/XML markup.>=2.0.1, >=2.03.0.2BSD License
matplotlibPython plotting package>=3.9.03.9.2Python Software Foundation License
mdurlMarkdown URL utilities~=0.10.1.2MIT License
memrayA memory profiler for Python applications1.14.0Apache 2.0
mpmathPython library for arbitrary-precision floating-point arithmetic>=1.1.0, <1.41.3.0BSD
msgpackMessagePack serializer>=1.0.0, <2.0.01.1.0Apache 2.0
multidictmultidict implementation>=4.5, <7.06.1.0Apache 2
multiprocessbetter multiprocessing and multithreading in Python==0.70.170.70.17BSD-3-Clause
networkxPython package for creating and manipulating graphs and networks3.4.2BSD License
numpyFundamental package for array computing in Python>=1.26.0, <2.0.01.26.4BSD License
opencensusA stats collection and distributed tracing framework0.11.4Apache-2.0
opencensus-contextOpenCensus Runtime Context>=0.1.30.1.3Apache-2.0
packagingCore utilities for Python packages>=20.5, >=20.024.2Apache Software License + BSD License
pandasPowerful data structures for data analysis, time series, and statistics>=2.2.02.2.3BSD License
pillowPython Imaging Library (Fork)>=811.0.0MIT-CMU
platformdirsA small Python package for determining appropriate platform-specific dirs, e.g. a user data dir.>=3.9.1, >=2.2.0, <54.3.6MIT
pluggyplugin and hook calling mechanisms for python>=1.5, <21.5.0MIT
polars-lts-cpuBlazingly fast DataFrame library>=0.20.30, <1.12.01.11.0MIT License
prometheus_clientPython client for the Prometheus monitoring system.>=0.7.10.21.0Apache Software License 2.0
propcacheAccelerated property cache>=0.2.00.2.0Apache-2.0
proto-plusBeautiful, Pythonic protocol buffers.>=1.22.3, <2.0.0dev1.25.0Apache 2.0
protobuf>=3.15.3, !=3.19.55.28.33-Clause BSD License
py-spySampling profiler for Python programs>=0.2.00.4.0MIT
pyarrowPython library for Apache Arrow>=6.0.117.0.0Apache Software License
pyasn1Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)>=0.1.30.6.1BSD-2-Clause
pyasn1_modulesA collection of ASN.1-based protocols modules>=0.2.10.4.1BSD
pydanticData validation using Python type hints>=2.0.02.9.2MIT
pydantic_coreCore functionality for Pydantic validation and serialization==2.23.42.23.4MIT
PygmentsPygments is a syntax highlighting package written in Python.>=2.5.1, >=2.13.0, <3.0.02.18.0BSD-2-Clause
pyparsingpyparsing module - Classes and methods to define and execute parsing grammars>=2.3.13.2.0MIT License
pytestpytest: simple powerful testing with Python>=8.2, >=7.0.0, <9.0.08.3.3MIT
python-dateutilExtensions to the standard Python datetime module>=2.8.2, >=2.8.12.9.0.post0BSD License + Apache Software License
pytzWorld timezone definitions, modern and historical>=2020.12024.2MIT
PyYAMLYAML parser and emitter for Python>=5.16.0.2MIT
rayRay provides a simple, universal API for building distributed applications.>=2.38.02.39.0Apache 2.0
referencingJSON Referencing + Python>=0.28.40.35.1MIT License
requestsPython HTTP for Humans.>=2.202.32.3Apache-2.0
richRender rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal>=12.0.0, >=11.2.013.9.4MIT
rpds-pyPython bindings to Rust's persistent data structures (rpds)>=0.7.10.21.0MIT License
rsaPure-Python RSA implementation>=3.1.4, <54.9Apache-2.0
safetensors>=0.4.50.4.5Apache Software License
scikit-learnA set of python modules for machine learning and data mining>=1.5.01.5.2BSD License
scipyFundamental algorithms for scientific computing in Python==1.14.11.14.1BSD License
sixPython 2 and 3 compatibility utilities>=1.51.16.0MIT
smart-openUtils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)7.0.5MIT
sympyComputer algebra system (CAS) in Python1.13.1BSD
syrupyPytest Snapshot Test Utility>=4.8.04.8.1Apache-2.0
tensorboardXTensorBoardX lets you watch Tensors Flow without Tensorflow>=1.92.6.2.2MIT license
textualModern Text User Interface framework>=0.41.00.85.2MIT
threadpoolctlthreadpoolctl>=3.1.03.5.0BSD-3-Clause
torchTensors and Dynamic neural networks in Python with strong GPU acceleration==2.2.22.2.2BSD-3
typing_extensionsBackported and Experimental Type Hints for Python 3.8+>=4.12.2, >=3.6.44.12.2Python Software Foundation License
tzdataProvider of IANA time zone data>=2022.72024.2Apache-2.0
urllib3HTTP library with thread-safe connection pooling, file post, and more.>=1.26.0, >=1.21.1, <32.2.3MIT License
virtualenvVirtual Python Environment builder>=20.0.24, !=20.21.120.27.1MIT
wraptModule for decorators, wrappers and monkey patching.1.16.0BSD
yarlYet another URL library>=1.17.0, <2.01.17.1Apache-2.0
zippBackport of pathlib-compatible object wrapper for zip files>=3.203.21.0MIT License
-

Development dependencies

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Project | Summary | Version (accepted) | Version (last resolved) | License
ansimarkupProduce colored terminal text with an xml-like markup~=1.41.5.0Revised BSD License
appdirsA small Python module for determining appropriate platform-specific dirs, e.g. a "user data dir".>=1.41.4.4MIT
babelInternationalization utilities>=2.7.02.16.0BSD-3-Clause
blackThe uncompromising code formatter.>=24.424.10.0MIT
buildA simple, correct Python build frontend>=1.21.2.2.post1MIT License
certifiPython package for providing Mozilla's CA Bundle.>=2017.4.172024.8.30MPL-2.0
charset-normalizerThe Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.>=2, <43.4.0MIT
clickComposable command line interface toolkit>=7.08.1.7BSD-3-Clause
coloramaCross-platform colored terminal text.>=0.40.4.6BSD License
coverageCode coverage measurement for Python>=7.57.6.4Apache-2.0
csscompressorA python port of YUI CSS Compressor>=0.9.50.9.5BSD
docutilsDocutils -- Python Documentation Utilities>=0.21.20.21.2Public Domain + Python Software Foundation License + BSD License + GNU General Public License (GPL)
dutyA simple task runner.>=1.41.4.3ISC
editablesEditable installations>=0.50.5MIT License
execnetexecnet: rapid multi-Python deployment>=2.12.1.1MIT
failprintRun a command, print its output only if it fails.>=0.11, !=1.0.01.0.3ISC
ghp-importCopy your docs directly to the gh-pages branch.>=1.02.1.0Apache Software License
git-changelogAutomatic Changelog generator using Jinja2 templates.>=2.52.5.2ISC
gitdbGit Object Database>=4.0.1, <54.0.11BSD License
GitPythonGitPython is a Python library used to interact with Git repositories3.1.43BSD-3-Clause
griffeSignatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API.>=0.491.5.1ISC
htmlmin2An HTML Minifier>=0.1.130.1.13BSD
idnaInternationalized Domain Names in Applications (IDNA)>=2.5, <43.10BSD License
importlib_metadataRead metadata from Python packages>=6.6, >=4.48.5.0Apache Software License
iniconfigbrain-dead simple config-ini parsing2.0.0MIT
jaraco.classesUtility functions for Python class constructs3.4.0MIT License
jaraco.contextUseful decorators and context managers6.0.1MIT License
jaraco.functoolsFunctools like those found in stdlib4.1.0MIT License
Jinja2A very fast and expressive template engine.>=2.11.13.1.4BSD License
jsminJavaScript minifier.>=3.0.13.0.1MIT License
keyringStore and access your passwords safely.>=15.125.5.0MIT License
MarkdownPython implementation of John Gruber's Markdown.>=3.3.63.7BSD License
markdown-calloutsMarkdown extension: a classier syntax for admonitions>=0.40.4.0MIT
markdown-execUtilities to execute code blocks in Markdown files.>=1.81.9.3ISC
markdown-it-pyPython port of markdown-it. Markdown parsing, done right!>=2.2.0, >=2.1.03.0.0MIT License
MarkupSafeSafely add untrusted strings to HTML/XML markup.>=2.0.1, >=2.03.0.2BSD License
mdurlMarkdown URL utilities~=0.10.1.2MIT License
mergedeepA deep merge function for 🐍.>=1.3.41.3.4MIT License
mkdocsProject documentation with Markdown.>=1.61.6.1BSD-2-Clause
mkdocs-autorefsAutomatically link across pages in MkDocs.>=1.21.2.0ISC
mkdocs-coverageMkDocs plugin to integrate your coverage HTML report into your site.>=1.01.1.0ISC
mkdocs-gen-filesMkDocs plugin to programmatically generate documentation pages during the build>=0.50.5.0MIT
mkdocs-get-depsMkDocs extension that lists all dependencies according to a mkdocs.yml file>=0.2.00.2.0MIT
mkdocs-git-revision-date-localized-pluginMkdocs plugin that enables displaying the localized date of the last git modification of a markdown file.>=1.21.3.0MIT
mkdocs-literate-navMkDocs plugin to specify the navigation in Markdown instead of YAML>=0.60.6.1MIT
mkdocs-materialDocumentation that simply works>=9.59.5.44MIT
mkdocs-material-extensionsExtension pack for Python Markdown and MkDocs Material.~=1.31.3.1MIT
mkdocs-minify-pluginAn MkDocs plugin to minify HTML, JS or CSS files prior to being written to disk>=0.80.8.0MIT
mkdocstringsAutomatic documentation from sources, for MkDocs.>=0.250.27.0ISC
mkdocstrings-pythonA Python handler for mkdocstrings.>=0.5.21.12.2ISC
more-itertoolsMore routines for operating on iterables, beyond itertools10.5.0MIT License
mypyOptional static typing for Python>=1.101.13.0MIT
mypy-extensionsType system extensions for programs checked with the mypy type checker.>=1.0.01.0.0MIT License
nh3Python bindings to the ammonia HTML sanitization library.>=0.2.140.2.18MIT
packagingCore utilities for Python packages>=20.5, >=20.024.2Apache Software License + BSD License
paginateDivides large result sets into pages for easier browsing~=0.50.5.7MIT
pathspecUtility library for gitignore style pattern matching of file paths.>=0.11.10.12.1Mozilla Public License 2.0 (MPL 2.0)
pkginfoQuery metadata from sdists / bdists / installed packages.>=1.8.11.10.0MIT
platformdirsA small Python package for determining appropriate platform-specific dirs, e.g. a user data dir.>=3.9.1, >=2.2.0, <54.3.6MIT
pluggyplugin and hook calling mechanisms for python>=1.5, <21.5.0MIT
ptyprocessRun a subprocess in a pseudo terminal~=0.60.7.0ISC License (ISCL)
PygmentsPygments is a syntax highlighting package written in Python.>=2.5.1, >=2.13.0, <3.0.02.18.0BSD-2-Clause
pymdown-extensionsExtension pack for Python Markdown.>=910.12MIT
pyproject_hooksWrappers to call pyproject.toml-based build backend hooks.1.2.0MIT License
pytestpytest: simple powerful testing with Python>=8.2, >=7.0.0, <9.0.08.3.3MIT
pytest-covPytest plugin for measuring coverage.>=5.06.0.0MIT
pytest-randomlyPytest plugin to randomly order tests and control random.seed.>=3.153.16.0MIT License
pytest-xdistpytest xdist plugin for distributed testing, most importantly across multiple CPUs>=3.63.6.1MIT License
python-dateutilExtensions to the standard Python datetime module>=2.8.2, >=2.8.12.9.0.post0BSD License + Apache Software License
pytzWorld timezone definitions, modern and historical>=2020.12024.2MIT
PyYAMLYAML parser and emitter for Python>=5.16.0.2MIT
pyyaml_env_tagA custom YAML tag for referencing environment variables in YAML files.>=0.10.1MIT License
readme_rendererreadme_renderer is a library for rendering readme descriptions for Warehouse>=35.044.0Apache License, Version 2.0
regexAlternative regular expression module, to replace re.>=2022.42024.11.6Apache Software License
requestsPython HTTP for Humans.>=2.202.32.3Apache-2.0
requests-toolbeltA utility belt for advanced users of python-requests>=0.8.0, !=0.9.01.0.0Apache 2.0
rfc3986Validating URI References per RFC 3986>=1.4.02.0.0Apache 2.0
richRender rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal>=12.0.0, >=11.2.013.9.4MIT
ruffAn extremely fast Python linter and code formatter, written in Rust.>=0.40.7.3MIT
semverPython helper for Semantic Versioning (https://semver.org)>=2.133.0.2BSD
sixPython 2 and 3 compatibility utilities>=1.51.16.0MIT
smmapA pure Python implementation of a sliding window memory map manager>=3.0.1, <65.0.1BSD
twineCollection of utilities for publishing packages on PyPI>=5.15.1.1Apache Software License
types-MarkdownTyping stubs for Markdown>=3.63.7.0.20240822Apache-2.0
types-PyYAMLTyping stubs for PyYAML>=6.06.0.12.20240917Apache-2.0
typing_extensionsBackported and Experimental Type Hints for Python 3.8+>=4.12.2, >=3.6.44.12.2Python Software Foundation License
urllib3HTTP library with thread-safe connection pooling, file post, and more.>=1.26.0, >=1.21.1, <32.2.3MIT License
watchdogFilesystem events monitoring>=2.06.0.0Apache-2.0
zippBackport of pathlib-compatible object wrapper for zip files>=3.203.21.0MIT License
-

- - - - - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/css/timeago.css b/css/timeago.css new file mode 100644 index 00000000..f7ab7d69 --- /dev/null +++ b/css/timeago.css @@ -0,0 +1,15 @@ +/* + timeago output is dynamic, which breaks when you print a page. + + This CSS is only included when type: timeago + and ensures fallback to type "iso_date" when printing. + + */ + +.git-revision-date-localized-plugin-iso_date { display: none } + +@media print { + .git-revision-date-localized-plugin-iso_date { display: inline } + .git-revision-date-localized-plugin-timeago { display: none } +} + diff --git a/index.html b/index.html index 61d6c875..eb374c08 100644 --- a/index.html +++ b/index.html @@ -1,2044 +1,4 @@ - - - - - - - - - - - - - - - - - - - - - - - Overview - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

STIMULUS

Stochastic Testing with Input Modification for Unbiased Learning Systems.

ci documentation Build with us on slack!

Warning

This package is in active development and breaking changes may occur. The API is not yet stable and features might be added, modified, or removed without notice. Use in production environments is not recommended at this stage.

We encourage you to:

  • 📝 Report bugs and issues on our GitHub Issues page

  • 💡 Suggest features and improvements through GitHub Discussions

  • 🤝 Contribute by submitting pull requests

We are actively working towards release 1.0.0 (see milestone); check the slack channel by clicking on the badge above, where we are actively discussing. Build with us every Wednesday from 14:00 to 18:00 CET on the nf-core gathertown (see slack for calendar updates, i.e. some weeks open dev hours are not possible).

Introduction

Most (if not all) quality software is thoroughly tested. Deep neural networks seem to have escaped this paradigm.

In the age of large-scale deep learning, it is critical that early-stage deep learning models (prototypes) are tested, to ensure that costly bugs do not happen at scale.

Here, we attempt to solve the testing problem by proposing an extensive library to test deep neural networks beyond test-set performance.

Stimulus provides the following functionalities:

  1. Data Perturbation Testing:
    Modify training data to test model's robustness to perturbations and uncover which pre-processing steps increase performance

  2. Hyperparameter Optimization:
    Perform tuning on model architecture with user-defined search spaces using Ray[tune] to ensure comparable performance across data transformations

  3. Comprehensive Analysis:
    Generate all-against-all model report to guide data pre-processing decisions

For large-scale experiments, we recommend our nf-core deepmodeloptim pipeline, which is still under development and will be released alongside stimulus v1.0.0.

📹 Stimulus was featured at the Nextflow Summit 2024 in Barcelona, which is a nice introduction to current package capabilities; you can watch the talk here.

Stimulus aims to provide the following functionalities in the near future, so stay tuned for updates!

  1. Model Architecture Testing:
    Run routine checks on model architecture and training process including type-checking, model execution, and weight updates

  2. Post-Training Validation:
    Perform comprehensive model validation including overfitting detection and out-of-distribution performance testing

  3. Informed Hyperparameter Tuning:
    Encourage tuning strategies that follow Google's Deep Learning Tuning Playbook 1

  4. Scaling Analysis:
    Generate scaling law reports to understand prototype model behavior at different scales

User guide

Repository organization

Stimulus is organized as follows; we will refer to this structure in the following sections:

src/stimulus/ 🧪
 ├── analysis/ 📊
 │   └── analysis_default.py
 ├── cli/ 🖥️
@@ -2070,259 +30,33 @@ 

Repository organization

Data encoding

Data in stimulus can take many forms (files, text, images, networks, ...). To support this diversity, stimulus relies on the encoding module. The list of available encoders can be found here.

If the provided encoders do not support the type of data you are working with, you can write your own encoder by inheriting from the AbstractEncoder class and implementing the encode, decode and encode_all methods (a minimal sketch follows this list):

  • encode is currently optional; it can return a NotImplementedError if the encoder does not support encoding a single data point
  • decode is currently optional; it can return a NotImplementedError if the encoder does not support decoding
  • encode_all is called by other stimulus functions and is expected to return a np.array.
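For illustration only, here is a minimal sketch of what a custom encoder could look like. This is a hedged example, not the package's confirmed API: the AbstractEncoder import path and the exact signatures are assumptions; only the encode, decode and encode_all method names come from the description above.

# Minimal sketch of a custom encoder; the import path and exact signatures are assumptions.
import numpy as np

from stimulus.data.encoding.encoders import AbstractEncoder  # assumed import path


class IntEncoder(AbstractEncoder):
    """Illustrative encoder turning string integers (e.g. "42") into numpy integers."""

    def encode(self, data: str) -> int:
        # optional: encode a single data point
        return int(data)

    def decode(self, data: int) -> str:
        # optional: reverse the encoding
        return str(data)

    def encode_all(self, data: list) -> np.ndarray:
        # called by other stimulus functions; must return a np.array
        return np.array([self.encode(x) for x in data])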

Expected data format

Data is expected to be presented in a csv samplesheet file with the following format:

input1:input:input_type | input2:input:input_type | meta1:meta:meta_type | label1:label:label_type | label2:label:label_type
sample1 input1 | sample1 input2 | sample1 meta1 | sample1 label1 | sample1 label2
sample2 input1 | sample2 input2 | sample2 meta1 | sample2 label1 | sample2 label2
sample3 input1 | sample3 input2 | sample3 meta1 | sample3 label1 | sample3 label2

Columns are expected to follow this name convention: name:type:data_type

  • name corresponds to the column name; this should be the same as the input names in the model batch definition (see the model section for more details)
  • type is either input, meta or label; typically models predict the labels from the input, and meta is used to perform downstream analysis
  • data_type is the column data type.

A concrete header written in this convention is shown right after this list.
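For example, assuming a standard comma-separated file, a samplesheet following this convention could start like this (the column names reuse the DnaToFloat example introduced below; the values are purely illustrative):

mouse_dna:input:dna,mouse_rnaseq:label:float
ACTAGGCATGCTAGTCG,0.53
ACTGGGGCTAGTCGAA,0.23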

Note

This rigid data format is expected to change once we move to release v1.0.0: data types and information will be defined in a yaml config, and only column names will be required in the data (see this github issue).

Connecting encoders and datasets

Once we have our data formatted and our encoders ready, we need to explicitly state which encoder is used for which data type. This is done through an experiment class.

To understand how experiment classes are used to connect data types and encoders, let's have a look at a minimal DnaToFloat example:

class DnaToFloat(AbstractExperiment):
    def __init__(self) -> None:
        super().__init__()
        self.dna = {
            "encoder": encoders.TextOneHotEncoder(alphabet="acgt"),
        }
        self.float = {
            "encoder": encoders.FloatEncoder(),
        }

Here we define the data_type for the dna and float types. Note that those data_type are the same as the ones defined in the samplesheet dataset above; for example, a dataset on which this experiment would run could look like this:

mouse_dna:input:dna | mouse_rnaseq:label:float
ACTAGGCATGCTAGTCG | 0.53
ACTGGGGCTAGTCGAA | 0.23
GATGTTCTGATGCT | 0.98

Note how the data_type for the mouse_dna and mouse_rnaseq columns matches exactly the attribute names defined in the DnaToFloat minimal class above.

stimulus-py ships with a few basic experiment classes; if you need to write your own experiment class, simply inherit from the base AbstractExperiment class and overwrite the class __init__ method as shown above.

Note

This has the drawback of requiring a new experiment class to be built each time a new task is defined (for instance, let's say we want to use dna and protein sequences to predict rna).

Once we move to release v1.0.0, type (i.e. input, meta, label) and data_type will be defined in the data yaml config, and the relevant experiment class will be built automatically.

Loading the data

Finally, once we have defined our encoders, the experiment class and the samplesheet, stimulus will transparently load the data using the csv.py module.

csv.py contains two important classes, CsvLoader and CsvProcessing.

CsvLoader is responsible for naïvely loading the data (without changing anything). It works by performing a couple of checks on the dataset to ensure it is correctly formatted, and then uses the experiment class in conjunction with the column names to call the proper encoders and output inputs, labels, and meta dictionary objects.

CsvLoader is used by the handlertorch module to load data into pytorch tensors.

Tip

So, to recap, when you load a dataset into a torch tensor:

  1. handlertorch will call CsvLoader with the csv samplesheet and the experiment class
  2. CsvLoader will use the experiment class to fetch the proper encoder encode_all method for each data column
  3. CsvLoader will use the encode_all method to encode the data and output dictionary objects for inputs, labels and meta data
  4. handlertorch will convert the contents to torch tensors
  5. handlertorch will feed the input torch tensor to the model, use the label torch tensor for loss computation, and store the meta tensor for downstream analysis

Great, now you know how stimulus transparently loads your data into your pytorch model! While this seems complicated, the only thing you really have to do is format your data correctly in a csv samplesheet and define your experiment class with the proper encoders (either by using the provided encoders or by writing your own). A minimal loading sketch is shown below.
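For illustration, the sketch below shows roughly how that pipeline is driven from user code. It mirrors the call pattern used in stimulus' own predict CLI (reproduced further down in the API reference), but the import paths and the way the experiment instance is obtained here are assumptions, not guaranteed API.

# Sketch only: import paths and the get_experiment call are assumptions.
from torch.utils.data import DataLoader

experiment = get_experiment("DnaToFloat")        # or instantiate your own experiment class
dataset = TorchDataset("samplesheet.csv", experiment, split=None)
loader = DataLoader(dataset, batch_size=256, shuffle=False)

for inputs, labels, meta in loader:
    # inputs, labels and meta are dictionaries keyed by column name
    output = model(**inputs)                     # 'model' is your pytorch model
    break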

Data transformation

Measuring the impact of data transformations (noising, down/upsampling, augmentation...) on models at training time is a major feature of stimulus.

Data transformations materialize as DataTransformer classes and should inherit from the AbstractDataTransformer class (see docs).

Note

Writing your own DataTransformer class works the same way as writing your own Encoder class: you should overwrite the transform and transform_all methods (a minimal sketch follows these admonitions).

Warning

Every DataTransformer class has to have seed among the parameters of its transform and transform_all methods, and np.random.seed(seed) should be called in those methods.

Warning

Every DataTransformer class should have an add_row argument set to either True or False depending on whether it is augmenting the data (adding rows) or not.
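Here is a minimal, hypothetical DataTransformer following the rules above (a seed parameter on both methods, np.random.seed(seed) called inside them, and an add_row flag). The AbstractDataTransformer import path, the exact signatures and the way add_row is declared are assumptions made for illustration.

# Hypothetical sketch; import path, signatures and add_row declaration are assumptions.
import numpy as np

from stimulus.data.transform.data_transformation_generators import AbstractDataTransformer  # assumed path


class RandomUpperCaser(AbstractDataTransformer):
    """Illustrative transformer that randomly upper-cases characters of a text column."""

    add_row = False  # no new rows are added, this transformer only modifies existing ones

    def transform(self, data: str, seed: float = None) -> str:
        np.random.seed(seed)
        return "".join(c.upper() if np.random.rand() < 0.5 else c for c in data)

    def transform_all(self, data: list, seed: float = None) -> list:
        np.random.seed(seed)
        return [self.transform(x, seed=seed) for x in data]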

Connecting transformations and dataset

Just like encoders, data transformations are defined in the Experiment class alongside encoders. Let's upgrade our DnaToFloat minimal class defined above to reflect this.

class DnaToFloat(AbstractExperiment):
    def __init__(self) -> None:
        super().__init__()
        self.dna = {
            "encoder": encoders.TextOneHotEncoder(alphabet="acgt"),
            "data_transformation_generators": {
                "UniformTextMasker": data_transformation_generators.UniformTextMasker(mask="N"),
                "ReverseComplement": data_transformation_generators.ReverseComplement(),
                "GaussianChunk": data_transformation_generators.GaussianChunk(),
            },
        }
        self.float = {
            "encoder": encoders.FloatEncoder(),
            "data_transformation_generators": {"GaussianNoise": data_transformation_generators.GaussianNoise()},
        }

As you can see, our data_type entries get another field, "data_transformation_generators", where we can initialize the DataTransformer classes with their relevant parameters.

In the csv module, the CsvProcessing class will call the transform_all methods of the classes contained in "data_transformation_generators", based on the column type and a list of transformations.

That is, if we give the ["ReverseComplement", "GaussianChunk"] list to the CsvProcessing class transform method, the data contained in the mouse_dna:input:dna column in our minimal example above will first be reverse complemented and then chunked.

Tip

Recap: to transform your dataset,

  • define your own DataTransformer class or use one we provide
  • add it to your experiment class
  • load your data through CsvProcessing
  • set a list of transforms
  • call CsvProcessing.transform(transform_list)

These steps are sketched in code right after this list.
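Put together, those steps could look roughly like the snippet below. This is a heavily hedged, hypothetical sketch: the CsvProcessing constructor arguments and import are assumptions; only the transform(transform_list) call and the transformation names come from the text above.

# Hypothetical sketch; the CsvProcessing constructor arguments are assumed.
experiment = DnaToFloat()                                   # the experiment class defined above
processing = CsvProcessing(experiment, "samplesheet.csv")   # constructor signature assumed

# apply the transformations in order: reverse complement first, then chunk
processing.transform(["ReverseComplement", "GaussianChunk"])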

Installation

stimulus is still under development; you can install it from test-pypi by running the following command:

pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple stimulus-py==0.0.10

Citations

  1. Godbole, V., Dahl, G. E., Gilmer, J., Shallue, C. J., & Nado, Z. (2023). Deep Learning Tuning Playbook (Version 1.0) [Computer software]. http://github.com/google-research/tuning_playbook
- - - - - - - - - - - - - - \ No newline at end of file +
\ No newline at end of file diff --git a/js/timeago.min.js b/js/timeago.min.js new file mode 100644 index 00000000..a8530a5f --- /dev/null +++ b/js/timeago.min.js @@ -0,0 +1,2 @@ +/* Taken from https://cdnjs.cloudflare.com/ajax/libs/timeago.js/4.0.2/timeago.min.js */ +!function(s,n){"object"==typeof exports&&"undefined"!=typeof module?n(exports):"function"==typeof define&&define.amd?define(["exports"],n):n((s=s||self).timeago={})}(this,function(s){"use strict";var a=["second","minute","hour","day","week","month","year"];function n(s,n){if(0===n)return["just now","right now"];var e=a[Math.floor(n/2)];return 1=m[t]&&t=m[e]&&e 0) { + var locale = getLocale(nodes[0]); + timeago.render(nodes, locale); + } + }) +} else { + var nodes = document.querySelectorAll('.timeago'); + if (nodes.length > 0) { + var locale = getLocale(nodes[0]); + timeago.render(nodes, locale); + } +} diff --git a/license/index.html b/license/index.html index 332a316c..15b15b63 100644 --- a/license/index.html +++ b/license/index.html @@ -1,1867 +1,4 @@ - - - - - - - - - - - - - - - - - - - - - - - - - License - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

License

MIT License
 
 Copyright (c) 2024 Mathys Grapotte
 
@@ -1882,50 +19,8 @@ 

- - - - - - - - - - - - - - \ No newline at end of file +
\ No newline at end of file diff --git a/reference/SUMMARY/index.html b/reference/SUMMARY/index.html index 525266e6..00717f2c 100644 --- a/reference/SUMMARY/index.html +++ b/reference/SUMMARY/index.html @@ -1,1945 +1,5 @@ - - - - - - - - - - - - - - - - - - - - - SUMMARY - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

SUMMARY

\ No newline at end of file diff --git a/reference/stimulus/cli/analysis_default/index.html b/reference/stimulus/cli/analysis_default/index.html index 38406e31..0c01076d 100644 --- a/reference/stimulus/cli/analysis_default/index.html +++ b/reference/stimulus/cli/analysis_default/index.html @@ -1,2947 +1,423 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.cli.analysis_default - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

analysis_default

Analysis default module for running model analysis and performance evaluation.

Functions:

  • get_args – Get the arguments when using from the commandline.
  • load_model – Load the model with its config and weights.
  • main – Run the main analysis pipeline.
  • run – Run the analysis script.
  • run_analysis_performance_model – Run analysis to report model robustness.
  • run_analysis_performance_tune – Run performance analysis during tuning/training.
- - - - - -
- - - - - - - - - -
- - -

- get_args - - -

-
get_args() -> Namespace
-
- -
- -

Get the arguments when using from the commandline.

- - -

Returns:

-
    -
  • - Namespace - – -
    -

    Parsed command line arguments.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/analysis_default.py -
def get_args() -> argparse.Namespace:
-    """Get the arguments when using from the commandline.
-
-    Returns:
-        Parsed command line arguments.
-    """
-    parser = argparse.ArgumentParser(description="")
-    parser.add_argument("-m", "--model", type=str, required=True, metavar="FILE", help="The model .py file")
-    parser.add_argument(
-        "-w",
-        "--weight",
-        type=str,
-        required=True,
-        nargs="+",
-        metavar="FILE",
-        help="Model weights .pt file",
-    )
-    parser.add_argument(
-        "-me",
-        "--metrics",
-        type=str,
-        required=True,
-        nargs="+",
-        metavar="FILE",
-        help="The file path for the metrics file obtained during tuning",
-    )
-    parser.add_argument(
-        "-ec",
-        "--experiment_config",
-        type=str,
-        required=True,
-        nargs="+",
-        metavar="FILE",
-        help="The experiment config used to modify the data.",
-    )
-    parser.add_argument(
-        "-mc",
-        "--model_config",
-        type=str,
-        required=True,
-        nargs="+",
-        metavar="FILE",
-        help="The tune config file.",
-    )
-    parser.add_argument(
-        "-d",
-        "--data",
-        type=str,
-        required=True,
-        nargs="+",
-        metavar="FILE",
-        help="List of data files to be used for the analysis.",
-    )
-    parser.add_argument("-o", "--outdir", type=str, required=True, help="output directory")
-
-    return parser.parse_args()
-
-
-
- -
- -
- - -

- load_model - - -

-
load_model(
-    model_class: Any, weight_path: str, mconfig_path: str
-) -> Any
-
- -
- -

Load the model with its config and weights.

- - -

Parameters:

-
    -
  • - model_class - (Any) - – -
    -

    Model class to instantiate

    -
    -
  • -
  • - weight_path - (str) - – -
    -

    Path to model weights

    -
    -
  • -
  • - mconfig_path - (str) - – -
    -

    Path to model config

    -
    -
  • -
- - -

Returns:

-
    -
  • - Any - – -
    -

    Loaded model instance

    -
    -
  • -
- -
- Source code in src/stimulus/cli/analysis_default.py -
def load_model(model_class: Any, weight_path: str, mconfig_path: str) -> Any:
-    """Load the model with its config and weights.
-
-    Args:
-        model_class: Model class to instantiate
-        weight_path: Path to model weights
-        mconfig_path: Path to model config
-
-    Returns:
-        Loaded model instance
-    """
-    with open(mconfig_path) as in_json:
-        mconfig = json.load(in_json)["model_params"]
-
-    model = model_class(**mconfig)
-    return safe_load(model, weight_path, strict=True)
-
-
-
- -
- -
- - -

- main - - -

-
main(
-    model_path: str,
-    weight_list: list[str],
-    mconfig_list: list[str],
-    metrics_list: list[str],
-    econfig_list: list[str],
-    data_list: list[str],
-    outdir: str,
-) -> None
-
- -
- -

Run the main analysis pipeline.

- - -

Parameters:

-
    -
  • - model_path - (str) - – -
    -

    Path to model file

    -
    -
  • -
  • - weight_list - (list[str]) - – -
    -

    List of model weight paths

    -
    -
  • -
  • - mconfig_list - (list[str]) - – -
    -

    List of model config paths

    -
    -
  • -
  • - metrics_list - (list[str]) - – -
    -

    List of metric file paths

    -
    -
  • -
  • - econfig_list - (list[str]) - – -
    -

    List of experiment config paths

    -
    -
  • -
  • - data_list - (list[str]) - – -
    -

    List of data file paths

    -
    -
  • -
  • - outdir - (str) - – -
    -

    Output directory path

    -
    -
  • -
- -
- Source code in src/stimulus/cli/analysis_default.py -
def main(
-    model_path: str,
-    weight_list: list[str],
-    mconfig_list: list[str],
-    metrics_list: list[str],
-    econfig_list: list[str],
-    data_list: list[str],
-    outdir: str,
-) -> None:
-    """Run the main analysis pipeline.
-
-    Args:
-        model_path: Path to model file
-        weight_list: List of model weight paths
-        mconfig_list: List of model config paths
-        metrics_list: List of metric file paths
-        econfig_list: List of experiment config paths
-        data_list: List of data file paths
-        outdir: Output directory path
-    """
-    metrics = ["rocauc", "prauc", "mcc", "f1score", "precision", "recall"]
-
-    # Plot the performance during tuning/training
-    run_analysis_performance_tune(
-        metrics_list,
-        [*metrics, "loss"],  # Use list unpacking instead of concatenation
-        os.path.join(outdir, "performance_tune_train"),
-    )
-
-    # Run robustness analysis
-    run_analysis_performance_model(
-        metrics,
-        model_path,
-        weight_list,
-        mconfig_list,
-        econfig_list,
-        data_list,
-        os.path.join(outdir, "performance_robustness"),
-    )
-
-
-
- -
- -
- - -

- run - - -

-
run() -> None
-
- -
- -

Run the analysis script.

- -
- Source code in src/stimulus/cli/analysis_default.py -
def run() -> None:
-    """Run the analysis script."""
-    args = get_args()
-    main(args.model, args.weight, args.model_config, args.metrics, args.experiment_config, args.data, args.outdir)
-
-
-
- -
- -
- - -

- run_analysis_performance_model - - -

-
run_analysis_performance_model(
-    metrics: list[str],
-    model_path: str,
-    weight_list: list[str],
-    mconfig_list: list[str],
-    econfig_list: list[str],
-    data_list: list[str],
-    outdir: str,
-) -> None
-
- -
- -

Run analysis to report model robustness.

-

This block will compute the predictions of each model for each dataset. -This information will be parsed and plots will be generated to report the model robustness.

- - -

Parameters:

-
    -
  • - metrics - (list[str]) - – -
    -

    List of metrics to analyze

    -
    -
  • -
  • - model_path - (str) - – -
    -

    Path to model file

    -
    -
  • -
  • - weight_list - (list[str]) - – -
    -

    List of model weight paths

    -
    -
  • -
  • - mconfig_list - (list[str]) - – -
    -

    List of model config paths

    -
    -
  • -
  • - econfig_list - (list[str]) - – -
    -

    List of experiment config paths

    -
    -
  • -
  • - data_list - (list[str]) - – -
    -

    List of data file paths

    -
    -
  • -
  • - outdir - (str) - – -
    -

    Output directory path

    -
    -
  • -
- -
- Source code in src/stimulus/cli/analysis_default.py -
def run_analysis_performance_model(
-    metrics: list[str],
-    model_path: str,
-    weight_list: list[str],
-    mconfig_list: list[str],
-    econfig_list: list[str],
-    data_list: list[str],
-    outdir: str,
-) -> None:
-    """Run analysis to report model robustness.
-
-    This block will compute the predictions of each model for each dataset.
-    This information will be parsed and plots will be generated to report the model robustness.
-
-    Args:
-        metrics: List of metrics to analyze
-        model_path: Path to model file
-        weight_list: List of model weight paths
-        mconfig_list: List of model config paths
-        econfig_list: List of experiment config paths
-        data_list: List of data file paths
-        outdir: Output directory path
-    """
-    if not os.path.exists(outdir):
-        os.makedirs(outdir)
-
-    # Load all the models weights into a list
-    model_names = []
-    model_list = []
-    model_class = import_class_from_file(model_path)
-    for weight_path, mconfig_path in zip(weight_list, mconfig_list):
-        model = load_model(model_class, weight_path, mconfig_path)
-        model_names.append(mconfig_path.split("/")[-1].replace("-config.json", ""))
-        model_list.append(model)
-
-    # Read experiment config and initialize experiment class
-    with open(econfig_list[0]) as in_json:
-        experiment_name = json.load(in_json)["experiment"]
-    initialized_experiment_class = get_experiment(experiment_name)
-
-    # Initialize analysis
-    analysis = AnalysisRobustness(metrics, initialized_experiment_class, batch_size=256)
-
-    # Compute performance metrics
-    df = analysis.get_performance_table(model_names, model_list, data_list)
-    df.to_csv(os.path.join(outdir, "performance_table.csv"), index=False)
-
-    # Get average performance
-    tmp = analysis.get_average_performance_table(df)
-    tmp.to_csv(os.path.join(outdir, "average_performance_table.csv"), index=False)
-
-    # Plot heatmap
-    analysis.plot_performance_heatmap(df, output=os.path.join(outdir, "performance_heatmap.png"))
-
-    # Plot delta performance
-    outdir2 = os.path.join(outdir, "delta_performance_vs_data")
-    if not os.path.exists(outdir2):
-        os.makedirs(outdir2)
-    for metric in metrics:
-        analysis.plot_delta_performance(
-            metric,
-            df,
-            output=os.path.join(outdir2, f"delta_performance_{metric}.png"),
-        )
-
-
-
- -
- -
- - -

- run_analysis_performance_tune - - -

-
run_analysis_performance_tune(
-    metrics_list: list[str], metrics: list[str], outdir: str
-) -> None
-
- -
- -

Run performance analysis during tuning/training.

-

Each model has a metrics file obtained during tuning/training, -check the performance there and plot it. -This is to track the model performance per training iteration.

- - -

Parameters:

-
    -
  • - metrics_list - (list[str]) - – -
    -

    List of metric file paths

    -
    -
  • -
  • - metrics - (list[str]) - – -
    -

    List of metrics to analyze

    -
    -
  • -
  • - outdir - (str) - – -
    -

    Output directory path

    -
    -
  • -
- -
- Source code in src/stimulus/cli/analysis_default.py -
def run_analysis_performance_tune(metrics_list: list[str], metrics: list[str], outdir: str) -> None:
-    """Run performance analysis during tuning/training.
-
-    Each model has a metrics file obtained during tuning/training,
-    check the performance there and plot it.
-    This is to track the model performance per training iteration.
-
-    Args:
-        metrics_list: List of metric file paths
-        metrics: List of metrics to analyze
-        outdir: Output directory path
-    """
-    if not os.path.exists(outdir):
-        os.makedirs(outdir)
-
-    for metrics_path in metrics_list:
-        AnalysisPerformanceTune(metrics_path).plot_metric_vs_iteration(
-            metrics=metrics,
-            output=os.path.join(outdir, metrics_path.replace("-metrics.csv", "") + "-metric_vs_iteration.png"),
-        )
-
-
-
- -
- - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/cli/check_model/index.html b/reference/stimulus/cli/check_model/index.html index ec38086d..dd467cf0 100644 --- a/reference/stimulus/cli/check_model/index.html +++ b/reference/stimulus/cli/check_model/index.html @@ -1,2689 +1,379 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.cli.check_model - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

check_model

CLI module for checking model configuration and running initial tests.

Modules:

  • handlertorch – This file provides the class API for handling the data in pytorch using the Dataset and Dataloader classes.
  • launch_utils – Utility functions for launching and configuring experiments and ray tuning.
  • loaders – Loaders serve as interfaces between the CSV master class and custom methods.
  • raytune_learner – Ray Tune wrapper and trainable model classes for hyperparameter optimization.
  • yaml_data – Utility module for handling YAML configuration files and their validation.
  • yaml_model_schema – Module for handling YAML configuration files and converting them to Ray Tune format.
- - - - - - -

Functions:

  • get_args – Get the arguments when using from the commandline.
  • main – Run the main model checking pipeline.
  • run – Run the model checking script.
- - - - - -
- - - - - - - - - -
- - -

- get_args - - -

-
get_args() -> Namespace
-
- -
- -

Get the arguments when using from the commandline.

- - -

Returns:

-
    -
  • - Namespace - – -
    -

    Parsed command line arguments.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/check_model.py -
def get_args() -> argparse.Namespace:
-    """Get the arguments when using from the commandline.
-
-    Returns:
-        Parsed command line arguments.
-    """
-    parser = argparse.ArgumentParser(description="Launch check_model.")
-    parser.add_argument("-d", "--data", type=str, required=True, metavar="FILE", help="Path to input csv file.")
-    parser.add_argument("-m", "--model", type=str, required=True, metavar="FILE", help="Path to model file.")
-    parser.add_argument(
-        "-e",
-        "--data_config",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="Path to data config file.",
-    )
-    parser.add_argument(
-        "-c",
-        "--model_config",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="Path to yaml config training file.",
-    )
-    parser.add_argument(
-        "-w",
-        "--initial_weights",
-        type=str,
-        required=False,
-        nargs="?",
-        const=None,
-        default=None,
-        metavar="FILE",
-        help="The path to the initial weights (optional).",
-    )
-
-    parser.add_argument(
-        "-n",
-        "--num_samples",
-        type=int,
-        required=False,
-        nargs="?",
-        const=3,
-        default=3,
-        metavar="NUM_SAMPLES",
-        help="Number of samples for tuning. Overwrites tune.tune_params.num_samples in config.",
-    )
-    parser.add_argument(
-        "--ray_results_dirpath",
-        type=str,
-        required=False,
-        nargs="?",
-        const=None,
-        default=None,
-        metavar="DIR_PATH",
-        help="Location where ray_results output dir should be written. If None, uses ~/ray_results.",
-    )
-    parser.add_argument(
-        "--debug_mode",
-        action="store_true",
-        help="Activate debug mode for tuning. Default false, no debug.",
-    )
-
-    return parser.parse_args()
-
-
-
- -
- -
- - -

- main - - -

-
main(
-    model_path: str,
-    data_path: str,
-    data_config_path: str,
-    model_config_path: str,
-    initial_weights: str | None = None,
-    num_samples: int = 3,
-    ray_results_dirpath: str | None = None,
-    *,
-    debug_mode: bool = False
-) -> None
-
- -
- -

Run the main model checking pipeline.

- - -

Parameters:

-
    -
  • - data_path - (str) - – -
    -

    Path to input data file.

    -
    -
  • -
  • - model_path - (str) - – -
    -

    Path to model file.

    -
    -
  • -
  • - data_config_path - (str) - – -
    -

    Path to data config file.

    -
    -
  • -
  • - model_config_path - (str) - – -
    -

    Path to model config file.

    -
    -
  • -
  • - initial_weights - (str | None, default: - None -) - – -
    -

    Optional path to initial weights.

    -
    -
  • -
  • - num_samples - (int, default: - 3 -) - – -
    -

    Number of samples for tuning.

    -
    -
  • -
  • - ray_results_dirpath - (str | None, default: - None -) - – -
    -

    Directory for ray results.

    -
    -
  • -
  • - debug_mode - (bool, default: - False -) - – -
    -

    Whether to run in debug mode.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/check_model.py -
def main(
-    model_path: str,
-    data_path: str,
-    data_config_path: str,
-    model_config_path: str,
-    initial_weights: str | None = None,  # noqa: ARG001
-    num_samples: int = 3,
-    ray_results_dirpath: str | None = None,
-    *,
-    debug_mode: bool = False,
-) -> None:
-    """Run the main model checking pipeline.
-
-    Args:
-        data_path: Path to input data file.
-        model_path: Path to model file.
-        data_config_path: Path to data config file.
-        model_config_path: Path to model config file.
-        initial_weights: Optional path to initial weights.
-        num_samples: Number of samples for tuning.
-        ray_results_dirpath: Directory for ray results.
-        debug_mode: Whether to run in debug mode.
-    """
-    with open(data_config_path) as file:
-        data_config = yaml.safe_load(file)
-        data_config = yaml_data.YamlSubConfigDict(**data_config)
-
-    with open(model_config_path) as file:
-        model_config = yaml.safe_load(file)
-        model_config = yaml_model_schema.Model(**model_config)
-
-    encoder_loader = loaders.EncoderLoader()
-    encoder_loader.initialize_column_encoders_from_config(column_config=data_config.columns)
-
-    logger.info("Dataset loaded successfully.")
-
-    model_class = launch_utils.import_class_from_file(model_path)
-
-    logger.info("Model class loaded successfully.")
-
-    ray_config_loader = yaml_model_schema.YamlRayConfigLoader(model=model_config)
-    ray_config_dict = ray_config_loader.get_config().model_dump()
-    ray_config_model = ray_config_loader.get_config()
-
-    logger.info("Ray config loaded successfully.")
-
-    sampled_model_params = {
-        key: domain.sample() if hasattr(domain, "sample") else domain
-        for key, domain in ray_config_dict["network_params"].items()
-    }
-
-    logger.info("Sampled model params loaded successfully.")
-
-    model_instance = model_class(**sampled_model_params)
-
-    logger.info("Model instance loaded successfully.")
-
-    torch_dataset = handlertorch.TorchDataset(
-        config_path=data_config_path,
-        csv_path=data_path,
-        encoder_loader=encoder_loader,
-    )
-
-    torch_dataloader = DataLoader(torch_dataset, batch_size=10, shuffle=True)
-
-    logger.info("Torch dataloader loaded successfully.")
-
-    # try to run the model on a single batch
-    for batch in torch_dataloader:
-        input_data, labels, metadata = batch
-        # Log shapes of tensors in each dictionary
-        for key, tensor in input_data.items():
-            logger.debug(f"Input tensor '{key}' shape: {tensor.shape}")
-        for key, tensor in labels.items():
-            logger.debug(f"Label tensor '{key}' shape: {tensor.shape}")
-        for key, list_object in metadata.items():
-            logger.debug(f"Metadata lists '{key}' length: {len(list_object)}")
-        output = model_instance(**input_data)
-        logger.info("model ran successfully on a single batch")
-        logger.debug(f"Output shape: {output.shape}")
-        break
-
-    logger.info("Model checking single pass completed successfully.")
-
-    # override num_samples
-    model_config.tune.tune_params.num_samples = num_samples
-
-    tuner = raytune_learner.TuneWrapper(
-        model_config=ray_config_model,
-        data_config_path=data_config_path,
-        model_class=model_class,
-        data_path=data_path,
-        encoder_loader=encoder_loader,
-        seed=42,
-        ray_results_dir=ray_results_dirpath,
-        debug=debug_mode,
-    )
-
-    logger.info("Tuner initialized successfully.")
-
-    tuner.tune()
-
-    logger.info("Tuning completed successfully.")
-    logger.info("Checks complete")
-
-
-
- -
- -
- - -

- run - - -

-
run() -> None
-
- -
- -

Run the model checking script.

- -
- Source code in src/stimulus/cli/check_model.py -
def run() -> None:
-    """Run the model checking script."""
-    args = get_args()
-    main(
-        data_path=args.data,
-        model_path=args.model,
-        data_config_path=args.data_config,
-        model_config_path=args.model_config,
-        initial_weights=args.initial_weights,
-        num_samples=args.num_samples,
-        ray_results_dirpath=args.ray_results_dirpath,
-        debug_mode=args.debug_mode,
-    )
-
-
-
- -
- - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/cli/index.html b/reference/stimulus/cli/index.html index 5cfe3aad..252fa1f7 100644 --- a/reference/stimulus/cli/index.html +++ b/reference/stimulus/cli/index.html @@ -1,2046 +1,5 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.cli - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

cli

Command line interface package for the stimulus library.

Modules:

  • analysis_default – Analysis default module for running model analysis and performance evaluation.
  • check_model – CLI module for checking model configuration and running initial tests.
  • predict – CLI module for model prediction on datasets.
  • shuffle_csv – CLI module for shuffling CSV data files.
  • split_csv – CLI module for splitting CSV data files.
  • split_yaml – CLI module for splitting YAML configuration files.
  • transform_csv – CLI module for transforming CSV data files.
  • tuning – CLI module for running raytune tuning experiment.
\ No newline at end of file diff --git a/reference/stimulus/cli/predict/index.html b/reference/stimulus/cli/predict/index.html index 0b7424ad..5f5607cb 100644 --- a/reference/stimulus/cli/predict/index.html +++ b/reference/stimulus/cli/predict/index.html @@ -1,3028 +1,367 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.cli.predict - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

predict

CLI module for model prediction on datasets.

Functions:

  • add_meta_info – Add metadata columns to predictions/labels dictionary.
  • get_args – Parse command line arguments.
  • get_batch_size – Get batch size from model config.
  • get_meta_keys – Extract metadata column keys.
  • load_model – Load model with hyperparameters and weights.
  • main – Run model prediction pipeline.
  • parse_y_keys – Parse dictionary keys to match input data format.
  • run – Execute model prediction pipeline.
- - - - - -
- - - - - - - - - -
- - -

- add_meta_info - - -

-
add_meta_info(
-    data: DataFrame, y: dict[str, Any]
-) -> dict[str, Any]
-
- -
- -

Add metadata columns to predictions/labels dictionary.

- - -

Parameters:

-
    -
  • - data - (DataFrame) - – -
    -

    Input DataFrame with metadata.

    -
    -
  • -
  • - y - (dict[str, Any]) - – -
    -

    Dictionary of predictions/labels.

    -
    -
  • -
- - -

Returns:

-
    -
  • - dict[str, Any] - – -
    -

    Updated dictionary with metadata.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/predict.py -
def add_meta_info(data: pl.DataFrame, y: dict[str, Any]) -> dict[str, Any]:
-    """Add metadata columns to predictions/labels dictionary.
-
-    Args:
-        data: Input DataFrame with metadata.
-        y: Dictionary of predictions/labels.
-
-    Returns:
-        Updated dictionary with metadata.
-    """
-    keys = get_meta_keys(data.columns)
-    for key in keys:
-        y[key] = data[key].to_list()
-    return y
-
-
-
- -
- -
- - -

- get_args - - -

-
get_args() -> Namespace
-
- -
- -

Parse command line arguments.

- - -

Returns:

-
    -
  • - Namespace - – -
    -

    Parsed command line arguments.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/predict.py -
def get_args() -> argparse.Namespace:
-    """Parse command line arguments.
-
-    Returns:
-        Parsed command line arguments.
-    """
-    parser = argparse.ArgumentParser(description="Predict model outputs on a dataset.")
-    parser.add_argument("-m", "--model", type=str, required=True, metavar="FILE", help="Path to model .py file.")
-    parser.add_argument("-w", "--weight", type=str, required=True, metavar="FILE", help="Path to model weights file.")
-    parser.add_argument(
-        "-mc",
-        "--model_config",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="Path to tune config file with model hyperparameters.",
-    )
-    parser.add_argument(
-        "-ec",
-        "--experiment_config",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="Path to experiment config for data modification.",
-    )
-    parser.add_argument("-d", "--data", type=str, required=True, metavar="FILE", help="Path to input data.")
-    parser.add_argument("-o", "--output", type=str, required=True, metavar="FILE", help="Path for output predictions.")
-    parser.add_argument("--split", type=int, help="Data split to use (default: None).")
-    parser.add_argument("--return_labels", action="store_true", help="Include labels with predictions.")
-
-    return parser.parse_args()
-
-
-
- -
- -
- - -

- get_batch_size - - -

-
get_batch_size(mconfig: dict[str, Any]) -> int
-
- -
- -

Get batch size from model config.

- - -

Parameters:

-
    -
  • - mconfig - (dict[str, Any]) - – -
    -

    Model configuration dictionary.

    -
    -
  • -
- - -

Returns:

-
    -
  • - int - – -
    -

    Batch size to use for predictions.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/predict.py -
def get_batch_size(mconfig: dict[str, Any]) -> int:
-    """Get batch size from model config.
-
-    Args:
-        mconfig: Model configuration dictionary.
-
-    Returns:
-        Batch size to use for predictions.
-    """
-    default_batch_size = 256
-    if "data_params" in mconfig and "batch_size" in mconfig["data_params"]:
-        return mconfig["data_params"]["batch_size"]
-    return default_batch_size
-
-
-
- -
- -
- - -

- get_meta_keys - - -

-
get_meta_keys(names: Sequence[str]) -> list[str]
-
- -
- -

Extract metadata column keys.

- - -

Parameters:

-
    -
  • - names - (Sequence[str]) - – -
    -

    List of column names.

    -
    -
  • -
- - -

Returns:

-
    -
  • - list[str] - – -
    -

    List of metadata column keys.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/predict.py -
def get_meta_keys(names: Sequence[str]) -> list[str]:
-    """Extract metadata column keys.
-
-    Args:
-        names: List of column names.
-
-    Returns:
-        List of metadata column keys.
-    """
-    return [name for name in names if name.split(":")[1] == "meta"]
-
-
-
- -
- -
- - -

- load_model - - -

-
load_model(
-    model_class: Any,
-    weight_path: str,
-    mconfig: dict[str, Any],
-) -> Any
-
- -
- -

Load model with hyperparameters and weights.

- - -

Parameters:

-
    -
  • - model_class - (Any) - – -
    -

    Model class to instantiate.

    -
    -
  • -
  • - weight_path - (str) - – -
    -

    Path to model weights.

    -
    -
  • -
  • - mconfig - (dict[str, Any]) - – -
    -

    Model configuration dictionary.

    -
    -
  • -
- - -

Returns:

-
    -
  • - Any - – -
    -

    Loaded model instance.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/predict.py -
def load_model(model_class: Any, weight_path: str, mconfig: dict[str, Any]) -> Any:
-    """Load model with hyperparameters and weights.
-
-    Args:
-        model_class: Model class to instantiate.
-        weight_path: Path to model weights.
-        mconfig: Model configuration dictionary.
-
-    Returns:
-        Loaded model instance.
-    """
-    hyperparameters = mconfig["model_params"]
-    model = model_class(**hyperparameters)
-    model.load_state_dict(torch.load(weight_path))
-    return model
-
-
-
- -
- -
- - -

- main - - -

-
main(
-    model_path: str,
-    weight_path: str,
-    mconfig_path: str,
-    econfig_path: str,
-    data_path: str,
-    output: str,
-    *,
-    return_labels: bool = False,
-    split: int | None = None
-) -> None
-
- -
- -

Run model prediction pipeline.

- - -

Parameters:

-
    -
  • - model_path - (str) - – -
    -

    Path to model file.

    -
    -
  • -
  • - weight_path - (str) - – -
    -

    Path to model weights.

    -
    -
  • -
  • - mconfig_path - (str) - – -
    -

    Path to model config.

    -
    -
  • -
  • - econfig_path - (str) - – -
    -

    Path to experiment config.

    -
    -
  • -
  • - data_path - (str) - – -
    -

    Path to input data.

    -
    -
  • -
  • - output - (str) - – -
    -

    Path for output predictions.

    -
    -
  • -
  • - return_labels - (bool, default: - False -) - – -
    -

    Whether to include labels.

    -
    -
  • -
  • - split - (int | None, default: - None -) - – -
    -

    Data split to use.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/predict.py -
def main(
-    model_path: str,
-    weight_path: str,
-    mconfig_path: str,
-    econfig_path: str,
-    data_path: str,
-    output: str,
-    *,
-    return_labels: bool = False,
-    split: int | None = None,
-) -> None:
-    """Run model prediction pipeline.
-
-    Args:
-        model_path: Path to model file.
-        weight_path: Path to model weights.
-        mconfig_path: Path to model config.
-        econfig_path: Path to experiment config.
-        data_path: Path to input data.
-        output: Path for output predictions.
-        return_labels: Whether to include labels.
-        split: Data split to use.
-    """
-    with open(mconfig_path) as in_json:
-        mconfig = json.load(in_json)
-
-    model_class = import_class_from_file(model_path)
-    model = load_model(model_class, weight_path, mconfig)
-
-    with open(econfig_path) as in_json:
-        experiment_name = json.load(in_json)["experiment"]
-    initialized_experiment_class = get_experiment(experiment_name)
-
-    dataloader = DataLoader(
-        TorchDataset(data_path, initialized_experiment_class, split=split),
-        batch_size=get_batch_size(mconfig),
-        shuffle=False,
-    )
-
-    predictor = PredictWrapper(model, dataloader)
-    out = predictor.predict(return_labels=return_labels)
-    y_pred, y_true = out if return_labels else (out, {})
-
-    y_pred = {k: v.tolist() for k, v in y_pred.items()}
-    y_true = {k: v.tolist() for k, v in y_true.items()}
-
-    data = pl.read_csv(data_path)
-    y_pred = parse_y_keys(y_pred, data, y_type="pred")
-    y_true = parse_y_keys(y_true, data, y_type="label")
-
-    y = {**y_pred, **y_true}
-    y = add_meta_info(data, y)
-    df = pl.from_dict(y)
-    df.write_csv(output)
-
-
-
- -
- -
- - -

- parse_y_keys - - -

-
parse_y_keys(
-    y: dict[str, Any], data: DataFrame, y_type: str = "pred"
-) -> dict[str, Any]
-
- -
- -

Parse dictionary keys to match input data format.

- - -

Parameters:

-
    -
  • - y - (dict[str, Any]) - – -
    -

    Dictionary of predictions or labels.

    -
    -
  • -
  • - data - (DataFrame) - – -
    -

    Input DataFrame.

    -
    -
  • -
  • - y_type - (str, default: - 'pred' -) - – -
    -

    Type of values ('pred' or 'label').

    -
    -
  • -
- - -

Returns:

-
    -
  • - dict[str, Any] - – -
    -

    Dictionary with updated keys.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/predict.py -
def parse_y_keys(y: dict[str, Any], data: pl.DataFrame, y_type: str = "pred") -> dict[str, Any]:
-    """Parse dictionary keys to match input data format.
-
-    Args:
-        y: Dictionary of predictions or labels.
-        data: Input DataFrame.
-        y_type: Type of values ('pred' or 'label').
-
-    Returns:
-        Dictionary with updated keys.
-    """
-    if not y:
-        return y
-
-    parsed_y = {}
-    for k1, v1 in y.items():
-        for k2 in data.columns:
-            if k1 == k2.split(":")[0]:
-                new_key = f"{k1}:{y_type}:{k2.split(':')[2]}"
-                parsed_y[new_key] = v1
-
-    return parsed_y
-
-
-
- -
- -
- - -

- run - - -

-
run() -> None
-
- -
- -

Execute model prediction pipeline.

- -
- Source code in src/stimulus/cli/predict.py -
def run() -> None:
-    """Execute model prediction pipeline."""
-    args = get_args()
-    main(
-        args.model,
-        args.weight,
-        args.model_config,
-        args.experiment_config,
-        args.data,
-        args.output,
-        return_labels=args.return_labels,
-        split=args.split,
-    )
-
-
-
- -
- - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/cli/shuffle_csv/index.html b/reference/stimulus/cli/shuffle_csv/index.html index 4f447eb5..5f009c11 100644 --- a/reference/stimulus/cli/shuffle_csv/index.html +++ b/reference/stimulus/cli/shuffle_csv/index.html @@ -1,2333 +1,117 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.cli.shuffle_csv - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

shuffle_csv

CLI module for shuffling CSV data files.

Functions:

  • get_args – Get the arguments when using from the commandline.
  • main – Shuffle the data and split it according to the default split method.
  • run – Run the CSV shuffling script.
- - - - - -
- - - - - - - - - -
- - -

- get_args - - -

-
get_args() -> Namespace
-
- -
- -

Get the arguments when using from the commandline.

- - -

Returns:

-
    -
  • - Namespace - – -
    -

    Parsed command line arguments.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/shuffle_csv.py -
def get_args() -> argparse.Namespace:
-    """Get the arguments when using from the commandline.
-
-    Returns:
-        Parsed command line arguments.
-    """
-    parser = argparse.ArgumentParser(description="Shuffle rows in a CSV data file.")
-    parser.add_argument(
-        "-c",
-        "--csv",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="The file path for the csv containing all data",
-    )
-    parser.add_argument(
-        "-y",
-        "--yaml",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="The YAML config file that hold all parameter info",
-    )
-    parser.add_argument(
-        "-o",
-        "--output",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="The output file path to write the noised csv",
-    )
-
-    return parser.parse_args()
-
-
-
- -
- -
- - -

- main - - -

-
main(
-    data_csv: str, config_yaml: str, out_path: str
-) -> None
-
- -
- -

Shuffle the data and split it according to the default split method.

- - -

Parameters:

-
    -
  • - data_csv - (str) - – -
    -

    Path to input CSV file.

    -
    -
  • -
  • - config_yaml - (str) - – -
    -

    Path to config YAML file.

    -
    -
  • -
  • - out_path - (str) - – -
    -

    Path to output shuffled CSV.

    -
    -
  • -
-

TODO major changes when this is going to select a given shuffle method and integration with split.

- -
- Source code in src/stimulus/cli/shuffle_csv.py -
def main(data_csv: str, config_yaml: str, out_path: str) -> None:
-    """Shuffle the data and split it according to the default split method.
-
-    Args:
-        data_csv: Path to input CSV file.
-        config_yaml: Path to config YAML file.
-        out_path: Path to output shuffled CSV.
-
-    TODO major changes when this is going to select a given shuffle method and integration with split.
-    """
-    # create a DatasetProcessor object from the config and the csv
-    processor = DatasetProcessor(config_path=config_yaml, csv_path=data_csv)
-
-    # shuffle the data with a default seed. TODO get the seed for the config if and when that is going to be set there.
-    processor.shuffle_labels(seed=42)
-
-    # save the modified csv
-    processor.save(out_path)
-
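A minimal usage sketch of main, assuming the module is importable as stimulus.cli.shuffle_csv; the file paths are placeholders:

from stimulus.cli.shuffle_csv import main

# shuffle the label columns of data.csv according to config.yaml and write the result
main(data_csv="data.csv", config_yaml="config.yaml", out_path="shuffled.csv")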
-
-
- -
- -
- - -

- run - - -

-
run() -> None
-
- -
- -

Run the CSV shuffling script.

- -
- Source code in src/stimulus/cli/shuffle_csv.py -
def run() -> None:
-    """Run the CSV shuffling script."""
-    args = get_args()
-    main(args.csv, args.yaml, args.output)
-
-
-
\ No newline at end of file diff --git a/reference/stimulus/cli/split_csv/index.html b/reference/stimulus/cli/split_csv/index.html index 7288efdd..35f9d85f 100644 --- a/reference/stimulus/cli/split_csv/index.html +++ b/reference/stimulus/cli/split_csv/index.html @@ -1,2356 +1,143 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.cli.split_csv - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- split_csv - - -

- -
- -

CLI module for splitting CSV data files.

- - - - - - - - - -

Functions:

-
    -
  • - get_args - – -
    -

    Get the arguments when using from the commandline.

    -
    -
  • -
  • - main - – -
    -

    Connect CSV and YAML configuration and handle sanity checks.

    -
    -
  • -
  • - run - – -
    -

    Run the CSV splitting script.

    -
    -
  • -
- - - - - -
- - - - - - - - - -
- - -

- get_args - - -

-
get_args() -> Namespace
-
- -
- -

Get the arguments when using from the commandline.

- -
- Source code in src/stimulus/cli/split_csv.py -
def get_args() -> argparse.Namespace:
-    """Get the arguments when using from the commandline."""
-    parser = argparse.ArgumentParser(description="Split a CSV data file.")
-    parser.add_argument(
-        "-c",
-        "--csv",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="The file path for the csv containing all data",
-    )
-    parser.add_argument(
-        "-y",
-        "--yaml",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="The YAML config file that hold all parameter info",
-    )
-    parser.add_argument(
-        "-o",
-        "--output",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="The output file path to write the noised csv",
-    )
-    parser.add_argument(
-        "-f",
-        "--force",
-        type=bool,
-        required=False,
-        default=False,
-        help="Overwrite the split column if it already exists in the csv",
-    )
-
-    return parser.parse_args()
-
-
-
- -
- -
- - -

- main - - -

-
main(
-    data_csv: str,
-    config_yaml: str,
-    out_path: str,
-    *,
-    force: bool = False
-) -> None
-
- -
- -

Connect CSV and YAML configuration and handle sanity checks.

- - -

Parameters:

-
    -
  • - data_csv - (str) - – -
    -

    Path to input CSV file.

    -
    -
  • -
  • - config_yaml - (str) - – -
    -

    Path to config YAML file.

    -
    -
  • -
  • - out_path - (str) - – -
    -

    Path to output split CSV.

    -
    -
  • -
  • - force - (bool, default: - False -) - – -
    -

    Overwrite the split column if it already exists in the CSV.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/split_csv.py -
def main(data_csv: str, config_yaml: str, out_path: str, *, force: bool = False) -> None:
-    """Connect CSV and YAML configuration and handle sanity checks.
-
-    Args:
-        data_csv: Path to input CSV file.
-        config_yaml: Path to config YAML file.
-        out_path: Path to output split CSV.
-        force: Overwrite the split column if it already exists in the CSV.
-    """
-    # create a DatasetProcessor object from the config and the csv
-    processor = DatasetProcessor(config_path=config_yaml, csv_path=data_csv)
-
-    # create a split manager from the config
-    split_config = processor.dataset_manager.config.split
-    with open(config_yaml) as f:
-        yaml_config = YamlSubConfigDict(**yaml.safe_load(f))
-    split_loader = SplitLoader(seed=yaml_config.global_params.seed)
-    split_loader.initialize_splitter_from_config(split_config)
-    split_manager = SplitManager(split_loader)
-
-    # apply the split method to the data
-    processor.add_split(split_manager=split_manager, force=force)
-
-    # save the modified csv
-    processor.save(out_path)
-
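A minimal usage sketch based on the signature above, assuming the module path stimulus.cli.split_csv; paths are placeholders:

from stimulus.cli.split_csv import main

# add a split column to data.csv, overwriting an existing split column if present
main(data_csv="data.csv", config_yaml="config.yaml", out_path="split.csv", force=True)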
-
-
- -
- -
- - -

- run - - -

-
run() -> None
-
- -
- -

Run the CSV splitting script.

- -
- Source code in src/stimulus/cli/split_csv.py -
def run() -> None:
-    """Run the CSV splitting script."""
-    args = get_args()
-    main(args.csv, args.yaml, args.output, force=args.force)
-
-
-
\ No newline at end of file diff --git a/reference/stimulus/cli/split_yaml/index.html b/reference/stimulus/cli/split_yaml/index.html index 0350d317..11fbf09a 100644 --- a/reference/stimulus/cli/split_yaml/index.html +++ b/reference/stimulus/cli/split_yaml/index.html @@ -1,2244 +1,107 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.cli.split_yaml - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- split_yaml - - -

- -
- -

CLI module for splitting YAML configuration files.

-

This module provides functionality to split a single YAML configuration file into multiple YAML files, each containing a specific combination of data transformations and splits. The resulting YAML files can be used as input configurations for the stimulus package.

- - - - - - - - - -

Functions:

-
    -
  • - get_args - – -
    -

    Get the arguments when using from the command line.

    -
    -
  • -
  • - main - – -
    -

    Reads a YAML config file and generates all possible data configurations.

    -
    -
  • -
- - - - - -
- - - - - - - - - -
- - -

- get_args - - -

-
get_args() -> Namespace
-
- -
- -

Get the arguments when using from the command line.

- -
- Source code in src/stimulus/cli/split_yaml.py -
def get_args() -> argparse.Namespace:
-    """Get the arguments when using from the command line."""
-    parser = argparse.ArgumentParser(description="")
-    parser.add_argument(
-        "-j",
-        "--yaml",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="The YAML config file that hold all transform - split - parameter info",
-    )
-    parser.add_argument(
-        "-d",
-        "--out_dir",
-        type=str,
-        required=False,
-        nargs="?",
-        const="./",
-        default="./",
-        metavar="DIR",
-        help="The output dir where all the YAMLs are written to. Output YAML will be called split-#[number].yaml transform-#[number].yaml. Default -> ./",
-    )
-
-    return parser.parse_args()
-
-
-
- -
- -
- - -

- main - - -

-
main(config_yaml: str, out_dir_path: str) -> None
-
- -
- -

Reads a YAML config file and generates all possible data configurations.

-

This script reads a YAML with a defined structure and creates all the YAML files ready to be passed to the stimulus package.

-

The structure of the YAML is described here -> TODO paste here link to documentation. This YAML and its structure summarize how to generate all the transform - split and respective parameter combinations. Each resulting YAML will hold only one combination of these three things.

-

This script will always generate at least one YAML file that represents the combination that does not touch the data (no transform) and uses the default split behavior.

- -
- Source code in src/stimulus/cli/split_yaml.py -
def main(config_yaml: str, out_dir_path: str) -> None:
-    """Reads a YAML config file and generates all possible data configurations.
-
-    This script reads a YAML with a defined structure and creates all the YAML files ready to be passed to
-    the stimulus package.
-
-    The structure of the YAML is described here -> TODO paste here link to documentation.
-    This YAML and it's structure summarize how to generate all the transform - split and respective parameter combinations.
-    Each resulting YAML will hold only one combination of the above three things.
-
-    This script will always generate at least one YAML file that represent the combination that does not touch the data (no transform)
-    and uses the default split behavior.
-    """
-    # read the yaml experiment config and load it to dictionary
-    yaml_config: dict[str, Any] = {}
-    with open(config_yaml) as conf_file:
-        yaml_config = yaml.safe_load(conf_file)
-
-    yaml_config_dict: YamlConfigDict = YamlConfigDict(**yaml_config)
-    # check if the yaml schema is correct
-    check_yaml_schema(yaml_config_dict)
-
-    # generate all the YAML configs
-    data_configs = generate_data_configs(yaml_config_dict)
-
-    # dump all the YAML configs into files
-    dump_yaml_list_into_files(data_configs, out_dir_path, "test")
-
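A minimal usage sketch, assuming the module path stimulus.cli.split_yaml; paths are placeholders:

from stimulus.cli.split_yaml import main

# expand experiment.yaml into one YAML per transform/split/parameter combination
main(config_yaml="experiment.yaml", out_dir_path="./configs/")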
-
-
\ No newline at end of file diff --git a/reference/stimulus/cli/transform_csv/index.html b/reference/stimulus/cli/transform_csv/index.html index ad326a1d..4319694e 100644 --- a/reference/stimulus/cli/transform_csv/index.html +++ b/reference/stimulus/cli/transform_csv/index.html @@ -1,2293 +1,117 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.cli.transform_csv - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- transform_csv - - -

- -
- -

CLI module for transforming CSV data files.

- - - - - - - - - -

Functions:

-
    -
  • - get_args - – -
    -

    Get the arguments when using from the commandline.

    -
    -
  • -
  • - main - – -
    -

    Connect CSV and YAML configuration and handle sanity checks.

    -
    -
  • -
  • - run - – -
    -

    Run the CSV transformation script.

    -
    -
  • -
- - - - - -
- - - - - - - - - -
- - -

- get_args - - -

-
get_args() -> Namespace
-
- -
- -

Get the arguments when using from the commandline.

- -
- Source code in src/stimulus/cli/transform_csv.py -
def get_args() -> argparse.Namespace:
-    """Get the arguments when using from the commandline."""
-    parser = argparse.ArgumentParser(description="CLI for transforming CSV data files using YAML configuration.")
-    parser.add_argument(
-        "-c",
-        "--csv",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="The file path for the csv containing all data",
-    )
-    parser.add_argument(
-        "-y",
-        "--yaml",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="The YAML config file that holds all parameter info",
-    )
-    parser.add_argument(
-        "-o",
-        "--output",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="The output file path to write the noised csv",
-    )
-
-    return parser.parse_args()
-
-
-
- -
- -
- - -

- main - - -

-
main(
-    data_csv: str, config_yaml: str, out_path: str
-) -> None
-
- -
- -

Connect CSV and YAML configuration and handle sanity checks.

-

This launcher will be the connection between the csv and one YAML configuration. It should also handle some sanity checks.

- -
- Source code in src/stimulus/cli/transform_csv.py -
def main(data_csv: str, config_yaml: str, out_path: str) -> None:
-    """Connect CSV and YAML configuration and handle sanity checks.
-
-    This launcher will be the connection between the csv and one YAML configuration.
-    It should also handle some sanity checks.
-    """
-    # initialize the csv processing class, it open and reads the csv in automatic
-    processor = DatasetProcessor(config_path=config_yaml, csv_path=data_csv)
-
-    # initialize the transform manager
-    transform_config = processor.dataset_manager.config.transforms
-    with open(config_yaml) as f:
-        yaml_config = YamlSubConfigDict(**yaml.safe_load(f))
-    transform_loader = TransformLoader(seed=yaml_config.global_params.seed)
-    transform_loader.initialize_column_data_transformers_from_config(transform_config)
-    transform_manager = TransformManager(transform_loader)
-
-    # apply the transformations to the data
-    processor.apply_transformation_group(transform_manager)
-
-    # write the transformed data to a new csv
-    processor.save(out_path)
-
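A minimal usage sketch mirroring the command-line flags above, assuming the module path stimulus.cli.transform_csv; paths are placeholders:

from stimulus.cli.transform_csv import main

# apply the transformations defined in config.yaml to data.csv
main(data_csv="data.csv", config_yaml="config.yaml", out_path="transformed.csv")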
-
-
- -
- -
- - -

- run - - -

-
run() -> None
-
- -
- -

Run the CSV transformation script.

- -
- Source code in src/stimulus/cli/transform_csv.py -
def run() -> None:
-    """Run the CSV transformation script."""
-    args = get_args()
-    main(args.csv, args.yaml, args.output)
-
-
-
\ No newline at end of file diff --git a/reference/stimulus/cli/tuning/index.html b/reference/stimulus/cli/tuning/index.html index 28682188..36878128 100644 --- a/reference/stimulus/cli/tuning/index.html +++ b/reference/stimulus/cli/tuning/index.html @@ -1,2786 +1,446 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.cli.tuning - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- tuning - - -

- -
- -

CLI module for running raytune tuning experiment.

- - - - - -

Modules:

-
    -
  • - launch_utils - – -
    -

    Utility functions for launching and configuring experiments and ray tuning.

    -
    -
  • -
  • - loaders - – -
    -

    Loaders serve as interfaces between the CSV master class and custom methods.

    -
    -
  • -
  • - raytune_learner - – -
    -

    Ray Tune wrapper and trainable model classes for hyperparameter optimization.

    -
    -
  • -
  • - raytune_parser - – -
    -

    Ray Tune results parser for extracting and saving best model configurations and weights.

    -
    -
  • -
  • - yaml_data - – -
    -

    Utility module for handling YAML configuration files and their validation.

    -
    -
  • -
  • - yaml_model_schema - – -
    -

    Module for handling YAML configuration files and converting them to Ray Tune format.

    -
    -
  • -
- - - - - - -

Functions:

-
    -
  • - get_args - – -
    -

    Get the arguments when using from the commandline.

    -
    -
  • -
  • - main - – -
    -

    Run the main model checking pipeline.

    -
    -
  • -
  • - run - – -
    -

    Run the model checking script.

    -
    -
  • -
- - - - - -
- - - - - - - - - -
- - -

- get_args - - -

-
get_args() -> Namespace
-
- -
- -

Get the arguments when using from the commandline.

- - -

Returns:

-
    -
  • - Namespace - – -
    -

    Parsed command line arguments.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/tuning.py -
def get_args() -> argparse.Namespace:
-    """Get the arguments when using from the commandline.
-
-    Returns:
-        Parsed command line arguments.
-    """
-    parser = argparse.ArgumentParser(description="Launch check_model.")
-    parser.add_argument("-d", "--data", type=str, required=True, metavar="FILE", help="Path to input csv file.")
-    parser.add_argument("-m", "--model", type=str, required=True, metavar="FILE", help="Path to model file.")
-    parser.add_argument(
-        "-e",
-        "--data_config",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="Path to data config file.",
-    )
-    parser.add_argument(
-        "-c",
-        "--model_config",
-        type=str,
-        required=True,
-        metavar="FILE",
-        help="Path to yaml config training file.",
-    )
-    parser.add_argument(
-        "-w",
-        "--initial_weights",
-        type=str,
-        required=False,
-        nargs="?",
-        const=None,
-        default=None,
-        metavar="FILE",
-        help="The path to the initial weights (optional).",
-    )
-    parser.add_argument(
-        "--ray_results_dirpath",
-        type=str,
-        required=False,
-        nargs="?",
-        const=None,
-        default=None,
-        metavar="DIR_PATH",
-        help="Location where ray_results output dir should be written. If None, uses ~/ray_results.",
-    )
-    parser.add_argument(
-        "-o",
-        "--output",
-        type=str,
-        required=False,
-        nargs="?",
-        const="best_model.pt",
-        default="best_model.pt",
-        metavar="FILE",
-        help="The output file path to write the trained model to",
-    )
-    parser.add_argument(
-        "-bm",
-        "--best_metrics",
-        type=str,
-        required=False,
-        nargs="?",
-        const="best_metrics.csv",
-        default="best_metrics.csv",
-        metavar="FILE",
-        help="The path to write the best metrics to",
-    )
-    parser.add_argument(
-        "-bc",
-        "--best_config",
-        type=str,
-        required=False,
-        nargs="?",
-        const="best_config.yaml",
-        default="best_config.yaml",
-        metavar="FILE",
-        help="The path to write the best config to",
-    )
-    parser.add_argument(
-        "-bo",
-        "--best_optimizer",
-        type=str,
-        required=False,
-        nargs="?",
-        const="best_optimizer.pt",
-        default="best_optimizer.pt",
-        metavar="FILE",
-        help="The path to write the best optimizer to",
-    )
-    parser.add_argument(
-        "--tune_run_name",
-        type=str,
-        required=False,
-        nargs="?",
-        const=None,
-        default=None,
-        metavar="CUSTOM_RUN_NAME",
-        help=(
-            "Tells ray tune what the 'experiment_name' (i.e. the given tune_run name) should be. "
-            "If set, the subdirectory of ray_results is named with this value and its train dir is prefixed accordingly. "
-            "Default None means that ray will generate such a name on its own."
-        ),
-    )
-    parser.add_argument(
-        "--debug_mode",
-        action="store_true",
-        help="Activate debug mode for tuning. Default false, no debug.",
-    )
-    return parser.parse_args()
-
-
-
- -
- -
- - -

- main - - -

-
main(
-    model_path: str,
-    data_path: str,
-    data_config_path: str,
-    model_config_path: str,
-    initial_weights: str | None = None,
-    ray_results_dirpath: str | None = None,
-    output_path: str | None = None,
-    best_optimizer_path: str | None = None,
-    best_metrics_path: str | None = None,
-    best_config_path: str | None = None,
-    *,
-    debug_mode: bool = False
-) -> None
-
- -
- -

Run the main model checking pipeline.

- - -

Parameters:

-
    -
  • - data_path - (str) - – -
    -

    Path to input data file.

    -
    -
  • -
  • - model_path - (str) - – -
    -

    Path to model file.

    -
    -
  • -
  • - data_config_path - (str) - – -
    -

    Path to data config file.

    -
    -
  • -
  • - model_config_path - (str) - – -
    -

    Path to model config file.

    -
    -
  • -
  • - initial_weights - (str | None, default: - None -) - – -
    -

    Optional path to initial weights.

    -
    -
  • -
  • - ray_results_dirpath - (str | None, default: - None -) - – -
    -

    Directory for ray results.

    -
    -
  • -
  • - debug_mode - (bool, default: - False -) - – -
    -

    Whether to run in debug mode.

    -
    -
  • -
  • - output_path - (str | None, default: - None -) - – -
    -

    Path to write the best model to.

    -
    -
  • -
  • - best_optimizer_path - (str | None, default: - None -) - – -
    -

    Path to write the best optimizer to.

    -
    -
  • -
  • - best_metrics_path - (str | None, default: - None -) - – -
    -

    Path to write the best metrics to.

    -
    -
  • -
  • - best_config_path - (str | None, default: - None -) - – -
    -

    Path to write the best config to.

    -
    -
  • -
- -
- Source code in src/stimulus/cli/tuning.py -
def main(
-    model_path: str,
-    data_path: str,
-    data_config_path: str,
-    model_config_path: str,
-    initial_weights: str | None = None,  # noqa: ARG001
-    ray_results_dirpath: str | None = None,
-    output_path: str | None = None,
-    best_optimizer_path: str | None = None,
-    best_metrics_path: str | None = None,
-    best_config_path: str | None = None,
-    *,
-    debug_mode: bool = False,
-) -> None:
-    """Run the main model checking pipeline.
-
-    Args:
-        data_path: Path to input data file.
-        model_path: Path to model file.
-        data_config_path: Path to data config file.
-        model_config_path: Path to model config file.
-        initial_weights: Optional path to initial weights.
-        ray_results_dirpath: Directory for ray results.
-        debug_mode: Whether to run in debug mode.
-        output_path: Path to write the best model to.
-        best_optimizer_path: Path to write the best optimizer to.
-        best_metrics_path: Path to write the best metrics to.
-        best_config_path: Path to write the best config to.
-    """
-    # Convert data config to proper type
-    with open(data_config_path) as file:
-        data_config_dict: dict[str, Any] = yaml.safe_load(file)
-    data_config: yaml_data.YamlSubConfigDict = yaml_data.YamlSubConfigDict(**data_config_dict)
-
-    with open(model_config_path) as file:
-        model_config_dict: dict[str, Any] = yaml.safe_load(file)
-    model_config: yaml_model_schema.Model = yaml_model_schema.Model(**model_config_dict)
-
-    encoder_loader = loaders.EncoderLoader()
-    encoder_loader.initialize_column_encoders_from_config(column_config=data_config.columns)
-
-    model_class = launch_utils.import_class_from_file(model_path)
-
-    ray_config_loader = yaml_model_schema.YamlRayConfigLoader(model=model_config)
-    ray_config_model = ray_config_loader.get_config()
-
-    tuner = raytune_learner.TuneWrapper(
-        model_config=ray_config_model,
-        data_config_path=data_config_path,
-        model_class=model_class,
-        data_path=data_path,
-        encoder_loader=encoder_loader,
-        seed=42,
-        ray_results_dir=ray_results_dirpath,
-        debug=debug_mode,
-    )
-
-    # Ensure output_path is provided
-    if output_path is None:
-        raise ValueError("output_path must not be None")
-    try:
-        grid_results = tuner.tune()
-        if not grid_results:
-            _raise_empty_grid()
-
-        # Initialize parser with results
-        parser = raytune_parser.TuneParser(result=grid_results)
-
-        # Ensure output directory exists
-        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
-
-        # Save outputs using proper Result object API
-        parser.save_best_model(output=output_path)
-        parser.save_best_optimizer(output=best_optimizer_path)
-        parser.save_best_metrics_dataframe(output=best_metrics_path)
-        parser.save_best_config(output=best_config_path)
-
-    except RuntimeError:
-        logger.exception("Tuning failed")
-        raise
-    except KeyError:
-        logger.exception("Missing expected result key")
-        raise
-    finally:
-        if debug_mode:
-            logger.info("Debug mode - preserving Ray results directory")
-        elif ray_results_dirpath:
-            shutil.rmtree(ray_results_dirpath, ignore_errors=True)
-
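A minimal usage sketch of the tuning entry point based on the signature above, assuming the module path stimulus.cli.tuning; every path is a placeholder:

from stimulus.cli.tuning import main

main(
    model_path="model.py",
    data_path="data.csv",
    data_config_path="data_config.yaml",
    model_config_path="model_config.yaml",
    output_path="best_model.pt",
    best_optimizer_path="best_optimizer.pt",
    best_metrics_path="best_metrics.csv",
    best_config_path="best_config.yaml",
)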
-
-
- -
- -
- - -

- run - - -

-
run() -> None
-
- -
- -

Run the model checking script.

- -
- Source code in src/stimulus/cli/tuning.py -
def run() -> None:
-    """Run the model checking script."""
-    args = get_args()
-    main(
-        data_path=args.data,
-        model_path=args.model,
-        data_config_path=args.data_config,
-        model_config_path=args.model_config,
-        initial_weights=args.initial_weights,
-        ray_results_dirpath=args.ray_results_dirpath,
-        output_path=args.output,
-        best_optimizer_path=args.best_optimizer,
-        best_metrics_path=args.best_metrics,
-        best_config_path=args.best_config,
-        debug_mode=args.debug_mode,
-    )
-
-
-
\ No newline at end of file diff --git a/reference/stimulus/data/data_handlers/index.html b/reference/stimulus/data/data_handlers/index.html index 5e03b79f..e8737541 100644 --- a/reference/stimulus/data/data_handlers/index.html +++ b/reference/stimulus/data/data_handlers/index.html @@ -1,5494 +1,804 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.data.data_handlers - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- data_handlers - - -

- -
- -

This module provides classes for handling CSV data files in the STIMULUS format.

-

The module contains three main classes:
- DatasetHandler: Base class for loading and managing CSV data
- DatasetProcessor: Class for preprocessing data with transformations and splits
- DatasetLoader: Class for loading processed data for model training

-

The data format consists of:
1. A CSV file containing the raw data
2. A YAML configuration file that defines:
   - Column names and their roles (input/label/meta)
   - Data types and encoders for each column
   - Transformations to apply (noise, augmentation, etc.)
   - Split configuration for train/val/test sets

-

The data handling pipeline consists of:
1. Loading raw CSV data according to the YAML config
2. Applying configured transformations
3. Splitting into train/val/test sets based on config
4. Encoding data for model training using specified encoders

-

See titanic.yaml in tests/test_data/titanic/ for an example configuration file format.
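A hedged end-to-end sketch of the pipeline described above, using only calls documented below; the file paths are placeholders and the encoder_loader construction is left out because it belongs to the loaders module:

from stimulus.data.data_handlers import DatasetLoader, DatasetProcessor

# preprocessing: load the raw CSV together with its YAML config
processor = DatasetProcessor(config_path="titanic.yaml", csv_path="titanic.csv")
# transformations and splits are applied through TransformManager / SplitManager (see below)
processor.save("processed.csv")

# loading: encode the processed data for model training
# loader = DatasetLoader("titanic.yaml", "processed.csv", encoder_loader, split=0)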

- - - - - -

Modules:

-
    -
  • - loaders - – -
    -

    Loaders serve as interfaces between the CSV master class and custom methods.

    -
    -
  • -
  • - yaml_data - – -
    -

    Utility module for handling YAML configuration files and their validation.

    -
    -
  • -
- - - - -

Classes:

-
    -
  • - DatasetHandler - – -
    -

    Main class for handling dataset loading, encoding, transformation and splitting.

    -
    -
  • -
  • - DatasetLoader - – -
    -

    Class for loading dataset and passing it to the deep learning model.

    -
    -
  • -
  • - DatasetManager - – -
    -

    Class for managing the dataset.

    -
    -
  • -
  • - DatasetProcessor - – -
    -

    Class for loading dataset, applying transformations and splitting.

    -
    -
  • -
  • - EncodeManager - – -
    -

    Manages the encoding of data columns using configured encoders.

    -
    -
  • -
  • - SplitManager - – -
    -

    Class for managing the splitting.

    -
    -
  • -
  • - TransformManager - – -
    -

    Class for managing the transformations.

    -
    -
  • -
- - - - - - - -
- - - - - - - - -
- - - -

- DatasetHandler - - -

-
DatasetHandler(config_path: str, csv_path: str)
-
- -
- - -

Main class for handling dataset loading, encoding, transformation and splitting.

-

This class coordinates the interaction between different managers to process CSV datasets according to the provided configuration.

- - -

Attributes:

-
    -
  • - encoder_manager - (EncodeManager) - – -
    -

    Manager for handling data encoding operations.

    -
    -
  • -
  • - transform_manager - (TransformManager) - – -
    -

    Manager for handling data transformations.

    -
    -
  • -
  • - split_manager - (SplitManager) - – -
    -

    Manager for handling dataset splitting.

    -
    -
  • -
  • - dataset_manager - (DatasetManager) - – -
    -

    Manager for organizing dataset columns and config.

    -
    -
  • -
- - - -

Parameters:

-
    -
  • - config_path - (str) - – -
    -

    Path to the dataset configuration file.

    -
    -
  • -
  • - csv_path - (str) - – -
    -

    Path to the CSV data file.

    -
    -
  • -
- - - - - - - - - -

Methods:

-
    -
  • - load_csv - – -
    -

    Load the CSV file into a polars DataFrame.

    -
    -
  • -
  • - read_csv_header - – -
    -

    Get the column names from the header of the CSV file.

    -
    -
  • -
  • - save - – -
    -

    Saves the data to a csv file.

    -
    -
  • -
  • - select_columns - – -
    -

    Select specific columns from the DataFrame and return as a dictionary.

    -
    -
  • -
- - - -
- Source code in src/stimulus/data/data_handlers.py -
def __init__(
-    self,
-    config_path: str,
-    csv_path: str,
-) -> None:
-    """Initialize the DatasetHandler with required config.
-
-    Args:
-        config_path (str): Path to the dataset configuration file.
-        csv_path (str): Path to the CSV data file.
-    """
-    self.dataset_manager = DatasetManager(config_path)
-    self.columns = self.read_csv_header(csv_path)
-    self.data = self.load_csv(csv_path)
-
-
- - - -
- - - - - - - - - -
- - -

- load_csv - - -

-
load_csv(csv_path: str) -> DataFrame
-
- -
- -

Load the CSV file into a polars DataFrame.

- - -

Parameters:

-
    -
  • - csv_path - (str) - – -
    -

    Path to the CSV file to load.

    -
    -
  • -
- - -

Returns:

-
    -
  • - DataFrame - – -
    -

    pl.DataFrame: Polars DataFrame containing the loaded CSV data.

    -
    -
  • -
- -
- Source code in src/stimulus/data/data_handlers.py -
def load_csv(self, csv_path: str) -> pl.DataFrame:
-    """Load the CSV file into a polars DataFrame.
-
-    Args:
-        csv_path (str): Path to the CSV file to load.
-
-    Returns:
-        pl.DataFrame: Polars DataFrame containing the loaded CSV data.
-    """
-    return pl.read_csv(csv_path)
-
-
-
- -
- -
- - -

- read_csv_header - - -

-
read_csv_header(csv_path: str) -> list
-
- -
- -

Get the column names from the header of the CSV file.

- - -

Parameters:

-
    -
  • - csv_path - (str) - – -
    -

    Path to the CSV file to read headers from.

    -
    -
  • -
- - -

Returns:

-
    -
  • -list ( list -) – -
    -

    List of column names from the CSV header.

    -
    -
  • -
- -
- Source code in src/stimulus/data/data_handlers.py -
def read_csv_header(self, csv_path: str) -> list:
-    """Get the column names from the header of the CSV file.
-
-    Args:
-        csv_path (str): Path to the CSV file to read headers from.
-
-    Returns:
-        list: List of column names from the CSV header.
-    """
-    with open(csv_path) as f:
-        return f.readline().strip().split(",")
-
-
-
- -
- -
- - -

- save - - -

-
save(path: str) -> None
-
- -
- -

Saves the data to a csv file.

- -
- Source code in src/stimulus/data/data_handlers.py -
def save(self, path: str) -> None:
-    """Saves the data to a csv file."""
-    self.data.write_csv(path)
-
-
-
- -
- -
- - -

- select_columns - - -

-
select_columns(columns: list) -> dict
-
- -
- -

Select specific columns from the DataFrame and return as a dictionary.

- - -

Parameters:

-
    -
  • - columns - (list) - – -
    -

    List of column names to select.

    -
    -
  • -
- - -

Returns:

-
    -
  • -dict ( dict -) – -
    -

    A dictionary where keys are column names and values are lists containing the column data.

    -
    -
  • -
- - -
- Example -
-
-
-

handler = DatasetHandler(...)
data_dict = handler.select_columns(["col1", "col2"])
# Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}

-
-
-
-
-
- Source code in src/stimulus/data/data_handlers.py -
def select_columns(self, columns: list) -> dict:
-    """Select specific columns from the DataFrame and return as a dictionary.
-
-    Args:
-        columns (list): List of column names to select.
-
-    Returns:
-        dict: A dictionary where keys are column names and values are lists containing the column data.
-
-    Example:
-        >>> handler = DatasetHandler(...)
-        >>> data_dict = handler.select_columns(["col1", "col2"])
-        >>> # Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}
-    """
-    df = self.data.select(columns)
-    return {col: df[col].to_list() for col in columns}
-
-
-
- -
- - - -
- -
- -
- -
- - - -

- DatasetLoader - - -

-
DatasetLoader(
-    config_path: str,
-    csv_path: str,
-    encoder_loader: EncoderLoader,
-    split: Union[int, None] = None,
-)
-
- -
-

- Bases: DatasetHandler

- - -

Class for loading dataset and passing it to the deep learning model.

- - - - - - - - - -

Methods:

-
    -
  • - get_all_items - – -
    -

    Get the full dataset as three separate dictionaries for inputs, labels and metadata.

    -
    -
  • -
  • - get_all_items_and_length - – -
    -

    Get the full dataset as three separate dictionaries for inputs, labels and metadata, and the length of the data.

    -
    -
  • -
  • - load_csv - – -
    -

    Load the CSV file into a polars DataFrame.

    -
    -
  • -
  • - load_csv_per_split - – -
    -

    Load the part of csv file that has the specified split value.

    -
    -
  • -
  • - read_csv_header - – -
    -

    Get the column names from the header of the CSV file.

    -
    -
  • -
  • - save - – -
    -

    Saves the data to a csv file.

    -
    -
  • -
  • - select_columns - – -
    -

    Select specific columns from the DataFrame and return as a dictionary.

    -
    -
  • -
- - - -
- Source code in src/stimulus/data/data_handlers.py -
def __init__(
-    self,
-    config_path: str,
-    csv_path: str,
-    encoder_loader: loaders.EncoderLoader,
-    split: Union[int, None] = None,
-) -> None:
-    """Initialize the DatasetLoader."""
-    super().__init__(config_path, csv_path)
-    self.encoder_manager = EncodeManager(encoder_loader)
-    self.data = self.load_csv_per_split(csv_path, split) if split is not None else self.load_csv(csv_path)
-
-
- - - -
- - - - - - - - - -
- - -

- get_all_items - - -

-
get_all_items() -> tuple[dict, dict, dict]
-
- -
- -

Get the full dataset as three separate dictionaries for inputs, labels and metadata.

- - -

Returns:

-
    -
  • - tuple[dict, dict, dict] - – -
    -

    tuple[dict, dict, dict]: Three dictionaries containing: -- Input dictionary mapping input column names to encoded input data -- Label dictionary mapping label column names to encoded label data -- Meta dictionary mapping meta column names to meta data

    -
    -
  • -
- - -
- Example -
-
-
-

handler = DatasetHandler(...)
input_dict, label_dict, meta_dict = handler.get_dataset()
print(input_dict.keys())   # dict_keys(['age', 'fare'])
print(label_dict.keys())   # dict_keys(['survived'])
print(meta_dict.keys())    # dict_keys(['passenger_id'])

-
-
-
-
-
- Source code in src/stimulus/data/data_handlers.py -
def get_all_items(self) -> tuple[dict, dict, dict]:
-    """Get the full dataset as three separate dictionaries for inputs, labels and metadata.
-
-    Returns:
-        tuple[dict, dict, dict]: Three dictionaries containing:
-            - Input dictionary mapping input column names to encoded input data
-            - Label dictionary mapping label column names to encoded label data
-            - Meta dictionary mapping meta column names to meta data
-
-    Example:
-        >>> handler = DatasetHandler(...)
-        >>> input_dict, label_dict, meta_dict = handler.get_dataset()
-        >>> print(input_dict.keys())
-        dict_keys(['age', 'fare'])
-        >>> print(label_dict.keys())
-        dict_keys(['survived'])
-        >>> print(meta_dict.keys())
-        dict_keys(['passenger_id'])
-    """
-    input_columns, label_columns, meta_columns = (
-        self.dataset_manager.column_categories["input"],
-        self.dataset_manager.column_categories["label"],
-        self.dataset_manager.column_categories["meta"],
-    )
-    input_data = self.encoder_manager.encode_dataframe(self.data[input_columns])
-    label_data = self.encoder_manager.encode_dataframe(self.data[label_columns])
-    meta_data = {key: self.data[key].to_list() for key in meta_columns}
-    return input_data, label_data, meta_data
-
-
-
- -
- -
- - -

- get_all_items_and_length - - -

-
get_all_items_and_length() -> (
-    tuple[tuple[dict, dict, dict], int]
-)
-
- -
- -

Get the full dataset as three separate dictionaries for inputs, labels and metadata, and the length of the data.

- -
- Source code in src/stimulus/data/data_handlers.py -
def get_all_items_and_length(self) -> tuple[tuple[dict, dict, dict], int]:
-    """Get the full dataset as three separate dictionaries for inputs, labels and metadata, and the length of the data."""
-    return self.get_all_items(), len(self.data)
-
-
-
- -
- -
- - -

- load_csv - - -

-
load_csv(csv_path: str) -> DataFrame
-
- -
- -

Load the CSV file into a polars DataFrame.

- - -

Parameters:

-
    -
  • - csv_path - (str) - – -
    -

    Path to the CSV file to load.

    -
    -
  • -
- - -

Returns:

-
    -
  • - DataFrame - – -
    -

    pl.DataFrame: Polars DataFrame containing the loaded CSV data.

    -
    -
  • -
- -
- Source code in src/stimulus/data/data_handlers.py -
def load_csv(self, csv_path: str) -> pl.DataFrame:
-    """Load the CSV file into a polars DataFrame.
-
-    Args:
-        csv_path (str): Path to the CSV file to load.
-
-    Returns:
-        pl.DataFrame: Polars DataFrame containing the loaded CSV data.
-    """
-    return pl.read_csv(csv_path)
-
-
-
- -
- -
- - -

- load_csv_per_split - - -

-
load_csv_per_split(csv_path: str, split: int) -> DataFrame
-
- -
- -

Load the part of csv file that has the specified split value.

-

Split is a number where 0 is train, 1 is validation, and 2 is test. It is accessed through the column with category split; an example column name could be split:split:int.

-

NOTE: the aim of this function is that, depending on the training, validation, or test scenario, only the relevant portion of the data is loaded.

- -
- Source code in src/stimulus/data/data_handlers.py -
def load_csv_per_split(self, csv_path: str, split: int) -> pl.DataFrame:
-    """Load the part of csv file that has the specified split value.
-
-    Split is a number that for 0 is train, 1 is validation, 2 is test.
-    This is accessed through the column with category `split`. Example column name could be `split:split:int`.
-
-    NOTE that the aim of having this function is that depending on the training, validation and test scenarios,
-    we are gonna load only the relevant data for it.
-    """
-    if "split" not in self.columns:
-        raise ValueError("The category split is not present in the csv file")
-    if split not in [0, 1, 2]:
-        raise ValueError(f"The split value should be 0, 1 or 2. The specified split value is {split}")
-    return pl.scan_csv(csv_path).filter(pl.col("split") == split).collect()
-
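For example, a loader restricted to the training partition (a sketch; the config path, CSV path and encoder_loader are placeholders):

# split=0 keeps only rows whose split column equals 0, i.e. the training set
train_loader = DatasetLoader(
    config_path="titanic.yaml",
    csv_path="processed.csv",
    encoder_loader=encoder_loader,
    split=0,
)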
-
-
- -
- -
- - -

- read_csv_header - - -

-
read_csv_header(csv_path: str) -> list
-
- -
- -

Get the column names from the header of the CSV file.

- - -

Parameters:

-
    -
  • - csv_path - (str) - – -
    -

    Path to the CSV file to read headers from.

    -
    -
  • -
- - -

Returns:

-
    -
  • -list ( list -) – -
    -

    List of column names from the CSV header.

    -
    -
  • -
- -
- Source code in src/stimulus/data/data_handlers.py -
def read_csv_header(self, csv_path: str) -> list:
-    """Get the column names from the header of the CSV file.
-
-    Args:
-        csv_path (str): Path to the CSV file to read headers from.
-
-    Returns:
-        list: List of column names from the CSV header.
-    """
-    with open(csv_path) as f:
-        return f.readline().strip().split(",")
-
-
-
- -
- -
- - -

- save - - -

-
save(path: str) -> None
-
- -
- -

Saves the data to a csv file.

- -
- Source code in src/stimulus/data/data_handlers.py -
def save(self, path: str) -> None:
-    """Saves the data to a csv file."""
-    self.data.write_csv(path)
-
-
-
- -
- -
- - -

- select_columns - - -

-
select_columns(columns: list) -> dict
-
- -
- -

Select specific columns from the DataFrame and return as a dictionary.

- - -

Parameters:

-
    -
  • - columns - (list) - – -
    -

    List of column names to select.

    -
    -
  • -
- - -

Returns:

-
    -
  • -dict ( dict -) – -
    -

    A dictionary where keys are column names and values are lists containing the column data.

    -
    -
  • -
- - -
- Example -
-
-
-

handler = DatasetHandler(...)
data_dict = handler.select_columns(["col1", "col2"])
# Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}

-
-
-
-
-
- Source code in src/stimulus/data/data_handlers.py -
def select_columns(self, columns: list) -> dict:
-    """Select specific columns from the DataFrame and return as a dictionary.
-
-    Args:
-        columns (list): List of column names to select.
-
-    Returns:
-        dict: A dictionary where keys are column names and values are lists containing the column data.
-
-    Example:
-        >>> handler = DatasetHandler(...)
-        >>> data_dict = handler.select_columns(["col1", "col2"])
-        >>> # Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}
-    """
-    df = self.data.select(columns)
-    return {col: df[col].to_list() for col in columns}
-
-
-
- -
- - - -
- -
- -
- -
- - - -

- DatasetManager - - -

-
DatasetManager(config_path: str)
-
- -
- - -

Class for managing the dataset.

-

This class handles loading and organizing dataset configuration from YAML files. It manages column categorization into input, label and meta types based on the config.

- - -

Attributes:

-
    -
  • - config - (dict) - – -
    -

    The loaded configuration dictionary from YAML

    -
    -
  • -
  • - column_categories - (dict) - – -
    -

    Dictionary mapping column types to lists of column names

    -
    -
  • -
- - -

Methods:

-
    -
  • - _load_config - – -
    -

    Loads the config from a YAML file, taking the config path (str) and returning a dict.

    -
    -
  • -
  • - categorize_columns_by_type - – -
    -

    Organizes the columns into input, label, meta based on the config.

    -
    -
  • -
- - - - - - - - - -

Methods:

- - - - -
- Source code in src/stimulus/data/data_handlers.py -
def __init__(
-    self,
-    config_path: str,
-) -> None:
-    """Initialize the DatasetManager."""
-    self.config = self._load_config(config_path)
-    self.column_categories = self.categorize_columns_by_type()
-
-
- - - -
- - - - - - - - - -
- - -

- categorize_columns_by_type - - -

-
categorize_columns_by_type() -> dict
-
- -
- -

Organizes columns from config into input, label, and meta categories.

-

Reads the column definitions from the config and sorts them into categories based on their column_type field.

- - -

Returns:

-
    -
  • -dict ( dict -) – -
    -

Dictionary containing lists of column names for each category:
{
    "input": ["col1", "col2"],  # Input columns
    "label": ["target"],        # Label/output columns
    "meta": ["id"]              # Metadata columns
}

    -
    -
  • -
- - -
- Example -
-
-
-

manager = DatasetManager("config.yaml")
categories = manager.categorize_columns_by_type()
print(categories)
{
    'input': ['hello', 'bonjour'],
    'label': ['ciao'],
    'meta': ["id"]
}

-
-
-
-
-
- Source code in src/stimulus/data/data_handlers.py -
def categorize_columns_by_type(self) -> dict:
-    """Organizes columns from config into input, label, and meta categories.
-
-    Reads the column definitions from the config and sorts them into categories
-    based on their column_type field.
-
-    Returns:
-        dict: Dictionary containing lists of column names for each category:
-            {
-                "input": ["col1", "col2"],  # Input columns
-                "label": ["target"],        # Label/output columns
-                "meta": ["id"]     # Metadata columns
-            }
-
-    Example:
-        >>> manager = DatasetManager("config.yaml")
-        >>> categories = manager.categorize_columns_by_type()
-        >>> print(categories)
-        {
-            'input': ['hello', 'bonjour'],
-            'label': ['ciao'],
-            'meta': ["id"]
-        }
-    """
-    input_columns = []
-    label_columns = []
-    meta_columns = []
-    for column in self.config.columns:
-        if column.column_type == "input":
-            input_columns.append(column.column_name)
-        elif column.column_type == "label":
-            label_columns.append(column.column_name)
-        elif column.column_type == "meta":
-            meta_columns.append(column.column_name)
-
-    return {"input": input_columns, "label": label_columns, "meta": meta_columns}
-
-
-
- -
- -
- - -

- get_split_columns - - -

-
get_split_columns() -> list[str]
-
- -
- -

Get the columns that are used for splitting.

- -
- Source code in src/stimulus/data/data_handlers.py -
def get_split_columns(self) -> list[str]:
-    """Get the columns that are used for splitting."""
-    return self.config.split.split_input_columns
-
-
-
- -
- -
- - -

- get_transform_logic - - -

-
get_transform_logic() -> dict
-
- -
- -

Get the transformation logic.

-

Returns a dictionary in the following structure:
{
    "transformation_name": str,
    "transformations": list[tuple[str, str, dict]]
}

- -
- Source code in src/stimulus/data/data_handlers.py -
def get_transform_logic(self) -> dict:
-    """Get the transformation logic.
-
-    Returns a dictionary in the following structure :
-    {
-        "transformation_name": str,
-        "transformations": list[tuple[str, str, dict]]
-    }
-    """
-    transformation_logic = {
-        "transformation_name": self.config.transforms.transformation_name,
-        "transformations": [],
-    }
-    for column in self.config.transforms.columns:
-        for transformation in column.transformations:
-            transformation_logic["transformations"].append(
-                (column.column_name, transformation.name, transformation.params),
-            )
-    return transformation_logic
-
-
-
- -
- - - -
- -
- -
- -
- - - -

- DatasetProcessor - - -

-
DatasetProcessor(config_path: str, csv_path: str)
-
- -
-

- Bases: DatasetHandler

- - -

Class for loading dataset, applying transformations and splitting.

- - - - - - - - - -

Methods:

-
    -
  • - add_split - – -
    -

    Add a column specifying the train, validation, test splits of the data.

    -
    -
  • -
  • - apply_transformation_group - – -
    -

    Apply the transformation group to the data.

    -
    -
  • -
  • - load_csv - – -
    -

    Load the CSV file into a polars DataFrame.

    -
    -
  • -
  • - read_csv_header - – -
    -

    Get the column names from the header of the CSV file.

    -
    -
  • -
  • - save - – -
    -

    Saves the data to a csv file.

    -
    -
  • -
  • - select_columns - – -
    -

    Select specific columns from the DataFrame and return as a dictionary.

    -
    -
  • -
  • - shuffle_labels - – -
    -

    Shuffles the labels in the data.

    -
    -
  • -
- - - -
- Source code in src/stimulus/data/data_handlers.py -
def __init__(self, config_path: str, csv_path: str) -> None:
-    """Initialize the DatasetProcessor."""
-    super().__init__(config_path, csv_path)
-
-
- - - -
- - - - - - - - - -
- - -

- add_split - - -

-
add_split(
-    split_manager: SplitManager, *, force: bool = False
-) -> None
-
- -
- -

Add a column specifying the train, validation, test splits of the data.

-

An exception is raised if the split column is already present in the csv file. This behaviour can be overridden by setting force=True.

- - -

Parameters:

-
    -
  • - split_manager - (SplitManager) - – -
    -

    Manager for handling dataset splitting

    -
    -
  • -
  • - force - (bool, default: - False -) - – -
    -

    If True, the split column present in the csv file will be overwritten.

    -
    -
  • -
- -
- Source code in src/stimulus/data/data_handlers.py -
def add_split(self, split_manager: SplitManager, *, force: bool = False) -> None:
-    """Add a column specifying the train, validation, test splits of the data.
-
-    An error exception is raised if the split column is already present in the csv file. This behaviour can be overriden by setting force=True.
-
-    Args:
-        split_manager (SplitManager): Manager for handling dataset splitting
-        force (bool): If True, the split column present in the csv file will be overwritten.
-    """
-    if ("split" in self.columns) and (not force):
-        raise ValueError(
-            "The category split is already present in the csv file. If you want to still use this function, set force=True",
-        )
-    # get relevant split columns from the dataset_manager
-    split_columns = self.dataset_manager.get_split_columns()
-    split_input_data = self.select_columns(split_columns)
-
-    # get the split indices
-    train, validation, test = split_manager.get_split_indices(split_input_data)
-
-    # add the split column to the data
-    split_column = np.full(len(self.data), -1).astype(int)
-    split_column[train] = 0
-    split_column[validation] = 1
-    split_column[test] = 2
-    self.data = self.data.with_columns(pl.Series("split", split_column))
-
-    if "split" not in self.columns:
-        self.columns.append("split")
-
-
-
- -
- -
- - -

- apply_transformation_group - - -

-
apply_transformation_group(
-    transform_manager: TransformManager,
-) -> None
-
- -
- -

Apply the transformation group to the data.

- -
- Source code in src/stimulus/data/data_handlers.py -
def apply_transformation_group(self, transform_manager: TransformManager) -> None:
-    """Apply the transformation group to the data."""
-    for column_name, transform_name, _params in self.dataset_manager.get_transform_logic()["transformations"]:
-        transformed_data, add_row = transform_manager.transform_column(
-            column_name,
-            transform_name,
-            self.data[column_name],
-        )
-        if add_row:
-            new_rows = self.data.with_columns(pl.Series(column_name, transformed_data))
-            self.data = pl.vstack(self.data, new_rows)
-        else:
-            self.data = self.data.with_columns(pl.Series(column_name, transformed_data))
-
-
-
- -
- -
- - -

- load_csv - - -

-
load_csv(csv_path: str) -> DataFrame
-
- -
- -

Load the CSV file into a polars DataFrame.

- - -

Parameters:

-
    -
  • - csv_path - (str) - – -
    -

    Path to the CSV file to load.

    -
    -
  • -
- - -

Returns:

-
    -
  • - DataFrame - – -
    -

    pl.DataFrame: Polars DataFrame containing the loaded CSV data.

    -
    -
  • -
- -
- Source code in src/stimulus/data/data_handlers.py -
def load_csv(self, csv_path: str) -> pl.DataFrame:
-    """Load the CSV file into a polars DataFrame.
-
-    Args:
-        csv_path (str): Path to the CSV file to load.
-
-    Returns:
-        pl.DataFrame: Polars DataFrame containing the loaded CSV data.
-    """
-    return pl.read_csv(csv_path)

read_csv_header

read_csv_header(csv_path: str) -> list

Get the column names from the header of the CSV file.

Parameters:

  • csv_path (str) –

    Path to the CSV file to read headers from.

Returns:

  • list (list) –

    List of column names from the CSV header.

Source code in src/stimulus/data/data_handlers.py
def read_csv_header(self, csv_path: str) -> list:
-    """Get the column names from the header of the CSV file.
-
-    Args:
-        csv_path (str): Path to the CSV file to read headers from.
-
-    Returns:
-        list: List of column names from the CSV header.
-    """
-    with open(csv_path) as f:
-        return f.readline().strip().split(",")

save

save(path: str) -> None

Saves the data to a csv file.

Source code in src/stimulus/data/data_handlers.py
def save(self, path: str) -> None:
-    """Saves the data to a csv file."""
-    self.data.write_csv(path)

select_columns

select_columns(columns: list) -> dict

Select specific columns from the DataFrame and return as a dictionary.

Parameters:

  • columns (list) –

    List of column names to select.

Returns:

  • dict (dict) –

    A dictionary where keys are column names and values are lists containing the column data.

Example

handler = DatasetHandler(...)
data_dict = handler.select_columns(["col1", "col2"])
# Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}

Source code in src/stimulus/data/data_handlers.py
def select_columns(self, columns: list) -> dict:
-    """Select specific columns from the DataFrame and return as a dictionary.
-
-    Args:
-        columns (list): List of column names to select.
-
-    Returns:
-        dict: A dictionary where keys are column names and values are lists containing the column data.
-
-    Example:
-        >>> handler = DatasetHandler(...)
-        >>> data_dict = handler.select_columns(["col1", "col2"])
-        >>> # Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}
-    """
-    df = self.data.select(columns)
-    return {col: df[col].to_list() for col in columns}

shuffle_labels

shuffle_labels(seed: Optional[float] = None) -> None

Shuffles the labels in the data.

Source code in src/stimulus/data/data_handlers.py
def shuffle_labels(self, seed: Optional[float] = None) -> None:
-    """Shuffles the labels in the data."""
-    # set the np seed
-    np.random.seed(seed)
-
-    label_keys = self.dataset_manager.column_categories["label"]
-    for key in label_keys:
-        self.data = self.data.with_columns(pl.Series(key, np.random.permutation(list(self.data[key]))))
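
An illustrative, standalone sketch of the permutation step used above, handy as a negative control when benchmarking models (the column name is arbitrary):

import numpy as np
import polars as pl

np.random.seed(42)
df = pl.DataFrame({"label": [0, 1, 0, 1, 1]})
# Same call shuffle_labels makes per label column: values are kept, order is randomized
df = df.with_columns(pl.Series("label", np.random.permutation(list(df["label"]))))
print(df["label"].to_list())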

EncodeManager

EncodeManager(encoder_loader: EncoderLoader)

Manages the encoding of data columns using configured encoders.

This class handles encoding of data columns based on the encoders specified in the
configuration. It uses an EncoderLoader to get the appropriate encoder for each column
and applies the encoding.

Attributes:

  • encoder_loader (EncoderLoader) –

    Loader that provides encoders based on config.

Example

encoder_loader = EncoderLoader(config)
encode_manager = EncodeManager(encoder_loader)
data = ["ACGT", "TGCA", "GCTA"]
encoded = encode_manager.encode_column("dna_seq", data)
print(encoded.shape)
torch.Size([3, 4, 4])  # 3 sequences, length 4, one-hot encoded

Parameters:

  • encoder_loader (EncoderLoader) –

    Loader that provides encoders based on configuration.

Methods:

  • encode_column –

    Encodes a column of data using the configured encoder.

  • encode_columns –

    Encodes multiple columns of data using the configured encoders.

  • encode_dataframe –

    Encode the dataframe using the encoders.

Source code in src/stimulus/data/data_handlers.py
def __init__(
-    self,
-    encoder_loader: loaders.EncoderLoader,
-) -> None:
-    """Initialize the EncodeManager.
-
-    Args:
-        encoder_loader: Loader that provides encoders based on configuration.
-    """
-    self.encoder_loader = encoder_loader

encode_column

encode_column(
    column_name: str, column_data: list
) -> Tensor

Encodes a column of data using the configured encoder.

Gets the appropriate encoder for the column from the encoder_loader and uses it
to encode all the data in the column.

Parameters:

  • column_name (str) –

    Name of the column to encode.

  • column_data (list) –

    List of data values from the column to encode.

Returns:

  • Tensor –

    Encoded data as a torch.Tensor. The exact shape depends on the encoder used.

Example

data = ["ACGT", "TGCA"]
encoded = encode_manager.encode_column("dna_seq", data)
print(encoded.shape)
torch.Size([2, 4, 4])  # 2 sequences, length 4, one-hot encoded

Source code in src/stimulus/data/data_handlers.py
def encode_column(self, column_name: str, column_data: list) -> torch.Tensor:
-    """Encodes a column of data using the configured encoder.
-
-    Gets the appropriate encoder for the column from the encoder_loader and uses it
-    to encode all the data in the column.
-
-    Args:
-        column_name: Name of the column to encode.
-        column_data: List of data values from the column to encode.
-
-    Returns:
-        Encoded data as a torch.Tensor. The exact shape depends on the encoder used.
-
-    Example:
-        >>> data = ["ACGT", "TGCA"]
-        >>> encoded = encode_manager.encode_column("dna_seq", data)
-        >>> print(encoded.shape)
-        torch.Size([2, 4, 4])  # 2 sequences, length 4, one-hot encoded
-    """
-    encode_all_function = self.encoder_loader.get_function_encode_all(column_name)
-    return encode_all_function(column_data)

encode_columns

encode_columns(column_data: dict) -> dict

Encodes multiple columns of data using the configured encoders.

Gets the appropriate encoder for each column from the encoder_loader and encodes
all data values in those columns.

Parameters:

  • column_data (dict) –

    Dict mapping column names to lists of data values to encode.

Returns:

  • dict –

    Dict mapping column names to their encoded tensors. The exact shape of each
    tensor depends on the encoder used for that column.

Example

data = {"dna_seq": ["ACGT", "TGCA"], "labels": ["1", "2"]}
encoded = encode_manager.encode_columns(data)
print(encoded["dna_seq"].shape)
torch.Size([2, 4, 4])  # 2 sequences, length 4, one-hot encoded

Source code in src/stimulus/data/data_handlers.py
def encode_columns(self, column_data: dict) -> dict:
-    """Encodes multiple columns of data using the configured encoders.
-
-    Gets the appropriate encoder for each column from the encoder_loader and encodes
-    all data values in those columns.
-
-    Args:
-        column_data: Dict mapping column names to lists of data values to encode.
-
-    Returns:
-        Dict mapping column names to their encoded tensors. The exact shape of each
-        tensor depends on the encoder used for that column.
-
-    Example:
-        >>> data = {"dna_seq": ["ACGT", "TGCA"], "labels": ["1", "2"]}
-        >>> encoded = encode_manager.encode_columns(data)
-        >>> print(encoded["dna_seq"].shape)
-        torch.Size([2, 4, 4])  # 2 sequences, length 4, one-hot encoded
-    """
-    return {col: self.encode_column(col, values) for col, values in column_data.items()}

encode_dataframe

encode_dataframe(dataframe: DataFrame) -> dict[str, Tensor]

Encode the dataframe using the encoders.

Source code in src/stimulus/data/data_handlers.py
def encode_dataframe(self, dataframe: pl.DataFrame) -> dict[str, torch.Tensor]:
-    """Encode the dataframe using the encoders."""
-    return {col: self.encode_column(col, dataframe[col].to_list()) for col in dataframe.columns}
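
A minimal usage sketch, assuming encode_manager is an EncodeManager whose loader provides an encoder for every column in the frame (as in the EncodeManager example above):

import polars as pl

df = pl.DataFrame({"dna_seq": ["ACGT", "TGCA"], "labels": [1.0, 2.0]})
encoded = encode_manager.encode_dataframe(df)
print(sorted(encoded.keys()))  # ['dna_seq', 'labels'] -- one tensor per column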

SplitManager

SplitManager(split_loader: SplitLoader)

Class for managing the splitting.

Methods:

  • get_split_indices –

    Get the indices for train, validation, and test splits.

Source code in src/stimulus/data/data_handlers.py
def __init__(
-    self,
-    split_loader: loaders.SplitLoader,
-) -> None:
-    """Initialize the SplitManager."""
-    self.split_loader = split_loader

get_split_indices

get_split_indices(
    data: dict,
) -> tuple[ndarray, ndarray, ndarray]

Get the indices for train, validation, and test splits.

Source code in src/stimulus/data/data_handlers.py
def get_split_indices(self, data: dict) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """Get the indices for train, validation, and test splits."""
-    return self.split_loader.get_function_split()(data)
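
Illustrative only: the split function obtained from the SplitLoader is simply expected to map the selected columns to three index arrays. A toy stand-in (hypothetical, not part of stimulus) might look like this:

import numpy as np

def toy_split(data: dict) -> tuple:
    """Half train, quarter validation, quarter test, in order."""
    n = len(next(iter(data.values())))
    idx = np.arange(n)
    return idx[: n // 2], idx[n // 2 : 3 * n // 4], idx[3 * n // 4 :]

train, validation, test = toy_split({"x": list(range(8))})
print(len(train), len(validation), len(test))  # 4 2 2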

TransformManager

TransformManager(transform_loader: TransformLoader)

Class for managing the transformations.

Methods:

  • transform_column –

    Transform a column of data using the specified transformation.

Source code in src/stimulus/data/data_handlers.py
def __init__(
-    self,
-    transform_loader: loaders.TransformLoader,
-) -> None:
-    """Initialize the TransformManager."""
-    self.transform_loader = transform_loader

transform_column

transform_column(
    column_name: str, transform_name: str, column_data: list
) -> tuple[list, bool]

Transform a column of data using the specified transformation.

Parameters:

  • column_name (str) –

    The name of the column to transform.

  • transform_name (str) –

    The name of the transformation to use.

  • column_data (list) –

    The data to transform.

Returns:

  • list (list) –

    The transformed data.

  • bool (bool) –

    Whether the transformation added new rows to the data.

Source code in src/stimulus/data/data_handlers.py
def transform_column(self, column_name: str, transform_name: str, column_data: list) -> tuple[list, bool]:
-    """Transform a column of data using the specified transformation.
-
-    Args:
-        column_name (str): The name of the column to transform.
-        transform_name (str): The name of the transformation to use.
-        column_data (list): The data to transform.
-
-    Returns:
-        list: The transformed data.
-        bool: Whether the transformation added new rows to the data.
-    """
-    transformer = self.transform_loader.__getattribute__(column_name)[transform_name]
-    return transformer.transform_all(column_data), transformer.add_row
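
Illustrative only: a transformer looked up by TransformManager is expected to expose transform_all() and an add_row flag, as used above. The toy class below is hypothetical and not one of the library's transforms:

class ToyReverseComplement:
    add_row = True  # True means the transformed values should be appended as new rows

    def transform_all(self, column_data: list) -> list:
        complement = str.maketrans("acgt", "tgca")
        return [seq.translate(complement)[::-1] for seq in column_data]

transformer = ToyReverseComplement()
print(transformer.transform_all(["aacg"]), transformer.add_row)  # ['cgtt'] True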
\ No newline at end of file
diff --git a/reference/stimulus/data/encoding/encoders/index.html b/reference/stimulus/data/encoding/encoders/index.html
index b7809d61..55314b22 100644
--- a/reference/stimulus/data/encoding/encoders/index.html
+++ b/reference/stimulus/data/encoding/encoders/index.html
@@ -1,4645 +1,719 @@
- stimulus.data.encoding.encoders - stimulus-py
+ stimulus.data.encoding.encoders - stimulus-py      

encoders

This file contains encoders classes for encoding various types of data.

Classes:

  • AbstractEncoder –

    Abstract class for encoders.

  • NumericEncoder –

    Encoder for float/int data.

  • NumericRankEncoder –

    Encoder for float/int data that encodes the data based on their rank.

  • StrClassificationEncoder –

    A string classification encoder that converts lists of strings into numeric labels using scikit-learn.

  • TextOneHotEncoder –

    One hot encoder for text data.

AbstractEncoder

Bases: ABC

Abstract class for encoders.

Encoders are classes that encode the raw data into torch.tensors. Different encoders provide different encoding methods. Different encoders may take different types of data as input.

Methods:

  • encode

    encodes a single data point

  • encode_all

    encodes a list of data points into a torch.tensor

  • encode_multiprocess

    encodes a list of data points using multiprocessing

  • decode

    decodes a single data point

Methods:

  • decode

    Decode a single data point.

  • encode

    Encode a single data point.

  • encode_all

    Encode a list of data points.

decode abstractmethod

decode(data: Any) -> Any
+

Decode a single data point.

This is an abstract method, child classes should overwrite it.

Parameters:

  • data (Any) –

    a single encoded data point

Returns:

  • decoded_data_point ( Any ) –

    the decoded data point

Source code in src/stimulus/data/encoding/encoders.py
@abstractmethod
+def decode(self, data: Any) -> Any:
+    """Decode a single data point.
+
+    This is an abstract method, child classes should overwrite it.
+
+    Args:
+        data (Any): a single encoded data point
+
+    Returns:
+        decoded_data_point (Any): the decoded data point
+    """
+    raise NotImplementedError
+

encode abstractmethod

encode(data: Any) -> Any
+

Encode a single data point.

This is an abstract method, child classes should overwrite it.

Parameters:

  • data (Any) –

    a single data point

Returns:

  • encoded_data_point ( Any ) –

    the encoded data point

Source code in src/stimulus/data/encoding/encoders.py
@abstractmethod
+def encode(self, data: Any) -> Any:
+    """Encode a single data point.
+
+    This is an abstract method, child classes should overwrite it.
+
+    Args:
+        data (Any): a single data point
+
+    Returns:
+        encoded_data_point (Any): the encoded data point
+    """
+    raise NotImplementedError
+

encode_all abstractmethod

encode_all(data: list[Any]) -> Tensor
+

Encode a list of data points.

This is an abstract method, child classes should overwrite it.

Parameters:

  • data (list[Any]) –

    a list of data points

Returns:

  • encoded_data ( Tensor ) –

    encoded data points

Source code in src/stimulus/data/encoding/encoders.py
@abstractmethod
+def encode_all(self, data: list[Any]) -> torch.Tensor:
+    """Encode a list of data points.
+
+    This is an abstract method, child classes should overwrite it.
+
+    Args:
+        data (list[Any]): a list of data points
+
+    Returns:
+        encoded_data (torch.Tensor): encoded data points
+    """
+    raise NotImplementedError
+
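
Illustrative only: a custom encoder just needs to implement the three abstract methods above. The toy class below is hypothetical, assuming AbstractEncoder is imported from stimulus.data.encoding.encoders; it encodes each data point by its length.

from typing import Any

import torch

class LengthEncoder(AbstractEncoder):
    """Toy encoder: represents every data point by its length."""

    def encode(self, data: Any) -> Any:
        return torch.tensor([len(data)])

    def encode_all(self, data: list[Any]) -> torch.Tensor:
        return torch.tensor([len(item) for item in data])

    def decode(self, data: Any) -> Any:
        raise NotImplementedError("Length encoding cannot be reversed.")

print(LengthEncoder().encode_all(["ac", "acgt"]))  # tensor([2, 4])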

NumericEncoder

NumericEncoder(dtype: dtype = float32)
+

Bases: AbstractEncoder

Encoder for float/int data.

Attributes:

  • dtype (dtype) –

    The data type of the encoded data. Default = torch.float32 (32-bit floating point)

Parameters:

  • dtype (dtype, default: float32 ) –

    the data type of the encoded data. Default = torch.float (32-bit floating point)

Methods:

  • decode –

    Decodes the data.

  • encode –

    Encodes the data.

  • encode_all –

    Encodes the data.

Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, dtype: torch.dtype = torch.float32) -> None:
+    """Initialize the NumericEncoder class.
+
+    Args:
+        dtype (torch.dtype): the data type of the encoded data. Default = torch.float (32-bit floating point)
+    """
+    self.dtype = dtype
+

decode

decode(data: Tensor) -> list[float]
+

Decodes the data.

Parameters:

  • data (Tensor) –

    the encoded data

Returns:

  • decoded_data ( list[float] ) –

    the decoded data

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: torch.Tensor) -> list[float]:
+    """Decodes the data.
+
+    Args:
+        data (torch.Tensor): the encoded data
+
+    Returns:
+        decoded_data (list[float]): the decoded data
+    """
+    return data.cpu().numpy().tolist()
+

encode

encode(data: float) -> Tensor
+

Encodes the data.

This method takes as input a single data point, should be mappable to a single output.

Parameters:

  • data (float) –

    a single data point

Returns:

  • encoded_data_point ( Tensor ) –

    the encoded data point

Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: float) -> torch.Tensor:
+    """Encodes the data.
+
+    This method takes as input a single data point, should be mappable to a single output.
+
+    Args:
+        data (float): a single data point
+
+    Returns:
+        encoded_data_point (torch.Tensor): the encoded data point
+    """
+    return self.encode_all([data])
+

encode_all

encode_all(data: list[float]) -> Tensor
+

Encodes the data.

This method takes as input a list of data points, or a single float, and returns a torch.tensor.

Parameters:

  • data (list[float]) –

    a list of data points or a single data point

Returns:

  • encoded_data ( Tensor ) –

    the encoded data

Source code in src/stimulus/data/encoding/encoders.py
def encode_all(self, data: list[float]) -> torch.Tensor:
+    """Encodes the data.
+
+    This method takes as input a list of data points, or a single float, and returns a torch.tensor.
+
+    Args:
+        data (list[float]): a list of data points or a single data point
+
+    Returns:
+        encoded_data (torch.Tensor): the encoded data
+    """
+    if not isinstance(data, list):
+        data = [data]
+
+    self._check_input_dtype(data)
+    self._warn_float_is_converted_to_int(data)
+
+    return torch.tensor(data, dtype=self.dtype)
+
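
A brief usage sketch, assuming NumericEncoder is imported from stimulus.data.encoding.encoders:

import torch

encoder = NumericEncoder(dtype=torch.float32)
print(encoder.encode_all([0.1, 2.0, 3.5]))  # tensor([0.1000, 2.0000, 3.5000])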

NumericRankEncoder

NumericRankEncoder(*, scale: bool = False)
+

Bases: AbstractEncoder

Encoder for float/int data that encodes the data based on their rank.

Attributes:

  • scale (bool) –

    whether to scale the ranks to be between 0 and 1. Default = False

Methods:

  • encode

    encodes a single data point

  • encode_all

    encodes a list of data points into a torch.tensor

  • decode

    decodes a single data point

  • _check_input_dtype

    checks if the input data is int or float data

Parameters:

  • scale (bool, default: False ) –

    whether to scale the ranks to be between 0 and 1. Default = False

Methods:

  • decode

    Returns an error since decoding does not make sense without encoder information, which is not yet supported.

  • encode

    Returns an error since encoding a single float does not make sense.

  • encode_all

    Encodes the data.

Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, *, scale: bool = False) -> None:
+    """Initialize the NumericRankEncoder class.
+
+    Args:
+        scale (bool): whether to scale the ranks to be between 0 and 1. Default = False
+    """
+    self.scale = scale
+

decode

decode(data: Any) -> Any
+

Returns an error since decoding does not make sense without encoder information, which is not yet supported.

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: Any) -> Any:
+    """Returns an error since decoding does not make sense without encoder information, which is not yet supported."""
+    raise NotImplementedError("Decoding is not yet supported for NumericRank.")
+

encode

encode(data: Any) -> Tensor
+

Returns an error since encoding a single float does not make sense.

Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: Any) -> torch.Tensor:
+    """Returns an error since encoding a single float does not make sense."""
+    raise NotImplementedError("Encoding a single float does not make sense. Use encode_all instead.")
+

encode_all

encode_all(data: list[Union[int, float]]) -> Tensor
+

Encodes the data.

This method takes as input a list of data points, and returns the ranks of the data points. The ranks are normalized to be between 0 and 1, when scale is set to True.

Parameters:

  • data (list[Union[int, float]]) –

    a list of numeric values

Returns:

  • encoded_data ( Tensor ) –

    the encoded data

Source code in src/stimulus/data/encoding/encoders.py
def encode_all(self, data: list[Union[int, float]]) -> torch.Tensor:
+    """Encodes the data.
+
+    This method takes as input a list of data points, and returns the ranks of the data points.
+    The ranks are normalized to be between 0 and 1, when scale is set to True.
+
+    Args:
+        data (list[Union[int, float]]): a list of numeric values
+
+    Returns:
+        encoded_data (torch.Tensor): the encoded data
+    """
+    if not isinstance(data, list):
+        data = [data]
+    self._check_input_dtype(data)
+
+    # Get ranks (0 is lowest, n-1 is highest)
+    # and normalize to be between 0 and 1
+    array_data: np.ndarray = np.array(data)
+    ranks: np.ndarray = np.argsort(np.argsort(array_data))
+    if self.scale:
+        ranks = ranks / max(len(ranks) - 1, 1)
+    return torch.tensor(ranks)
+
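
Illustrative only: the double argsort shown above is what produces the ranks, for example:

import numpy as np

values = np.array([10.0, 2.0, 7.0])
ranks = np.argsort(np.argsort(values))  # [2 0 1]: 10.0 is highest, 2.0 is lowest
print(ranks / max(len(ranks) - 1, 1))   # scaled variant: [1.  0.  0.5]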

StrClassificationEncoder

StrClassificationEncoder(*, scale: bool = False)
+

Bases: AbstractEncoder

A string classification encoder that converts lists of strings into numeric labels using scikit-learn.

When scale is set to True, the labels are scaled to be between 0 and 1.

Attributes:

  • scale (bool) –

    Whether to scale the labels to be between 0 and 1. Default = False

Methods:

  • encode (data: str) -> int –

    Raises a NotImplementedError, as encoding a single string is not meaningful in this context.

  • encode_all (data: list[str]) -> torch.tensor –

    Encodes an entire list of string data into a numeric representation using LabelEncoder and
    returns a torch tensor. Ensures that the provided data items are valid strings prior to encoding.

  • decode (data: Any) -> Any –

    Raises a NotImplementedError, as decoding is not supported with the current design.

  • _check_dtype (data: list[str]) -> None –

    Validates that all items in the data list are strings, raising a ValueError otherwise.

Parameters:

  • scale (bool, default: False ) –

    whether to scale the labels to be between 0 and 1. Default = False

Methods:

  • decode

    Returns an error since decoding does not make sense without encoder information, which is not yet supported.

  • encode

    Returns an error since encoding a single string does not make sense.

  • encode_all

    Encodes the data.

Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, *, scale: bool = False) -> None:
+    """Initialize the StrClassificationEncoder class.
+
+    Args:
+        scale (bool): whether to scale the labels to be between 0 and 1. Default = False
+    """
+    self.scale = scale
+

decode

decode(data: Any) -> Any
+

Returns an error since decoding does not make sense without encoder information, which is not yet supported.

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: Any) -> Any:
+    """Returns an error since decoding does not make sense without encoder information, which is not yet supported."""
+    raise NotImplementedError("Decoding is not yet supported for StrClassification.")
+

encode

encode(data: str) -> int
+

Returns an error since encoding a single string does not make sense.

Parameters:

  • data (str) –

    a single string

Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: str) -> int:
+    """Returns an error since encoding a single string does not make sense.
+
+    Args:
+        data (str): a single string
+    """
+    raise NotImplementedError("Encoding a single string does not make sense. Use encode_all instead.")
+

encode_all

encode_all(data: Union[str, list[str]]) -> Tensor
+

Encodes the data.

This method takes as input a list of data points, should be mappable to a single output, using LabelEncoder from scikit learn and returning a numpy array. For more info visit : https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html

Parameters:

  • data (Union[str, list[str]]) –

    a list of strings or single string

Returns:

  • encoded_data ( tensor ) –

    the encoded data

Source code in src/stimulus/data/encoding/encoders.py
def encode_all(self, data: Union[str, list[str]]) -> torch.Tensor:
+    """Encodes the data.
+
+    This method takes as input a list of data points, should be mappable to a single output, using LabelEncoder from scikit learn and returning a numpy array.
+    For more info visit : https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html
+
+    Args:
+        data (Union[str, list[str]]): a list of strings or single string
+
+    Returns:
+        encoded_data (torch.tensor): the encoded data
+    """
+    if not isinstance(data, list):
+        data = [data]
+
+    self._check_dtype(data)
+
+    encoder = preprocessing.LabelEncoder()
+    encoded_data = torch.tensor(encoder.fit_transform(data))
+    if self.scale:
+        encoded_data = encoded_data / max(len(encoded_data) - 1, 1)
+
+    return encoded_data
+
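
Illustrative only: the underlying scikit-learn LabelEncoder assigns integer labels in sorted order of the unique strings, which is what encode_all returns as a tensor:

import torch
from sklearn import preprocessing

encoder = preprocessing.LabelEncoder()
labels = torch.tensor(encoder.fit_transform(["cat", "dog", "cat", "mouse"]))
print(labels)  # tensor([0, 1, 0, 2])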

TextOneHotEncoder

TextOneHotEncoder(
+    alphabet: str = "acgt",
+    *,
+    convert_lowercase: bool = False,
+    padding: bool = False
+)
+

Bases: AbstractEncoder

One hot encoder for text data.

NOTE: encodes based on the given alphabet. If a character c is not in the alphabet, c will be represented by a vector of zeros.

Attributes:

  • alphabet (str) –

    the alphabet to one hot encode the data with.

  • convert_lowercase (bool) –

    whether to convert the sequence and alphabet to lowercase. Default is False.

  • padding (bool) –

    whether to pad the sequences with zeros. Default is False.

  • encoder (OneHotEncoder) –

    preprocessing.OneHotEncoder object initialized with self.alphabet

Methods:

  • encode

    encodes a single data point

  • encode_all

    encodes a list of data points into a numpy array

  • encode_multiprocess

    encodes a list of data points using multiprocessing

  • decode

    decodes a single data point

  • _sequence_to_array

    transforms a sequence into a numpy array

Parameters:

  • alphabet (str, default: 'acgt' ) –

    the alphabet to one hot encode the data with.

Raises:

  • TypeError

    If the input alphabet is not a string.

Methods:

  • decode

    Decodes one-hot encoded tensor back to sequences.

  • encode

    One hot encodes a single sequence.

  • encode_all

    Encodes a list of sequences.

  • encode_multiprocess

    Encodes a list of sequences using multiprocessing.

Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, alphabet: str = "acgt", *, convert_lowercase: bool = False, padding: bool = False) -> None:
+    """Initialize the TextOneHotEncoder class.
+
+    Args:
+        alphabet (str): the alphabet to one hot encode the data with.
+
+    Raises:
+        TypeError: If the input alphabet is not a string.
+    """
+    if not isinstance(alphabet, str):
+        error_msg = f"Expected a string input for alphabet, got {type(alphabet).__name__}"
+        logger.error(error_msg)
+        raise TypeError(error_msg)
+
+    if convert_lowercase:
+        alphabet = alphabet.lower()
+
+    self.alphabet = alphabet
+    self.convert_lowercase = convert_lowercase
+    self.padding = padding
+
+    self.encoder = preprocessing.OneHotEncoder(
+        categories=[list(alphabet)],
+        handle_unknown="ignore",
+    )  # handle_unknown='ignore' unsures that a vector of zeros is returned for unknown characters, such as 'Ns' in DNA sequences
+    self.encoder.fit(np.array(list(alphabet)).reshape(-1, 1))
+

decode

decode(data: Tensor) -> Union[str, list[str]]
+

Decodes one-hot encoded tensor back to sequences.

Parameters:

  • data (Tensor) –

    2D or 3D tensor of one-hot encoded sequences
    - 2D shape: (sequence_length, alphabet_size)
    - 3D shape: (batch_size, sequence_length, alphabet_size)

NOTE: when decoding a 3D tensor, all sequences are assumed to have the same length.

Returns:

  • Union[str, list[str]]

    Union[str, list[str]]: Single sequence string or list of sequence strings

Raises:

  • TypeError

    If the input data is not a 2D or 3D tensor

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: torch.Tensor) -> Union[str, list[str]]:
+    """Decodes one-hot encoded tensor back to sequences.
+
+    Args:
+        data (torch.Tensor): 2D or 3D tensor of one-hot encoded sequences
+            - 2D shape: (sequence_length, alphabet_size)
+            - 3D shape: (batch_size, sequence_length, alphabet_size)
+
+    NOTE: when decoding a 3D tensor, all sequences are assumed to have the same length.
+
+    Returns:
+        Union[str, list[str]]: Single sequence string or list of sequence strings
+
+    Raises:
+        TypeError: If the input data is not a 2D or 3D tensor
+    """
+    expected_2d_tensor = 2
+    expected_3d_tensor = 3
+
+    if data.dim() == expected_2d_tensor:
+        # Single sequence
+        data_np = data.numpy().reshape(-1, len(self.alphabet))
+        decoded = self.encoder.inverse_transform(data_np).flatten()
+        return "".join([i for i in decoded if i is not None])
+
+    if data.dim() == expected_3d_tensor:
+        # Multiple sequences
+        batch_size, seq_len, _ = data.shape
+        data_np = data.reshape(-1, len(self.alphabet)).numpy()
+        decoded = self.encoder.inverse_transform(data_np)
+        sequences = decoded.reshape(batch_size, seq_len)
+        # Convert to masked array where None values are masked
+        masked_sequences = np.ma.masked_equal(sequences, None)
+        # Fill masked values with "-"
+        filled_sequences = masked_sequences.filled("-")
+        return ["".join(seq) for seq in filled_sequences]
+
+    raise ValueError(f"Expected 2D or 3D tensor, got {data.dim()}D")
+
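A hedged round-trip sketch for decode, with the expected outputs shown as comments:

encoder = TextOneHotEncoder(alphabet="acgt")
one_hot = encoder.encode("acgt")              # 2D tensor of shape (4, 4)
print(encoder.decode(one_hot))                # "acgt"
batch = encoder.encode_all(["acgt", "gtca"])  # 3D tensor of shape (2, 4, 4)
print(encoder.decode(batch))                  # ["acgt", "gtca"]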

encode

encode(data: str) -> Tensor
+

One hot encodes a single sequence.

Takes a single string sequence and returns a torch tensor of shape (sequence_length, alphabet_length). The returned tensor corresponds to the one hot encoding of the sequence. Unknown characters are represented by a vector of zeros.

Parameters:

  • data (str) –

    single sequence

Returns:

  • encoded_data_point ( Tensor ) –

    one hot encoded sequence

Raises:

  • TypeError

    If the input data is not a string.

Examples:

>>> encoder = TextOneHotEncoder(alphabet="acgt")
+>>> encoder.encode("acgt")
+tensor([[1, 0, 0, 0],
+        [0, 1, 0, 0],
+        [0, 0, 1, 0],
+        [0, 0, 0, 1]])
+>>> encoder.encode("acgtn")
+tensor([[1, 0, 0, 0],
+        [0, 1, 0, 0],
+        [0, 0, 1, 0],
+        [0, 0, 0, 1],
+        [0, 0, 0, 0]])
+
>>> encoder = TextOneHotEncoder(alphabet="ACgt")
+>>> encoder.encode("acgt")
+tensor([[0, 0, 0, 0],
+        [0, 0, 0, 0],
+        [0, 0, 1, 0],
+        [0, 0, 0, 1]])
+>>> encoder.encode("ACgt")
+tensor([[1, 0, 0, 0],
+        [0, 1, 0, 0],
+        [0, 0, 1, 0],
+        [0, 0, 0, 1]])
+
Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: str) -> torch.Tensor:
+    """One hot encodes a single sequence.
+
+    Takes a single string sequence and returns a torch tensor of shape (sequence_length, alphabet_length).
+    The returned tensor corresponds to the one hot encoding of the sequence.
+    Unknown characters are represented by a vector of zeros.
+
+    Args:
+        data (str): single sequence
+
+    Returns:
+        encoded_data_point (torch.Tensor): one hot encoded sequence
+
+    Raises:
+        TypeError: If the input data is not a string.
+
+    Examples:
+        >>> encoder = TextOneHotEncoder(alphabet="acgt")
+        >>> encoder.encode("acgt")
+        tensor([[1, 0, 0, 0],
+                [0, 1, 0, 0],
+                [0, 0, 1, 0],
+                [0, 0, 0, 1]])
+        >>> encoder.encode("acgtn")
+        tensor([[1, 0, 0, 0],
+                [0, 1, 0, 0],
+                [0, 0, 1, 0],
+                [0, 0, 0, 1],
+                [0, 0, 0, 0]])
+
+        >>> encoder = TextOneHotEncoder(alphabet="ACgt")
+        >>> encoder.encode("acgt")
+        tensor([[0, 0, 0, 0],
+                [0, 0, 0, 0],
+                [0, 0, 1, 0],
+                [0, 0, 0, 1]])
+        >>> encoder.encode("ACgt")
+        tensor([[1, 0, 0, 0],
+                [0, 1, 0, 0],
+                [0, 0, 1, 0],
+                [0, 0, 0, 1]])
+    """
+    sequence_array = self._sequence_to_array(data)
+    transformed = self.encoder.transform(sequence_array)
+    numpy_array = np.squeeze(np.stack(transformed.toarray()))
+    return torch.from_numpy(numpy_array)
+

encode_all

encode_all(data: Union[str, list[str]]) -> Tensor
+

Encodes a list of sequences.

Takes a list of string sequences and returns a torch tensor of shape (number_of_sequences, sequence_length, alphabet_length). The returned tensor corresponds to the one hot encoding of the sequences. Unknown characters are represented by a vector of zeros.

Parameters:

  • data (Union[str, list[str]]) –

    list of sequences or a single sequence

Returns:

  • encoded_data ( Tensor ) –

    one hot encoded sequences

Raises:

  • TypeError

    If the input data is not a list or a string.

  • ValueError

    If all sequences do not have the same length when padding is False.

Examples:

>>> encoder = TextOneHotEncoder(alphabet="acgt")
+>>> encoder.encode_all(["acgt", "acgtn"])
+tensor([[[1, 0, 0, 0],
+         [0, 1, 0, 0],
+         [0, 0, 1, 0],
+         [0, 0, 0, 1],
+         [0, 0, 0, 0]], // this is padded with zeros
+
+        [[1, 0, 0, 0],
+         [0, 1, 0, 0],
+         [0, 0, 1, 0],
+         [0, 0, 0, 1],
+         [0, 0, 0, 0]]])
-
- -
- Source code in src/stimulus/data/encoding/encoders.py -
def encode_all(self, data: Union[str, list[str]]) -> torch.Tensor:
-    """Encodes a list of sequences.
-
-    Takes a list of string sequences and returns a torch tensor of shape (number_of_sequences, sequence_length, alphabet_length).
-    The returned tensor corresponds to the one hot encoding of the sequences.
-    Unknown characters are represented by a vector of zeros.
-
-    Args:
-        data (Union[str, list[str]]): list of sequences or a single sequence
-
-    Returns:
-        encoded_data (torch.Tensor): one hot encoded sequences
-
-    Raises:
-        TypeError: If the input data is not a list or a string.
-        ValueError: If all sequences do not have the same length when padding is False.
-
-    Examples:
-        >>> encoder = TextOneHotEncoder(alphabet="acgt")
-        >>> encoder.encode_all(["acgt", "acgtn"])
-        tensor([[[1, 0, 0, 0],
-                 [0, 1, 0, 0],
-                 [0, 0, 1, 0],
-                 [0, 0, 0, 1],
-                 [0, 0, 0, 0]], // this is padded with zeros
-
-                [[1, 0, 0, 0],
-                 [0, 1, 0, 0],
-                 [0, 0, 1, 0],
-                 [0, 0, 0, 1],
-                 [0, 0, 0, 0]]])
-    """
-    encoded_data = None  # to prevent UnboundLocalError
-    # encode data
-    if isinstance(data, str):
-        encoded_data = self.encode(data)
-        return torch.stack([encoded_data])
-    if isinstance(data, list):
-        # TODO instead maybe we can run encode_multiprocess when data size is larger than a certain threshold.
-        encoded_list = self.encode_multiprocess(data)
-    else:
-        error_msg = f"Expected list or string input for data, got {type(data).__name__}"
-        logger.error(error_msg)
-        raise TypeError(error_msg)
-
-    # handle padding
-    if self.padding:
-        max_length = max([len(d) for d in encoded_list])
-        encoded_data = torch.stack([F.pad(d, (0, 0, 0, max_length - len(d))) for d in encoded_list])
-    else:
-        lengths = {len(d) for d in encoded_list}
-        if len(lengths) > 1:
-            error_msg = "All sequences must have the same length when padding is False."
-            logger.error(error_msg)
-            raise ValueError(error_msg)
-        encoded_data = torch.stack(encoded_list)
-
-    if encoded_data is None:
-        raise ValueError("Encoded data is None. This should not happen.")
-
-    return encoded_data
-
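A short sketch of the padding behaviour described above: with padding=True the shorter sequence is zero-padded to the longest one, while with padding=False unequal lengths raise a ValueError.

encoder = TextOneHotEncoder(alphabet="acgt", padding=True)
batch = encoder.encode_all(["acgt", "ac"])
print(batch.shape)  # torch.Size([2, 4, 4])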
-
-
- -
- -
- - -

- encode_multiprocess

-
encode_multiprocess(data: list[str]) -> list[Tensor]
-
- -
- -

Encodes a list of sequences using multiprocessing.

- -
- Source code in src/stimulus/data/encoding/encoders.py -
def encode_multiprocess(self, data: list[str]) -> list[torch.Tensor]:
-    """Encodes a list of sequences using multiprocessing."""
-    with mp.Pool() as pool:
-        return pool.map(self.encode, data)
-
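A minimal sketch; the helper simply maps encode over the input list in a multiprocessing pool, so it is mainly useful for large batches:

encoder = TextOneHotEncoder(alphabet="acgt")
tensors = encoder.encode_multiprocess(["acgt", "gtca", "aacc"])  # list of three (4, 4) tensors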
- - - - - - - - - - - - - - \ No newline at end of file +
\ No newline at end of file diff --git a/reference/stimulus/data/encoding/index.html b/reference/stimulus/data/encoding/index.html index 83be772f..7bf7a68b 100644 --- a/reference/stimulus/data/encoding/index.html +++ b/reference/stimulus/data/encoding/index.html @@ -1,1999 +1,5 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.data.encoding - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- encoding

- -
- -

Encoding package for data transformation.

- - - - - -

Modules:

-
    -
  • - encoders - – -
    -

    This file contains encoders classes for encoding various types of data.

    -
    -
  • -
- - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/data/handlertorch/index.html b/reference/stimulus/data/handlertorch/index.html index af74b0b2..8c772a22 100644 --- a/reference/stimulus/data/handlertorch/index.html +++ b/reference/stimulus/data/handlertorch/index.html @@ -1,2214 +1,52 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.data.handlertorch - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- handlertorch

- -
- -

This file provides the class API for handling data in PyTorch using the Dataset and DataLoader classes.

- - - - - -

Modules:

-
    -
  • - loaders - – -
    -

    Loaders serve as interfaces between the CSV master class and custom methods.

    -
    -
  • -
- - - - -

Classes:

-
    -
  • - TorchDataset - – -
    -

    Class for creating a torch dataset.

    -
    -
  • -
- - - - - - - -
- - - - - - - - -
- - - -

- TorchDataset - - -

-
TorchDataset(
-    config_path: str,
-    csv_path: str,
-    encoder_loader: EncoderLoader,
-    split: Optional[int] = None,
-)
-
- -
-

- Bases: Dataset

- - -

Class for creating a torch dataset.

- - - -

Parameters:

-
    -
  • - config_path - (str) - – -
    -

    Path to the configuration file

    -
    -
  • -
  • - csv_path - (str) - – -
    -

    Path to the CSV data file

    -
    -
  • -
  • - encoder_loader - (EncoderLoader) - – -
    -

    Encoder loader instance

    -
    -
  • -
  • - split - (Optional[int], default: - None -) - – -
    -

    Optional tuple containing split information

    -
    -
  • -
- - - - - - - - - - -
- Source code in src/stimulus/data/handlertorch.py -
def __init__(
-    self,
-    config_path: str,
-    csv_path: str,
-    encoder_loader: loaders.EncoderLoader,
-    split: Optional[int] = None,
-) -> None:
-    """Initialize the TorchDataset.
-
-    Args:
-        config_path: Path to the configuration file
-        csv_path: Path to the CSV data file
-        encoder_loader: Encoder loader instance
-        split: Optional tuple containing split information
-    """
-    self.loader = data_handlers.DatasetLoader(
-        config_path=config_path,
-        csv_path=csv_path,
-        encoder_loader=encoder_loader,
-        split=split,
-    )
-
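A hedged construction sketch; the file paths are placeholders and the encoder loader is assumed to have been initialized from the same YAML config elsewhere:

from torch.utils.data import DataLoader
from stimulus.data import handlertorch, loaders

encoder_loader = loaders.EncoderLoader()
# ... initialize encoder_loader from the YAML config, e.g. via initialize_column_encoders_from_config(...)

dataset = handlertorch.TorchDataset(
    config_path="experiment_config.yaml",  # placeholder path
    csv_path="experiment_data.csv",        # placeholder path
    encoder_loader=encoder_loader,
    split=None,                            # or an int selecting a split, per the signature above
)
batches = DataLoader(dataset, batch_size=32)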
\ No newline at end of file diff --git a/reference/stimulus/data/index.html b/reference/stimulus/data/index.html index 5c1ac26a..3ea120f7 100644 --- a/reference/stimulus/data/index.html +++ b/reference/stimulus/data/index.html @@ -1,2040 +1,5 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.data - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- data

- -
- -

Data handling and processing module.

-

This module provides functionality for loading, transforming, and managing data in various formats like CSV. It includes classes and utilities for:

-
    -
  • Loading and processing CSV data files
  • -
  • Applying data transformations and augmentations
  • -
  • Splitting data into train/validation/test sets
  • -
  • Converting data into PyTorch datasets
  • -
- - - - - -

Modules:

-
    -
  • - data_handlers - – -
    -

    This module provides classes for handling CSV data files in the STIMULUS format.

    -
    -
  • -
  • - encoding - – -
    -

    Encoding package for data transformation.

    -
    -
  • -
  • - handlertorch - – -
    -

    This file provides the class API for handling data in PyTorch using the Dataset and DataLoader classes.

    -
    -
  • -
  • - loaders - – -
    -

    Loaders serve as interfaces between the CSV master class and custom methods.

    -
    -
  • -
  • - splitters - – -
    -

    This package provides splitter classes for splitting data into train, validation, and test sets.

    -
    -
  • -
  • - transform - – -
    -

    Transform package for data manipulation.

    -
    -
  • -
- - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/data/loaders/index.html b/reference/stimulus/data/loaders/index.html index b65c8adc..1ccab681 100644 --- a/reference/stimulus/data/loaders/index.html +++ b/reference/stimulus/data/loaders/index.html @@ -1,3609 +1,438 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.data.loaders - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- loaders

- -
- -

Loaders serve as interfaces between the CSV master class and custom methods.

-

Mainly, three types of custom methods are supported:

  • Encoders: methods for encoding data before it is fed into the model
  • Data transformers: methods for transforming data (i.e. augmenting, noising...)
  • Splitters: methods for splitting data into train, validation, and test sets

-

Loaders are built from an input config YAML file whose format is described in the documentation; an example can be found at tests/test_data/dna_experiment/dna_experiment_config_template.yaml.

- - - - - -

Modules:

-
    -
  • - data_transformation_generators - – -
    -

    This file contains noise generators classes for generating various types of noise.

    -
    -
  • -
  • - encoders - – -
    -

    This file contains encoders classes for encoding various types of data.

    -
    -
  • -
  • - splitters - – -
    -

    This file contains the splitter classes for splitting data accordingly.

    -
    -
  • -
  • - yaml_data - – -
    -

    Utility module for handling YAML configuration files and their validation.

    -
    -
  • -
- - - - -

Classes:

-
    -
  • - EncoderLoader - – -
    -

    Class for loading encoders from a config file.

    -
    -
  • -
  • - SplitLoader - – -
    -

    Class for loading splitters from a config file.

    -
    -
  • -
  • - TransformLoader - – -
    -

    Class for loading transformations from a config file.

    -
    -
  • -
- - - - - - - -
- - - - - - - - -
- - - -

- EncoderLoader - - -

-
EncoderLoader(seed: Optional[float] = None)
-
- -
- - -

Class for loading encoders from a config file.

- - - -

Parameters:

-
    -
  • - seed - (Optional[float], default: - None -) - – -
    -

    Random seed for reproducibility

    -
    -
  • -
- - - - - - - - - -

Methods:

- - - - -
- Source code in src/stimulus/data/loaders.py -
def __init__(self, seed: Optional[float] = None) -> None:
-    """Initialize the encoder loader.
-
-    Args:
-        seed: Random seed for reproducibility
-    """
-    self.seed = seed
-
-
- - - -
- - - - - - - - - -
- - -

- get_encoder - - -

-
get_encoder(
-    encoder_name: str, encoder_params: Optional[dict] = None
-) -> Any
-
- -
- -

Gets an encoder object from the encoders module and initializes it with the given parameters.

- - -

Parameters:

-
    -
  • - encoder_name - (str) - – -
    -

    The name of the encoder to get

    -
    -
  • -
  • - encoder_params - (dict, default: - None -) - – -
    -

    The parameters for the encoder

    -
    -
  • -
- - -

Returns:

-
    -
  • -Any ( Any -) – -
    -

    The encoder function for the specified field and parameters

    -
    -
  • -
- -
- Source code in src/stimulus/data/loaders.py -
def get_encoder(self, encoder_name: str, encoder_params: Optional[dict] = None) -> Any:
-    """Gets an encoder object from the encoders module and initializes it with the given parameters.
-
-    Args:
-        encoder_name (str): The name of the encoder to get
-        encoder_params (dict): The parameters for the encoder
-
-    Returns:
-        Any: The encoder function for the specified field and parameters
-    """
-    try:
-        return getattr(encoders, encoder_name)(**encoder_params)
-    except AttributeError:
-        logging.exception(f"Encoder '{encoder_name}' not found in the encoders module.")
-        logging.exception(
-            f"Available encoders: {[name for name, obj in encoders.__dict__.items() if isinstance(obj, type) and name not in ('ABC', 'Any')]}",
-        )
-        raise
-
-    except TypeError:
-        if encoder_params is None:
-            return getattr(encoders, encoder_name)()
-        logging.exception(f"Encoder '{encoder_name}' has incorrect parameters: {encoder_params}")
-        logging.exception(
-            f"Expected parameters for '{encoder_name}': {inspect.signature(getattr(encoders, encoder_name))}",
-        )
-        raise
-
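A hedged sketch of looking an encoder up by name; the column name "dna_seq" is hypothetical, and the encoder class comes from the encoders module documented above:

from stimulus.data.loaders import EncoderLoader

loader = EncoderLoader()
encoder = loader.get_encoder("TextOneHotEncoder", {"alphabet": "acgt"})
loader.set_encoder_as_attribute("dna_seq", encoder)
encode_all = loader.get_function_encode_all("dna_seq")
batch = encode_all(["acgt", "gtca"])  # (2, 4, 4) tensor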
-
-
- -
- -
- - -

- get_function_encode_all - - -

-
get_function_encode_all(field_name: str) -> Any
-
- -
- -

Gets the encoding function for a specific field.

- - -

Parameters:

-
    -
  • - field_name - (str) - – -
    -

    The field name to get the encoder for

    -
    -
  • -
- - -

Returns:

-
    -
  • -Any ( Any -) – -
    -

    The encode_all function for the specified field

    -
    -
  • -
- -
- Source code in src/stimulus/data/loaders.py -
def get_function_encode_all(self, field_name: str) -> Any:
-    """Gets the encoding function for a specific field.
-
-    Args:
-        field_name (str): The field name to get the encoder for
-
-    Returns:
-        Any: The encode_all function for the specified field
-    """
-    return getattr(self, field_name).encode_all
-
-
-
- -
- -
- - -

- initialize_column_encoders_from_config - - -

-
initialize_column_encoders_from_config(
-    column_config: YamlColumns,
-) -> None
-
- -
- -

Build the loader from a config dictionary.

- - -

Parameters:

-
    -
  • - column_config - (YamlColumns) - – -
    -

    Configuration dictionary containing field names (column_name) and their encoder specifications.

    -
    -
  • -
- -
- Source code in src/stimulus/data/loaders.py -
def initialize_column_encoders_from_config(self, column_config: yaml_data.YamlColumns) -> None:
-    """Build the loader from a config dictionary.
-
-    Args:
-        column_config (yaml_data.YamlColumns): Configuration dictionary containing field names (column_name) and their encoder specifications.
-    """
-    for field in column_config:
-        encoder = self.get_encoder(field.encoder[0].name, field.encoder[0].params)
-        self.set_encoder_as_attribute(field.column_name, encoder)
-
-
-
- -
- -
- - -

- set_encoder_as_attribute - - -

-
set_encoder_as_attribute(
-    field_name: str, encoder: AbstractEncoder
-) -> None
-
- -
- -

Sets the encoder as an attribute of the loader.

- - -

Parameters:

-
    -
  • - field_name - (str) - – -
    -

    The name of the field to set the encoder for

    -
    -
  • -
  • - encoder - (AbstractEncoder) - – -
    -

    The encoder to set

    -
    -
  • -
- -
- Source code in src/stimulus/data/loaders.py -
def set_encoder_as_attribute(self, field_name: str, encoder: encoders.AbstractEncoder) -> None:
-    """Sets the encoder as an attribute of the loader.
-
-    Args:
-        field_name (str): The name of the field to set the encoder for
-        encoder (encoders.AbstractEncoder): The encoder to set
-    """
-    setattr(self, field_name, encoder)
-
-
-
- -
- - - -
- -
- -
- -
- - - -

- SplitLoader - - -

-
SplitLoader(seed: Optional[float] = None)
-
- -
- - -

Class for loading splitters from a config file.

- - - -

Parameters:

-
    -
  • - seed - (Optional[float], default: - None -) - – -
    -

    Random seed for reproducibility

    -
    -
  • -
- - - - - - - - - -

Methods:

- - - - -
- Source code in src/stimulus/data/loaders.py -
def __init__(self, seed: Optional[float] = None) -> None:
-    """Initialize the split loader.
-
-    Args:
-        seed: Random seed for reproducibility
-    """
-    self.seed = seed
-
-
- - - -
- - - - - - - - - -
- - -

- get_function_split - - -

-
get_function_split() -> Any
-
- -
- -

Gets the function for splitting the data.

- - -

Returns:

-
    -
  • -Any ( Any -) – -
    -

    The split function for the specified method

    -
    -
  • -
- - -

Raises:

-
    -
  • - AttributeError - – -
    -

    If splitter hasn't been initialized using initialize_splitter_from_config()

    -
    -
  • -
- -
- Source code in src/stimulus/data/loaders.py -
def get_function_split(self) -> Any:
-    """Gets the function for splitting the data.
-
-    Returns:
-        Any: The split function for the specified method
-
-    Raises:
-        AttributeError: If splitter hasn't been initialized using initialize_splitter_from_config()
-    """
-    if not hasattr(self, "split"):
-        # Raise a more specific error and chain it to the original AttributeError
-        raise AttributeError(
-            "Splitter not initialized. Please call initialize_splitter_from_config() or set_splitter_as_attribute() "
-            "before attempting to get split function.",
-        )
-    return self.split.get_split_indexes
-
-
-
- -
- -
- - -

- get_splitter - - -

-
get_splitter(
-    splitter_name: str,
-    splitter_params: Optional[dict] = None,
-) -> Any
-
- -
- -

Gets a splitter object from the splitters module.

- - -

Parameters:

-
    -
  • - splitter_name - (str) - – -
    -

    The name of the splitter to get

    -
    -
  • -
  • - splitter_params - (Optional[dict], default: - None -) - – -
    -

    Parameters for the splitter

    -
    -
  • -
- - -

Returns:

-
    -
  • -Any ( Any -) – -
    -

    The splitter function for the specified splitter

    -
    -
  • -
- -
- Source code in src/stimulus/data/loaders.py -
def get_splitter(self, splitter_name: str, splitter_params: Optional[dict] = None) -> Any:
-    """Gets a splitter object from the splitters module.
-
-    Args:
-        splitter_name (str): The name of the splitter to get
-        splitter_params (Optional[dict]): Parameters for the splitter
-
-    Returns:
-        Any: The splitter function for the specified splitter
-    """
-    try:
-        return getattr(splitters, splitter_name)(**splitter_params)
-    except TypeError:
-        if splitter_params is None:
-            return getattr(splitters, splitter_name)()
-        logging.exception(f"Splitter '{splitter_name}' has incorrect parameters: {splitter_params}")
-        logging.exception(
-            f"Expected parameters for '{splitter_name}': {inspect.signature(getattr(splitters, splitter_name))}",
-        )
-        raise
-
-
-
- -
- -
- - -

- initialize_splitter_from_config - - -

-
initialize_splitter_from_config(
-    split_config: YamlSplit,
-) -> None
-
- -
- -

Build the loader from a config dictionary.

- - -

Parameters:

-
    -
  • - split_config - (YamlSplit) - – -
    -

    Configuration dictionary containing split configurations.

    -
    -
  • -
- -
- Source code in src/stimulus/data/loaders.py -
def initialize_splitter_from_config(self, split_config: yaml_data.YamlSplit) -> None:
-    """Build the loader from a config dictionary.
-
-    Args:
-        split_config (yaml_data.YamlSplit): Configuration dictionary containing split configurations.
-    """
-    splitter = self.get_splitter(split_config.split_method, split_config.params)
-    self.set_splitter_as_attribute(splitter)
-
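A hedged sketch of the intended call order; split_config is assumed to be a parsed yaml_data.YamlSplit with the split_method and params fields shown in the signature above:

from stimulus.data.loaders import SplitLoader

split_loader = SplitLoader(seed=42)
split_loader.initialize_splitter_from_config(split_config)  # split_config: yaml_data.YamlSplit
get_indexes = split_loader.get_function_split()
train_idx, val_idx, test_idx = get_indexes(data_dict)       # data_dict: {column_name: [values, ...]}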
-
-
- -
- -
- - -

- set_splitter_as_attribute - - -

-
set_splitter_as_attribute(splitter: Any) -> None
-
- -
- -

Sets the splitter as an attribute of the loader.

- - -

Parameters:

-
    -
  • - splitter - (Any) - – -
    -

    The splitter to set

    -
    -
  • -
- -
- Source code in src/stimulus/data/loaders.py -
def set_splitter_as_attribute(self, splitter: Any) -> None:
-    """Sets the splitter as an attribute of the loader.
-
-    Args:
-        splitter (Any): The splitter to set
-    """
-    self.split = splitter
-
-
-
- -
- - - -
- -
- -
- -
- - - -

- TransformLoader - - -

-
TransformLoader(seed: Optional[float] = None)
-
- -
- - -

Class for loading transformations from a config file.

- - - -

Parameters:

-
    -
  • - seed - (Optional[float], default: - None -) - – -
    -

    Random seed for reproducibility

    -
    -
  • -
- - - - - - - - - -

Methods:

- - - - -
- Source code in src/stimulus/data/loaders.py -
def __init__(self, seed: Optional[float] = None) -> None:
-    """Initialize the transform loader.
-
-    Args:
-        seed: Random seed for reproducibility
-    """
-    self.seed = seed
-
-
- - - -
- - - - - - - - - -
- - -

- get_data_transformer - - -

-
get_data_transformer(
-    transformation_name: str,
-    transformation_params: Optional[dict] = None,
-) -> Any
-
- -
- -

Gets a transformer object from the transformers module.

- - -

Parameters:

-
    -
  • - transformation_name - (str) - – -
    -

    The name of the transformer to get

    -
    -
  • -
  • - transformation_params - (Optional[dict], default: - None -) - – -
    -

    Parameters for the transformer

    -
    -
  • -
- - -

Returns:

-
    -
  • -Any ( Any -) – -
    -

    The transformer function for the specified transformation

    -
    -
  • -
- -
- Source code in src/stimulus/data/loaders.py -
def get_data_transformer(self, transformation_name: str, transformation_params: Optional[dict] = None) -> Any:
-    """Gets a transformer object from the transformers module.
-
-    Args:
-        transformation_name (str): The name of the transformer to get
-        transformation_params (Optional[dict]): Parameters for the transformer
-
-    Returns:
-        Any: The transformer function for the specified transformation
-    """
-    try:
-        return getattr(data_transformation_generators, transformation_name)(**transformation_params)
-    except AttributeError:
-        logging.exception(f"Transformer '{transformation_name}' not found in the transformers module.")
-        logging.exception(
-            f"Available transformers: {[name for name, obj in data_transformation_generators.__dict__.items() if isinstance(obj, type) and name not in ('ABC', 'Any')]}",
-        )
-        raise
-
-    except TypeError:
-        if transformation_params is None:
-            return getattr(data_transformation_generators, transformation_name)()
-        logging.exception(f"Transformer '{transformation_name}' has incorrect parameters: {transformation_params}")
-        logging.exception(
-            f"Expected parameters for '{transformation_name}': {inspect.signature(getattr(data_transformation_generators, transformation_name))}",
-        )
-        raise
-
-
-
- -
- -
- - -

- initialize_column_data_transformers_from_config - - -

-
initialize_column_data_transformers_from_config(
-    transform_config: YamlTransform,
-) -> None
-
- -
- -

Build the loader from a config dictionary.

- - -

Parameters:

-
    -
  • - transform_config - (YamlTransform) - – -
    -

    Configuration dictionary containing transforms configurations.

    -
    -
  • -
- - -
- Example -

Given a YAML config like: -

transforms:
-  transformation_name: noise
-  columns:
-    - column_name: age
-      transformations:
-        - name: GaussianNoise
-          params:
-            std: 0.1
-    - column_name: fare
-      transformations:
-        - name: GaussianNoise
-          params:
-            std: 0.1
-

-

The loader will:

  1. Iterate through each column (age, fare)
  2. For each transformation in the column:
     • Get the transformer (GaussianNoise) with its params (std=0.1)
     • Set it as an attribute on the loader using the column name as key

-
-
- Source code in src/stimulus/data/loaders.py -
def initialize_column_data_transformers_from_config(self, transform_config: yaml_data.YamlTransform) -> None:
-    """Build the loader from a config dictionary.
-
-    Args:
-        transform_config (yaml_data.YamlTransform): Configuration dictionary containing transforms configurations.
-
-    Example:
-        Given a YAML config like:
-        ```yaml
-        transforms:
-          transformation_name: noise
-          columns:
-            - column_name: age
-              transformations:
-                - name: GaussianNoise
-                  params:
-                    std: 0.1
-            - column_name: fare
-              transformations:
-                - name: GaussianNoise
-                  params:
-                    std: 0.1
-        ```
-
-        The loader will:
-        1. Iterate through each column (age, fare)
-        2. For each transformation in the column:
-           - Get the transformer (GaussianNoise) with its params (std=0.1)
-           - Set it as an attribute on the loader using the column name as key
-    """
-    for column in transform_config.columns:
-        col_name = column.column_name
-        for transform_spec in column.transformations:
-            transformer = self.get_data_transformer(transform_spec.name, transform_spec.params)
-            self.set_data_transformer_as_attribute(col_name, transformer)
-
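A hedged sketch pairing the YAML example above with the loader; transform_config is assumed to be the parsed yaml_data.YamlTransform for that YAML, and GaussianNoise's interface is taken from the abstract transformer contract:

from stimulus.data.loaders import TransformLoader

transform_loader = TransformLoader(seed=42)
transform_loader.initialize_column_data_transformers_from_config(transform_config)
# each configured column now holds a dict keyed by transformer class name:
noisers = transform_loader.age                       # {"GaussianNoise": <GaussianNoise std=0.1>}
noisy_ages = noisers["GaussianNoise"].transform_all([25, 33, 41])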
-
-
- -
- -
- - -

- set_data_transformer_as_attribute - - -

-
set_data_transformer_as_attribute(
-    field_name: str, data_transformer: Any
-) -> None
-
- -
- -

Sets the data transformer as an attribute of the loader.

- - -

Parameters:

-
    -
  • - field_name - (str) - – -
    -

    The name of the field to set the data transformer for

    -
    -
  • -
  • - data_transformer - (Any) - – -
    -

    The data transformer to set

    -
    -
  • -
- -
- Source code in src/stimulus/data/loaders.py -
def set_data_transformer_as_attribute(self, field_name: str, data_transformer: Any) -> None:
-    """Sets the data transformer as an attribute of the loader.
-
-    Args:
-        field_name (str): The name of the field to set the data transformer for
-        data_transformer (Any): The data transformer to set
-    """
-    # check if the field already exists, if it does not, initialize it to an empty dict
-    if not hasattr(self, field_name):
-        setattr(self, field_name, {data_transformer.__class__.__name__: data_transformer})
-    else:
-        field_value = getattr(self, field_name)
-        field_value[data_transformer.__class__.__name__] = data_transformer
-
-
-
- -
- - - -
- -
- -
- - - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/data/splitters/index.html b/reference/stimulus/data/splitters/index.html index e60aed39..caf2274e 100644 --- a/reference/stimulus/data/splitters/index.html +++ b/reference/stimulus/data/splitters/index.html @@ -1,2750 +1,211 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.data.splitters - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- splitters

- -
- -

This package provides splitter classes for splitting data into train, validation, and test sets.

- - - - - -

Modules:

-
    -
  • - splitters - – -
    -

    This file contains the splitter classes for splitting data accordingly.

    -
    -
  • -
- - - - -

Classes:

-
    -
  • - AbstractSplitter - – -
    -

    Abstract class for splitters.

    -
    -
  • -
  • - RandomSplit - – -
    -

    This splitter randomly splits the data.

    -
    -
  • -
- - - - - - - -
- - - - - - - - -
- - - -

- AbstractSplitter - - -

-
AbstractSplitter(seed: float = 42)
-
- -
-

- Bases: ABC

- - -

Abstract class for splitters.

-

A splitter splits the data into train, validation, and test sets.

- - -

Methods:

-
    -
  • - get_split_indexes - – -
    -

    calculates split indices for the data

    -
    -
  • -
  • - distance - – -
    -

    calculates the distance between two elements of the data

    -
    -
  • -
- - - -

Parameters:

-
    -
  • - seed - (float, default: - 42 -) - – -
    -

    Random seed for reproducibility

    -
    -
  • -
- - - - - - - - - -

Methods:

-
    -
  • - distance - – -
    -

    Calculates the distance between two elements of the data.

    -
    -
  • -
  • - get_split_indexes - – -
    -

    Splits the data. Always return indices mapping to the original list.

    -
    -
  • -
- - - -
- Source code in src/stimulus/data/splitters/splitters.py -
def __init__(self, seed: float = 42) -> None:
-    """Initialize the splitter.
-
-    Args:
-        seed: Random seed for reproducibility
-    """
-    self.seed = seed
-
-
- - - -
- - - - - - - - - -
- - -

- distance - - - - abstractmethod - - -

-
distance(data_one: Any, data_two: Any) -> float
-
- -
- -

Calculates the distance between two elements of the data.

-

This is an abstract method that should be implemented by the child class.

- - -

Parameters:

-
    -
  • - data_one - (Any) - – -
    -

    the first data point

    -
    -
  • -
  • - data_two - (Any) - – -
    -

    the second data point

    -
    -
  • -
- - -

Returns:

-
    -
  • -distance ( float -) – -
    -

    the distance between the two data points

    -
    -
  • -
- -
- Source code in src/stimulus/data/splitters/splitters.py -
@abstractmethod
-def distance(self, data_one: Any, data_two: Any) -> float:
-    """Calculates the distance between two elements of the data.
-
-    This is an abstract method that should be implemented by the child class.
-
-    Args:
-        data_one (Any): the first data point
-        data_two (Any): the second data point
-
-    Returns:
-        distance (float): the distance between the two data points
-    """
-    raise NotImplementedError
-
-
-
- -
- -
- - -

- get_split_indexes - - - - abstractmethod - - -

-
get_split_indexes(data: dict) -> tuple[list, list, list]
-
- -
- -

Splits the data. Always return indices mapping to the original list.

-

This is an abstract method that should be implemented by the child class.

- - -

Parameters:

-
    -
  • - data - (DataFrame) - – -
    -

    the data to be split

    -
    -
  • -
- - -

Returns:

-
    -
  • -split_indices ( list -) – -
    -

    the indices for train, validation, and test sets

    -
    -
  • -
- -
- Source code in src/stimulus/data/splitters/splitters.py -
@abstractmethod
-def get_split_indexes(self, data: dict) -> tuple[list, list, list]:
-    """Splits the data. Always return indices mapping to the original list.
-
-    This is an abstract method that should be implemented by the child class.
-
-    Args:
-        data (pl.DataFrame): the data to be split
-
-    Returns:
-        split_indices (list): the indices for train, validation, and test sets
-    """
-    raise NotImplementedError
-
-
-
- -
- - - -
- -
- -
- -
- - - -

- RandomSplit - - -

-
RandomSplit(split: Optional[list] = None, seed: int = 42)
-
- -
-

- Bases: AbstractSplitter

- - -

This splitter randomly splits the data.

- - - -

Parameters:

-
    -
  • - split - (Optional[list], default: - None -) - – -
    -

    List of proportions for train/val/test splits

    -
    -
  • -
  • - seed - (int, default: - 42 -) - – -
    -

    Random seed for reproducibility

    -
    -
  • -
- - - - - - - - - -

Methods:

-
    -
  • - distance - – -
    -

    Calculate distance between two data points.

    -
    -
  • -
  • - get_split_indexes - – -
    -

    Splits the data indices into train, validation, and test sets.

    -
    -
  • -
- - - -
- Source code in src/stimulus/data/splitters/splitters.py -
def __init__(self, split: Optional[list] = None, seed: int = 42) -> None:
-    """Initialize the random splitter.
-
-    Args:
-        split: List of proportions for train/val/test splits
-        seed: Random seed for reproducibility
-    """
-    super().__init__()
-    self.split = [0.7, 0.2, 0.1] if split is None else split
-    self.seed = seed
-    if len(self.split) != SPLIT_SIZE:
-        raise ValueError(
-            "The split argument should be a list with length 3 that contains the proportions for [train, validation, test] splits.",
-        )
-
-
- - - -
- - - - - - - - - -
- - -

- distance - - -

-
distance(data_one: Any, data_two: Any) -> float
-
- -
- -

Calculate distance between two data points.

- - -

Parameters:

-
    -
  • - data_one - (Any) - – -
    -

    First data point

    -
    -
  • -
  • - data_two - (Any) - – -
    -

    Second data point

    -
    -
  • -
- - -

Returns:

-
    -
  • - float - – -
    -

    Distance between the points

    -
    -
  • -
- -
- Source code in src/stimulus/data/splitters/splitters.py -
def distance(self, data_one: Any, data_two: Any) -> float:
-    """Calculate distance between two data points.
-
-    Args:
-        data_one: First data point
-        data_two: Second data point
-
-    Returns:
-        Distance between the points
-    """
-    raise NotImplementedError
-
-
-
- -
- -
- - -

- get_split_indexes - - -

-
get_split_indexes(data: dict) -> tuple[list, list, list]
-
- -
- -

Splits the data indices into train, validation, and test sets.

-

One can use these lists of indices to parse the data afterwards.

- - -

Parameters:

-
    -
  • - data - (dict) - – -
    -

    Dictionary mapping column names to lists of data values.

    -
    -
  • -
- - -

Returns:

-
    -
  • -train ( list -) – -
    -

    The indices for the training set.

    -
    -
  • -
  • -validation ( list -) – -
    -

    The indices for the validation set.

    -
    -
  • -
  • -test ( list -) – -
    -

    The indices for the test set.

    -
    -
  • -
- - -

Raises:

-
    -
  • - ValueError - – -
    -

    If the split argument is not a list with length 3.

    -
    -
  • -
  • - ValueError - – -
    -

    If the sum of the split proportions is not 1.

    -
    -
  • -
- -
- Source code in src/stimulus/data/splitters/splitters.py -
def get_split_indexes(
-    self,
-    data: dict,
-) -> tuple[list, list, list]:
-    """Splits the data indices into train, validation, and test sets.
-
-    One can use these lists of indices to parse the data afterwards.
-
-    Args:
-        data (dict): Dictionary mapping column names to lists of data values.
-
-    Returns:
-        train (list): The indices for the training set.
-        validation (list): The indices for the validation set.
-        test (list): The indices for the test set.
-
-    Raises:
-        ValueError: If the split argument is not a list with length 3.
-        ValueError: If the sum of the split proportions is not 1.
-    """
-    # Use round to avoid errors due to floating point imprecisions
-    if round(sum(self.split), 3) < 1.0:
-        raise ValueError(f"The sum of the split proportions should be 1. Instead, it is {sum(self.split)}.")
-
-    if not data:
-        raise ValueError("No data provided for splitting")
-    # Get length from first column's data list
-    length_of_data = len(next(iter(data.values())))
-
-    # Generate a list of indices and shuffle it
-    indices = np.arange(length_of_data)
-    np.random.seed(self.seed)
-    np.random.shuffle(indices)
-
-    # Calculate the sizes of the train, validation, and test sets
-    train_size = int(self.split[0] * length_of_data)
-    validation_size = int(self.split[1] * length_of_data)
-
-    # Split the shuffled indices according to the calculated sizes
-    train = indices[:train_size].tolist()
-    validation = indices[train_size : train_size + validation_size].tolist()
-    test = indices[train_size + validation_size :].tolist()
-
-    return train, validation, test
-
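A short usage sketch with the default 70/20/10 proportions, assuming RandomSplit is importable from the splitters package as the class list above suggests:

from stimulus.data.splitters import RandomSplit

splitter = RandomSplit(split=[0.7, 0.2, 0.1], seed=42)
data = {"sequence": ["acgt", "gtca", "aacc", "ggtt", "tttt", "cccc", "atat", "gcgc", "ttaa", "ccgg"]}
train, validation, test = splitter.get_split_indexes(data)
print(len(train), len(validation), len(test))  # 7 2 1 for 10 rows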
-
-
- -
- - - -
- -
- -
- - - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/data/splitters/splitters/index.html b/reference/stimulus/data/splitters/splitters/index.html index e172349e..18339ae5 100644 --- a/reference/stimulus/data/splitters/splitters/index.html +++ b/reference/stimulus/data/splitters/splitters/index.html @@ -1,2844 +1,211 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.data.splitters.splitters - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- splitters

- -
- -

This file contains the splitter classes for splitting data accordingly.

- - - - - - - -

Classes:

-
    -
  • - AbstractSplitter - – -
    -

    Abstract class for splitters.

    -
    -
  • -
  • - RandomSplit - – -
    -

    This splitter randomly splits the data.

    -
    -
  • -
- - - - - - - -
- - - - - - - - -
- - - -

- AbstractSplitter - - -

-
AbstractSplitter(seed: float = 42)
-
- -
-

- Bases: ABC

- - -

Abstract class for splitters.

-

A splitter splits the data into train, validation, and test sets.

- - -

Methods:

-
    -
  • - get_split_indexes - – -
    -

    calculates split indices for the data

    -
    -
  • -
  • - distance - – -
    -

    calculates the distance between two elements of the data

    -
    -
  • -
- - - -

Parameters:

-
    -
  • - seed - (float, default: - 42 -) - – -
    -

    Random seed for reproducibility

    -
    -
  • -
- - - - - - - - - -

Methods:

-
    -
  • - distance - – -
    -

    Calculates the distance between two elements of the data.

    -
    -
  • -
  • - get_split_indexes - – -
    -

    Splits the data. Always return indices mapping to the original list.

    -
    -
  • -
- - - -
- Source code in src/stimulus/data/splitters/splitters.py -
def __init__(self, seed: float = 42) -> None:
-    """Initialize the splitter.
-
-    Args:
-        seed: Random seed for reproducibility
-    """
-    self.seed = seed
-
-
- - - -
- - - - - - - - - -
- - -

- distance - - - - abstractmethod - - -

-
distance(data_one: Any, data_two: Any) -> float
-
- -
- -

Calculates the distance between two elements of the data.

-

This is an abstract method that should be implemented by the child class.

- - -

Parameters:

-
    -
  • - data_one - (Any) - – -
    -

    the first data point

    -
    -
  • -
  • - data_two - (Any) - – -
    -

    the second data point

    -
    -
  • -
- - -

Returns:

-
    -
  • -distance ( float -) – -
    -

    the distance between the two data points

    -
    -
  • -
- -
- Source code in src/stimulus/data/splitters/splitters.py -
@abstractmethod
-def distance(self, data_one: Any, data_two: Any) -> float:
-    """Calculates the distance between two elements of the data.
-
-    This is an abstract method that should be implemented by the child class.
-
-    Args:
-        data_one (Any): the first data point
-        data_two (Any): the second data point
-
-    Returns:
-        distance (float): the distance between the two data points
-    """
-    raise NotImplementedError
-
-
-
- -
- -
- - -

- get_split_indexes - - - - abstractmethod - - -

-
get_split_indexes(data: dict) -> tuple[list, list, list]
-
- -
- -

Splits the data. Always return indices mapping to the original list.

-

This is an abstract method that should be implemented by the child class.

- - -

Parameters:

-
    -
  • - data - (DataFrame) - – -
    -

    the data to be split

    -
    -
  • -
- - -

Returns:

-
    -
  • -split_indices ( list -) – -
    -

    the indices for train, validation, and test sets

    -
    -
  • -
- -
- Source code in src/stimulus/data/splitters/splitters.py -
@abstractmethod
-def get_split_indexes(self, data: dict) -> tuple[list, list, list]:
-    """Splits the data. Always return indices mapping to the original list.
-
-    This is an abstract method that should be implemented by the child class.
-
-    Args:
-        data (pl.DataFrame): the data to be split
-
-    Returns:
-        split_indices (list): the indices for train, validation, and test sets
-    """
-    raise NotImplementedError
-
-
-
- -
- - - -
- -
- -
- -
- - - -

- RandomSplit - - -

-
RandomSplit(split: Optional[list] = None, seed: int = 42)
-
- -
-

- Bases: AbstractSplitter

- - -

This splitter randomly splits the data.

- - - -

Parameters:

-
    -
  • - split - (Optional[list], default: - None -) - – -
    -

    List of proportions for train/val/test splits

    -
    -
  • -
  • - seed - (int, default: - 42 -) - – -
    -

    Random seed for reproducibility

    -
    -
  • -
- - - - - - - - - -

Methods:

-
    -
  • - distance - – -
    -

    Calculate distance between two data points.

    -
    -
  • -
  • - get_split_indexes - – -
    -

    Splits the data indices into train, validation, and test sets.

    -
    -
  • -
- - - -
- Source code in src/stimulus/data/splitters/splitters.py -
def __init__(self, split: Optional[list] = None, seed: int = 42) -> None:
-    """Initialize the random splitter.
-
-    Args:
-        split: List of proportions for train/val/test splits
-        seed: Random seed for reproducibility
-    """
-    super().__init__()
-    self.split = [0.7, 0.2, 0.1] if split is None else split
-    self.seed = seed
-    if len(self.split) != SPLIT_SIZE:
-        raise ValueError(
-            "The split argument should be a list with length 3 that contains the proportions for [train, validation, test] splits.",
-        )
-
-
- - - -
- - - - - - - - - -
- - -

- distance - - -

-
distance(data_one: Any, data_two: Any) -> float
-
- -
- -

Calculate distance between two data points.

- - -

Parameters:

-
    -
  • - data_one - (Any) - – -
    -

    First data point

    -
    -
  • -
  • - data_two - (Any) - – -
    -

    Second data point

    -
    -
  • -
- - -

Returns:

-
    -
  • - float - – -
    -

    Distance between the points

    -
    -
  • -
- -
- Source code in src/stimulus/data/splitters/splitters.py -
def distance(self, data_one: Any, data_two: Any) -> float:
-    """Calculate distance between two data points.
-
-    Args:
-        data_one: First data point
-        data_two: Second data point
-
-    Returns:
-        Distance between the points
-    """
-    raise NotImplementedError
-
-
-
- -
- -
- - -

- get_split_indexes - - -

-
get_split_indexes(data: dict) -> tuple[list, list, list]
-
- -
- -

Splits the data indices into train, validation, and test sets.

-

One can use these lists of indices to parse the data afterwards.

- - -

Parameters:

-
    -
  • - data - (dict) - – -
    -

    Dictionary mapping column names to lists of data values.

    -
    -
  • -
- - -

Returns:

-
    -
  • -train ( list -) – -
    -

    The indices for the training set.

    -
    -
  • -
  • -validation ( list -) – -
    -

    The indices for the validation set.

    -
    -
  • -
  • -test ( list -) – -
    -

    The indices for the test set.

    -
    -
  • -
- - -

Raises:

-
    -
  • - ValueError - – -
    -

    If the split argument is not a list with length 3.

    -
    -
  • -
  • - ValueError - – -
    -

    If the sum of the split proportions is not 1.

    -
    -
  • -
- -
- Source code in src/stimulus/data/splitters/splitters.py -
def get_split_indexes(
-    self,
-    data: dict,
-) -> tuple[list, list, list]:
-    """Splits the data indices into train, validation, and test sets.
-
-    One can use these lists of indices to parse the data afterwards.
-
-    Args:
-        data (dict): Dictionary mapping column names to lists of data values.
-
-    Returns:
-        train (list): The indices for the training set.
-        validation (list): The indices for the validation set.
-        test (list): The indices for the test set.
-
-    Raises:
-        ValueError: If the split argument is not a list with length 3.
-        ValueError: If the sum of the split proportions is not 1.
-    """
-    # Use round to avoid errors due to floating point imprecisions
-    if round(sum(self.split), 3) < 1.0:
-        raise ValueError(f"The sum of the split proportions should be 1. Instead, it is {sum(self.split)}.")
-
-    if not data:
-        raise ValueError("No data provided for splitting")
-    # Get length from first column's data list
-    length_of_data = len(next(iter(data.values())))
-
-    # Generate a list of indices and shuffle it
-    indices = np.arange(length_of_data)
-    np.random.seed(self.seed)
-    np.random.shuffle(indices)
-
-    # Calculate the sizes of the train, validation, and test sets
-    train_size = int(self.split[0] * length_of_data)
-    validation_size = int(self.split[1] * length_of_data)
-
-    # Split the shuffled indices according to the calculated sizes
-    train = indices[:train_size].tolist()
-    validation = indices[train_size : train_size + validation_size].tolist()
-    test = indices[train_size + validation_size :].tolist()
-
-    return train, validation, test
-
-
-
- -
- - - -
- -
- -
- - - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/data/transform/data_transformation_generators/index.html b/reference/stimulus/data/transform/data_transformation_generators/index.html index 0a39e8b6..016af3ec 100644 --- a/reference/stimulus/data/transform/data_transformation_generators/index.html +++ b/reference/stimulus/data/transform/data_transformation_generators/index.html @@ -1,4539 +1,539 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.data.transform.data_transformation_generators - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- data_transformation_generators

- -
- -

This file contains noise generators classes for generating various types of noise.

- - - - - - - -

Classes:

- - - - - - - - -
- - - - - - - - -
- - - -

- AbstractAugmentationGenerator - - -

-
AbstractAugmentationGenerator()
-
- -
-

- Bases: AbstractDataTransformer

- - -

Abstract class for augmentation generators.

-

All augmentation functions should carry the seed, because multiprocessing may unset it.

- - - - - - - - - -

Methods:

-
    -
  • - transform - – -
    -

    Transforms a single data point.

    -
    -
  • -
  • - transform_all - – -
    -

    Transforms a list of data points.

    -
    -
  • -
- - - -
- Source code in src/stimulus/data/transform/data_transformation_generators.py -
def __init__(self) -> None:
-    """Initialize the augmentation generator."""
-    super().__init__()
-    self.add_row = True
-
-
- - - -
- - - - - - - - - -
- - -

- transform - - - - abstractmethod - - -

-
transform(data: Any) -> Any
-
- -
- -

Transforms a single data point.

-

This is an abstract method that should be implemented by the child class.

- - -

Parameters:

-
    -
  • - data - (Any) - – -
    -

    the data to be transformed

    -
    -
  • -
- - -

Returns:

-
    -
  • -transformed_data ( Any -) – -
    -

    the transformed data

    -
    -
  • -
- -
- Source code in src/stimulus/data/transform/data_transformation_generators.py -
@abstractmethod
-def transform(self, data: Any) -> Any:
-    """Transforms a single data point.
-
-    This is an abstract method that should be implemented by the child class.
-
-    Args:
-        data (Any): the data to be transformed
-
-    Returns:
-        transformed_data (Any): the transformed data
-    """
-    #  np.random.seed(self.seed)
-    raise NotImplementedError
-
-
-
- -
- -
- - -

- transform_all - - - - abstractmethod - - -

-
transform_all(data: list) -> list
-
- -
- -

Transforms a list of data points.

-

This is an abstract method that should be implemented by the child class.

- - -

Parameters:

-
    -
  • - data - (list) - – -
    -

    the data to be transformed

    -
    -
  • -
- - -

Returns:

-
    -
  • -transformed_data ( list -) – -
    -

    the transformed data

    -
    -
  • -
- -
- Source code in src/stimulus/data/transform/data_transformation_generators.py -
@abstractmethod
-def transform_all(self, data: list) -> list:
-    """Transforms a list of data points.
-
-    This is an abstract method that should be implemented by the child class.
-
-    Args:
-        data (list): the data to be transformed
-
-    Returns:
-        transformed_data (list): the transformed data
-    """
-    #  np.random.seed(self.seed)
-    raise NotImplementedError
-
-
-
- -
- - - -
- -
- -
- -
- - - -


AbstractDataTransformer

    AbstractDataTransformer()

Bases: ABC

Abstract class for data transformers.

Data transformers implement in_place or augmentation transformations. Whether a transformer is in_place or augmentation is specified by the "add_row" attribute (True or False, set in the child class constructor).

Child classes should override the transform and transform_all methods.

transform_all should always return a list.

Both methods should take an optional seed argument set to None by default to be compliant with stimulus' core principle of reproducibility. The seed should be initialized through np.random.seed(seed) in the method implementation.

Attributes:

  • add_row (bool) – whether the transformer adds rows to the data

Methods:

  • transform – Transforms a single data point.
  • transform_all – Transforms a list of data points.

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 31–34):
def __init__(self) -> None:
-    """Initialize the data transformer."""
-    self.add_row: bool = False
-    self.seed: int = 42

transform (abstractmethod)

    transform(data: Any) -> Any

Transforms a single data point.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (Any) – the data to be transformed

Returns:

  • transformed_data (Any) – the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 36–49):
@abstractmethod
-def transform(self, data: Any) -> Any:
-    """Transforms a single data point.
-
-    This is an abstract method that should be implemented by the child class.
-
-    Args:
-        data (Any): the data to be transformed
-
-    Returns:
-        transformed_data (Any): the transformed data
-    """
-    #  np.random.seed(self.seed)
-    raise NotImplementedError

transform_all (abstractmethod)

    transform_all(data: list) -> list

Transforms a list of data points.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (list) – the data to be transformed

Returns:

  • transformed_data (list) – the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 51–64):
@abstractmethod
-def transform_all(self, data: list) -> list:
-    """Transforms a list of data points.
-
-    This is an abstract method that should be implemented by the child class.
-
-    Args:
-        data (list): the data to be transformed
-
-    Returns:
-        transformed_data (list): the transformed data
-    """
-    #  np.random.seed(self.seed)
-    raise NotImplementedError
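
As an illustration, a minimal concrete transformer might look like the following sketch. The class name, the offset parameter and its behaviour are hypothetical; only the AbstractDataTransformer base class and the transform/transform_all contract come from the documentation above.

    from stimulus.data.transform.data_transformation_generators import AbstractDataTransformer


    class AddConstantOffset(AbstractDataTransformer):
        """Hypothetical in-place transformer that shifts numeric values by a constant."""

        def __init__(self, offset: float = 1.0) -> None:
            super().__init__()
            self.add_row = False  # in_place transformation: no extra rows are added
            self.offset = offset

        def transform(self, data: float) -> float:
            # deterministic, so no np.random.seed(...) call is needed here
            return data + self.offset

        def transform_all(self, data: list) -> list:
            # transform_all should always return a list
            return [self.transform(point) for point in data]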

AbstractNoiseGenerator

    AbstractNoiseGenerator()

Bases: AbstractDataTransformer

Abstract class for noise generators.

All noise functions should carry the seed with them, because multiprocessing could otherwise unset the seed.

Methods:

  • transform – Transforms a single data point.
  • transform_all – Transforms a list of data points.

(Both abstract methods are inherited from AbstractDataTransformer and documented above.)

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 73–76):
def __init__(self) -> None:
-    """Initialize the noise generator."""
-    super().__init__()
-    self.add_row = False

GaussianChunk

    GaussianChunk(chunk_size: int, seed: int = 42, std: float = 1)

Bases: AbstractAugmentationGenerator

Subset data around a random midpoint.

This augmentation strategy chunks the input sequences; the middle positions are drawn from a Gaussian distribution.

Concretely, it moves the middle position (i.e. the peak summit) to another position chosen from a Gaussian distribution, so regions close to the original midpoint are more likely to be chosen than the rest. A chunk of size chunk_size around the new middle point is then returned. This process is repeated for each sequence by transform_all.

Parameters:

  • chunk_size (int) – Size of chunks to extract
  • seed (int, default: 42) – Random seed for reproducibility
  • std (float, default: 1) – Standard deviation for the Gaussian distribution

Methods:

  • transform – Chunks a sequence of size chunk_size from the middle position +/- a value obtained through a Gaussian distribution.
  • transform_all – Adds chunks to multiple lists using multiprocessing.

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 257–268):
def __init__(self, chunk_size: int, seed: int = 42, std: float = 1) -> None:
-    """Initialize the Gaussian chunk generator.
-
-    Args:
-        chunk_size: Size of chunks to extract
-        seed: Random seed for reproducibility
-        std: Standard deviation for the Gaussian distribution
-    """
-    super().__init__()
-    self.chunk_size = chunk_size
-    self.seed = seed
-    self.std = std

transform

    transform(data: str) -> str

Chunks a sequence of size chunk_size from the middle position +/- a value obtained through a Gaussian distribution.

Parameters:

  • data (str) – the sequence to be transformed

Returns:

  • transformed_data (str) – the chunk of the sequence

Raises:

  • ValueError – if the input data is shorter than the chunk size

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 270–305):
def transform(self, data: str) -> str:
-    """Chunks a sequence of size chunk_size from the middle position +/- a value obtained through a gaussian distribution.
-
-    Args:
-        data (str): the sequence to be transformed
-
-    Returns:
-        transformed_data (str): the chunk of the sequence
-
-    Raises:
-        AssertionError: if the input data is shorter than the chunk size
-    """
-    np.random.seed(self.seed)
-
-    # make sure that the data is longer than chunk_size otherwise raise an error
-    if len(data) <= self.chunk_size:
-        raise ValueError("The input data is shorter than the chunk size")
-
-    # Get the middle position of the input sequence
-    middle_position = len(data) // 2
-
-    # Change the middle position by a value obtained through a gaussian distribution
-    new_middle_position = int(middle_position + np.random.normal(0, self.std))
-
-    # Get the start and end position of the chunk
-    start_position = new_middle_position - self.chunk_size // 2
-    end_position = new_middle_position + self.chunk_size // 2
-
-    # if the start position is negative, set it to 0
-    start_position = max(start_position, 0)
-
-    # Get the chunk of size chunk_size from the start position if the end position is smaller than the length of the data
-    if end_position < len(data):
-        return data[start_position : start_position + self.chunk_size]
-    # Otherwise return the chunk of the sequence from the end of the sequence of size chunk_size
-    return data[-self.chunk_size :]

transform_all

    transform_all(data: list) -> list

Adds chunks to multiple lists using multiprocessing.

Parameters:

  • data (list) – the sequences to be transformed

Returns:

  • transformed_data (list) – the transformed sequences

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 307–318):
def transform_all(self, data: list) -> list:
-    """Adds chunks to multiple lists using multiprocessing.
-
-    Args:
-        data (list): the sequences to be transformed
-
-    Returns:
-        transformed_data (list): the transformed sequences
-    """
-    with mp.Pool(mp.cpu_count()) as pool:
-        function_specific_input = list(data)
-        return pool.starmap(self.transform, function_specific_input)
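
A minimal usage sketch (the example sequence is made up; GaussianChunk and its parameters are from the documentation above):

    from stimulus.data.transform.data_transformation_generators import GaussianChunk

    chunker = GaussianChunk(chunk_size=10, seed=42, std=2.0)
    sequence = "ACGTACGTACGTACGTACGT"  # 20 bases, longer than chunk_size
    chunk = chunker.transform(sequence)
    assert len(chunk) == 10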

GaussianNoise

    GaussianNoise(mean: float = 0, std: float = 1, seed: int = 42)

Bases: AbstractNoiseGenerator

Add Gaussian noise to data.

This noise generator adds Gaussian noise to float values.

Parameters:

  • mean (float, default: 0) – Mean of the Gaussian noise
  • std (float, default: 1) – Standard deviation of the Gaussian noise
  • seed (int, default: 42) – Random seed for reproducibility

Methods:

  • transform – Adds Gaussian noise to a single point of data.
  • transform_all – Adds Gaussian noise to a list of data points.

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 151–162):
def __init__(self, mean: float = 0, std: float = 1, seed: int = 42) -> None:
-    """Initialize the Gaussian noise generator.
-
-    Args:
-        mean: Mean of the Gaussian noise
-        std: Standard deviation of the Gaussian noise
-        seed: Random seed for reproducibility
-    """
-    super().__init__()
-    self.mean = mean
-    self.std = std
-    self.seed = seed

transform

    transform(data: float) -> float

Adds Gaussian noise to a single point of data.

Parameters:

  • data (float) – the data to be transformed

Returns:

  • transformed_data (float) – the transformed data point

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 164–174):
def transform(self, data: float) -> float:
-    """Adds Gaussian noise to a single point of data.
-
-    Args:
-        data (float): the data to be transformed
-
-    Returns:
-        transformed_data (float): the transformed data point
-    """
-    np.random.seed(self.seed)
-    return data + np.random.normal(self.mean, self.std)

transform_all

    transform_all(data: list) -> list

Adds Gaussian noise to a list of data points.

Parameters:

  • data (list) – the data to be transformed

Returns:

  • transformed_data (list) – the transformed data points

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 176–186):
def transform_all(self, data: list) -> list:
-    """Adds Gaussian noise to a list of data points.
-
-    Args:
-        data (list): the data to be transformed
-
-    Returns:
-        transformed_data (list): the transformed data points
-    """
-    np.random.seed(self.seed)
-    return list(np.array(data) + np.random.normal(self.mean, self.std, len(data)))
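
A quick usage sketch (the input values are made up; the class and its defaults come from the documentation above):

    from stimulus.data.transform.data_transformation_generators import GaussianNoise

    noiser = GaussianNoise(mean=0.0, std=0.5, seed=42)
    noisy_value = noiser.transform(1.0)                    # single float
    noisy_values = noiser.transform_all([0.0, 1.0, 2.0])   # list of floats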

ReverseComplement

    ReverseComplement(sequence_type: str = 'DNA')

Bases: AbstractAugmentationGenerator

Reverse complement biological sequences.

This augmentation strategy reverse complements the input nucleotide sequences.

Parameters:

  • sequence_type (str, default: 'DNA') – Type of sequence ('DNA' or 'RNA')

Raises:

  • ValueError – if the sequence type is not DNA or RNA

Methods:

  • transform – Returns the reverse complement of a sequence using the complement_mapping.
  • transform_all – Reverse complements multiple data points using multiprocessing.

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 202–216):
def __init__(self, sequence_type: str = "DNA") -> None:
-    """Initialize the reverse complement generator.
-
-    Args:
-        sequence_type: Type of sequence ('DNA' or 'RNA')
-    """
-    super().__init__()
-    if sequence_type not in ("DNA", "RNA"):
-        raise ValueError(
-            "Currently only DNA and RNA sequences are supported. Update the class ReverseComplement to support other types.",
-        )
-    if sequence_type == "DNA":
-        self.complement_mapping = str.maketrans("ATCG", "TAGC")
-    elif sequence_type == "RNA":
-        self.complement_mapping = str.maketrans("AUCG", "UAGC")

transform

    transform(data: str) -> str

Returns the reverse complement of a sequence using the complement_mapping.

Parameters:

  • data (str) – the sequence to be transformed

Returns:

  • transformed_data (str) – the reverse complement of the sequence

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 218–227):
def transform(self, data: str) -> str:
-    """Returns the reverse complement of a list of string data using the complement_mapping.
-
-    Args:
-        data (str): the sequence to be transformed
-
-    Returns:
-        transformed_data (str): the reverse complement of the sequence
-    """
-    return data.translate(self.complement_mapping)[::-1]

transform_all

    transform_all(data: list) -> list

Reverse complements multiple data points using multiprocessing.

Parameters:

  • data (list) – the sequences to be transformed

Returns:

  • transformed_data (list) – the reverse complement of the sequences

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 229–240):
def transform_all(self, data: list) -> list:
-    """Reverse complement multiple data points using multiprocessing.
-
-    Args:
-        data (list): the sequences to be transformed
-
-    Returns:
-        transformed_data (list): the reverse complement of the sequences
-    """
-    with mp.Pool(mp.cpu_count()) as pool:
-        function_specific_input = list(data)
-        return pool.map(self.transform, function_specific_input)
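
A small usage sketch (the example sequence is made up; the class and its parameter are from the documentation above):

    from stimulus.data.transform.data_transformation_generators import ReverseComplement

    rc = ReverseComplement(sequence_type="DNA")
    assert rc.transform("ATCG") == "CGAT"  # complement "TAGC", then reversed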

UniformTextMasker

    UniformTextMasker(probability: float = 0.1, mask: str = "*", seed: int = 42)

Bases: AbstractNoiseGenerator

Mask characters in text.

This noise generator replaces characters with a masking character with a given probability.

Parameters:

  • probability (float, default: 0.1) – Probability of masking each character
  • mask (str, default: '*') – Character to use for masking
  • seed (int, default: 42) – Random seed for reproducibility

Methods:

  • transform – Adds character masking to a single data point.
  • transform_all – Adds character masking to multiple data points using multiprocessing.

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 101–112):
def __init__(self, probability: float = 0.1, mask: str = "*", seed: int = 42) -> None:
-    """Initialize the text masker.
-
-    Args:
-        probability: Probability of masking each character
-        mask: Character to use for masking
-        seed: Random seed for reproducibility
-    """
-    super().__init__()
-    self.probability = probability
-    self.mask = mask
-    self.seed = seed

transform

    transform(data: str) -> str

Adds character masking to the data.

Parameters:

  • data (str) – the data to be transformed

Returns:

  • transformed_data (str) – the transformed data point

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 114–124):
def transform(self, data: str) -> str:
-    """Adds character masking to the data.
-
-    Args:
-        data (str): the data to be transformed
-
-    Returns:
-        transformed_data (str): the transformed data point
-    """
-    np.random.seed(self.seed)
-    return "".join([c if np.random.rand() > self.probability else self.mask for c in data])

transform_all

    transform_all(data: list) -> list

Adds character masking to multiple data points using multiprocessing.

Parameters:

  • data (list) – the data to be transformed

Returns:

  • transformed_data (list) – the transformed data points

Source code in src/stimulus/data/transform/data_transformation_generators.py (lines 126–138):
def transform_all(self, data: list) -> list:
-    """Adds character masking to multiple data points using multiprocessing.
-
-    Args:
-        data (list): the data to be transformed
-
-
-    Returns:
-        transformed_data (list): the transformed data points
-    """
-    with mp.Pool(mp.cpu_count()) as pool:
-        function_specific_input = list(data)
-        return pool.starmap(self.transform, function_specific_input)
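
A short usage sketch (the example string is made up; the class and its defaults come from the documentation above):

    from stimulus.data.transform.data_transformation_generators import UniformTextMasker

    masker = UniformTextMasker(probability=0.2, mask="*", seed=42)
    masked = masker.transform("ACGTACGT")  # roughly 20% of characters become "*"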
\ No newline at end of file
diff --git a/reference/stimulus/data/transform/index.html b/reference/stimulus/data/transform/index.html
index df783f11..7f3be978 100644
--- a/reference/stimulus/data/transform/index.html
+++ b/reference/stimulus/data/transform/index.html
@@ -1,1999 +1,5 @@
transform

Transform package for data manipulation.

Modules:

  • data_transformation_generators – Noise and augmentation generator classes for data transformation.
\ No newline at end of file
diff --git a/reference/stimulus/debug/index.html b/reference/stimulus/debug/index.html
index 56419a64..5305f10a 100644
--- a/reference/stimulus/debug/index.html
+++ b/reference/stimulus/debug/index.html
@@ -1,3079 +1,107 @@
debug

Debugging utilities.

Classes:

  • Environment – Dataclass to store environment information.
  • Package – Dataclass describing a Python package.
  • Variable – Dataclass describing an environment variable.

Functions:

  • get_debug_info – Get debug/environment information.
  • get_version – Get version of the given distribution.
  • print_debug_info – Print debug/environment information.

Environment (dataclass)

    Environment(
        interpreter_name: str,
        interpreter_version: str,
        interpreter_path: str,
        platform: str,
        packages: list[Package],
        variables: list[Variable],
    )

Dataclass to store environment information.

Attributes:

  • interpreter_name (str) – Python interpreter name.
  • interpreter_version (str) – Python interpreter version.
  • interpreter_path (str) – Path to Python executable.
  • platform (str) – Operating System.
  • packages (list[Package]) – Installed packages.
  • variables (list[Variable]) – Environment variables.

Package (dataclass)

    Package(name: str, version: str)

Dataclass describing a Python package.

Attributes:

  • name (str) – Package name.
  • version (str) – Package version.

Variable (dataclass)

    Variable(name: str, value: str)

Dataclass describing an environment variable.

Attributes:

  • name (str) – Variable name.
  • value (str) – Variable value.

get_debug_info

    get_debug_info() -> Environment

Get debug/environment information.

Returns:

  • Environment – Environment information.

Source code in src/stimulus/debug.py (lines 76–92):
def get_debug_info() -> Environment:
-    """Get debug/environment information.
-
-    Returns:
-        Environment information.
-    """
-    py_name, py_version = _interpreter_name_version()
-    packages = ["stimulus-py"]
-    variables = ["PYTHONPATH", *[var for var in os.environ if var.startswith("STIMULUS_PY")]]
-    return Environment(
-        interpreter_name=py_name,
-        interpreter_version=py_version,
-        interpreter_path=sys.executable,
-        platform=platform.platform(),
-        variables=[Variable(var, val) for var in variables if (val := os.getenv(var))],
-        packages=[Package(pkg, get_version(pkg)) for pkg in packages],
-    )

get_version

    get_version(dist: str = 'stimulus-py') -> str

Get version of the given distribution.

Parameters:

  • dist (str, default: 'stimulus-py') – A distribution name.

Returns:

  • str – A version number.

Source code in src/stimulus/debug.py (lines 61–73):
def get_version(dist: str = "stimulus-py") -> str:
-    """Get version of the given distribution.
-
-    Parameters:
-        dist: A distribution name.
-
-    Returns:
-        A version number.
-    """
-    try:
-        return metadata.version(dist)
-    except metadata.PackageNotFoundError:
-        return "0.0.0"

print_debug_info

    print_debug_info() -> None

Print debug/environment information.

Source code in src/stimulus/debug.py (lines 95–105):
def print_debug_info() -> None:
-    """Print debug/environment information."""
-    info = get_debug_info()
-    print(f"- __System__: {info.platform}")
-    print(f"- __Python__: {info.interpreter_name} {info.interpreter_version} ({info.interpreter_path})")
-    print("- __Environment variables__:")
-    for var in info.variables:
-        print(f"  - `{var.name}`: `{var.value}`")
-    print("- __Installed packages__:")
-    for pkg in info.packages:
-        print(f"  - `{pkg.name}` v{pkg.version}")
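
A minimal usage sketch (the import path follows the module name shown above):

    from stimulus import debug

    debug.print_debug_info()       # prints system, Python, env vars and package versions
    version = debug.get_version()  # defaults to the "stimulus-py" distribution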
\ No newline at end of file
diff --git a/reference/stimulus/index.html b/reference/stimulus/index.html
index e3969338..5c4ac22e 100644
--- a/reference/stimulus/index.html
+++ b/reference/stimulus/index.html
@@ -1,2030 +1,5 @@
stimulus

stimulus-py package.

Modules:

  • cli – Command line interface package for the stimulus library.
  • data – Data handling and processing module.
  • debug – Debugging utilities.
  • learner – Learner package for model training and evaluation.
  • typing – Typing for Stimulus Python API.
  • utils – Utility functions package.
\ No newline at end of file
diff --git a/reference/stimulus/learner/index.html b/reference/stimulus/learner/index.html
index 749af216..9bd56509 100644
--- a/reference/stimulus/learner/index.html
+++ b/reference/stimulus/learner/index.html
@@ -1,2011 +1,5 @@
learner

Learner package for model training and evaluation.

Modules:

  • predict – A module for making predictions with PyTorch models using DataLoaders.
  • raytune_learner – Ray Tune wrapper and trainable model classes for hyperparameter optimization.
  • raytune_parser – Ray Tune results parser for extracting and saving best model configurations and weights.
\ No newline at end of file
diff --git a/reference/stimulus/learner/predict/index.html b/reference/stimulus/learner/predict/index.html
index d2695351..9a4a0269 100644
--- a/reference/stimulus/learner/predict/index.html
+++ b/reference/stimulus/learner/predict/index.html
@@ -1,2735 +1,228 @@
predict

A module for making predictions with PyTorch models using DataLoaders.

Classes:

  • PredictWrapper – A wrapper to predict the output of a model on a dataset loaded into a torch DataLoader.

PredictWrapper

    PredictWrapper(
        model: Module,
        dataloader: DataLoader,
        loss_dict: Optional[dict[str, Any]] = None,
    )

A wrapper to predict the output of a model on a dataset loaded into a torch DataLoader.

It also provides the functionality to measure the performance of the model.

Parameters:

  • model (Module) – The PyTorch model to make predictions with
  • dataloader (DataLoader) – DataLoader containing the evaluation data
  • loss_dict (Optional[dict[str, Any]], default: None) – Optional dictionary of loss functions

Methods:

  • compute_loss – Compute the loss.
  • compute_metric – Wrapper to compute a single performance metric.
  • compute_metrics – Wrapper to compute several performance metrics.
  • compute_other_metric – Compute a non-loss performance metric.
  • handle_predictions – Handle the model outputs from the forward pass into a dictionary of tensors, just like y.
  • predict – Get the model predictions.

Source code in src/stimulus/learner/predict.py (lines 19–36):
def __init__(self, model: nn.Module, dataloader: DataLoader, loss_dict: Optional[dict[str, Any]] = None) -> None:
-    """Initialize the PredictWrapper.
-
-    Args:
-        model: The PyTorch model to make predictions with
-        dataloader: DataLoader containing the evaluation data
-        loss_dict: Optional dictionary of loss functions
-    """
-    self.model = model
-    self.dataloader = dataloader
-    self.loss_dict = loss_dict
-    try:
-        self.model.eval()
-    except RuntimeError as e:
-        # Using logging instead of print
-        import logging
-
-        logging.warning("Not able to run model.eval: %s", str(e))
-

compute_loss

    compute_loss() -> float

Compute the loss.

The current implementation computes the loss for each batch and then averages them. TODO: we could potentially summarize the loss across batches in a different way, or we may even have more than one loss.

Source code in src/stimulus/learner/predict.py (lines 98–113):
def compute_loss(self) -> float:
-    """Compute the loss.
-
-    The current implmentation basically computes the loss for each batch and then averages them.
-    TODO we could potentially summarize the los across batches in a different way.
-    Or sometimes we may potentially even have 1+ losses.
-    """
-    if self.loss_dict is None:
-        raise ValueError("Loss function is not provided.")
-    loss = 0.0
-    with torch.no_grad():
-        for x, y, _ in self.dataloader:
-            # the loss_dict could be unpacked with ** and the function declaration handle it differently like **kwargs. to be decided, personally find this more clean and understable.
-            current_loss = self.model.batch(x=x, y=y, **self.loss_dict)[0]
-            loss += current_loss.item()
-    return loss / len(self.dataloader)

compute_metric

    compute_metric(metric: str = 'loss') -> float

Wrapper to compute the performance metric.

Source code in src/stimulus/learner/predict.py (lines 92–96):
def compute_metric(self, metric: str = "loss") -> float:
-    """Wrapper to compute the performance metric."""
-    if metric == "loss":
-        return self.compute_loss()
-    return self.compute_other_metric(metric)

compute_metrics

    compute_metrics(metrics: list[str]) -> dict[str, float]

Wrapper to compute the performance metrics.

Source code in src/stimulus/learner/predict.py (lines 88–90):
def compute_metrics(self, metrics: list[str]) -> dict[str, float]:
-    """Wrapper to compute the performance metrics."""
-    return {m: self.compute_metric(m) for m in metrics}

compute_other_metric

    compute_other_metric(metric: str) -> float

Compute the performance metric.

TODO: currently we compute the average performance metric across target y, but maybe in the future we want something different.

Source code in src/stimulus/learner/predict.py (lines 115–131):
def compute_other_metric(self, metric: str) -> float:
-    """Compute the performance metric.
-
-    # TODO currently we computes the average performance metric across target y, but maybe in the future we want something different
-    """
-    if not hasattr(self, "predictions") or not hasattr(self, "labels"):
-        predictions, labels = self.predict(return_labels=True)
-        self.predictions = predictions
-        self.labels = labels
-
-    # Explicitly type the labels and predictions as dictionaries with str keys
-    labels_dict: dict[str, Tensor] = self.labels if isinstance(self.labels, dict) else {}
-    predictions_dict: dict[str, Tensor] = self.predictions if isinstance(self.predictions, dict) else {}
-
-    return sum(
-        Performance(labels=labels_dict[k], predictions=predictions_dict[k], metric=metric).val for k in labels_dict
-    ) / len(labels_dict)

handle_predictions

    handle_predictions(predictions: Any, y: dict[str, Tensor]) -> dict[str, Tensor]

Handle the model outputs from the forward pass into a dictionary of tensors, just like y.

Source code in src/stimulus/learner/predict.py (lines 82–86):
def handle_predictions(self, predictions: Any, y: dict[str, Tensor]) -> dict[str, Tensor]:
-    """Handle the model outputs from forward pass, into a dictionary of tensors, just like y."""
-    if len(y) == 1:
-        return {next(iter(y.keys())): predictions}
-    return dict(zip(y.keys(), predictions))

predict

    predict(*, return_labels: bool = False) -> Union[
        dict[str, Tensor],
        tuple[dict[str, Tensor], dict[str, Tensor]],
    ]

Get the model predictions.

Basically, it runs a forward pass on the model for each batch, gets the predictions and concatenates them for all batches. Since the returned current_predictions are formed by tensors computed for one batch, the final predictions are obtained by concatenating them.

At the end it returns predictions as a dictionary of tensors with the same keys as y.

If return_labels is True, then the labels will be returned as well, also as a dictionary of tensors.

Parameters:

  • return_labels (bool, default: False) – Whether to also return the labels

Returns:

  • Dictionary of predictions, and optionally labels

Source code in src/stimulus/learner/predict.py (lines 38–80):
def predict(
-    self,
-    *,
-    return_labels: bool = False,
-) -> Union[dict[str, Tensor], tuple[dict[str, Tensor], dict[str, Tensor]]]:
-    """Get the model predictions.
-
-    Basically, it runs a foward pass on the model for each batch,
-    gets the predictions and concatenate them for all batches.
-    Since the returned `current_predictions` are formed by tensors computed for one batch,
-    the final `predictions` are obtained by concatenating them.
-
-    At the end it returns `predictions` as a dictionary of tensors with the same keys as `y`.
-
-    If return_labels if True, then the `labels` will be returned as well, also as a dictionary of tensors.
-
-    Args:
-        return_labels: Whether to also return the labels
-
-    Returns:
-        Dictionary of predictions, and optionally labels
-    """
-    # create empty dictionaries with the column names
-    first_batch = next(iter(self.dataloader))
-    keys = first_batch[1].keys()
-    predictions: dict[str, list[Tensor]] = {k: [] for k in keys}
-    labels: dict[str, list[Tensor]] = {k: [] for k in keys}
-
-    # get the predictions (and labels) for each batch
-    with torch.no_grad():
-        for x, y, _ in self.dataloader:
-            current_predictions = self.model(**x)
-            current_predictions = self.handle_predictions(current_predictions, y)
-            for k in keys:
-                # it might happen that the batch consists of one element only so the torch.cat will fail. To prevent this the function to ensure at least one dimensionality is called.
-                predictions[k].append(ensure_at_least_1d(current_predictions[k]))
-                if return_labels:
-                    labels[k].append(ensure_at_least_1d(y[k]))
-
-    # return the predictions (and labels) as a dictionary of tensors for the entire dataset.
-    if not return_labels:
-        return {k: torch.cat(v) for k, v in predictions.items()}
-    return {k: torch.cat(v) for k, v in predictions.items()}, {k: torch.cat(v) for k, v in labels.items()}
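
A hedged usage sketch (the model, dataset, loss function and loss_dict key are placeholders; PredictWrapper, predict and compute_metrics are from the documentation above):

    from torch.utils.data import DataLoader

    from stimulus.learner.predict import PredictWrapper


    def evaluate(model, dataset, loss_fn):
        # the dataset is assumed to follow the (x, y, meta) batch convention used by stimulus
        loader = DataLoader(dataset, batch_size=32)
        wrapper = PredictWrapper(model, loader, loss_dict={"loss_fn": loss_fn})
        predictions, labels = wrapper.predict(return_labels=True)
        return wrapper.compute_metrics(["loss", "rocauc"])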
\ No newline at end of file
diff --git a/reference/stimulus/learner/raytune_learner/index.html b/reference/stimulus/learner/raytune_learner/index.html
index 03ecbb1d..1ded2912 100644
--- a/reference/stimulus/learner/raytune_learner/index.html
+++ b/reference/stimulus/learner/raytune_learner/index.html
@@ -1,3221 +1,555 @@
raytune_learner

Ray Tune wrapper and trainable model classes for hyperparameter optimization.

Classes:

  • CheckpointDict – Dictionary type for checkpoint data.
  • TuneModel – Trainable model class for Ray Tune.
  • TuneWrapper – Wrapper class for Ray Tune hyperparameter optimization.

CheckpointDict

Bases: TypedDict

Dictionary type for checkpoint data.

- - -
-

- Bases: Trainable

- - -

Trainable model class for Ray Tune.

- - - - - - - - - -

Methods:

-
    -
  • - export_model - – -
    -

    Export model to safetensors format.

    -
    -
  • -
  • - load_checkpoint - – -
    -

    Load model and optimizer state from checkpoint.

    -
    -
  • -
  • - objective - – -
    -

    Compute the objective metric(s) for the tuning process.

    -
    -
  • -
  • - save_checkpoint - – -
    -

    Save model and optimizer state to checkpoint.

    -
    -
  • -
  • - setup - – -
    -

    Get the model, loss function(s), optimizer, train and test data from the config.

    -
    -
  • -
  • - step - – -
    -

    For each batch in the training data, calculate the loss and update the model parameters.

    -
    -
  • -
- - - - - - -
- - - - - - - - - -
- - -

- export_model - - -

-
export_model(export_dir: str | None = None) -> None
-
- -
- -

Export model to safetensors format.

- -
- Source code in src/stimulus/learner/raytune_learner.py -
294
-295
-296
-297
-298
def export_model(self, export_dir: str | None = None) -> None:  # type: ignore[override]
-    """Export model to safetensors format."""
-    if export_dir is None:
-        return
-    safe_save_model(self.model, os.path.join(export_dir, "model.safetensors"))

load_checkpoint

    load_checkpoint(checkpoint: dict[Any, Any] | None) -> None

Load model and optimizer state from checkpoint.

Source code in src/stimulus/learner/raytune_learner.py (lines 300–306):
def load_checkpoint(self, checkpoint: dict[Any, Any] | None) -> None:
-    """Load model and optimizer state from checkpoint."""
-    if checkpoint is None:
-        return
-    checkpoint_dir = checkpoint["checkpoint_dir"]
-    self.model = safe_load_model(self.model, os.path.join(checkpoint_dir, "model.safetensors"))
-    self.optimizer.load_state_dict(torch.load(os.path.join(checkpoint_dir, "optimizer.pt")))

objective

    objective() -> dict[str, float]

Compute the objective metric(s) for the tuning process.

Source code in src/stimulus/learner/raytune_learner.py (lines 275–292):
def objective(self) -> dict[str, float]:
-    """Compute the objective metric(s) for the tuning process."""
-    metrics = [
-        "loss",
-        "rocauc",
-        "prauc",
-        "mcc",
-        "f1score",
-        "precision",
-        "recall",
-        "spearmanr",
-    ]  # TODO maybe we report only a subset of metrics, given certain criteria (eg. if classification or regression)
-    predict_val = PredictWrapper(self.model, self.validation, loss_dict=self.loss_dict)
-    predict_train = PredictWrapper(self.model, self.training, loss_dict=self.loss_dict)
-    return {
-        **{"val_" + metric: value for metric, value in predict_val.compute_metrics(metrics).items()},
-        **{"train_" + metric: value for metric, value in predict_train.compute_metrics(metrics).items()},
-    }

save_checkpoint

    save_checkpoint(checkpoint_dir: str) -> dict[Any, Any]

Save model and optimizer state to checkpoint.

Source code in src/stimulus/learner/raytune_learner.py (lines 308–312):
def save_checkpoint(self, checkpoint_dir: str) -> dict[Any, Any]:
-    """Save model and optimizer state to checkpoint."""
-    safe_save_model(self.model, os.path.join(checkpoint_dir, "model.safetensors"))
-    torch.save(self.optimizer.state_dict(), os.path.join(checkpoint_dir, "optimizer.pt"))
-    return {"checkpoint_dir": checkpoint_dir}

setup

    setup(config: dict[Any, Any]) -> None

Get the model, loss function(s), optimizer, train and test data from the config.

Source code in src/stimulus/learner/raytune_learner.py (lines 195–261):
def setup(self, config: dict[Any, Any]) -> None:
-    """Get the model, loss function(s), optimizer, train and test data from the config."""
-    # set the seeds the second time, first in TuneWrapper initialization
-    set_general_seeds(self.config["ray_worker_seed"])
-
-    # Initialize model with the config params
-    self.model = config["model"](**config["network_params"])
-
-    # Get the loss function(s) from the config model params
-    self.loss_dict = config["loss_params"]
-    for key, loss_fn in self.loss_dict.items():
-        try:
-            self.loss_dict[key] = getattr(nn, loss_fn)()
-        except AttributeError as err:
-            raise ValueError(
-                f"Invalid loss function: {loss_fn}, check PyTorch for documentation on available loss functions",
-            ) from err
-
-    # get the optimizer parameters
-    optimizer_lr = config["optimizer_params"]["lr"]
-    self.optimizer = getattr(optim, config["optimizer_params"]["method"])(
-        self.model.parameters(),
-        lr=optimizer_lr,
-    )
-
-    # get step size from the config
-    self.step_size = config["tune"]["step_size"]
-
-    # Get datasets from Ray's object store
-    training, validation = ray.get(self.config["_training_ref"]), ray.get(self.config["_validation_ref"])
-
-    # use dataloader on training/validation data
-    self.batch_size = config["data_params"]["batch_size"]
-    self.training = DataLoader(
-        training,
-        batch_size=self.batch_size,
-        shuffle=True,
-    )
-    self.validation = DataLoader(
-        validation,
-        batch_size=self.batch_size,
-        shuffle=True,
-    )
-
-    # debug section, first create a dedicated directory for each worker inside Ray_results/<tune_model_run_specific_dir> location
-    debug_dir = os.path.join(
-        config["tune_run_path"],
-        "debug",
-        ("worker_with_seed_" + str(self.config["ray_worker_seed"])),
-    )
-    if config["_debug"]:
-        # creating a special directory for it one that is worker/trial/experiment specific
-        os.makedirs(debug_dir)
-        seed_filename = os.path.join(debug_dir, "seeds.txt")
-
-        # save the initialized model weights
-        self.export_model(export_dir=debug_dir)
-
-        # save the seeds
-        with open(seed_filename, "a") as seed_f:
-            # you can not retrieve the actual seed once it set, or the current seed neither for python, numpy nor torch. so we select five numbers randomly. If that is the first draw of numbers they are always the same.
-            python_values = random.sample(range(100), 5)
-            numpy_values = list(np.random.randint(0, 100, size=5))
-            torch_values = torch.randint(0, 100, (5,)).tolist()
-            seed_f.write(
-                f"python drawn numbers : {python_values}\nnumpy drawn numbers : {numpy_values}\ntorch drawn numbers : {torch_values}\n",
-            )
-
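
For orientation, the keys that setup reads from the Ray Tune config roughly correspond to a structure like the sketch below. The concrete values and the loss key name are made-up placeholders; only the key names and their roles come from the source listing above.

    def make_param_space(model_cls):
        # hypothetical param_space entries consumed by TuneModel.setup
        return {
            "model": model_cls,                          # network class, not an instance
            "network_params": {"hidden_size": 64},       # kwargs for the model constructor
            "loss_params": {"loss_fn": "MSELoss"},       # names resolved via getattr(torch.nn, ...)
            "optimizer_params": {"method": "Adam", "lr": 1e-3},
            "data_params": {"batch_size": 32},
            "tune": {"step_size": 2},
        }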

step

    step() -> dict

For each batch in the training data, calculate the loss and update the model parameters.

This calculation is performed based on the model's batch function. At the end, return the objective metric(s) for the tuning process.

Source code in src/stimulus/learner/raytune_learner.py (lines 263–273):
def step(self) -> dict:
-    """For each batch in the training data, calculate the loss and update the model parameters.
-
-    This calculation is performed based on the model's batch function.
-    At the end, return the objective metric(s) for the tuning process.
-    """
-    for _step_size in range(self.step_size):
-        for x, y, _meta in self.training:
-            # the loss dict could be unpacked with ** and the function declaration handle it differently like **kwargs. to be decided, personally find this more clean and understable.
-            self.model.batch(x=x, y=y, optimizer=self.optimizer, **self.loss_dict)
-    return self.objective()

TuneWrapper

    TuneWrapper(
        model_config: RayTuneModel,
        data_config_path: str,
        model_class: Module,
        data_path: str,
        encoder_loader: EncoderLoader,
        seed: int,
        ray_results_dir: Optional[str] = None,
        tune_run_name: Optional[str] = None,
        *,
        debug: bool = False,
        autoscaler: bool = False,
    )

Wrapper class for Ray Tune hyperparameter optimization.

Methods:

  • tune – Run the tuning process.
  • tuner_initialization – Prepare the tuner with the configs.

Source code in src/stimulus/learner/raytune_learner.py (lines 35–111):
def __init__(
-    self,
-    model_config: RayTuneModel,
-    data_config_path: str,
-    model_class: nn.Module,
-    data_path: str,
-    encoder_loader: EncoderLoader,
-    seed: int,
-    ray_results_dir: Optional[str] = None,
-    tune_run_name: Optional[str] = None,
-    *,
-    debug: bool = False,
-    autoscaler: bool = False,
-) -> None:
-    """Initialize the TuneWrapper with the paths to the config, model, and data."""
-    self.config = model_config.model_dump()
-
-    # set all general seeds: python, numpy and torch.
-    set_general_seeds(seed)
-
-    # build the tune config:
-    try:
-        scheduler_class = getattr(
-            tune.schedulers,
-            model_config.tune.scheduler.name,
-        )  # todo, do this in RayConfigLoader
-    except AttributeError as err:
-        raise ValueError(
-            f"Invalid scheduler: {model_config.tune.scheduler.name}, check Ray Tune for documentation on available schedulers",
-        ) from err
-
-    scheduler = scheduler_class(**model_config.tune.scheduler.params)
-    self.tune_config = tune.TuneConfig(
-        metric=model_config.tune.tune_params.metric,
-        mode=model_config.tune.tune_params.mode,
-        num_samples=model_config.tune.tune_params.num_samples,
-        scheduler=scheduler,
-    )
-
-    # build the run config
-    self.run_config = train.RunConfig(
-        name=tune_run_name
-        if tune_run_name is not None
-        else "TuneModel_" + datetime.datetime.now(tz=datetime.timezone.utc).strftime("%Y-%m-%d_%H-%M-%S"),
-        storage_path=ray_results_dir,
-        checkpoint_config=train.CheckpointConfig(checkpoint_at_end=True),
-        stop=model_config.tune.run_params.stop,
-    )
-
-    # add the data path to the config
-    if not os.path.exists(data_path):
-        raise ValueError("Data path does not exist. Given path:" + data_path)
-    self.config["data_path"] = os.path.abspath(data_path)
-
-    # Set up tune_run path
-    if ray_results_dir is None:
-        ray_results_dir = os.environ.get("HOME", "")
-    self.config["tune_run_path"] = os.path.join(
-        ray_results_dir,
-        tune_run_name
-        if tune_run_name is not None
-        else "TuneModel_" + datetime.datetime.now(tz=datetime.timezone.utc).strftime("%Y-%m-%d_%H-%M-%S"),
-    )
-    self.config["_debug"] = debug
-    self.config["model"] = model_class
-    self.config["encoder_loader"] = encoder_loader
-    self.config["ray_worker_seed"] = tune.randint(0, 1000)
-
-    self.gpu_per_trial = model_config.tune.gpu_per_trial
-    self.cpu_per_trial = model_config.tune.cpu_per_trial
-
-    self.tuner = self.tuner_initialization(
-        data_config_path=data_config_path,
-        data_path=data_path,
-        encoder_loader=encoder_loader,
-        autoscaler=autoscaler,
-    )
-

tune

    tune() -> ResultGrid

Run the tuning process.

Source code in src/stimulus/learner/raytune_learner.py (lines 187–189):
def tune(self) -> ray.tune.ResultGrid:
-    """Run the tuning process."""
-    return self.tuner.fit()

tuner_initialization

    tuner_initialization(
        data_config_path: str,
        data_path: str,
        encoder_loader: EncoderLoader,
        *,
        autoscaler: bool = False,
    ) -> Tuner

Prepare the tuner with the configs.

Source code in src/stimulus/learner/raytune_learner.py (lines 113–185):
def tuner_initialization(
-    self,
-    data_config_path: str,
-    data_path: str,
-    encoder_loader: EncoderLoader,
-    *,
-    autoscaler: bool = False,
-) -> tune.Tuner:
-    """Prepare the tuner with the configs."""
-    # Get available resources from Ray cluster
-    cluster_res = cluster_resources()
-    logging.info(f"CLUSTER resources   ->  {cluster_res}")
-
-    # Check per-trial resources
-    try:
-        if self.gpu_per_trial > cluster_res["GPU"] and not autoscaler:
-            raise ValueError(
-                "GPU per trial exceeds what is available in the cluster; set autoscaler to True to allow the autoscaler to provision more resources.",
-            )
-    except KeyError as err:
-        logging.warning(f"KeyError: {err}, no GPU resources available in the cluster: {cluster_res}")
-
-    if self.cpu_per_trial > cluster_res["CPU"] and not autoscaler:
-        raise ValueError(
-            "CPU per trial exceeds what is available in the cluster; set autoscaler to True to allow the autoscaler to provision more resources.",
-        )
-
-    logging.info(f"PER_TRIAL resources ->  GPU: {self.gpu_per_trial} CPU: {self.cpu_per_trial}")
-
-    # Pre-load and encode datasets once, then put them in Ray's object store
-
-    training = TorchDataset(
-        config_path=data_config_path,
-        csv_path=data_path,
-        encoder_loader=encoder_loader,
-        split=0,
-    )
-    validation = TorchDataset(
-        config_path=data_config_path,
-        csv_path=data_path,
-        encoder_loader=encoder_loader,
-        split=1,
-    )
-
-    # log to debug the names of the columns and shapes of tensors for a batch of training
-    # Log shapes of encoded tensors for first batch of training data
-    inputs, labels, meta = training[0:10]
-
-    logging.debug("Training data tensor shapes:")
-    for field, tensor in inputs.items():
-        logging.debug(f"Input field '{field}' shape: {tensor.shape}")
-
-    for field, tensor in labels.items():
-        logging.debug(f"Label field '{field}' shape: {tensor.shape}")
-
-    for field, values in meta.items():
-        logging.debug(f"Meta field '{field}' length: {len(values)}")
-
-    training_ref = ray.put(training)
-    validation_ref = ray.put(validation)
-
-    self.config["_training_ref"] = training_ref
-    self.config["_validation_ref"] = validation_ref
-
-    # Configure trainable with resources and dataset parameters
-    trainable = tune.with_resources(
-        tune.with_parameters(
-            TuneModel,
-        ),
-        resources={"cpu": self.cpu_per_trial, "gpu": self.gpu_per_trial},
-    )
-
-    return tune.Tuner(trainable, tune_config=self.tune_config, param_space=self.config, run_config=self.run_config)
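
The dataset pre-loading above relies on Ray's object store: the encoded datasets are put once and each trial later fetches them by reference. The snippet below is a minimal, stimulus-independent sketch of that ray.put / ray.get pattern; the dataset and function names are illustrative only.

# Minimal sketch of the ray.put / object-store pattern used above (illustrative names).
import ray

ray.init(ignore_reinit_error=True)

dataset = list(range(1_000_000))   # stands in for an encoded TorchDataset
dataset_ref = ray.put(dataset)     # stored once in the object store

@ray.remote
def trial(config):
    data = ray.get(config["_dataset_ref"])  # each trial dereferences the shared dataset
    return len(data)

print(ray.get(trial.remote({"_dataset_ref": dataset_ref})))  # 1000000
ray.shutdown()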
-
-
-
- -
- - - -
- -
- -
- - - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/learner/raytune_parser/index.html b/reference/stimulus/learner/raytune_parser/index.html index 66548222..d8a203e5 100644 --- a/reference/stimulus/learner/raytune_parser/index.html +++ b/reference/stimulus/learner/raytune_parser/index.html @@ -1,3110 +1,231 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.learner.raytune_parser - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - -
- - - - - - - -
- -
- - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - - - - - - - - - - - - - -
- - - -

- raytune_parser - - -

- -
- -

Ray Tune results parser for extracting and saving best model configurations and weights.

- - - - - - - -

Classes:

-
    -
  • - RayTuneMetrics - – -
    -

    TypedDict for storing Ray Tune metrics results.

    -
    -
  • -
  • - RayTuneOptimizer - – -
    -

    TypedDict for storing Ray Tune optimizer state.

    -
    -
  • -
  • - RayTuneResult - – -
    -

    TypedDict for storing Ray Tune optimization results.

    -
    -
  • -
  • - TuneParser - – -
    -

    Parser class for Ray Tune results to extract best configurations and model weights.

    -
    -
  • -
- - - - - - - -
- - - - - - - - -
- - - -

- RayTuneMetrics - - -

- - -
-

- Bases: TypedDict

- - -

TypedDict for storing Ray Tune metrics results.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- -
- - - -

- RayTuneOptimizer - - -

- - -
-

- Bases: TypedDict

- - -

TypedDict for storing Ray Tune optimizer state.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- -
- - - -

- RayTuneResult - - -

- - -
-

- Bases: TypedDict

- - -

TypedDict for storing Ray Tune optimization results.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- -
- - - -

- TuneParser - - -

-
TuneParser(result: ResultGrid)
-
- -
- - -

Parser class for Ray Tune results to extract best configurations and model weights.

- - - - - - - - - -

Methods:

- - - - -
- Source code in src/stimulus/learner/raytune_parser.py -
def __init__(self, result: ResultGrid) -> None:
-    """Initialize with the given Ray Tune result grid."""
-    self.result: ResultGrid = result
-    self.best_result: Result = self._validate_best_result()
-
-
- - - -
- - - - - - - - - -
- - -

- fix_config_values - - -

-
fix_config_values(config: dict[str, Any]) -> dict[str, Any]
-
- -
- -

Correct config values.

-

This method modifies the configuration dictionary to remove or convert non-serializable objects (such as Ray ObjectRefs) so that the entire dictionary can be safely dumped to a YAML file.

- - -

Parameters:

-
    -
  • - config - (dict[str, Any]) - – -
    -

    Configuration dictionary to fix.

    -
    -
  • -
- - -

Returns:

-
    -
  • - dict[str, Any] - – -
    -

    Fixed configuration dictionary.

    -
    -
  • -
- -
- Source code in src/stimulus/learner/raytune_parser.py -
def fix_config_values(self, config: dict[str, Any]) -> dict[str, Any]:
-    """Correct config values.
-
-    This method modifies the configuration dictionary to remove or convert
-    non-serializable objects (such as Ray ObjectRefs) so that the entire dictionary
-    can be safely dumped to a YAML file.
-
-    Args:
-        config: Configuration dictionary to fix.
-
-    Returns:
-        Fixed configuration dictionary.
-    """
-    # Replace the model class with its name for serialization purposes
-    config["model"] = config["model"].__name__
-
-    # Remove keys that contain non-serializable objects
-    keys_to_remove = [
-        "_debug",
-        "tune_run_path",
-        "_training_ref",
-        "_validation_ref",
-        "encoder_loader",  # if this key holds a non-serializable object
-    ]
-    for key in keys_to_remove:
-        config.pop(key, None)
-
-    return config
-
-
-
- -
- -
- - -

- get_best_config - - -

-
get_best_config() -> dict[str, Any]
-
- -
- -

Get the best config from the results.

- - -

Returns:

-
    -
  • - dict[str, Any] - – -
    -

    The configuration dictionary of the best result.

    -
    -
  • -
- - -

Raises:

-
    -
  • - ValueError - – -
    -

    If the config is missing.

    -
    -
  • -
- -
- Source code in src/stimulus/learner/raytune_parser.py -
def get_best_config(self) -> dict[str, Any]:
-    """Get the best config from the results.
-
-    Returns:
-        The configuration dictionary of the best result.
-
-    Raises:
-        ValueError: If the config is missing.
-    """
-    config: dict[str, Any] | None = self.best_result.config
-    if config is None:
-        raise ValueError("Best result does not contain a configuration.")
-    return config
-
-
-
- -
- -
- - -

- get_best_model - - -

-
get_best_model() -> dict[str, Tensor]
-
- -
- -

Get the best model weights from the results.

- - -

Returns:

-
    -
  • - dict[str, Tensor] - – -
    -

    Dictionary of model weights.

    -
    -
  • -
- - -

Raises:

-
    -
  • - ValueError - – -
    -

    If the checkpoint is missing.

    -
    -
  • -
- -
- Source code in src/stimulus/learner/raytune_parser.py -
def get_best_model(self) -> dict[str, torch.Tensor]:
-    """Get the best model weights from the results.
-
-    Returns:
-        Dictionary of model weights.
-
-    Raises:
-        ValueError: If the checkpoint is missing.
-    """
-    if self.best_result.checkpoint is None:
-        raise ValueError("Best result does not contain a checkpoint for the model.")
-    checkpoint_dir: str = self.best_result.checkpoint.to_directory()
-    checkpoint: str = os.path.join(checkpoint_dir, "model.safetensors")
-    return safe_load_file(checkpoint)
-
-
-
- -
- -
- - -

- get_best_optimizer - - -

-
get_best_optimizer() -> dict[str, Any]
-
- -
- -

Get the best optimizer state from the results.

- - -

Returns:

-
    -
  • - dict[str, Any] - – -
    -

    Optimizer state dictionary.

    -
    -
  • -
- - -

Raises:

-
    -
  • - ValueError - – -
    -

    If the checkpoint is missing.

    -
    -
  • -
- -
- Source code in src/stimulus/learner/raytune_parser.py -
def get_best_optimizer(self) -> dict[str, Any]:
-    """Get the best optimizer state from the results.
-
-    Returns:
-        Optimizer state dictionary.
-
-    Raises:
-        ValueError: If the checkpoint is missing.
-    """
-    if self.best_result.checkpoint is None:
-        raise ValueError("Best result does not contain a checkpoint for the optimizer.")
-    checkpoint_dir: str = self.best_result.checkpoint.to_directory()
-    checkpoint: str = os.path.join(checkpoint_dir, "optimizer.pt")
-    return torch.load(checkpoint)
-
-
-
- -
- -
- - -

- save_best_config - - -

-
save_best_config(output: str) -> None
-
- -
- -

Save the best config to a file.

-
-

Todo

-

maybe only save the relevant config values.

-
- - -

Parameters:

-
    -
  • - output - (str) - – -
    -

    File path to save the configuration.

    -
    -
  • -
- -
- Source code in src/stimulus/learner/raytune_parser.py -
def save_best_config(self, output: str) -> None:
-    """Save the best config to a file.
-
-    TODO: maybe only save the relevant config values.
-
-    Args:
-        output: File path to save the configuration.
-    """
-    config: dict[str, Any] = self.get_best_config()
-    config = self.fix_config_values(config)
-    with open(output, "w") as f:
-        yaml.safe_dump(config, f)
-
-
-
- -
- -
- - -

- save_best_metrics_dataframe - - -

-
save_best_metrics_dataframe(output: str) -> None
-
- -
- -

Save the dataframe with the metrics at each iteration of the best sample to a file.

- - -

Parameters:

-
    -
  • - output - (str) - – -
    -

    CSV file path to save the metrics.

    -
    -
  • -
- -
- Source code in src/stimulus/learner/raytune_parser.py -
def save_best_metrics_dataframe(self, output: str) -> None:
-    """Save the dataframe with the metrics at each iteration of the best sample to a file.
-
-    Args:
-        output: CSV file path to save the metrics.
-    """
-    metrics_df: pd.DataFrame = pd.DataFrame([self.best_result.metrics])
-    metrics_df.to_csv(output, index=False)
-
-
-
- -
- -
- - -

- save_best_model - - -

-
save_best_model(output: str) -> None
-
- -
- -

Save the best model weights to a file.

-

This method retrieves the best model weights using the get_best_model helper, which loads the model data from the checkpoint's directory, then re-saves it using safe_save_file.

- - -

Parameters:

-
    -
  • - output - (str) - – -
    -

    Path where the best model weights will be saved.

    -
    -
  • -
- -
- Source code in src/stimulus/learner/raytune_parser.py -
def save_best_model(self, output: str) -> None:
-    """Save the best model weights to a file.
-
-    This method retrieves the best model weights using the get_best_model helper
-    which loads the model data from the checkpoint's directory, then re-saves
-    it using safe_save_file.
-
-    Args:
-        output: Path where the best model weights will be saved.
-    """
-    model: dict[str, torch.Tensor] = self.get_best_model()
-    safe_save_file(model, output)
-
-
-
- -
- -
- - -

- save_best_optimizer - - -

-
save_best_optimizer(output: str) -> None
-
- -
- -

Save the best optimizer state to a file.

- - -

Parameters:

-
    -
  • - output - (str) - – -
    -

    Path where the best optimizer state will be saved.

    -
    -
  • -
- -
- Source code in src/stimulus/learner/raytune_parser.py -
def save_best_optimizer(self, output: str) -> None:
-    """Save the best optimizer state to a file.
-
-    Args:
-        output: Path where the best optimizer state will be saved.
-    """
-    optimizer_state: dict[str, Any] = self.get_best_optimizer()
-    torch.save(optimizer_state, output)
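
Taken together, the methods above suggest a typical post-tuning flow. The sketch below assumes `results` is the ray.tune.ResultGrid returned by the tuning step documented earlier and that TuneParser is importable from stimulus.learner.raytune_parser, as the source location suggests; the output file names are arbitrary.

# Hedged sketch of a post-tuning flow; `results` is assumed to be the
# ray.tune.ResultGrid returned by the tune() method documented earlier.
from stimulus.learner.raytune_parser import TuneParser  # import path assumed from the source location

parser = TuneParser(results)
parser.save_best_config("best_config.yaml")             # YAML-safe config (fix_config_values applied internally)
parser.save_best_metrics_dataframe("best_metrics.csv")  # metrics of the best trial as CSV
parser.save_best_model("best_model.safetensors")        # best model weights (safetensors format)
parser.save_best_optimizer("best_optimizer.pt")         # best optimizer state (torch.save format)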
-
-
-
- -
- - - -
- -
- -
- - - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/typing/index.html b/reference/stimulus/typing/index.html index 7e24d06c..3e4e9ad9 100644 --- a/reference/stimulus/typing/index.html +++ b/reference/stimulus/typing/index.html @@ -1,1996 +1,5 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.typing - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - -
- - - - - - - -
- -
- - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - - - - - - - - - - - - - - -
- - - -

- typing - - -

- -
- -

Typing for Stimulus Python API.

-

This module contains all Stimulus types, which will be used for variable typing and likely not instantiated, as well as aliases for other types to use for typing purposes.

-

The aliases from this module should be used for typing purposes only.

- - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/utils/generic_utils/index.html b/reference/stimulus/utils/generic_utils/index.html index 828c775e..23e34475 100644 --- a/reference/stimulus/utils/generic_utils/index.html +++ b/reference/stimulus/utils/generic_utils/index.html @@ -1,2171 +1,43 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.utils.generic_utils - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - -
- - - - - - - -
- -
- - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - - - - - - - - - - - - - - -
- - - -

- generic_utils - - -

- -
- -

Utility functions for general purpose operations like seed setting and tensor manipulation.

- - - - - - - - - -

Functions:

-
    -
  • - ensure_at_least_1d - – -
    -

    Ensure that the given tensors are not zero-dimensional; if they are, add one dimension.

    -
    -
  • -
  • - set_general_seeds - – -
    -

    Set all relevant random seeds to a given value.

    -
    -
  • -
- - - - - -
- - - - - - - - - -
- - -

- ensure_at_least_1d - - -

-
ensure_at_least_1d(tensor: Tensor) -> Tensor
-
- -
- -

Ensure that the given tensors are not zero-dimensional; if they are, add one dimension.

- -
- Source code in src/stimulus/utils/generic_utils.py -
def ensure_at_least_1d(tensor: torch.Tensor) -> torch.Tensor:
-    """Function to make sure tensors given are not zero dimensional. if they are add one dimension."""
-    if tensor.dim() == 0:
-        tensor = tensor.unsqueeze(0)
-    return tensor
-
-
-
- -
- -
- - -

- set_general_seeds - - -

-
set_general_seeds(seed_value: Union[int, None]) -> None
-
- -
- -

Set all relevant random seeds to a given value.

-

Especially useful with ray.tune, since Ray does not provide a "generic" seed as of ray 2.23.

- -
- Source code in src/stimulus/utils/generic_utils.py -
def set_general_seeds(seed_value: Union[int, None]) -> None:
-    """Set all relevant random seeds to a given value.
-
-    Especially useful with ray.tune, since Ray does not provide a "generic" seed as of ray 2.23.
-    """
-    # Set python seed
-    random.seed(seed_value)
-
-    # set numpy seed
-    np.random.seed(seed_value)
-
-    # Set torch seed. Unlike the two above, torch cannot take None as an input value, so it is not set in that case.
-    if seed_value is not None:
-        torch.manual_seed(seed_value)
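
A small usage sketch for the two utilities on this page; the import path follows the source location shown above.

import torch
from stimulus.utils.generic_utils import ensure_at_least_1d, set_general_seeds

set_general_seeds(42)                       # seeds python's random, numpy and torch in one call

scalar = torch.tensor(3.0)                  # zero-dimensional tensor
print(ensure_at_least_1d(scalar).shape)     # torch.Size([1])
print(ensure_at_least_1d(torch.ones(4)).shape)  # already 1-D, returned unchanged: torch.Size([4])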
-
-
-
- -
- - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/utils/index.html b/reference/stimulus/utils/index.html index 17438153..3020e857 100644 --- a/reference/stimulus/utils/index.html +++ b/reference/stimulus/utils/index.html @@ -1,2025 +1,5 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.utils - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - -
- - - - - - - -
- -
- - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - - - - - - - - - - - - - - -
- - - -

- utils - - -

- -
- -

Utility functions package.

- - - - - -

Modules:

-
    -
  • - generic_utils - – -
    -

    Utility functions for general purpose operations like seed setting and tensor manipulation.

    -
    -
  • -
  • - launch_utils - – -
    -

    Utility functions for launching and configuring experiments and ray tuning.

    -
    -
  • -
  • - performance - – -
    -

    Utility module for computing various performance metrics for machine learning models.

    -
    -
  • -
  • - yaml_data - – -
    -

    Utility module for handling YAML configuration files and their validation.

    -
    -
  • -
  • - yaml_model_schema - – -
    -

    Module for handling YAML configuration files and converting them to Ray Tune format.

    -
    -
  • -
- - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/utils/launch_utils/index.html b/reference/stimulus/utils/launch_utils/index.html index 2d439712..717f5e66 100644 --- a/reference/stimulus/utils/launch_utils/index.html +++ b/reference/stimulus/utils/launch_utils/index.html @@ -1,2190 +1,73 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.utils.launch_utils - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - -
- - - - - - - -
- -
- - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - - - - - - - - - - - - - - -
- - - -

- launch_utils - - -

- -
- -

Utility functions for launching and configuring experiments and ray tuning.

- - - - - - - - - -

Functions:

- - - - - - -
- - - - - - - - - -
- - -

- import_class_from_file - - -

-
import_class_from_file(file_path: str) -> type
-
- -
- -

Import and return the Model class from a specified Python file.

- - -

Parameters:

-
    -
  • - file_path - (str) - – -
    -

    Path to the Python file containing the Model class.

    -
    -
  • -
- - -

Returns:

-
    -
  • -type ( type -) – -
    -

    The Model class found in the file.

    -
    -
  • -
- - -

Raises:

-
    -
  • - ImportError - – -
    -

    If no class starting with 'Model' is found in the file.

    -
    -
  • -
- -
- Source code in src/stimulus/utils/launch_utils.py -
def import_class_from_file(file_path: str) -> type:
-    """Import and return the Model class from a specified Python file.
-
-    Args:
-        file_path (str): Path to the Python file containing the Model class.
-
-    Returns:
-        type: The Model class found in the file.
-
-    Raises:
-        ImportError: If no class starting with 'Model' is found in the file.
-    """
-    # Extract directory path and file name
-    directory, file_name = os.path.split(file_path)
-    module_name = os.path.splitext(file_name)[0]  # Remove extension to get module name
-
-    # Create a module from the file path
-    # The next lines create a module specification from the file location, build a module object from that specification, and execute the module's code to populate the module object with the definitions from the Python file.
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Could not create module spec for {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if spec.loader is None:
-        raise ImportError(f"Module spec has no loader for {file_path}")
-    spec.loader.exec_module(module)
-
-    # Find the class dynamically
-    for name in dir(module):
-        model_class = getattr(module, name)
-        if isinstance(model_class, type) and name.startswith("Model"):
-            return model_class
-
-    # Class not found
-    raise ImportError("No class starting with 'Model' found in the file.")
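
To make the contract concrete, the sketch below writes a throwaway model file and imports its class through this helper; the file name and class body are illustrative only.

# Illustrative only: a throwaway file containing a class whose name starts with "Model".
import pathlib
import tempfile

from stimulus.utils.launch_utils import import_class_from_file

model_source = "class ModelExample:\n    hidden_size = 8\n"

with tempfile.TemporaryDirectory() as tmp_dir:
    model_file = pathlib.Path(tmp_dir) / "my_model.py"
    model_file.write_text(model_source)
    model_class = import_class_from_file(str(model_file))
    print(model_class.__name__)  # ModelExample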
-
-
-
- -
- - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/utils/performance/index.html b/reference/stimulus/utils/performance/index.html index f7521756..ec05686f 100644 --- a/reference/stimulus/utils/performance/index.html +++ b/reference/stimulus/utils/performance/index.html @@ -1,2935 +1,225 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.utils.performance - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - -
- - - - - - - -
- -
- - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - - - - - - - - - - - - - - -
- - - -

- performance - - -

- -
- -

Utility module for computing various performance metrics for machine learning models.

- - - - - - - -

Classes:

-
    -
  • - Performance - – -
    -

    Returns the value of a given metric.

    -
    -
  • -
- - - - - - - -
- - - - - - - - -
- - - -

- Performance - - -

-
Performance(
-    labels: Any, predictions: Any, metric: str = "rocauc"
-)
-
- -
- - -

Returns the value of a given metric.

-

Parameters

-

labels (np.array) : ground truth labels
predictions (np.array) : model predictions
metric (str) : the metric to compute

-

Returns:

-

value (float) : the value of the metric

-

TODO we can add more metrics here

-

TODO: currently, for classification metrics like precision, recall, f1score and mcc, we use a threshold of 0.5 to convert the probabilities to binary predictions. For models with imbalanced predictions, where the meaningful threshold is not located at 0.5, the binarized predictions can end up being all 0s or all 1s, and the resulting performance metrics are meaningless.

- - - -

Parameters:

-
    -
  • - labels - (Any) - – -
    -

    Ground truth labels

    -
    -
  • -
  • - predictions - (Any) - – -
    -

    Model predictions

    -
    -
  • -
  • - metric - (str, default: - 'rocauc' -) - – -
    -

    Type of metric to compute (default: "rocauc")

    -
    -
  • -
- - - - - - - - - -

Methods:

-
    -
  • - data2array - – -
    -

    Convert input data to numpy array.

    -
    -
  • -
  • - f1score - – -
    -

    Compute F1 score.

    -
    -
  • -
  • - handle_multiclass - – -
    -

    Handle the case of multiclass classification.

    -
    -
  • -
  • - mcc - – -
    -

    Compute Matthews Correlation Coefficient.

    -
    -
  • -
  • - prauc - – -
    -

    Compute PR AUC score.

    -
    -
  • -
  • - precision - – -
    -

    Compute precision score.

    -
    -
  • -
  • - recall - – -
    -

    Compute recall score.

    -
    -
  • -
  • - rocauc - – -
    -

    Compute ROC AUC score.

    -
    -
  • -
  • - spearmanr - – -
    -

    Compute Spearman correlation coefficient.

    -
    -
  • -
- - - -
- Source code in src/stimulus/utils/performance.py -
def __init__(self, labels: Any, predictions: Any, metric: str = "rocauc") -> None:
-    """Initialize Performance class with labels, predictions and metric type.
-
-    Args:
-        labels: Ground truth labels
-        predictions: Model predictions
-        metric: Type of metric to compute (default: "rocauc")
-    """
-    labels_arr = self.data2array(labels)
-    predictions_arr = self.data2array(predictions)
-    labels_arr, predictions_arr = self.handle_multiclass(labels_arr, predictions_arr)
-    if labels_arr.shape != predictions_arr.shape:
-        raise ValueError(
-            f"The labels have shape {labels_arr.shape} whereas predictions have shape {predictions_arr.shape}.",
-        )
-    function = getattr(self, metric)
-    self.val = function(labels_arr, predictions_arr)
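
A small usage sketch with illustrative values. Note that, per handle_multiclass documented below, binary predictions are passed as an (N, 2) array of class probabilities and the positive-class column is extracted automatically.

from stimulus.utils.performance import Performance

labels = [0, 0, 1, 1]
predictions = [  # (N, 2) class probabilities; column 1 is the positive class
    [0.9, 0.1],
    [0.6, 0.4],
    [0.65, 0.35],
    [0.2, 0.8],
]

print(Performance(labels, predictions, metric="rocauc").val)   # ROC AUC on the positive-class scores
print(Performance(labels, predictions, metric="f1score").val)  # F1 after thresholding at 0.5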
-
-
- - - -
- - - - - - - - - -
- - -

- data2array - - -

-
data2array(data: Any) -> NDArray[float64]
-
- -
- -

Convert input data to numpy array.

- - -

Parameters:

-
    -
  • - data - (Any) - – -
    -

    Input data in various formats

    -
    -
  • -
- - -

Returns:

-
    -
  • - NDArray[float64] - – -
    -

    NDArray[np.float64]: Converted numpy array

    -
    -
  • -
- - -

Raises:

-
    -
  • - ValueError - – -
    -

    If input data type is not supported

    -
    -
  • -
- -
- Source code in src/stimulus/utils/performance.py -
def data2array(self, data: Any) -> NDArray[np.float64]:
-    """Convert input data to numpy array.
-
-    Args:
-        data: Input data in various formats
-
-    Returns:
-        NDArray[np.float64]: Converted numpy array
-
-    Raises:
-        ValueError: If input data type is not supported
-    """
-    if isinstance(data, list):
-        return np.array(data, dtype=np.float64)
-    if isinstance(data, np.ndarray):
-        return data.astype(np.float64)
-    if isinstance(data, torch.Tensor):
-        return data.detach().cpu().numpy().astype(np.float64)
-    if isinstance(data, (int, float)):
-        return np.array([data], dtype=np.float64)
-    raise ValueError(f"The data must be a list, np.array, torch.Tensor, int or float. Instead it is {type(data)}")
-
-
-
- -
- -
- - -

- f1score - - -

-
f1score(
-    labels: NDArray[float64], predictions: NDArray[float64]
-) -> float
-
- -
- -

Compute F1 score.

- -
- Source code in src/stimulus/utils/performance.py -
def f1score(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:
-    """Compute F1 score."""
-    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])
-    return float(f1_score(labels, predictions_binary))
-
-
-
- -
- -
- - -

- handle_multiclass - - -

-
handle_multiclass(
-    labels: NDArray[float64], predictions: NDArray[float64]
-) -> tuple[NDArray[float64], NDArray[float64]]
-
- -
- -

Handle the case of multiclass classification.

- - -

Parameters:

-
    -
  • - labels - (NDArray[float64]) - – -
    -

    Labels array of shape (N,) or (N, 1)

    -
    -
  • -
  • - predictions - (NDArray[float64]) - – -
    -

    Predictions array of shape (N,) or (N, C) where C is number of classes

    -
    -
  • -
- - -

Returns:

-
    -
  • - tuple[NDArray[float64], NDArray[float64]] - – -
    -

    tuple[NDArray[np.float64], NDArray[np.float64]]: Processed labels and predictions

    -
    -
  • -
- - -

Raises:

-
    -
  • - ValueError - – -
    -

    If input shapes are not compatible

    -
    -
  • -
- -
- Source code in src/stimulus/utils/performance.py -
def handle_multiclass(
-    self,
-    labels: NDArray[np.float64],
-    predictions: NDArray[np.float64],
-) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
-    """Handle the case of multiclass classification.
-
-    Args:
-        labels: Labels array of shape (N,) or (N, 1)
-        predictions: Predictions array of shape (N,) or (N, C) where C is number of classes
-
-    Returns:
-        tuple[NDArray[np.float64], NDArray[np.float64]]: Processed labels and predictions
-
-    Raises:
-        ValueError: If input shapes are not compatible
-    """
-    # Case 1: If labels are 2D with shape (N,1), squeeze to 1D shape (N,)
-    # This handles cases where labels come as column vectors
-    if len(labels.shape) == NON_SQUEEZED_SHAPE_LENGTH and labels.shape[1] == 1:
-        labels = labels.squeeze(-1)
-
-    if len(predictions.shape) == NON_SQUEEZED_SHAPE_LENGTH:
-        # Case 2: Binary classification with shape (N,2)
-        # Take probability of positive class (second column)
-        if predictions.shape[1] == BINARY_CLASS_COUNT:
-            predictions = predictions[:, 1]  # Shape becomes (N,)
-            return labels, predictions
-        # Case 3: Multi-class classification with shape (N,C)
-        # Keep predictions as-is if labels are 1D and batch sizes match
-        if len(labels.shape) == 1 and predictions.shape[0] == labels.shape[0]:
-            return labels, predictions
-
-    # If we get here, the shapes are not compatible
-    raise ValueError(
-        f"Incompatible shapes: labels {labels.shape}, predictions {predictions.shape}. "
-        "Expected labels (N,) or (N, 1) and predictions (N,) or (N, C) where C is number of classes.",
-    )
-
-
-
- -
- -
- - -

- mcc - - -

-
mcc(
-    labels: NDArray[float64], predictions: NDArray[float64]
-) -> float
-
- -
- -

Compute Matthews Correlation Coefficient.

- -
- Source code in src/stimulus/utils/performance.py -
def mcc(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:
-    """Compute Matthews Correlation Coefficient."""
-    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])
-    return float(matthews_corrcoef(labels, predictions_binary))
-
-
-
- -
- -
- - -

- prauc - - -

-
prauc(
-    labels: NDArray[float64], predictions: NDArray[float64]
-) -> float
-
- -
- -

Compute PR AUC score.

- -
- Source code in src/stimulus/utils/performance.py -
def prauc(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:
-    """Compute PR AUC score."""
-    return float(average_precision_score(labels, predictions))
-
-
-
- -
- -
- - -

- precision - - -

-
precision(
-    labels: NDArray[float64], predictions: NDArray[float64]
-) -> float
-
- -
- -

Compute precision score.

- -
- Source code in src/stimulus/utils/performance.py -
def precision(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:
-    """Compute precision score."""
-    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])
-    return float(precision_score(labels, predictions_binary))
-
-
-
- -
- -
- - -

- recall - - -

-
recall(
-    labels: NDArray[float64], predictions: NDArray[float64]
-) -> float
-
- -
- -

Compute recall score.

- -
- Source code in src/stimulus/utils/performance.py -
def recall(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:
-    """Compute recall score."""
-    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])
-    return float(recall_score(labels, predictions_binary))
-
-
-
- -
- -
- - -

- rocauc - - -

-
rocauc(
-    labels: NDArray[float64], predictions: NDArray[float64]
-) -> float
-
- -
- -

Compute ROC AUC score.

- -
- Source code in src/stimulus/utils/performance.py -
def rocauc(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:
-    """Compute ROC AUC score."""
-    return float(roc_auc_score(labels, predictions))
-
-
-
- -
- -
- - -

- spearmanr - - -

-
spearmanr(
-    labels: NDArray[float64], predictions: NDArray[float64]
-) -> float
-
- -
- -

Compute Spearman correlation coefficient.

- -
- Source code in src/stimulus/utils/performance.py -
def spearmanr(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:
-    """Compute Spearman correlation coefficient."""
-    return float(spearmanr(labels, predictions)[0])
-
-
-
- -
- - - -
- -
- -
- - - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/utils/yaml_data/index.html b/reference/stimulus/utils/yaml_data/index.html index 3f6d6494..848843e9 100644 --- a/reference/stimulus/utils/yaml_data/index.html +++ b/reference/stimulus/utils/yaml_data/index.html @@ -1,3870 +1,557 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.utils.yaml_data - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - -
- - - - - - - -
- -
- - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - - - - - - - - - - - - - -
- - - -

- yaml_data - - -

- -
- -

Utility module for handling YAML configuration files and their validation.

- - - - - - - -

Classes:

- - - - - -

Functions:

- - - - - - -
- - - - - - - - -
- - - -

- YamlColumns - - -

- - -
-

- Bases: BaseModel

- - -

Model for column configuration.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- -
- - - -

- YamlColumnsEncoder - - -

- - -
-

- Bases: BaseModel

- - -

Model for column encoder configuration.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- -
- - - -

- YamlConfigDict - - -

- - -
-

- Bases: BaseModel

- - -

Model for main YAML configuration.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- -
- - - -

- YamlGlobalParams - - -

- - -
-

- Bases: BaseModel

- - -

Model for global parameters in YAML configuration.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- -
- - - -

- YamlSchema - - -

- - -
-

- Bases: BaseModel

- - -

Model for validating YAML schema.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- -
- - - -

- YamlSplit - - -

- - -
-

- Bases: BaseModel

- - -

Model for split configuration.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- -
- - - -

- YamlSubConfigDict - - -

- - -
-

- Bases: BaseModel

- - -

Model for sub-configuration generated from main config.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- -
- - - -

- YamlTransform - - -

- - -
-

- Bases: BaseModel

- - -

Model for transform configuration.

- - - - - - - - - -

Methods:

- - - - - - - -
- - - - - - - - - -
- - -

- validate_param_lists_across_columns - - - - classmethod - - -

-
validate_param_lists_across_columns(
-    columns: list[YamlTransformColumns],
-) -> list[YamlTransformColumns]
-
- -
- -

Validate that parameter lists across columns have consistent lengths.

- - -

Parameters:

- - - -

Returns:

- - -
- Source code in src/stimulus/utils/yaml_data.py -
@field_validator("columns")
-@classmethod
-def validate_param_lists_across_columns(cls, columns: list[YamlTransformColumns]) -> list[YamlTransformColumns]:
-    """Validate that parameter lists across columns have consistent lengths.
-
-    Args:
-        columns: List of transform columns to validate
-
-    Returns:
-        The validated columns list
-    """
-    # Get all parameter list lengths across all columns and transformations
-    all_list_lengths: set[int] = set()
-
-    for column in columns:
-        for transformation in column.transformations:
-            if transformation.params and any(
-                isinstance(param_value, list) and len(param_value) > 0
-                for param_value in transformation.params.values()
-            ):
-                all_list_lengths.update(
-                    len(param_value)
-                    for param_value in transformation.params.values()
-                    if isinstance(param_value, list) and len(param_value) > 0
-                )
-
-    # Skip validation if no lists found
-    if not all_list_lengths:
-        return columns
-
-    # Check if all lists either have length 1, or all have the same length
-    all_list_lengths.discard(1)  # Remove length 1 as it's always valid
-    if len(all_list_lengths) > 1:  # Multiple different lengths found
-        raise ValueError(
-            "All parameter lists across columns must either contain one element or have the same length",
-        )
-
-    return columns
-
-
-
- -
- - - -
- -
- -
- -
- - - -

- YamlTransformColumns - - -

- - -
-

- Bases: BaseModel

- - -

Model for transform columns configuration.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- -
- - - -

- YamlTransformColumnsTransformation - - -

- - -
-

- Bases: BaseModel

- - -

Model for column transformation configuration.

- - - - - - - - - - - - - -
- - - - - - - - - - - -
- -
- -
- - -
- - -

- check_yaml_schema - - -

-
check_yaml_schema(config_yaml: YamlConfigDict) -> str
-
- -
- -

Validate YAML configuration fields have correct types.

-

If a children field is specific to a parent, the children field's class is hosted in the parent field's class. If any field is not the right type, the function raises an error explaining the problem.

- - -

Parameters:

-
    -
  • - config_yaml - (YamlConfigDict) - – -
    -

    The YamlConfigDict containing the fields of the yaml configuration file

    -
    -
  • -
- - -

Returns:

-
    -
  • -str ( str -) – -
    -

    Empty string if validation succeeds

    -
    -
  • -
- - -

Raises:

-
    -
  • - ValueError - – -
    -

    If validation fails

    -
    -
  • -
- -
- Source code in src/stimulus/utils/yaml_data.py -
def check_yaml_schema(config_yaml: YamlConfigDict) -> str:
-    """Validate YAML configuration fields have correct types.
-
-    If a children field is specific to a parent, the children field's class is hosted in the parent field's class.
-    If any field is not the right type, the function raises an error explaining the problem.
-
-    Args:
-        config_yaml: The YamlConfigDict containing the fields of the yaml configuration file
-
-    Returns:
-        str: Empty string if validation succeeds
-
-    Raises:
-        ValueError: If validation fails
-    """
-    try:
-        YamlSchema(yaml_conf=config_yaml)
-    except ValidationError as e:
-        # Use logging instead of print for error handling
-        raise ValueError("Wrong type on a field, see the pydantic report above") from e
-    return ""
-
-
-
- -
- -
- - -

- dump_yaml_list_into_files - - -

-
dump_yaml_list_into_files(
-    yaml_list: list[YamlSubConfigDict],
-    directory_path: str,
-    base_name: str,
-) -> None
-
- -
- -

Dumps a list of YAML configurations into separate files with custom formatting.

- -
- Source code in src/stimulus/utils/yaml_data.py -
def dump_yaml_list_into_files(
-    yaml_list: list[YamlSubConfigDict],
-    directory_path: str,
-    base_name: str,
-) -> None:
-    """Dumps a list of YAML configurations into separate files with custom formatting."""
-    # Create a new class attribute rather than assigning to the method
-    # Remove this line since we'll add ignore_aliases to CustomDumper instead
-
-    def represent_none(dumper: yaml.Dumper, _: Any) -> yaml.Node:
-        """Custom representer to format None values as empty strings in YAML output."""
-        return dumper.represent_scalar("tag:yaml.org,2002:null", "")
-
-    def custom_representer(dumper: yaml.Dumper, data: Any) -> yaml.Node:
-        """Custom representer to handle different types of lists with appropriate formatting."""
-        if isinstance(data, list):
-            if len(data) == 0:
-                return dumper.represent_scalar("tag:yaml.org,2002:null", "")
-            if isinstance(data[0], dict):
-                return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=False)
-            if isinstance(data[0], list):
-                return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True)
-        return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True)
-
-    class CustomDumper(yaml.Dumper):
-        """Custom YAML dumper that adds extra formatting controls."""
-
-        def ignore_aliases(self, _data: Any) -> bool:
-            """Ignore aliases in the YAML output."""
-            return True
-
-        def write_line_break(self, _data: Any = None) -> None:
-            """Add extra newline after root-level elements."""
-            super().write_line_break(_data)
-            if len(self.indents) <= 1:  # At root level
-                super().write_line_break(_data)
-
-        def increase_indent(self, *, flow: bool = False, indentless: bool = False) -> None:  # type: ignore[override]
-            """Ensure consistent indentation by preventing indentless sequences."""
-            return super().increase_indent(
-                flow=flow,
-                indentless=indentless,
-            )  # Force indentless to False for better formatting
-
-    # Register the custom representers with our dumper
-    yaml.add_representer(type(None), represent_none, Dumper=CustomDumper)
-    yaml.add_representer(list, custom_representer, Dumper=CustomDumper)
-
-    for i, yaml_dict in enumerate(yaml_list):
-        dict_data = yaml_dict.model_dump(exclude_none=True)
-
-        def fix_params(input_dict: dict[str, Any]) -> dict[str, Any]:
-            """Recursively process dictionary to properly handle params fields."""
-            if isinstance(input_dict, dict):
-                processed_dict: dict[str, Any] = {}
-                for key, value in input_dict.items():
-                    if key == "encoder" and isinstance(value, list):
-                        processed_dict[key] = []
-                        for encoder in value:
-                            processed_encoder = dict(encoder)
-                            if "params" not in processed_encoder or not processed_encoder["params"]:
-                                processed_encoder["params"] = {}
-                            processed_dict[key].append(processed_encoder)
-                    elif key == "transformations" and isinstance(value, list):
-                        processed_dict[key] = []
-                        for transformation in value:
-                            processed_transformation = dict(transformation)
-                            if "params" not in processed_transformation or not processed_transformation["params"]:
-                                processed_transformation["params"] = {}
-                            processed_dict[key].append(processed_transformation)
-                    elif isinstance(value, dict):
-                        processed_dict[key] = fix_params(value)
-                    elif isinstance(value, list):
-                        processed_dict[key] = [
-                            fix_params(list_item) if isinstance(list_item, dict) else list_item for list_item in value
-                        ]
-                    else:
-                        processed_dict[key] = value
-                return processed_dict
-            return input_dict
-
-        dict_data = fix_params(dict_data)
-
-        with open(f"{directory_path}/{base_name}_{i}.yaml", "w") as f:
-            yaml.dump(
-                dict_data,
-                f,
-                Dumper=CustomDumper,
-                sort_keys=False,
-                default_flow_style=False,
-                indent=2,
-                width=float("inf"),  # Prevent line wrapping
-            )
-
-
-
- -
- -
- - -

- expand_transform_list_combinations - - -

-
expand_transform_list_combinations(
-    transform_list: list[YamlTransform],
-) -> list[YamlTransform]
-
- -
- -

Expands a list of transforms into all possible parameter combinations.

-

Takes a list of transforms where each transform may contain parameter lists, and expands them into separate transforms with single parameter values. For example, if a transform has parameters [0.1, 0.2] and [1, 2], this will create two transforms: one with 0.1/1 and another with 0.2/2.

- - -

Parameters:

-
    -
  • - transform_list - (list[YamlTransform]) - – -
    -

    A list of YamlTransform objects containing parameter lists that need to be expanded into individual transforms.

    -
    -
  • -
- - -

Returns:

-
    -
  • - list[YamlTransform] - – -
    -

    list[YamlTransform]: A flattened list of transforms where each transform has single parameter values instead of parameter lists. The length of the returned list will be the sum of the number of parameter combinations for each input transform.

    -
    -
  • -
- -
- Source code in src/stimulus/utils/yaml_data.py -
def expand_transform_list_combinations(transform_list: list[YamlTransform]) -> list[YamlTransform]:
-    """Expands a list of transforms into all possible parameter combinations.
-
-    Takes a list of transforms where each transform may contain parameter lists,
-    and expands them into separate transforms with single parameter values.
-    For example, if a transform has parameters [0.1, 0.2] and [1, 2], this will
-    create two transforms: one with 0.1/1 and another with 0.2/2.
-
-    Args:
-        transform_list: A list of YamlTransform objects containing parameter lists
-            that need to be expanded into individual transforms.
-
-    Returns:
-        list[YamlTransform]: A flattened list of transforms where each transform
-            has single parameter values instead of parameter lists. The length of
-            the returned list will be the sum of the number of parameter combinations
-            for each input transform.
-    """
-    sub_transforms = []
-    for transform in transform_list:
-        sub_transforms.extend(expand_transform_parameter_combinations(transform))
-    return sub_transforms
-
-
-
- -
- -
- - -

- expand_transform_parameter_combinations - - -

-
expand_transform_parameter_combinations(
-    transform: YamlTransform,
-) -> list[YamlTransform]
-
- -
- -

Get all possible transforms by extracting parameters at each valid index.

-

For a transform with parameter lists, creates multiple new transforms, each containing single parameter values from the corresponding indices of the parameter lists.

- - -

Parameters:

-
    -
  • - transform - (YamlTransform) - – -
    -

    The original transform containing parameter lists

    -
    -
  • -
- - -

Returns:

-
    -
  • - list[YamlTransform] - – -
    -

    A list of transforms, each with single parameter values from sequential indices

    -
    -
  • -
- -
- Source code in src/stimulus/utils/yaml_data.py -
def expand_transform_parameter_combinations(transform: YamlTransform) -> list[YamlTransform]:
-    """Get all possible transforms by extracting parameters at each valid index.
-
-    For a transform with parameter lists, creates multiple new transforms, each containing
-    single parameter values from the corresponding indices of the parameter lists.
-
-    Args:
-        transform: The original transform containing parameter lists
-
-    Returns:
-        A list of transforms, each with single parameter values from sequential indices
-    """
-    # Find the length of parameter lists - we only need to check the first list we find
-    # since all lists must have the same length (enforced by pydantic validator)
-    max_length = 1
-    for column in transform.columns:
-        for transformation in column.transformations:
-            if transformation.params:
-                list_lengths = [len(v) for v in transformation.params.values() if isinstance(v, list) and len(v) > 1]
-                if list_lengths:
-                    max_length = list_lengths[0]  # All lists have same length due to validator
-                    break
-
-    # Generate a transform for each index
-    transforms = []
-    for i in range(max_length):
-        transforms.append(extract_transform_parameters_at_index(transform, i))
-
-    return transforms
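
To see the index-wise expansion concretely without constructing the pydantic models, here is a toy sketch that mirrors the same pairing logic on a plain params dict; it is purely illustrative and does not use the YamlTransform API.

# Toy illustration of index-wise parameter expansion (not the actual YamlTransform models).
params = {"scale": [0.1, 0.2], "shift": [1, 2], "name": "noise"}

def extract_at_index(p: dict, i: int) -> dict:
    """Pick the i-th element of every list-valued parameter; keep scalars as-is."""
    return {key: (value[i] if isinstance(value, list) else value) for key, value in p.items()}

max_length = max((len(v) for v in params.values() if isinstance(v, list)), default=1)
expanded = [extract_at_index(params, i) for i in range(max_length)]
print(expanded)
# [{'scale': 0.1, 'shift': 1, 'name': 'noise'}, {'scale': 0.2, 'shift': 2, 'name': 'noise'}]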
-
-
-
- -
- -
- - -

- extract_transform_parameters_at_index - - -

-
extract_transform_parameters_at_index(
-    transform: YamlTransform, index: int = 0
-) -> YamlTransform
-
- -
- -

Get a transform with parameters at the specified index.

- - -

Parameters:

-
    -
  • - transform - (YamlTransform) - – -
    -

    The original transform containing parameter lists

    -
    -
  • -
  • - index - (int, default: - 0 -) - – -
    -

    Index to extract parameters from (default 0)

    -
    -
  • -
- - -

Returns:

-
    -
  • - YamlTransform - – -
    -

    A new transform with single parameter values at the specified index

    -
    -
  • -
- -
- Source code in src/stimulus/utils/yaml_data.py -
def extract_transform_parameters_at_index(transform: YamlTransform, index: int = 0) -> YamlTransform:
-    """Get a transform with parameters at the specified index.
-
-    Args:
-        transform: The original transform containing parameter lists
-        index: Index to extract parameters from (default 0)
-
-    Returns:
-        A new transform with single parameter values at the specified index
-    """
-    # Create a copy of the transform
-    new_transform = YamlTransform(**transform.model_dump())
-
-    # Process each column and transformation
-    for column in new_transform.columns:
-        for transformation in column.transformations:
-            if transformation.params:
-                # Convert each parameter list to single value at index
-                new_params = {}
-                for param_name, param_value in transformation.params.items():
-                    if isinstance(param_value, list):
-                        new_params[param_name] = param_value[index]
-                    else:
-                        new_params[param_name] = param_value
-                transformation.params = new_params
-
-    return new_transform
-
-
-
- -
- -
- - -

- generate_data_configs - - -

-
generate_data_configs(
-    yaml_config: YamlConfigDict,
-) -> list[YamlSubConfigDict]
-
- -
- -

Generates all possible data configurations from a YAML config.

-

Takes a YAML configuration that may contain parameter lists and splits, and generates all possible combinations of parameters and splits into separate data configurations.

-

For example, if the config has:
- A transform with parameters [0.1, 0.2]
- Two splits [0.7/0.3] and [0.8/0.2]
This will generate 4 configs, 2 for each split.

- - -

Parameters:

-
    -
  • - yaml_config - (YamlConfigDict) - – -
    -

    The source YAML configuration containing transforms with parameter lists and multiple splits.

    -
    -
  • -
- - -

Returns:

-
    -
  • - list[YamlSubConfigDict] - – -
    -

    list[YamlSubConfigDict]: A list of data configurations, where each config has single parameter values and one split configuration. The length will be the product of the number of parameter combinations and the number of splits.

    -
    -
  • -
- -
- Source code in src/stimulus/utils/yaml_data.py -
def generate_data_configs(yaml_config: YamlConfigDict) -> list[YamlSubConfigDict]:
-    """Generates all possible data configurations from a YAML config.
-
-    Takes a YAML configuration that may contain parameter lists and splits,
-    and generates all possible combinations of parameters and splits into
-    separate data configurations.
-
-    For example, if the config has:
-    - A transform with parameters [0.1, 0.2]
-    - Two splits [0.7/0.3] and [0.8/0.2]
-    This will generate 4 configs, 2 for each split.
-
-    Args:
-        yaml_config: The source YAML configuration containing transforms with
-            parameter lists and multiple splits.
-
-    Returns:
-        list[YamlSubConfigDict]: A list of data configurations, where each
-            config has single parameter values and one split configuration. The
-            length will be the product of the number of parameter combinations
-            and the number of splits.
-    """
-    if isinstance(yaml_config, dict) and not isinstance(yaml_config, YamlConfigDict):
-        raise TypeError("Input must be a YamlConfigDict object")
-
-    sub_transforms = expand_transform_list_combinations(yaml_config.transforms)
-    sub_splits = yaml_config.split
-    sub_configs = []
-    for split in sub_splits:
-        for transform in sub_transforms:
-            sub_configs.append(
-                YamlSubConfigDict(
-                    global_params=yaml_config.global_params,
-                    columns=yaml_config.columns,
-                    transforms=transform,
-                    split=split,
-                ),
-            )
-    return sub_configs
-
-
-
- -
- - - -
- -
- -
- - - - - - - - - - - - - - - - - - - - -
\ No newline at end of file diff --git a/reference/stimulus/utils/yaml_model_schema/index.html b/reference/stimulus/utils/yaml_model_schema/index.html index 96472ffb..a22f418b 100644 --- a/reference/stimulus/utils/yaml_model_schema/index.html +++ b/reference/stimulus/utils/yaml_model_schema/index.html @@ -1,3659 +1,255 @@ - - - - - - - - - - - - - - - - - - - - - - - - - stimulus.utils.yaml_model_schema - stimulus-py - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - -
- - - - - - - -
- -
- - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - - - - - - - - - - - - - -
- - - -

- yaml_model_schema - - -

- -
- -

Module for handling YAML configuration files and converting them to Ray Tune format.

- - - - - - - -

Classes:

-
    -
  • - CustomTunableParameter - – -
    -

    Custom tunable parameter.

    -
    -
  • -
  • - Data - – -
    -

    Data parameters.

    -
    -
  • -
  • - Loss - – -
    -

    Loss parameters.

    -
    -
  • -
  • - Model - – -
    -

    Model configuration.

    -
    -
  • -
  • - RayTuneModel - – -
    -

    Ray Tune compatible model configuration.

    -
    -
  • -
  • - RunParams - – -
    -

    Run parameters.

    -
    -
  • -
  • - Scheduler - – -
    -

    Scheduler parameters.

    -
    -
  • -
  • - TunableParameter - – -
    -

    Tunable parameter.

    -
    -
  • -
  • - Tune - – -
    -

    Tune parameters.

    -
    -
  • -
  • - TuneParams - – -
    -

    Tune parameters.

    -
    -
  • -
  • - YamlRayConfigLoader - – -
    -

    Load and convert YAML configurations to Ray Tune format.

    -
    -
  • -
- - - - - - - -
- - - - - - - - -
- - - -

CustomTunableParameter

Bases: BaseModel

Custom tunable parameter.

Data

Bases: BaseModel

Data parameters.

Loss

Bases: BaseModel

Loss parameters.

Model

Bases: BaseModel

Model configuration.

RayTuneModel

Bases: BaseModel

Ray Tune compatible model configuration.

RunParams

Bases: BaseModel

Run parameters.

Scheduler

Bases: BaseModel

Scheduler parameters.

TunableParameter

Bases: BaseModel

Tunable parameter.

Methods:

  • validate_mode – Validate that mode is supported by Ray Tune.

validate_mode

validate_mode() -> TunableParameter

Validate that mode is supported by Ray Tune.

Source code in src/stimulus/utils/yaml_model_schema.py
@pydantic.model_validator(mode="after")
-def validate_mode(self) -> "TunableParameter":
-    """Validate that mode is supported by Ray Tune."""
-    if not hasattr(tune, self.mode):
-        raise AttributeError(
-            f"Mode {self.mode} not recognized, check the ray.tune documentation at https://docs.ray.io/en/master/tune/api_docs/suggestion.html",
-        )
-
-    mode = getattr(tune, self.mode)
-    if mode.__name__ not in [
-        "choice",
-        "uniform",
-        "loguniform",
-        "quniform",
-        "qloguniform",
-        "qnormal",
-        "randint",
-        "sample_from",
-    ]:
-        raise NotImplementedError(f"Mode {mode.__name__} not implemented yet")
-
-    return self
-
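To illustrate what this validator enforces, the snippet below mirrors its two checks using ray.tune directly. It is only an illustration of the logic, not a call into the schema class itself.

from ray import tune

# First check mirrored: the mode string must name an attribute of ray.tune.
assert hasattr(tune, "loguniform")
assert not hasattr(tune, "not_a_real_mode")

# Second check mirrored: only a fixed set of search-space functions is accepted.
supported = {"choice", "uniform", "loguniform", "quniform", "qloguniform", "qnormal", "randint", "sample_from"}
assert getattr(tune, "loguniform").__name__ in supported
assert getattr(tune, "grid_search").__name__ not in supported  # exists in ray.tune but would be rejected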
Tune

Bases: BaseModel

Tune parameters.

TuneParams

Bases: BaseModel

Tune parameters.

YamlRayConfigLoader

YamlRayConfigLoader(model: Model)

Load and convert YAML configurations to Ray Tune format.

This class handles loading model configurations and converting them into formats compatible with Ray Tune's hyperparameter search spaces.

Parameters:

  • model (Model) – Pydantic Model instance containing configuration

Methods:

  • convert_config_to_ray – Convert Model configuration to Ray Tune format.
  • convert_raytune – Convert parameter configuration to Ray Tune format.
  • get_config – Return the current configuration.
  • raytune_sample_from – Apply tune.sample_from to a given custom sampling function.
  • raytune_space_selector – Convert space parameters to Ray Tune format based on the mode.
  • sampint – Return a list of n random samples from the sample_space.

Source code in src/stimulus/utils/yaml_model_schema.py
def __init__(self, model: Model) -> None:
-    """Initialize the config loader with a Model instance.
-
-    Args:
-        model: Pydantic Model instance containing configuration
-    """
-    self.model = model
-    self.ray_model = self.convert_config_to_ray(model)
-
convert_config_to_ray

convert_config_to_ray(model: Model) -> RayTuneModel

Convert Model configuration to Ray Tune format.

Converts parameters in network_params and optimizer_params to Ray Tune search spaces.

Parameters:

  • model (Model) – Model configuration

Returns:

  • RayTuneModel – Ray Tune compatible model configuration

Source code in src/stimulus/utils/yaml_model_schema.py
def convert_config_to_ray(self, model: Model) -> RayTuneModel:
-    """Convert Model configuration to Ray Tune format.
-
-    Converts parameters in network_params and optimizer_params to Ray Tune search spaces.
-
-    Args:
-        model: Model configuration
-
-    Returns:
-        Ray Tune compatible model configuration
-    """
-    return RayTuneModel(
-        network_params={k: self.convert_raytune(v) for k, v in model.network_params.items()},
-        optimizer_params={k: self.convert_raytune(v) for k, v in model.optimizer_params.items()},
-        loss_params={k: self.convert_raytune(v) for k, v in model.loss_params},
-        data_params={k: self.convert_raytune(v) for k, v in model.data_params},
-        tune=model.tune,
-    )
-
convert_raytune

convert_raytune(param: TunableParameter | CustomTunableParameter) -> Any

Convert parameter configuration to Ray Tune format.

Parameters:

  • param (TunableParameter | CustomTunableParameter) – Parameter configuration

Returns:

  • Any – Ray Tune compatible parameter configuration

Source code in src/stimulus/utils/yaml_model_schema.py
def convert_raytune(self, param: TunableParameter | CustomTunableParameter) -> Any:
-    """Convert parameter configuration to Ray Tune format.
-
-    Args:
-        param: Parameter configuration
-
-    Returns:
-        Ray Tune compatible parameter configuration
-    """
-    mode = getattr(tune, param.mode)
-
-    if isinstance(param, TunableParameter):
-        return self.raytune_space_selector(mode, param.space)
-    return self.raytune_sample_from(mode, param)
-
get_config

get_config() -> RayTuneModel

Return the current configuration.

Returns:

  • RayTuneModel – Current configuration dictionary

Source code in src/stimulus/utils/yaml_model_schema.py
def get_config(self) -> RayTuneModel:
-    """Return the current configuration.
-
-    Returns:
-        Current configuration dictionary
-    """
-    return self.ray_model
-
raytune_sample_from

raytune_sample_from(mode: Callable, param: CustomTunableParameter) -> Any

Apply tune.sample_from to a given custom sampling function.

Parameters:

  • mode (Callable) – Ray Tune sampling function
  • param (CustomTunableParameter) – TunableParameter containing sampling parameters

Returns:

  • Any – Configured sampling function

Raises:

  • NotImplementedError – If the sampling function is not supported

Source code in src/stimulus/utils/yaml_model_schema.py
def raytune_sample_from(self, mode: Callable, param: CustomTunableParameter) -> Any:
-    """Apply tune.sample_from to a given custom sampling function.
-
-    Args:
-        mode: Ray Tune sampling function
-        param: TunableParameter containing sampling parameters
-
-    Returns:
-        Configured sampling function
-
-    Raises:
-        NotImplementedError: If the sampling function is not supported
-    """
-    if param.function == "sampint":
-        return mode(lambda _: self.sampint(param.sample_space, param.n_space))
-
-    raise NotImplementedError(f"Function {param.function} not implemented yet")
-
raytune_space_selector

raytune_space_selector(mode: Callable, space: list) -> Any

Convert space parameters to Ray Tune format based on the mode.

Parameters:

  • mode (Callable) – Ray Tune search space function (e.g., tune.choice, tune.uniform)
  • space (list) – List of parameters defining the search space

Returns:

  • Any – Configured Ray Tune search space

Source code in src/stimulus/utils/yaml_model_schema.py
def raytune_space_selector(self, mode: Callable, space: list) -> Any:
-    """Convert space parameters to Ray Tune format based on the mode.
-
-    Args:
-        mode: Ray Tune search space function (e.g., tune.choice, tune.uniform)
-        space: List of parameters defining the search space
-
-    Returns:
-        Configured Ray Tune search space
-    """
-    if mode.__name__ == "choice":
-        return mode(space)
-
-    return mode(*tuple(space))
-
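For illustration, the two branches above correspond to the following ray.tune calls, built directly here rather than through the loader class:

from ray import tune

# mode.__name__ == "choice": the whole list is passed as the set of candidate values.
hidden_units = tune.choice([16, 32, 64])

# Any other mode: the list is unpacked into positional arguments, e.g. (low, high).
learning_rate = tune.uniform(*[1e-4, 1e-1])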
sampint (staticmethod)

sampint(sample_space: list, n_space: list) -> list[int]

Return a list of n random samples from the sample_space.

This function is useful for sampling different numbers of layers, each with different numbers of neurons.

Parameters:

  • sample_space (list) – List [min, max] defining range of values to sample from
  • n_space (list) – List [min, max] defining range for number of samples

Returns:

  • list[int] – List of randomly sampled integers

Note: Uses Python's random module which is not cryptographically secure. This is acceptable for hyperparameter sampling but should not be used for security-critical purposes (S311 fails when linting).

Source code in src/stimulus/utils/yaml_model_schema.py
@staticmethod
-def sampint(sample_space: list, n_space: list) -> list[int]:
-    """Return a list of n random samples from the sample_space.
-
-    This function is useful for sampling different numbers of layers,
-    each with different numbers of neurons.
-
-    Args:
-        sample_space: List [min, max] defining range of values to sample from
-        n_space: List [min, max] defining range for number of samples
-
-    Returns:
-        List of randomly sampled integers
-
-    Note:
-        Uses Python's random module which is not cryptographically secure.
-        This is acceptable for hyperparameter sampling but should not be
-        used for security-critical purposes (S311 fails when linting).
-    """
-    sample_space_list = list(range(sample_space[0], sample_space[1] + 1))
-    n_space_list = list(range(n_space[0], n_space[1] + 1))
-    n = random.choice(n_space_list)  # noqa: S311
-    return random.sample(sample_space_list, n)
-
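For example (output values are illustrative, since the sampling is random):

from stimulus.utils.yaml_model_schema import YamlRayConfigLoader

# Draw between 2 and 4 distinct integers from the range 8..32, e.g. to pick layer widths.
layer_widths = YamlRayConfigLoader.sampint(sample_space=[8, 32], n_space=[2, 4])
print(layer_widths)  # e.g. [27, 9, 14]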
\ No newline at end of file diff --git a/search/search_index.json b/search/search_index.json index bce05e2b..a97bad6d 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"STIMULUS","text":""},{"location":"#stochastic-testing-with-input-modification-for-unbiased-learning-systems","title":"Stochastic Testing with Input Modification for Unbiased Learning Systems.","text":"

Warning

This package is in active development and breaking changes may occur. The API is not yet stable and features might be added, modified, or removed without notice. Use in production environments is not recommended at this stage.

We encourage you to:

  • \ud83d\udcdd Report bugs and issues on our GitHub Issues page

  • \ud83d\udca1 Suggest features and improvements through GitHub Discussions

  • \ud83e\udd1d Contribute by submitting pull requests

We are actively working towards release 1.0.0 (see milestone). Check the Slack channel by clicking on the badge above, where development is actively discussed. Build with us every Wednesday from 14:00 to 18:00 CET on the nf-core gathertown (see Slack for calendar updates, i.e. some weeks open dev hours are not possible).

"},{"location":"#introduction","title":"Introduction","text":"

Most (if not all) quality software is thoroughly tested. Deep neural networks seem to have escaped this paradigm.

In the age of large-scale deep learning, it is critical that early-stage deep learning models (prototypes) are tested to ensure costly bugs do not happen at scale.

Here, we attempt to solve the testing problem by proposing an extensive library for testing deep neural networks beyond test-set performance.

Stimulus provides the following functionalities:

  1. Data Perturbation Testing: Modify training data to test a model's robustness to perturbations and uncover which pre-processing steps increase performance

  2. Hyperparameter Optimization: Perform tuning on model architecture with user-defined search spaces using Ray[tune] to ensure comparable performance across data transformations

  3. Comprehensive Analysis: Generate an all-against-all model report to guide data pre-processing decisions

For large-scale experiments, we recommend our nf-core deepmodeloptim pipeline, which is still under development and will be released alongside stimulus v1.0.0.

\ud83d\udcf9 Stimulus was featured at the Nextflow Summit 2024 in Barcelona, which is a nice introduction to current package capabilities; you can watch the talk here

Stimulus aims to provide the following functionalities in the near future; stay tuned for updates!

  1. Model Architecture Testing: Run routine checks on model architecture and training process including type-checking, model execution, and weight updates

  2. Post-Training Validation: Perform comprehensive model validation including overfitting detection and out-of-distribution performance testing

  3. Informed Hyperparameter Tuning: Encourage tuning strategies that follow Google's Deep Learning Tuning Playbook 1

  4. Scaling Analysis: Generate scaling law reports to understand prototype model behavior at different scales

"},{"location":"#user-guide","title":"User guide","text":""},{"location":"#repository-organization","title":"Repository organization","text":"

Stimulus is organized as follows; we will refer to this structure in the following sections.

src/stimulus/ \ud83e\uddea\n\u251c\u2500\u2500 analysis/ \ud83d\udcca\n\u2502   \u2514\u2500\u2500 analysis_default.py\n\u251c\u2500\u2500 cli/ \ud83d\udda5\ufe0f\n\u2502   \u251c\u2500\u2500 analysis_default.py\n\u2502   \u251c\u2500\u2500 check_model.py\n\u2502   \u251c\u2500\u2500 interpret_json.py\n\u2502   \u251c\u2500\u2500 predict.py\n\u2502   \u251c\u2500\u2500 shuffle_csv.py\n\u2502   \u251c\u2500\u2500 split_csv.py\n\u2502   \u251c\u2500\u2500 split_yaml.py\n\u2502   \u251c\u2500\u2500 transform_csv.py\n\u2502   \u2514\u2500\u2500 tuning.py\n\u251c\u2500\u2500 data/ \ud83d\udcc1\n\u2502   \u251c\u2500\u2500 csv.py\n\u2502   \u251c\u2500\u2500 experiments.py\n\u2502   \u251c\u2500\u2500 handlertorch.py\n\u2502   \u251c\u2500\u2500 encoding/ \ud83d\udd10\n\u2502   \u2502   \u2514\u2500\u2500 encoders.py\n\u2502   \u251c\u2500\u2500 splitters/ \u2702\ufe0f\n\u2502   \u2502   \u2514\u2500\u2500 splitters.py\n\u2502   \u2514\u2500\u2500 transform/ \ud83d\udd04\n\u2502       \u2514\u2500\u2500 data_transformation_generators.py\n\u251c\u2500\u2500 learner/ \ud83e\udde0\n\u2502   \u251c\u2500\u2500 predict.py\n\u2502   \u251c\u2500\u2500 raytune_learner.py\n\u2502   \u2514\u2500\u2500 raytune_parser.py\n\u2514\u2500\u2500 utils/ \ud83d\udee0\ufe0f\n    \u251c\u2500\u2500 json_schema.py\n    \u251c\u2500\u2500 launch_utils.py\n    \u251c\u2500\u2500 performance.py\n    \u2514\u2500\u2500 yaml_model_schema.py\n
"},{"location":"#data-encoding","title":"Data encoding","text":"

Data in stimulus can take many forms (files, text, images, networks...). To support this diversity, stimulus relies on the encoding module. The list of available encoders can be found here.

If the provided encoders do not support the type of data you are working with, you can write your own encoder by inheriting from the AbstractEncoder class and implementing the encode, decode and encode_all methods (a minimal sketch is shown after the list below).

  • encode is currently optional; it can raise a NotImplementedError if the encoder does not support encoding a single data point
  • decode is currently optional; it can raise a NotImplementedError if the encoder does not support decoding
  • encode_all is called by other stimulus functions and is expected to return a np.array.
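Below is a minimal sketch of what such a custom encoder could look like. The import path and exact method signatures are assumptions for illustration; only the AbstractEncoder class name and the three methods come from the description above.

import numpy as np

# Assumed import path (the encoders live in data/encoding/encoders.py); adjust to your install.
from stimulus.data.encoding.encoders import AbstractEncoder


class GCContentEncoder(AbstractEncoder):
    """Hypothetical encoder turning a DNA string into its GC fraction."""

    def encode(self, data: str) -> float:
        # Optional: encode a single data point.
        sequence = data.lower()
        return (sequence.count("g") + sequence.count("c")) / max(len(sequence), 1)

    def decode(self, data: float) -> str:
        # Optional: this encoder is not invertible, so we signal that explicitly.
        raise NotImplementedError("GC content cannot be decoded back to a sequence")

    def encode_all(self, data: list) -> np.ndarray:
        # Required: called by stimulus and expected to return a np.array.
        return np.array([self.encode(sequence) for sequence in data])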
"},{"location":"#expected-data-format","title":"Expected data format","text":"

Data is expected to be presented in a csv samplesheet file with the following format:

input1:input:input_type   input2:input:input_type   meta1:meta:meta_type   label1:label:label_type   label2:label:label_type
sample1 input1            sample1 input2            sample1 meta1          sample1 label1            sample1 label2
sample2 input1            sample2 input2            sample2 meta1          sample2 label1            sample2 label2
sample3 input1            sample3 input2            sample3 meta1          sample3 label1            sample3 label2

Columns are expected to follow this name convention: name:type:data_type

  • name corresponds to the column name; this should be the same as the input names in the model batch definition (see the model section for more details)

  • type is either input, meta or label; typically, models predict the labels from the input, and meta is used to perform downstream analysis

  • data_type is the column data type.

Note

This rigid data format is expected to change once we move to release v1.0.0, data types and information will be defined in a yaml config and only column names will be required in the data, see this github issue

"},{"location":"#connecting-encoders-and-datasets","title":"Connecting encoders and datasets","text":"

Once we have our data formatted and our encoders ready, we need to explicitly state which encoder is used for which data type. This is done through an experiment class.

To understand how experiment classes are used to connect data types and encoders, let's have a look at a minimal DnaToFloat example:

class DnaToFloat(AbstractExperiment):\n    def __init__(self) -> None:\n        super().__init__()\n        self.dna = {\n            \"encoder\": encoders.TextOneHotEncoder(alphabet=\"acgt\"),\n        }\n        self.float = {\n            \"encoder\": encoders.FloatEncoder(),\n        }\n

Here we define the data_type entries for the dna and float types. Note that these data_type names are the same as the ones defined in the samplesheet dataset above. For example, a dataset on which this experiment would run could look like this:

mouse_dna:input:dna   mouse_rnaseq:label:float
ACTAGGCATGCTAGTCG     0.53
ACTGGGGCTAGTCGAA      0.23
GATGTTCTGATGCT        0.98

Note how the data_type for the mouse_dna and mouse_rnaseq columns match exactly the attribute names defined in the DnaToFloat minimal class above.

stimulus-py ships with a few basic experiment classes. If you need to write your own experiment class, simply inherit from the base AbstractExperiment class and override the __init__ method as shown above.

Note

This has the drawback of requiring a new experiment class to be built each time a new task is defined (for instance, if we want to use dna and protein sequences to predict rna).

Once we move to release v1.0.0, type (i.e. input, meta, label) and data_type will be defined in the data yaml config, and the relevant experiment class will be automatically built.

"},{"location":"#loading-the-data","title":"Loading the data","text":"

Finally, once we have defined our encoders, the experiment class and the samplesheet, stimulus will transparently load the data using the csv.py module.

csv.py contains two important classes: CsvLoader and CsvProcessing.

CsvLoader is responsible for na\u00efvely loading the data (without changing anything). It performs a couple of checks on the dataset to ensure it is correctly formatted, then uses the experiment class in conjunction with the column names to call the proper encoders and output inputs, labels, and meta dictionary objects.

CsvLoader is used by the handlertorch module to load data into pytorch tensors.

Tip

So, to recap, when you load a dataset into a torch tensor,

  1. handlertorch will call CsvLoader with the csv samplesheet and the experiment class

  2. CsvLoader will use the experiment class to fetch the proper encoder encode_all method for each data column

  3. CsvLoader will use the encode_all method to encode the data and output dictionary objects for inputs, labels and meta data

  4. handlertorch will convert the contents to torch tensors

  5. handlertorch will feed the input torch tensor to the model, use the label torch tensor for loss computation and will store the meta tensor for downstream analysis

Great, now you know how stimulus transparently loads your data into your pytorch model! While this seems complicated, the only thing you really have to do is format your data correctly in a csv samplesheet and define your experiment class with the proper encoders (either by using the provided encoders or by writing your own). A rough sketch of this flow follows below.
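The snippet below is only a sketch of the flow described above; the constructor arguments are assumptions inferred from that description, not verified signatures.

from stimulus.data.csv import CsvLoader  # assumed import path for the csv.py module

experiment = DnaToFloat()                              # the experiment class defined above
loader = CsvLoader(experiment, "dna_samplesheet.csv")  # hypothetical argument order
# The loader exposes the encoded inputs, labels and meta dictionaries that
# handlertorch then converts into torch tensors and feeds to the model.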

"},{"location":"#data-transformation","title":"Data transformation","text":"

Measuring the impact of data transformations (noising, down/upsampling, augmentation...) on models at training time is a major feature of stimulus.

Data transformations materialize as DataTransformer classes and should inherit from the AbstractDataTransformer class (see docs).

Note

Writing your own DataTransformer class is the same as writing your own Encoder class: you should override the transform and transform_all methods.

Warning

Every DataTransformer class has to accept a seed parameter in its transform and transform_all methods, and np.random.seed(seed) should be called in those methods.

Warning

Every DataTransformer class should have an add_row argument set to either True or False depending on whether it augments the data (adds rows) or not. A minimal sketch follows below.
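Here is a minimal sketch of such a class. The import path and base-class behaviour are assumptions for illustration; the seed parameters and the add_row flag follow the warnings above.

import numpy as np

# Assumed import path (transforms live in data/transform/data_transformation_generators.py).
from stimulus.data.transform.data_transformation_generators import AbstractDataTransformer


class GaussianJitter(AbstractDataTransformer):
    """Hypothetical transformer adding Gaussian noise to a float column."""

    def __init__(self, std: float = 0.1) -> None:
        super().__init__()
        self.add_row = False  # values are modified in place, no rows are added
        self.std = std

    def transform(self, data: float, seed: float = None) -> float:
        np.random.seed(seed)  # every transform must accept and use a seed
        return float(data + np.random.normal(0, self.std))

    def transform_all(self, data: list, seed: float = None) -> list:
        np.random.seed(seed)
        noise = np.random.normal(0, self.std, size=len(data))
        return [float(value + eps) for value, eps in zip(data, noise)]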

"},{"location":"#connecting-transformations-and-dataset","title":"Connecting transformations and dataset","text":"

Just like encoders, data transformations are defined in the Experiment class. Let's upgrade our minimal DnaToFloat class defined above to reflect this.

class DnaToFloat(AbstractExperiment):\n    def __init__(self) -> None:\n        super().__init__()\n        self.dna = {\n            \"encoder\": encoders.TextOneHotEncoder(alphabet=\"acgt\"),\n            \"data_transformation_generators\": {\n                \"UniformTextMasker\": data_transformation_generators.UniformTextMasker(mask=\"N\"),\n                \"ReverseComplement\": data_transformation_generators.ReverseComplement(),\n                \"GaussianChunk\": data_transformation_generators.GaussianChunk(),\n            },\n        }\n        self.float = {\n            \"encoder\": encoders.FloatEncoder(),\n            \"data_transformation_generators\": {\"GaussianNoise\": data_transformation_generators.GaussianNoise()},\n        }\n

As you can see, our data_type arguments get another field, \"data_transformation_generators\", where we can initialize the DataTransformer classes with their relevant parameters.

In the csv module, the CsvProcessing class will call the transform_all methods from the classes contained in \"data_transformation_generators\" based on the column type and a list of transformations.

For example, if we give the [\"ReverseComplement\",\"GaussianChunk\"] list to the CsvProcessing transform method, the data contained in the mouse_dna:input:dna column in our minimal example above will first be reverse complemented and then chunked.

Tip

Recap : To transform your dataset,

  • define your own DataTransformer class or use one we provide

  • add it to your experiment class

  • load your data through CsvProcessing

  • set a list of transforms

  • call CsvProcessing.transform(transform_list), as sketched below
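A hedged usage sketch of that recap: only transform(transform_list) is documented above, and the constructor arguments shown here are assumptions.

from stimulus.data.csv import CsvProcessing  # assumed import path for the csv.py module

experiment = DnaToFloat()                                      # experiment class defined earlier
processing = CsvProcessing(experiment, "dna_samplesheet.csv")  # hypothetical argument order
processing.transform(["ReverseComplement", "GaussianChunk"])   # applied in order to the matching columns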

"},{"location":"#installation","title":"Installation","text":"

stimulus is still under development; you can install it from test-pypi by running the following command:

pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple stimulus-py==0.0.10\n
"},{"location":"#citations","title":"citations","text":"
  1. Godbole, V., Dahl, G. E., Gilmer, J., Shallue, C. J., & Nado, Z. (2023). Deep Learning Tuning Playbook (Version 1.0) [Computer software]. http://github.com/google-research/tuning_playbook \u21a9

"},{"location":"changelog/","title":"Changelog","text":"

The format is based on Keep a Changelog, and this project adheres to Semantic Versioning.

"},{"location":"changelog/#021","title":"0.2.1","text":"

The first released version is 0.2.1; the changelog will be updated from there.

"},{"location":"code_of_conduct/","title":"Contributor Covenant Code of Conduct","text":""},{"location":"code_of_conduct/#our-pledge","title":"Our Pledge","text":"

We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.

"},{"location":"code_of_conduct/#our-standards","title":"Our Standards","text":"

Examples of behavior that contributes to a positive environment for our community include:

  • Demonstrating empathy and kindness toward other people
  • Being respectful of differing opinions, viewpoints, and experiences
  • Giving and gracefully accepting constructive feedback
  • Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
  • Focusing on what is best not just for us as individuals, but for the overall community

Examples of unacceptable behavior include:

  • The use of sexualized language or imagery, and sexual attention or advances of any kind
  • Trolling, insulting or derogatory comments, and personal or political attacks
  • Public or private harassment
  • Publishing others' private information, such as a physical or email address, without their explicit permission
  • Other conduct which could reasonably be considered inappropriate in a professional setting
"},{"location":"code_of_conduct/#enforcement-responsibilities","title":"Enforcement Responsibilities","text":"

Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.

Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.

"},{"location":"code_of_conduct/#scope","title":"Scope","text":"

This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.

"},{"location":"code_of_conduct/#enforcement","title":"Enforcement","text":"

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at mathysgrapotte@gmail.com. All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the reporter of any incident.

"},{"location":"code_of_conduct/#enforcement-guidelines","title":"Enforcement Guidelines","text":"

Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:

"},{"location":"code_of_conduct/#1-correction","title":"1. Correction","text":"

Community Impact: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.

Consequence: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.

"},{"location":"code_of_conduct/#2-warning","title":"2. Warning","text":"

Community Impact: A violation through a single incident or series of actions.

Consequence: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.

"},{"location":"code_of_conduct/#3-temporary-ban","title":"3. Temporary Ban","text":"

Community Impact: A serious violation of community standards, including sustained inappropriate behavior.

Consequence: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.

"},{"location":"code_of_conduct/#4-permanent-ban","title":"4. Permanent Ban","text":"

Community Impact: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.

Consequence: A permanent ban from any sort of public interaction within the community.

"},{"location":"code_of_conduct/#attribution","title":"Attribution","text":"

This Code of Conduct is adapted from the Contributor Covenant, version 2.1, available at https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.

Community Impact Guidelines were inspired by Mozilla's code of conduct enforcement ladder.

For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations.

"},{"location":"contributing/","title":"Contributing","text":"

Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given.

"},{"location":"contributing/#environment-setup","title":"Environment setup","text":"

Nothing easier!

Fork and clone the repository, then:

cd stimulus-py\nmake setup\n

Note

If it fails for some reason, you'll need to install uv manually.

You can install it with:

curl -LsSf https://astral.sh/uv/install.sh | sh\n

Now you can try running make setup again, or simply uv sync.

You now have the dependencies installed.

Run make help to see all the available actions!

"},{"location":"contributing/#tasks","title":"Tasks","text":"

The entry-point to run commands and tasks is the make Python script, located in the scripts directory. Try running make to show the available commands and tasks. The commands do not need the Python dependencies to be installed, while the tasks do. The cross-platform tasks are written in Python, thanks to duty.

If you work in VSCode, we provide an action to configure VSCode for the project.

"},{"location":"contributing/#development","title":"Development","text":"

As usual:

  1. create a new branch: git switch -c feature-or-bugfix-name
  2. edit the code and/or the documentation

Before committing:

  1. run make format to auto-format the code
  2. run make check to check everything (fix any warning)
  3. run make test to run the tests (fix any issue)
  4. if you updated the documentation or the project dependencies:
    1. run make docs
    2. go to http://localhost:8000 and check that everything looks good

Then you can open a pull request and we will review it. Make sure you join our Slack hosted on nf-core to talk and build with us!

"},{"location":"credits/","title":"Credits","text":""},{"location":"credits/#exec-1--credits","title":"Credits","text":"

These projects were used to build stimulus-py. Thank you!

Python | uv | copier-uv

"},{"location":"credits/#exec-1--runtime-dependencies","title":"Runtime dependencies","text":"Project Summary Version (accepted) Version (last resolved) License aiohappyeyeballs Happy Eyeballs for asyncio >=2.3.0 2.4.3 PSF-2.0 aiohttp Async http client/server framework (asyncio) >=3.7 3.11.0 Apache 2 aiohttp-cors CORS support for aiohttp 0.7.0 Apache License, Version 2.0 aiosignal aiosignal: a list of registered asynchronous callbacks 1.3.1 Apache 2.0 annotated-types Reusable constraint types to use with typing.Annotated >=0.6.0 0.7.0 MIT License attrs Classes Without Boilerplate >=17.3.0 24.2.0 MIT cachetools Extensible memoizing collections and decorators >=2.0.0, <6.0 5.5.0 MIT certifi Python package for providing Mozilla's CA Bundle. >=2017.4.17 2024.8.30 MPL-2.0 charset-normalizer The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. >=2, <4 3.4.0 MIT click Composable command line interface toolkit >=7.0 8.1.7 BSD-3-Clause colorama Cross-platform colored terminal text. >=0.4 0.4.6 BSD License colorful Terminal string styling done right, in Python. 0.5.6 MIT License contourpy Python library for calculating contours of 2D quadrilateral grids >=1.0.1 1.3.1 BSD License cycler Composable style cycles >=0.10 0.12.1 BSD License dill serialize all of Python >=0.3.9 0.3.9 BSD-3-Clause distlib Distribution utilities >=0.3.7, <1 0.3.9 PSF-2.0 filelock A platform independent file lock. 3.16.1 Unlicense fonttools Tools to manipulate font files >=4.22.0 4.54.1 MIT frozenlist A list-like structure which implements collections.abc.MutableSequence 1.5.0 Apache 2 fsspec File-system specification 2024.10.0 BSD License google-api-core Google API client core library >=1.0.0, <2.0.0 2.23.0 Apache 2.0 google-auth Google Authentication Library >=2.14.1, <3.0.dev0 2.36.0 Apache 2.0 googleapis-common-protos Common protobufs used in Google APIs >=1.56.2, <2.0.dev0 1.66.0 Apache-2.0 grpcio HTTP/2-based RPC framework >=1.32.0 1.67.1 Apache License 2.0 idna Internationalized Domain Names in Applications (IDNA) >=2.5, <4 3.10 BSD License importlib_metadata Read metadata from Python packages >=6.6, >=4.4 8.5.0 Apache Software License iniconfig brain-dead simple config-ini parsing 2.0.0 MIT Jinja2 A very fast and expressive template engine. >=2.11.1 3.1.4 BSD License joblib Lightweight pipelining with Python functions >=1.2.0 1.4.2 BSD 3-Clause jsonschema An implementation of JSON Schema validation for Python 4.23.0 MIT jsonschema-specifications The JSON Schema meta-schemas and vocabularies, exposed as a Registry >=2023.03.6 2024.10.1 MIT License kiwisolver A fast implementation of the Cassowary constraint solver >=1.3.1 1.4.7 BSD License markdown-it-py Python port of markdown-it. Markdown parsing, done right! >=2.2.0, >=2.1.0 3.0.0 MIT License MarkupSafe Safely add untrusted strings to HTML/XML markup. 
>=2.0.1, >=2.0 3.0.2 BSD License matplotlib Python plotting package >=3.9.0 3.9.2 Python Software Foundation License mdurl Markdown URL utilities ~=0.1 0.1.2 MIT License memray A memory profiler for Python applications 1.14.0 Apache 2.0 mpmath Python library for arbitrary-precision floating-point arithmetic >=1.1.0, <1.4 1.3.0 BSD msgpack MessagePack serializer >=1.0.0, <2.0.0 1.1.0 Apache 2.0 multidict multidict implementation >=4.5, <7.0 6.1.0 Apache 2 multiprocess better multiprocessing and multithreading in Python ==0.70.17 0.70.17 BSD-3-Clause networkx Python package for creating and manipulating graphs and networks 3.4.2 BSD License numpy Fundamental package for array computing in Python >=1.26.0, <2.0.0 1.26.4 BSD License opencensus A stats collection and distributed tracing framework 0.11.4 Apache-2.0 opencensus-context OpenCensus Runtime Context >=0.1.3 0.1.3 Apache-2.0 packaging Core utilities for Python packages >=20.5, >=20.0 24.2 Apache Software License + BSD License pandas Powerful data structures for data analysis, time series, and statistics >=2.2.0 2.2.3 BSD License pillow Python Imaging Library (Fork) >=8 11.0.0 MIT-CMU platformdirs A small Python package for determining appropriate platform-specific dirs, e.g. a user data dir. >=3.9.1, >=2.2.0, <5 4.3.6 MIT pluggy plugin and hook calling mechanisms for python >=1.5, <2 1.5.0 MIT polars-lts-cpu Blazingly fast DataFrame library >=0.20.30, <1.12.0 1.11.0 MIT License prometheus_client Python client for the Prometheus monitoring system. >=0.7.1 0.21.0 Apache Software License 2.0 propcache Accelerated property cache >=0.2.0 0.2.0 Apache-2.0 proto-plus Beautiful, Pythonic protocol buffers. >=1.22.3, <2.0.0dev 1.25.0 Apache 2.0 protobuf >=3.15.3, !=3.19.5 5.28.3 3-Clause BSD License py-spy Sampling profiler for Python programs >=0.2.0 0.4.0 MIT pyarrow Python library for Apache Arrow >=6.0.1 17.0.0 Apache Software License pyasn1 Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208) >=0.1.3 0.6.1 BSD-2-Clause pyasn1_modules A collection of ASN.1-based protocols modules >=0.2.1 0.4.1 BSD pydantic Data validation using Python type hints >=2.0.0 2.9.2 MIT pydantic_core Core functionality for Pydantic validation and serialization ==2.23.4 2.23.4 MIT Pygments Pygments is a syntax highlighting package written in Python. >=2.5.1, >=2.13.0, <3.0.0 2.18.0 BSD-2-Clause pyparsing pyparsing module - Classes and methods to define and execute parsing grammars >=2.3.1 3.2.0 MIT License pytest pytest: simple powerful testing with Python >=8.2, >=7.0.0, <9.0.0 8.3.3 MIT python-dateutil Extensions to the standard Python datetime module >=2.8.2, >=2.8.1 2.9.0.post0 BSD License + Apache Software License pytz World timezone definitions, modern and historical >=2020.1 2024.2 MIT PyYAML YAML parser and emitter for Python >=5.1 6.0.2 MIT ray Ray provides a simple, universal API for building distributed applications. >=2.38.0 2.39.0 Apache 2.0 referencing JSON Referencing + Python >=0.28.4 0.35.1 MIT License requests Python HTTP for Humans. 
>=2.20 2.32.3 Apache-2.0 rich Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal >=12.0.0, >=11.2.0 13.9.4 MIT rpds-py Python bindings to Rust's persistent data structures (rpds) >=0.7.1 0.21.0 MIT License rsa Pure-Python RSA implementation >=3.1.4, <5 4.9 Apache-2.0 safetensors >=0.4.5 0.4.5 Apache Software License scikit-learn A set of python modules for machine learning and data mining >=1.5.0 1.5.2 BSD License scipy Fundamental algorithms for scientific computing in Python ==1.14.1 1.14.1 BSD License six Python 2 and 3 compatibility utilities >=1.5 1.16.0 MIT smart-open Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...) 7.0.5 MIT sympy Computer algebra system (CAS) in Python 1.13.1 BSD syrupy Pytest Snapshot Test Utility >=4.8.0 4.8.1 Apache-2.0 tensorboardX TensorBoardX lets you watch Tensors Flow without Tensorflow >=1.9 2.6.2.2 MIT license textual Modern Text User Interface framework >=0.41.0 0.85.2 MIT threadpoolctl threadpoolctl >=3.1.0 3.5.0 BSD-3-Clause torch Tensors and Dynamic neural networks in Python with strong GPU acceleration ==2.2.2 2.2.2 BSD-3 typing_extensions Backported and Experimental Type Hints for Python 3.8+ >=4.12.2, >=3.6.4 4.12.2 Python Software Foundation License tzdata Provider of IANA time zone data >=2022.7 2024.2 Apache-2.0 urllib3 HTTP library with thread-safe connection pooling, file post, and more. >=1.26.0, >=1.21.1, <3 2.2.3 MIT License virtualenv Virtual Python Environment builder >=20.0.24, !=20.21.1 20.27.1 MIT wrapt Module for decorators, wrappers and monkey patching. 1.16.0 BSD yarl Yet another URL library >=1.17.0, <2.0 1.17.1 Apache-2.0 zipp Backport of pathlib-compatible object wrapper for zip files >=3.20 3.21.0 MIT License"},{"location":"credits/#exec-1--development-dependencies","title":"Development dependencies","text":"Project Summary Version (accepted) Version (last resolved) License ansimarkup Produce colored terminal text with an xml-like markup ~=1.4 1.5.0 Revised BSD License appdirs A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\". >=1.4 1.4.4 MIT babel Internationalization utilities >=2.7.0 2.16.0 BSD-3-Clause black The uncompromising code formatter. >=24.4 24.10.0 MIT build A simple, correct Python build frontend >=1.2 1.2.2.post1 MIT License certifi Python package for providing Mozilla's CA Bundle. >=2017.4.17 2024.8.30 MPL-2.0 charset-normalizer The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. >=2, <4 3.4.0 MIT click Composable command line interface toolkit >=7.0 8.1.7 BSD-3-Clause colorama Cross-platform colored terminal text. >=0.4 0.4.6 BSD License coverage Code coverage measurement for Python >=7.5 7.6.4 Apache-2.0 csscompressor A python port of YUI CSS Compressor >=0.9.5 0.9.5 BSD docutils Docutils -- Python Documentation Utilities >=0.21.2 0.21.2 Public Domain + Python Software Foundation License + BSD License + GNU General Public License (GPL) duty A simple task runner. >=1.4 1.4.3 ISC editables Editable installations >=0.5 0.5 MIT License execnet execnet: rapid multi-Python deployment >=2.1 2.1.1 MIT failprint Run a command, print its output only if it fails. >=0.11, !=1.0.0 1.0.3 ISC ghp-import Copy your docs directly to the gh-pages branch. >=1.0 2.1.0 Apache Software License git-changelog Automatic Changelog generator using Jinja2 templates. 
>=2.5 2.5.2 ISC gitdb Git Object Database >=4.0.1, <5 4.0.11 BSD License GitPython GitPython is a Python library used to interact with Git repositories 3.1.43 BSD-3-Clause griffe Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API. >=0.49 1.5.1 ISC htmlmin2 An HTML Minifier >=0.1.13 0.1.13 BSD idna Internationalized Domain Names in Applications (IDNA) >=2.5, <4 3.10 BSD License importlib_metadata Read metadata from Python packages >=6.6, >=4.4 8.5.0 Apache Software License iniconfig brain-dead simple config-ini parsing 2.0.0 MIT jaraco.classes Utility functions for Python class constructs 3.4.0 MIT License jaraco.context Useful decorators and context managers 6.0.1 MIT License jaraco.functools Functools like those found in stdlib 4.1.0 MIT License Jinja2 A very fast and expressive template engine. >=2.11.1 3.1.4 BSD License jsmin JavaScript minifier. >=3.0.1 3.0.1 MIT License keyring Store and access your passwords safely. >=15.1 25.5.0 MIT License Markdown Python implementation of John Gruber's Markdown. >=3.3.6 3.7 BSD License markdown-callouts Markdown extension: a classier syntax for admonitions >=0.4 0.4.0 MIT markdown-exec Utilities to execute code blocks in Markdown files. >=1.8 1.9.3 ISC markdown-it-py Python port of markdown-it. Markdown parsing, done right! >=2.2.0, >=2.1.0 3.0.0 MIT License MarkupSafe Safely add untrusted strings to HTML/XML markup. >=2.0.1, >=2.0 3.0.2 BSD License mdurl Markdown URL utilities ~=0.1 0.1.2 MIT License mergedeep A deep merge function for \ud83d\udc0d. >=1.3.4 1.3.4 MIT License mkdocs Project documentation with Markdown. >=1.6 1.6.1 BSD-2-Clause mkdocs-autorefs Automatically link across pages in MkDocs. >=1.2 1.2.0 ISC mkdocs-coverage MkDocs plugin to integrate your coverage HTML report into your site. >=1.0 1.1.0 ISC mkdocs-gen-files MkDocs plugin to programmatically generate documentation pages during the build >=0.5 0.5.0 MIT mkdocs-get-deps MkDocs extension that lists all dependencies according to a mkdocs.yml file >=0.2.0 0.2.0 MIT mkdocs-git-revision-date-localized-plugin Mkdocs plugin that enables displaying the localized date of the last git modification of a markdown file. >=1.2 1.3.0 MIT mkdocs-literate-nav MkDocs plugin to specify the navigation in Markdown instead of YAML >=0.6 0.6.1 MIT mkdocs-material Documentation that simply works >=9.5 9.5.44 MIT mkdocs-material-extensions Extension pack for Python Markdown and MkDocs Material. ~=1.3 1.3.1 MIT mkdocs-minify-plugin An MkDocs plugin to minify HTML, JS or CSS files prior to being written to disk >=0.8 0.8.0 MIT mkdocstrings Automatic documentation from sources, for MkDocs. >=0.25 0.27.0 ISC mkdocstrings-python A Python handler for mkdocstrings. >=0.5.2 1.12.2 ISC more-itertools More routines for operating on iterables, beyond itertools 10.5.0 MIT License mypy Optional static typing for Python >=1.10 1.13.0 MIT mypy-extensions Type system extensions for programs checked with the mypy type checker. >=1.0.0 1.0.0 MIT License nh3 Python bindings to the ammonia HTML sanitization library. >=0.2.14 0.2.18 MIT packaging Core utilities for Python packages >=20.5, >=20.0 24.2 Apache Software License + BSD License paginate Divides large result sets into pages for easier browsing ~=0.5 0.5.7 MIT pathspec Utility library for gitignore style pattern matching of file paths. 
>=0.11.1 0.12.1 Mozilla Public License 2.0 (MPL 2.0) pkginfo Query metadata from sdists / bdists / installed packages. >=1.8.1 1.10.0 MIT platformdirs A small Python package for determining appropriate platform-specific dirs, e.g. a user data dir. >=3.9.1, >=2.2.0, <5 4.3.6 MIT pluggy plugin and hook calling mechanisms for python >=1.5, <2 1.5.0 MIT ptyprocess Run a subprocess in a pseudo terminal ~=0.6 0.7.0 ISC License (ISCL) Pygments Pygments is a syntax highlighting package written in Python. >=2.5.1, >=2.13.0, <3.0.0 2.18.0 BSD-2-Clause pymdown-extensions Extension pack for Python Markdown. >=9 10.12 MIT pyproject_hooks Wrappers to call pyproject.toml-based build backend hooks. 1.2.0 MIT License pytest pytest: simple powerful testing with Python >=8.2, >=7.0.0, <9.0.0 8.3.3 MIT pytest-cov Pytest plugin for measuring coverage. >=5.0 6.0.0 MIT pytest-randomly Pytest plugin to randomly order tests and control random.seed. >=3.15 3.16.0 MIT License pytest-xdist pytest xdist plugin for distributed testing, most importantly across multiple CPUs >=3.6 3.6.1 MIT License python-dateutil Extensions to the standard Python datetime module >=2.8.2, >=2.8.1 2.9.0.post0 BSD License + Apache Software License pytz World timezone definitions, modern and historical >=2020.1 2024.2 MIT PyYAML YAML parser and emitter for Python >=5.1 6.0.2 MIT pyyaml_env_tag A custom YAML tag for referencing environment variables in YAML files. >=0.1 0.1 MIT License readme_renderer readme_renderer is a library for rendering readme descriptions for Warehouse >=35.0 44.0 Apache License, Version 2.0 regex Alternative regular expression module, to replace re. >=2022.4 2024.11.6 Apache Software License requests Python HTTP for Humans. >=2.20 2.32.3 Apache-2.0 requests-toolbelt A utility belt for advanced users of python-requests >=0.8.0, !=0.9.0 1.0.0 Apache 2.0 rfc3986 Validating URI References per RFC 3986 >=1.4.0 2.0.0 Apache 2.0 rich Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal >=12.0.0, >=11.2.0 13.9.4 MIT ruff An extremely fast Python linter and code formatter, written in Rust. >=0.4 0.7.3 MIT semver Python helper for Semantic Versioning (https://semver.org) >=2.13 3.0.2 BSD six Python 2 and 3 compatibility utilities >=1.5 1.16.0 MIT smmap A pure Python implementation of a sliding window memory map manager >=3.0.1, <6 5.0.1 BSD twine Collection of utilities for publishing packages on PyPI >=5.1 5.1.1 Apache Software License types-Markdown Typing stubs for Markdown >=3.6 3.7.0.20240822 Apache-2.0 types-PyYAML Typing stubs for PyYAML >=6.0 6.0.12.20240917 Apache-2.0 typing_extensions Backported and Experimental Type Hints for Python 3.8+ >=4.12.2, >=3.6.4 4.12.2 Python Software Foundation License urllib3 HTTP library with thread-safe connection pooling, file post, and more. >=1.26.0, >=1.21.1, <3 2.2.3 MIT License watchdog Filesystem events monitoring >=2.0 6.0.0 Apache-2.0 zipp Backport of pathlib-compatible object wrapper for zip files >=3.20 3.21.0 MIT License"},{"location":"license/","title":"License","text":"
MIT License\n\nCopyright (c) 2024 Mathys Grapotte\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n
"},{"location":"reference/SUMMARY/","title":"SUMMARY","text":"
  • stimulus
    • cli
      • analysis_default
      • check_model
      • predict
      • shuffle_csv
      • split_csv
      • split_yaml
      • transform_csv
      • tuning
    • data
      • data_handlers
      • encoding
        • encoders
      • handlertorch
      • loaders
      • splitters
        • splitters
      • transform
        • data_transformation_generators
    • debug
    • learner
      • predict
      • raytune_learner
      • raytune_parser
    • typing
    • utils
      • generic_utils
      • launch_utils
      • performance
      • yaml_data
      • yaml_model_schema
"},{"location":"reference/stimulus/","title":"stimulus","text":""},{"location":"reference/stimulus/#stimulus","title":"stimulus","text":"

stimulus-py package.

Modules:

  • cli \u2013

    Command line interface package for the stimulus library.

  • data \u2013

    Data handling and processing module.

  • debug \u2013

    Debugging utilities.

  • learner \u2013

    Learner package for model training and evaluation.

  • typing \u2013

    Typing for Stimulus Python API.

  • utils \u2013

    Utility functions package.

"},{"location":"reference/stimulus/debug/","title":"stimulus.debug","text":""},{"location":"reference/stimulus/debug/#stimulus.debug","title":"debug","text":"

Debugging utilities.

Classes:

  • Environment \u2013

    Dataclass to store environment information.

  • Package \u2013

    Dataclass describing a Python package.

  • Variable \u2013

    Dataclass describing an environment variable.

Functions:

  • get_debug_info \u2013

    Get debug/environment information.

  • get_version \u2013

    Get version of the given distribution.

  • print_debug_info \u2013

    Print debug/environment information.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment","title":"Environment dataclass","text":"
Environment(\n    interpreter_name: str,\n    interpreter_version: str,\n    interpreter_path: str,\n    platform: str,\n    packages: list[Package],\n    variables: list[Variable],\n)\n

Dataclass to store environment information.

Attributes:

  • interpreter_name (str) \u2013

    Python interpreter name.

  • interpreter_path (str) \u2013

    Path to Python executable.

  • interpreter_version (str) \u2013

    Python interpreter version.

  • packages (list[Package]) \u2013

    Installed packages.

  • platform (str) \u2013

    Operating System.

  • variables (list[Variable]) \u2013

    Environment variables.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.interpreter_name","title":"interpreter_name instance-attribute","text":"
interpreter_name: str\n

Python interpreter name.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.interpreter_path","title":"interpreter_path instance-attribute","text":"
interpreter_path: str\n

Path to Python executable.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.interpreter_version","title":"interpreter_version instance-attribute","text":"
interpreter_version: str\n

Python interpreter version.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.packages","title":"packages instance-attribute","text":"
packages: list[Package]\n

Installed packages.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.platform","title":"platform instance-attribute","text":"
platform: str\n

Operating System.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.variables","title":"variables instance-attribute","text":"
variables: list[Variable]\n

Environment variables.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Package","title":"Package dataclass","text":"
Package(name: str, version: str)\n

Dataclass describing a Python package.

Attributes:

  • name (str) \u2013

    Package name.

  • version (str) \u2013

    Package version.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Package.name","title":"name instance-attribute","text":"
name: str\n

Package name.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Package.version","title":"version instance-attribute","text":"
version: str\n

Package version.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Variable","title":"Variable dataclass","text":"
Variable(name: str, value: str)\n

Dataclass describing an environment variable.

Attributes:

  • name (str) \u2013

    Variable name.

  • value (str) \u2013

    Variable value.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Variable.name","title":"name instance-attribute","text":"
name: str\n

Variable name.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Variable.value","title":"value instance-attribute","text":"
value: str\n

Variable value.

"},{"location":"reference/stimulus/debug/#stimulus.debug.get_debug_info","title":"get_debug_info","text":"
get_debug_info() -> Environment\n

Get debug/environment information.

Returns:

  • Environment \u2013

    Environment information.

Source code in src/stimulus/debug.py
def get_debug_info() -> Environment:\n    \"\"\"Get debug/environment information.\n\n    Returns:\n        Environment information.\n    \"\"\"\n    py_name, py_version = _interpreter_name_version()\n    packages = [\"stimulus-py\"]\n    variables = [\"PYTHONPATH\", *[var for var in os.environ if var.startswith(\"STIMULUS_PY\")]]\n    return Environment(\n        interpreter_name=py_name,\n        interpreter_version=py_version,\n        interpreter_path=sys.executable,\n        platform=platform.platform(),\n        variables=[Variable(var, val) for var in variables if (val := os.getenv(var))],\n        packages=[Package(pkg, get_version(pkg)) for pkg in packages],\n    )\n
"},{"location":"reference/stimulus/debug/#stimulus.debug.get_version","title":"get_version","text":"
get_version(dist: str = 'stimulus-py') -> str\n

Get version of the given distribution.

Parameters:

  • dist (str, default: 'stimulus-py' ) \u2013

    A distribution name.

Returns:

  • str \u2013

    A version number.

Source code in src/stimulus/debug.py
def get_version(dist: str = \"stimulus-py\") -> str:\n    \"\"\"Get version of the given distribution.\n\n    Parameters:\n        dist: A distribution name.\n\n    Returns:\n        A version number.\n    \"\"\"\n    try:\n        return metadata.version(dist)\n    except metadata.PackageNotFoundError:\n        return \"0.0.0\"\n
"},{"location":"reference/stimulus/debug/#stimulus.debug.print_debug_info","title":"print_debug_info","text":"
print_debug_info() -> None\n

Print debug/environment information.

Source code in src/stimulus/debug.py
def print_debug_info() -> None:\n    \"\"\"Print debug/environment information.\"\"\"\n    info = get_debug_info()\n    print(f\"- __System__: {info.platform}\")\n    print(f\"- __Python__: {info.interpreter_name} {info.interpreter_version} ({info.interpreter_path})\")\n    print(\"- __Environment variables__:\")\n    for var in info.variables:\n        print(f\"  - `{var.name}`: `{var.value}`\")\n    print(\"- __Installed packages__:\")\n    for pkg in info.packages:\n        print(f\"  - `{pkg.name}` v{pkg.version}\")\n
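For example, the helpers above can be called directly (the output shown in comments is illustrative):

from stimulus.debug import get_debug_info, get_version, print_debug_info

print(get_version())     # version of the stimulus-py distribution, "0.0.0" if it is not installed
info = get_debug_info()  # Environment dataclass: interpreter, platform, packages, variables
print(info.platform)     # e.g. "Linux-6.5.0-x86_64-with-glibc2.35"
print_debug_info()       # prints the same information as a bullet list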
"},{"location":"reference/stimulus/cli/","title":"stimulus.cli","text":""},{"location":"reference/stimulus/cli/#stimulus.cli","title":"cli","text":"

Command line interface package for the stimulus library.

Modules:

  • analysis_default \u2013

    Analysis default module for running model analysis and performance evaluation.

  • check_model \u2013

    CLI module for checking model configuration and running initial tests.

  • predict \u2013

    CLI module for model prediction on datasets.

  • shuffle_csv \u2013

    CLI module for shuffling CSV data files.

  • split_csv \u2013

    CLI module for splitting CSV data files.

  • split_yaml \u2013

    CLI module for splitting YAML configuration files.

  • transform_csv \u2013

    CLI module for transforming CSV data files.

  • tuning \u2013

    CLI module for running raytune tuning experiment.

"},{"location":"reference/stimulus/cli/analysis_default/","title":"stimulus.cli.analysis_default","text":""},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default","title":"analysis_default","text":"

Analysis default module for running model analysis and performance evaluation.

Functions:

  • get_args \u2013

    Get the arguments when using from the commandline.

  • load_model \u2013

    Load the model with its config and weights.

  • main \u2013

    Run the main analysis pipeline.

  • run \u2013

    Run the analysis script.

  • run_analysis_performance_model \u2013

    Run analysis to report model robustness.

  • run_analysis_performance_tune \u2013

    Run performance analysis during tuning/training.

"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when using from the commandline.

Returns:

  • Namespace \u2013

    Parsed command line arguments.

Source code in src/stimulus/cli/analysis_default.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\n\n    Returns:\n        Parsed command line arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"\")\n    parser.add_argument(\"-m\", \"--model\", type=str, required=True, metavar=\"FILE\", help=\"The model .py file\")\n    parser.add_argument(\n        \"-w\",\n        \"--weight\",\n        type=str,\n        required=True,\n        nargs=\"+\",\n        metavar=\"FILE\",\n        help=\"Model weights .pt file\",\n    )\n    parser.add_argument(\n        \"-me\",\n        \"--metrics\",\n        type=str,\n        required=True,\n        nargs=\"+\",\n        metavar=\"FILE\",\n        help=\"The file path for the metrics file obtained during tuning\",\n    )\n    parser.add_argument(\n        \"-ec\",\n        \"--experiment_config\",\n        type=str,\n        required=True,\n        nargs=\"+\",\n        metavar=\"FILE\",\n        help=\"The experiment config used to modify the data.\",\n    )\n    parser.add_argument(\n        \"-mc\",\n        \"--model_config\",\n        type=str,\n        required=True,\n        nargs=\"+\",\n        metavar=\"FILE\",\n        help=\"The tune config file.\",\n    )\n    parser.add_argument(\n        \"-d\",\n        \"--data\",\n        type=str,\n        required=True,\n        nargs=\"+\",\n        metavar=\"FILE\",\n        help=\"List of data files to be used for the analysis.\",\n    )\n    parser.add_argument(\"-o\", \"--outdir\", type=str, required=True, help=\"output directory\")\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.load_model","title":"load_model","text":"
load_model(\n    model_class: Any, weight_path: str, mconfig_path: str\n) -> Any\n

Load the model with its config and weights.

Parameters:

  • model_class (Any) \u2013

    Model class to instantiate

  • weight_path (str) \u2013

    Path to model weights

  • mconfig_path (str) \u2013

    Path to model config

Returns:

  • Any \u2013

    Loaded model instance

Source code in src/stimulus/cli/analysis_default.py
def load_model(model_class: Any, weight_path: str, mconfig_path: str) -> Any:\n    \"\"\"Load the model with its config and weights.\n\n    Args:\n        model_class: Model class to instantiate\n        weight_path: Path to model weights\n        mconfig_path: Path to model config\n\n    Returns:\n        Loaded model instance\n    \"\"\"\n    with open(mconfig_path) as in_json:\n        mconfig = json.load(in_json)[\"model_params\"]\n\n    model = model_class(**mconfig)\n    return safe_load(model, weight_path, strict=True)\n
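A hedged sketch of loading a tuned model; the model class, file names, and config contents below are hypothetical and only illustrate the expected inputs:

import torch

from stimulus.cli.analysis_default import load_model


class TinyModel(torch.nn.Module):
    # Hypothetical model; its constructor arguments must match the "model_params"
    # section of the tuning config JSON.
    def __init__(self, hidden: int = 8) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(4, hidden)


# Hypothetical artifacts produced by a previous tuning run.
model = load_model(TinyModel, "tiny_model.pt", "tiny_model-config.json")
model.eval()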
"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.main","title":"main","text":"
main(\n    model_path: str,\n    weight_list: list[str],\n    mconfig_list: list[str],\n    metrics_list: list[str],\n    econfig_list: list[str],\n    data_list: list[str],\n    outdir: str,\n) -> None\n

Run the main analysis pipeline.

Parameters:

  • model_path (str) \u2013

    Path to model file

  • weight_list (list[str]) \u2013

    List of model weight paths

  • mconfig_list (list[str]) \u2013

    List of model config paths

  • metrics_list (list[str]) \u2013

    List of metric file paths

  • econfig_list (list[str]) \u2013

    List of experiment config paths

  • data_list (list[str]) \u2013

    List of data file paths

  • outdir (str) \u2013

    Output directory path

Source code in src/stimulus/cli/analysis_default.py
def main(\n    model_path: str,\n    weight_list: list[str],\n    mconfig_list: list[str],\n    metrics_list: list[str],\n    econfig_list: list[str],\n    data_list: list[str],\n    outdir: str,\n) -> None:\n    \"\"\"Run the main analysis pipeline.\n\n    Args:\n        model_path: Path to model file\n        weight_list: List of model weight paths\n        mconfig_list: List of model config paths\n        metrics_list: List of metric file paths\n        econfig_list: List of experiment config paths\n        data_list: List of data file paths\n        outdir: Output directory path\n    \"\"\"\n    metrics = [\"rocauc\", \"prauc\", \"mcc\", \"f1score\", \"precision\", \"recall\"]\n\n    # Plot the performance during tuning/training\n    run_analysis_performance_tune(\n        metrics_list,\n        [*metrics, \"loss\"],  # Use list unpacking instead of concatenation\n        os.path.join(outdir, \"performance_tune_train\"),\n    )\n\n    # Run robustness analysis\n    run_analysis_performance_model(\n        metrics,\n        model_path,\n        weight_list,\n        mconfig_list,\n        econfig_list,\n        data_list,\n        os.path.join(outdir, \"performance_robustness\"),\n    )\n
"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.run","title":"run","text":"
run() -> None\n

Run the analysis script.

Source code in src/stimulus/cli/analysis_default.py
def run() -> None:\n    \"\"\"Run the analysis script.\"\"\"\n    args = get_args()\n    main(args.model, args.weight, args.model_config, args.metrics, args.experiment_config, args.data, args.outdir)\n
"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.run_analysis_performance_model","title":"run_analysis_performance_model","text":"
run_analysis_performance_model(\n    metrics: list[str],\n    model_path: str,\n    weight_list: list[str],\n    mconfig_list: list[str],\n    econfig_list: list[str],\n    data_list: list[str],\n    outdir: str,\n) -> None\n

Run analysis to report model robustness.

This block will compute the predictions of each model for each dataset. This information will be parsed and plots will be generated to report the model robustness.

Parameters:

  • metrics (list[str]) \u2013

    List of metrics to analyze

  • model_path (str) \u2013

    Path to model file

  • weight_list (list[str]) \u2013

    List of model weight paths

  • mconfig_list (list[str]) \u2013

    List of model config paths

  • econfig_list (list[str]) \u2013

    List of experiment config paths

  • data_list (list[str]) \u2013

    List of data file paths

  • outdir (str) \u2013

    Output directory path

Source code in src/stimulus/cli/analysis_default.py
def run_analysis_performance_model(\n    metrics: list[str],\n    model_path: str,\n    weight_list: list[str],\n    mconfig_list: list[str],\n    econfig_list: list[str],\n    data_list: list[str],\n    outdir: str,\n) -> None:\n    \"\"\"Run analysis to report model robustness.\n\n    This block will compute the predictions of each model for each dataset.\n    This information will be parsed and plots will be generated to report the model robustness.\n\n    Args:\n        metrics: List of metrics to analyze\n        model_path: Path to model file\n        weight_list: List of model weight paths\n        mconfig_list: List of model config paths\n        econfig_list: List of experiment config paths\n        data_list: List of data file paths\n        outdir: Output directory path\n    \"\"\"\n    if not os.path.exists(outdir):\n        os.makedirs(outdir)\n\n    # Load all the models weights into a list\n    model_names = []\n    model_list = []\n    model_class = import_class_from_file(model_path)\n    for weight_path, mconfig_path in zip(weight_list, mconfig_list):\n        model = load_model(model_class, weight_path, mconfig_path)\n        model_names.append(mconfig_path.split(\"/\")[-1].replace(\"-config.json\", \"\"))\n        model_list.append(model)\n\n    # Read experiment config and initialize experiment class\n    with open(econfig_list[0]) as in_json:\n        experiment_name = json.load(in_json)[\"experiment\"]\n    initialized_experiment_class = get_experiment(experiment_name)\n\n    # Initialize analysis\n    analysis = AnalysisRobustness(metrics, initialized_experiment_class, batch_size=256)\n\n    # Compute performance metrics\n    df = analysis.get_performance_table(model_names, model_list, data_list)\n    df.to_csv(os.path.join(outdir, \"performance_table.csv\"), index=False)\n\n    # Get average performance\n    tmp = analysis.get_average_performance_table(df)\n    tmp.to_csv(os.path.join(outdir, \"average_performance_table.csv\"), index=False)\n\n    # Plot heatmap\n    analysis.plot_performance_heatmap(df, output=os.path.join(outdir, \"performance_heatmap.png\"))\n\n    # Plot delta performance\n    outdir2 = os.path.join(outdir, \"delta_performance_vs_data\")\n    if not os.path.exists(outdir2):\n        os.makedirs(outdir2)\n    for metric in metrics:\n        analysis.plot_delta_performance(\n            metric,\n            df,\n            output=os.path.join(outdir2, f\"delta_performance_{metric}.png\"),\n        )\n
"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.run_analysis_performance_tune","title":"run_analysis_performance_tune","text":"
run_analysis_performance_tune(\n    metrics_list: list[str], metrics: list[str], outdir: str\n) -> None\n

Run performance analysis during tuning/training.

Each model has a metrics file obtained during tuning/training; the performance recorded there is read and plotted to track model performance per training iteration.

Parameters:

  • metrics_list (list[str]) \u2013

    List of metric file paths

  • metrics (list[str]) \u2013

    List of metrics to analyze

  • outdir (str) \u2013

    Output directory path

Source code in src/stimulus/cli/analysis_default.py
def run_analysis_performance_tune(metrics_list: list[str], metrics: list[str], outdir: str) -> None:\n    \"\"\"Run performance analysis during tuning/training.\n\n    Each model has a metrics file obtained during tuning/training,\n    check the performance there and plot it.\n    This is to track the model performance per training iteration.\n\n    Args:\n        metrics_list: List of metric file paths\n        metrics: List of metrics to analyze\n        outdir: Output directory path\n    \"\"\"\n    if not os.path.exists(outdir):\n        os.makedirs(outdir)\n\n    for metrics_path in metrics_list:\n        AnalysisPerformanceTune(metrics_path).plot_metric_vs_iteration(\n            metrics=metrics,\n            output=os.path.join(outdir, metrics_path.replace(\"-metrics.csv\", \"\") + \"-metric_vs_iteration.png\"),\n        )\n
"},{"location":"reference/stimulus/cli/check_model/","title":"stimulus.cli.check_model","text":""},{"location":"reference/stimulus/cli/check_model/#stimulus.cli.check_model","title":"check_model","text":"

CLI module for checking model configuration and running initial tests.

Modules:

  • handlertorch \u2013

    This file provides the class API for handling the data in pytorch using the Dataset and Dataloader classes.

  • launch_utils \u2013

    Utility functions for launching and configuring experiments and ray tuning.

  • loaders \u2013

    Loaders serve as interfaces between the CSV master class and custom methods.

  • raytune_learner \u2013

    Ray Tune wrapper and trainable model classes for hyperparameter optimization.

  • yaml_data \u2013

    Utility module for handling YAML configuration files and their validation.

  • yaml_model_schema \u2013

    Module for handling YAML configuration files and converting them to Ray Tune format.

Functions:

  • get_args \u2013

    Get the arguments when using from the commandline.

  • main \u2013

    Run the main model checking pipeline.

  • run \u2013

    Run the model checking script.

"},{"location":"reference/stimulus/cli/check_model/#stimulus.cli.check_model.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when using from the commandline.

Returns:

  • Namespace \u2013

    Parsed command line arguments.

Source code in src/stimulus/cli/check_model.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\n\n    Returns:\n        Parsed command line arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"Launch check_model.\")\n    parser.add_argument(\"-d\", \"--data\", type=str, required=True, metavar=\"FILE\", help=\"Path to input csv file.\")\n    parser.add_argument(\"-m\", \"--model\", type=str, required=True, metavar=\"FILE\", help=\"Path to model file.\")\n    parser.add_argument(\n        \"-e\",\n        \"--data_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to data config file.\",\n    )\n    parser.add_argument(\n        \"-c\",\n        \"--model_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to yaml config training file.\",\n    )\n    parser.add_argument(\n        \"-w\",\n        \"--initial_weights\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=None,\n        default=None,\n        metavar=\"FILE\",\n        help=\"The path to the initial weights (optional).\",\n    )\n\n    parser.add_argument(\n        \"-n\",\n        \"--num_samples\",\n        type=int,\n        required=False,\n        nargs=\"?\",\n        const=3,\n        default=3,\n        metavar=\"NUM_SAMPLES\",\n        help=\"Number of samples for tuning. Overwrites tune.tune_params.num_samples in config.\",\n    )\n    parser.add_argument(\n        \"--ray_results_dirpath\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=None,\n        default=None,\n        metavar=\"DIR_PATH\",\n        help=\"Location where ray_results output dir should be written. If None, uses ~/ray_results.\",\n    )\n    parser.add_argument(\n        \"--debug_mode\",\n        action=\"store_true\",\n        help=\"Activate debug mode for tuning. Default false, no debug.\",\n    )\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/check_model/#stimulus.cli.check_model.main","title":"main","text":"
main(\n    model_path: str,\n    data_path: str,\n    data_config_path: str,\n    model_config_path: str,\n    initial_weights: str | None = None,\n    num_samples: int = 3,\n    ray_results_dirpath: str | None = None,\n    *,\n    debug_mode: bool = False\n) -> None\n

Run the main model checking pipeline.

Parameters:

  • data_path (str) \u2013

    Path to input data file.

  • model_path (str) \u2013

    Path to model file.

  • data_config_path (str) \u2013

    Path to data config file.

  • model_config_path (str) \u2013

    Path to model config file.

  • initial_weights (str | None, default: None ) \u2013

    Optional path to initial weights.

  • num_samples (int, default: 3 ) \u2013

    Number of samples for tuning.

  • ray_results_dirpath (str | None, default: None ) \u2013

    Directory for ray results.

  • debug_mode (bool, default: False ) \u2013

    Whether to run in debug mode.

Source code in src/stimulus/cli/check_model.py
def main(\n    model_path: str,\n    data_path: str,\n    data_config_path: str,\n    model_config_path: str,\n    initial_weights: str | None = None,  # noqa: ARG001\n    num_samples: int = 3,\n    ray_results_dirpath: str | None = None,\n    *,\n    debug_mode: bool = False,\n) -> None:\n    \"\"\"Run the main model checking pipeline.\n\n    Args:\n        data_path: Path to input data file.\n        model_path: Path to model file.\n        data_config_path: Path to data config file.\n        model_config_path: Path to model config file.\n        initial_weights: Optional path to initial weights.\n        num_samples: Number of samples for tuning.\n        ray_results_dirpath: Directory for ray results.\n        debug_mode: Whether to run in debug mode.\n    \"\"\"\n    with open(data_config_path) as file:\n        data_config = yaml.safe_load(file)\n        data_config = yaml_data.YamlSubConfigDict(**data_config)\n\n    with open(model_config_path) as file:\n        model_config = yaml.safe_load(file)\n        model_config = yaml_model_schema.Model(**model_config)\n\n    encoder_loader = loaders.EncoderLoader()\n    encoder_loader.initialize_column_encoders_from_config(column_config=data_config.columns)\n\n    logger.info(\"Dataset loaded successfully.\")\n\n    model_class = launch_utils.import_class_from_file(model_path)\n\n    logger.info(\"Model class loaded successfully.\")\n\n    ray_config_loader = yaml_model_schema.YamlRayConfigLoader(model=model_config)\n    ray_config_dict = ray_config_loader.get_config().model_dump()\n    ray_config_model = ray_config_loader.get_config()\n\n    logger.info(\"Ray config loaded successfully.\")\n\n    sampled_model_params = {\n        key: domain.sample() if hasattr(domain, \"sample\") else domain\n        for key, domain in ray_config_dict[\"network_params\"].items()\n    }\n\n    logger.info(\"Sampled model params loaded successfully.\")\n\n    model_instance = model_class(**sampled_model_params)\n\n    logger.info(\"Model instance loaded successfully.\")\n\n    torch_dataset = handlertorch.TorchDataset(\n        config_path=data_config_path,\n        csv_path=data_path,\n        encoder_loader=encoder_loader,\n    )\n\n    torch_dataloader = DataLoader(torch_dataset, batch_size=10, shuffle=True)\n\n    logger.info(\"Torch dataloader loaded successfully.\")\n\n    # try to run the model on a single batch\n    for batch in torch_dataloader:\n        input_data, labels, metadata = batch\n        # Log shapes of tensors in each dictionary\n        for key, tensor in input_data.items():\n            logger.debug(f\"Input tensor '{key}' shape: {tensor.shape}\")\n        for key, tensor in labels.items():\n            logger.debug(f\"Label tensor '{key}' shape: {tensor.shape}\")\n        for key, list_object in metadata.items():\n            logger.debug(f\"Metadata lists '{key}' length: {len(list_object)}\")\n        output = model_instance(**input_data)\n        logger.info(\"model ran successfully on a single batch\")\n        logger.debug(f\"Output shape: {output.shape}\")\n        break\n\n    logger.info(\"Model checking single pass completed successfully.\")\n\n    # override num_samples\n    model_config.tune.tune_params.num_samples = num_samples\n\n    tuner = raytune_learner.TuneWrapper(\n        model_config=ray_config_model,\n        data_config_path=data_config_path,\n        model_class=model_class,\n        data_path=data_path,\n        encoder_loader=encoder_loader,\n        seed=42,\n        ray_results_dir=ray_results_dirpath,\n        debug=debug_mode,\n    )\n\n    logger.info(\"Tuner initialized successfully.\")\n\n    tuner.tune()\n\n    logger.info(\"Tuning completed successfully.\")\n    logger.info(\"Checks complete\")\n
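Programmatic equivalent of the CLI above (file names are placeholders for configs that follow the stimulus schemas):

from stimulus.cli.check_model import main

main(
    model_path="model.py",
    data_path="data.csv",
    data_config_path="data_config.yaml",
    model_config_path="model_config.yaml",
    num_samples=2,      # overrides tune.tune_params.num_samples for the quick check
    debug_mode=True,
)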
"},{"location":"reference/stimulus/cli/check_model/#stimulus.cli.check_model.run","title":"run","text":"
run() -> None\n

Run the model checking script.

Source code in src/stimulus/cli/check_model.py
def run() -> None:\n    \"\"\"Run the model checking script.\"\"\"\n    args = get_args()\n    main(\n        data_path=args.data,\n        model_path=args.model,\n        data_config_path=args.data_config,\n        model_config_path=args.model_config,\n        initial_weights=args.initial_weights,\n        num_samples=args.num_samples,\n        ray_results_dirpath=args.ray_results_dirpath,\n        debug_mode=args.debug_mode,\n    )\n
"},{"location":"reference/stimulus/cli/predict/","title":"stimulus.cli.predict","text":""},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict","title":"predict","text":"

CLI module for model prediction on datasets.

Functions:

  • add_meta_info \u2013

    Add metadata columns to predictions/labels dictionary.

  • get_args \u2013

    Parse command line arguments.

  • get_batch_size \u2013

    Get batch size from model config.

  • get_meta_keys \u2013

    Extract metadata column keys.

  • load_model \u2013

    Load model with hyperparameters and weights.

  • main \u2013

    Run model prediction pipeline.

  • parse_y_keys \u2013

    Parse dictionary keys to match input data format.

  • run \u2013

    Execute model prediction pipeline.

"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.add_meta_info","title":"add_meta_info","text":"
add_meta_info(\n    data: DataFrame, y: dict[str, Any]\n) -> dict[str, Any]\n

Add metadata columns to predictions/labels dictionary.

Parameters:

  • data (DataFrame) \u2013

    Input DataFrame with metadata.

  • y (dict[str, Any]) \u2013

    Dictionary of predictions/labels.

Returns:

  • dict[str, Any] \u2013

    Updated dictionary with metadata.

Source code in src/stimulus/cli/predict.py
def add_meta_info(data: pl.DataFrame, y: dict[str, Any]) -> dict[str, Any]:\n    \"\"\"Add metadata columns to predictions/labels dictionary.\n\n    Args:\n        data: Input DataFrame with metadata.\n        y: Dictionary of predictions/labels.\n\n    Returns:\n        Updated dictionary with metadata.\n    \"\"\"\n    keys = get_meta_keys(data.columns)\n    for key in keys:\n        y[key] = data[key].to_list()\n    return y\n
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Parse command line arguments.

Returns:

  • Namespace \u2013

    Parsed command line arguments.

Source code in src/stimulus/cli/predict.py
def get_args() -> argparse.Namespace:\n    \"\"\"Parse command line arguments.\n\n    Returns:\n        Parsed command line arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"Predict model outputs on a dataset.\")\n    parser.add_argument(\"-m\", \"--model\", type=str, required=True, metavar=\"FILE\", help=\"Path to model .py file.\")\n    parser.add_argument(\"-w\", \"--weight\", type=str, required=True, metavar=\"FILE\", help=\"Path to model weights file.\")\n    parser.add_argument(\n        \"-mc\",\n        \"--model_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to tune config file with model hyperparameters.\",\n    )\n    parser.add_argument(\n        \"-ec\",\n        \"--experiment_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to experiment config for data modification.\",\n    )\n    parser.add_argument(\"-d\", \"--data\", type=str, required=True, metavar=\"FILE\", help=\"Path to input data.\")\n    parser.add_argument(\"-o\", \"--output\", type=str, required=True, metavar=\"FILE\", help=\"Path for output predictions.\")\n    parser.add_argument(\"--split\", type=int, help=\"Data split to use (default: None).\")\n    parser.add_argument(\"--return_labels\", action=\"store_true\", help=\"Include labels with predictions.\")\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.get_batch_size","title":"get_batch_size","text":"
get_batch_size(mconfig: dict[str, Any]) -> int\n

Get batch size from model config.

Parameters:

  • mconfig (dict[str, Any]) \u2013

    Model configuration dictionary.

Returns:

  • int \u2013

    Batch size to use for predictions.

Source code in src/stimulus/cli/predict.py
def get_batch_size(mconfig: dict[str, Any]) -> int:\n    \"\"\"Get batch size from model config.\n\n    Args:\n        mconfig: Model configuration dictionary.\n\n    Returns:\n        Batch size to use for predictions.\n    \"\"\"\n    default_batch_size = 256\n    if \"data_params\" in mconfig and \"batch_size\" in mconfig[\"data_params\"]:\n        return mconfig[\"data_params\"][\"batch_size\"]\n    return default_batch_size\n
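For example (the config dictionary is illustrative):

from stimulus.cli.predict import get_batch_size

mconfig = {"data_params": {"batch_size": 64}, "model_params": {"hidden": 8}}
print(get_batch_size(mconfig))  # 64
print(get_batch_size({}))       # 256, the default when no batch size is configured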
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.get_meta_keys","title":"get_meta_keys","text":"
get_meta_keys(names: Sequence[str]) -> list[str]\n

Extract metadata column keys.

Parameters:

  • names (Sequence[str]) \u2013

    List of column names.

Returns:

  • list[str] \u2013

    List of metadata column keys.

Source code in src/stimulus/cli/predict.py
def get_meta_keys(names: Sequence[str]) -> list[str]:\n    \"\"\"Extract metadata column keys.\n\n    Args:\n        names: List of column names.\n\n    Returns:\n        List of metadata column keys.\n    \"\"\"\n    return [name for name in names if name.split(\":\")[1] == \"meta\"]\n
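For example, with column names in the name:category:type format used by the predict module:

from stimulus.cli.predict import get_meta_keys

columns = ["id:meta:str", "seq:input:dna", "survived:label:int"]
print(get_meta_keys(columns))  # ['id:meta:str']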
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.load_model","title":"load_model","text":"
load_model(\n    model_class: Any,\n    weight_path: str,\n    mconfig: dict[str, Any],\n) -> Any\n

Load model with hyperparameters and weights.

Parameters:

  • model_class (Any) \u2013

    Model class to instantiate.

  • weight_path (str) \u2013

    Path to model weights.

  • mconfig (dict[str, Any]) \u2013

    Model configuration dictionary.

Returns:

  • Any \u2013

    Loaded model instance.

Source code in src/stimulus/cli/predict.py
def load_model(model_class: Any, weight_path: str, mconfig: dict[str, Any]) -> Any:\n    \"\"\"Load model with hyperparameters and weights.\n\n    Args:\n        model_class: Model class to instantiate.\n        weight_path: Path to model weights.\n        mconfig: Model configuration dictionary.\n\n    Returns:\n        Loaded model instance.\n    \"\"\"\n    hyperparameters = mconfig[\"model_params\"]\n    model = model_class(**hyperparameters)\n    model.load_state_dict(torch.load(weight_path))\n    return model\n
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.main","title":"main","text":"
main(\n    model_path: str,\n    weight_path: str,\n    mconfig_path: str,\n    econfig_path: str,\n    data_path: str,\n    output: str,\n    *,\n    return_labels: bool = False,\n    split: int | None = None\n) -> None\n

Run model prediction pipeline.

Parameters:

  • model_path (str) \u2013

    Path to model file.

  • weight_path (str) \u2013

    Path to model weights.

  • mconfig_path (str) \u2013

    Path to model config.

  • econfig_path (str) \u2013

    Path to experiment config.

  • data_path (str) \u2013

    Path to input data.

  • output (str) \u2013

    Path for output predictions.

  • return_labels (bool, default: False ) \u2013

    Whether to include labels.

  • split (int | None, default: None ) \u2013

    Data split to use.

Source code in src/stimulus/cli/predict.py
def main(\n    model_path: str,\n    weight_path: str,\n    mconfig_path: str,\n    econfig_path: str,\n    data_path: str,\n    output: str,\n    *,\n    return_labels: bool = False,\n    split: int | None = None,\n) -> None:\n    \"\"\"Run model prediction pipeline.\n\n    Args:\n        model_path: Path to model file.\n        weight_path: Path to model weights.\n        mconfig_path: Path to model config.\n        econfig_path: Path to experiment config.\n        data_path: Path to input data.\n        output: Path for output predictions.\n        return_labels: Whether to include labels.\n        split: Data split to use.\n    \"\"\"\n    with open(mconfig_path) as in_json:\n        mconfig = json.load(in_json)\n\n    model_class = import_class_from_file(model_path)\n    model = load_model(model_class, weight_path, mconfig)\n\n    with open(econfig_path) as in_json:\n        experiment_name = json.load(in_json)[\"experiment\"]\n    initialized_experiment_class = get_experiment(experiment_name)\n\n    dataloader = DataLoader(\n        TorchDataset(data_path, initialized_experiment_class, split=split),\n        batch_size=get_batch_size(mconfig),\n        shuffle=False,\n    )\n\n    predictor = PredictWrapper(model, dataloader)\n    out = predictor.predict(return_labels=return_labels)\n    y_pred, y_true = out if return_labels else (out, {})\n\n    y_pred = {k: v.tolist() for k, v in y_pred.items()}\n    y_true = {k: v.tolist() for k, v in y_true.items()}\n\n    data = pl.read_csv(data_path)\n    y_pred = parse_y_keys(y_pred, data, y_type=\"pred\")\n    y_true = parse_y_keys(y_true, data, y_type=\"label\")\n\n    y = {**y_pred, **y_true}\n    y = add_meta_info(data, y)\n    df = pl.from_dict(y)\n    df.write_csv(output)\n
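Programmatic equivalent of the prediction CLI (paths are placeholders for files produced by a tuning run; the split index is hypothetical):

from stimulus.cli.predict import main

main(
    model_path="model.py",
    weight_path="best_model.pt",
    mconfig_path="best_config.json",
    econfig_path="experiment_config.json",
    data_path="data.csv",
    output="predictions.csv",
    return_labels=True,  # also write ground-truth labels next to the predictions
    split=2,             # hypothetical split index (default: None)
)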
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.parse_y_keys","title":"parse_y_keys","text":"
parse_y_keys(\n    y: dict[str, Any], data: DataFrame, y_type: str = \"pred\"\n) -> dict[str, Any]\n

Parse dictionary keys to match input data format.

Parameters:

  • y (dict[str, Any]) \u2013

    Dictionary of predictions or labels.

  • data (DataFrame) \u2013

    Input DataFrame.

  • y_type (str, default: 'pred' ) \u2013

    Type of values ('pred' or 'label').

Returns:

  • dict[str, Any] \u2013

    Dictionary with updated keys.

Source code in src/stimulus/cli/predict.py
def parse_y_keys(y: dict[str, Any], data: pl.DataFrame, y_type: str = \"pred\") -> dict[str, Any]:\n    \"\"\"Parse dictionary keys to match input data format.\n\n    Args:\n        y: Dictionary of predictions or labels.\n        data: Input DataFrame.\n        y_type: Type of values ('pred' or 'label').\n\n    Returns:\n        Dictionary with updated keys.\n    \"\"\"\n    if not y:\n        return y\n\n    parsed_y = {}\n    for k1, v1 in y.items():\n        for k2 in data.columns:\n            if k1 == k2.split(\":\")[0]:\n                new_key = f\"{k1}:{y_type}:{k2.split(':')[2]}\"\n                parsed_y[new_key] = v1\n\n    return parsed_y\n
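A small sketch of the key rewriting, assuming the name:category:type column convention:

import polars as pl

from stimulus.cli.predict import parse_y_keys

data = pl.DataFrame({"survived:label:int": [0, 1], "seq:input:dna": ["ACGT", "TTGA"]})
y_pred = {"survived": [0.2, 0.9]}

# "survived" matches the column "survived:label:int", so the key becomes "survived:pred:int".
print(parse_y_keys(y_pred, data, y_type="pred"))
# {'survived:pred:int': [0.2, 0.9]}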
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.run","title":"run","text":"
run() -> None\n

Execute model prediction pipeline.

Source code in src/stimulus/cli/predict.py
def run() -> None:\n    \"\"\"Execute model prediction pipeline.\"\"\"\n    args = get_args()\n    main(\n        args.model,\n        args.weight,\n        args.model_config,\n        args.experiment_config,\n        args.data,\n        args.output,\n        return_labels=args.return_labels,\n        split=args.split,\n    )\n
"},{"location":"reference/stimulus/cli/shuffle_csv/","title":"stimulus.cli.shuffle_csv","text":""},{"location":"reference/stimulus/cli/shuffle_csv/#stimulus.cli.shuffle_csv","title":"shuffle_csv","text":"

CLI module for shuffling CSV data files.

Functions:

  • get_args \u2013

    Get the arguments when using from the commandline.

  • main \u2013

    Shuffle the data and split it according to the default split method.

  • run \u2013

    Run the CSV shuffling script.

"},{"location":"reference/stimulus/cli/shuffle_csv/#stimulus.cli.shuffle_csv.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when using from the commandline.

Returns:

  • Namespace \u2013

    Parsed command line arguments.

Source code in src/stimulus/cli/shuffle_csv.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\n\n    Returns:\n        Parsed command line arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"Shuffle rows in a CSV data file.\")\n    parser.add_argument(\n        \"-c\",\n        \"--csv\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The file path for the csv containing all data\",\n    )\n    parser.add_argument(\n        \"-y\",\n        \"--yaml\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The YAML config file that hold all parameter info\",\n    )\n    parser.add_argument(\n        \"-o\",\n        \"--output\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The output file path to write the noised csv\",\n    )\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/shuffle_csv/#stimulus.cli.shuffle_csv.main","title":"main","text":"
main(\n    data_csv: str, config_yaml: str, out_path: str\n) -> None\n

Shuffle the data and split it according to the default split method.

Parameters:

  • data_csv (str) \u2013

    Path to input CSV file.

  • config_yaml (str) \u2013

    Path to config YAML file.

  • out_path (str) \u2013

    Path to output shuffled CSV.

TODO major changes when this is going to select a given shuffle method and integration with split.

Source code in src/stimulus/cli/shuffle_csv.py
def main(data_csv: str, config_yaml: str, out_path: str) -> None:\n    \"\"\"Shuffle the data and split it according to the default split method.\n\n    Args:\n        data_csv: Path to input CSV file.\n        config_yaml: Path to config YAML file.\n        out_path: Path to output shuffled CSV.\n\n    TODO major changes when this is going to select a given shuffle method and integration with split.\n    \"\"\"\n    # create a DatasetProcessor object from the config and the csv\n    processor = DatasetProcessor(config_path=config_yaml, csv_path=data_csv)\n\n    # shuffle the data with a default seed. TODO get the seed for the config if and when that is going to be set there.\n    processor.shuffle_labels(seed=42)\n\n    # save the modified csv\n    processor.save(out_path)\n
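Programmatic equivalent of the CLI (paths are placeholders; the YAML must follow the stimulus data-config schema):

from stimulus.cli.shuffle_csv import main

# Shuffle the labels of data.csv according to config.yaml and write the result.
main(data_csv="data.csv", config_yaml="config.yaml", out_path="data_shuffled.csv")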
"},{"location":"reference/stimulus/cli/shuffle_csv/#stimulus.cli.shuffle_csv.run","title":"run","text":"
run() -> None\n

Run the CSV shuffling script.

Source code in src/stimulus/cli/shuffle_csv.py
def run() -> None:\n    \"\"\"Run the CSV shuffling script.\"\"\"\n    args = get_args()\n    main(args.csv, args.yaml, args.output)\n
"},{"location":"reference/stimulus/cli/split_csv/","title":"stimulus.cli.split_csv","text":""},{"location":"reference/stimulus/cli/split_csv/#stimulus.cli.split_csv","title":"split_csv","text":"

CLI module for splitting CSV data files.

Functions:

  • get_args \u2013

    Get the arguments when using from the commandline.

  • main \u2013

    Connect CSV and YAML configuration and handle sanity checks.

  • run \u2013

    Run the CSV splitting script.

"},{"location":"reference/stimulus/cli/split_csv/#stimulus.cli.split_csv.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when using from the commandline.

Source code in src/stimulus/cli/split_csv.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\"\"\"\n    parser = argparse.ArgumentParser(description=\"Split a CSV data file.\")\n    parser.add_argument(\n        \"-c\",\n        \"--csv\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The file path for the csv containing all data\",\n    )\n    parser.add_argument(\n        \"-y\",\n        \"--yaml\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The YAML config file that hold all parameter info\",\n    )\n    parser.add_argument(\n        \"-o\",\n        \"--output\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The output file path to write the noised csv\",\n    )\n    parser.add_argument(\n        \"-f\",\n        \"--force\",\n        type=bool,\n        required=False,\n        default=False,\n        help=\"Overwrite the split column if it already exists in the csv\",\n    )\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/split_csv/#stimulus.cli.split_csv.main","title":"main","text":"
main(\n    data_csv: str,\n    config_yaml: str,\n    out_path: str,\n    *,\n    force: bool = False\n) -> None\n

Connect CSV and YAML configuration and handle sanity checks.

Parameters:

  • data_csv (str) \u2013

    Path to input CSV file.

  • config_yaml (str) \u2013

    Path to config YAML file.

  • out_path (str) \u2013

    Path to output split CSV.

  • force (bool, default: False ) \u2013

    Overwrite the split column if it already exists in the CSV.

Source code in src/stimulus/cli/split_csv.py
def main(data_csv: str, config_yaml: str, out_path: str, *, force: bool = False) -> None:\n    \"\"\"Connect CSV and YAML configuration and handle sanity checks.\n\n    Args:\n        data_csv: Path to input CSV file.\n        config_yaml: Path to config YAML file.\n        out_path: Path to output split CSV.\n        force: Overwrite the split column if it already exists in the CSV.\n    \"\"\"\n    # create a DatasetProcessor object from the config and the csv\n    processor = DatasetProcessor(config_path=config_yaml, csv_path=data_csv)\n\n    # create a split manager from the config\n    split_config = processor.dataset_manager.config.split\n    with open(config_yaml) as f:\n        yaml_config = YamlSubConfigDict(**yaml.safe_load(f))\n    split_loader = SplitLoader(seed=yaml_config.global_params.seed)\n    split_loader.initialize_splitter_from_config(split_config)\n    split_manager = SplitManager(split_loader)\n\n    # apply the split method to the data\n    processor.add_split(split_manager=split_manager, force=force)\n\n    # save the modified csv\n    processor.save(out_path)\n
"},{"location":"reference/stimulus/cli/split_csv/#stimulus.cli.split_csv.run","title":"run","text":"
run() -> None\n

Run the CSV splitting script.

Source code in src/stimulus/cli/split_csv.py
def run() -> None:\n    \"\"\"Run the CSV splitting script.\"\"\"\n    args = get_args()\n    main(args.csv, args.yaml, args.output, force=args.force)\n
"},{"location":"reference/stimulus/cli/split_yaml/","title":"stimulus.cli.split_yaml","text":""},{"location":"reference/stimulus/cli/split_yaml/#stimulus.cli.split_yaml","title":"split_yaml","text":"

CLI module for splitting YAML configuration files.

This module provides functionality to split a single YAML configuration file into multiple YAML files, each containing a specific combination of data transformations and splits. The resulting YAML files can be used as input configurations for the stimulus package.

Functions:

  • get_args \u2013

    Get the arguments when using from the command line.

  • main \u2013

    Reads a YAML config file and generates all possible data configurations.

"},{"location":"reference/stimulus/cli/split_yaml/#stimulus.cli.split_yaml.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when using from the command line.

Source code in src/stimulus/cli/split_yaml.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the command line.\"\"\"\n    parser = argparse.ArgumentParser(description=\"\")\n    parser.add_argument(\n        \"-j\",\n        \"--yaml\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The YAML config file that hold all transform - split - parameter info\",\n    )\n    parser.add_argument(\n        \"-d\",\n        \"--out_dir\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=\"./\",\n        default=\"./\",\n        metavar=\"DIR\",\n        help=\"The output dir where all the YAMLs are written to. Output YAML will be called split-#[number].yaml transform-#[number].yaml. Default -> ./\",\n    )\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/split_yaml/#stimulus.cli.split_yaml.main","title":"main","text":"
main(config_yaml: str, out_dir_path: str) -> None\n

Reads a YAML config file and generates all possible data configurations.

This script reads a YAML with a defined structure and creates all the YAML files ready to be passed to the stimulus package.

The structure of the YAML is described here -> TODO paste here link to documentation. This YAML and its structure summarize how to generate all the transform, split, and respective parameter combinations. Each resulting YAML will hold only one combination of the above three things.

This script will always generate at least one YAML file that represents the combination that does not touch the data (no transform) and uses the default split behavior.

Source code in src/stimulus/cli/split_yaml.py
def main(config_yaml: str, out_dir_path: str) -> None:\n    \"\"\"Reads a YAML config file and generates all possible data configurations.\n\n    This script reads a YAML with a defined structure and creates all the YAML files ready to be passed to\n    the stimulus package.\n\n    The structure of the YAML is described here -> TODO paste here link to documentation.\n    This YAML and it's structure summarize how to generate all the transform - split and respective parameter combinations.\n    Each resulting YAML will hold only one combination of the above three things.\n\n    This script will always generate at least one YAML file that represent the combination that does not touch the data (no transform)\n    and uses the default split behavior.\n    \"\"\"\n    # read the yaml experiment config and load it to dictionary\n    yaml_config: dict[str, Any] = {}\n    with open(config_yaml) as conf_file:\n        yaml_config = yaml.safe_load(conf_file)\n\n    yaml_config_dict: YamlConfigDict = YamlConfigDict(**yaml_config)\n    # check if the yaml schema is correct\n    check_yaml_schema(yaml_config_dict)\n\n    # generate all the YAML configs\n    data_configs = generate_data_configs(yaml_config_dict)\n\n    # dump all the YAML configs into files\n    dump_yaml_list_into_files(data_configs, out_dir_path, \"test\")\n
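For example, expanding a single experiment config into per-combination YAML files (paths are placeholders):

from stimulus.cli.split_yaml import main

# Writes one YAML per transform/split/parameter combination into ./configs/;
# at least one "no transform, default split" file is always produced.
main(config_yaml="experiment.yaml", out_dir_path="configs/")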
"},{"location":"reference/stimulus/cli/transform_csv/","title":"stimulus.cli.transform_csv","text":""},{"location":"reference/stimulus/cli/transform_csv/#stimulus.cli.transform_csv","title":"transform_csv","text":"

CLI module for transforming CSV data files.

Functions:

  • get_args \u2013

    Get the arguments when using from the commandline.

  • main \u2013

    Connect CSV and YAML configuration and handle sanity checks.

  • run \u2013

    Run the CSV transformation script.

"},{"location":"reference/stimulus/cli/transform_csv/#stimulus.cli.transform_csv.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when using from the commandline.

Source code in src/stimulus/cli/transform_csv.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\"\"\"\n    parser = argparse.ArgumentParser(description=\"CLI for transforming CSV data files using YAML configuration.\")\n    parser.add_argument(\n        \"-c\",\n        \"--csv\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The file path for the csv containing all data\",\n    )\n    parser.add_argument(\n        \"-y\",\n        \"--yaml\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The YAML config file that holds all parameter info\",\n    )\n    parser.add_argument(\n        \"-o\",\n        \"--output\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The output file path to write the noised csv\",\n    )\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/transform_csv/#stimulus.cli.transform_csv.main","title":"main","text":"
main(\n    data_csv: str, config_yaml: str, out_path: str\n) -> None\n

Connect CSV and YAML configuration and handle sanity checks.

This launcher will be the connection between the csv and one YAML configuration. It should also handle some sanity checks.

Source code in src/stimulus/cli/transform_csv.py
def main(data_csv: str, config_yaml: str, out_path: str) -> None:\n    \"\"\"Connect CSV and YAML configuration and handle sanity checks.\n\n    This launcher will be the connection between the csv and one YAML configuration.\n    It should also handle some sanity checks.\n    \"\"\"\n    # initialize the csv processing class, it open and reads the csv in automatic\n    processor = DatasetProcessor(config_path=config_yaml, csv_path=data_csv)\n\n    # initialize the transform manager\n    transform_config = processor.dataset_manager.config.transforms\n    with open(config_yaml) as f:\n        yaml_config = YamlSubConfigDict(**yaml.safe_load(f))\n    transform_loader = TransformLoader(seed=yaml_config.global_params.seed)\n    transform_loader.initialize_column_data_transformers_from_config(transform_config)\n    transform_manager = TransformManager(transform_loader)\n\n    # apply the transformations to the data\n    processor.apply_transformation_group(transform_manager)\n\n    # write the transformed data to a new csv\n    processor.save(out_path)\n
"},{"location":"reference/stimulus/cli/transform_csv/#stimulus.cli.transform_csv.run","title":"run","text":"
run() -> None\n

Run the CSV transformation script.

Source code in src/stimulus/cli/transform_csv.py
def run() -> None:\n    \"\"\"Run the CSV transformation script.\"\"\"\n    args = get_args()\n    main(args.csv, args.yaml, args.output)\n
"},{"location":"reference/stimulus/cli/tuning/","title":"stimulus.cli.tuning","text":""},{"location":"reference/stimulus/cli/tuning/#stimulus.cli.tuning","title":"tuning","text":"

CLI module for running raytune tuning experiment.

Modules:

  • launch_utils \u2013

    Utility functions for launching and configuring experiments and ray tuning.

  • loaders \u2013

    Loaders serve as interfaces between the CSV master class and custom methods.

  • raytune_learner \u2013

    Ray Tune wrapper and trainable model classes for hyperparameter optimization.

  • raytune_parser \u2013

    Ray Tune results parser for extracting and saving best model configurations and weights.

  • yaml_data \u2013

    Utility module for handling YAML configuration files and their validation.

  • yaml_model_schema \u2013

    Module for handling YAML configuration files and converting them to Ray Tune format.

Functions:

  • get_args \u2013

    Get the arguments when using from the commandline.

  • main \u2013

    Run the main model checking pipeline.

  • run \u2013

    Run the model checking script.

"},{"location":"reference/stimulus/cli/tuning/#stimulus.cli.tuning.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when using from the commandline.

Returns:

  • Namespace \u2013

    Parsed command line arguments.

Source code in src/stimulus/cli/tuning.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\n\n    Returns:\n        Parsed command line arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"Launch check_model.\")\n    parser.add_argument(\"-d\", \"--data\", type=str, required=True, metavar=\"FILE\", help=\"Path to input csv file.\")\n    parser.add_argument(\"-m\", \"--model\", type=str, required=True, metavar=\"FILE\", help=\"Path to model file.\")\n    parser.add_argument(\n        \"-e\",\n        \"--data_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to data config file.\",\n    )\n    parser.add_argument(\n        \"-c\",\n        \"--model_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to yaml config training file.\",\n    )\n    parser.add_argument(\n        \"-w\",\n        \"--initial_weights\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=None,\n        default=None,\n        metavar=\"FILE\",\n        help=\"The path to the initial weights (optional).\",\n    )\n    parser.add_argument(\n        \"--ray_results_dirpath\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=None,\n        default=None,\n        metavar=\"DIR_PATH\",\n        help=\"Location where ray_results output dir should be written. If None, uses ~/ray_results.\",\n    )\n    parser.add_argument(\n        \"-o\",\n        \"--output\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=\"best_model.pt\",\n        default=\"best_model.pt\",\n        metavar=\"FILE\",\n        help=\"The output file path to write the trained model to\",\n    )\n    parser.add_argument(\n        \"-bm\",\n        \"--best_metrics\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=\"best_metrics.csv\",\n        default=\"best_metrics.csv\",\n        metavar=\"FILE\",\n        help=\"The path to write the best metrics to\",\n    )\n    parser.add_argument(\n        \"-bc\",\n        \"--best_config\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=\"best_config.yaml\",\n        default=\"best_config.yaml\",\n        metavar=\"FILE\",\n        help=\"The path to write the best config to\",\n    )\n    parser.add_argument(\n        \"-bo\",\n        \"--best_optimizer\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=\"best_optimizer.pt\",\n        default=\"best_optimizer.pt\",\n        metavar=\"FILE\",\n        help=\"The path to write the best optimizer to\",\n    )\n    parser.add_argument(\n        \"--tune_run_name\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=None,\n        default=None,\n        metavar=\"CUSTOM_RUN_NAME\",\n        help=(\n            \"Tells ray tune what the 'experiment_name' (i.e. the given tune_run name) should be. \"\n            \"If set, the subdirectory of ray_results is named with this value and its train dir is prefixed accordingly. \"\n            \"Default None means that ray will generate such a name on its own.\"\n        ),\n    )\n    parser.add_argument(\n        \"--debug_mode\",\n        action=\"store_true\",\n        help=\"Activate debug mode for tuning. Default false, no debug.\",\n    )\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/tuning/#stimulus.cli.tuning.main","title":"main","text":"
main(\n    model_path: str,\n    data_path: str,\n    data_config_path: str,\n    model_config_path: str,\n    initial_weights: str | None = None,\n    ray_results_dirpath: str | None = None,\n    output_path: str | None = None,\n    best_optimizer_path: str | None = None,\n    best_metrics_path: str | None = None,\n    best_config_path: str | None = None,\n    *,\n    debug_mode: bool = False\n) -> None\n

Run the main model checking pipeline.

Parameters:

  • data_path (str) \u2013

    Path to input data file.

  • model_path (str) \u2013

    Path to model file.

  • data_config_path (str) \u2013

    Path to data config file.

  • model_config_path (str) \u2013

    Path to model config file.

  • initial_weights (str | None, default: None ) \u2013

    Optional path to initial weights.

  • ray_results_dirpath (str | None, default: None ) \u2013

    Directory for ray results.

  • debug_mode (bool, default: False ) \u2013

    Whether to run in debug mode.

  • output_path (str | None, default: None ) \u2013

    Path to write the best model to.

  • best_optimizer_path (str | None, default: None ) \u2013

    Path to write the best optimizer to.

  • best_metrics_path (str | None, default: None ) \u2013

    Path to write the best metrics to.

  • best_config_path (str | None, default: None ) \u2013

    Path to write the best config to.

Source code in src/stimulus/cli/tuning.py
def main(\n    model_path: str,\n    data_path: str,\n    data_config_path: str,\n    model_config_path: str,\n    initial_weights: str | None = None,  # noqa: ARG001\n    ray_results_dirpath: str | None = None,\n    output_path: str | None = None,\n    best_optimizer_path: str | None = None,\n    best_metrics_path: str | None = None,\n    best_config_path: str | None = None,\n    *,\n    debug_mode: bool = False,\n) -> None:\n    \"\"\"Run the main model checking pipeline.\n\n    Args:\n        data_path: Path to input data file.\n        model_path: Path to model file.\n        data_config_path: Path to data config file.\n        model_config_path: Path to model config file.\n        initial_weights: Optional path to initial weights.\n        ray_results_dirpath: Directory for ray results.\n        debug_mode: Whether to run in debug mode.\n        output_path: Path to write the best model to.\n        best_optimizer_path: Path to write the best optimizer to.\n        best_metrics_path: Path to write the best metrics to.\n        best_config_path: Path to write the best config to.\n    \"\"\"\n    # Convert data config to proper type\n    with open(data_config_path) as file:\n        data_config_dict: dict[str, Any] = yaml.safe_load(file)\n    data_config: yaml_data.YamlSubConfigDict = yaml_data.YamlSubConfigDict(**data_config_dict)\n\n    with open(model_config_path) as file:\n        model_config_dict: dict[str, Any] = yaml.safe_load(file)\n    model_config: yaml_model_schema.Model = yaml_model_schema.Model(**model_config_dict)\n\n    encoder_loader = loaders.EncoderLoader()\n    encoder_loader.initialize_column_encoders_from_config(column_config=data_config.columns)\n\n    model_class = launch_utils.import_class_from_file(model_path)\n\n    ray_config_loader = yaml_model_schema.YamlRayConfigLoader(model=model_config)\n    ray_config_model = ray_config_loader.get_config()\n\n    tuner = raytune_learner.TuneWrapper(\n        model_config=ray_config_model,\n        data_config_path=data_config_path,\n        model_class=model_class,\n        data_path=data_path,\n        encoder_loader=encoder_loader,\n        seed=42,\n        ray_results_dir=ray_results_dirpath,\n        debug=debug_mode,\n    )\n\n    # Ensure output_path is provided\n    if output_path is None:\n        raise ValueError(\"output_path must not be None\")\n    try:\n        grid_results = tuner.tune()\n        if not grid_results:\n            _raise_empty_grid()\n\n        # Initialize parser with results\n        parser = raytune_parser.TuneParser(result=grid_results)\n\n        # Ensure output directory exists\n        Path(output_path).parent.mkdir(parents=True, exist_ok=True)\n\n        # Save outputs using proper Result object API\n        parser.save_best_model(output=output_path)\n        parser.save_best_optimizer(output=best_optimizer_path)\n        parser.save_best_metrics_dataframe(output=best_metrics_path)\n        parser.save_best_config(output=best_config_path)\n\n    except RuntimeError:\n        logger.exception(\"Tuning failed\")\n        raise\n    except KeyError:\n        logger.exception(\"Missing expected result key\")\n        raise\n    finally:\n        if debug_mode:\n            logger.info(\"Debug mode - preserving Ray results directory\")\n        elif ray_results_dirpath:\n            shutil.rmtree(ray_results_dirpath, ignore_errors=True)\n
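Programmatic equivalent of the tuning CLI (paths are placeholders; output_path must be provided, and the other output paths mirror the defaults of the command-line flags shown above):

from stimulus.cli.tuning import main

main(
    model_path="model.py",
    data_path="data.csv",
    data_config_path="data_config.yaml",
    model_config_path="model_config.yaml",
    output_path="best_model.pt",             # required; where the best model weights are written
    best_optimizer_path="best_optimizer.pt",
    best_metrics_path="best_metrics.csv",
    best_config_path="best_config.yaml",
    debug_mode=False,
)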
"},{"location":"reference/stimulus/cli/tuning/#stimulus.cli.tuning.run","title":"run","text":"
run() -> None\n

Run the model checking script.

Source code in src/stimulus/cli/tuning.py
def run() -> None:\n    \"\"\"Run the model checking script.\"\"\"\n    args = get_args()\n    main(\n        data_path=args.data,\n        model_path=args.model,\n        data_config_path=args.data_config,\n        model_config_path=args.model_config,\n        initial_weights=args.initial_weights,\n        ray_results_dirpath=args.ray_results_dirpath,\n        output_path=args.output,\n        best_optimizer_path=args.best_optimizer,\n        best_metrics_path=args.best_metrics,\n        best_config_path=args.best_config,\n        debug_mode=args.debug_mode,\n    )\n
"},{"location":"reference/stimulus/data/","title":"stimulus.data","text":""},{"location":"reference/stimulus/data/#stimulus.data","title":"data","text":"

Data handling and processing module.

This module provides functionality for loading, transforming, and managing data in various formats like CSV. It includes classes and utilities for:

  • Loading and processing CSV data files
  • Applying data transformations and augmentations
  • Splitting data into train/validation/test sets
  • Converting data into PyTorch datasets

Modules:

  • data_handlers \u2013

    This module provides classes for handling CSV data files in the STIMULUS format.

  • encoding \u2013

    Encoding package for data transformation.

  • handlertorch \u2013

    This file provides the class API for handling data in PyTorch using the Dataset and DataLoader classes.

  • loaders \u2013

    Loaders serve as interfaces between the CSV master class and custom methods.

  • splitters \u2013

    This package provides splitter classes for splitting data into train, validation, and test sets.

  • transform \u2013

    Transform package for data manipulation.

"},{"location":"reference/stimulus/data/data_handlers/","title":"stimulus.data.data_handlers","text":""},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers","title":"data_handlers","text":"

This module provides classes for handling CSV data files in the STIMULUS format.

The module contains three main classes:

  • DatasetHandler: Base class for loading and managing CSV data
  • DatasetProcessor: Class for preprocessing data with transformations and splits
  • DatasetLoader: Class for loading processed data for model training

The data format consists of:

1. A CSV file containing the raw data
2. A YAML configuration file that defines:
   • Column names and their roles (input/label/meta)
   • Data types and encoders for each column
   • Transformations to apply (noise, augmentation, etc.)
   • Split configuration for train/val/test sets

The data handling pipeline consists of:

1. Loading raw CSV data according to the YAML config
2. Applying configured transformations
3. Splitting into train/val/test sets based on config
4. Encoding data for model training using specified encoders

See titanic.yaml in tests/test_data/titanic/ for an example of the configuration file format.
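
For orientation, the following sketch (not taken from the package tests) strings these steps together. It assumes a STIMULUS-format config.yaml and data.csv, and it assumes that the parsed config held by DatasetManager exposes columns, transforms, and split sections that can be passed to the corresponding loader initializers.

from stimulus.data import data_handlers, loaders\n\nconfig_path = \"config.yaml\"  # STIMULUS YAML config (see tests/test_data/titanic/ for an example)\ncsv_path = \"data.csv\"\n\n# 1. Preprocess: split and transform the raw CSV\nprocessor = data_handlers.DatasetProcessor(config_path=config_path, csv_path=csv_path)\n\nsplit_loader = loaders.SplitLoader(seed=42)\nsplit_loader.initialize_splitter_from_config(processor.dataset_manager.config.split)\nprocessor.add_split(data_handlers.SplitManager(split_loader))\n\ntransform_loader = loaders.TransformLoader(seed=42)\ntransform_loader.initialize_column_data_transformers_from_config(processor.dataset_manager.config.transforms)\nprocessor.apply_transformation_group(data_handlers.TransformManager(transform_loader))\n\nprocessor.save(\"data_processed.csv\")\n\n# 2. Load the processed CSV for training (split=0 keeps only the train rows)\nencoder_loader = loaders.EncoderLoader()\nencoder_loader.initialize_column_encoders_from_config(processor.dataset_manager.config.columns)\ntrain_data = data_handlers.DatasetLoader(\n    config_path=config_path,\n    csv_path=\"data_processed.csv\",\n    encoder_loader=encoder_loader,\n    split=0,\n)\ninputs, labels, meta = train_data.get_all_items()\n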

Modules:

  • loaders \u2013

    Loaders serve as interfaces between the CSV master class and custom methods.

  • yaml_data \u2013

    Utility module for handling YAML configuration files and their validation.

Classes:

  • DatasetHandler \u2013

    Main class for handling dataset loading, encoding, transformation and splitting.

  • DatasetLoader \u2013

    Class for loading dataset and passing it to the deep learning model.

  • DatasetManager \u2013

    Class for managing the dataset.

  • DatasetProcessor \u2013

    Class for loading dataset, applying transformations and splitting.

  • EncodeManager \u2013

    Manages the encoding of data columns using configured encoders.

  • SplitManager \u2013

    Class for managing the splitting.

  • TransformManager \u2013

    Class for managing the transformations.

"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler","title":"DatasetHandler","text":"
DatasetHandler(config_path: str, csv_path: str)\n

Main class for handling dataset loading, encoding, transformation and splitting.

This class coordinates the interaction between different managers to process CSV datasets according to the provided configuration.

Attributes:

  • encoder_manager (EncodeManager) \u2013

    Manager for handling data encoding operations.

  • transform_manager (TransformManager) \u2013

    Manager for handling data transformations.

  • split_manager (SplitManager) \u2013

    Manager for handling dataset splitting.

  • dataset_manager (DatasetManager) \u2013

    Manager for organizing dataset columns and config.

Parameters:

  • config_path (str) \u2013

    Path to the dataset configuration file.

  • csv_path (str) \u2013

    Path to the CSV data file.

Methods:

  • load_csv \u2013

    Load the CSV file into a polars DataFrame.

  • read_csv_header \u2013

    Get the column names from the header of the CSV file.

  • save \u2013

    Saves the data to a csv file.

  • select_columns \u2013

    Select specific columns from the DataFrame and return as a dictionary.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    config_path: str,\n    csv_path: str,\n) -> None:\n    \"\"\"Initialize the DatasetHandler with required config.\n\n    Args:\n        config_path (str): Path to the dataset configuration file.\n        csv_path (str): Path to the CSV data file.\n    \"\"\"\n    self.dataset_manager = DatasetManager(config_path)\n    self.columns = self.read_csv_header(csv_path)\n    self.data = self.load_csv(csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler.load_csv","title":"load_csv","text":"
load_csv(csv_path: str) -> DataFrame\n

Load the CSV file into a polars DataFrame.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to load.

Returns:

  • DataFrame \u2013

    pl.DataFrame: Polars DataFrame containing the loaded CSV data.

Source code in src/stimulus/data/data_handlers.py
def load_csv(self, csv_path: str) -> pl.DataFrame:\n    \"\"\"Load the CSV file into a polars DataFrame.\n\n    Args:\n        csv_path (str): Path to the CSV file to load.\n\n    Returns:\n        pl.DataFrame: Polars DataFrame containing the loaded CSV data.\n    \"\"\"\n    return pl.read_csv(csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler.read_csv_header","title":"read_csv_header","text":"
read_csv_header(csv_path: str) -> list\n

Get the column names from the header of the CSV file.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to read headers from.

Returns:

  • list ( list ) \u2013

    List of column names from the CSV header.

Source code in src/stimulus/data/data_handlers.py
def read_csv_header(self, csv_path: str) -> list:\n    \"\"\"Get the column names from the header of the CSV file.\n\n    Args:\n        csv_path (str): Path to the CSV file to read headers from.\n\n    Returns:\n        list: List of column names from the CSV header.\n    \"\"\"\n    with open(csv_path) as f:\n        return f.readline().strip().split(\",\")\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler.save","title":"save","text":"
save(path: str) -> None\n

Saves the data to a csv file.

Source code in src/stimulus/data/data_handlers.py
def save(self, path: str) -> None:\n    \"\"\"Saves the data to a csv file.\"\"\"\n    self.data.write_csv(path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler.select_columns","title":"select_columns","text":"
select_columns(columns: list) -> dict\n

Select specific columns from the DataFrame and return as a dictionary.

Parameters:

  • columns (list) \u2013

    List of column names to select.

Returns:

  • dict ( dict ) \u2013

    A dictionary where keys are column names and values are lists containing the column data.

Example

handler = DatasetHandler(...)
data_dict = handler.select_columns([\"col1\", \"col2\"])

Source code in src/stimulus/data/data_handlers.py
def select_columns(self, columns: list) -> dict:\n    \"\"\"Select specific columns from the DataFrame and return as a dictionary.\n\n    Args:\n        columns (list): List of column names to select.\n\n    Returns:\n        dict: A dictionary where keys are column names and values are lists containing the column data.\n\n    Example:\n        >>> handler = DatasetHandler(...)\n        >>> data_dict = handler.select_columns([\"col1\", \"col2\"])\n        >>> # Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}\n    \"\"\"\n    df = self.data.select(columns)\n    return {col: df[col].to_list() for col in columns}\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler.select_columns--returns","title":"Returns","text":""},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader","title":"DatasetLoader","text":"
DatasetLoader(\n    config_path: str,\n    csv_path: str,\n    encoder_loader: EncoderLoader,\n    split: Union[int, None] = None,\n)\n

Bases: DatasetHandler

Class for loading dataset and passing it to the deep learning model.

Methods:

  • get_all_items \u2013

    Get the full dataset as three separate dictionaries for inputs, labels and metadata.

  • get_all_items_and_length \u2013

    Get the full dataset as three separate dictionaries for inputs, labels and metadata, and the length of the data.

  • load_csv \u2013

    Load the CSV file into a polars DataFrame.

  • load_csv_per_split \u2013

    Load the part of csv file that has the specified split value.

  • read_csv_header \u2013

    Get the column names from the header of the CSV file.

  • save \u2013

    Saves the data to a csv file.

  • select_columns \u2013

    Select specific columns from the DataFrame and return as a dictionary.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    config_path: str,\n    csv_path: str,\n    encoder_loader: loaders.EncoderLoader,\n    split: Union[int, None] = None,\n) -> None:\n    \"\"\"Initialize the DatasetLoader.\"\"\"\n    super().__init__(config_path, csv_path)\n    self.encoder_manager = EncodeManager(encoder_loader)\n    self.data = self.load_csv_per_split(csv_path, split) if split is not None else self.load_csv(csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.get_all_items","title":"get_all_items","text":"
get_all_items() -> tuple[dict, dict, dict]\n

Get the full dataset as three separate dictionaries for inputs, labels and metadata.

Returns:

  • tuple[dict, dict, dict] \u2013

    tuple[dict, dict, dict]: Three dictionaries containing:
      - Input dictionary mapping input column names to encoded input data
      - Label dictionary mapping label column names to encoded label data
      - Meta dictionary mapping meta column names to meta data

Example

handler = DatasetHandler(...)
input_dict, label_dict, meta_dict = handler.get_all_items()
print(input_dict.keys())
dict_keys(['age', 'fare'])
print(label_dict.keys())
dict_keys(['survived'])
print(meta_dict.keys())
dict_keys(['passenger_id'])

Source code in src/stimulus/data/data_handlers.py
def get_all_items(self) -> tuple[dict, dict, dict]:\n    \"\"\"Get the full dataset as three separate dictionaries for inputs, labels and metadata.\n\n    Returns:\n        tuple[dict, dict, dict]: Three dictionaries containing:\n            - Input dictionary mapping input column names to encoded input data\n            - Label dictionary mapping label column names to encoded label data\n            - Meta dictionary mapping meta column names to meta data\n\n    Example:\n        >>> handler = DatasetHandler(...)\n        >>> input_dict, label_dict, meta_dict = handler.get_dataset()\n        >>> print(input_dict.keys())\n        dict_keys(['age', 'fare'])\n        >>> print(label_dict.keys())\n        dict_keys(['survived'])\n        >>> print(meta_dict.keys())\n        dict_keys(['passenger_id'])\n    \"\"\"\n    input_columns, label_columns, meta_columns = (\n        self.dataset_manager.column_categories[\"input\"],\n        self.dataset_manager.column_categories[\"label\"],\n        self.dataset_manager.column_categories[\"meta\"],\n    )\n    input_data = self.encoder_manager.encode_dataframe(self.data[input_columns])\n    label_data = self.encoder_manager.encode_dataframe(self.data[label_columns])\n    meta_data = {key: self.data[key].to_list() for key in meta_columns}\n    return input_data, label_data, meta_data\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.get_all_items_and_length","title":"get_all_items_and_length","text":"
get_all_items_and_length() -> (\n    tuple[tuple[dict, dict, dict], int]\n)\n

Get the full dataset as three separate dictionaries for inputs, labels and metadata, and the length of the data.

Source code in src/stimulus/data/data_handlers.py
def get_all_items_and_length(self) -> tuple[tuple[dict, dict, dict], int]:\n    \"\"\"Get the full dataset as three separate dictionaries for inputs, labels and metadata, and the length of the data.\"\"\"\n    return self.get_all_items(), len(self.data)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.load_csv","title":"load_csv","text":"
load_csv(csv_path: str) -> DataFrame\n

Load the CSV file into a polars DataFrame.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to load.

Returns:

  • DataFrame \u2013

    pl.DataFrame: Polars DataFrame containing the loaded CSV data.

Source code in src/stimulus/data/data_handlers.py
def load_csv(self, csv_path: str) -> pl.DataFrame:\n    \"\"\"Load the CSV file into a polars DataFrame.\n\n    Args:\n        csv_path (str): Path to the CSV file to load.\n\n    Returns:\n        pl.DataFrame: Polars DataFrame containing the loaded CSV data.\n    \"\"\"\n    return pl.read_csv(csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.load_csv_per_split","title":"load_csv_per_split","text":"
load_csv_per_split(csv_path: str, split: int) -> DataFrame\n

Load the part of csv file that has the specified split value.

The split value is an integer: 0 is train, 1 is validation, 2 is test. It is read from the column with category split; an example column name is split:split:int.

Note that this function exists so that, depending on the training, validation, or test scenario, only the relevant portion of the data is loaded.

Source code in src/stimulus/data/data_handlers.py
def load_csv_per_split(self, csv_path: str, split: int) -> pl.DataFrame:\n    \"\"\"Load the part of csv file that has the specified split value.\n\n    Split is a number that for 0 is train, 1 is validation, 2 is test.\n    This is accessed through the column with category `split`. Example column name could be `split:split:int`.\n\n    NOTE that the aim of having this function is that depending on the training, validation and test scenarios,\n    we are gonna load only the relevant data for it.\n    \"\"\"\n    if \"split\" not in self.columns:\n        raise ValueError(\"The category split is not present in the csv file\")\n    if split not in [0, 1, 2]:\n        raise ValueError(f\"The split value should be 0, 1 or 2. The specified split value is {split}\")\n    return pl.scan_csv(csv_path).filter(pl.col(\"split\") == split).collect()\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.read_csv_header","title":"read_csv_header","text":"
read_csv_header(csv_path: str) -> list\n

Get the column names from the header of the CSV file.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to read headers from.

Returns:

  • list ( list ) \u2013

    List of column names from the CSV header.

Source code in src/stimulus/data/data_handlers.py
def read_csv_header(self, csv_path: str) -> list:\n    \"\"\"Get the column names from the header of the CSV file.\n\n    Args:\n        csv_path (str): Path to the CSV file to read headers from.\n\n    Returns:\n        list: List of column names from the CSV header.\n    \"\"\"\n    with open(csv_path) as f:\n        return f.readline().strip().split(\",\")\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.save","title":"save","text":"
save(path: str) -> None\n

Saves the data to a csv file.

Source code in src/stimulus/data/data_handlers.py
def save(self, path: str) -> None:\n    \"\"\"Saves the data to a csv file.\"\"\"\n    self.data.write_csv(path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.select_columns","title":"select_columns","text":"
select_columns(columns: list) -> dict\n

Select specific columns from the DataFrame and return as a dictionary.

Parameters:

  • columns (list) \u2013

    List of column names to select.

Returns:

  • dict ( dict ) \u2013

    A dictionary where keys are column names and values are lists containing the column data.

Example

handler = DatasetHandler(...)
data_dict = handler.select_columns([\"col1\", \"col2\"])

Source code in src/stimulus/data/data_handlers.py
def select_columns(self, columns: list) -> dict:\n    \"\"\"Select specific columns from the DataFrame and return as a dictionary.\n\n    Args:\n        columns (list): List of column names to select.\n\n    Returns:\n        dict: A dictionary where keys are column names and values are lists containing the column data.\n\n    Example:\n        >>> handler = DatasetHandler(...)\n        >>> data_dict = handler.select_columns([\"col1\", \"col2\"])\n        >>> # Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}\n    \"\"\"\n    df = self.data.select(columns)\n    return {col: df[col].to_list() for col in columns}\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.select_columns--returns","title":"Returns","text":""},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetManager","title":"DatasetManager","text":"
DatasetManager(config_path: str)\n

Class for managing the dataset.

This class handles loading and organizing dataset configuration from YAML files. It manages column categorization into input, label and meta types based on the config.

Attributes:

  • config (dict) \u2013

    The loaded configuration dictionary from YAML

  • column_categories (dict) \u2013

    Dictionary mapping column types to lists of column names

Methods:

  • _load_config \u2013

    Loads the config from a YAML file and returns it as a dict.

  • categorize_columns_by_type \u2013

    Organizes the columns into input, label, meta based on the config.

Methods:

  • categorize_columns_by_type \u2013

    Organizes columns from config into input, label, and meta categories.

  • get_split_columns \u2013

    Get the columns that are used for splitting.

  • get_transform_logic \u2013

    Get the transformation logic.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    config_path: str,\n) -> None:\n    \"\"\"Initialize the DatasetManager.\"\"\"\n    self.config = self._load_config(config_path)\n    self.column_categories = self.categorize_columns_by_type()\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetManager.categorize_columns_by_type","title":"categorize_columns_by_type","text":"
categorize_columns_by_type() -> dict\n

Organizes columns from config into input, label, and meta categories.

Reads the column definitions from the config and sorts them into categories based on their column_type field.

Returns:

  • dict ( dict ) \u2013

    Dictionary containing lists of column names for each category:
      {
          \"input\": [\"col1\", \"col2\"],  # Input columns
          \"label\": [\"target\"],        # Label/output columns
          \"meta\": [\"id\"]              # Metadata columns
      }

Example

manager = DatasetManager(\"config.yaml\")
categories = manager.categorize_columns_by_type()
print(categories)
{ 'input': ['hello', 'bonjour'], 'label': ['ciao'], 'meta': [\"id\"] }

Source code in src/stimulus/data/data_handlers.py
def categorize_columns_by_type(self) -> dict:\n    \"\"\"Organizes columns from config into input, label, and meta categories.\n\n    Reads the column definitions from the config and sorts them into categories\n    based on their column_type field.\n\n    Returns:\n        dict: Dictionary containing lists of column names for each category:\n            {\n                \"input\": [\"col1\", \"col2\"],  # Input columns\n                \"label\": [\"target\"],        # Label/output columns\n                \"meta\": [\"id\"]     # Metadata columns\n            }\n\n    Example:\n        >>> manager = DatasetManager(\"config.yaml\")\n        >>> categories = manager.categorize_columns_by_type()\n        >>> print(categories)\n        {\n            'input': ['hello', 'bonjour'],\n            'label': ['ciao'],\n            'meta': [\"id\"]\n        }\n    \"\"\"\n    input_columns = []\n    label_columns = []\n    meta_columns = []\n    for column in self.config.columns:\n        if column.column_type == \"input\":\n            input_columns.append(column.column_name)\n        elif column.column_type == \"label\":\n            label_columns.append(column.column_name)\n        elif column.column_type == \"meta\":\n            meta_columns.append(column.column_name)\n\n    return {\"input\": input_columns, \"label\": label_columns, \"meta\": meta_columns}\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetManager.get_split_columns","title":"get_split_columns","text":"
get_split_columns() -> list[str]\n

Get the columns that are used for splitting.

Source code in src/stimulus/data/data_handlers.py
def get_split_columns(self) -> list[str]:\n    \"\"\"Get the columns that are used for splitting.\"\"\"\n    return self.config.split.split_input_columns\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetManager.get_transform_logic","title":"get_transform_logic","text":"
get_transform_logic() -> dict\n

Get the transformation logic.

Returns a dictionary with the following structure:

  {
      \"transformation_name\": str,
      \"transformations\": list[tuple[str, str, dict]]
  }

Source code in src/stimulus/data/data_handlers.py
def get_transform_logic(self) -> dict:\n    \"\"\"Get the transformation logic.\n\n    Returns a dictionary in the following structure :\n    {\n        \"transformation_name\": str,\n        \"transformations\": list[tuple[str, str, dict]]\n    }\n    \"\"\"\n    transformation_logic = {\n        \"transformation_name\": self.config.transforms.transformation_name,\n        \"transformations\": [],\n    }\n    for column in self.config.transforms.columns:\n        for transformation in column.transformations:\n            transformation_logic[\"transformations\"].append(\n                (column.column_name, transformation.name, transformation.params),\n            )\n    return transformation_logic\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor","title":"DatasetProcessor","text":"
DatasetProcessor(config_path: str, csv_path: str)\n

Bases: DatasetHandler

Class for loading dataset, applying transformations and splitting.

Methods:

  • add_split \u2013

    Add a column specifying the train, validation, test splits of the data.

  • apply_transformation_group \u2013

    Apply the transformation group to the data.

  • load_csv \u2013

    Load the CSV file into a polars DataFrame.

  • read_csv_header \u2013

    Get the column names from the header of the CSV file.

  • save \u2013

    Saves the data to a csv file.

  • select_columns \u2013

    Select specific columns from the DataFrame and return as a dictionary.

  • shuffle_labels \u2013

    Shuffles the labels in the data.

Source code in src/stimulus/data/data_handlers.py
def __init__(self, config_path: str, csv_path: str) -> None:\n    \"\"\"Initialize the DatasetProcessor.\"\"\"\n    super().__init__(config_path, csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.add_split","title":"add_split","text":"
add_split(\n    split_manager: SplitManager, *, force: bool = False\n) -> None\n

Add a column specifying the train, validation, test splits of the data.

An exception is raised if the split column is already present in the CSV file. This behaviour can be overridden by setting force=True.

Parameters:

  • split_manager (SplitManager) \u2013

    Manager for handling dataset splitting

  • force (bool, default: False ) \u2013

    If True, the split column present in the csv file will be overwritten.

Source code in src/stimulus/data/data_handlers.py
def add_split(self, split_manager: SplitManager, *, force: bool = False) -> None:\n    \"\"\"Add a column specifying the train, validation, test splits of the data.\n\n    An error exception is raised if the split column is already present in the csv file. This behaviour can be overriden by setting force=True.\n\n    Args:\n        split_manager (SplitManager): Manager for handling dataset splitting\n        force (bool): If True, the split column present in the csv file will be overwritten.\n    \"\"\"\n    if (\"split\" in self.columns) and (not force):\n        raise ValueError(\n            \"The category split is already present in the csv file. If you want to still use this function, set force=True\",\n        )\n    # get relevant split columns from the dataset_manager\n    split_columns = self.dataset_manager.get_split_columns()\n    split_input_data = self.select_columns(split_columns)\n\n    # get the split indices\n    train, validation, test = split_manager.get_split_indices(split_input_data)\n\n    # add the split column to the data\n    split_column = np.full(len(self.data), -1).astype(int)\n    split_column[train] = 0\n    split_column[validation] = 1\n    split_column[test] = 2\n    self.data = self.data.with_columns(pl.Series(\"split\", split_column))\n\n    if \"split\" not in self.columns:\n        self.columns.append(\"split\")\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.apply_transformation_group","title":"apply_transformation_group","text":"
apply_transformation_group(\n    transform_manager: TransformManager,\n) -> None\n

Apply the transformation group to the data.

Source code in src/stimulus/data/data_handlers.py
def apply_transformation_group(self, transform_manager: TransformManager) -> None:\n    \"\"\"Apply the transformation group to the data.\"\"\"\n    for column_name, transform_name, _params in self.dataset_manager.get_transform_logic()[\"transformations\"]:\n        transformed_data, add_row = transform_manager.transform_column(\n            column_name,\n            transform_name,\n            self.data[column_name],\n        )\n        if add_row:\n            new_rows = self.data.with_columns(pl.Series(column_name, transformed_data))\n            self.data = pl.vstack(self.data, new_rows)\n        else:\n            self.data = self.data.with_columns(pl.Series(column_name, transformed_data))\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.load_csv","title":"load_csv","text":"
load_csv(csv_path: str) -> DataFrame\n

Load the CSV file into a polars DataFrame.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to load.

Returns:

  • DataFrame \u2013

    pl.DataFrame: Polars DataFrame containing the loaded CSV data.

Source code in src/stimulus/data/data_handlers.py
def load_csv(self, csv_path: str) -> pl.DataFrame:\n    \"\"\"Load the CSV file into a polars DataFrame.\n\n    Args:\n        csv_path (str): Path to the CSV file to load.\n\n    Returns:\n        pl.DataFrame: Polars DataFrame containing the loaded CSV data.\n    \"\"\"\n    return pl.read_csv(csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.read_csv_header","title":"read_csv_header","text":"
read_csv_header(csv_path: str) -> list\n

Get the column names from the header of the CSV file.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to read headers from.

Returns:

  • list ( list ) \u2013

    List of column names from the CSV header.

Source code in src/stimulus/data/data_handlers.py
def read_csv_header(self, csv_path: str) -> list:\n    \"\"\"Get the column names from the header of the CSV file.\n\n    Args:\n        csv_path (str): Path to the CSV file to read headers from.\n\n    Returns:\n        list: List of column names from the CSV header.\n    \"\"\"\n    with open(csv_path) as f:\n        return f.readline().strip().split(\",\")\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.save","title":"save","text":"
save(path: str) -> None\n

Saves the data to a csv file.

Source code in src/stimulus/data/data_handlers.py
def save(self, path: str) -> None:\n    \"\"\"Saves the data to a csv file.\"\"\"\n    self.data.write_csv(path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.select_columns","title":"select_columns","text":"
select_columns(columns: list) -> dict\n

Select specific columns from the DataFrame and return as a dictionary.

Parameters:

  • columns (list) \u2013

    List of column names to select.

Returns:

  • dict ( dict ) \u2013

    A dictionary where keys are column names and values are lists containing the column data.

Example

handler = DatasetHandler(...)
data_dict = handler.select_columns([\"col1\", \"col2\"])

Source code in src/stimulus/data/data_handlers.py
def select_columns(self, columns: list) -> dict:\n    \"\"\"Select specific columns from the DataFrame and return as a dictionary.\n\n    Args:\n        columns (list): List of column names to select.\n\n    Returns:\n        dict: A dictionary where keys are column names and values are lists containing the column data.\n\n    Example:\n        >>> handler = DatasetHandler(...)\n        >>> data_dict = handler.select_columns([\"col1\", \"col2\"])\n        >>> # Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}\n    \"\"\"\n    df = self.data.select(columns)\n    return {col: df[col].to_list() for col in columns}\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.select_columns--returns","title":"Returns","text":""},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.shuffle_labels","title":"shuffle_labels","text":"
shuffle_labels(seed: Optional[float] = None) -> None\n

Shuffles the labels in the data.

Source code in src/stimulus/data/data_handlers.py
def shuffle_labels(self, seed: Optional[float] = None) -> None:\n    \"\"\"Shuffles the labels in the data.\"\"\"\n    # set the np seed\n    np.random.seed(seed)\n\n    label_keys = self.dataset_manager.column_categories[\"label\"]\n    for key in label_keys:\n        self.data = self.data.with_columns(pl.Series(key, np.random.permutation(list(self.data[key]))))\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.EncodeManager","title":"EncodeManager","text":"
EncodeManager(encoder_loader: EncoderLoader)\n

Manages the encoding of data columns using configured encoders.

This class handles encoding of data columns based on the encoders specified in the configuration. It uses an EncoderLoader to get the appropriate encoder for each column and applies the encoding.

Attributes:

  • encoder_loader (EncoderLoader) \u2013

    Loader that provides encoders based on config.

Example

encoder_loader = EncoderLoader(config)
encode_manager = EncodeManager(encoder_loader)
data = [\"ACGT\", \"TGCA\", \"GCTA\"]
encoded = encode_manager.encode_column(\"dna_seq\", data)
print(encoded.shape)
torch.Size([3, 4, 4])  # 3 sequences, length 4, one-hot encoded

Parameters:

  • encoder_loader (EncoderLoader) \u2013

    Loader that provides encoders based on configuration.

Methods:

  • encode_column \u2013

    Encodes a column of data using the configured encoder.

  • encode_columns \u2013

    Encodes multiple columns of data using the configured encoders.

  • encode_dataframe \u2013

    Encode the dataframe using the encoders.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    encoder_loader: loaders.EncoderLoader,\n) -> None:\n    \"\"\"Initialize the EncodeManager.\n\n    Args:\n        encoder_loader: Loader that provides encoders based on configuration.\n    \"\"\"\n    self.encoder_loader = encoder_loader\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.EncodeManager.encode_column","title":"encode_column","text":"
encode_column(\n    column_name: str, column_data: list\n) -> Tensor\n

Encodes a column of data using the configured encoder.

Gets the appropriate encoder for the column from the encoder_loader and uses it to encode all the data in the column.

Parameters:

  • column_name (str) \u2013

    Name of the column to encode.

  • column_data (list) \u2013

    List of data values from the column to encode.

Returns:

  • Tensor \u2013

    Encoded data as a torch.Tensor. The exact shape depends on the encoder used.

Example

data = [\"ACGT\", \"TGCA\"] encoded = encode_manager.encode_column(\"dna_seq\", data) print(encoded.shape) torch.Size([2, 4, 4]) # 2 sequences, length 4, one-hot encoded

Source code in src/stimulus/data/data_handlers.py
def encode_column(self, column_name: str, column_data: list) -> torch.Tensor:\n    \"\"\"Encodes a column of data using the configured encoder.\n\n    Gets the appropriate encoder for the column from the encoder_loader and uses it\n    to encode all the data in the column.\n\n    Args:\n        column_name: Name of the column to encode.\n        column_data: List of data values from the column to encode.\n\n    Returns:\n        Encoded data as a torch.Tensor. The exact shape depends on the encoder used.\n\n    Example:\n        >>> data = [\"ACGT\", \"TGCA\"]\n        >>> encoded = encode_manager.encode_column(\"dna_seq\", data)\n        >>> print(encoded.shape)\n        torch.Size([2, 4, 4])  # 2 sequences, length 4, one-hot encoded\n    \"\"\"\n    encode_all_function = self.encoder_loader.get_function_encode_all(column_name)\n    return encode_all_function(column_data)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.EncodeManager.encode_columns","title":"encode_columns","text":"
encode_columns(column_data: dict) -> dict\n

Encodes multiple columns of data using the configured encoders.

Gets the appropriate encoder for each column from the encoder_loader and encodes all data values in those columns.

Parameters:

  • column_data (dict) \u2013

    Dict mapping column names to lists of data values to encode.

Returns:

  • dict \u2013

    Dict mapping column names to their encoded tensors. The exact shape of each tensor depends on the encoder used for that column.

Example

data = {\"dna_seq\": [\"ACGT\", \"TGCA\"], \"labels\": [\"1\", \"2\"]} encoded = encode_manager.encode_columns(data) print(encoded[\"dna_seq\"].shape) torch.Size([2, 4, 4]) # 2 sequences, length 4, one-hot encoded

Source code in src/stimulus/data/data_handlers.py
def encode_columns(self, column_data: dict) -> dict:\n    \"\"\"Encodes multiple columns of data using the configured encoders.\n\n    Gets the appropriate encoder for each column from the encoder_loader and encodes\n    all data values in those columns.\n\n    Args:\n        column_data: Dict mapping column names to lists of data values to encode.\n\n    Returns:\n        Dict mapping column names to their encoded tensors. The exact shape of each\n        tensor depends on the encoder used for that column.\n\n    Example:\n        >>> data = {\"dna_seq\": [\"ACGT\", \"TGCA\"], \"labels\": [\"1\", \"2\"]}\n        >>> encoded = encode_manager.encode_columns(data)\n        >>> print(encoded[\"dna_seq\"].shape)\n        torch.Size([2, 4, 4])  # 2 sequences, length 4, one-hot encoded\n    \"\"\"\n    return {col: self.encode_column(col, values) for col, values in column_data.items()}\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.EncodeManager.encode_dataframe","title":"encode_dataframe","text":"
encode_dataframe(dataframe: DataFrame) -> dict[str, Tensor]\n

Encode the dataframe using the encoders.

Source code in src/stimulus/data/data_handlers.py
def encode_dataframe(self, dataframe: pl.DataFrame) -> dict[str, torch.Tensor]:\n    \"\"\"Encode the dataframe using the encoders.\"\"\"\n    return {col: self.encode_column(col, dataframe[col].to_list()) for col in dataframe.columns}\n
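
As an illustrative usage (not from the package docs; the encode_manager instance is assumed to be set up as in the class-level example above), encode_dataframe takes a polars DataFrame whose columns all have configured encoders:

import polars as pl\n\ndf = pl.DataFrame({\"dna_seq\": [\"ACGT\", \"TGCA\"]})\nencoded = encode_manager.encode_dataframe(df)\n# encoded is a dict mapping column names to tensors, e.g. encoded[\"dna_seq\"];\n# the tensor shape depends on the encoder configured for each column\n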
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.SplitManager","title":"SplitManager","text":"
SplitManager(split_loader: SplitLoader)\n

Class for managing the splitting.

Methods:

  • get_split_indices \u2013

    Get the indices for train, validation, and test splits.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    split_loader: loaders.SplitLoader,\n) -> None:\n    \"\"\"Initialize the SplitManager.\"\"\"\n    self.split_loader = split_loader\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.SplitManager.get_split_indices","title":"get_split_indices","text":"
get_split_indices(\n    data: dict,\n) -> tuple[ndarray, ndarray, ndarray]\n

Get the indices for train, validation, and test splits.

Source code in src/stimulus/data/data_handlers.py
def get_split_indices(self, data: dict) -> tuple[np.ndarray, np.ndarray, np.ndarray]:\n    \"\"\"Get the indices for train, validation, and test splits.\"\"\"\n    return self.split_loader.get_function_split()(data)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.TransformManager","title":"TransformManager","text":"
TransformManager(transform_loader: TransformLoader)\n

Class for managing the transformations.

Methods:

  • transform_column \u2013

    Transform a column of data using the specified transformation.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    transform_loader: loaders.TransformLoader,\n) -> None:\n    \"\"\"Initialize the TransformManager.\"\"\"\n    self.transform_loader = transform_loader\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.TransformManager.transform_column","title":"transform_column","text":"
transform_column(\n    column_name: str, transform_name: str, column_data: list\n) -> tuple[list, bool]\n

Transform a column of data using the specified transformation.

Parameters:

  • column_name (str) \u2013

    The name of the column to transform.

  • transform_name (str) \u2013

    The name of the transformation to use.

  • column_data (list) \u2013

    The data to transform.

Returns:

  • list ( list ) \u2013

    The transformed data.

  • bool ( bool ) \u2013

    Whether the transformation added new rows to the data.

Source code in src/stimulus/data/data_handlers.py
def transform_column(self, column_name: str, transform_name: str, column_data: list) -> tuple[list, bool]:\n    \"\"\"Transform a column of data using the specified transformation.\n\n    Args:\n        column_name (str): The name of the column to transform.\n        transform_name (str): The name of the transformation to use.\n        column_data (list): The data to transform.\n\n    Returns:\n        list: The transformed data.\n        bool: Whether the transformation added new rows to the data.\n    \"\"\"\n    transformer = self.transform_loader.__getattribute__(column_name)[transform_name]\n    return transformer.transform_all(column_data), transformer.add_row\n
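
An illustrative call (transform_manager and data are assumed to exist; GaussianNoise matches the TransformLoader example config further below). The caller checks the returned add_row flag to decide whether the transformed values replace the column or are appended as new rows, mirroring what DatasetProcessor.apply_transformation_group does.

transformed, add_row = transform_manager.transform_column(\n    column_name=\"age\",\n    transform_name=\"GaussianNoise\",\n    column_data=data[\"age\"],\n)\n# add_row is True when the transformer appends augmented rows instead of\n# replacing the existing column values\n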
"},{"location":"reference/stimulus/data/handlertorch/","title":"stimulus.data.handlertorch","text":""},{"location":"reference/stimulus/data/handlertorch/#stimulus.data.handlertorch","title":"handlertorch","text":"

This file provides the class API for handling data in PyTorch using the Dataset and DataLoader classes.

Modules:

  • loaders \u2013

    Loaders serve as interfaces between the CSV master class and custom methods.

Classes:

  • TorchDataset \u2013

    Class for creating a torch dataset.

"},{"location":"reference/stimulus/data/handlertorch/#stimulus.data.handlertorch.TorchDataset","title":"TorchDataset","text":"
TorchDataset(\n    config_path: str,\n    csv_path: str,\n    encoder_loader: EncoderLoader,\n    split: Optional[int] = None,\n)\n

Bases: Dataset

Class for creating a torch dataset.

Parameters:

  • config_path (str) \u2013

    Path to the configuration file

  • csv_path (str) \u2013

    Path to the CSV data file

  • encoder_loader (EncoderLoader) \u2013

    Encoder loader instance

  • split (Optional[int], default: None ) \u2013

    Optional integer selecting the split to load: 0 for train, 1 for validation, 2 for test

Source code in src/stimulus/data/handlertorch.py
def __init__(\n    self,\n    config_path: str,\n    csv_path: str,\n    encoder_loader: loaders.EncoderLoader,\n    split: Optional[int] = None,\n) -> None:\n    \"\"\"Initialize the TorchDataset.\n\n    Args:\n        config_path: Path to the configuration file\n        csv_path: Path to the CSV data file\n        encoder_loader: Encoder loader instance\n        split: Optional tuple containing split information\n    \"\"\"\n    self.loader = data_handlers.DatasetLoader(\n        config_path=config_path,\n        csv_path=csv_path,\n        encoder_loader=encoder_loader,\n        split=split,\n    )\n
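
Because TorchDataset subclasses torch.utils.data.Dataset, it can be wrapped in a standard DataLoader. A minimal sketch, assuming config.yaml and data.csv exist and that encoder_loader has been initialized from the same config:

from torch.utils.data import DataLoader\n\nfrom stimulus.data.handlertorch import TorchDataset\n\ntrain_dataset = TorchDataset(\n    config_path=\"config.yaml\",\n    csv_path=\"data.csv\",\n    encoder_loader=encoder_loader,  # an EncoderLoader built from the same config\n    split=0,  # 0 = train, 1 = validation, 2 = test\n)\ntrain_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n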
"},{"location":"reference/stimulus/data/loaders/","title":"stimulus.data.loaders","text":""},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders","title":"loaders","text":"

Loaders serve as interfaces between the CSV master class and custom methods.

Mainly, three types of custom methods are supported:

  • Encoders: methods for encoding data before it is fed into the model
  • Data transformers: methods for transforming data (e.g. augmenting, adding noise)
  • Splitters: methods for splitting data into train, validation and test sets

Loaders are built from an input YAML config file whose format is described in the documentation; an example is available at tests/test_data/dna_experiment/dna_experiment_config_template.yaml.
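
Putting the three loaders side by side, a minimal config-driven setup could look like the sketch below; my_config is a placeholder for the parsed YAML config, assumed to expose columns, transforms, and split attributes as consumed by the initializers documented on this page.

from stimulus.data import loaders\n\nencoder_loader = loaders.EncoderLoader(seed=42)\nencoder_loader.initialize_column_encoders_from_config(my_config.columns)\n\ntransform_loader = loaders.TransformLoader(seed=42)\ntransform_loader.initialize_column_data_transformers_from_config(my_config.transforms)\n\nsplit_loader = loaders.SplitLoader(seed=42)\nsplit_loader.initialize_splitter_from_config(my_config.split)\n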

Modules:

  • data_transformation_generators \u2013

    This file contains noise generator classes for generating various types of noise.

  • encoders \u2013

    This file contains encoder classes for encoding various types of data.

  • splitters \u2013

    This file contains the splitter classes for splitting data into train, validation, and test sets.

  • yaml_data \u2013

    Utility module for handling YAML configuration files and their validation.

Classes:

  • EncoderLoader \u2013

    Class for loading encoders from a config file.

  • SplitLoader \u2013

    Class for loading splitters from a config file.

  • TransformLoader \u2013

    Class for loading transformations from a config file.

"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.EncoderLoader","title":"EncoderLoader","text":"
EncoderLoader(seed: Optional[float] = None)\n

Class for loading encoders from a config file.

Parameters:

  • seed (Optional[float], default: None ) \u2013

    Random seed for reproducibility

Methods:

  • get_encoder \u2013

    Gets an encoder object from the encoders module and initializes it with the given parameters.

  • get_function_encode_all \u2013

    Gets the encoding function for a specific field.

  • initialize_column_encoders_from_config \u2013

    Build the loader from a config dictionary.

  • set_encoder_as_attribute \u2013

    Sets the encoder as an attribute of the loader.

Source code in src/stimulus/data/loaders.py
def __init__(self, seed: Optional[float] = None) -> None:\n    \"\"\"Initialize the encoder loader.\n\n    Args:\n        seed: Random seed for reproducibility\n    \"\"\"\n    self.seed = seed\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.EncoderLoader.get_encoder","title":"get_encoder","text":"
get_encoder(\n    encoder_name: str, encoder_params: Optional[dict] = None\n) -> Any\n

Gets an encoder object from the encoders module and initializes it with the given parameters.

Parameters:

  • encoder_name (str) \u2013

    The name of the encoder to get

  • encoder_params (dict, default: None ) \u2013

    The parameters for the encoder

Returns:

  • Any ( Any ) \u2013

    The encoder function for the specified field and parameters

Source code in src/stimulus/data/loaders.py
def get_encoder(self, encoder_name: str, encoder_params: Optional[dict] = None) -> Any:\n    \"\"\"Gets an encoder object from the encoders module and initializes it with the given parameters.\n\n    Args:\n        encoder_name (str): The name of the encoder to get\n        encoder_params (dict): The parameters for the encoder\n\n    Returns:\n        Any: The encoder function for the specified field and parameters\n    \"\"\"\n    try:\n        return getattr(encoders, encoder_name)(**encoder_params)\n    except AttributeError:\n        logging.exception(f\"Encoder '{encoder_name}' not found in the encoders module.\")\n        logging.exception(\n            f\"Available encoders: {[name for name, obj in encoders.__dict__.items() if isinstance(obj, type) and name not in ('ABC', 'Any')]}\",\n        )\n        raise\n\n    except TypeError:\n        if encoder_params is None:\n            return getattr(encoders, encoder_name)()\n        logging.exception(f\"Encoder '{encoder_name}' has incorrect parameters: {encoder_params}\")\n        logging.exception(\n            f\"Expected parameters for '{encoder_name}': {inspect.signature(getattr(encoders, encoder_name))}\",\n        )\n        raise\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.EncoderLoader.get_function_encode_all","title":"get_function_encode_all","text":"
get_function_encode_all(field_name: str) -> Any\n

Gets the encoding function for a specific field.

Parameters:

  • field_name (str) \u2013

    The field name to get the encoder for

Returns:

  • Any ( Any ) \u2013

    The encode_all function for the specified field

Source code in src/stimulus/data/loaders.py
def get_function_encode_all(self, field_name: str) -> Any:\n    \"\"\"Gets the encoding function for a specific field.\n\n    Args:\n        field_name (str): The field name to get the encoder for\n\n    Returns:\n        Any: The encode_all function for the specified field\n    \"\"\"\n    return getattr(self, field_name).encode_all\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.EncoderLoader.initialize_column_encoders_from_config","title":"initialize_column_encoders_from_config","text":"
initialize_column_encoders_from_config(\n    column_config: YamlColumns,\n) -> None\n

Build the loader from a config dictionary.

Parameters:

  • column_config (YamlColumns) \u2013

    Configuration dictionary containing field names (column_name) and their encoder specifications.

Source code in src/stimulus/data/loaders.py
def initialize_column_encoders_from_config(self, column_config: yaml_data.YamlColumns) -> None:\n    \"\"\"Build the loader from a config dictionary.\n\n    Args:\n        column_config (yaml_data.YamlColumns): Configuration dictionary containing field names (column_name) and their encoder specifications.\n    \"\"\"\n    for field in column_config:\n        encoder = self.get_encoder(field.encoder[0].name, field.encoder[0].params)\n        self.set_encoder_as_attribute(field.column_name, encoder)\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.EncoderLoader.set_encoder_as_attribute","title":"set_encoder_as_attribute","text":"
set_encoder_as_attribute(\n    field_name: str, encoder: AbstractEncoder\n) -> None\n

Sets the encoder as an attribute of the loader.

Parameters:

  • field_name (str) \u2013

    The name of the field to set the encoder for

  • encoder (AbstractEncoder) \u2013

    The encoder to set

Source code in src/stimulus/data/loaders.py
def set_encoder_as_attribute(self, field_name: str, encoder: encoders.AbstractEncoder) -> None:\n    \"\"\"Sets the encoder as an attribute of the loader.\n\n    Args:\n        field_name (str): The name of the field to set the encoder for\n        encoder (encoders.AbstractEncoder): The encoder to set\n    \"\"\"\n    setattr(self, field_name, encoder)\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.SplitLoader","title":"SplitLoader","text":"
SplitLoader(seed: Optional[float] = None)\n

Class for loading splitters from a config file.

Parameters:

  • seed (Optional[float], default: None ) \u2013

    Random seed for reproducibility

Methods:

  • get_function_split \u2013

    Gets the function for splitting the data.

  • get_splitter \u2013

    Gets a splitter object from the splitters module.

  • initialize_splitter_from_config \u2013

    Build the loader from a config dictionary.

  • set_splitter_as_attribute \u2013

    Sets the splitter as an attribute of the loader.

Source code in src/stimulus/data/loaders.py
def __init__(self, seed: Optional[float] = None) -> None:\n    \"\"\"Initialize the split loader.\n\n    Args:\n        seed: Random seed for reproducibility\n    \"\"\"\n    self.seed = seed\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.SplitLoader.get_function_split","title":"get_function_split","text":"
get_function_split() -> Any\n

Gets the function for splitting the data.

Returns:

  • Any ( Any ) \u2013

    The split function for the specified method

Raises:

  • AttributeError \u2013

    If splitter hasn't been initialized using initialize_splitter_from_config()

Source code in src/stimulus/data/loaders.py
def get_function_split(self) -> Any:\n    \"\"\"Gets the function for splitting the data.\n\n    Returns:\n        Any: The split function for the specified method\n\n    Raises:\n        AttributeError: If splitter hasn't been initialized using initialize_splitter_from_config()\n    \"\"\"\n    if not hasattr(self, \"split\"):\n        # Raise a more specific error and chain it to the original AttributeError\n        raise AttributeError(\n            \"Splitter not initialized. Please call initialize_splitter_from_config() or set_splitter_as_attribute() \"\n            \"before attempting to get split function.\",\n        )\n    return self.split.get_split_indexes\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.SplitLoader.get_splitter","title":"get_splitter","text":"
get_splitter(\n    splitter_name: str,\n    splitter_params: Optional[dict] = None,\n) -> Any\n

Gets a splitter object from the splitters module.

Parameters:

  • splitter_name (str) \u2013

    The name of the splitter to get

  • splitter_params (Optional[dict], default: None ) \u2013

    Parameters for the splitter

Returns:

  • Any ( Any ) \u2013

    The splitter function for the specified splitter

Source code in src/stimulus/data/loaders.py
def get_splitter(self, splitter_name: str, splitter_params: Optional[dict] = None) -> Any:\n    \"\"\"Gets a splitter object from the splitters module.\n\n    Args:\n        splitter_name (str): The name of the splitter to get\n        splitter_params (Optional[dict]): Parameters for the splitter\n\n    Returns:\n        Any: The splitter function for the specified splitter\n    \"\"\"\n    try:\n        return getattr(splitters, splitter_name)(**splitter_params)\n    except TypeError:\n        if splitter_params is None:\n            return getattr(splitters, splitter_name)()\n        logging.exception(f\"Splitter '{splitter_name}' has incorrect parameters: {splitter_params}\")\n        logging.exception(\n            f\"Expected parameters for '{splitter_name}': {inspect.signature(getattr(splitters, splitter_name))}\",\n        )\n        raise\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.SplitLoader.initialize_splitter_from_config","title":"initialize_splitter_from_config","text":"
initialize_splitter_from_config(\n    split_config: YamlSplit,\n) -> None\n

Build the loader from a config dictionary.

Parameters:

  • split_config (YamlSplit) \u2013

    Configuration dictionary containing split configurations.

Source code in src/stimulus/data/loaders.py
def initialize_splitter_from_config(self, split_config: yaml_data.YamlSplit) -> None:\n    \"\"\"Build the loader from a config dictionary.\n\n    Args:\n        split_config (yaml_data.YamlSplit): Configuration dictionary containing split configurations.\n    \"\"\"\n    splitter = self.get_splitter(split_config.split_method, split_config.params)\n    self.set_splitter_as_attribute(splitter)\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.SplitLoader.set_splitter_as_attribute","title":"set_splitter_as_attribute","text":"
set_splitter_as_attribute(splitter: Any) -> None\n

Sets the splitter as an attribute of the loader.

Parameters:

  • splitter (Any) \u2013

    The splitter to set

Source code in src/stimulus/data/loaders.py
def set_splitter_as_attribute(self, splitter: Any) -> None:\n    \"\"\"Sets the splitter as an attribute of the loader.\n\n    Args:\n        splitter (Any): The splitter to set\n    \"\"\"\n    self.split = splitter\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.TransformLoader","title":"TransformLoader","text":"
TransformLoader(seed: Optional[float] = None)\n

Class for loading transformations from a config file.

Parameters:

  • seed (Optional[float], default: None ) \u2013

    Random seed for reproducibility

Methods:

  • get_data_transformer \u2013

    Gets a transformer object from the transformers module.

  • initialize_column_data_transformers_from_config \u2013

    Build the loader from a config dictionary.

  • set_data_transformer_as_attribute \u2013

    Sets the data transformer as an attribute of the loader.

Source code in src/stimulus/data/loaders.py
def __init__(self, seed: Optional[float] = None) -> None:\n    \"\"\"Initialize the transform loader.\n\n    Args:\n        seed: Random seed for reproducibility\n    \"\"\"\n    self.seed = seed\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.TransformLoader.get_data_transformer","title":"get_data_transformer","text":"
get_data_transformer(\n    transformation_name: str,\n    transformation_params: Optional[dict] = None,\n) -> Any\n

Gets a transformer object from the transformers module.

Parameters:

  • transformation_name (str) \u2013

    The name of the transformer to get

  • transformation_params (Optional[dict], default: None ) \u2013

    Parameters for the transformer

Returns:

  • Any ( Any ) \u2013

    The transformer function for the specified transformation

Source code in src/stimulus/data/loaders.py
def get_data_transformer(self, transformation_name: str, transformation_params: Optional[dict] = None) -> Any:\n    \"\"\"Gets a transformer object from the transformers module.\n\n    Args:\n        transformation_name (str): The name of the transformer to get\n        transformation_params (Optional[dict]): Parameters for the transformer\n\n    Returns:\n        Any: The transformer function for the specified transformation\n    \"\"\"\n    try:\n        return getattr(data_transformation_generators, transformation_name)(**transformation_params)\n    except AttributeError:\n        logging.exception(f\"Transformer '{transformation_name}' not found in the transformers module.\")\n        logging.exception(\n            f\"Available transformers: {[name for name, obj in data_transformation_generators.__dict__.items() if isinstance(obj, type) and name not in ('ABC', 'Any')]}\",\n        )\n        raise\n\n    except TypeError:\n        if transformation_params is None:\n            return getattr(data_transformation_generators, transformation_name)()\n        logging.exception(f\"Transformer '{transformation_name}' has incorrect parameters: {transformation_params}\")\n        logging.exception(\n            f\"Expected parameters for '{transformation_name}': {inspect.signature(getattr(data_transformation_generators, transformation_name))}\",\n        )\n        raise\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.TransformLoader.initialize_column_data_transformers_from_config","title":"initialize_column_data_transformers_from_config","text":"
initialize_column_data_transformers_from_config(\n    transform_config: YamlTransform,\n) -> None\n

Build the loader from a config dictionary.

Parameters:

  • transform_config (YamlTransform) \u2013

    Configuration dictionary containing transforms configurations.

Example

Given a YAML config like:

transforms:\n  transformation_name: noise\n  columns:\n    - column_name: age\n      transformations:\n        - name: GaussianNoise\n          params:\n            std: 0.1\n    - column_name: fare\n      transformations:\n        - name: GaussianNoise\n          params:\n            std: 0.1\n

The loader will:

1. Iterate through each column (age, fare)
2. For each transformation in the column:
   - Get the transformer (GaussianNoise) with its params (std=0.1)
   - Set it as an attribute on the loader using the column name as the key

A minimal usage sketch follows the source listing below.

Source code in src/stimulus/data/loaders.py
def initialize_column_data_transformers_from_config(self, transform_config: yaml_data.YamlTransform) -> None:\n    \"\"\"Build the loader from a config dictionary.\n\n    Args:\n        transform_config (yaml_data.YamlTransform): Configuration dictionary containing transforms configurations.\n\n    Example:\n        Given a YAML config like:\n        ```yaml\n        transforms:\n          transformation_name: noise\n          columns:\n            - column_name: age\n              transformations:\n                - name: GaussianNoise\n                  params:\n                    std: 0.1\n            - column_name: fare\n              transformations:\n                - name: GaussianNoise\n                  params:\n                    std: 0.1\n        ```\n\n        The loader will:\n        1. Iterate through each column (age, fare)\n        2. For each transformation in the column:\n           - Get the transformer (GaussianNoise) with its params (std=0.1)\n           - Set it as an attribute on the loader using the column name as key\n    \"\"\"\n    for column in transform_config.columns:\n        col_name = column.column_name\n        for transform_spec in column.transformations:\n            transformer = self.get_data_transformer(transform_spec.name, transform_spec.params)\n            self.set_data_transformer_as_attribute(col_name, transformer)\n
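For orientation, here is a minimal, illustrative sketch (not taken from the library's own docs) of doing by hand what this method does for every column in the config. The GaussianNoise name and its std parameter are borrowed from the example above; the column name "age" is just a placeholder.

from stimulus.data.loaders import TransformLoader

loader = TransformLoader(seed=42)

# Look up the transformer class by name and instantiate it with its params,
# as get_data_transformer does for each entry in the YAML config.
noise = loader.get_data_transformer("GaussianNoise", {"std": 0.1})

# Register it under the column it applies to; afterwards the transformer is
# reachable as loader.age["GaussianNoise"].
loader.set_data_transformer_as_attribute("age", noise)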
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.TransformLoader.set_data_transformer_as_attribute","title":"set_data_transformer_as_attribute","text":"
set_data_transformer_as_attribute(
    field_name: str, data_transformer: Any
) -> None

Sets the data transformer as an attribute of the loader.

Parameters:

  • field_name (str) \u2013

    The name of the field to set the data transformer for

  • data_transformer (Any) \u2013

    The data transformer to set

Source code in src/stimulus/data/loaders.py
def set_data_transformer_as_attribute(self, field_name: str, data_transformer: Any) -> None:\n    \"\"\"Sets the data transformer as an attribute of the loader.\n\n    Args:\n        field_name (str): The name of the field to set the data transformer for\n        data_transformer (Any): The data transformer to set\n    \"\"\"\n    # check if the field already exists, if it does not, initialize it to an empty dict\n    if not hasattr(self, field_name):\n        setattr(self, field_name, {data_transformer.__class__.__name__: data_transformer})\n    else:\n        field_value = getattr(self, field_name)\n        field_value[data_transformer.__class__.__name__] = data_transformer\n
"},{"location":"reference/stimulus/data/encoding/","title":"stimulus.data.encoding","text":""},{"location":"reference/stimulus/data/encoding/#stimulus.data.encoding","title":"encoding","text":"

Encoding package for data transformation.

Modules:

  • encoders \u2013

    This file contains encoders classes for encoding various types of data.

"},{"location":"reference/stimulus/data/encoding/encoders/","title":"stimulus.data.encoding.encoders","text":""},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders","title":"encoders","text":"

This file contains encoders classes for encoding various types of data.

Classes:

  • AbstractEncoder \u2013

    Abstract class for encoders.

  • NumericEncoder \u2013

    Encoder for float/int data.

  • NumericRankEncoder \u2013

    Encoder for float/int data that encodes the data based on their rank.

  • StrClassificationEncoder \u2013

    A string classification encoder that converts lists of strings into numeric labels using scikit-learn.

  • TextOneHotEncoder \u2013

    One hot encoder for text data.

"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.AbstractEncoder","title":"AbstractEncoder","text":"

Bases: ABC

Abstract class for encoders.

Encoders are classes that encode the raw data into torch.tensors. Different encoders provide different encoding methods. Different encoders may take different types of data as input.
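As an illustration (not part of the library), a minimal subclass only has to provide encode, encode_all and decode. The BoolEncoder below is hypothetical and assumes only the abstract interface documented here.

from typing import Any

import torch

from stimulus.data.encoding.encoders import AbstractEncoder


class BoolEncoder(AbstractEncoder):
    """Hypothetical encoder mapping booleans to a float tensor of 0.0 / 1.0."""

    def encode(self, data: Any) -> torch.Tensor:
        # A single data point is just a one-element batch.
        return self.encode_all([data])

    def encode_all(self, data: list[Any]) -> torch.Tensor:
        return torch.tensor([float(bool(x)) for x in data], dtype=torch.float32)

    def decode(self, data: Any) -> Any:
        # Inverse of encode_all: round back to booleans.
        return [bool(round(float(x))) for x in data]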

Methods:

  • encode \u2013

    encodes a single data point

  • encode_all \u2013

    encodes a list of data points into a torch.tensor

  • encode_multiprocess \u2013

    encodes a list of data points using multiprocessing

  • decode \u2013

    decodes a single data point

Methods:

  • decode \u2013

    Decode a single data point.

  • encode \u2013

    Encode a single data point.

  • encode_all \u2013

    Encode a list of data points.

"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.AbstractEncoder.decode","title":"decode abstractmethod","text":"
decode(data: Any) -> Any\n

Decode a single data point.

This is an abstract method, child classes should overwrite it.

Parameters:

  • data (Any) \u2013

    a single encoded data point

Returns:

  • decoded_data_point ( Any ) \u2013

    the decoded data point

Source code in src/stimulus/data/encoding/encoders.py
@abstractmethod\ndef decode(self, data: Any) -> Any:\n    \"\"\"Decode a single data point.\n\n    This is an abstract method, child classes should overwrite it.\n\n    Args:\n        data (Any): a single encoded data point\n\n    Returns:\n        decoded_data_point (Any): the decoded data point\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.AbstractEncoder.encode","title":"encode abstractmethod","text":"
encode(data: Any) -> Any\n

Encode a single data point.

This is an abstract method, child classes should overwrite it.

Parameters:

  • data (Any) \u2013

    a single data point

Returns:

  • encoded_data_point ( Any ) \u2013

    the encoded data point

Source code in src/stimulus/data/encoding/encoders.py
@abstractmethod\ndef encode(self, data: Any) -> Any:\n    \"\"\"Encode a single data point.\n\n    This is an abstract method, child classes should overwrite it.\n\n    Args:\n        data (Any): a single data point\n\n    Returns:\n        encoded_data_point (Any): the encoded data point\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.AbstractEncoder.encode_all","title":"encode_all abstractmethod","text":"
encode_all(data: list[Any]) -> Tensor\n

Encode a list of data points.

This is an abstract method, child classes should overwrite it.

Parameters:

  • data (list[Any]) \u2013

    a list of data points

Returns:

  • encoded_data ( Tensor ) \u2013

    encoded data points

Source code in src/stimulus/data/encoding/encoders.py
@abstractmethod\ndef encode_all(self, data: list[Any]) -> torch.Tensor:\n    \"\"\"Encode a list of data points.\n\n    This is an abstract method, child classes should overwrite it.\n\n    Args:\n        data (list[Any]): a list of data points\n\n    Returns:\n        encoded_data (torch.Tensor): encoded data points\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericEncoder","title":"NumericEncoder","text":"
NumericEncoder(dtype: dtype = float32)\n

Bases: AbstractEncoder

Encoder for float/int data.

Attributes:

  • dtype (dtype) \u2013

    The data type of the encoded data. Default = torch.float32 (32-bit floating point)

Parameters:

  • dtype (dtype, default: float32 ) \u2013

    the data type of the encoded data. Default = torch.float (32-bit floating point)

Methods:

  • decode \u2013

    Decodes the data.

  • encode \u2013

    Encodes the data.

  • encode_all \u2013

    Encodes the data.

Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, dtype: torch.dtype = torch.float32) -> None:\n    \"\"\"Initialize the NumericEncoder class.\n\n    Args:\n        dtype (torch.dtype): the data type of the encoded data. Default = torch.float (32-bit floating point)\n    \"\"\"\n    self.dtype = dtype\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericEncoder.decode","title":"decode","text":"
decode(data: Tensor) -> list[float]\n

Decodes the data.

Parameters:

  • data (Tensor) \u2013

    the encoded data

Returns:

  • decoded_data ( list[float] ) \u2013

    the decoded data

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: torch.Tensor) -> list[float]:\n    \"\"\"Decodes the data.\n\n    Args:\n        data (torch.Tensor): the encoded data\n\n    Returns:\n        decoded_data (list[float]): the decoded data\n    \"\"\"\n    return data.cpu().numpy().tolist()\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericEncoder.encode","title":"encode","text":"
encode(data: float) -> Tensor\n

Encodes the data.

This method takes as input a single data point that should be mappable to a single output.

Parameters:

  • data (float) \u2013

    a single data point

Returns:

  • encoded_data_point ( Tensor ) \u2013

    the encoded data point

Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: float) -> torch.Tensor:\n    \"\"\"Encodes the data.\n\n    This method takes as input a single data point, should be mappable to a single output.\n\n    Args:\n        data (float): a single data point\n\n    Returns:\n        encoded_data_point (torch.Tensor): the encoded data point\n    \"\"\"\n    return self.encode_all([data])\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericEncoder.encode_all","title":"encode_all","text":"
encode_all(data: list[float]) -> Tensor\n

Encodes the data.

This method takes as input a list of data points, or a single float, and returns a torch.tensor.

Parameters:

  • data (list[float]) \u2013

    a list of data points or a single data point

Returns:

  • encoded_data ( Tensor ) \u2013

    the encoded data

Source code in src/stimulus/data/encoding/encoders.py
def encode_all(self, data: list[float]) -> torch.Tensor:\n    \"\"\"Encodes the data.\n\n    This method takes as input a list of data points, or a single float, and returns a torch.tensor.\n\n    Args:\n        data (list[float]): a list of data points or a single data point\n\n    Returns:\n        encoded_data (torch.Tensor): the encoded data\n    \"\"\"\n    if not isinstance(data, list):\n        data = [data]\n\n    self._check_input_dtype(data)\n    self._warn_float_is_converted_to_int(data)\n\n    return torch.tensor(data, dtype=self.dtype)\n
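A small usage sketch (illustrative; the values simply follow the behaviour described above):

import torch

from stimulus.data.encoding.encoders import NumericEncoder

encoder = NumericEncoder()                    # dtype defaults to torch.float32
batch = encoder.encode_all([0.5, 1.5, 2.5])   # tensor([0.5000, 1.5000, 2.5000])
single = encoder.encode(0.5)                  # equivalent to encode_all([0.5])
values = encoder.decode(batch)                # back to a plain Python list of floats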
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericRankEncoder","title":"NumericRankEncoder","text":"
NumericRankEncoder(*, scale: bool = False)\n

Bases: AbstractEncoder

Encoder for float/int data that encodes the data based on their rank.

Attributes:

  • scale (bool) \u2013

    whether to scale the ranks to be between 0 and 1. Default = False

Methods:

  • encode \u2013

    encodes a single data point

  • encode_all \u2013

    encodes a list of data points into a torch.tensor

  • decode \u2013

    decodes a single data point

  • _check_input_dtype \u2013

    checks if the input data is int or float data

Parameters:

  • scale (bool, default: False ) \u2013

    whether to scale the ranks to be between 0 and 1. Default = False

Methods:

  • decode \u2013

    Returns an error since decoding does not make sense without encoder information, which is not yet supported.

  • encode \u2013

    Returns an error since encoding a single float does not make sense.

  • encode_all \u2013

    Encodes the data.

Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, *, scale: bool = False) -> None:\n    \"\"\"Initialize the NumericRankEncoder class.\n\n    Args:\n        scale (bool): whether to scale the ranks to be between 0 and 1. Default = False\n    \"\"\"\n    self.scale = scale\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericRankEncoder.decode","title":"decode","text":"
decode(data: Any) -> Any\n

Returns an error since decoding does not make sense without encoder information, which is not yet supported.

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: Any) -> Any:\n    \"\"\"Returns an error since decoding does not make sense without encoder information, which is not yet supported.\"\"\"\n    raise NotImplementedError(\"Decoding is not yet supported for NumericRank.\")\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericRankEncoder.encode","title":"encode","text":"
encode(data: Any) -> Tensor\n

Returns an error since encoding a single float does not make sense.

Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: Any) -> torch.Tensor:\n    \"\"\"Returns an error since encoding a single float does not make sense.\"\"\"\n    raise NotImplementedError(\"Encoding a single float does not make sense. Use encode_all instead.\")\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericRankEncoder.encode_all","title":"encode_all","text":"
encode_all(data: list[Union[int, float]]) -> Tensor\n

Encodes the data.

This method takes as input a list of data points and returns the ranks of the data points. When scale is set to True, the ranks are normalized to be between 0 and 1.

Parameters:

  • data (list[Union[int, float]]) \u2013

    a list of numeric values

Returns:

  • encoded_data ( Tensor ) \u2013

    the encoded data

Source code in src/stimulus/data/encoding/encoders.py
def encode_all(self, data: list[Union[int, float]]) -> torch.Tensor:\n    \"\"\"Encodes the data.\n\n    This method takes as input a list of data points, and returns the ranks of the data points.\n    The ranks are normalized to be between 0 and 1, when scale is set to True.\n\n    Args:\n        data (list[Union[int, float]]): a list of numeric values\n\n    Returns:\n        encoded_data (torch.Tensor): the encoded data\n    \"\"\"\n    if not isinstance(data, list):\n        data = [data]\n    self._check_input_dtype(data)\n\n    # Get ranks (0 is lowest, n-1 is highest)\n    # and normalize to be between 0 and 1\n    array_data: np.ndarray = np.array(data)\n    ranks: np.ndarray = np.argsort(np.argsort(array_data))\n    if self.scale:\n        ranks = ranks / max(len(ranks) - 1, 1)\n    return torch.tensor(ranks)\n
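An illustrative sketch of the ranking behaviour, with the values worked out from the argsort-based implementation shown above:

from stimulus.data.encoding.encoders import NumericRankEncoder

ranker = NumericRankEncoder(scale=True)
ranks = ranker.encode_all([10.0, 3.0, 7.0, 1.0])
# Raw ranks are [3, 1, 2, 0] (0 = smallest value); with scale=True they are
# divided by n - 1 = 3, giving approximately tensor([1.0000, 0.3333, 0.6667, 0.0000]).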
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.StrClassificationEncoder","title":"StrClassificationEncoder","text":"
StrClassificationEncoder(*, scale: bool = False)\n

Bases: AbstractEncoder

A string classification encoder that converts lists of strings into numeric labels using scikit-learn.

When scale is set to True, the labels are scaled to be between 0 and 1.

Attributes:

  • scale (bool) \u2013

    Whether to scale the labels to be between 0 and 1. Default = False

Methods:

  • encode –

    Raises a NotImplementedError, as encoding a single string is not meaningful in this context.

  • encode_all –

    Encodes an entire list of string data into a numeric representation using LabelEncoder and returns a torch tensor. Ensures that the provided data items are valid strings prior to encoding.

  • decode –

    Raises a NotImplementedError, as decoding is not supported with the current design.

  • _check_dtype –

    Validates that all items in the data list are strings, raising a ValueError otherwise.

Parameters:

  • scale (bool, default: False ) \u2013

    whether to scale the labels to be between 0 and 1. Default = False

Methods:

  • decode \u2013

    Returns an error since decoding does not make sense without encoder information, which is not yet supported.

  • encode \u2013

    Returns an error since encoding a single string does not make sense.

  • encode_all \u2013

    Encodes the data.

Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, *, scale: bool = False) -> None:\n    \"\"\"Initialize the StrClassificationEncoder class.\n\n    Args:\n        scale (bool): whether to scale the labels to be between 0 and 1. Default = False\n    \"\"\"\n    self.scale = scale\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.StrClassificationEncoder.decode","title":"decode","text":"
decode(data: Any) -> Any\n

Returns an error since decoding does not make sense without encoder information, which is not yet supported.

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: Any) -> Any:\n    \"\"\"Returns an error since decoding does not make sense without encoder information, which is not yet supported.\"\"\"\n    raise NotImplementedError(\"Decoding is not yet supported for StrClassification.\")\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.StrClassificationEncoder.encode","title":"encode","text":"
encode(data: str) -> int\n

Returns an error since encoding a single string does not make sense.

Parameters:

  • data (str) \u2013

    a single string

Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: str) -> int:\n    \"\"\"Returns an error since encoding a single string does not make sense.\n\n    Args:\n        data (str): a single string\n    \"\"\"\n    raise NotImplementedError(\"Encoding a single string does not make sense. Use encode_all instead.\")\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.StrClassificationEncoder.encode_all","title":"encode_all","text":"
encode_all(data: Union[str, list[str]]) -> Tensor\n

Encodes the data.

This method takes as input a list of data points and encodes them into numeric labels using LabelEncoder from scikit-learn (which produces a numpy array that is then converted to a torch tensor). For more info visit: https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html

Parameters:

  • data (Union[str, list[str]]) \u2013

    a list of strings or single string

Returns:

  • encoded_data ( tensor ) \u2013

    the encoded data

Source code in src/stimulus/data/encoding/encoders.py
def encode_all(self, data: Union[str, list[str]]) -> torch.Tensor:\n    \"\"\"Encodes the data.\n\n    This method takes as input a list of data points, should be mappable to a single output, using LabelEncoder from scikit learn and returning a numpy array.\n    For more info visit : https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html\n\n    Args:\n        data (Union[str, list[str]]): a list of strings or single string\n\n    Returns:\n        encoded_data (torch.tensor): the encoded data\n    \"\"\"\n    if not isinstance(data, list):\n        data = [data]\n\n    self._check_dtype(data)\n\n    encoder = preprocessing.LabelEncoder()\n    encoded_data = torch.tensor(encoder.fit_transform(data))\n    if self.scale:\n        encoded_data = encoded_data / max(len(encoded_data) - 1, 1)\n\n    return encoded_data\n
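An illustrative usage sketch; the label values follow scikit-learn's LabelEncoder, which numbers the sorted unique strings:

from stimulus.data.encoding.encoders import StrClassificationEncoder

encoder = StrClassificationEncoder()
labels = encoder.encode_all(["cat", "dog", "cat", "bird"])
# Sorted unique classes: bird=0, cat=1, dog=2 -> tensor([1, 2, 1, 0])

scaled = StrClassificationEncoder(scale=True).encode_all(["cat", "dog", "cat", "bird"])
# Same labels divided by (number of items - 1) = 3 -> tensor([0.3333, 0.6667, 0.3333, 0.0000])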
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.TextOneHotEncoder","title":"TextOneHotEncoder","text":"
TextOneHotEncoder(
    alphabet: str = "acgt",
    *,
    convert_lowercase: bool = False,
    padding: bool = False
)

Bases: AbstractEncoder

One hot encoder for text data.

NOTE: encoding is based on the given alphabet. If a character c is not in the alphabet, c will be represented by a vector of zeros.

Attributes:

  • alphabet (str) \u2013

    the alphabet to one hot encode the data with.

  • convert_lowercase (bool) \u2013

    whether to convert the sequence and alphabet to lowercase. Default is False.

  • padding (bool) \u2013

    whether to pad the sequences with zeros. Default is False.

  • encoder (OneHotEncoder) \u2013

    preprocessing.OneHotEncoder object initialized with self.alphabet

Methods:

  • encode \u2013

    encodes a single data point

  • encode_all \u2013

    encodes a list of data points into a numpy array

  • encode_multiprocess \u2013

    encodes a list of data points using multiprocessing

  • decode \u2013

    decodes a single data point

  • _sequence_to_array \u2013

    transforms a sequence into a numpy array

Parameters:

  • alphabet (str, default: 'acgt' ) \u2013

    the alphabet to one hot encode the data with.

Raises:

  • TypeError \u2013

    If the input alphabet is not a string.

Methods:

  • decode \u2013

    Decodes one-hot encoded tensor back to sequences.

  • encode \u2013

    One hot encodes a single sequence.

  • encode_all \u2013

    Encodes a list of sequences.

  • encode_multiprocess \u2013

    Encodes a list of sequences using multiprocessing.

Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, alphabet: str = \"acgt\", *, convert_lowercase: bool = False, padding: bool = False) -> None:\n    \"\"\"Initialize the TextOneHotEncoder class.\n\n    Args:\n        alphabet (str): the alphabet to one hot encode the data with.\n\n    Raises:\n        TypeError: If the input alphabet is not a string.\n    \"\"\"\n    if not isinstance(alphabet, str):\n        error_msg = f\"Expected a string input for alphabet, got {type(alphabet).__name__}\"\n        logger.error(error_msg)\n        raise TypeError(error_msg)\n\n    if convert_lowercase:\n        alphabet = alphabet.lower()\n\n    self.alphabet = alphabet\n    self.convert_lowercase = convert_lowercase\n    self.padding = padding\n\n    self.encoder = preprocessing.OneHotEncoder(\n        categories=[list(alphabet)],\n        handle_unknown=\"ignore\",\n    )  # handle_unknown='ignore' unsures that a vector of zeros is returned for unknown characters, such as 'Ns' in DNA sequences\n    self.encoder.fit(np.array(list(alphabet)).reshape(-1, 1))\n
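An illustrative sketch of the convert_lowercase flag. Per the attribute description above, both the alphabet and the input sequences are lowercased, so case differences should not matter; the sequence used here is just an example.

from stimulus.data.encoding.encoders import TextOneHotEncoder

encoder = TextOneHotEncoder(alphabet="ACGT", convert_lowercase=True)
# The alphabet is stored lowercased ("acgt"), so a lowercase sequence matches it
# directly and, per the docs, an uppercase one is lowered before encoding.
onehot = encoder.encode("acgt")   # shape (4, 4): an identity-like one-hot matrix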
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.TextOneHotEncoder.decode","title":"decode","text":"
decode(data: Tensor) -> Union[str, list[str]]\n

Decodes one-hot encoded tensor back to sequences.

Parameters:

  • data (Tensor) \u2013

2D or 3D tensor of one-hot encoded sequences. 2D shape: (sequence_length, alphabet_size); 3D shape: (batch_size, sequence_length, alphabet_size).

NOTE that when decoding a 3D tensor, all sequences are assumed to have the same length.

Returns:

  • Union[str, list[str]] \u2013

    Union[str, list[str]]: Single sequence string or list of sequence strings

Raises:

  • TypeError \u2013

    If the input data is not a 2D or 3D tensor

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: torch.Tensor) -> Union[str, list[str]]:\n    \"\"\"Decodes one-hot encoded tensor back to sequences.\n\n    Args:\n        data (torch.Tensor): 2D or 3D tensor of one-hot encoded sequences\n            - 2D shape: (sequence_length, alphabet_size)\n            - 3D shape: (batch_size, sequence_length, alphabet_size)\n\n    NOTE that when decoding 3D shape tensor, it assumes all sequences have the same length.\n\n    Returns:\n        Union[str, list[str]]: Single sequence string or list of sequence strings\n\n    Raises:\n        TypeError: If the input data is not a 2D or 3D tensor\n    \"\"\"\n    expected_2d_tensor = 2\n    expected_3d_tensor = 3\n\n    if data.dim() == expected_2d_tensor:\n        # Single sequence\n        data_np = data.numpy().reshape(-1, len(self.alphabet))\n        decoded = self.encoder.inverse_transform(data_np).flatten()\n        return \"\".join([i for i in decoded if i is not None])\n\n    if data.dim() == expected_3d_tensor:\n        # Multiple sequences\n        batch_size, seq_len, _ = data.shape\n        data_np = data.reshape(-1, len(self.alphabet)).numpy()\n        decoded = self.encoder.inverse_transform(data_np)\n        sequences = decoded.reshape(batch_size, seq_len)\n        # Convert to masked array where None values are masked\n        masked_sequences = np.ma.masked_equal(sequences, None)\n        # Fill masked values with \"-\"\n        filled_sequences = masked_sequences.filled(\"-\")\n        return [\"\".join(seq) for seq in filled_sequences]\n\n    raise ValueError(f\"Expected 2D or 3D tensor, got {data.dim()}D\")\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.TextOneHotEncoder.encode","title":"encode","text":"
encode(data: str) -> Tensor\n

One hot encodes a single sequence.

Takes a single string sequence and returns a torch tensor of shape (sequence_length, alphabet_length). The returned tensor corresponds to the one hot encoding of the sequence. Unknown characters are represented by a vector of zeros.

Parameters:

  • data (str) \u2013

    single sequence

Returns:

  • encoded_data_point ( Tensor ) \u2013

    one hot encoded sequence

Raises:

  • TypeError \u2013

    If the input data is not a string.

Examples:

>>> encoder = TextOneHotEncoder(alphabet="acgt")
>>> encoder.encode("acgt")
tensor([[1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1]])
>>> encoder.encode("acgtn")
tensor([[1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
        [0, 0, 0, 0]])

>>> encoder = TextOneHotEncoder(alphabet="ACgt")
>>> encoder.encode("acgt")
tensor([[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1]])
>>> encoder.encode("ACgt")
tensor([[1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1]])
Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: str) -> torch.Tensor:\n    \"\"\"One hot encodes a single sequence.\n\n    Takes a single string sequence and returns a torch tensor of shape (sequence_length, alphabet_length).\n    The returned tensor corresponds to the one hot encoding of the sequence.\n    Unknown characters are represented by a vector of zeros.\n\n    Args:\n        data (str): single sequence\n\n    Returns:\n        encoded_data_point (torch.Tensor): one hot encoded sequence\n\n    Raises:\n        TypeError: If the input data is not a string.\n\n    Examples:\n        >>> encoder = TextOneHotEncoder(alphabet=\"acgt\")\n        >>> encoder.encode(\"acgt\")\n        tensor([[1, 0, 0, 0],\n                [0, 1, 0, 0],\n                [0, 0, 1, 0],\n                [0, 0, 0, 1]])\n        >>> encoder.encode(\"acgtn\")\n        tensor([[1, 0, 0, 0],\n                [0, 1, 0, 0],\n                [0, 0, 1, 0],\n                [0, 0, 0, 1],\n                [0, 0, 0, 0]])\n\n        >>> encoder = TextOneHotEncoder(alphabet=\"ACgt\")\n        >>> encoder.encode(\"acgt\")\n        tensor([[0, 0, 0, 0],\n                [0, 0, 0, 0],\n                [0, 0, 1, 0],\n                [0, 0, 0, 1]])\n        >>> encoder.encode(\"ACgt\")\n        tensor([[1, 0, 0, 0],\n                [0, 1, 0, 0],\n                [0, 0, 1, 0],\n                [0, 0, 0, 1]])\n    \"\"\"\n    sequence_array = self._sequence_to_array(data)\n    transformed = self.encoder.transform(sequence_array)\n    numpy_array = np.squeeze(np.stack(transformed.toarray()))\n    return torch.from_numpy(numpy_array)\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.TextOneHotEncoder.encode_all","title":"encode_all","text":"
encode_all(data: Union[str, list[str]]) -> Tensor\n

Encodes a list of sequences.

Takes a list of string sequences and returns a torch tensor of shape (number_of_sequences, sequence_length, alphabet_length). The returned tensor corresponds to the one hot encoding of the sequences. Unknown characters are represented by a vector of zeros.

Parameters:

  • data (Union[str, list[str]]) \u2013

    list of sequences or a single sequence

Returns:

  • encoded_data ( Tensor ) \u2013

    one hot encoded sequences

Raises:

  • TypeError \u2013

    If the input data is not a list or a string.

  • ValueError \u2013

    If all sequences do not have the same length when padding is False.

Examples:

>>> encoder = TextOneHotEncoder(alphabet="acgt")
>>> encoder.encode_all(["acgt", "acgtn"])
tensor([[[1, 0, 0, 0],
         [0, 1, 0, 0],
         [0, 0, 1, 0],
         [0, 0, 0, 1],
         [0, 0, 0, 0]], // this is padded with zeros

        [[1, 0, 0, 0],
         [0, 1, 0, 0],
         [0, 0, 1, 0],
         [0, 0, 0, 1],
         [0, 0, 0, 0]]])
Source code in src/stimulus/data/encoding/encoders.py
def encode_all(self, data: Union[str, list[str]]) -> torch.Tensor:\n    \"\"\"Encodes a list of sequences.\n\n    Takes a list of string sequences and returns a torch tensor of shape (number_of_sequences, sequence_length, alphabet_length).\n    The returned tensor corresponds to the one hot encoding of the sequences.\n    Unknown characters are represented by a vector of zeros.\n\n    Args:\n        data (Union[str, list[str]]): list of sequences or a single sequence\n\n    Returns:\n        encoded_data (torch.Tensor): one hot encoded sequences\n\n    Raises:\n        TypeError: If the input data is not a list or a string.\n        ValueError: If all sequences do not have the same length when padding is False.\n\n    Examples:\n        >>> encoder = TextOneHotEncoder(alphabet=\"acgt\")\n        >>> encoder.encode_all([\"acgt\", \"acgtn\"])\n        tensor([[[1, 0, 0, 0],\n                 [0, 1, 0, 0],\n                 [0, 0, 1, 0],\n                 [0, 0, 0, 1],\n                 [0, 0, 0, 0]], // this is padded with zeros\n\n                [[1, 0, 0, 0],\n                 [0, 1, 0, 0],\n                 [0, 0, 1, 0],\n                 [0, 0, 0, 1],\n                 [0, 0, 0, 0]]])\n    \"\"\"\n    encoded_data = None  # to prevent UnboundLocalError\n    # encode data\n    if isinstance(data, str):\n        encoded_data = self.encode(data)\n        return torch.stack([encoded_data])\n    if isinstance(data, list):\n        # TODO instead maybe we can run encode_multiprocess when data size is larger than a certain threshold.\n        encoded_list = self.encode_multiprocess(data)\n    else:\n        error_msg = f\"Expected list or string input for data, got {type(data).__name__}\"\n        logger.error(error_msg)\n        raise TypeError(error_msg)\n\n    # handle padding\n    if self.padding:\n        max_length = max([len(d) for d in encoded_list])\n        encoded_data = torch.stack([F.pad(d, (0, 0, 0, max_length - len(d))) for d in encoded_list])\n    else:\n        lengths = {len(d) for d in encoded_list}\n        if len(lengths) > 1:\n            error_msg = \"All sequences must have the same length when padding is False.\"\n            logger.error(error_msg)\n            raise ValueError(error_msg)\n        encoded_data = torch.stack(encoded_list)\n\n    if encoded_data is None:\n        raise ValueError(\"Encoded data is None. This should not happen.\")\n\n    return encoded_data\n
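An illustrative sketch of batch encoding with padding enabled. Because encode_all dispatches lists through encode_multiprocess, the call is placed under a __main__ guard so it also works on platforms that spawn worker processes.

from stimulus.data.encoding.encoders import TextOneHotEncoder

if __name__ == "__main__":
    encoder = TextOneHotEncoder(alphabet="acgt", padding=True)
    batch = encoder.encode_all(["acgt", "ac"])
    # batch.shape == (2, 4, 4): the shorter sequence is zero-padded to length 4.
    # With padding=False, sequences of unequal length raise a ValueError instead.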
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.TextOneHotEncoder.encode_multiprocess","title":"encode_multiprocess","text":"
encode_multiprocess(data: list[str]) -> list[Tensor]\n

Encodes a list of sequences using multiprocessing.

Source code in src/stimulus/data/encoding/encoders.py
def encode_multiprocess(self, data: list[str]) -> list[torch.Tensor]:\n    \"\"\"Encodes a list of sequences using multiprocessing.\"\"\"\n    with mp.Pool() as pool:\n        return pool.map(self.encode, data)\n
"},{"location":"reference/stimulus/data/splitters/","title":"stimulus.data.splitters","text":""},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters","title":"splitters","text":"

This package provides splitter classes for splitting data into train, validation, and test sets.

Modules:

  • splitters \u2013

    This file contains the splitter classes for splitting data accordingly.

Classes:

  • AbstractSplitter \u2013

    Abstract class for splitters.

  • RandomSplit \u2013

    This splitter randomly splits the data.

"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.AbstractSplitter","title":"AbstractSplitter","text":"
AbstractSplitter(seed: float = 42)\n

Bases: ABC

Abstract class for splitters.

A splitter splits the data into train, validation, and test sets.

Methods:

  • get_split_indexes \u2013

    calculates split indices for the data

  • distance \u2013

    calculates the distance between two elements of the data

Parameters:

  • seed (float, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • distance \u2013

    Calculates the distance between two elements of the data.

  • get_split_indexes \u2013

    Splits the data. Always return indices mapping to the original list.

Source code in src/stimulus/data/splitters/splitters.py
def __init__(self, seed: float = 42) -> None:\n    \"\"\"Initialize the splitter.\n\n    Args:\n        seed: Random seed for reproducibility\n    \"\"\"\n    self.seed = seed\n
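As an illustration (not part of the library), a child class only needs to implement get_split_indexes and distance. The splitter below is hypothetical and assumes the package-level import implied by this reference page.

from typing import Any

from stimulus.data.splitters import AbstractSplitter


class FirstFractionSplit(AbstractSplitter):
    """Hypothetical splitter: first 80% train, next 10% validation, rest test."""

    def get_split_indexes(self, data: dict) -> tuple[list, list, list]:
        n = len(next(iter(data.values())))  # number of rows in the first column
        train_end, val_end = int(0.8 * n), int(0.9 * n)
        indices = list(range(n))
        return indices[:train_end], indices[train_end:val_end], indices[val_end:]

    def distance(self, data_one: Any, data_two: Any) -> float:
        raise NotImplementedError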
"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.AbstractSplitter.distance","title":"distance abstractmethod","text":"
distance(data_one: Any, data_two: Any) -> float\n

Calculates the distance between two elements of the data.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data_one (Any) \u2013

    the first data point

  • data_two (Any) \u2013

    the second data point

Returns:

  • distance ( float ) \u2013

    the distance between the two data points

Source code in src/stimulus/data/splitters/splitters.py
@abstractmethod\ndef distance(self, data_one: Any, data_two: Any) -> float:\n    \"\"\"Calculates the distance between two elements of the data.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data_one (Any): the first data point\n        data_two (Any): the second data point\n\n    Returns:\n        distance (float): the distance between the two data points\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.AbstractSplitter.get_split_indexes","title":"get_split_indexes abstractmethod","text":"
get_split_indexes(data: dict) -> tuple[list, list, list]\n

Splits the data. Always return indices mapping to the original list.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (DataFrame) \u2013

    the data to be split

Returns:

  • split_indices ( list ) \u2013

    the indices for train, validation, and test sets

Source code in src/stimulus/data/splitters/splitters.py
@abstractmethod\ndef get_split_indexes(self, data: dict) -> tuple[list, list, list]:\n    \"\"\"Splits the data. Always return indices mapping to the original list.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (pl.DataFrame): the data to be split\n\n    Returns:\n        split_indices (list): the indices for train, validation, and test sets\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.RandomSplit","title":"RandomSplit","text":"
RandomSplit(split: Optional[list] = None, seed: int = 42)\n

Bases: AbstractSplitter

This splitter randomly splits the data.

Parameters:

  • split (Optional[list], default: None ) \u2013

    List of proportions for train/val/test splits

  • seed (int, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • distance \u2013

    Calculate distance between two data points.

  • get_split_indexes \u2013

    Splits the data indices into train, validation, and test sets.

Source code in src/stimulus/data/splitters/splitters.py
def __init__(self, split: Optional[list] = None, seed: int = 42) -> None:\n    \"\"\"Initialize the random splitter.\n\n    Args:\n        split: List of proportions for train/val/test splits\n        seed: Random seed for reproducibility\n    \"\"\"\n    super().__init__()\n    self.split = [0.7, 0.2, 0.1] if split is None else split\n    self.seed = seed\n    if len(self.split) != SPLIT_SIZE:\n        raise ValueError(\n            \"The split argument should be a list with length 3 that contains the proportions for [train, validation, test] splits.\",\n        )\n
"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.RandomSplit.distance","title":"distance","text":"
distance(data_one: Any, data_two: Any) -> float\n

Calculate distance between two data points.

Parameters:

  • data_one (Any) \u2013

    First data point

  • data_two (Any) \u2013

    Second data point

Returns:

  • float \u2013

    Distance between the points

Source code in src/stimulus/data/splitters/splitters.py
def distance(self, data_one: Any, data_two: Any) -> float:\n    \"\"\"Calculate distance between two data points.\n\n    Args:\n        data_one: First data point\n        data_two: Second data point\n\n    Returns:\n        Distance between the points\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.RandomSplit.get_split_indexes","title":"get_split_indexes","text":"
get_split_indexes(data: dict) -> tuple[list, list, list]\n

Splits the data indices into train, validation, and test sets.

One can use these lists of indices to parse the data afterwards.

Parameters:

  • data (dict) \u2013

    Dictionary mapping column names to lists of data values.

Returns:

  • train ( list ) \u2013

    The indices for the training set.

  • validation ( list ) \u2013

    The indices for the validation set.

  • test ( list ) \u2013

    The indices for the test set.

Raises:

  • ValueError \u2013

    If the split argument is not a list with length 3.

  • ValueError \u2013

    If the sum of the split proportions is not 1.

Source code in src/stimulus/data/splitters/splitters.py
def get_split_indexes(\n    self,\n    data: dict,\n) -> tuple[list, list, list]:\n    \"\"\"Splits the data indices into train, validation, and test sets.\n\n    One can use these lists of indices to parse the data afterwards.\n\n    Args:\n        data (dict): Dictionary mapping column names to lists of data values.\n\n    Returns:\n        train (list): The indices for the training set.\n        validation (list): The indices for the validation set.\n        test (list): The indices for the test set.\n\n    Raises:\n        ValueError: If the split argument is not a list with length 3.\n        ValueError: If the sum of the split proportions is not 1.\n    \"\"\"\n    # Use round to avoid errors due to floating point imprecisions\n    if round(sum(self.split), 3) < 1.0:\n        raise ValueError(f\"The sum of the split proportions should be 1. Instead, it is {sum(self.split)}.\")\n\n    if not data:\n        raise ValueError(\"No data provided for splitting\")\n    # Get length from first column's data list\n    length_of_data = len(next(iter(data.values())))\n\n    # Generate a list of indices and shuffle it\n    indices = np.arange(length_of_data)\n    np.random.seed(self.seed)\n    np.random.shuffle(indices)\n\n    # Calculate the sizes of the train, validation, and test sets\n    train_size = int(self.split[0] * length_of_data)\n    validation_size = int(self.split[1] * length_of_data)\n\n    # Split the shuffled indices according to the calculated sizes\n    train = indices[:train_size].tolist()\n    validation = indices[train_size : train_size + validation_size].tolist()\n    test = indices[train_size + validation_size :].tolist()\n\n    return train, validation, test\n
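An illustrative sketch of splitting a small column-oriented dataset; the column names and values are made up, and the exact index assignment depends on the seeded shuffle.

from stimulus.data.splitters import RandomSplit

data = {
    "age": [23, 45, 31, 62, 18, 40, 51, 29, 36, 58],
    "fare": [7.3, 80.0, 12.5, 26.0, 8.1, 15.7, 52.0, 10.4, 13.0, 30.1],
}

splitter = RandomSplit(split=[0.7, 0.2, 0.1], seed=42)
train_idx, val_idx, test_idx = splitter.get_split_indexes(data)
# With 10 rows this yields 7 / 2 / 1 shuffled indices into the original lists.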
"},{"location":"reference/stimulus/data/splitters/splitters/","title":"stimulus.data.splitters.splitters","text":""},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters","title":"splitters","text":"

This file contains the splitter classes for splitting data accordingly.

Classes:

  • AbstractSplitter \u2013

    Abstract class for splitters.

  • RandomSplit \u2013

    This splitter randomly splits the data.

"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.AbstractSplitter","title":"AbstractSplitter","text":"
AbstractSplitter(seed: float = 42)\n

Bases: ABC

Abstract class for splitters.

A splitter splits the data into train, validation, and test sets.

Methods:

  • get_split_indexes \u2013

    calculates split indices for the data

  • distance \u2013

    calculates the distance between two elements of the data

Parameters:

  • seed (float, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • distance \u2013

    Calculates the distance between two elements of the data.

  • get_split_indexes \u2013

    Splits the data. Always return indices mapping to the original list.

Source code in src/stimulus/data/splitters/splitters.py
def __init__(self, seed: float = 42) -> None:\n    \"\"\"Initialize the splitter.\n\n    Args:\n        seed: Random seed for reproducibility\n    \"\"\"\n    self.seed = seed\n
"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.AbstractSplitter.distance","title":"distance abstractmethod","text":"
distance(data_one: Any, data_two: Any) -> float\n

Calculates the distance between two elements of the data.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data_one (Any) \u2013

    the first data point

  • data_two (Any) \u2013

    the second data point

Returns:

  • distance ( float ) \u2013

    the distance between the two data points

Source code in src/stimulus/data/splitters/splitters.py
@abstractmethod\ndef distance(self, data_one: Any, data_two: Any) -> float:\n    \"\"\"Calculates the distance between two elements of the data.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data_one (Any): the first data point\n        data_two (Any): the second data point\n\n    Returns:\n        distance (float): the distance between the two data points\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.AbstractSplitter.get_split_indexes","title":"get_split_indexes abstractmethod","text":"
get_split_indexes(data: dict) -> tuple[list, list, list]\n

Splits the data. Always return indices mapping to the original list.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (DataFrame) \u2013

    the data to be split

Returns:

  • split_indices ( list ) \u2013

    the indices for train, validation, and test sets

Source code in src/stimulus/data/splitters/splitters.py
@abstractmethod\ndef get_split_indexes(self, data: dict) -> tuple[list, list, list]:\n    \"\"\"Splits the data. Always return indices mapping to the original list.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (pl.DataFrame): the data to be split\n\n    Returns:\n        split_indices (list): the indices for train, validation, and test sets\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.RandomSplit","title":"RandomSplit","text":"
RandomSplit(split: Optional[list] = None, seed: int = 42)\n

Bases: AbstractSplitter

This splitter randomly splits the data.

Parameters:

  • split (Optional[list], default: None ) \u2013

    List of proportions for train/val/test splits

  • seed (int, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • distance \u2013

    Calculate distance between two data points.

  • get_split_indexes \u2013

    Splits the data indices into train, validation, and test sets.

Source code in src/stimulus/data/splitters/splitters.py
def __init__(self, split: Optional[list] = None, seed: int = 42) -> None:\n    \"\"\"Initialize the random splitter.\n\n    Args:\n        split: List of proportions for train/val/test splits\n        seed: Random seed for reproducibility\n    \"\"\"\n    super().__init__()\n    self.split = [0.7, 0.2, 0.1] if split is None else split\n    self.seed = seed\n    if len(self.split) != SPLIT_SIZE:\n        raise ValueError(\n            \"The split argument should be a list with length 3 that contains the proportions for [train, validation, test] splits.\",\n        )\n
"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.RandomSplit.distance","title":"distance","text":"
distance(data_one: Any, data_two: Any) -> float\n

Calculate distance between two data points.

Parameters:

  • data_one (Any) \u2013

    First data point

  • data_two (Any) \u2013

    Second data point

Returns:

  • float \u2013

    Distance between the points

Source code in src/stimulus/data/splitters/splitters.py
def distance(self, data_one: Any, data_two: Any) -> float:\n    \"\"\"Calculate distance between two data points.\n\n    Args:\n        data_one: First data point\n        data_two: Second data point\n\n    Returns:\n        Distance between the points\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.RandomSplit.get_split_indexes","title":"get_split_indexes","text":"
get_split_indexes(data: dict) -> tuple[list, list, list]\n

Splits the data indices into train, validation, and test sets.

One can use these lists of indices to parse the data afterwards.

Parameters:

  • data (dict) \u2013

    Dictionary mapping column names to lists of data values.

Returns:

  • train ( list ) \u2013

    The indices for the training set.

  • validation ( list ) \u2013

    The indices for the validation set.

  • test ( list ) \u2013

    The indices for the test set.

Raises:

  • ValueError \u2013

    If the split argument is not a list with length 3.

  • ValueError \u2013

    If the sum of the split proportions is not 1.

Source code in src/stimulus/data/splitters/splitters.py
def get_split_indexes(\n    self,\n    data: dict,\n) -> tuple[list, list, list]:\n    \"\"\"Splits the data indices into train, validation, and test sets.\n\n    One can use these lists of indices to parse the data afterwards.\n\n    Args:\n        data (dict): Dictionary mapping column names to lists of data values.\n\n    Returns:\n        train (list): The indices for the training set.\n        validation (list): The indices for the validation set.\n        test (list): The indices for the test set.\n\n    Raises:\n        ValueError: If the split argument is not a list with length 3.\n        ValueError: If the sum of the split proportions is not 1.\n    \"\"\"\n    # Use round to avoid errors due to floating point imprecisions\n    if round(sum(self.split), 3) < 1.0:\n        raise ValueError(f\"The sum of the split proportions should be 1. Instead, it is {sum(self.split)}.\")\n\n    if not data:\n        raise ValueError(\"No data provided for splitting\")\n    # Get length from first column's data list\n    length_of_data = len(next(iter(data.values())))\n\n    # Generate a list of indices and shuffle it\n    indices = np.arange(length_of_data)\n    np.random.seed(self.seed)\n    np.random.shuffle(indices)\n\n    # Calculate the sizes of the train, validation, and test sets\n    train_size = int(self.split[0] * length_of_data)\n    validation_size = int(self.split[1] * length_of_data)\n\n    # Split the shuffled indices according to the calculated sizes\n    train = indices[:train_size].tolist()\n    validation = indices[train_size : train_size + validation_size].tolist()\n    test = indices[train_size + validation_size :].tolist()\n\n    return train, validation, test\n
"},{"location":"reference/stimulus/data/transform/","title":"stimulus.data.transform","text":""},{"location":"reference/stimulus/data/transform/#stimulus.data.transform","title":"transform","text":"

Transform package for data manipulation.

Modules:

  • data_transformation_generators \u2013

    This file contains noise generators classes for generating various types of noise.

"},{"location":"reference/stimulus/data/transform/data_transformation_generators/","title":"stimulus.data.transform.data_transformation_generators","text":""},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators","title":"data_transformation_generators","text":"

This file contains noise generators classes for generating various types of noise.

Classes:

  • AbstractAugmentationGenerator \u2013

    Abstract class for augmentation generators.

  • AbstractDataTransformer \u2013

    Abstract class for data transformers.

  • AbstractNoiseGenerator \u2013

    Abstract class for noise generators.

  • GaussianChunk \u2013

    Subset data around a random midpoint.

  • GaussianNoise \u2013

    Add Gaussian noise to data.

  • ReverseComplement \u2013

    Reverse complement biological sequences.

  • UniformTextMasker \u2013

    Mask characters in text.

"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractAugmentationGenerator","title":"AbstractAugmentationGenerator","text":"
AbstractAugmentationGenerator()\n

Bases: AbstractDataTransformer

Abstract class for augmentation generators.

All augmentation functions should carry the seed themselves, because running them under multiprocessing could otherwise unset the seed.

Methods:

  • transform \u2013

    Transforms a single data point.

  • transform_all \u2013

    Transforms a list of data points.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self) -> None:\n    \"\"\"Initialize the augmentation generator.\"\"\"\n    super().__init__()\n    self.add_row = True\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractAugmentationGenerator.transform","title":"transform abstractmethod","text":"
transform(data: Any) -> Any\n

Transforms a single data point.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (Any) \u2013

    the data to be transformed

Returns:

  • transformed_data ( Any ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform(self, data: Any) -> Any:\n    \"\"\"Transforms a single data point.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (Any): the data to be transformed\n\n    Returns:\n        transformed_data (Any): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractAugmentationGenerator.transform_all","title":"transform_all abstractmethod","text":"
transform_all(data: list) -> list\n

Transforms a list of data points.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (list) \u2013

    the data to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform_all(self, data: list) -> list:\n    \"\"\"Transforms a list of data points.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (list): the data to be transformed\n\n    Returns:\n        transformed_data (list): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractDataTransformer","title":"AbstractDataTransformer","text":"
AbstractDataTransformer()\n

Bases: ABC

Abstract class for data transformers.

Data transformers implement in_place or augmentation transformations. Whether a transformer is in_place or an augmentation is specified by the "add_row" attribute (True or False, set in the child class constructor).

Child classes should override the transform and transform_all methods.

transform_all should always return a list

Both methods should take an optional seed argument set to None by default to be compliant with stimulus' core principle of reproducibility. Seed should be initialized through np.random.seed(seed) in the method implementation.

Attributes:

  • add_row (bool) \u2013

    whether the transformer adds rows to the data

Methods:

  • transform \u2013

    transforms a data point

  • transform_all \u2013

    transforms a list of data points

Methods:

  • transform \u2013

    Transforms a single data point.

  • transform_all \u2013

    Transforms a list of data points.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self) -> None:\n    \"\"\"Initialize the data transformer.\"\"\"\n    self.add_row: bool = False\n    self.seed: int = 42\n
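As an illustration (not part of the library), a minimal in-place transformer could look like the hypothetical class below. It keeps add_row = False and threads an optional seed through both methods, as the docstring above asks; the class name and the jitter behaviour are made up.

from typing import Any, Optional

import numpy as np

from stimulus.data.transform.data_transformation_generators import AbstractDataTransformer


class JitterTransformer(AbstractDataTransformer):
    """Hypothetical in-place transformer adding small uniform jitter to numeric values."""

    def __init__(self, amount: float = 0.01) -> None:
        super().__init__()
        self.add_row = False  # in-place: does not add rows to the data
        self.amount = amount

    def transform(self, data: Any, seed: Optional[int] = None) -> Any:
        np.random.seed(seed)
        return data + np.random.uniform(-self.amount, self.amount)

    def transform_all(self, data: list, seed: Optional[int] = None) -> list:
        np.random.seed(seed)
        return [x + np.random.uniform(-self.amount, self.amount) for x in data]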
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractDataTransformer.transform","title":"transform abstractmethod","text":"
transform(data: Any) -> Any\n

Transforms a single data point.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (Any) \u2013

    the data to be transformed

Returns:

  • transformed_data ( Any ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform(self, data: Any) -> Any:\n    \"\"\"Transforms a single data point.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (Any): the data to be transformed\n\n    Returns:\n        transformed_data (Any): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractDataTransformer.transform_all","title":"transform_all abstractmethod","text":"
transform_all(data: list) -> list\n

Transforms a list of data points.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (list) \u2013

    the data to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform_all(self, data: list) -> list:\n    \"\"\"Transforms a list of data points.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (list): the data to be transformed\n\n    Returns:\n        transformed_data (list): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractNoiseGenerator","title":"AbstractNoiseGenerator","text":"
AbstractNoiseGenerator()\n

Bases: AbstractDataTransformer

Abstract class for noise generators.

All noise functions should carry the seed themselves, because running them under multiprocessing could otherwise unset the seed.

Methods:

  • transform \u2013

    Transforms a single data point.

  • transform_all \u2013

    Transforms a list of data points.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self) -> None:\n    \"\"\"Initialize the noise generator.\"\"\"\n    super().__init__()\n    self.add_row = False\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractNoiseGenerator.transform","title":"transform abstractmethod","text":"
transform(data: Any) -> Any\n

Transforms a single data point.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (Any) \u2013

    the data to be transformed

Returns:

  • transformed_data ( Any ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform(self, data: Any) -> Any:\n    \"\"\"Transforms a single data point.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (Any): the data to be transformed\n\n    Returns:\n        transformed_data (Any): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractNoiseGenerator.transform_all","title":"transform_all abstractmethod","text":"
transform_all(data: list) -> list\n

Transforms a list of data points.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (list) \u2013

    the data to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform_all(self, data: list) -> list:\n    \"\"\"Transforms a list of data points.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (list): the data to be transformed\n\n    Returns:\n        transformed_data (list): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianChunk","title":"GaussianChunk","text":"
GaussianChunk(\n    chunk_size: int, seed: int = 42, std: float = 1\n)\n

Bases: AbstractAugmentationGenerator

Subset data around a random midpoint.

This augmentation strategy extracts chunks from the input sequences, with the chunk midpoints drawn from a Gaussian distribution.

Concretely, it shifts the middle position (i.e. the peak summit) to a new position drawn from a Gaussian distribution, so positions close to the original midpoint are more likely to be chosen than distant ones. A chunk of size chunk_size around the new midpoint is then returned. This process is repeated for each sequence by transform_all.

Methods:

  • transform \u2013

    chunk a single list

  • transform_all \u2013

    chunks multiple lists

Parameters:

  • chunk_size (int) \u2013

    Size of chunks to extract

  • seed (int, default: 42 ) \u2013

    Random seed for reproducibility

  • std (float, default: 1 ) \u2013

    Standard deviation for the Gaussian distribution

Methods:

  • transform \u2013

    Chunks a sequence of size chunk_size from the middle position +/- a value obtained through a gaussian distribution.

  • transform_all \u2013

    Adds chunks to multiple lists using multiprocessing.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self, chunk_size: int, seed: int = 42, std: float = 1) -> None:\n    \"\"\"Initialize the Gaussian chunk generator.\n\n    Args:\n        chunk_size: Size of chunks to extract\n        seed: Random seed for reproducibility\n        std: Standard deviation for the Gaussian distribution\n    \"\"\"\n    super().__init__()\n    self.chunk_size = chunk_size\n    self.seed = seed\n    self.std = std\n
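A minimal usage sketch based on the source above; the sequence literal and parameter values are invented for illustration.

from stimulus.data.transform.data_transformation_generators import GaussianChunk

chunker = GaussianChunk(chunk_size=10, seed=42, std=2.0)
sequence = "ACGTACGTACGTACGTACGTACGT"  # 24 bases, longer than chunk_size

chunk = chunker.transform(sequence)
print(len(chunk))  # 10: a window of size chunk_size around the Gaussian-shifted midpoint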
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianChunk.transform","title":"transform","text":"
transform(data: str) -> str\n

Chunks a sequence of size chunk_size from the middle position +/- a value obtained through a gaussian distribution.

Parameters:

  • data (str) \u2013

    the sequence to be transformed

Returns:

  • transformed_data ( str ) \u2013

    the chunk of the sequence

Raises:

  • ValueError \u2013

    if the input data is not longer than the chunk size

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform(self, data: str) -> str:\n    \"\"\"Chunks a sequence of size chunk_size from the middle position +/- a value obtained through a gaussian distribution.\n\n    Args:\n        data (str): the sequence to be transformed\n\n    Returns:\n        transformed_data (str): the chunk of the sequence\n\n    Raises:\n        AssertionError: if the input data is shorter than the chunk size\n    \"\"\"\n    np.random.seed(self.seed)\n\n    # make sure that the data is longer than chunk_size otherwise raise an error\n    if len(data) <= self.chunk_size:\n        raise ValueError(\"The input data is shorter than the chunk size\")\n\n    # Get the middle position of the input sequence\n    middle_position = len(data) // 2\n\n    # Change the middle position by a value obtained through a gaussian distribution\n    new_middle_position = int(middle_position + np.random.normal(0, self.std))\n\n    # Get the start and end position of the chunk\n    start_position = new_middle_position - self.chunk_size // 2\n    end_position = new_middle_position + self.chunk_size // 2\n\n    # if the start position is negative, set it to 0\n    start_position = max(start_position, 0)\n\n    # Get the chunk of size chunk_size from the start position if the end position is smaller than the length of the data\n    if end_position < len(data):\n        return data[start_position : start_position + self.chunk_size]\n    # Otherwise return the chunk of the sequence from the end of the sequence of size chunk_size\n    return data[-self.chunk_size :]\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianChunk.transform_all","title":"transform_all","text":"
transform_all(data: list) -> list\n

Adds chunks to multiple lists using multiprocessing.

Parameters:

  • data (list) \u2013

    the sequences to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed sequences

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform_all(self, data: list) -> list:\n    \"\"\"Adds chunks to multiple lists using multiprocessing.\n\n    Args:\n        data (list): the sequences to be transformed\n\n    Returns:\n        transformed_data (list): the transformed sequences\n    \"\"\"\n    with mp.Pool(mp.cpu_count()) as pool:\n        function_specific_input = list(data)\n        return pool.starmap(self.transform, function_specific_input)\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianNoise","title":"GaussianNoise","text":"
GaussianNoise(\n    mean: float = 0, std: float = 1, seed: int = 42\n)\n

Bases: AbstractNoiseGenerator

Add Gaussian noise to data.

This noise generator adds Gaussian noise to float values.

Methods:

  • transform \u2013

    adds noise to a single data point

  • transform_all \u2013

    adds noise to a list of data points

Parameters:

  • mean (float, default: 0 ) \u2013

    Mean of the Gaussian noise

  • std (float, default: 1 ) \u2013

    Standard deviation of the Gaussian noise

  • seed (int, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • transform \u2013

    Adds Gaussian noise to a single point of data.

  • transform_all \u2013

    Adds Gaussian noise to a list of data points.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self, mean: float = 0, std: float = 1, seed: int = 42) -> None:\n    \"\"\"Initialize the Gaussian noise generator.\n\n    Args:\n        mean: Mean of the Gaussian noise\n        std: Standard deviation of the Gaussian noise\n        seed: Random seed for reproducibility\n    \"\"\"\n    super().__init__()\n    self.mean = mean\n    self.std = std\n    self.seed = seed\n
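A short usage sketch; the numeric inputs are invented for illustration.

from stimulus.data.transform.data_transformation_generators import GaussianNoise

noiser = GaussianNoise(mean=0.0, std=0.1, seed=42)

noisy_point = noiser.transform(1.0)                 # one float plus a single Gaussian draw
noisy_list = noiser.transform_all([1.0, 2.0, 3.0])  # one Gaussian draw per element
print(noisy_point, noisy_list)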
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianNoise.transform","title":"transform","text":"
transform(data: float) -> float\n

Adds Gaussian noise to a single point of data.

Parameters:

  • data (float) \u2013

    the data to be transformed

Returns:

  • transformed_data ( float ) \u2013

    the transformed data point

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform(self, data: float) -> float:\n    \"\"\"Adds Gaussian noise to a single point of data.\n\n    Args:\n        data (float): the data to be transformed\n\n    Returns:\n        transformed_data (float): the transformed data point\n    \"\"\"\n    np.random.seed(self.seed)\n    return data + np.random.normal(self.mean, self.std)\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianNoise.transform_all","title":"transform_all","text":"
transform_all(data: list) -> list\n

Adds Gaussian noise to a list of data points.

Parameters:

  • data (list) \u2013

    the data to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed data points

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform_all(self, data: list) -> list:\n    \"\"\"Adds Gaussian noise to a list of data points.\n\n    Args:\n        data (list): the data to be transformed\n\n    Returns:\n        transformed_data (list): the transformed data points\n    \"\"\"\n    np.random.seed(self.seed)\n    return list(np.array(data) + np.random.normal(self.mean, self.std, len(data)))\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.ReverseComplement","title":"ReverseComplement","text":"
ReverseComplement(sequence_type: str = 'DNA')\n

Bases: AbstractAugmentationGenerator

Reverse complement biological sequences.

This augmentation strategy reverse complements the input nucleotide sequences.

Methods:

  • transform \u2013

    reverse complements a single data point

  • transform_all \u2013

    reverse complements a list of data points

Raises:

  • ValueError \u2013

    if the type of the sequence is not DNA or RNA

Parameters:

  • sequence_type (str, default: 'DNA' ) \u2013

    Type of sequence ('DNA' or 'RNA')

Methods:

  • transform \u2013

    Returns the reverse complement of a list of string data using the complement_mapping.

  • transform_all \u2013

    Reverse complement multiple data points using multiprocessing.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self, sequence_type: str = \"DNA\") -> None:\n    \"\"\"Initialize the reverse complement generator.\n\n    Args:\n        sequence_type: Type of sequence ('DNA' or 'RNA')\n    \"\"\"\n    super().__init__()\n    if sequence_type not in (\"DNA\", \"RNA\"):\n        raise ValueError(\n            \"Currently only DNA and RNA sequences are supported. Update the class ReverseComplement to support other types.\",\n        )\n    if sequence_type == \"DNA\":\n        self.complement_mapping = str.maketrans(\"ATCG\", \"TAGC\")\n    elif sequence_type == \"RNA\":\n        self.complement_mapping = str.maketrans(\"AUCG\", \"UAGC\")\n
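A brief usage sketch grounded in the source above; the example sequences are arbitrary.

from stimulus.data.transform.data_transformation_generators import ReverseComplement

rc_dna = ReverseComplement(sequence_type="DNA")
print(rc_dna.transform("ATCG"))  # "CGAT": complement each base, then reverse

rc_rna = ReverseComplement(sequence_type="RNA")
print(rc_rna.transform("AUCG"))  # "CGAU"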
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.ReverseComplement.transform","title":"transform","text":"
transform(data: str) -> str\n

Returns the reverse complement of a list of string data using the complement_mapping.

Parameters:

  • data (str) \u2013

    the sequence to be transformed

Returns:

  • transformed_data ( str ) \u2013

    the reverse complement of the sequence

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform(self, data: str) -> str:\n    \"\"\"Returns the reverse complement of a list of string data using the complement_mapping.\n\n    Args:\n        data (str): the sequence to be transformed\n\n    Returns:\n        transformed_data (str): the reverse complement of the sequence\n    \"\"\"\n    return data.translate(self.complement_mapping)[::-1]\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.ReverseComplement.transform_all","title":"transform_all","text":"
transform_all(data: list) -> list\n

Reverse complement multiple data points using multiprocessing.

Parameters:

  • data (list) \u2013

    the sequences to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the reverse complement of the sequences

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform_all(self, data: list) -> list:\n    \"\"\"Reverse complement multiple data points using multiprocessing.\n\n    Args:\n        data (list): the sequences to be transformed\n\n    Returns:\n        transformed_data (list): the reverse complement of the sequences\n    \"\"\"\n    with mp.Pool(mp.cpu_count()) as pool:\n        function_specific_input = list(data)\n        return pool.map(self.transform, function_specific_input)\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.UniformTextMasker","title":"UniformTextMasker","text":"
UniformTextMasker(\n    probability: float = 0.1,\n    mask: str = \"*\",\n    seed: int = 42,\n)\n

Bases: AbstractNoiseGenerator

Mask characters in text.

This noise generator replaces characters with a masking character with a given probability.

Methods:

  • transform \u2013

    adds character masking to a single data point

  • transform_all \u2013

    adds character masking to a list of data points

Parameters:

  • probability (float, default: 0.1 ) \u2013

    Probability of masking each character

  • mask (str, default: '*' ) \u2013

    Character to use for masking

  • seed (int, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • transform \u2013

    Adds character masking to the data.

  • transform_all \u2013

    Adds character masking to multiple data points using multiprocessing.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self, probability: float = 0.1, mask: str = \"*\", seed: int = 42) -> None:\n    \"\"\"Initialize the text masker.\n\n    Args:\n        probability: Probability of masking each character\n        mask: Character to use for masking\n        seed: Random seed for reproducibility\n    \"\"\"\n    super().__init__()\n    self.probability = probability\n    self.mask = mask\n    self.seed = seed\n
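A short usage sketch; the input string and parameters are invented for illustration.

from stimulus.data.transform.data_transformation_generators import UniformTextMasker

masker = UniformTextMasker(probability=0.2, mask="*", seed=42)
print(masker.transform("ACGTACGTACGT"))  # same length, with roughly 20% of characters replaced by "*"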
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.UniformTextMasker.transform","title":"transform","text":"
transform(data: str) -> str\n

Adds character masking to the data.

Parameters:

  • data (str) \u2013

    the data to be transformed

Returns:

  • transformed_data ( str ) \u2013

    the transformed data point

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform(self, data: str) -> str:\n    \"\"\"Adds character masking to the data.\n\n    Args:\n        data (str): the data to be transformed\n\n    Returns:\n        transformed_data (str): the transformed data point\n    \"\"\"\n    np.random.seed(self.seed)\n    return \"\".join([c if np.random.rand() > self.probability else self.mask for c in data])\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.UniformTextMasker.transform_all","title":"transform_all","text":"
transform_all(data: list) -> list\n

Adds character masking to multiple data points using multiprocessing.

Parameters:

  • data (list) \u2013

    the data to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed data points

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform_all(self, data: list) -> list:\n    \"\"\"Adds character masking to multiple data points using multiprocessing.\n\n    Args:\n        data (list): the data to be transformed\n\n\n    Returns:\n        transformed_data (list): the transformed data points\n    \"\"\"\n    with mp.Pool(mp.cpu_count()) as pool:\n        function_specific_input = list(data)\n        return pool.starmap(self.transform, function_specific_input)\n
"},{"location":"reference/stimulus/learner/","title":"stimulus.learner","text":""},{"location":"reference/stimulus/learner/#stimulus.learner","title":"learner","text":"

Learner package for model training and evaluation.

Modules:

  • predict \u2013

    A module for making predictions with PyTorch models using DataLoaders.

  • raytune_learner \u2013

    Ray Tune wrapper and trainable model classes for hyperparameter optimization.

  • raytune_parser \u2013

    Ray Tune results parser for extracting and saving best model configurations and weights.

"},{"location":"reference/stimulus/learner/predict/","title":"stimulus.learner.predict","text":""},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict","title":"predict","text":"

A module for making predictions with PyTorch models using DataLoaders.

Classes:

  • PredictWrapper \u2013

    A wrapper to predict the output of a model on a dataset loaded into a torch DataLoader.

"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper","title":"PredictWrapper","text":"
PredictWrapper(\n    model: Module,\n    dataloader: DataLoader,\n    loss_dict: Optional[dict[str, Any]] = None,\n)\n

A wrapper to predict the output of a model on a dataset loaded into a torch DataLoader.

It also provides functionality to measure the performance of the model.

Parameters:

  • model (Module) \u2013

    The PyTorch model to make predictions with

  • dataloader (DataLoader) \u2013

    DataLoader containing the evaluation data

  • loss_dict (Optional[dict[str, Any]], default: None ) \u2013

    Optional dictionary of loss functions

Methods:

  • compute_loss \u2013

    Compute the loss.

  • compute_metric \u2013

    Wrapper to compute the performance metric.

  • compute_metrics \u2013

    Wrapper to compute the performance metrics.

  • compute_other_metric \u2013

    Compute the performance metric.

  • handle_predictions \u2013

    Handle the model outputs from forward pass, into a dictionary of tensors, just like y.

  • predict \u2013

    Get the model predictions.

Source code in src/stimulus/learner/predict.py
def __init__(self, model: nn.Module, dataloader: DataLoader, loss_dict: Optional[dict[str, Any]] = None) -> None:\n    \"\"\"Initialize the PredictWrapper.\n\n    Args:\n        model: The PyTorch model to make predictions with\n        dataloader: DataLoader containing the evaluation data\n        loss_dict: Optional dictionary of loss functions\n    \"\"\"\n    self.model = model\n    self.dataloader = dataloader\n    self.loss_dict = loss_dict\n    try:\n        self.model.eval()\n    except RuntimeError as e:\n        # Using logging instead of print\n        import logging\n\n        logging.warning(\"Not able to run model.eval: %s\", str(e))\n
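The sketch below shows the batch structure the wrapper expects; ToyDataset and ToyModel are invented for illustration and only mimic a stimulus-style dataset that yields (x, y, meta) tuples of dictionaries.

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

from stimulus.learner.predict import PredictWrapper


class ToyDataset(Dataset):
    """Yields (x, y, meta) tuples of dictionaries, as the wrapper expects."""

    def __len__(self) -> int:
        return 8

    def __getitem__(self, idx: int):
        x = {"feature": torch.randn(4)}
        y = {"target": torch.randn(1)}
        meta = {"id": idx}
        return x, y, meta


class ToyModel(nn.Module):
    """Forward must accept the x dictionary unpacked as keyword arguments."""

    def __init__(self) -> None:
        super().__init__()
        self.linear = nn.Linear(4, 1)

    def forward(self, feature: torch.Tensor) -> torch.Tensor:
        return self.linear(feature)


wrapper = PredictWrapper(ToyModel(), DataLoader(ToyDataset(), batch_size=4))
predictions = wrapper.predict()  # {"target": tensor of shape (8, 1)}
print(predictions["target"].shape)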
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.compute_loss","title":"compute_loss","text":"
compute_loss() -> float\n

Compute the loss.

The current implementation computes the loss for each batch and then averages them. TODO: we could potentially summarize the loss across batches in a different way, and we may eventually have more than one loss.

Source code in src/stimulus/learner/predict.py
def compute_loss(self) -> float:\n    \"\"\"Compute the loss.\n\n    The current implmentation basically computes the loss for each batch and then averages them.\n    TODO we could potentially summarize the los across batches in a different way.\n    Or sometimes we may potentially even have 1+ losses.\n    \"\"\"\n    if self.loss_dict is None:\n        raise ValueError(\"Loss function is not provided.\")\n    loss = 0.0\n    with torch.no_grad():\n        for x, y, _ in self.dataloader:\n            # the loss_dict could be unpacked with ** and the function declaration handle it differently like **kwargs. to be decided, personally find this more clean and understable.\n            current_loss = self.model.batch(x=x, y=y, **self.loss_dict)[0]\n            loss += current_loss.item()\n    return loss / len(self.dataloader)\n
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.compute_metric","title":"compute_metric","text":"
compute_metric(metric: str = 'loss') -> float\n

Wrapper to compute the performance metric.

Source code in src/stimulus/learner/predict.py
def compute_metric(self, metric: str = \"loss\") -> float:\n    \"\"\"Wrapper to compute the performance metric.\"\"\"\n    if metric == \"loss\":\n        return self.compute_loss()\n    return self.compute_other_metric(metric)\n
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.compute_metrics","title":"compute_metrics","text":"
compute_metrics(metrics: list[str]) -> dict[str, float]\n

Wrapper to compute the performance metrics.

Source code in src/stimulus/learner/predict.py
def compute_metrics(self, metrics: list[str]) -> dict[str, float]:\n    \"\"\"Wrapper to compute the performance metrics.\"\"\"\n    return {m: self.compute_metric(m) for m in metrics}\n
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.compute_other_metric","title":"compute_other_metric","text":"
compute_other_metric(metric: str) -> float\n

Compute the performance metric.

"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.compute_other_metric--todo-currently-we-computes-the-average-performance-metric-across-target-y-but-maybe-in-the-future-we-want-something-different","title":"TODO currently we computes the average performance metric across target y, but maybe in the future we want something different","text":"Source code in src/stimulus/learner/predict.py
def compute_other_metric(self, metric: str) -> float:\n    \"\"\"Compute the performance metric.\n\n    # TODO currently we computes the average performance metric across target y, but maybe in the future we want something different\n    \"\"\"\n    if not hasattr(self, \"predictions\") or not hasattr(self, \"labels\"):\n        predictions, labels = self.predict(return_labels=True)\n        self.predictions = predictions\n        self.labels = labels\n\n    # Explicitly type the labels and predictions as dictionaries with str keys\n    labels_dict: dict[str, Tensor] = self.labels if isinstance(self.labels, dict) else {}\n    predictions_dict: dict[str, Tensor] = self.predictions if isinstance(self.predictions, dict) else {}\n\n    return sum(\n        Performance(labels=labels_dict[k], predictions=predictions_dict[k], metric=metric).val for k in labels_dict\n    ) / len(labels_dict)\n
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.handle_predictions","title":"handle_predictions","text":"
handle_predictions(\n    predictions: Any, y: dict[str, Tensor]\n) -> dict[str, Tensor]\n

Handle the model outputs from forward pass, into a dictionary of tensors, just like y.

Source code in src/stimulus/learner/predict.py
def handle_predictions(self, predictions: Any, y: dict[str, Tensor]) -> dict[str, Tensor]:\n    \"\"\"Handle the model outputs from forward pass, into a dictionary of tensors, just like y.\"\"\"\n    if len(y) == 1:\n        return {next(iter(y.keys())): predictions}\n    return dict(zip(y.keys(), predictions))\n
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.predict","title":"predict","text":"
predict(*, return_labels: bool = False) -> Union[\n    dict[str, Tensor],\n    tuple[dict[str, Tensor], dict[str, Tensor]],\n]\n

Get the model predictions.

It runs a forward pass of the model on each batch, collects the predictions, and concatenates them across all batches. Since each current_predictions holds tensors computed for a single batch, the final predictions are obtained by concatenating them.

At the end it returns predictions as a dictionary of tensors with the same keys as y.

If return_labels is True, the labels are returned as well, also as a dictionary of tensors.

Parameters:

  • return_labels (bool, default: False ) \u2013

    Whether to also return the labels

Returns:

  • Union[dict[str, Tensor], tuple[dict[str, Tensor], dict[str, Tensor]]] \u2013

    Dictionary of predictions, and optionally labels

Source code in src/stimulus/learner/predict.py
def predict(\n    self,\n    *,\n    return_labels: bool = False,\n) -> Union[dict[str, Tensor], tuple[dict[str, Tensor], dict[str, Tensor]]]:\n    \"\"\"Get the model predictions.\n\n    Basically, it runs a foward pass on the model for each batch,\n    gets the predictions and concatenate them for all batches.\n    Since the returned `current_predictions` are formed by tensors computed for one batch,\n    the final `predictions` are obtained by concatenating them.\n\n    At the end it returns `predictions` as a dictionary of tensors with the same keys as `y`.\n\n    If return_labels if True, then the `labels` will be returned as well, also as a dictionary of tensors.\n\n    Args:\n        return_labels: Whether to also return the labels\n\n    Returns:\n        Dictionary of predictions, and optionally labels\n    \"\"\"\n    # create empty dictionaries with the column names\n    first_batch = next(iter(self.dataloader))\n    keys = first_batch[1].keys()\n    predictions: dict[str, list[Tensor]] = {k: [] for k in keys}\n    labels: dict[str, list[Tensor]] = {k: [] for k in keys}\n\n    # get the predictions (and labels) for each batch\n    with torch.no_grad():\n        for x, y, _ in self.dataloader:\n            current_predictions = self.model(**x)\n            current_predictions = self.handle_predictions(current_predictions, y)\n            for k in keys:\n                # it might happen that the batch consists of one element only so the torch.cat will fail. To prevent this the function to ensure at least one dimensionality is called.\n                predictions[k].append(ensure_at_least_1d(current_predictions[k]))\n                if return_labels:\n                    labels[k].append(ensure_at_least_1d(y[k]))\n\n    # return the predictions (and labels) as a dictionary of tensors for the entire dataset.\n    if not return_labels:\n        return {k: torch.cat(v) for k, v in predictions.items()}\n    return {k: torch.cat(v) for k, v in predictions.items()}, {k: torch.cat(v) for k, v in labels.items()}\n
"},{"location":"reference/stimulus/learner/raytune_learner/","title":"stimulus.learner.raytune_learner","text":""},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner","title":"raytune_learner","text":"

Ray Tune wrapper and trainable model classes for hyperparameter optimization.

Classes:

  • CheckpointDict \u2013

    Dictionary type for checkpoint data.

  • TuneModel \u2013

    Trainable model class for Ray Tune.

  • TuneWrapper \u2013

    Wrapper class for Ray Tune hyperparameter optimization.

"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.CheckpointDict","title":"CheckpointDict","text":"

Bases: TypedDict

Dictionary type for checkpoint data.

"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel","title":"TuneModel","text":"

Bases: Trainable

Trainable model class for Ray Tune.

Methods:

  • export_model \u2013

    Export model to safetensors format.

  • load_checkpoint \u2013

    Load model and optimizer state from checkpoint.

  • objective \u2013

    Compute the objective metric(s) for the tuning process.

  • save_checkpoint \u2013

    Save model and optimizer state to checkpoint.

  • setup \u2013

    Get the model, loss function(s), optimizer, train and test data from the config.

  • step \u2013

    For each batch in the training data, calculate the loss and update the model parameters.

"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.export_model","title":"export_model","text":"
export_model(export_dir: str | None = None) -> None\n

Export model to safetensors format.

Source code in src/stimulus/learner/raytune_learner.py
def export_model(self, export_dir: str | None = None) -> None:  # type: ignore[override]\n    \"\"\"Export model to safetensors format.\"\"\"\n    if export_dir is None:\n        return\n    safe_save_model(self.model, os.path.join(export_dir, \"model.safetensors\"))\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.load_checkpoint","title":"load_checkpoint","text":"
load_checkpoint(checkpoint: dict[Any, Any] | None) -> None\n

Load model and optimizer state from checkpoint.

Source code in src/stimulus/learner/raytune_learner.py
def load_checkpoint(self, checkpoint: dict[Any, Any] | None) -> None:\n    \"\"\"Load model and optimizer state from checkpoint.\"\"\"\n    if checkpoint is None:\n        return\n    checkpoint_dir = checkpoint[\"checkpoint_dir\"]\n    self.model = safe_load_model(self.model, os.path.join(checkpoint_dir, \"model.safetensors\"))\n    self.optimizer.load_state_dict(torch.load(os.path.join(checkpoint_dir, \"optimizer.pt\")))\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.objective","title":"objective","text":"
objective() -> dict[str, float]\n

Compute the objective metric(s) for the tuning process.

Source code in src/stimulus/learner/raytune_learner.py
def objective(self) -> dict[str, float]:\n    \"\"\"Compute the objective metric(s) for the tuning process.\"\"\"\n    metrics = [\n        \"loss\",\n        \"rocauc\",\n        \"prauc\",\n        \"mcc\",\n        \"f1score\",\n        \"precision\",\n        \"recall\",\n        \"spearmanr\",\n    ]  # TODO maybe we report only a subset of metrics, given certain criteria (eg. if classification or regression)\n    predict_val = PredictWrapper(self.model, self.validation, loss_dict=self.loss_dict)\n    predict_train = PredictWrapper(self.model, self.training, loss_dict=self.loss_dict)\n    return {\n        **{\"val_\" + metric: value for metric, value in predict_val.compute_metrics(metrics).items()},\n        **{\"train_\" + metric: value for metric, value in predict_train.compute_metrics(metrics).items()},\n    }\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.save_checkpoint","title":"save_checkpoint","text":"
save_checkpoint(checkpoint_dir: str) -> dict[Any, Any]\n

Save model and optimizer state to checkpoint.

Source code in src/stimulus/learner/raytune_learner.py
def save_checkpoint(self, checkpoint_dir: str) -> dict[Any, Any]:\n    \"\"\"Save model and optimizer state to checkpoint.\"\"\"\n    safe_save_model(self.model, os.path.join(checkpoint_dir, \"model.safetensors\"))\n    torch.save(self.optimizer.state_dict(), os.path.join(checkpoint_dir, \"optimizer.pt\"))\n    return {\"checkpoint_dir\": checkpoint_dir}\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.setup","title":"setup","text":"
setup(config: dict[Any, Any]) -> None\n

Get the model, loss function(s), optimizer, train and test data from the config.

Source code in src/stimulus/learner/raytune_learner.py
def setup(self, config: dict[Any, Any]) -> None:\n    \"\"\"Get the model, loss function(s), optimizer, train and test data from the config.\"\"\"\n    # set the seeds the second time, first in TuneWrapper initialization\n    set_general_seeds(self.config[\"ray_worker_seed\"])\n\n    # Initialize model with the config params\n    self.model = config[\"model\"](**config[\"network_params\"])\n\n    # Get the loss function(s) from the config model params\n    self.loss_dict = config[\"loss_params\"]\n    for key, loss_fn in self.loss_dict.items():\n        try:\n            self.loss_dict[key] = getattr(nn, loss_fn)()\n        except AttributeError as err:\n            raise ValueError(\n                f\"Invalid loss function: {loss_fn}, check PyTorch for documentation on available loss functions\",\n            ) from err\n\n    # get the optimizer parameters\n    optimizer_lr = config[\"optimizer_params\"][\"lr\"]\n    self.optimizer = getattr(optim, config[\"optimizer_params\"][\"method\"])(\n        self.model.parameters(),\n        lr=optimizer_lr,\n    )\n\n    # get step size from the config\n    self.step_size = config[\"tune\"][\"step_size\"]\n\n    # Get datasets from Ray's object store\n    training, validation = ray.get(self.config[\"_training_ref\"]), ray.get(self.config[\"_validation_ref\"])\n\n    # use dataloader on training/validation data\n    self.batch_size = config[\"data_params\"][\"batch_size\"]\n    self.training = DataLoader(\n        training,\n        batch_size=self.batch_size,\n        shuffle=True,\n    )\n    self.validation = DataLoader(\n        validation,\n        batch_size=self.batch_size,\n        shuffle=True,\n    )\n\n    # debug section, first create a dedicated directory for each worker inside Ray_results/<tune_model_run_specific_dir> location\n    debug_dir = os.path.join(\n        config[\"tune_run_path\"],\n        \"debug\",\n        (\"worker_with_seed_\" + str(self.config[\"ray_worker_seed\"])),\n    )\n    if config[\"_debug\"]:\n        # creating a special directory for it one that is worker/trial/experiment specific\n        os.makedirs(debug_dir)\n        seed_filename = os.path.join(debug_dir, \"seeds.txt\")\n\n        # save the initialized model weights\n        self.export_model(export_dir=debug_dir)\n\n        # save the seeds\n        with open(seed_filename, \"a\") as seed_f:\n            # you can not retrieve the actual seed once it set, or the current seed neither for python, numpy nor torch. so we select five numbers randomly. If that is the first draw of numbers they are always the same.\n            python_values = random.sample(range(100), 5)\n            numpy_values = list(np.random.randint(0, 100, size=5))\n            torch_values = torch.randint(0, 100, (5,)).tolist()\n            seed_f.write(\n                f\"python drawn numbers : {python_values}\\nnumpy drawn numbers : {numpy_values}\\ntorch drawn numbers : {torch_values}\\n\",\n            )\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.step","title":"step","text":"
step() -> dict\n

For each batch in the training data, calculate the loss and update the model parameters.

This calculation is performed based on the model's batch function. At the end, it returns the objective metric(s) for the tuning process.

Source code in src/stimulus/learner/raytune_learner.py
def step(self) -> dict:\n    \"\"\"For each batch in the training data, calculate the loss and update the model parameters.\n\n    This calculation is performed based on the model's batch function.\n    At the end, return the objective metric(s) for the tuning process.\n    \"\"\"\n    for _step_size in range(self.step_size):\n        for x, y, _meta in self.training:\n            # the loss dict could be unpacked with ** and the function declaration handle it differently like **kwargs. to be decided, personally find this more clean and understable.\n            self.model.batch(x=x, y=y, optimizer=self.optimizer, **self.loss_dict)\n    return self.objective()\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneWrapper","title":"TuneWrapper","text":"
TuneWrapper(\n    model_config: RayTuneModel,\n    data_config_path: str,\n    model_class: Module,\n    data_path: str,\n    encoder_loader: EncoderLoader,\n    seed: int,\n    ray_results_dir: Optional[str] = None,\n    tune_run_name: Optional[str] = None,\n    *,\n    debug: bool = False,\n    autoscaler: bool = False\n)\n

Wrapper class for Ray Tune hyperparameter optimization.

Methods:

  • tune \u2013

    Run the tuning process.

  • tuner_initialization \u2013

    Prepare the tuner with the configs.

Source code in src/stimulus/learner/raytune_learner.py
def __init__(\n    self,\n    model_config: RayTuneModel,\n    data_config_path: str,\n    model_class: nn.Module,\n    data_path: str,\n    encoder_loader: EncoderLoader,\n    seed: int,\n    ray_results_dir: Optional[str] = None,\n    tune_run_name: Optional[str] = None,\n    *,\n    debug: bool = False,\n    autoscaler: bool = False,\n) -> None:\n    \"\"\"Initialize the TuneWrapper with the paths to the config, model, and data.\"\"\"\n    self.config = model_config.model_dump()\n\n    # set all general seeds: python, numpy and torch.\n    set_general_seeds(seed)\n\n    # build the tune config:\n    try:\n        scheduler_class = getattr(\n            tune.schedulers,\n            model_config.tune.scheduler.name,\n        )  # todo, do this in RayConfigLoader\n    except AttributeError as err:\n        raise ValueError(\n            f\"Invalid scheduler: {model_config.tune.scheduler.name}, check Ray Tune for documentation on available schedulers\",\n        ) from err\n\n    scheduler = scheduler_class(**model_config.tune.scheduler.params)\n    self.tune_config = tune.TuneConfig(\n        metric=model_config.tune.tune_params.metric,\n        mode=model_config.tune.tune_params.mode,\n        num_samples=model_config.tune.tune_params.num_samples,\n        scheduler=scheduler,\n    )\n\n    # build the run config\n    self.run_config = train.RunConfig(\n        name=tune_run_name\n        if tune_run_name is not None\n        else \"TuneModel_\" + datetime.datetime.now(tz=datetime.timezone.utc).strftime(\"%Y-%m-%d_%H-%M-%S\"),\n        storage_path=ray_results_dir,\n        checkpoint_config=train.CheckpointConfig(checkpoint_at_end=True),\n        stop=model_config.tune.run_params.stop,\n    )\n\n    # add the data path to the config\n    if not os.path.exists(data_path):\n        raise ValueError(\"Data path does not exist. Given path:\" + data_path)\n    self.config[\"data_path\"] = os.path.abspath(data_path)\n\n    # Set up tune_run path\n    if ray_results_dir is None:\n        ray_results_dir = os.environ.get(\"HOME\", \"\")\n    self.config[\"tune_run_path\"] = os.path.join(\n        ray_results_dir,\n        tune_run_name\n        if tune_run_name is not None\n        else \"TuneModel_\" + datetime.datetime.now(tz=datetime.timezone.utc).strftime(\"%Y-%m-%d_%H-%M-%S\"),\n    )\n    self.config[\"_debug\"] = debug\n    self.config[\"model\"] = model_class\n    self.config[\"encoder_loader\"] = encoder_loader\n    self.config[\"ray_worker_seed\"] = tune.randint(0, 1000)\n\n    self.gpu_per_trial = model_config.tune.gpu_per_trial\n    self.cpu_per_trial = model_config.tune.cpu_per_trial\n\n    self.tuner = self.tuner_initialization(\n        data_config_path=data_config_path,\n        data_path=data_path,\n        encoder_loader=encoder_loader,\n        autoscaler=autoscaler,\n    )\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneWrapper.tune","title":"tune","text":"
tune() -> ResultGrid\n

Run the tuning process.

Source code in src/stimulus/learner/raytune_learner.py
def tune(self) -> ray.tune.ResultGrid:\n    \"\"\"Run the tuning process.\"\"\"\n    return self.tuner.fit()\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneWrapper.tuner_initialization","title":"tuner_initialization","text":"
tuner_initialization(\n    data_config_path: str,\n    data_path: str,\n    encoder_loader: EncoderLoader,\n    *,\n    autoscaler: bool = False\n) -> Tuner\n

Prepare the tuner with the configs.

Source code in src/stimulus/learner/raytune_learner.py
def tuner_initialization(\n    self,\n    data_config_path: str,\n    data_path: str,\n    encoder_loader: EncoderLoader,\n    *,\n    autoscaler: bool = False,\n) -> tune.Tuner:\n    \"\"\"Prepare the tuner with the configs.\"\"\"\n    # Get available resources from Ray cluster\n    cluster_res = cluster_resources()\n    logging.info(f\"CLUSTER resources   ->  {cluster_res}\")\n\n    # Check per-trial resources\n    try:\n        if self.gpu_per_trial > cluster_res[\"GPU\"] and not autoscaler:\n            raise ValueError(\n                \"GPU per trial is more than what is available in the cluster, set autoscaler to True to allow for autoscaler to be used.\",\n            )\n    except KeyError as err:\n        logging.warning(f\"KeyError: {err}, no GPU resources available in the cluster: {cluster_res}\")\n\n    if self.cpu_per_trial > cluster_res[\"CPU\"] and not autoscaler:\n        raise ValueError(\n            \"CPU per trial is more than what is available in the cluster, set autoscaler to True to allow for autoscaler to be used.\",\n        )\n\n    logging.info(f\"PER_TRIAL resources ->  GPU: {self.gpu_per_trial} CPU: {self.cpu_per_trial}\")\n\n    # Pre-load and encode datasets once, then put them in Ray's object store\n\n    training = TorchDataset(\n        config_path=data_config_path,\n        csv_path=data_path,\n        encoder_loader=encoder_loader,\n        split=0,\n    )\n    validation = TorchDataset(\n        config_path=data_config_path,\n        csv_path=data_path,\n        encoder_loader=encoder_loader,\n        split=1,\n    )\n\n    # log to debug the names of the columns and shapes of tensors for a batch of training\n    # Log shapes of encoded tensors for first batch of training data\n    inputs, labels, meta = training[0:10]\n\n    logging.debug(\"Training data tensor shapes:\")\n    for field, tensor in inputs.items():\n        logging.debug(f\"Input field '{field}' shape: {tensor.shape}\")\n\n    for field, tensor in labels.items():\n        logging.debug(f\"Label field '{field}' shape: {tensor.shape}\")\n\n    for field, values in meta.items():\n        logging.debug(f\"Meta field '{field}' length: {len(values)}\")\n\n    training_ref = ray.put(training)\n    validation_ref = ray.put(validation)\n\n    self.config[\"_training_ref\"] = training_ref\n    self.config[\"_validation_ref\"] = validation_ref\n\n    # Configure trainable with resources and dataset parameters\n    trainable = tune.with_resources(\n        tune.with_parameters(\n            TuneModel,\n        ),\n        resources={\"cpu\": self.cpu_per_trial, \"gpu\": self.gpu_per_trial},\n    )\n\n    return tune.Tuner(trainable, tune_config=self.tune_config, param_space=self.config, run_config=self.run_config)\n
"},{"location":"reference/stimulus/learner/raytune_parser/","title":"stimulus.learner.raytune_parser","text":""},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser","title":"raytune_parser","text":"

Ray Tune results parser for extracting and saving best model configurations and weights.

Classes:

  • RayTuneMetrics \u2013

    TypedDict for storing Ray Tune metrics results.

  • RayTuneOptimizer \u2013

    TypedDict for storing Ray Tune optimizer state.

  • RayTuneResult \u2013

    TypedDict for storing Ray Tune optimization results.

  • TuneParser \u2013

    Parser class for Ray Tune results to extract best configurations and model weights.

"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.RayTuneMetrics","title":"RayTuneMetrics","text":"

Bases: TypedDict

TypedDict for storing Ray Tune metrics results.

"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.RayTuneOptimizer","title":"RayTuneOptimizer","text":"

Bases: TypedDict

TypedDict for storing Ray Tune optimizer state.

"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.RayTuneResult","title":"RayTuneResult","text":"

Bases: TypedDict

TypedDict for storing Ray Tune optimization results.

"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser","title":"TuneParser","text":"
TuneParser(result: ResultGrid)\n

Parser class for Ray Tune results to extract best configurations and model weights.

Methods:

  • fix_config_values \u2013

    Correct config values.

  • get_best_config \u2013

    Get the best config from the results.

  • get_best_model \u2013

    Get the best model weights from the results.

  • get_best_optimizer \u2013

    Get the best optimizer state from the results.

  • save_best_config \u2013

    Save the best config to a file.

  • save_best_metrics_dataframe \u2013

    Save the dataframe with the metrics at each iteration of the best sample to a file.

  • save_best_model \u2013

    Save the best model weights to a file.

  • save_best_optimizer \u2013

    Save the best optimizer state to a file.

Source code in src/stimulus/learner/raytune_parser.py
def __init__(self, result: ResultGrid) -> None:\n    \"\"\"Initialize with the given Ray Tune result grid.\"\"\"\n    self.result: ResultGrid = result\n    self.best_result: Result = self._validate_best_result()\n
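A minimal sketch of the typical export flow; the output file names are invented, and results is assumed to be the ResultGrid returned by a completed TuneWrapper.tune() run.

from ray.tune import ResultGrid

from stimulus.learner.raytune_parser import TuneParser


def export_best_trial(results: ResultGrid, out_dir: str = ".") -> None:
    """Persist the best trial's config, weights, optimizer state and metrics."""
    parser = TuneParser(results)
    parser.save_best_config(f"{out_dir}/best_config.yaml")
    parser.save_best_model(f"{out_dir}/best_model.safetensors")
    parser.save_best_optimizer(f"{out_dir}/best_optimizer.pt")
    parser.save_best_metrics_dataframe(f"{out_dir}/best_metrics.csv")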
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.fix_config_values","title":"fix_config_values","text":"
fix_config_values(config: dict[str, Any]) -> dict[str, Any]\n

Correct config values.

This method modifies the configuration dictionary to remove or convert non-serializable objects (such as Ray ObjectRefs) so that the entire dictionary can be safely dumped to a YAML file.

Parameters:

  • config (dict[str, Any]) \u2013

    Configuration dictionary to fix.

Returns:

  • dict[str, Any] \u2013

    Fixed configuration dictionary.

Source code in src/stimulus/learner/raytune_parser.py
def fix_config_values(self, config: dict[str, Any]) -> dict[str, Any]:\n    \"\"\"Correct config values.\n\n    This method modifies the configuration dictionary to remove or convert\n    non-serializable objects (such as Ray ObjectRefs) so that the entire dictionary\n    can be safely dumped to a YAML file.\n\n    Args:\n        config: Configuration dictionary to fix.\n\n    Returns:\n        Fixed configuration dictionary.\n    \"\"\"\n    # Replace the model class with its name for serialization purposes\n    config[\"model\"] = config[\"model\"].__name__\n\n    # Remove keys that contain non-serializable objects\n    keys_to_remove = [\n        \"_debug\",\n        \"tune_run_path\",\n        \"_training_ref\",\n        \"_validation_ref\",\n        \"encoder_loader\",  # if this key holds a non-serializable object\n    ]\n    for key in keys_to_remove:\n        config.pop(key, None)\n\n    return config\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.get_best_config","title":"get_best_config","text":"
get_best_config() -> dict[str, Any]\n

Get the best config from the results.

Returns:

  • dict[str, Any] \u2013

    The configuration dictionary of the best result.

Raises:

  • ValueError \u2013

    If the config is missing.

Source code in src/stimulus/learner/raytune_parser.py
def get_best_config(self) -> dict[str, Any]:\n    \"\"\"Get the best config from the results.\n\n    Returns:\n        The configuration dictionary of the best result.\n\n    Raises:\n        ValueError: If the config is missing.\n    \"\"\"\n    config: dict[str, Any] | None = self.best_result.config\n    if config is None:\n        raise ValueError(\"Best result does not contain a configuration.\")\n    return config\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.get_best_model","title":"get_best_model","text":"
get_best_model() -> dict[str, Tensor]\n

Get the best model weights from the results.

Returns:

  • dict[str, Tensor] \u2013

    Dictionary of model weights.

Raises:

  • ValueError \u2013

    If the checkpoint is missing.

Source code in src/stimulus/learner/raytune_parser.py
def get_best_model(self) -> dict[str, torch.Tensor]:\n    \"\"\"Get the best model weights from the results.\n\n    Returns:\n        Dictionary of model weights.\n\n    Raises:\n        ValueError: If the checkpoint is missing.\n    \"\"\"\n    if self.best_result.checkpoint is None:\n        raise ValueError(\"Best result does not contain a checkpoint for the model.\")\n    checkpoint_dir: str = self.best_result.checkpoint.to_directory()\n    checkpoint: str = os.path.join(checkpoint_dir, \"model.safetensors\")\n    return safe_load_file(checkpoint)\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.get_best_optimizer","title":"get_best_optimizer","text":"
get_best_optimizer() -> dict[str, Any]\n

Get the best optimizer state from the results.

Returns:

  • dict[str, Any] \u2013

    Optimizer state dictionary.

Raises:

  • ValueError \u2013

    If the checkpoint is missing.

Source code in src/stimulus/learner/raytune_parser.py
def get_best_optimizer(self) -> dict[str, Any]:\n    \"\"\"Get the best optimizer state from the results.\n\n    Returns:\n        Optimizer state dictionary.\n\n    Raises:\n        ValueError: If the checkpoint is missing.\n    \"\"\"\n    if self.best_result.checkpoint is None:\n        raise ValueError(\"Best result does not contain a checkpoint for the optimizer.\")\n    checkpoint_dir: str = self.best_result.checkpoint.to_directory()\n    checkpoint: str = os.path.join(checkpoint_dir, \"optimizer.pt\")\n    return torch.load(checkpoint)\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.save_best_config","title":"save_best_config","text":"
save_best_config(output: str) -> None\n

Save the best config to a file.

Todo

maybe only save the relevant config values.

Parameters:

  • output (str) \u2013

    File path to save the configuration.

Source code in src/stimulus/learner/raytune_parser.py
def save_best_config(self, output: str) -> None:\n    \"\"\"Save the best config to a file.\n\n    TODO: maybe only save the relevant config values.\n\n    Args:\n        output: File path to save the configuration.\n    \"\"\"\n    config: dict[str, Any] = self.get_best_config()\n    config = self.fix_config_values(config)\n    with open(output, \"w\") as f:\n        yaml.safe_dump(config, f)\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.save_best_metrics_dataframe","title":"save_best_metrics_dataframe","text":"
save_best_metrics_dataframe(output: str) -> None\n

Save the dataframe with the metrics at each iteration of the best sample to a file.

Parameters:

  • output (str) \u2013

    CSV file path to save the metrics.

Source code in src/stimulus/learner/raytune_parser.py
def save_best_metrics_dataframe(self, output: str) -> None:\n    \"\"\"Save the dataframe with the metrics at each iteration of the best sample to a file.\n\n    Args:\n        output: CSV file path to save the metrics.\n    \"\"\"\n    metrics_df: pd.DataFrame = pd.DataFrame([self.best_result.metrics])\n    metrics_df.to_csv(output, index=False)\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.save_best_model","title":"save_best_model","text":"
save_best_model(output: str) -> None\n

Save the best model weights to a file.

This method retrieves the best model weights using the get_best_model helper which loads the model data from the checkpoint's directory, then re-saves it using safe_save_file.

Parameters:

  • output (str) \u2013

    Path where the best model weights will be saved.

Source code in src/stimulus/learner/raytune_parser.py
def save_best_model(self, output: str) -> None:\n    \"\"\"Save the best model weights to a file.\n\n    This method retrieves the best model weights using the get_best_model helper\n    which loads the model data from the checkpoint's directory, then re-saves\n    it using safe_save_file.\n\n    Args:\n        output: Path where the best model weights will be saved.\n    \"\"\"\n    model: dict[str, torch.Tensor] = self.get_best_model()\n    safe_save_file(model, output)\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.save_best_optimizer","title":"save_best_optimizer","text":"
save_best_optimizer(output: str) -> None\n

Save the best optimizer state to a file.

Parameters:

  • output (str) \u2013

    Path where the best optimizer state will be saved.

Source code in src/stimulus/learner/raytune_parser.py
def save_best_optimizer(self, output: str) -> None:\n    \"\"\"Save the best optimizer state to a file.\n\n    Args:\n        output: Path where the best optimizer state will be saved.\n    \"\"\"\n    optimizer_state: dict[str, Any] = self.get_best_optimizer()\n    torch.save(optimizer_state, output)\n
"},{"location":"reference/stimulus/typing/","title":"stimulus.typing","text":""},{"location":"reference/stimulus/typing/#stimulus.typing","title":"typing","text":"

Typing for Stimulus Python API.

This module contains all Stimulus types which will be used for variable typing and likely not instantiated, as well as aliases for other types to use for typing purposes.

The aliases from this module should be used for typing purposes only.

"},{"location":"reference/stimulus/utils/","title":"stimulus.utils","text":""},{"location":"reference/stimulus/utils/#stimulus.utils","title":"utils","text":"

Utility functions package.

Modules:

  • generic_utils \u2013

    Utility functions for general purpose operations like seed setting and tensor manipulation.

  • launch_utils \u2013

    Utility functions for launching and configuring experiments and ray tuning.

  • performance \u2013

    Utility module for computing various performance metrics for machine learning models.

  • yaml_data \u2013

    Utility module for handling YAML configuration files and their validation.

  • yaml_model_schema \u2013

    Module for handling YAML configuration files and converting them to Ray Tune format.

"},{"location":"reference/stimulus/utils/generic_utils/","title":"stimulus.utils.generic_utils","text":""},{"location":"reference/stimulus/utils/generic_utils/#stimulus.utils.generic_utils","title":"generic_utils","text":"

Utility functions for general purpose operations like seed setting and tensor manipulation.

Functions:

  • ensure_at_least_1d \u2013

    Ensure that a given tensor is not zero-dimensional; if it is, add one dimension.

  • set_general_seeds \u2013

    Set all relevant random seeds to a given value.

"},{"location":"reference/stimulus/utils/generic_utils/#stimulus.utils.generic_utils.ensure_at_least_1d","title":"ensure_at_least_1d","text":"
ensure_at_least_1d(tensor: Tensor) -> Tensor\n

Ensure that a given tensor is not zero-dimensional; if it is, add one dimension.

Source code in src/stimulus/utils/generic_utils.py
def ensure_at_least_1d(tensor: torch.Tensor) -> torch.Tensor:\n    \"\"\"Function to make sure tensors given are not zero dimensional. if they are add one dimension.\"\"\"\n    if tensor.dim() == 0:\n        tensor = tensor.unsqueeze(0)\n    return tensor\n
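A quick illustration of the behaviour:

import torch

from stimulus.utils.generic_utils import ensure_at_least_1d

scalar = torch.tensor(3.0)               # zero-dimensional tensor
print(ensure_at_least_1d(scalar).shape)  # torch.Size([1])

vector = torch.tensor([1.0, 2.0])
print(ensure_at_least_1d(vector).shape)  # torch.Size([2]); already >= 1-D, returned unchanged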
"},{"location":"reference/stimulus/utils/generic_utils/#stimulus.utils.generic_utils.set_general_seeds","title":"set_general_seeds","text":"
set_general_seeds(seed_value: Union[int, None]) -> None\n

Set all relevant random seeds to a given value.

Especially useful with ray.tune, since Ray does not provide a "generic" seed as of Ray 2.23.

Source code in src/stimulus/utils/generic_utils.py
def set_general_seeds(seed_value: Union[int, None]) -> None:\n    \"\"\"Set all relevant random seeds to a given value.\n\n    Especially useful in case of ray.tune. Ray does not have a \"generic\" seed as far as ray 2.23.\n    \"\"\"\n    # Set python seed\n    random.seed(seed_value)\n\n    # set numpy seed\n    np.random.seed(seed_value)\n\n    # set torch seed, diffrently from the two above torch can nopt take Noneas input value so it will not be called in that case.\n    if seed_value is not None:\n        torch.manual_seed(seed_value)\n
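A minimal usage sketch, mirroring the behaviour in the source above:

from stimulus.utils.generic_utils import set_general_seeds

set_general_seeds(42)    # seeds Python's random, NumPy, and torch
set_general_seeds(None)  # Python and NumPy are re-seeded from system entropy; torch is left untouched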
"},{"location":"reference/stimulus/utils/launch_utils/","title":"stimulus.utils.launch_utils","text":""},{"location":"reference/stimulus/utils/launch_utils/#stimulus.utils.launch_utils","title":"launch_utils","text":"

Utility functions for launching and configuring experiments and ray tuning.

Functions:

  • import_class_from_file \u2013

    Import and return the Model class from a specified Python file.

"},{"location":"reference/stimulus/utils/launch_utils/#stimulus.utils.launch_utils.import_class_from_file","title":"import_class_from_file","text":"
import_class_from_file(file_path: str) -> type\n

Import and return the Model class from a specified Python file.

Parameters:

  • file_path (str) \u2013

    Path to the Python file containing the Model class.

Returns:

  • type ( type ) \u2013

    The Model class found in the file.

Raises:

  • ImportError \u2013

    If no class starting with 'Model' is found in the file.

Source code in src/stimulus/utils/launch_utils.py
def import_class_from_file(file_path: str) -> type:\n    \"\"\"Import and return the Model class from a specified Python file.\n\n    Args:\n        file_path (str): Path to the Python file containing the Model class.\n\n    Returns:\n        type: The Model class found in the file.\n\n    Raises:\n        ImportError: If no class starting with 'Model' is found in the file.\n    \"\"\"\n    # Extract directory path and file name\n    directory, file_name = os.path.split(file_path)\n    module_name = os.path.splitext(file_name)[0]  # Remove extension to get module name\n\n    # Create a module from the file path\n    # In summary, these three lines of code are responsible for creating a module specification based on a file location, creating a module object from that specification, and then executing the module's code to populate the module object with the definitions from the Python file.\n    spec = importlib.util.spec_from_file_location(module_name, file_path)\n    if spec is None:\n        raise ImportError(f\"Could not create module spec for {file_path}\")\n    module = importlib.util.module_from_spec(spec)\n    if spec.loader is None:\n        raise ImportError(f\"Module spec has no loader for {file_path}\")\n    spec.loader.exec_module(module)\n\n    # Find the class dynamically\n    for name in dir(module):\n        model_class = getattr(module, name)\n        if isinstance(model_class, type) and name.startswith(\"Model\"):\n            return model_class\n\n    # Class not found\n    raise ImportError(\"No class starting with 'Model' found in the file.\")\n
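
As an illustration, a minimal usage sketch (the file path is hypothetical; per the source above, the file only needs to define a class whose name starts with 'Model'):

from stimulus.utils.launch_utils import import_class_from_file\n\n# Hypothetical file path; the file must define a class whose name starts with 'Model'.\nModelClass = import_class_from_file(\"path/to/my_model.py\")\nmodel = ModelClass()  # assumes the model needs no required constructor arguments\n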
"},{"location":"reference/stimulus/utils/performance/","title":"stimulus.utils.performance","text":""},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance","title":"performance","text":"

Utility module for computing various performance metrics for machine learning models.

Classes:

  • Performance \u2013

    Returns the value of a given metric.

"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance","title":"Performance","text":"
Performance(\n    labels: Any, predictions: Any, metric: str = \"rocauc\"\n)\n

Returns the value of a given metric.

"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance--parameters","title":"Parameters","text":"

  • labels (np.array) \u2013

    labels

  • predictions (np.array) \u2013

    predictions

  • metric (str) \u2013

    the metric to compute

"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance--returns","title":"Returns:","text":"

value (float) : the value of the metric

TODO we can add more metrics here

TODO currently for classification metrics like precision, recall, f1score and mcc, we are using a threshold of 0.5 to convert the probabilities to binary predictions. However, for models with imbalanced predictions, where the meaningful threshold is not located at 0.5, one can end up with predictions that are all 0s or all 1s, and thus meaningless performance metrics.

Parameters:

  • labels (Any) \u2013

    Ground truth labels

  • predictions (Any) \u2013

    Model predictions

  • metric (str, default: 'rocauc' ) \u2013

    Type of metric to compute (default: \"rocauc\")

Methods:

  • data2array \u2013

    Convert input data to numpy array.

  • f1score \u2013

    Compute F1 score.

  • handle_multiclass \u2013

    Handle the case of multiclass classification.

  • mcc \u2013

    Compute Matthews Correlation Coefficient.

  • prauc \u2013

    Compute PR AUC score.

  • precision \u2013

    Compute precision score.

  • recall \u2013

    Compute recall score.

  • rocauc \u2013

    Compute ROC AUC score.

  • spearmanr \u2013

    Compute Spearman correlation coefficient.

Source code in src/stimulus/utils/performance.py
def __init__(self, labels: Any, predictions: Any, metric: str = \"rocauc\") -> None:\n    \"\"\"Initialize Performance class with labels, predictions and metric type.\n\n    Args:\n        labels: Ground truth labels\n        predictions: Model predictions\n        metric: Type of metric to compute (default: \"rocauc\")\n    \"\"\"\n    labels_arr = self.data2array(labels)\n    predictions_arr = self.data2array(predictions)\n    labels_arr, predictions_arr = self.handle_multiclass(labels_arr, predictions_arr)\n    if labels_arr.shape != predictions_arr.shape:\n        raise ValueError(\n            f\"The labels have shape {labels_arr.shape} whereas predictions have shape {predictions_arr.shape}.\",\n        )\n    function = getattr(self, metric)\n    self.val = function(labels_arr, predictions_arr)\n
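
As an illustration, a minimal usage sketch based on the constructor shown above. The values are made up, and predictions are passed as per-class probabilities of shape (N, 2), which handle_multiclass reduces to the positive-class column; the computed metric is stored in the val attribute:

from stimulus.utils.performance import Performance\n\n# Toy binary-classification example (illustrative values only).\nlabels = [0, 1, 1, 0]\npredictions = [[0.9, 0.1], [0.2, 0.8], [0.3, 0.7], [0.6, 0.4]]  # per-class probabilities, shape (N, 2)\n\nperf = Performance(labels=labels, predictions=predictions, metric=\"rocauc\")\nprint(perf.val)  # the computed ROC AUC as a float\n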
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.data2array","title":"data2array","text":"
data2array(data: Any) -> NDArray[float64]\n

Convert input data to numpy array.

Parameters:

  • data (Any) \u2013

    Input data in various formats

Returns:

  • NDArray[float64] \u2013

    NDArray[np.float64]: Converted numpy array

Raises:

  • ValueError \u2013

    If input data type is not supported

Source code in src/stimulus/utils/performance.py
def data2array(self, data: Any) -> NDArray[np.float64]:\n    \"\"\"Convert input data to numpy array.\n\n    Args:\n        data: Input data in various formats\n\n    Returns:\n        NDArray[np.float64]: Converted numpy array\n\n    Raises:\n        ValueError: If input data type is not supported\n    \"\"\"\n    if isinstance(data, list):\n        return np.array(data, dtype=np.float64)\n    if isinstance(data, np.ndarray):\n        return data.astype(np.float64)\n    if isinstance(data, torch.Tensor):\n        return data.detach().cpu().numpy().astype(np.float64)\n    if isinstance(data, (int, float)):\n        return np.array([data], dtype=np.float64)\n    raise ValueError(f\"The data must be a list, np.array, torch.Tensor, int or float. Instead it is {type(data)}\")\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.f1score","title":"f1score","text":"
f1score(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute F1 score.

Source code in src/stimulus/utils/performance.py
def f1score(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute F1 score.\"\"\"\n    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])\n    return float(f1_score(labels, predictions_binary))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.handle_multiclass","title":"handle_multiclass","text":"
handle_multiclass(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> tuple[NDArray[float64], NDArray[float64]]\n

Handle the case of multiclass classification.

Parameters:

  • labels (NDArray[float64]) \u2013

    Labels array of shape (N,) or (N, 1)

  • predictions (NDArray[float64]) \u2013

    Predictions array of shape (N,) or (N, C) where C is number of classes

Returns:

  • tuple[NDArray[float64], NDArray[float64]] \u2013

    tuple[NDArray[np.float64], NDArray[np.float64]]: Processed labels and predictions

Raises:

  • ValueError \u2013

    If input shapes are not compatible

Source code in src/stimulus/utils/performance.py
def handle_multiclass(\n    self,\n    labels: NDArray[np.float64],\n    predictions: NDArray[np.float64],\n) -> tuple[NDArray[np.float64], NDArray[np.float64]]:\n    \"\"\"Handle the case of multiclass classification.\n\n    Args:\n        labels: Labels array of shape (N,) or (N, 1)\n        predictions: Predictions array of shape (N,) or (N, C) where C is number of classes\n\n    Returns:\n        tuple[NDArray[np.float64], NDArray[np.float64]]: Processed labels and predictions\n\n    Raises:\n        ValueError: If input shapes are not compatible\n    \"\"\"\n    # Case 1: If labels are 2D with shape (N,1), squeeze to 1D shape (N,)\n    # This handles cases where labels come as column vectors\n    if len(labels.shape) == NON_SQUEEZED_SHAPE_LENGTH and labels.shape[1] == 1:\n        labels = labels.squeeze(-1)\n\n    if len(predictions.shape) == NON_SQUEEZED_SHAPE_LENGTH:\n        # Case 2: Binary classification with shape (N,2)\n        # Take probability of positive class (second column)\n        if predictions.shape[1] == BINARY_CLASS_COUNT:\n            predictions = predictions[:, 1]  # Shape becomes (N,)\n            return labels, predictions\n        # Case 3: Multi-class classification with shape (N,C)\n        # Keep predictions as-is if labels are 1D and batch sizes match\n        if len(labels.shape) == 1 and predictions.shape[0] == labels.shape[0]:\n            return labels, predictions\n\n    # If we get here, the shapes are not compatible\n    raise ValueError(\n        f\"Incompatible shapes: labels {labels.shape}, predictions {predictions.shape}. \"\n        \"Expected labels (N,) or (N, 1) and predictions (N,) or (N, C) where C is number of classes.\",\n    )\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.mcc","title":"mcc","text":"
mcc(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute Matthews Correlation Coefficient.

Source code in src/stimulus/utils/performance.py
def mcc(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute Matthews Correlation Coefficient.\"\"\"\n    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])\n    return float(matthews_corrcoef(labels, predictions_binary))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.prauc","title":"prauc","text":"
prauc(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute PR AUC score.

Source code in src/stimulus/utils/performance.py
def prauc(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute PR AUC score.\"\"\"\n    return float(average_precision_score(labels, predictions))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.precision","title":"precision","text":"
precision(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute precision score.

Source code in src/stimulus/utils/performance.py
def precision(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute precision score.\"\"\"\n    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])\n    return float(precision_score(labels, predictions_binary))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.recall","title":"recall","text":"
recall(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute recall score.

Source code in src/stimulus/utils/performance.py
def recall(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute recall score.\"\"\"\n    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])\n    return float(recall_score(labels, predictions_binary))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.rocauc","title":"rocauc","text":"
rocauc(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute ROC AUC score.

Source code in src/stimulus/utils/performance.py
def rocauc(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute ROC AUC score.\"\"\"\n    return float(roc_auc_score(labels, predictions))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.spearmanr","title":"spearmanr","text":"
spearmanr(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute Spearman correlation coefficient.

Source code in src/stimulus/utils/performance.py
def spearmanr(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute Spearman correlation coefficient.\"\"\"\n    return float(spearmanr(labels, predictions)[0])\n
"},{"location":"reference/stimulus/utils/yaml_data/","title":"stimulus.utils.yaml_data","text":""},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data","title":"yaml_data","text":"

Utility module for handling YAML configuration files and their validation.

Classes:

  • YamlColumns \u2013

    Model for column configuration.

  • YamlColumnsEncoder \u2013

    Model for column encoder configuration.

  • YamlConfigDict \u2013

    Model for main YAML configuration.

  • YamlGlobalParams \u2013

    Model for global parameters in YAML configuration.

  • YamlSchema \u2013

    Model for validating YAML schema.

  • YamlSplit \u2013

    Model for split configuration.

  • YamlSubConfigDict \u2013

    Model for sub-configuration generated from main config.

  • YamlTransform \u2013

    Model for transform configuration.

  • YamlTransformColumns \u2013

    Model for transform columns configuration.

  • YamlTransformColumnsTransformation \u2013

    Model for column transformation configuration.

Functions:

  • check_yaml_schema \u2013

    Validate YAML configuration fields have correct types.

  • dump_yaml_list_into_files \u2013

    Dumps a list of YAML configurations into separate files with custom formatting.

  • expand_transform_list_combinations \u2013

    Expands a list of transforms into all possible parameter combinations.

  • expand_transform_parameter_combinations \u2013

    Get all possible transforms by extracting parameters at each valid index.

  • extract_transform_parameters_at_index \u2013

    Get a transform with parameters at the specified index.

  • generate_data_configs \u2013

    Generates all possible data configurations from a YAML config.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlColumns","title":"YamlColumns","text":"

Bases: BaseModel

Model for column configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlColumnsEncoder","title":"YamlColumnsEncoder","text":"

Bases: BaseModel

Model for column encoder configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlConfigDict","title":"YamlConfigDict","text":"

Bases: BaseModel

Model for main YAML configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlGlobalParams","title":"YamlGlobalParams","text":"

Bases: BaseModel

Model for global parameters in YAML configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlSchema","title":"YamlSchema","text":"

Bases: BaseModel

Model for validating YAML schema.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlSplit","title":"YamlSplit","text":"

Bases: BaseModel

Model for split configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlSubConfigDict","title":"YamlSubConfigDict","text":"

Bases: BaseModel

Model for sub-configuration generated from main config.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlTransform","title":"YamlTransform","text":"

Bases: BaseModel

Model for transform configuration.

Methods:

  • validate_param_lists_across_columns \u2013

    Validate that parameter lists across columns have consistent lengths.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlTransform.validate_param_lists_across_columns","title":"validate_param_lists_across_columns classmethod","text":"
validate_param_lists_across_columns(\n    columns: list[YamlTransformColumns],\n) -> list[YamlTransformColumns]\n

Validate that parameter lists across columns have consistent lengths.

Parameters:

  • columns (list[YamlTransformColumns]) \u2013

    List of transform columns to validate

Returns:

  • list[YamlTransformColumns] \u2013

    The validated columns list

Source code in src/stimulus/utils/yaml_data.py
@field_validator(\"columns\")\n@classmethod\ndef validate_param_lists_across_columns(cls, columns: list[YamlTransformColumns]) -> list[YamlTransformColumns]:\n    \"\"\"Validate that parameter lists across columns have consistent lengths.\n\n    Args:\n        columns: List of transform columns to validate\n\n    Returns:\n        The validated columns list\n    \"\"\"\n    # Get all parameter list lengths across all columns and transformations\n    all_list_lengths: set[int] = set()\n\n    for column in columns:\n        for transformation in column.transformations:\n            if transformation.params and any(\n                isinstance(param_value, list) and len(param_value) > 0\n                for param_value in transformation.params.values()\n            ):\n                all_list_lengths.update(\n                    len(param_value)\n                    for param_value in transformation.params.values()\n                    if isinstance(param_value, list) and len(param_value) > 0\n                )\n\n    # Skip validation if no lists found\n    if not all_list_lengths:\n        return columns\n\n    # Check if all lists either have length 1, or all have the same length\n    all_list_lengths.discard(1)  # Remove length 1 as it's always valid\n    if len(all_list_lengths) > 1:  # Multiple different lengths found\n        raise ValueError(\n            \"All parameter lists across columns must either contain one element or have the same length\",\n        )\n\n    return columns\n
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlTransformColumns","title":"YamlTransformColumns","text":"

Bases: BaseModel

Model for transform columns configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlTransformColumnsTransformation","title":"YamlTransformColumnsTransformation","text":"

Bases: BaseModel

Model for column transformation configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.check_yaml_schema","title":"check_yaml_schema","text":"
check_yaml_schema(config_yaml: YamlConfigDict) -> str\n

Validate YAML configuration fields have correct types.

If a children field is specific to a parent, the children field's class is hosted in the parent field's class. If any field is not of the right type, the function prints an error message explaining the problem and exits the Python code.

Parameters:

  • config_yaml (YamlConfigDict) \u2013

    The YamlConfigDict containing the fields of the yaml configuration file

Returns:

  • str ( str ) \u2013

    Empty string if validation succeeds

Raises:

  • ValueError \u2013

    If validation fails

Source code in src/stimulus/utils/yaml_data.py
def check_yaml_schema(config_yaml: YamlConfigDict) -> str:\n    \"\"\"Validate YAML configuration fields have correct types.\n\n    If the children field is specific to a parent, the children fields class is hosted in the parent fields class.\n    If any field in not the right type, the function prints an error message explaining the problem and exits the python code.\n\n    Args:\n        config_yaml: The YamlConfigDict containing the fields of the yaml configuration file\n\n    Returns:\n        str: Empty string if validation succeeds\n\n    Raises:\n        ValueError: If validation fails\n    \"\"\"\n    try:\n        YamlSchema(yaml_conf=config_yaml)\n    except ValidationError as e:\n        # Use logging instead of print for error handling\n        raise ValueError(\"Wrong type on a field, see the pydantic report above\") from e\n    return \"\"\n
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.dump_yaml_list_into_files","title":"dump_yaml_list_into_files","text":"
dump_yaml_list_into_files(\n    yaml_list: list[YamlSubConfigDict],\n    directory_path: str,\n    base_name: str,\n) -> None\n

Dumps a list of YAML configurations into separate files with custom formatting.

Source code in src/stimulus/utils/yaml_data.py
def dump_yaml_list_into_files(\n    yaml_list: list[YamlSubConfigDict],\n    directory_path: str,\n    base_name: str,\n) -> None:\n    \"\"\"Dumps a list of YAML configurations into separate files with custom formatting.\"\"\"\n    # Create a new class attribute rather than assigning to the method\n    # Remove this line since we'll add ignore_aliases to CustomDumper instead\n\n    def represent_none(dumper: yaml.Dumper, _: Any) -> yaml.Node:\n        \"\"\"Custom representer to format None values as empty strings in YAML output.\"\"\"\n        return dumper.represent_scalar(\"tag:yaml.org,2002:null\", \"\")\n\n    def custom_representer(dumper: yaml.Dumper, data: Any) -> yaml.Node:\n        \"\"\"Custom representer to handle different types of lists with appropriate formatting.\"\"\"\n        if isinstance(data, list):\n            if len(data) == 0:\n                return dumper.represent_scalar(\"tag:yaml.org,2002:null\", \"\")\n            if isinstance(data[0], dict):\n                return dumper.represent_sequence(\"tag:yaml.org,2002:seq\", data, flow_style=False)\n            if isinstance(data[0], list):\n                return dumper.represent_sequence(\"tag:yaml.org,2002:seq\", data, flow_style=True)\n        return dumper.represent_sequence(\"tag:yaml.org,2002:seq\", data, flow_style=True)\n\n    class CustomDumper(yaml.Dumper):\n        \"\"\"Custom YAML dumper that adds extra formatting controls.\"\"\"\n\n        def ignore_aliases(self, _data: Any) -> bool:\n            \"\"\"Ignore aliases in the YAML output.\"\"\"\n            return True\n\n        def write_line_break(self, _data: Any = None) -> None:\n            \"\"\"Add extra newline after root-level elements.\"\"\"\n            super().write_line_break(_data)\n            if len(self.indents) <= 1:  # At root level\n                super().write_line_break(_data)\n\n        def increase_indent(self, *, flow: bool = False, indentless: bool = False) -> None:  # type: ignore[override]\n            \"\"\"Ensure consistent indentation by preventing indentless sequences.\"\"\"\n            return super().increase_indent(\n                flow=flow,\n                indentless=indentless,\n            )  # Force indentless to False for better formatting\n\n    # Register the custom representers with our dumper\n    yaml.add_representer(type(None), represent_none, Dumper=CustomDumper)\n    yaml.add_representer(list, custom_representer, Dumper=CustomDumper)\n\n    for i, yaml_dict in enumerate(yaml_list):\n        dict_data = yaml_dict.model_dump(exclude_none=True)\n\n        def fix_params(input_dict: dict[str, Any]) -> dict[str, Any]:\n            \"\"\"Recursively process dictionary to properly handle params fields.\"\"\"\n            if isinstance(input_dict, dict):\n                processed_dict: dict[str, Any] = {}\n                for key, value in input_dict.items():\n                    if key == \"encoder\" and isinstance(value, list):\n                        processed_dict[key] = []\n                        for encoder in value:\n                            processed_encoder = dict(encoder)\n                            if \"params\" not in processed_encoder or not processed_encoder[\"params\"]:\n                                processed_encoder[\"params\"] = {}\n                            processed_dict[key].append(processed_encoder)\n                    elif key == \"transformations\" and isinstance(value, list):\n                        processed_dict[key] = []\n                        for transformation 
in value:\n                            processed_transformation = dict(transformation)\n                            if \"params\" not in processed_transformation or not processed_transformation[\"params\"]:\n                                processed_transformation[\"params\"] = {}\n                            processed_dict[key].append(processed_transformation)\n                    elif isinstance(value, dict):\n                        processed_dict[key] = fix_params(value)\n                    elif isinstance(value, list):\n                        processed_dict[key] = [\n                            fix_params(list_item) if isinstance(list_item, dict) else list_item for list_item in value\n                        ]\n                    else:\n                        processed_dict[key] = value\n                return processed_dict\n            return input_dict\n\n        dict_data = fix_params(dict_data)\n\n        with open(f\"{directory_path}/{base_name}_{i}.yaml\", \"w\") as f:\n            yaml.dump(\n                dict_data,\n                f,\n                Dumper=CustomDumper,\n                sort_keys=False,\n                default_flow_style=False,\n                indent=2,\n                width=float(\"inf\"),  # Prevent line wrapping\n            )\n
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.expand_transform_list_combinations","title":"expand_transform_list_combinations","text":"
expand_transform_list_combinations(\n    transform_list: list[YamlTransform],\n) -> list[YamlTransform]\n

Expands a list of transforms into all possible parameter combinations.

Takes a list of transforms where each transform may contain parameter lists, and expands them into separate transforms with single parameter values. For example, if a transform has parameters [0.1, 0.2] and [1, 2], this will create two transforms: one with 0.1/1 and another with 0.2/2.

Parameters:

  • transform_list (list[YamlTransform]) \u2013

    A list of YamlTransform objects containing parameter lists that need to be expanded into individual transforms.

Returns:

  • list[YamlTransform] \u2013

    list[YamlTransform]: A flattened list of transforms where each transform has single parameter values instead of parameter lists. The length of the returned list will be the sum of the number of parameter combinations for each input transform.

Source code in src/stimulus/utils/yaml_data.py
def expand_transform_list_combinations(transform_list: list[YamlTransform]) -> list[YamlTransform]:\n    \"\"\"Expands a list of transforms into all possible parameter combinations.\n\n    Takes a list of transforms where each transform may contain parameter lists,\n    and expands them into separate transforms with single parameter values.\n    For example, if a transform has parameters [0.1, 0.2] and [1, 2], this will\n    create two transforms: one with 0.1/1 and another with 0.2/2.\n\n    Args:\n        transform_list: A list of YamlTransform objects containing parameter lists\n            that need to be expanded into individual transforms.\n\n    Returns:\n        list[YamlTransform]: A flattened list of transforms where each transform\n            has single parameter values instead of parameter lists. The length of\n            the returned list will be the sum of the number of parameter combinations\n            for each input transform.\n    \"\"\"\n    sub_transforms = []\n    for transform in transform_list:\n        sub_transforms.extend(expand_transform_parameter_combinations(transform))\n    return sub_transforms\n
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.expand_transform_parameter_combinations","title":"expand_transform_parameter_combinations","text":"
expand_transform_parameter_combinations(\n    transform: YamlTransform,\n) -> list[YamlTransform]\n

Get all possible transforms by extracting parameters at each valid index.

For a transform with parameter lists, creates multiple new transforms, each containing single parameter values from the corresponding indices of the parameter lists.

Parameters:

  • transform (YamlTransform) \u2013

    The original transform containing parameter lists

Returns:

  • list[YamlTransform] \u2013

    A list of transforms, each with single parameter values from sequential indices

Source code in src/stimulus/utils/yaml_data.py
def expand_transform_parameter_combinations(transform: YamlTransform) -> list[YamlTransform]:\n    \"\"\"Get all possible transforms by extracting parameters at each valid index.\n\n    For a transform with parameter lists, creates multiple new transforms, each containing\n    single parameter values from the corresponding indices of the parameter lists.\n\n    Args:\n        transform: The original transform containing parameter lists\n\n    Returns:\n        A list of transforms, each with single parameter values from sequential indices\n    \"\"\"\n    # Find the length of parameter lists - we only need to check the first list we find\n    # since all lists must have the same length (enforced by pydantic validator)\n    max_length = 1\n    for column in transform.columns:\n        for transformation in column.transformations:\n            if transformation.params:\n                list_lengths = [len(v) for v in transformation.params.values() if isinstance(v, list) and len(v) > 1]\n                if list_lengths:\n                    max_length = list_lengths[0]  # All lists have same length due to validator\n                    break\n\n    # Generate a transform for each index\n    transforms = []\n    for i in range(max_length):\n        transforms.append(extract_transform_parameters_at_index(transform, i))\n\n    return transforms\n
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.extract_transform_parameters_at_index","title":"extract_transform_parameters_at_index","text":"
extract_transform_parameters_at_index(\n    transform: YamlTransform, index: int = 0\n) -> YamlTransform\n

Get a transform with parameters at the specified index.

Parameters:

  • transform (YamlTransform) \u2013

    The original transform containing parameter lists

  • index (int, default: 0 ) \u2013

    Index to extract parameters from (default 0)

Returns:

  • YamlTransform \u2013

    A new transform with single parameter values at the specified index

Source code in src/stimulus/utils/yaml_data.py
def extract_transform_parameters_at_index(transform: YamlTransform, index: int = 0) -> YamlTransform:\n    \"\"\"Get a transform with parameters at the specified index.\n\n    Args:\n        transform: The original transform containing parameter lists\n        index: Index to extract parameters from (default 0)\n\n    Returns:\n        A new transform with single parameter values at the specified index\n    \"\"\"\n    # Create a copy of the transform\n    new_transform = YamlTransform(**transform.model_dump())\n\n    # Process each column and transformation\n    for column in new_transform.columns:\n        for transformation in column.transformations:\n            if transformation.params:\n                # Convert each parameter list to single value at index\n                new_params = {}\n                for param_name, param_value in transformation.params.items():\n                    if isinstance(param_value, list):\n                        new_params[param_name] = param_value[index]\n                    else:\n                        new_params[param_name] = param_value\n                transformation.params = new_params\n\n    return new_transform\n
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.generate_data_configs","title":"generate_data_configs","text":"
generate_data_configs(\n    yaml_config: YamlConfigDict,\n) -> list[YamlSubConfigDict]\n

Generates all possible data configurations from a YAML config.

Takes a YAML configuration that may contain parameter lists and splits, and generates all possible combinations of parameters and splits into separate data configurations.

For example, if the config has: - A transform with parameters [0.1, 0.2] - Two splits [0.7/0.3] and [0.8/0.2] This will generate 4 configs, 2 for each split.

Parameters:

  • yaml_config (YamlConfigDict) \u2013

    The source YAML configuration containing transforms with parameter lists and multiple splits.

Returns:

  • list[YamlSubConfigDict] \u2013

    list[YamlSubConfigDict]: A list of data configurations, where each config has single parameter values and one split configuration. The length will be the product of the number of parameter combinations and the number of splits.

Source code in src/stimulus/utils/yaml_data.py
def generate_data_configs(yaml_config: YamlConfigDict) -> list[YamlSubConfigDict]:\n    \"\"\"Generates all possible data configurations from a YAML config.\n\n    Takes a YAML configuration that may contain parameter lists and splits,\n    and generates all possible combinations of parameters and splits into\n    separate data configurations.\n\n    For example, if the config has:\n    - A transform with parameters [0.1, 0.2]\n    - Two splits [0.7/0.3] and [0.8/0.2]\n    This will generate 4 configs, 2 for each split.\n\n    Args:\n        yaml_config: The source YAML configuration containing transforms with\n            parameter lists and multiple splits.\n\n    Returns:\n        list[YamlSubConfigDict]: A list of data configurations, where each\n            config has single parameter values and one split configuration. The\n            length will be the product of the number of parameter combinations\n            and the number of splits.\n    \"\"\"\n    if isinstance(yaml_config, dict) and not isinstance(yaml_config, YamlConfigDict):\n        raise TypeError(\"Input must be a YamlConfigDict object\")\n\n    sub_transforms = expand_transform_list_combinations(yaml_config.transforms)\n    sub_splits = yaml_config.split\n    sub_configs = []\n    for split in sub_splits:\n        for transform in sub_transforms:\n            sub_configs.append(\n                YamlSubConfigDict(\n                    global_params=yaml_config.global_params,\n                    columns=yaml_config.columns,\n                    transforms=transform,\n                    split=split,\n                ),\n            )\n    return sub_configs\n
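
As an illustration, a hedged sketch chaining generate_data_configs with dump_yaml_list_into_files. The file name and output directory are hypothetical, and building the YamlConfigDict directly from the parsed YAML dict is an assumption:

import yaml\n\nfrom stimulus.utils import yaml_data\n\n# Hypothetical input file; assumes its content matches the YamlConfigDict schema.\nwith open(\"data_config.yaml\") as f:\n    config = yaml_data.YamlConfigDict(**yaml.safe_load(f))\n\n# Expand parameter lists and splits into individual sub-configurations.\nsub_configs = yaml_data.generate_data_configs(config)\n\n# Write one YAML file per sub-configuration (assumes ./configs already exists),\n# e.g. ./configs/data_config_0.yaml, ./configs/data_config_1.yaml, ...\nyaml_data.dump_yaml_list_into_files(sub_configs, \"./configs\", \"data_config\")\n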
"},{"location":"reference/stimulus/utils/yaml_model_schema/","title":"stimulus.utils.yaml_model_schema","text":""},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema","title":"yaml_model_schema","text":"

Module for handling YAML configuration files and converting them to Ray Tune format.

Classes:

  • CustomTunableParameter \u2013

    Custom tunable parameter.

  • Data \u2013

    Data parameters.

  • Loss \u2013

    Loss parameters.

  • Model \u2013

    Model configuration.

  • RayTuneModel \u2013

    Ray Tune compatible model configuration.

  • RunParams \u2013

    Run parameters.

  • Scheduler \u2013

    Scheduler parameters.

  • TunableParameter \u2013

    Tunable parameter.

  • Tune \u2013

    Tune parameters.

  • TuneParams \u2013

    Tune parameters.

  • YamlRayConfigLoader \u2013

    Load and convert YAML configurations to Ray Tune format.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.CustomTunableParameter","title":"CustomTunableParameter","text":"

Bases: BaseModel

Custom tunable parameter.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.Data","title":"Data","text":"

Bases: BaseModel

Data parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.Loss","title":"Loss","text":"

Bases: BaseModel

Loss parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.Model","title":"Model","text":"

Bases: BaseModel

Model configuration.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.RayTuneModel","title":"RayTuneModel","text":"

Bases: BaseModel

Ray Tune compatible model configuration.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.RunParams","title":"RunParams","text":"

Bases: BaseModel

Run parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.Scheduler","title":"Scheduler","text":"

Bases: BaseModel

Scheduler parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.TunableParameter","title":"TunableParameter","text":"

Bases: BaseModel

Tunable parameter.

Methods:

  • validate_mode \u2013

    Validate that mode is supported by Ray Tune.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.TunableParameter.validate_mode","title":"validate_mode","text":"
validate_mode() -> TunableParameter\n

Validate that mode is supported by Ray Tune.

Source code in src/stimulus/utils/yaml_model_schema.py
@pydantic.model_validator(mode=\"after\")\ndef validate_mode(self) -> \"TunableParameter\":\n    \"\"\"Validate that mode is supported by Ray Tune.\"\"\"\n    if not hasattr(tune, self.mode):\n        raise AttributeError(\n            f\"Mode {self.mode} not recognized, check the ray.tune documentation at https://docs.ray.io/en/master/tune/api_docs/suggestion.html\",\n        )\n\n    mode = getattr(tune, self.mode)\n    if mode.__name__ not in [\n        \"choice\",\n        \"uniform\",\n        \"loguniform\",\n        \"quniform\",\n        \"qloguniform\",\n        \"qnormal\",\n        \"randint\",\n        \"sample_from\",\n    ]:\n        raise NotImplementedError(f\"Mode {mode.__name__} not implemented yet\")\n\n    return self\n
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.Tune","title":"Tune","text":"

Bases: BaseModel

Tune parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.TuneParams","title":"TuneParams","text":"

Bases: BaseModel

Tune parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader","title":"YamlRayConfigLoader","text":"
YamlRayConfigLoader(model: Model)\n

Load and convert YAML configurations to Ray Tune format.

This class handles loading model configurations and converting them into formats compatible with Ray Tune's hyperparameter search spaces.

Parameters:

  • model (Model) \u2013

    Pydantic Model instance containing configuration

Methods:

  • convert_config_to_ray \u2013

    Convert Model configuration to Ray Tune format.

  • convert_raytune \u2013

    Convert parameter configuration to Ray Tune format.

  • get_config \u2013

    Return the current configuration.

  • raytune_sample_from \u2013

    Apply tune.sample_from to a given custom sampling function.

  • raytune_space_selector \u2013

    Convert space parameters to Ray Tune format based on the mode.

  • sampint \u2013

    Return a list of n random samples from the sample_space.

Source code in src/stimulus/utils/yaml_model_schema.py
def __init__(self, model: Model) -> None:\n    \"\"\"Initialize the config loader with a Model instance.\n\n    Args:\n        model: Pydantic Model instance containing configuration\n    \"\"\"\n    self.model = model\n    self.ray_model = self.convert_config_to_ray(model)\n
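
A short, hedged usage sketch based on the constructor and get_config shown in this section; how the Model instance is built from a YAML file is an assumption:

import yaml\n\nfrom stimulus.utils.yaml_model_schema import Model, YamlRayConfigLoader\n\n# Hypothetical model config file; assumes its content matches the Model schema.\nwith open(\"model_config.yaml\") as f:\n    model = Model(**yaml.safe_load(f))\n\nloader = YamlRayConfigLoader(model)\nray_model = loader.get_config()  # RayTuneModel with Ray Tune search spaces\n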
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.convert_config_to_ray","title":"convert_config_to_ray","text":"
convert_config_to_ray(model: Model) -> RayTuneModel\n

Convert Model configuration to Ray Tune format.

Converts parameters in network_params and optimizer_params to Ray Tune search spaces.

Parameters:

  • model (Model) \u2013

    Model configuration

Returns:

  • RayTuneModel \u2013

    Ray Tune compatible model configuration

Source code in src/stimulus/utils/yaml_model_schema.py
def convert_config_to_ray(self, model: Model) -> RayTuneModel:\n    \"\"\"Convert Model configuration to Ray Tune format.\n\n    Converts parameters in network_params and optimizer_params to Ray Tune search spaces.\n\n    Args:\n        model: Model configuration\n\n    Returns:\n        Ray Tune compatible model configuration\n    \"\"\"\n    return RayTuneModel(\n        network_params={k: self.convert_raytune(v) for k, v in model.network_params.items()},\n        optimizer_params={k: self.convert_raytune(v) for k, v in model.optimizer_params.items()},\n        loss_params={k: self.convert_raytune(v) for k, v in model.loss_params},\n        data_params={k: self.convert_raytune(v) for k, v in model.data_params},\n        tune=model.tune,\n    )\n
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.convert_raytune","title":"convert_raytune","text":"
convert_raytune(\n    param: TunableParameter | CustomTunableParameter,\n) -> Any\n

Convert parameter configuration to Ray Tune format.

Parameters:

  • param (TunableParameter | CustomTunableParameter) \u2013

    Parameter configuration

Returns:

  • Any \u2013

    Ray Tune compatible parameter configuration

Source code in src/stimulus/utils/yaml_model_schema.py
def convert_raytune(self, param: TunableParameter | CustomTunableParameter) -> Any:\n    \"\"\"Convert parameter configuration to Ray Tune format.\n\n    Args:\n        param: Parameter configuration\n\n    Returns:\n        Ray Tune compatible parameter configuration\n    \"\"\"\n    mode = getattr(tune, param.mode)\n\n    if isinstance(param, TunableParameter):\n        return self.raytune_space_selector(mode, param.space)\n    return self.raytune_sample_from(mode, param)\n
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.get_config","title":"get_config","text":"
get_config() -> RayTuneModel\n

Return the current configuration.

Returns:

  • RayTuneModel \u2013

    Current configuration dictionary

Source code in src/stimulus/utils/yaml_model_schema.py
def get_config(self) -> RayTuneModel:\n    \"\"\"Return the current configuration.\n\n    Returns:\n        Current configuration dictionary\n    \"\"\"\n    return self.ray_model\n
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.raytune_sample_from","title":"raytune_sample_from","text":"
raytune_sample_from(\n    mode: Callable, param: CustomTunableParameter\n) -> Any\n

Apply tune.sample_from to a given custom sampling function.

Parameters:

  • mode (Callable) \u2013

    Ray Tune sampling function

  • param (CustomTunableParameter) \u2013

    TunableParameter containing sampling parameters

Returns:

  • Any \u2013

    Configured sampling function

Raises:

  • NotImplementedError \u2013

    If the sampling function is not supported

Source code in src/stimulus/utils/yaml_model_schema.py
def raytune_sample_from(self, mode: Callable, param: CustomTunableParameter) -> Any:\n    \"\"\"Apply tune.sample_from to a given custom sampling function.\n\n    Args:\n        mode: Ray Tune sampling function\n        param: TunableParameter containing sampling parameters\n\n    Returns:\n        Configured sampling function\n\n    Raises:\n        NotImplementedError: If the sampling function is not supported\n    \"\"\"\n    if param.function == \"sampint\":\n        return mode(lambda _: self.sampint(param.sample_space, param.n_space))\n\n    raise NotImplementedError(f\"Function {param.function} not implemented yet\")\n
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.raytune_space_selector","title":"raytune_space_selector","text":"
raytune_space_selector(mode: Callable, space: list) -> Any\n

Convert space parameters to Ray Tune format based on the mode.

Parameters:

  • mode (Callable) \u2013

    Ray Tune search space function (e.g., tune.choice, tune.uniform)

  • space (list) \u2013

    List of parameters defining the search space

Returns:

  • Any \u2013

    Configured Ray Tune search space

Source code in src/stimulus/utils/yaml_model_schema.py
def raytune_space_selector(self, mode: Callable, space: list) -> Any:\n    \"\"\"Convert space parameters to Ray Tune format based on the mode.\n\n    Args:\n        mode: Ray Tune search space function (e.g., tune.choice, tune.uniform)\n        space: List of parameters defining the search space\n\n    Returns:\n        Configured Ray Tune search space\n    \"\"\"\n    if mode.__name__ == \"choice\":\n        return mode(space)\n\n    return mode(*tuple(space))\n
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.sampint","title":"sampint staticmethod","text":"
sampint(sample_space: list, n_space: list) -> list[int]\n

Return a list of n random samples from the sample_space.

This function is useful for sampling different numbers of layers, each with different numbers of neurons.

Parameters:

  • sample_space (list) \u2013

    List [min, max] defining range of values to sample from

  • n_space (list) \u2013

    List [min, max] defining range for number of samples

Returns:

  • list[int] \u2013

    List of randomly sampled integers

Note

Uses Python's random module which is not cryptographically secure. This is acceptable for hyperparameter sampling but should not be used for security-critical purposes (S311 fails when linting).

Source code in src/stimulus/utils/yaml_model_schema.py
@staticmethod\ndef sampint(sample_space: list, n_space: list) -> list[int]:\n    \"\"\"Return a list of n random samples from the sample_space.\n\n    This function is useful for sampling different numbers of layers,\n    each with different numbers of neurons.\n\n    Args:\n        sample_space: List [min, max] defining range of values to sample from\n        n_space: List [min, max] defining range for number of samples\n\n    Returns:\n        List of randomly sampled integers\n\n    Note:\n        Uses Python's random module which is not cryptographically secure.\n        This is acceptable for hyperparameter sampling but should not be\n        used for security-critical purposes (S311 fails when linting).\n    \"\"\"\n    sample_space_list = list(range(sample_space[0], sample_space[1] + 1))\n    n_space_list = list(range(n_space[0], n_space[1] + 1))\n    n = random.choice(n_space_list)  # noqa: S311\n    return random.sample(sample_space_list, n)\n
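
For example, an illustrative call (the ranges are made up and the output is random):

# Draw between 1 and 3 integers, each from the range 8..64 (illustrative ranges).\nYamlRayConfigLoader.sampint([8, 64], [1, 3])  # e.g. [16, 48]\n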
"},{"location":"coverage/","title":"Coverage report","text":""}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"STIMULUS","text":""},{"location":"#stochastic-testing-with-input-modification-for-unbiased-learning-systems","title":"Stochastic Testing with Input Modification for Unbiased Learning Systems.","text":"

Warning

This package is in active development and breaking changes may occur. The API is not yet stable and features might be added, modified, or removed without notice. Use in production environments is not recommended at this stage.

We encourage you to:

  • \ud83d\udcdd Report bugs and issues on our GitHub Issues page

  • \ud83d\udca1 Suggest features and improvements through GitHub Discussions

  • \ud83e\udd1d Contribute by submitting pull requests

We are actively working towards release 1.0.0 (see milestone); check the Slack channel by clicking on the badge above, where we are actively discussing. Build with us every Wednesday from 14:00 to 18:00 CET on the nf-core gathertown (see Slack for calendar updates; some weeks open dev hours are not possible).

"},{"location":"#introduction","title":"Introduction","text":"

Most (if not all) quality software is thoroughly tested. Deep neural networks seem to have escaped this paradigm.

In the age of large-scale deep learning, it is critical that early-stage deep learning models (prototypes) are tested to ensure costly bugs do not happen at scale.

Here, we attempt to solve the testing problem by providing an extensive library to test deep neural networks beyond test-set performance.

Stimulus provides the following functionalities:

  1. Data Perturbation Testing: Modify training data to test a model's robustness to perturbations and uncover which pre-processing steps increase performance

  2. Hyperparameter Optimization: Perform tuning on model architecture with user-defined search spaces using Ray[tune] to ensure comparable performance across data transformations

  3. Comprehensive Analysis: Generate an all-against-all model report to guide data pre-processing decisions

For large-scale experiments, we recommend our nf-core deepmodeloptim pipeline, which is still under development and will be released alongside stimulus v1.0.0.

\ud83d\udcf9 Stimulus was featured at the Nextflow Summit 2024 in Barcelona, which is a nice introduction to the current package capabilities; you can watch the talk here

Stimulus aims to provide the following functionalities in the near future; stay tuned for updates!

  1. Model Architecture Testing: Run routine checks on model architecture and training process including type-checking, model execution, and weight updates

  2. Post-Training Validation: Perform comprehensive model validation including overfitting detection and out-of-distribution performance testing

  3. Informed Hyperparameter Tuning: Encourage tuning strategies that follow Google's Deep Learning Tuning Playbook 1

  4. Scaling Analysis: Generate scaling law reports to understand prototype model behavior at different scales

"},{"location":"#user-guide","title":"User guide","text":""},{"location":"#repository-organization","title":"Repository organization","text":"

Stimulus is organized as follows; we will refer to this structure in the following sections:

src/stimulus/ \ud83e\uddea\n\u251c\u2500\u2500 analysis/ \ud83d\udcca\n\u2502   \u2514\u2500\u2500 analysis_default.py\n\u251c\u2500\u2500 cli/ \ud83d\udda5\ufe0f\n\u2502   \u251c\u2500\u2500 analysis_default.py\n\u2502   \u251c\u2500\u2500 check_model.py\n\u2502   \u251c\u2500\u2500 interpret_json.py\n\u2502   \u251c\u2500\u2500 predict.py\n\u2502   \u251c\u2500\u2500 shuffle_csv.py\n\u2502   \u251c\u2500\u2500 split_csv.py\n\u2502   \u251c\u2500\u2500 split_yaml.py\n\u2502   \u251c\u2500\u2500 transform_csv.py\n\u2502   \u2514\u2500\u2500 tuning.py\n\u251c\u2500\u2500 data/ \ud83d\udcc1\n\u2502   \u251c\u2500\u2500 csv.py\n\u2502   \u251c\u2500\u2500 experiments.py\n\u2502   \u251c\u2500\u2500 handlertorch.py\n\u2502   \u251c\u2500\u2500 encoding/ \ud83d\udd10\n\u2502   \u2502   \u2514\u2500\u2500 encoders.py\n\u2502   \u251c\u2500\u2500 splitters/ \u2702\ufe0f\n\u2502   \u2502   \u2514\u2500\u2500 splitters.py\n\u2502   \u2514\u2500\u2500 transform/ \ud83d\udd04\n\u2502       \u2514\u2500\u2500 data_transformation_generators.py\n\u251c\u2500\u2500 learner/ \ud83e\udde0\n\u2502   \u251c\u2500\u2500 predict.py\n\u2502   \u251c\u2500\u2500 raytune_learner.py\n\u2502   \u2514\u2500\u2500 raytune_parser.py\n\u2514\u2500\u2500 utils/ \ud83d\udee0\ufe0f\n    \u251c\u2500\u2500 json_schema.py\n    \u251c\u2500\u2500 launch_utils.py\n    \u251c\u2500\u2500 performance.py\n    \u2514\u2500\u2500 yaml_model_schema.py\n
"},{"location":"#data-encoding","title":"Data encoding","text":"

Data in stimulus can take many forms (files, text, images, networks...). To support this diversity, stimulus relies on the encoding module. The list of available encoders can be found here.

If the provided encoders do not support the type of data you are working with, you can write your own encoder by inheriting from the AbstractEncoder class and implementing the encode, decode and encode_all methods (a sketch follows the list below).

  • encode is currently optional; it can raise a NotImplementedError if the encoder does not support encoding a single data point
  • decode is currently optional; it can raise a NotImplementedError if the encoder does not support decoding
  • encode_all is called by other stimulus functions and is expected to return a np.array.
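
Below is a minimal, hedged sketch of such a custom encoder. The import path and everything beyond the three method names listed above are assumptions about the AbstractEncoder interface:

import numpy as np\n\n# Import path is an assumption based on the repository layout shown above.\nfrom stimulus.data.encoding.encoders import AbstractEncoder\n\n\nclass AsciiEncoder(AbstractEncoder):\n    \"\"\"Toy encoder mapping each character of a string to its ASCII code.\"\"\"\n\n    def encode(self, data):\n        # Optional: encode a single data point.\n        return np.array([ord(char) for char in data])\n\n    def decode(self, data):\n        # Optional: decoding is not supported by this toy encoder.\n        raise NotImplementedError\n\n    def encode_all(self, data):\n        # Called by other stimulus functions; expected to return a np.array\n        # (assumes all items have the same length).\n        return np.array([self.encode(item) for item in data])\n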
"},{"location":"#expected-data-format","title":"Expected data format","text":"

Data is expected to be presented in a csv samplesheet file with the following format:

input1:input:input_type | input2:input:input_type | meta1:meta:meta_type | label1:label:label_type | label2:label:label_type
sample1 input1 | sample1 input2 | sample1 meta1 | sample1 label1 | sample1 label2
sample2 input1 | sample2 input2 | sample2 meta1 | sample2 label1 | sample2 label2
sample3 input1 | sample3 input2 | sample3 meta1 | sample3 label1 | sample3 label2

Columns are expected to follow this naming convention: name:type:data_type

  • name corresponds to the column name; this should be the same as the input names in the model batch definition (see the model section for more details)

  • type is either input, meta or label; typically, models predict the labels from the inputs, and meta is used to perform downstream analysis

  • data_type is the column data type.

Note

This rigid data format is expected to change once we move to release v1.0.0; data types and information will be defined in a yaml config and only column names will be required in the data (see this github issue)

"},{"location":"#connecting-encoders-and-datasets","title":"Connecting encoders and datasets","text":"

Once we have our data formatted and our encoders ready, we need to explicitly state which encoder is used for which data type. This is done through an experiment class.

To understand how experiment classes are used to connect data types and encoders, let's have a look at a minimal DnaToFloat example:

class DnaToFloat(AbstractExperiment):\n    def __init__(self) -> None:\n        super().__init__()\n        self.dna = {\n            \"encoder\": encoders.TextOneHotEncoder(alphabet=\"acgt\"),\n        }\n        self.float = {\n            \"encoder\": encoders.FloatEncoder(),\n        }\n

Here we define the data_type for the dna and float types. Note that those data_types are the same as the ones defined in the samplesheet dataset above. For example, a dataset on which this experiment would run could look like this:

mouse_dna:input:dna | mouse_rnaseq:label:float
ACTAGGCATGCTAGTCG | 0.53
ACTGGGGCTAGTCGAA | 0.23
GATGTTCTGATGCT | 0.98

Note how the data_type values for the mouse_dna and mouse_rnaseq columns exactly match the attribute names defined in the minimal DnaToFloat class above.

stimulus-py ships with a few basic experiment classes; if you need to write your own experiment class, simply inherit from the base AbstractExperiment class and overwrite the __init__ method as shown above.

Note

This has the drawback of requiring the experiment class to be rebuilt each time a new task is defined (for instance, let's say we want to use dna and protein sequences to predict rna).

Once we move to release v1.0.0, type (i.e. input, meta, label) and data_type will be defined in the data yaml config, and the relevant experiment class will be automatically built.

"},{"location":"#loading-the-data","title":"Loading the data","text":"

Finally, once we have defined our encoders, the experiment class and the samplesheet, stimulus will transparently load the data using the csv.py module.

csv.py contains two important classes: CsvLoader and CsvProcessing.

CsvLoader is responsible for na\u00efvely loading the data (without changing anything). It works by performing a couple of checks on the dataset to ensure it is correctly formatted, and then uses the experiment class in conjunction with the column names to call the proper encoders and output inputs, labels and meta dictionary objects.

CsvLoader is used by the handlertorch module to load data into pytorch tensors.

Tip

So, to recap, when you load a dataset into a torch tensor,

  1. handlertorch will call CsvLoader with the csv samplesheet and the experiment class

  2. CsvLoader will use the experiment class to fetch the proper encoder's encode_all method for each data column

  3. CsvLoader will use the encode_all method to encode the data and output dictionary objects for inputs, labels and meta data

  4. handlertorch will convert the contents to torch tensors

  5. handlertorch will feed the input torch tensor to the model, use the label torch tensor for loss computation and will store the meta tensor for downstream analysis

Great, now you know how stimulus transparently loads your data into your pytorch model! While this seems complicated, the only thing you really have to do is format your data correctly in a csv samplesheet and define your experiment class with the proper encoders (either by using the provided encoders or by writing your own).
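To make the recap above concrete, here is a minimal sketch of driving CsvLoader by hand; the import path, the constructor arguments, and the get_all_items method name are hypothetical placeholders inferred from the description above (in normal use, handlertorch performs these steps for you):

from stimulus.data.csv import CsvLoader  # hypothetical import path\n\n# the minimal experiment class defined above\nexperiment = DnaToFloat()\n\n# hypothetical constructor: the experiment instance plus the csv samplesheet path\nloader = CsvLoader(experiment, \"mouse_samplesheet.csv\")\n\n# CsvLoader calls each column's encode_all method and returns dictionary objects;\n# get_all_items is a placeholder name for whatever accessor the class exposes\ninputs, labels, meta = loader.get_all_items()\n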

"},{"location":"#data-transformation","title":"Data transformation","text":"

Measuring the impact of data transformations (noising, down/upsampling, augmentation...) on models at training time is a major feature of stimulus.

Data transformations are implemented as DataTransformer classes, and should inherit from the AbstractDataTransformer class (see docs).

Note

Writing your own DataTransformer class works the same way as writing your own Encoder class: you should overwrite the transform and transform_all methods.

Warning

Every DataTransformer class must accept a seed parameter in its transform and transform_all methods, and np.random.seed(seed) should be called in those methods.

Warning

Every DataTransformer class should have an add_row argument set to either True or False, depending on whether it augments the data (adds rows) or not.
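As a sketch of these two requirements (only the transform/transform_all names, the seed parameter, and add_row come from the notes above; the exact signatures and setting add_row as an instance attribute are assumptions for this example), a toy transformer could look like this:

import numpy as np\n\nclass RandomUppercase(AbstractDataTransformer):\n    \"\"\"Toy transformer that uppercases characters at random positions (illustrative only).\"\"\"\n\n    def __init__(self) -> None:\n        super().__init__()\n        self.add_row = False  # rows are modified in place, no augmentation\n\n    def _mask(self, item: str) -> str:\n        # uppercase each character with probability 0.5\n        return \"\".join(c.upper() if np.random.rand() < 0.5 else c for c in item)\n\n    def transform(self, data: str, seed: float = None) -> str:\n        np.random.seed(seed)  # every transform method must accept and apply the seed\n        return self._mask(data)\n\n    def transform_all(self, data: list, seed: float = None) -> list:\n        np.random.seed(seed)\n        return [self._mask(item) for item in data]\n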

"},{"location":"#connecting-transformations-and-dataset","title":"Connecting transformations and dataset","text":"

Just like encoders, data transformations are defined in the Experiment class. Let's upgrade our minimal DnaToFloat class defined above to reflect this.

class DnaToFloat(AbstractExperiment):\n    def __init__(self) -> None:\n        super().__init__()\n        self.dna = {\n            \"encoder\": encoders.TextOneHotEncoder(alphabet=\"acgt\"),\n            \"data_transformation_generators\": {\n                \"UniformTextMasker\": data_transformation_generators.UniformTextMasker(mask=\"N\"),\n                \"ReverseComplement\": data_transformation_generators.ReverseComplement(),\n                \"GaussianChunk\": data_transformation_generators.GaussianChunk(),\n            },\n        }\n        self.float = {\n            \"encoder\": encoders.FloatEncoder(),\n            \"data_transformation_generators\": {\"GaussianNoise\": data_transformation_generators.GaussianNoise()},\n        }\n

As you can see, each data_type entry gains another field, \"data_transformation_generators\", where we can initialize the DataTransformer classes with their relevant parameters.

In the csv module, the CsvProcessing class will call the transform_all methods from the classes contained in \"data_transformation_generators\" based on the column type and a list of transformations.

That is, if we pass the [\"ReverseComplement\",\"GaussianChunk\"] list to the CsvProcessing class's transform method, the data contained in the mouse_dna:input:dna column of our minimal example above will first be reverse complemented and then chunked.

Tip

Recap: To transform your dataset,

  • define your own DataTransformer class or use one we provide

  • add it to your experiment class

  • load your data through CsvProcessing

  • set a list of transforms

  • call CsvProcessing.transform(transform_list)
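
Putting the recap together, a minimal usage sketch could look like the following; only the transform(transform_list) call is taken from the description above, while the import path and constructor arguments are assumptions:

from stimulus.data.csv import CsvProcessing  # hypothetical import path\n\n# hypothetical constructor: the experiment instance plus the csv samplesheet path\nprocessing = CsvProcessing(DnaToFloat(), \"mouse_samplesheet.csv\")\n\n# transformations are applied in order: reverse complement first, then chunking\nprocessing.transform([\"ReverseComplement\", \"GaussianChunk\"])\n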

"},{"location":"#installation","title":"Installation","text":"

stimulus is still under development; you can install it from test-pypi by running the following command:

pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple stimulus-py==0.0.10\n
"},{"location":"#citations","title":"citations","text":"
  1. Godbole, V., Dahl, G. E., Gilmer, J., Shallue, C. J., & Nado, Z. (2023). Deep Learning Tuning Playbook (Version 1.0) [Computer software]. http://github.com/google-research/tuning_playbook \u21a9

"},{"location":"changelog/","title":"Changelog","text":"

The format is based on Keep a Changelog, and this project adheres to Semantic Versioning.

"},{"location":"changelog/#021","title":"0.2.1","text":"

The first released version is 0.2.1; the changelog will be updated from there.

"},{"location":"code_of_conduct/","title":"Contributor Covenant Code of Conduct","text":""},{"location":"code_of_conduct/#our-pledge","title":"Our Pledge","text":"

We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.

"},{"location":"code_of_conduct/#our-standards","title":"Our Standards","text":"

Examples of behavior that contributes to a positive environment for our community include:

  • Demonstrating empathy and kindness toward other people
  • Being respectful of differing opinions, viewpoints, and experiences
  • Giving and gracefully accepting constructive feedback
  • Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
  • Focusing on what is best not just for us as individuals, but for the overall community

Examples of unacceptable behavior include:

  • The use of sexualized language or imagery, and sexual attention or advances of any kind
  • Trolling, insulting or derogatory comments, and personal or political attacks
  • Public or private harassment
  • Publishing others' private information, such as a physical or email address, without their explicit permission
  • Other conduct which could reasonably be considered inappropriate in a professional setting
"},{"location":"code_of_conduct/#enforcement-responsibilities","title":"Enforcement Responsibilities","text":"

Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.

Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.

"},{"location":"code_of_conduct/#scope","title":"Scope","text":"

This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.

"},{"location":"code_of_conduct/#enforcement","title":"Enforcement","text":"

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at mathysgrapotte@gmail.com. All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the reporter of any incident.

"},{"location":"code_of_conduct/#enforcement-guidelines","title":"Enforcement Guidelines","text":"

Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:

"},{"location":"code_of_conduct/#1-correction","title":"1. Correction","text":"

Community Impact: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.

Consequence: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.

"},{"location":"code_of_conduct/#2-warning","title":"2. Warning","text":"

Community Impact: A violation through a single incident or series of actions.

Consequence: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.

"},{"location":"code_of_conduct/#3-temporary-ban","title":"3. Temporary Ban","text":"

Community Impact: A serious violation of community standards, including sustained inappropriate behavior.

Consequence: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.

"},{"location":"code_of_conduct/#4-permanent-ban","title":"4. Permanent Ban","text":"

Community Impact: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.

Consequence: A permanent ban from any sort of public interaction within the community.

"},{"location":"code_of_conduct/#attribution","title":"Attribution","text":"

This Code of Conduct is adapted from the Contributor Covenant, version 2.1, available at https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.

Community Impact Guidelines were inspired by Mozilla's code of conduct enforcement ladder.

For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations.

"},{"location":"contributing/","title":"Contributing","text":"

Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given.

"},{"location":"contributing/#environment-setup","title":"Environment setup","text":"

Nothing easier!

Fork and clone the repository, then:

cd stimulus-py\nmake setup\n

Note

If it fails for some reason, you'll need to install uv manually.

You can install it with:

curl -LsSf https://astral.sh/uv/install.sh | sh\n

Now you can try running make setup again, or simply uv sync.

You now have the dependencies installed.

Run make help to see all the available actions!

"},{"location":"contributing/#tasks","title":"Tasks","text":"

The entry-point to run commands and tasks is the make Python script, located in the scripts directory. Try running make to show the available commands and tasks. The commands do not need the Python dependencies to be installed, while the tasks do. The cross-platform tasks are written in Python, thanks to duty.

If you work in VSCode, we provide an action to configure VSCode for the project.

"},{"location":"contributing/#development","title":"Development","text":"

As usual:

  1. create a new branch: git switch -c feature-or-bugfix-name
  2. edit the code and/or the documentation

Before committing:

  1. run make format to auto-format the code
  2. run make check to check everything (fix any warning)
  3. run make test to run the tests (fix any issue)
  4. if you updated the documentation or the project dependencies:
    1. run make docs
    2. go to http://localhost:8000 and check that everything looks good

Then you can open a pull request and we will review it. Make sure you join our Slack hosted on nf-core to talk and build with us!

"},{"location":"credits/","title":"Credits","text":""},{"location":"credits/#exec-1--credits","title":"Credits","text":"

These projects were used to build stimulus-py. Thank you!

Python | uv | copier-uv

"},{"location":"credits/#exec-1--runtime-dependencies","title":"Runtime dependencies","text":"Project Summary Version (accepted) Version (last resolved) License aiohappyeyeballs Happy Eyeballs for asyncio >=2.3.0 2.4.4 PSF-2.0 aiohttp Async http client/server framework (asyncio) >=3.7 3.11.11 Apache-2.0 aiohttp-cors CORS support for aiohttp 0.7.0 Apache License, Version 2.0 aiosignal aiosignal: a list of registered asynchronous callbacks 1.3.2 Apache 2.0 annotated-types Reusable constraint types to use with typing.Annotated >=0.6.0 0.7.0 MIT License attrs Classes Without Boilerplate >=22.2.0 25.1.0 MIT cachetools Extensible memoizing collections and decorators >=2.0.0, <6.0 5.5.1 MIT certifi Python package for providing Mozilla's CA Bundle. >=2017.4.17 2025.1.31 MPL-2.0 charset-normalizer The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. >=2, <4 3.4.1 MIT click Composable command line interface toolkit >=7.0 8.1.8 BSD License colorama Cross-platform colored terminal text. >=0.4 0.4.6 BSD License colorful Terminal string styling done right, in Python. 0.5.6 MIT License contourpy Python library for calculating contours of 2D quadrilateral grids >=1.0.1 1.3.1 BSD License cycler Composable style cycles >=0.10 0.12.1 BSD License dill serialize all of Python >=0.3.9 0.3.9 BSD-3-Clause distlib Distribution utilities >=0.3.7, <1 0.3.9 PSF-2.0 filelock A platform independent file lock. 3.17.0 Unlicense fonttools Tools to manipulate font files >=4.22.0 4.55.8 MIT frozenlist A list-like structure which implements collections.abc.MutableSequence 1.5.0 Apache 2 fsspec File-system specification 2025.2.0 BSD License google-api-core Google API client core library >=1.0.0, <2.0.0 2.24.1 Apache 2.0 google-auth Google Authentication Library >=2.14.1, <3.0.dev0 2.38.0 Apache 2.0 googleapis-common-protos Common protobufs used in Google APIs >=1.56.2, <2.0.dev0 1.66.0 Apache-2.0 grpcio HTTP/2-based RPC framework >=1.32.0 1.70.0 Apache License 2.0 idna Internationalized Domain Names in Applications (IDNA) >=2.5, <4 3.10 BSD License importlib_metadata Read metadata from Python packages >=4.4 8.6.1 Apache Software License iniconfig brain-dead simple config-ini parsing 2.0.0 MIT Jinja2 A very fast and expressive template engine. >=2.11.1 3.1.5 BSD License joblib Lightweight pipelining with Python functions >=1.2.0 1.4.2 BSD 3-Clause jsonschema An implementation of JSON Schema validation for Python 4.23.0 MIT jsonschema-specifications The JSON Schema meta-schemas and vocabularies, exposed as a Registry >=2023.03.6 2024.10.1 MIT License kiwisolver A fast implementation of the Cassowary constraint solver >=1.3.1 1.4.8 BSD License MarkupSafe Safely add untrusted strings to HTML/XML markup. >=2.0.1, >=2.0 3.0.2 BSD License matplotlib Python plotting package >=3.9.0 3.10.0 Python Software Foundation License mpmath Python library for arbitrary-precision floating-point arithmetic >=1.1.0, <1.4 1.3.0 BSD msgpack MessagePack serializer >=1.0.0, <2.0.0 1.1.0 Apache 2.0 multidict multidict implementation >=4.5, <7.0 6.1.0 Apache 2 multiprocess better multiprocessing and multithreading in Python ==0.70.17 0.70.17 BSD-3-Clause networkx Python package for creating and manipulating graphs and networks 3.4.2 BSD License numpy Fundamental package for array computing in Python >=1.26.0, <2.0.0 1.26.4 BSD License nvidia-cublas-cu12 CUBLAS native runtime libraries ==12.1.3.1 12.1.3.1 NVIDIA Proprietary Software nvidia-cuda-cupti-cu12 CUDA profiling tools runtime libs. 
==12.1.105 12.1.105 NVIDIA Proprietary Software nvidia-cuda-nvrtc-cu12 NVRTC native runtime libraries ==12.1.105 12.1.105 NVIDIA Proprietary Software nvidia-cuda-runtime-cu12 CUDA Runtime native Libraries ==12.1.105 12.1.105 NVIDIA Proprietary Software nvidia-cudnn-cu12 cuDNN runtime libraries ==8.9.2.26 8.9.2.26 NVIDIA Proprietary Software nvidia-cufft-cu12 CUFFT native runtime libraries ==11.0.2.54 11.0.2.54 NVIDIA Proprietary Software nvidia-curand-cu12 CURAND native runtime libraries ==10.3.2.106 10.3.2.106 NVIDIA Proprietary Software nvidia-cusolver-cu12 CUDA solver native runtime libraries ==11.4.5.107 11.4.5.107 NVIDIA Proprietary Software nvidia-cusparse-cu12 CUSPARSE native runtime libraries ==12.1.0.106 12.1.0.106 NVIDIA Proprietary Software nvidia-nccl-cu12 NVIDIA Collective Communication Library (NCCL) Runtime ==2.19.3 2.19.3 NVIDIA Proprietary Software nvidia-nvjitlink-cu12 Nvidia JIT LTO Library 12.8.61 NVIDIA Proprietary Software nvidia-nvtx-cu12 NVIDIA Tools Extension ==12.1.105 12.1.105 NVIDIA Proprietary Software opencensus A stats collection and distributed tracing framework 0.11.4 Apache-2.0 opencensus-context OpenCensus Runtime Context >=0.1.3 0.1.3 Apache-2.0 packaging Core utilities for Python packages >=20.5, >=20.0 24.2 Apache Software License + BSD License pandas Powerful data structures for data analysis, time series, and statistics >=2.2.0 2.2.3 BSD License pillow Python Imaging Library (Fork) >=8 11.1.0 MIT-CMU platformdirs A small Python package for determining appropriate platform-specific dirs, e.g. a user data dir. >=3.9.1, >=2.2.0, <5 4.3.6 MIT pluggy plugin and hook calling mechanisms for python >=1.5, <2 1.5.0 MIT polars-lts-cpu Blazingly fast DataFrame library >=0.20.30, <1.12.0 1.11.0 MIT License prometheus_client Python client for the Prometheus monitoring system. >=0.7.1 0.21.1 Apache Software License 2.0 propcache Accelerated property cache >=0.2.0 0.2.1 Apache-2.0 proto-plus Beautiful, Pythonic protocol buffers >=1.22.3, <2.0.0dev 1.26.0 Apache 2.0 protobuf >=3.15.3, !=3.19.5 5.29.3 3-Clause BSD License py-spy Sampling profiler for Python programs >=0.2.0 0.4.0 MIT pyarrow Python library for Apache Arrow >=9.0.0 17.0.0 Apache Software License pyasn1 Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208) >=0.1.3 0.6.1 BSD-2-Clause pyasn1_modules A collection of ASN.1-based protocols modules >=0.2.1 0.4.1 BSD pydantic Data validation using Python type hints >=2.0.0 2.10.6 MIT pydantic_core Core functionality for Pydantic validation and serialization ==2.27.2 2.27.2 MIT pyparsing pyparsing module - Classes and methods to define and execute parsing grammars >=2.3.1 3.2.1 MIT License pytest pytest: simple powerful testing with Python >=8.2, >=7.0.0, <9.0.0 8.3.4 MIT python-dateutil Extensions to the standard Python datetime module >=2.8.2, >=2.8.1 2.9.0.post0 BSD License + Apache Software License pytz World timezone definitions, modern and historical >=2020.1 2025.1 MIT PyYAML YAML parser and emitter for Python >=5.1 6.0.2 MIT ray Ray provides a simple, universal API for building distributed applications. >=2.38.0 2.42.0 Apache 2.0 referencing JSON Referencing + Python >=0.28.4 0.36.2 MIT requests Python HTTP for Humans. 
>=2.20 2.32.3 Apache-2.0 rpds-py Python bindings to Rust's persistent data structures (rpds) >=0.7.1 0.22.3 MIT License rsa Pure-Python RSA implementation >=3.1.4, <5 4.9 Apache-2.0 safetensors >=0.4.5 0.5.2 Apache Software License scikit-learn A set of python modules for machine learning and data mining >=1.5.0 1.6.1 BSD License scipy Fundamental algorithms for scientific computing in Python ==1.14.1 1.14.1 BSD License six Python 2 and 3 compatibility utilities >=1.5 1.17.0 MIT smart-open Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...) 7.1.0 MIT sympy Computer algebra system (CAS) in Python 1.13.3 BSD syrupy Pytest Snapshot Test Utility >=4.8.0 4.8.1 Apache-2.0 tensorboardX TensorBoardX lets you watch Tensors Flow without Tensorflow >=1.9 2.6.2.2 MIT license threadpoolctl threadpoolctl >=3.1.0 3.5.0 BSD-3-Clause torch Tensors and Dynamic neural networks in Python with strong GPU acceleration ==2.2.2 2.2.2 BSD-3 triton A language and compiler for custom Deep Learning operations ==2.2.0 2.2.0 MIT License typing_extensions Backported and Experimental Type Hints for Python 3.8+ >=4.9, >=4.12.2 4.12.2 Python Software Foundation License tzdata Provider of IANA time zone data >=2022.7 2025.1 Apache-2.0 urllib3 HTTP library with thread-safe connection pooling, file post, and more. >=1.26.0, >=1.21.1, <3 2.3.0 MIT License virtualenv Virtual Python Environment builder >=20.0.24, !=20.21.1 20.29.1 MIT wrapt Module for decorators, wrappers and monkey patching. 1.17.2 BSD yarl Yet another URL library >=1.17.0, <2.0 1.18.3 Apache-2.0 zipp Backport of pathlib-compatible object wrapper for zip files >=3.20 3.21.0 MIT License"},{"location":"credits/#exec-1--development-dependencies","title":"Development dependencies","text":"Project Summary Version (accepted) Version (last resolved) License ansimarkup Produce colored terminal text with an xml-like markup ~=1.4 1.5.0 Revised BSD License appdirs A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\". >=1.4 1.4.4 MIT babel Internationalization utilities >=2.7.0 2.17.0 BSD-3-Clause backports.tarfile Backport of CPython tarfile module 1.2.0 MIT License black The uncompromising code formatter. >=24.4 25.1.0 MIT build A simple, correct Python build frontend >=1.2 1.2.2.post1 MIT License certifi Python package for providing Mozilla's CA Bundle. >=2017.4.17 2025.1.31 MPL-2.0 cffi Foreign Function Interface for Python calling C code. >=1.12 1.17.1 MIT charset-normalizer The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. >=2, <4 3.4.1 MIT click Composable command line interface toolkit >=7.0 8.1.8 BSD License colorama Cross-platform colored terminal text. >=0.4 0.4.6 BSD License coverage Code coverage measurement for Python >=7.5 7.6.10 Apache-2.0 cryptography cryptography is a package which provides cryptographic recipes and primitives to Python developers. >=2.0 44.0.0 Apache-2.0 OR BSD-3-Clause csscompressor A python port of YUI CSS Compressor >=0.9.5 0.9.5 BSD docutils Docutils -- Python Documentation Utilities >=0.21.2 0.21.2 Public Domain + Python Software Foundation License + BSD License + GNU General Public License (GPL) duty A simple task runner. >=1.4 1.5.0 ISC editables Editable installations >=0.5 0.5 MIT License execnet execnet: rapid multi-Python deployment >=2.1 2.1.1 MIT failprint Run a command, print its output only if it fails. >=0.11, !=1.0.0 1.0.3 ISC ghp-import Copy your docs directly to the gh-pages branch. 
>=1.0 2.1.0 Apache Software License git-changelog Automatic Changelog generator using Jinja2 templates. >=2.5 2.5.3 ISC gitdb Git Object Database >=4.0.1, <5 4.0.12 BSD License GitPython GitPython is a Python library used to interact with Git repositories 3.1.44 BSD-3-Clause griffe Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API. >=0.49 1.5.6 ISC htmlmin2 An HTML Minifier >=0.1.13 0.1.13 BSD id A tool for generating OIDC identities 1.5.0 Apache Software License idna Internationalized Domain Names in Applications (IDNA) >=2.5, <4 3.10 BSD License importlib_metadata Read metadata from Python packages >=4.4 8.6.1 Apache Software License iniconfig brain-dead simple config-ini parsing 2.0.0 MIT jaraco.classes Utility functions for Python class constructs 3.4.0 MIT License jaraco.context Useful decorators and context managers 6.0.1 MIT License jaraco.functools Functools like those found in stdlib 4.1.0 MIT License jeepney Low-level, pure Python DBus protocol wrapper. >=0.4.2 0.8.0 MIT License Jinja2 A very fast and expressive template engine. >=2.11.1 3.1.5 BSD License jsmin JavaScript minifier. >=3.0.1 3.0.1 MIT License keyring Store and access your passwords safely. >=15.1 25.6.0 MIT License Markdown Python implementation of John Gruber's Markdown. >=3.3.6 3.7 BSD License markdown-callouts Markdown extension: a classier syntax for admonitions >=0.4 0.4.0 MIT markdown-exec Utilities to execute code blocks in Markdown files. >=1.8 1.10.0 ISC markdown-it-py Python port of markdown-it. Markdown parsing, done right! >=2.2.0 3.0.0 MIT License MarkupSafe Safely add untrusted strings to HTML/XML markup. >=2.0.1, >=2.0 3.0.2 BSD License mdurl Markdown URL utilities ~=0.1 0.1.2 MIT License mergedeep A deep merge function for \ud83d\udc0d. >=1.3.4 1.3.4 MIT License mkdocs Project documentation with Markdown. >=1.6 1.6.1 BSD-2-Clause mkdocs-autorefs Automatically link across pages in MkDocs. >=1.3 1.3.0 ISC mkdocs-coverage MkDocs plugin to integrate your coverage HTML report into your site. >=1.0 1.1.0 ISC mkdocs-gen-files MkDocs plugin to programmatically generate documentation pages during the build >=0.5 0.5.0 MIT mkdocs-get-deps MkDocs extension that lists all dependencies according to a mkdocs.yml file >=0.2.0 0.2.0 MIT mkdocs-git-revision-date-localized-plugin Mkdocs plugin that enables displaying the localized date of the last git modification of a markdown file. >=1.2 1.3.0 MIT mkdocs-literate-nav MkDocs plugin to specify the navigation in Markdown instead of YAML >=0.6 0.6.1 MIT mkdocs-material Documentation that simply works >=9.5 9.6.2 MIT mkdocs-material-extensions Extension pack for Python Markdown and MkDocs Material. ~=1.3 1.3.1 MIT mkdocs-minify-plugin An MkDocs plugin to minify HTML, JS or CSS files prior to being written to disk >=0.8 0.8.0 MIT mkdocstrings Automatic documentation from sources, for MkDocs. >=0.25 0.28.0 ISC mkdocstrings-python A Python handler for mkdocstrings. >=0.5.2 1.14.4 ISC more-itertools More routines for operating on iterables, beyond itertools 10.6.0 MIT License mypy Optional static typing for Python >=1.10 1.15.0 MIT mypy-extensions Type system extensions for programs checked with the mypy type checker. 
>=1.0.0 1.0.0 MIT License nh3 Python binding to Ammonia HTML sanitizer Rust crate >=0.2.14 0.2.20 MIT packaging Core utilities for Python packages >=20.5, >=20.0 24.2 Apache Software License + BSD License paginate Divides large result sets into pages for easier browsing ~=0.5 0.5.7 MIT pathspec Utility library for gitignore style pattern matching of file paths. >=0.11.1 0.12.1 Mozilla Public License 2.0 (MPL 2.0) platformdirs A small Python package for determining appropriate platform-specific dirs, e.g. a user data dir. >=3.9.1, >=2.2.0, <5 4.3.6 MIT pluggy plugin and hook calling mechanisms for python >=1.5, <2 1.5.0 MIT ptyprocess Run a subprocess in a pseudo terminal ~=0.6 0.7.0 ISC License (ISCL) pycparser C parser in Python 2.22 BSD-3-Clause Pygments Pygments is a syntax highlighting package written in Python. ~=2.16 2.19.1 BSD-2-Clause pymdown-extensions Extension pack for Python Markdown. ~=10.2 10.14.3 MIT pyproject_hooks Wrappers to call pyproject.toml-based build backend hooks. 1.2.0 MIT License pytest pytest: simple powerful testing with Python >=8.2, >=7.0.0, <9.0.0 8.3.4 MIT pytest-cov Pytest plugin for measuring coverage. >=5.0 6.0.0 MIT pytest-randomly Pytest plugin to randomly order tests and control random.seed. >=3.15 3.16.0 MIT License pytest-xdist pytest xdist plugin for distributed testing, most importantly across multiple CPUs >=3.6 3.6.1 MIT License python-dateutil Extensions to the standard Python datetime module >=2.8.2, >=2.8.1 2.9.0.post0 BSD License + Apache Software License pytz World timezone definitions, modern and historical >=2020.1 2025.1 MIT PyYAML YAML parser and emitter for Python >=5.1 6.0.2 MIT pyyaml_env_tag A custom YAML tag for referencing environment variables in YAML files. >=0.1 0.1 MIT License readme_renderer readme_renderer is a library for rendering readme descriptions for Warehouse >=35.0 44.0 Apache License, Version 2.0 regex Alternative regular expression module, to replace re. >=2022.4 2024.11.6 Apache Software License requests Python HTTP for Humans. >=2.20 2.32.3 Apache-2.0 requests-toolbelt A utility belt for advanced users of python-requests >=0.8.0, !=0.9.0 1.0.0 Apache 2.0 rfc3986 Validating URI References per RFC 3986 >=1.4.0 2.0.0 Apache 2.0 rich Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal >=12.0.0 13.9.4 MIT ruff An extremely fast Python linter and code formatter, written in Rust. >=0.4 0.9.4 MIT SecretStorage Python bindings to FreeDesktop.org Secret Service API >=3.2 3.3.3 BSD 3-Clause License semver Python helper for Semantic Versioning (https://semver.org) >=2.13 3.0.4 BSD License six Python 2 and 3 compatibility utilities >=1.5 1.17.0 MIT smmap A pure Python implementation of a sliding window memory map manager >=3.0.1, <6 5.0.2 BSD-3-Clause twine Collection of utilities for publishing packages on PyPI >=5.1 6.1.0 Apache Software License types-Markdown Typing stubs for Markdown >=3.6 3.7.0.20241204 Apache-2.0 types-PyYAML Typing stubs for PyYAML >=6.0 6.0.12.20241230 Apache-2.0 typing_extensions Backported and Experimental Type Hints for Python 3.8+ >=4.9, >=4.12.2 4.12.2 Python Software Foundation License urllib3 HTTP library with thread-safe connection pooling, file post, and more. >=1.26.0, >=1.21.1, <3 2.3.0 MIT License watchdog Filesystem events monitoring >=2.0 6.0.0 Apache-2.0 zipp Backport of pathlib-compatible object wrapper for zip files >=3.20 3.21.0 MIT License"},{"location":"license/","title":"License","text":"
MIT License\n\nCopyright (c) 2024 Mathys Grapotte\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n
"},{"location":"reference/SUMMARY/","title":"SUMMARY","text":"
  • stimulus
    • cli
      • analysis_default
      • check_model
      • predict
      • shuffle_csv
      • split_csv
      • split_yaml
      • transform_csv
      • tuning
    • data
      • data_handlers
      • encoding
        • encoders
      • handlertorch
      • loaders
      • splitters
        • splitters
      • transform
        • data_transformation_generators
    • debug
    • learner
      • predict
      • raytune_learner
      • raytune_parser
    • typing
    • utils
      • generic_utils
      • launch_utils
      • performance
      • yaml_data
      • yaml_model_schema
"},{"location":"reference/stimulus/","title":"stimulus","text":""},{"location":"reference/stimulus/#stimulus","title":"stimulus","text":"

stimulus-py package.

Modules:

  • cli \u2013

    Command line interface package for the stimulus library.

  • data \u2013

    Data handling and processing module.

  • debug \u2013

    Debugging utilities.

  • learner \u2013

    Learner package for model training and evaluation.

  • typing \u2013

    Typing for Stimulus Python API.

  • utils \u2013

    Utility functions package.

"},{"location":"reference/stimulus/debug/","title":"stimulus.debug","text":""},{"location":"reference/stimulus/debug/#stimulus.debug","title":"debug","text":"

Debugging utilities.

Classes:

  • Environment \u2013

    Dataclass to store environment information.

  • Package \u2013

    Dataclass describing a Python package.

  • Variable \u2013

    Dataclass describing an environment variable.

Functions:

  • get_debug_info \u2013

    Get debug/environment information.

  • get_version \u2013

    Get version of the given distribution.

  • print_debug_info \u2013

    Print debug/environment information.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment","title":"Environment dataclass","text":"
Environment(\n    interpreter_name: str,\n    interpreter_version: str,\n    interpreter_path: str,\n    platform: str,\n    packages: list[Package],\n    variables: list[Variable],\n)\n

Dataclass to store environment information.

Attributes:

  • interpreter_name (str) \u2013

    Python interpreter name.

  • interpreter_path (str) \u2013

    Path to Python executable.

  • interpreter_version (str) \u2013

    Python interpreter version.

  • packages (list[Package]) \u2013

    Installed packages.

  • platform (str) \u2013

    Operating System.

  • variables (list[Variable]) \u2013

    Environment variables.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.interpreter_name","title":"interpreter_name instance-attribute","text":"
interpreter_name: str\n

Python interpreter name.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.interpreter_path","title":"interpreter_path instance-attribute","text":"
interpreter_path: str\n

Path to Python executable.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.interpreter_version","title":"interpreter_version instance-attribute","text":"
interpreter_version: str\n

Python interpreter version.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.packages","title":"packages instance-attribute","text":"
packages: list[Package]\n

Installed packages.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.platform","title":"platform instance-attribute","text":"
platform: str\n

Operating System.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Environment.variables","title":"variables instance-attribute","text":"
variables: list[Variable]\n

Environment variables.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Package","title":"Package dataclass","text":"
Package(name: str, version: str)\n

Dataclass describing a Python package.

Attributes:

  • name (str) \u2013

    Package name.

  • version (str) \u2013

    Package version.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Package.name","title":"name instance-attribute","text":"
name: str\n

Package name.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Package.version","title":"version instance-attribute","text":"
version: str\n

Package version.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Variable","title":"Variable dataclass","text":"
Variable(name: str, value: str)\n

Dataclass describing an environment variable.

Attributes:

  • name (str) \u2013

    Variable name.

  • value (str) \u2013

    Variable value.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Variable.name","title":"name instance-attribute","text":"
name: str\n

Variable name.

"},{"location":"reference/stimulus/debug/#stimulus.debug.Variable.value","title":"value instance-attribute","text":"
value: str\n

Variable value.

"},{"location":"reference/stimulus/debug/#stimulus.debug.get_debug_info","title":"get_debug_info","text":"
get_debug_info() -> Environment\n

Get debug/environment information.

Returns:

  • Environment \u2013

    Environment information.

Source code in src/stimulus/debug.py
def get_debug_info() -> Environment:\n    \"\"\"Get debug/environment information.\n\n    Returns:\n        Environment information.\n    \"\"\"\n    py_name, py_version = _interpreter_name_version()\n    packages = [\"stimulus-py\"]\n    variables = [\"PYTHONPATH\", *[var for var in os.environ if var.startswith(\"STIMULUS_PY\")]]\n    return Environment(\n        interpreter_name=py_name,\n        interpreter_version=py_version,\n        interpreter_path=sys.executable,\n        platform=platform.platform(),\n        variables=[Variable(var, val) for var in variables if (val := os.getenv(var))],\n        packages=[Package(pkg, get_version(pkg)) for pkg in packages],\n    )\n
"},{"location":"reference/stimulus/debug/#stimulus.debug.get_version","title":"get_version","text":"
get_version(dist: str = 'stimulus-py') -> str\n

Get version of the given distribution.

Parameters:

  • dist (str, default: 'stimulus-py' ) \u2013

    A distribution name.

Returns:

  • str \u2013

    A version number.

Source code in src/stimulus/debug.py
def get_version(dist: str = \"stimulus-py\") -> str:\n    \"\"\"Get version of the given distribution.\n\n    Parameters:\n        dist: A distribution name.\n\n    Returns:\n        A version number.\n    \"\"\"\n    try:\n        return metadata.version(dist)\n    except metadata.PackageNotFoundError:\n        return \"0.0.0\"\n
"},{"location":"reference/stimulus/debug/#stimulus.debug.print_debug_info","title":"print_debug_info","text":"
print_debug_info() -> None\n

Print debug/environment information.

Source code in src/stimulus/debug.py
def print_debug_info() -> None:\n    \"\"\"Print debug/environment information.\"\"\"\n    info = get_debug_info()\n    print(f\"- __System__: {info.platform}\")\n    print(f\"- __Python__: {info.interpreter_name} {info.interpreter_version} ({info.interpreter_path})\")\n    print(\"- __Environment variables__:\")\n    for var in info.variables:\n        print(f\"  - `{var.name}`: `{var.value}`\")\n    print(\"- __Installed packages__:\")\n    for pkg in info.packages:\n        print(f\"  - `{pkg.name}` v{pkg.version}\")\n
"},{"location":"reference/stimulus/cli/","title":"stimulus.cli","text":""},{"location":"reference/stimulus/cli/#stimulus.cli","title":"cli","text":"

Command line interface package for the stimulus library.

Modules:

  • analysis_default \u2013

    Analysis default module for running model analysis and performance evaluation.

  • check_model \u2013

    CLI module for checking model configuration and running initial tests.

  • predict \u2013

    CLI module for model prediction on datasets.

  • shuffle_csv \u2013

    CLI module for shuffling CSV data files.

  • split_csv \u2013

    CLI module for splitting CSV data files.

  • split_yaml \u2013

    CLI module for splitting YAML configuration files.

  • transform_csv \u2013

    CLI module for transforming CSV data files.

  • tuning \u2013

    CLI module for running raytune tuning experiment.

"},{"location":"reference/stimulus/cli/analysis_default/","title":"stimulus.cli.analysis_default","text":""},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default","title":"analysis_default","text":"

Analysis default module for running model analysis and performance evaluation.

Functions:

  • get_args \u2013

    Get the arguments when using from the commandline.

  • load_model \u2013

    Load the model with its config and weights.

  • main \u2013

    Run the main analysis pipeline.

  • run \u2013

    Run the analysis script.

  • run_analysis_performance_model \u2013

    Run analysis to report model robustness.

  • run_analysis_performance_tune \u2013

    Run performance analysis during tuning/training.

"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when using from the commandline.

Returns:

  • Namespace \u2013

    Parsed command line arguments.

Source code in src/stimulus/cli/analysis_default.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\n\n    Returns:\n        Parsed command line arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"\")\n    parser.add_argument(\"-m\", \"--model\", type=str, required=True, metavar=\"FILE\", help=\"The model .py file\")\n    parser.add_argument(\n        \"-w\",\n        \"--weight\",\n        type=str,\n        required=True,\n        nargs=\"+\",\n        metavar=\"FILE\",\n        help=\"Model weights .pt file\",\n    )\n    parser.add_argument(\n        \"-me\",\n        \"--metrics\",\n        type=str,\n        required=True,\n        nargs=\"+\",\n        metavar=\"FILE\",\n        help=\"The file path for the metrics file obtained during tuning\",\n    )\n    parser.add_argument(\n        \"-ec\",\n        \"--experiment_config\",\n        type=str,\n        required=True,\n        nargs=\"+\",\n        metavar=\"FILE\",\n        help=\"The experiment config used to modify the data.\",\n    )\n    parser.add_argument(\n        \"-mc\",\n        \"--model_config\",\n        type=str,\n        required=True,\n        nargs=\"+\",\n        metavar=\"FILE\",\n        help=\"The tune config file.\",\n    )\n    parser.add_argument(\n        \"-d\",\n        \"--data\",\n        type=str,\n        required=True,\n        nargs=\"+\",\n        metavar=\"FILE\",\n        help=\"List of data files to be used for the analysis.\",\n    )\n    parser.add_argument(\"-o\", \"--outdir\", type=str, required=True, help=\"output directory\")\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.load_model","title":"load_model","text":"
load_model(\n    model_class: Any, weight_path: str, mconfig_path: str\n) -> Any\n

Load the model with its config and weights.

Parameters:

  • model_class (Any) \u2013

    Model class to instantiate

  • weight_path (str) \u2013

    Path to model weights

  • mconfig_path (str) \u2013

    Path to model config

Returns:

  • Any \u2013

    Loaded model instance

Source code in src/stimulus/cli/analysis_default.py
def load_model(model_class: Any, weight_path: str, mconfig_path: str) -> Any:\n    \"\"\"Load the model with its config and weights.\n\n    Args:\n        model_class: Model class to instantiate\n        weight_path: Path to model weights\n        mconfig_path: Path to model config\n\n    Returns:\n        Loaded model instance\n    \"\"\"\n    with open(mconfig_path) as in_json:\n        mconfig = json.load(in_json)[\"model_params\"]\n\n    model = model_class(**mconfig)\n    return safe_load(model, weight_path, strict=True)\n
"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.main","title":"main","text":"
main(\n    model_path: str,\n    weight_list: list[str],\n    mconfig_list: list[str],\n    metrics_list: list[str],\n    econfig_list: list[str],\n    data_list: list[str],\n    outdir: str,\n) -> None\n

Run the main analysis pipeline.

Parameters:

  • model_path (str) \u2013

    Path to model file

  • weight_list (list[str]) \u2013

    List of model weight paths

  • mconfig_list (list[str]) \u2013

    List of model config paths

  • metrics_list (list[str]) \u2013

    List of metric file paths

  • econfig_list (list[str]) \u2013

    List of experiment config paths

  • data_list (list[str]) \u2013

    List of data file paths

  • outdir (str) \u2013

    Output directory path

Source code in src/stimulus/cli/analysis_default.py
def main(\n    model_path: str,\n    weight_list: list[str],\n    mconfig_list: list[str],\n    metrics_list: list[str],\n    econfig_list: list[str],\n    data_list: list[str],\n    outdir: str,\n) -> None:\n    \"\"\"Run the main analysis pipeline.\n\n    Args:\n        model_path: Path to model file\n        weight_list: List of model weight paths\n        mconfig_list: List of model config paths\n        metrics_list: List of metric file paths\n        econfig_list: List of experiment config paths\n        data_list: List of data file paths\n        outdir: Output directory path\n    \"\"\"\n    metrics = [\"rocauc\", \"prauc\", \"mcc\", \"f1score\", \"precision\", \"recall\"]\n\n    # Plot the performance during tuning/training\n    run_analysis_performance_tune(\n        metrics_list,\n        [*metrics, \"loss\"],  # Use list unpacking instead of concatenation\n        os.path.join(outdir, \"performance_tune_train\"),\n    )\n\n    # Run robustness analysis\n    run_analysis_performance_model(\n        metrics,\n        model_path,\n        weight_list,\n        mconfig_list,\n        econfig_list,\n        data_list,\n        os.path.join(outdir, \"performance_robustness\"),\n    )\n
"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.run","title":"run","text":"
run() -> None\n

Run the analysis script.

Source code in src/stimulus/cli/analysis_default.py
def run() -> None:\n    \"\"\"Run the analysis script.\"\"\"\n    args = get_args()\n    main(args.model, args.weight, args.model_config, args.metrics, args.experiment_config, args.data, args.outdir)\n
"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.run_analysis_performance_model","title":"run_analysis_performance_model","text":"
run_analysis_performance_model(\n    metrics: list[str],\n    model_path: str,\n    weight_list: list[str],\n    mconfig_list: list[str],\n    econfig_list: list[str],\n    data_list: list[str],\n    outdir: str,\n) -> None\n

Run analysis to report model robustness.

This block will compute the predictions of each model for each dataset. This information will be parsed and plots will be generated to report the model robustness.

Parameters:

  • metrics (list[str]) \u2013

    List of metrics to analyze

  • model_path (str) \u2013

    Path to model file

  • weight_list (list[str]) \u2013

    List of model weight paths

  • mconfig_list (list[str]) \u2013

    List of model config paths

  • econfig_list (list[str]) \u2013

    List of experiment config paths

  • data_list (list[str]) \u2013

    List of data file paths

  • outdir (str) \u2013

    Output directory path

Source code in src/stimulus/cli/analysis_default.py
def run_analysis_performance_model(\n    metrics: list[str],\n    model_path: str,\n    weight_list: list[str],\n    mconfig_list: list[str],\n    econfig_list: list[str],\n    data_list: list[str],\n    outdir: str,\n) -> None:\n    \"\"\"Run analysis to report model robustness.\n\n    This block will compute the predictions of each model for each dataset.\n    This information will be parsed and plots will be generated to report the model robustness.\n\n    Args:\n        metrics: List of metrics to analyze\n        model_path: Path to model file\n        weight_list: List of model weight paths\n        mconfig_list: List of model config paths\n        econfig_list: List of experiment config paths\n        data_list: List of data file paths\n        outdir: Output directory path\n    \"\"\"\n    if not os.path.exists(outdir):\n        os.makedirs(outdir)\n\n    # Load all the models weights into a list\n    model_names = []\n    model_list = []\n    model_class = import_class_from_file(model_path)\n    for weight_path, mconfig_path in zip(weight_list, mconfig_list):\n        model = load_model(model_class, weight_path, mconfig_path)\n        model_names.append(mconfig_path.split(\"/\")[-1].replace(\"-config.json\", \"\"))\n        model_list.append(model)\n\n    # Read experiment config and initialize experiment class\n    with open(econfig_list[0]) as in_json:\n        experiment_name = json.load(in_json)[\"experiment\"]\n    initialized_experiment_class = get_experiment(experiment_name)\n\n    # Initialize analysis\n    analysis = AnalysisRobustness(metrics, initialized_experiment_class, batch_size=256)\n\n    # Compute performance metrics\n    df = analysis.get_performance_table(model_names, model_list, data_list)\n    df.to_csv(os.path.join(outdir, \"performance_table.csv\"), index=False)\n\n    # Get average performance\n    tmp = analysis.get_average_performance_table(df)\n    tmp.to_csv(os.path.join(outdir, \"average_performance_table.csv\"), index=False)\n\n    # Plot heatmap\n    analysis.plot_performance_heatmap(df, output=os.path.join(outdir, \"performance_heatmap.png\"))\n\n    # Plot delta performance\n    outdir2 = os.path.join(outdir, \"delta_performance_vs_data\")\n    if not os.path.exists(outdir2):\n        os.makedirs(outdir2)\n    for metric in metrics:\n        analysis.plot_delta_performance(\n            metric,\n            df,\n            output=os.path.join(outdir2, f\"delta_performance_{metric}.png\"),\n        )\n
"},{"location":"reference/stimulus/cli/analysis_default/#stimulus.cli.analysis_default.run_analysis_performance_tune","title":"run_analysis_performance_tune","text":"
run_analysis_performance_tune(\n    metrics_list: list[str], metrics: list[str], outdir: str\n) -> None\n

Run performance analysis during tuning/training.

Each model has a metrics file obtained during tuning/training, check the performance there and plot it. This is to track the model performance per training iteration.

Parameters:

  • metrics_list (list[str]) \u2013

    List of metric file paths

  • metrics (list[str]) \u2013

    List of metrics to analyze

  • outdir (str) \u2013

    Output directory path

Source code in src/stimulus/cli/analysis_default.py
def run_analysis_performance_tune(metrics_list: list[str], metrics: list[str], outdir: str) -> None:\n    \"\"\"Run performance analysis during tuning/training.\n\n    Each model has a metrics file obtained during tuning/training,\n    check the performance there and plot it.\n    This is to track the model performance per training iteration.\n\n    Args:\n        metrics_list: List of metric file paths\n        metrics: List of metrics to analyze\n        outdir: Output directory path\n    \"\"\"\n    if not os.path.exists(outdir):\n        os.makedirs(outdir)\n\n    for metrics_path in metrics_list:\n        AnalysisPerformanceTune(metrics_path).plot_metric_vs_iteration(\n            metrics=metrics,\n            output=os.path.join(outdir, metrics_path.replace(\"-metrics.csv\", \"\") + \"-metric_vs_iteration.png\"),\n        )\n
"},{"location":"reference/stimulus/cli/check_model/","title":"stimulus.cli.check_model","text":""},{"location":"reference/stimulus/cli/check_model/#stimulus.cli.check_model","title":"check_model","text":"

CLI module for checking model configuration and running initial tests.

Functions:

  • get_args \u2013

    Get the arguments when using from the commandline.

  • main \u2013

    Run the main model checking pipeline.

  • run \u2013

    Run the model checking script.

"},{"location":"reference/stimulus/cli/check_model/#stimulus.cli.check_model.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when using from the commandline.

Returns:

  • Namespace \u2013

    Parsed command line arguments.

Source code in src/stimulus/cli/check_model.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\n\n    Returns:\n        Parsed command line arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"Launch check_model.\")\n    parser.add_argument(\"-d\", \"--data\", type=str, required=True, metavar=\"FILE\", help=\"Path to input csv file.\")\n    parser.add_argument(\"-m\", \"--model\", type=str, required=True, metavar=\"FILE\", help=\"Path to model file.\")\n    parser.add_argument(\n        \"-e\",\n        \"--data_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to data config file.\",\n    )\n    parser.add_argument(\n        \"-c\",\n        \"--model_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to yaml config training file.\",\n    )\n    parser.add_argument(\n        \"-w\",\n        \"--initial_weights\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=None,\n        default=None,\n        metavar=\"FILE\",\n        help=\"The path to the initial weights (optional).\",\n    )\n\n    parser.add_argument(\n        \"-n\",\n        \"--num_samples\",\n        type=int,\n        required=False,\n        nargs=\"?\",\n        const=3,\n        default=3,\n        metavar=\"NUM_SAMPLES\",\n        help=\"Number of samples for tuning. Overwrites tune.tune_params.num_samples in config.\",\n    )\n    parser.add_argument(\n        \"--ray_results_dirpath\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=None,\n        default=None,\n        metavar=\"DIR_PATH\",\n        help=\"Location where ray_results output dir should be written. If None, uses ~/ray_results.\",\n    )\n    parser.add_argument(\n        \"--debug_mode\",\n        action=\"store_true\",\n        help=\"Activate debug mode for tuning. Default false, no debug.\",\n    )\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/check_model/#stimulus.cli.check_model.main","title":"main","text":"
main(\n    model_path: str,\n    data_path: str,\n    data_config_path: str,\n    model_config_path: str,\n    initial_weights: str | None = None,\n    num_samples: int = 3,\n    ray_results_dirpath: str | None = None,\n    *,\n    debug_mode: bool = False\n) -> None\n

Run the main model checking pipeline.

Parameters:

  • data_path (str) \u2013

    Path to input data file.

  • model_path (str) \u2013

    Path to model file.

  • data_config_path (str) \u2013

    Path to data config file.

  • model_config_path (str) \u2013

    Path to model config file.

  • initial_weights (str | None, default: None ) \u2013

    Optional path to initial weights.

  • num_samples (int, default: 3 ) \u2013

    Number of samples for tuning.

  • ray_results_dirpath (str | None, default: None ) \u2013

    Directory for ray results.

  • debug_mode (bool, default: False ) \u2013

    Whether to run in debug mode.

Source code in src/stimulus/cli/check_model.py
def main(\n    model_path: str,\n    data_path: str,\n    data_config_path: str,\n    model_config_path: str,\n    initial_weights: str | None = None,  # noqa: ARG001\n    num_samples: int = 3,\n    ray_results_dirpath: str | None = None,\n    *,\n    debug_mode: bool = False,\n) -> None:\n    \"\"\"Run the main model checking pipeline.\n\n    Args:\n        data_path: Path to input data file.\n        model_path: Path to model file.\n        data_config_path: Path to data config file.\n        model_config_path: Path to model config file.\n        initial_weights: Optional path to initial weights.\n        num_samples: Number of samples for tuning.\n        ray_results_dirpath: Directory for ray results.\n        debug_mode: Whether to run in debug mode.\n    \"\"\"\n    with open(data_config_path) as file:\n        data_config = yaml.safe_load(file)\n        data_config = yaml_data.YamlSubConfigDict(**data_config)\n\n    with open(model_config_path) as file:\n        model_config = yaml.safe_load(file)\n        model_config = yaml_model_schema.Model(**model_config)\n\n    encoder_loader = loaders.EncoderLoader()\n    encoder_loader.initialize_column_encoders_from_config(column_config=data_config.columns)\n\n    logger.info(\"Dataset loaded successfully.\")\n\n    model_class = launch_utils.import_class_from_file(model_path)\n\n    logger.info(\"Model class loaded successfully.\")\n\n    ray_config_loader = yaml_model_schema.YamlRayConfigLoader(model=model_config)\n    ray_config_dict = ray_config_loader.get_config().model_dump()\n    ray_config_model = ray_config_loader.get_config()\n\n    logger.info(\"Ray config loaded successfully.\")\n\n    sampled_model_params = {\n        key: domain.sample() if hasattr(domain, \"sample\") else domain\n        for key, domain in ray_config_dict[\"network_params\"].items()\n    }\n\n    logger.info(\"Sampled model params loaded successfully.\")\n\n    model_instance = model_class(**sampled_model_params)\n\n    logger.info(\"Model instance loaded successfully.\")\n\n    torch_dataset = handlertorch.TorchDataset(\n        config_path=data_config_path,\n        csv_path=data_path,\n        encoder_loader=encoder_loader,\n    )\n\n    torch_dataloader = DataLoader(torch_dataset, batch_size=10, shuffle=True)\n\n    logger.info(\"Torch dataloader loaded successfully.\")\n\n    # try to run the model on a single batch\n    for batch in torch_dataloader:\n        input_data, labels, metadata = batch\n        # Log shapes of tensors in each dictionary\n        for key, tensor in input_data.items():\n            logger.debug(f\"Input tensor '{key}' shape: {tensor.shape}\")\n        for key, tensor in labels.items():\n            logger.debug(f\"Label tensor '{key}' shape: {tensor.shape}\")\n        for key, list_object in metadata.items():\n            logger.debug(f\"Metadata lists '{key}' length: {len(list_object)}\")\n        output = model_instance(**input_data)\n        logger.info(\"model ran successfully on a single batch\")\n        logger.debug(f\"Output shape: {output.shape}\")\n        break\n\n    logger.info(\"Model checking single pass completed successfully.\")\n\n    # override num_samples\n    model_config.tune.tune_params.num_samples = num_samples\n\n    tuner = raytune_learner.TuneWrapper(\n        model_config=ray_config_model,\n        data_config_path=data_config_path,\n        model_class=model_class,\n        data_path=data_path,\n        encoder_loader=encoder_loader,\n        seed=42,\n        ray_results_dir=ray_results_dirpath,\n   
     debug=debug_mode,\n    )\n\n    logger.info(\"Tuner initialized successfully.\")\n\n    tuner.tune()\n\n    logger.info(\"Tuning completed successfully.\")\n    logger.info(\"Checks complete\")\n
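For a quick programmatic smoke test, main can also be called directly instead of going through the CLI wrapper below. A minimal sketch, assuming the module path stimulus.cli.check_model and hypothetical file names:

from stimulus.cli import check_model

# Hypothetical paths; replace with your own model, data and config files.
check_model.main(
    model_path="model.py",
    data_path="data.csv",
    data_config_path="data_config.yaml",
    model_config_path="model_config.yaml",
    num_samples=3,      # number of tuning samples, matching the CLI default
    debug_mode=False,
)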
"},{"location":"reference/stimulus/cli/check_model/#stimulus.cli.check_model.run","title":"run","text":"
run() -> None\n

Run the model checking script.

Source code in src/stimulus/cli/check_model.py
def run() -> None:\n    \"\"\"Run the model checking script.\"\"\"\n    args = get_args()\n    main(\n        data_path=args.data,\n        model_path=args.model,\n        data_config_path=args.data_config,\n        model_config_path=args.model_config,\n        initial_weights=args.initial_weights,\n        num_samples=args.num_samples,\n        ray_results_dirpath=args.ray_results_dirpath,\n        debug_mode=args.debug_mode,\n    )\n
"},{"location":"reference/stimulus/cli/predict/","title":"stimulus.cli.predict","text":""},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict","title":"predict","text":"

CLI module for model prediction on datasets.

Functions:

  • add_meta_info \u2013

    Add metadata columns to predictions/labels dictionary.

  • get_args \u2013

    Parse command line arguments.

  • get_batch_size \u2013

    Get batch size from model config.

  • get_meta_keys \u2013

    Extract metadata column keys.

  • load_model \u2013

    Load model with hyperparameters and weights.

  • main \u2013

    Run model prediction pipeline.

  • parse_y_keys \u2013

    Parse dictionary keys to match input data format.

  • run \u2013

    Execute model prediction pipeline.

"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.add_meta_info","title":"add_meta_info","text":"
add_meta_info(\n    data: DataFrame, y: dict[str, Any]\n) -> dict[str, Any]\n

Add metadata columns to predictions/labels dictionary.

Parameters:

  • data (DataFrame) \u2013

    Input DataFrame with metadata.

  • y (dict[str, Any]) \u2013

    Dictionary of predictions/labels.

Returns:

  • dict[str, Any] \u2013

    Updated dictionary with metadata.

Source code in src/stimulus/cli/predict.py
def add_meta_info(data: pl.DataFrame, y: dict[str, Any]) -> dict[str, Any]:\n    \"\"\"Add metadata columns to predictions/labels dictionary.\n\n    Args:\n        data: Input DataFrame with metadata.\n        y: Dictionary of predictions/labels.\n\n    Returns:\n        Updated dictionary with metadata.\n    \"\"\"\n    keys = get_meta_keys(data.columns)\n    for key in keys:\n        y[key] = data[key].to_list()\n    return y\n
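A small illustration of what add_meta_info does, using a toy polars DataFrame that follows the name:category:dtype column convention; the column names and values are made up:

import polars as pl
from stimulus.cli.predict import add_meta_info

# Toy frame following the "name:category:dtype" column convention.
data = pl.DataFrame({
    "passenger_id:meta:str": ["a", "b"],
    "survived:label:int": [1, 0],
})
y = {"survived:pred:int": [0.9, 0.1]}

y = add_meta_info(data, y)
# y now also contains {"passenger_id:meta:str": ["a", "b"]}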
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Parse command line arguments.

Returns:

  • Namespace \u2013

    Parsed command line arguments.

Source code in src/stimulus/cli/predict.py
def get_args() -> argparse.Namespace:\n    \"\"\"Parse command line arguments.\n\n    Returns:\n        Parsed command line arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"Predict model outputs on a dataset.\")\n    parser.add_argument(\"-m\", \"--model\", type=str, required=True, metavar=\"FILE\", help=\"Path to model .py file.\")\n    parser.add_argument(\"-w\", \"--weight\", type=str, required=True, metavar=\"FILE\", help=\"Path to model weights file.\")\n    parser.add_argument(\n        \"-mc\",\n        \"--model_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to tune config file with model hyperparameters.\",\n    )\n    parser.add_argument(\n        \"-ec\",\n        \"--experiment_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to experiment config for data modification.\",\n    )\n    parser.add_argument(\"-d\", \"--data\", type=str, required=True, metavar=\"FILE\", help=\"Path to input data.\")\n    parser.add_argument(\"-o\", \"--output\", type=str, required=True, metavar=\"FILE\", help=\"Path for output predictions.\")\n    parser.add_argument(\"--split\", type=int, help=\"Data split to use (default: None).\")\n    parser.add_argument(\"--return_labels\", action=\"store_true\", help=\"Include labels with predictions.\")\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.get_batch_size","title":"get_batch_size","text":"
get_batch_size(mconfig: dict[str, Any]) -> int\n

Get batch size from model config.

Parameters:

  • mconfig (dict[str, Any]) \u2013

    Model configuration dictionary.

Returns:

  • int \u2013

    Batch size to use for predictions.

Source code in src/stimulus/cli/predict.py
def get_batch_size(mconfig: dict[str, Any]) -> int:\n    \"\"\"Get batch size from model config.\n\n    Args:\n        mconfig: Model configuration dictionary.\n\n    Returns:\n        Batch size to use for predictions.\n    \"\"\"\n    default_batch_size = 256\n    if \"data_params\" in mconfig and \"batch_size\" in mconfig[\"data_params\"]:\n        return mconfig[\"data_params\"][\"batch_size\"]\n    return default_batch_size\n
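A minimal illustration of the fallback behaviour, based directly on the code above (the config dictionaries are made up):

from stimulus.cli.predict import get_batch_size

get_batch_size({"data_params": {"batch_size": 64}})  # -> 64
get_batch_size({})                                   # -> 256 (default)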
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.get_meta_keys","title":"get_meta_keys","text":"
get_meta_keys(names: Sequence[str]) -> list[str]\n

Extract metadata column keys.

Parameters:

  • names (Sequence[str]) \u2013

    List of column names.

Returns:

  • list[str] \u2013

    List of metadata column keys.

Source code in src/stimulus/cli/predict.py
def get_meta_keys(names: Sequence[str]) -> list[str]:\n    \"\"\"Extract metadata column keys.\n\n    Args:\n        names: List of column names.\n\n    Returns:\n        List of metadata column keys.\n    \"\"\"\n    return [name for name in names if name.split(\":\")[1] == \"meta\"]\n
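Metadata columns are recognised by the second field of the name:category:dtype naming scheme. A small example with made-up column names:

from stimulus.cli.predict import get_meta_keys

columns = ["age:input:float", "survived:label:int", "passenger_id:meta:str"]
get_meta_keys(columns)  # -> ["passenger_id:meta:str"]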
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.load_model","title":"load_model","text":"
load_model(\n    model_class: Any,\n    weight_path: str,\n    mconfig: dict[str, Any],\n) -> Any\n

Load model with hyperparameters and weights.

Parameters:

  • model_class (Any) \u2013

    Model class to instantiate.

  • weight_path (str) \u2013

    Path to model weights.

  • mconfig (dict[str, Any]) \u2013

    Model configuration dictionary.

Returns:

  • Any \u2013

    Loaded model instance.

Source code in src/stimulus/cli/predict.py
def load_model(model_class: Any, weight_path: str, mconfig: dict[str, Any]) -> Any:\n    \"\"\"Load model with hyperparameters and weights.\n\n    Args:\n        model_class: Model class to instantiate.\n        weight_path: Path to model weights.\n        mconfig: Model configuration dictionary.\n\n    Returns:\n        Loaded model instance.\n    \"\"\"\n    hyperparameters = mconfig[\"model_params\"]\n    model = model_class(**hyperparameters)\n    model.load_state_dict(torch.load(weight_path))\n    return model\n
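A hedged sketch of using load_model together with import_class_from_file (assumed to be importable from the same module, since it is used there); the file paths and the contents of the "model_params" section are placeholders:

import json
from stimulus.cli.predict import import_class_from_file, load_model

# Hypothetical files; mconfig must contain a "model_params" section.
model_class = import_class_from_file("model.py")
with open("best_config.json") as f:
    mconfig = json.load(f)

model = load_model(model_class, "best_model.pt", mconfig)
model.eval()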
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.main","title":"main","text":"
main(\n    model_path: str,\n    weight_path: str,\n    mconfig_path: str,\n    econfig_path: str,\n    data_path: str,\n    output: str,\n    *,\n    return_labels: bool = False,\n    split: int | None = None\n) -> None\n

Run model prediction pipeline.

Parameters:

  • model_path (str) \u2013

    Path to model file.

  • weight_path (str) \u2013

    Path to model weights.

  • mconfig_path (str) \u2013

    Path to model config.

  • econfig_path (str) \u2013

    Path to experiment config.

  • data_path (str) \u2013

    Path to input data.

  • output (str) \u2013

    Path for output predictions.

  • return_labels (bool, default: False ) \u2013

    Whether to include labels.

  • split (int | None, default: None ) \u2013

    Data split to use.

Source code in src/stimulus/cli/predict.py
def main(\n    model_path: str,\n    weight_path: str,\n    mconfig_path: str,\n    econfig_path: str,\n    data_path: str,\n    output: str,\n    *,\n    return_labels: bool = False,\n    split: int | None = None,\n) -> None:\n    \"\"\"Run model prediction pipeline.\n\n    Args:\n        model_path: Path to model file.\n        weight_path: Path to model weights.\n        mconfig_path: Path to model config.\n        econfig_path: Path to experiment config.\n        data_path: Path to input data.\n        output: Path for output predictions.\n        return_labels: Whether to include labels.\n        split: Data split to use.\n    \"\"\"\n    with open(mconfig_path) as in_json:\n        mconfig = json.load(in_json)\n\n    model_class = import_class_from_file(model_path)\n    model = load_model(model_class, weight_path, mconfig)\n\n    with open(econfig_path) as in_json:\n        experiment_name = json.load(in_json)[\"experiment\"]\n    initialized_experiment_class = get_experiment(experiment_name)\n\n    dataloader = DataLoader(\n        TorchDataset(data_path, initialized_experiment_class, split=split),\n        batch_size=get_batch_size(mconfig),\n        shuffle=False,\n    )\n\n    predictor = PredictWrapper(model, dataloader)\n    out = predictor.predict(return_labels=return_labels)\n    y_pred, y_true = out if return_labels else (out, {})\n\n    y_pred = {k: v.tolist() for k, v in y_pred.items()}\n    y_true = {k: v.tolist() for k, v in y_true.items()}\n\n    data = pl.read_csv(data_path)\n    y_pred = parse_y_keys(y_pred, data, y_type=\"pred\")\n    y_true = parse_y_keys(y_true, data, y_type=\"label\")\n\n    y = {**y_pred, **y_true}\n    y = add_meta_info(data, y)\n    df = pl.from_dict(y)\n    df.write_csv(output)\n
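The same pipeline can be driven programmatically, mirroring the run() wrapper further below. A sketch with hypothetical paths; the config files must match the formats expected above:

from stimulus.cli import predict

predict.main(
    model_path="model.py",
    weight_path="best_model.pt",
    mconfig_path="best_config.json",
    econfig_path="experiment.json",
    data_path="data.csv",
    output="predictions.csv",
    return_labels=True,
    split=2,  # 0 = train, 1 = validation, 2 = test
)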
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.parse_y_keys","title":"parse_y_keys","text":"
parse_y_keys(\n    y: dict[str, Any], data: DataFrame, y_type: str = \"pred\"\n) -> dict[str, Any]\n

Parse dictionary keys to match input data format.

Parameters:

  • y (dict[str, Any]) \u2013

    Dictionary of predictions or labels.

  • data (DataFrame) \u2013

    Input DataFrame.

  • y_type (str, default: 'pred' ) \u2013

    Type of values ('pred' or 'label').

Returns:

  • dict[str, Any] \u2013

    Dictionary with updated keys.

Source code in src/stimulus/cli/predict.py
def parse_y_keys(y: dict[str, Any], data: pl.DataFrame, y_type: str = \"pred\") -> dict[str, Any]:\n    \"\"\"Parse dictionary keys to match input data format.\n\n    Args:\n        y: Dictionary of predictions or labels.\n        data: Input DataFrame.\n        y_type: Type of values ('pred' or 'label').\n\n    Returns:\n        Dictionary with updated keys.\n    \"\"\"\n    if not y:\n        return y\n\n    parsed_y = {}\n    for k1, v1 in y.items():\n        for k2 in data.columns:\n            if k1 == k2.split(\":\")[0]:\n                new_key = f\"{k1}:{y_type}:{k2.split(':')[2]}\"\n                parsed_y[new_key] = v1\n\n    return parsed_y\n
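The key rewriting can be seen on a toy example (column names are made up): the third field of the matching data column is carried over, while the middle field becomes 'pred' or 'label':

import polars as pl
from stimulus.cli.predict import parse_y_keys

data = pl.DataFrame({"survived:label:int": [1, 0], "age:input:float": [22.0, 38.0]})
y_pred = {"survived": [0.9, 0.1]}

parse_y_keys(y_pred, data, y_type="pred")
# -> {"survived:pred:int": [0.9, 0.1]}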
"},{"location":"reference/stimulus/cli/predict/#stimulus.cli.predict.run","title":"run","text":"
run() -> None\n

Execute model prediction pipeline.

Source code in src/stimulus/cli/predict.py
def run() -> None:\n    \"\"\"Execute model prediction pipeline.\"\"\"\n    args = get_args()\n    main(\n        args.model,\n        args.weight,\n        args.model_config,\n        args.experiment_config,\n        args.data,\n        args.output,\n        return_labels=args.return_labels,\n        split=args.split,\n    )\n
"},{"location":"reference/stimulus/cli/shuffle_csv/","title":"stimulus.cli.shuffle_csv","text":""},{"location":"reference/stimulus/cli/shuffle_csv/#stimulus.cli.shuffle_csv","title":"shuffle_csv","text":"

CLI module for shuffling CSV data files.

Functions:

  • get_args \u2013

    Get the arguments when called from the command line.

  • main \u2013

    Shuffle the data and split it according to the default split method.

  • run \u2013

    Run the CSV shuffling script.

"},{"location":"reference/stimulus/cli/shuffle_csv/#stimulus.cli.shuffle_csv.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when called from the command line.

Returns:

  • Namespace \u2013

    Parsed command line arguments.

Source code in src/stimulus/cli/shuffle_csv.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\n\n    Returns:\n        Parsed command line arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"Shuffle rows in a CSV data file.\")\n    parser.add_argument(\n        \"-c\",\n        \"--csv\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The file path for the csv containing all data\",\n    )\n    parser.add_argument(\n        \"-y\",\n        \"--yaml\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The YAML config file that hold all parameter info\",\n    )\n    parser.add_argument(\n        \"-o\",\n        \"--output\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The output file path to write the noised csv\",\n    )\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/shuffle_csv/#stimulus.cli.shuffle_csv.main","title":"main","text":"
main(\n    data_csv: str, config_yaml: str, out_path: str\n) -> None\n

Shuffle the data and split it according to the default split method.

Parameters:

  • data_csv (str) \u2013

    Path to input CSV file.

  • config_yaml (str) \u2013

    Path to config YAML file.

  • out_path (str) \u2013

    Path to output shuffled CSV.

TODO major changes when this is going to select a given shuffle method and integration with split.

Source code in src/stimulus/cli/shuffle_csv.py
def main(data_csv: str, config_yaml: str, out_path: str) -> None:\n    \"\"\"Shuffle the data and split it according to the default split method.\n\n    Args:\n        data_csv: Path to input CSV file.\n        config_yaml: Path to config YAML file.\n        out_path: Path to output shuffled CSV.\n\n    TODO major changes when this is going to select a given shuffle method and integration with split.\n    \"\"\"\n    # create a DatasetProcessor object from the config and the csv\n    processor = DatasetProcessor(config_path=config_yaml, csv_path=data_csv)\n\n    # shuffle the data with a default seed. TODO get the seed for the config if and when that is going to be set there.\n    processor.shuffle_labels(seed=42)\n\n    # save the modified csv\n    processor.save(out_path)\n
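Programmatic use is a one-liner; a sketch with hypothetical file names:

from stimulus.cli import shuffle_csv

# Shuffle the labels of data.csv according to config.yaml and write the result.
shuffle_csv.main("data.csv", "config.yaml", "data_shuffled.csv")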
"},{"location":"reference/stimulus/cli/shuffle_csv/#stimulus.cli.shuffle_csv.run","title":"run","text":"
run() -> None\n

Run the CSV shuffling script.

Source code in src/stimulus/cli/shuffle_csv.py
def run() -> None:\n    \"\"\"Run the CSV shuffling script.\"\"\"\n    args = get_args()\n    main(args.csv, args.yaml, args.output)\n
"},{"location":"reference/stimulus/cli/split_csv/","title":"stimulus.cli.split_csv","text":""},{"location":"reference/stimulus/cli/split_csv/#stimulus.cli.split_csv","title":"split_csv","text":"

CLI module for splitting CSV data files.

Functions:

  • get_args \u2013

    Get the arguments when called from the command line.

  • main \u2013

    Connect CSV and YAML configuration and handle sanity checks.

  • run \u2013

    Run the CSV splitting script.

"},{"location":"reference/stimulus/cli/split_csv/#stimulus.cli.split_csv.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when called from the command line.

Source code in src/stimulus/cli/split_csv.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\"\"\"\n    parser = argparse.ArgumentParser(description=\"Split a CSV data file.\")\n    parser.add_argument(\n        \"-c\",\n        \"--csv\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The file path for the csv containing all data\",\n    )\n    parser.add_argument(\n        \"-y\",\n        \"--yaml\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The YAML config file that hold all parameter info\",\n    )\n    parser.add_argument(\n        \"-o\",\n        \"--output\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The output file path to write the noised csv\",\n    )\n    parser.add_argument(\n        \"-f\",\n        \"--force\",\n        type=bool,\n        required=False,\n        default=False,\n        help=\"Overwrite the split column if it already exists in the csv\",\n    )\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/split_csv/#stimulus.cli.split_csv.main","title":"main","text":"
main(\n    data_csv: str,\n    config_yaml: str,\n    out_path: str,\n    *,\n    force: bool = False\n) -> None\n

Connect CSV and YAML configuration and handle sanity checks.

Parameters:

  • data_csv (str) \u2013

    Path to input CSV file.

  • config_yaml (str) \u2013

    Path to config YAML file.

  • out_path (str) \u2013

    Path to output split CSV.

  • force (bool, default: False ) \u2013

    Overwrite the split column if it already exists in the CSV.

Source code in src/stimulus/cli/split_csv.py
def main(data_csv: str, config_yaml: str, out_path: str, *, force: bool = False) -> None:\n    \"\"\"Connect CSV and YAML configuration and handle sanity checks.\n\n    Args:\n        data_csv: Path to input CSV file.\n        config_yaml: Path to config YAML file.\n        out_path: Path to output split CSV.\n        force: Overwrite the split column if it already exists in the CSV.\n    \"\"\"\n    # create a DatasetProcessor object from the config and the csv\n    processor = DatasetProcessor(config_path=config_yaml, csv_path=data_csv)\n\n    # create a split manager from the config\n    split_config = processor.dataset_manager.config.split\n    with open(config_yaml) as f:\n        yaml_config = YamlSubConfigDict(**yaml.safe_load(f))\n    split_loader = SplitLoader(seed=yaml_config.global_params.seed)\n    split_loader.initialize_splitter_from_config(split_config)\n    split_manager = SplitManager(split_loader)\n\n    # apply the split method to the data\n    processor.add_split(split_manager=split_manager, force=force)\n\n    # save the modified csv\n    processor.save(out_path)\n
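A minimal programmatic sketch, with hypothetical file names:

from stimulus.cli import split_csv

# Add a split column to data.csv as described by config.yaml.
split_csv.main("data.csv", "config.yaml", "data_split.csv", force=False)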
"},{"location":"reference/stimulus/cli/split_csv/#stimulus.cli.split_csv.run","title":"run","text":"
run() -> None\n

Run the CSV splitting script.

Source code in src/stimulus/cli/split_csv.py
def run() -> None:\n    \"\"\"Run the CSV splitting script.\"\"\"\n    args = get_args()\n    main(args.csv, args.yaml, args.output, force=args.force)\n
"},{"location":"reference/stimulus/cli/split_yaml/","title":"stimulus.cli.split_yaml","text":""},{"location":"reference/stimulus/cli/split_yaml/#stimulus.cli.split_yaml","title":"split_yaml","text":"

CLI module for splitting YAML configuration files.

This module provides functionality to split a single YAML configuration file into multiple YAML files, each containing a specific combination of data transformations and splits. The resulting YAML files can be used as input configurations for the stimulus package.

Functions:

  • get_args \u2013

    Get the arguments when called from the command line.

  • main \u2013

    Reads a YAML config file and generates all possible data configurations.

"},{"location":"reference/stimulus/cli/split_yaml/#stimulus.cli.split_yaml.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when called from the command line.

Source code in src/stimulus/cli/split_yaml.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the command line.\"\"\"\n    parser = argparse.ArgumentParser(description=\"\")\n    parser.add_argument(\n        \"-j\",\n        \"--yaml\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The YAML config file that hold all transform - split - parameter info\",\n    )\n    parser.add_argument(\n        \"-d\",\n        \"--out_dir\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=\"./\",\n        default=\"./\",\n        metavar=\"DIR\",\n        help=\"The output dir where all the YAMLs are written to. Output YAML will be called split-#[number].yaml transform-#[number].yaml. Default -> ./\",\n    )\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/split_yaml/#stimulus.cli.split_yaml.main","title":"main","text":"
main(config_yaml: str, out_dir_path: str) -> None\n

Reads a YAML config file and generates all possible data configurations.

This script reads a YAML with a defined structure and creates all the YAML files ready to be passed to the stimulus package.

The structure of the YAML is described here -> TODO paste here link to documentation. This YAML and its structure summarize how to generate all the transform, split, and respective parameter combinations. Each resulting YAML holds only one combination of these three.

This script will always generate at least one YAML file that represents the combination that does not touch the data (no transform) and uses the default split behavior.

Source code in src/stimulus/cli/split_yaml.py
def main(config_yaml: str, out_dir_path: str) -> None:\n    \"\"\"Reads a YAML config file and generates all possible data configurations.\n\n    This script reads a YAML with a defined structure and creates all the YAML files ready to be passed to\n    the stimulus package.\n\n    The structure of the YAML is described here -> TODO paste here link to documentation.\n    This YAML and it's structure summarize how to generate all the transform - split and respective parameter combinations.\n    Each resulting YAML will hold only one combination of the above three things.\n\n    This script will always generate at least one YAML file that represent the combination that does not touch the data (no transform)\n    and uses the default split behavior.\n    \"\"\"\n    # read the yaml experiment config and load it to dictionary\n    yaml_config: dict[str, Any] = {}\n    with open(config_yaml) as conf_file:\n        yaml_config = yaml.safe_load(conf_file)\n\n    yaml_config_dict: YamlConfigDict = YamlConfigDict(**yaml_config)\n    # check if the yaml schema is correct\n    check_yaml_schema(yaml_config_dict)\n\n    # generate all the YAML configs\n    data_configs = generate_data_configs(yaml_config_dict)\n\n    # dump all the YAML configs into files\n    dump_yaml_list_into_files(data_configs, out_dir_path, \"test\")\n
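A minimal programmatic sketch; the input YAML and output directory are placeholders:

from stimulus.cli import split_yaml

# Expand one master YAML into one config per transform/split combination.
split_yaml.main("experiment.yaml", "./configs")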
"},{"location":"reference/stimulus/cli/transform_csv/","title":"stimulus.cli.transform_csv","text":""},{"location":"reference/stimulus/cli/transform_csv/#stimulus.cli.transform_csv","title":"transform_csv","text":"

CLI module for transforming CSV data files.

Functions:

  • get_args \u2013

    Get the arguments when called from the command line.

  • main \u2013

    Connect CSV and YAML configuration and handle sanity checks.

  • run \u2013

    Run the CSV transformation script.

"},{"location":"reference/stimulus/cli/transform_csv/#stimulus.cli.transform_csv.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when called from the command line.

Source code in src/stimulus/cli/transform_csv.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\"\"\"\n    parser = argparse.ArgumentParser(description=\"CLI for transforming CSV data files using YAML configuration.\")\n    parser.add_argument(\n        \"-c\",\n        \"--csv\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The file path for the csv containing all data\",\n    )\n    parser.add_argument(\n        \"-y\",\n        \"--yaml\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The YAML config file that holds all parameter info\",\n    )\n    parser.add_argument(\n        \"-o\",\n        \"--output\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"The output file path to write the noised csv\",\n    )\n\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/transform_csv/#stimulus.cli.transform_csv.main","title":"main","text":"
main(\n    data_csv: str, config_yaml: str, out_path: str\n) -> None\n

Connect CSV and YAML configuration and handle sanity checks.

This launcher connects the CSV file with one YAML configuration and also handles some sanity checks.

Source code in src/stimulus/cli/transform_csv.py
def main(data_csv: str, config_yaml: str, out_path: str) -> None:\n    \"\"\"Connect CSV and YAML configuration and handle sanity checks.\n\n    This launcher will be the connection between the csv and one YAML configuration.\n    It should also handle some sanity checks.\n    \"\"\"\n    # initialize the csv processing class, it open and reads the csv in automatic\n    processor = DatasetProcessor(config_path=config_yaml, csv_path=data_csv)\n\n    # initialize the transform manager\n    transform_config = processor.dataset_manager.config.transforms\n    with open(config_yaml) as f:\n        yaml_config = YamlSubConfigDict(**yaml.safe_load(f))\n    transform_loader = TransformLoader(seed=yaml_config.global_params.seed)\n    transform_loader.initialize_column_data_transformers_from_config(transform_config)\n    transform_manager = TransformManager(transform_loader)\n\n    # apply the transformations to the data\n    processor.apply_transformation_group(transform_manager)\n\n    # write the transformed data to a new csv\n    processor.save(out_path)\n
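A minimal programmatic sketch, with hypothetical file names:

from stimulus.cli import transform_csv

# Apply the transformations described in config.yaml to data.csv.
transform_csv.main("data.csv", "config.yaml", "data_transformed.csv")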
"},{"location":"reference/stimulus/cli/transform_csv/#stimulus.cli.transform_csv.run","title":"run","text":"
run() -> None\n

Run the CSV transformation script.

Source code in src/stimulus/cli/transform_csv.py
def run() -> None:\n    \"\"\"Run the CSV transformation script.\"\"\"\n    args = get_args()\n    main(args.csv, args.yaml, args.output)\n
"},{"location":"reference/stimulus/cli/tuning/","title":"stimulus.cli.tuning","text":""},{"location":"reference/stimulus/cli/tuning/#stimulus.cli.tuning","title":"tuning","text":"

CLI module for running raytune tuning experiment.

Functions:

  • get_args \u2013

    Get the arguments when called from the command line.

  • main \u2013

    Run the main tuning pipeline.

  • run \u2013

    Run the tuning script.

"},{"location":"reference/stimulus/cli/tuning/#stimulus.cli.tuning.get_args","title":"get_args","text":"
get_args() -> Namespace\n

Get the arguments when called from the command line.

Returns:

  • Namespace \u2013

    Parsed command line arguments.

Source code in src/stimulus/cli/tuning.py
def get_args() -> argparse.Namespace:\n    \"\"\"Get the arguments when using from the commandline.\n\n    Returns:\n        Parsed command line arguments.\n    \"\"\"\n    parser = argparse.ArgumentParser(description=\"Launch check_model.\")\n    parser.add_argument(\"-d\", \"--data\", type=str, required=True, metavar=\"FILE\", help=\"Path to input csv file.\")\n    parser.add_argument(\"-m\", \"--model\", type=str, required=True, metavar=\"FILE\", help=\"Path to model file.\")\n    parser.add_argument(\n        \"-e\",\n        \"--data_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to data config file.\",\n    )\n    parser.add_argument(\n        \"-c\",\n        \"--model_config\",\n        type=str,\n        required=True,\n        metavar=\"FILE\",\n        help=\"Path to yaml config training file.\",\n    )\n    parser.add_argument(\n        \"-w\",\n        \"--initial_weights\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=None,\n        default=None,\n        metavar=\"FILE\",\n        help=\"The path to the initial weights (optional).\",\n    )\n    parser.add_argument(\n        \"--ray_results_dirpath\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=None,\n        default=None,\n        metavar=\"DIR_PATH\",\n        help=\"Location where ray_results output dir should be written. If None, uses ~/ray_results.\",\n    )\n    parser.add_argument(\n        \"-o\",\n        \"--output\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=\"best_model.pt\",\n        default=\"best_model.pt\",\n        metavar=\"FILE\",\n        help=\"The output file path to write the trained model to\",\n    )\n    parser.add_argument(\n        \"-bm\",\n        \"--best_metrics\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=\"best_metrics.csv\",\n        default=\"best_metrics.csv\",\n        metavar=\"FILE\",\n        help=\"The path to write the best metrics to\",\n    )\n    parser.add_argument(\n        \"-bc\",\n        \"--best_config\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=\"best_config.yaml\",\n        default=\"best_config.yaml\",\n        metavar=\"FILE\",\n        help=\"The path to write the best config to\",\n    )\n    parser.add_argument(\n        \"-bo\",\n        \"--best_optimizer\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=\"best_optimizer.pt\",\n        default=\"best_optimizer.pt\",\n        metavar=\"FILE\",\n        help=\"The path to write the best optimizer to\",\n    )\n    parser.add_argument(\n        \"--tune_run_name\",\n        type=str,\n        required=False,\n        nargs=\"?\",\n        const=None,\n        default=None,\n        metavar=\"CUSTOM_RUN_NAME\",\n        help=(\n            \"Tells ray tune what the 'experiment_name' (i.e. the given tune_run name) should be. \"\n            \"If set, the subdirectory of ray_results is named with this value and its train dir is prefixed accordingly. \"\n            \"Default None means that ray will generate such a name on its own.\"\n        ),\n    )\n    parser.add_argument(\n        \"--debug_mode\",\n        action=\"store_true\",\n        help=\"Activate debug mode for tuning. Default false, no debug.\",\n    )\n    return parser.parse_args()\n
"},{"location":"reference/stimulus/cli/tuning/#stimulus.cli.tuning.main","title":"main","text":"
main(\n    model_path: str,\n    data_path: str,\n    data_config_path: str,\n    model_config_path: str,\n    initial_weights: str | None = None,\n    ray_results_dirpath: str | None = None,\n    output_path: str | None = None,\n    best_optimizer_path: str | None = None,\n    best_metrics_path: str | None = None,\n    best_config_path: str | None = None,\n    *,\n    debug_mode: bool = False\n) -> None\n

Run the main tuning pipeline.

Parameters:

  • data_path (str) \u2013

    Path to input data file.

  • model_path (str) \u2013

    Path to model file.

  • data_config_path (str) \u2013

    Path to data config file.

  • model_config_path (str) \u2013

    Path to model config file.

  • initial_weights (str | None, default: None ) \u2013

    Optional path to initial weights.

  • ray_results_dirpath (str | None, default: None ) \u2013

    Directory for ray results.

  • debug_mode (bool, default: False ) \u2013

    Whether to run in debug mode.

  • output_path (str | None, default: None ) \u2013

    Path to write the best model to.

  • best_optimizer_path (str | None, default: None ) \u2013

    Path to write the best optimizer to.

  • best_metrics_path (str | None, default: None ) \u2013

    Path to write the best metrics to.

  • best_config_path (str | None, default: None ) \u2013

    Path to write the best config to.

Source code in src/stimulus/cli/tuning.py
def main(\n    model_path: str,\n    data_path: str,\n    data_config_path: str,\n    model_config_path: str,\n    initial_weights: str | None = None,  # noqa: ARG001\n    ray_results_dirpath: str | None = None,\n    output_path: str | None = None,\n    best_optimizer_path: str | None = None,\n    best_metrics_path: str | None = None,\n    best_config_path: str | None = None,\n    *,\n    debug_mode: bool = False,\n) -> None:\n    \"\"\"Run the main model checking pipeline.\n\n    Args:\n        data_path: Path to input data file.\n        model_path: Path to model file.\n        data_config_path: Path to data config file.\n        model_config_path: Path to model config file.\n        initial_weights: Optional path to initial weights.\n        ray_results_dirpath: Directory for ray results.\n        debug_mode: Whether to run in debug mode.\n        output_path: Path to write the best model to.\n        best_optimizer_path: Path to write the best optimizer to.\n        best_metrics_path: Path to write the best metrics to.\n        best_config_path: Path to write the best config to.\n    \"\"\"\n    # Convert data config to proper type\n    with open(data_config_path) as file:\n        data_config_dict: dict[str, Any] = yaml.safe_load(file)\n    data_config: yaml_data.YamlSubConfigDict = yaml_data.YamlSubConfigDict(**data_config_dict)\n\n    with open(model_config_path) as file:\n        model_config_dict: dict[str, Any] = yaml.safe_load(file)\n    model_config: yaml_model_schema.Model = yaml_model_schema.Model(**model_config_dict)\n\n    encoder_loader = loaders.EncoderLoader()\n    encoder_loader.initialize_column_encoders_from_config(column_config=data_config.columns)\n\n    model_class = launch_utils.import_class_from_file(model_path)\n\n    ray_config_loader = yaml_model_schema.YamlRayConfigLoader(model=model_config)\n    ray_config_model = ray_config_loader.get_config()\n\n    tuner = raytune_learner.TuneWrapper(\n        model_config=ray_config_model,\n        data_config_path=data_config_path,\n        model_class=model_class,\n        data_path=data_path,\n        encoder_loader=encoder_loader,\n        seed=42,\n        ray_results_dir=ray_results_dirpath,\n        debug=debug_mode,\n    )\n\n    # Ensure output_path is provided\n    if output_path is None:\n        raise ValueError(\"output_path must not be None\")\n    try:\n        grid_results = tuner.tune()\n        if not grid_results:\n            _raise_empty_grid()\n\n        # Initialize parser with results\n        parser = raytune_parser.TuneParser(result=grid_results)\n\n        # Ensure output directory exists\n        Path(output_path).parent.mkdir(parents=True, exist_ok=True)\n\n        # Save outputs using proper Result object API\n        parser.save_best_model(output=output_path)\n        parser.save_best_optimizer(output=best_optimizer_path)\n        parser.save_best_metrics_dataframe(output=best_metrics_path)\n        parser.save_best_config(output=best_config_path)\n\n    except RuntimeError:\n        logger.exception(\"Tuning failed\")\n        raise\n    except KeyError:\n        logger.exception(\"Missing expected result key\")\n        raise\n    finally:\n        if debug_mode:\n            logger.info(\"Debug mode - preserving Ray results directory\")\n        elif ray_results_dirpath:\n            shutil.rmtree(ray_results_dirpath, ignore_errors=True)\n
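A programmatic sketch of a tuning run; all file paths are placeholders, and output_path must be provided, otherwise main raises a ValueError (see above):

from stimulus.cli import tuning

tuning.main(
    model_path="model.py",
    data_path="data.csv",
    data_config_path="data_config.yaml",
    model_config_path="model_config.yaml",
    output_path="best_model.pt",
    best_optimizer_path="best_optimizer.pt",
    best_metrics_path="best_metrics.csv",
    best_config_path="best_config.yaml",
    debug_mode=False,
)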
"},{"location":"reference/stimulus/cli/tuning/#stimulus.cli.tuning.run","title":"run","text":"
run() -> None\n

Run the tuning script.

Source code in src/stimulus/cli/tuning.py
def run() -> None:\n    \"\"\"Run the model checking script.\"\"\"\n    args = get_args()\n    main(\n        data_path=args.data,\n        model_path=args.model,\n        data_config_path=args.data_config,\n        model_config_path=args.model_config,\n        initial_weights=args.initial_weights,\n        ray_results_dirpath=args.ray_results_dirpath,\n        output_path=args.output,\n        best_optimizer_path=args.best_optimizer,\n        best_metrics_path=args.best_metrics,\n        best_config_path=args.best_config,\n        debug_mode=args.debug_mode,\n    )\n
"},{"location":"reference/stimulus/data/","title":"stimulus.data","text":""},{"location":"reference/stimulus/data/#stimulus.data","title":"data","text":"

Data handling and processing module.

This module provides functionality for loading, transforming, and managing data in various formats like CSV. It includes classes and utilities for:

  • Loading and processing CSV data files
  • Applying data transformations and augmentations
  • Splitting data into train/validation/test sets
  • Converting data into PyTorch datasets

Modules:

  • data_handlers \u2013

    This module provides classes for handling CSV data files in the STIMULUS format.

  • encoding \u2013

    Encoding package for data transformation.

  • handlertorch \u2013

    This file provides the class API for handling the data in pytorch using the Dataset and Dataloader classes.

  • loaders \u2013

    Loaders serve as interfaces between the CSV master class and custom methods.

  • splitters \u2013

    This package provides splitter classes for splitting data into train, validation, and test sets.

  • transform \u2013

    Transform package for data manipulation.

"},{"location":"reference/stimulus/data/data_handlers/","title":"stimulus.data.data_handlers","text":""},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers","title":"data_handlers","text":"

This module provides classes for handling CSV data files in the STIMULUS format.

The module contains three main classes:

  • DatasetHandler: Base class for loading and managing CSV data
  • DatasetProcessor: Class for preprocessing data with transformations and splits
  • DatasetLoader: Class for loading processed data for model training

The data format consists of:

  1. A CSV file containing the raw data
  2. A YAML configuration file that defines:
     • Column names and their roles (input/label/meta)
     • Data types and encoders for each column
     • Transformations to apply (noise, augmentation, etc.)
     • Split configuration for train/val/test sets

The data handling pipeline consists of:

  1. Loading raw CSV data according to the YAML config
  2. Applying configured transformations
  3. Splitting into train/val/test sets based on config
  4. Encoding data for model training using specified encoders

See titanic.yaml in tests/test_data/titanic/ for an example configuration file format.
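A hedged end-to-end sketch of the pipeline described above, using the classes documented below. The file names follow the titanic example, and the stimulus.utils.yaml_data import path for the YAML schema class is an assumption taken from the CLI modules:

import yaml

from stimulus.data import data_handlers, loaders
# Assumed import path for the YAML schema class used in the CLI modules.
from stimulus.utils import yaml_data

config_path = "titanic.yaml"     # hypothetical config following the example format
csv_path = "titanic.csv"         # hypothetical raw data

# Preprocessing step (transform and split managers omitted for brevity).
processor = data_handlers.DatasetProcessor(config_path=config_path, csv_path=csv_path)
processor.save("titanic_processed.csv")

# Build encoders from the same config and load the processed data for training.
with open(config_path) as f:
    data_config = yaml_data.YamlSubConfigDict(**yaml.safe_load(f))
encoder_loader = loaders.EncoderLoader()
encoder_loader.initialize_column_encoders_from_config(column_config=data_config.columns)

loader = data_handlers.DatasetLoader(
    config_path=config_path,
    csv_path="titanic_processed.csv",
    encoder_loader=encoder_loader,
    # split=0 could be passed here if the csv already contains a split column
)
inputs, labels, meta = loader.get_all_items()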

Classes:

  • DatasetHandler \u2013

    Main class for handling dataset loading, encoding, transformation and splitting.

  • DatasetLoader \u2013

    Class for loading dataset and passing it to the deep learning model.

  • DatasetManager \u2013

    Class for managing the dataset.

  • DatasetProcessor \u2013

    Class for loading dataset, applying transformations and splitting.

  • EncodeManager \u2013

    Manages the encoding of data columns using configured encoders.

  • SplitManager \u2013

    Class for managing the splitting.

  • TransformManager \u2013

    Class for managing the transformations.

"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler","title":"DatasetHandler","text":"
DatasetHandler(config_path: str, csv_path: str)\n

Main class for handling dataset loading, encoding, transformation and splitting.

This class coordinates the interaction between different managers to process CSV datasets according to the provided configuration.

Attributes:

  • encoder_manager (EncodeManager) \u2013

    Manager for handling data encoding operations.

  • transform_manager (TransformManager) \u2013

    Manager for handling data transformations.

  • split_manager (SplitManager) \u2013

    Manager for handling dataset splitting.

  • dataset_manager (DatasetManager) \u2013

    Manager for organizing dataset columns and config.

Parameters:

  • config_path (str) \u2013

    Path to the dataset configuration file.

  • csv_path (str) \u2013

    Path to the CSV data file.

Methods:

  • load_csv \u2013

    Load the CSV file into a polars DataFrame.

  • read_csv_header \u2013

    Get the column names from the header of the CSV file.

  • save \u2013

    Saves the data to a csv file.

  • select_columns \u2013

    Select specific columns from the DataFrame and return as a dictionary.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    config_path: str,\n    csv_path: str,\n) -> None:\n    \"\"\"Initialize the DatasetHandler with required config.\n\n    Args:\n        config_path (str): Path to the dataset configuration file.\n        csv_path (str): Path to the CSV data file.\n    \"\"\"\n    self.dataset_manager = DatasetManager(config_path)\n    self.columns = self.read_csv_header(csv_path)\n    self.data = self.load_csv(csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler.load_csv","title":"load_csv","text":"
load_csv(csv_path: str) -> DataFrame\n

Load the CSV file into a polars DataFrame.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to load.

Returns:

  • DataFrame \u2013

    pl.DataFrame: Polars DataFrame containing the loaded CSV data.

Source code in src/stimulus/data/data_handlers.py
def load_csv(self, csv_path: str) -> pl.DataFrame:\n    \"\"\"Load the CSV file into a polars DataFrame.\n\n    Args:\n        csv_path (str): Path to the CSV file to load.\n\n    Returns:\n        pl.DataFrame: Polars DataFrame containing the loaded CSV data.\n    \"\"\"\n    return pl.read_csv(csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler.read_csv_header","title":"read_csv_header","text":"
read_csv_header(csv_path: str) -> list\n

Get the column names from the header of the CSV file.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to read headers from.

Returns:

  • list ( list ) \u2013

    List of column names from the CSV header.

Source code in src/stimulus/data/data_handlers.py
def read_csv_header(self, csv_path: str) -> list:\n    \"\"\"Get the column names from the header of the CSV file.\n\n    Args:\n        csv_path (str): Path to the CSV file to read headers from.\n\n    Returns:\n        list: List of column names from the CSV header.\n    \"\"\"\n    with open(csv_path) as f:\n        return f.readline().strip().split(\",\")\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler.save","title":"save","text":"
save(path: str) -> None\n

Saves the data to a csv file.

Source code in src/stimulus/data/data_handlers.py
def save(self, path: str) -> None:\n    \"\"\"Saves the data to a csv file.\"\"\"\n    self.data.write_csv(path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler.select_columns","title":"select_columns","text":"
select_columns(columns: list) -> dict\n

Select specific columns from the DataFrame and return as a dictionary.

Parameters:

  • columns (list) \u2013

    List of column names to select.

Returns:

  • dict ( dict ) \u2013

    A dictionary where keys are column names and values are lists containing the column data.

Example

handler = DatasetHandler(...)
data_dict = handler.select_columns([\"col1\", \"col2\"])

Source code in src/stimulus/data/data_handlers.py
def select_columns(self, columns: list) -> dict:\n    \"\"\"Select specific columns from the DataFrame and return as a dictionary.\n\n    Args:\n        columns (list): List of column names to select.\n\n    Returns:\n        dict: A dictionary where keys are column names and values are lists containing the column data.\n\n    Example:\n        >>> handler = DatasetHandler(...)\n        >>> data_dict = handler.select_columns([\"col1\", \"col2\"])\n        >>> # Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}\n    \"\"\"\n    df = self.data.select(columns)\n    return {col: df[col].to_list() for col in columns}\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetHandler.select_columns--returns","title":"Returns","text":""},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader","title":"DatasetLoader","text":"
DatasetLoader(\n    config_path: str,\n    csv_path: str,\n    encoder_loader: EncoderLoader,\n    split: Union[int, None] = None,\n)\n

Bases: DatasetHandler

Class for loading dataset and passing it to the deep learning model.

Methods:

  • get_all_items \u2013

    Get the full dataset as three separate dictionaries for inputs, labels and metadata.

  • get_all_items_and_length \u2013

    Get the full dataset as three separate dictionaries for inputs, labels and metadata, and the length of the data.

  • load_csv \u2013

    Load the CSV file into a polars DataFrame.

  • load_csv_per_split \u2013

    Load the part of csv file that has the specified split value.

  • read_csv_header \u2013

    Get the column names from the header of the CSV file.

  • save \u2013

    Saves the data to a csv file.

  • select_columns \u2013

    Select specific columns from the DataFrame and return as a dictionary.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    config_path: str,\n    csv_path: str,\n    encoder_loader: loaders.EncoderLoader,\n    split: Union[int, None] = None,\n) -> None:\n    \"\"\"Initialize the DatasetLoader.\"\"\"\n    super().__init__(config_path, csv_path)\n    self.encoder_manager = EncodeManager(encoder_loader)\n    self.data = self.load_csv_per_split(csv_path, split) if split is not None else self.load_csv(csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.get_all_items","title":"get_all_items","text":"
get_all_items() -> tuple[dict, dict, dict]\n

Get the full dataset as three separate dictionaries for inputs, labels and metadata.

Returns:

  • tuple[dict, dict, dict] \u2013

    tuple[dict, dict, dict]: Three dictionaries containing:
        - Input dictionary mapping input column names to encoded input data
        - Label dictionary mapping label column names to encoded label data
        - Meta dictionary mapping meta column names to meta data

Example

handler = DatasetLoader(...)
input_dict, label_dict, meta_dict = handler.get_all_items()
print(input_dict.keys())
dict_keys(['age', 'fare'])
print(label_dict.keys())
dict_keys(['survived'])
print(meta_dict.keys())
dict_keys(['passenger_id'])

Source code in src/stimulus/data/data_handlers.py
def get_all_items(self) -> tuple[dict, dict, dict]:\n    \"\"\"Get the full dataset as three separate dictionaries for inputs, labels and metadata.\n\n    Returns:\n        tuple[dict, dict, dict]: Three dictionaries containing:\n            - Input dictionary mapping input column names to encoded input data\n            - Label dictionary mapping label column names to encoded label data\n            - Meta dictionary mapping meta column names to meta data\n\n    Example:\n        >>> handler = DatasetHandler(...)\n        >>> input_dict, label_dict, meta_dict = handler.get_dataset()\n        >>> print(input_dict.keys())\n        dict_keys(['age', 'fare'])\n        >>> print(label_dict.keys())\n        dict_keys(['survived'])\n        >>> print(meta_dict.keys())\n        dict_keys(['passenger_id'])\n    \"\"\"\n    input_columns, label_columns, meta_columns = (\n        self.dataset_manager.column_categories[\"input\"],\n        self.dataset_manager.column_categories[\"label\"],\n        self.dataset_manager.column_categories[\"meta\"],\n    )\n    input_data = self.encoder_manager.encode_dataframe(self.data[input_columns])\n    label_data = self.encoder_manager.encode_dataframe(self.data[label_columns])\n    meta_data = {key: self.data[key].to_list() for key in meta_columns}\n    return input_data, label_data, meta_data\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.get_all_items_and_length","title":"get_all_items_and_length","text":"
get_all_items_and_length() -> (\n    tuple[tuple[dict, dict, dict], int]\n)\n

Get the full dataset as three separate dictionaries for inputs, labels and metadata, and the length of the data.

Source code in src/stimulus/data/data_handlers.py
def get_all_items_and_length(self) -> tuple[tuple[dict, dict, dict], int]:\n    \"\"\"Get the full dataset as three separate dictionaries for inputs, labels and metadata, and the length of the data.\"\"\"\n    return self.get_all_items(), len(self.data)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.load_csv","title":"load_csv","text":"
load_csv(csv_path: str) -> DataFrame\n

Load the CSV file into a polars DataFrame.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to load.

Returns:

  • DataFrame \u2013

    pl.DataFrame: Polars DataFrame containing the loaded CSV data.

Source code in src/stimulus/data/data_handlers.py
def load_csv(self, csv_path: str) -> pl.DataFrame:\n    \"\"\"Load the CSV file into a polars DataFrame.\n\n    Args:\n        csv_path (str): Path to the CSV file to load.\n\n    Returns:\n        pl.DataFrame: Polars DataFrame containing the loaded CSV data.\n    \"\"\"\n    return pl.read_csv(csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.load_csv_per_split","title":"load_csv_per_split","text":"
load_csv_per_split(csv_path: str, split: int) -> DataFrame\n

Load the part of csv file that has the specified split value.

Split is an integer where 0 is train, 1 is validation, and 2 is test. It is read from the column with category split; an example column name is split:split:int.

NOTE: the aim of this function is to load only the data relevant to the current training, validation, or test scenario.

Source code in src/stimulus/data/data_handlers.py
def load_csv_per_split(self, csv_path: str, split: int) -> pl.DataFrame:\n    \"\"\"Load the part of csv file that has the specified split value.\n\n    Split is a number that for 0 is train, 1 is validation, 2 is test.\n    This is accessed through the column with category `split`. Example column name could be `split:split:int`.\n\n    NOTE that the aim of having this function is that depending on the training, validation and test scenarios,\n    we are gonna load only the relevant data for it.\n    \"\"\"\n    if \"split\" not in self.columns:\n        raise ValueError(\"The category split is not present in the csv file\")\n    if split not in [0, 1, 2]:\n        raise ValueError(f\"The split value should be 0, 1 or 2. The specified split value is {split}\")\n    return pl.scan_csv(csv_path).filter(pl.col(\"split\") == split).collect()\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.read_csv_header","title":"read_csv_header","text":"
read_csv_header(csv_path: str) -> list\n

Get the column names from the header of the CSV file.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to read headers from.

Returns:

  • list ( list ) \u2013

    List of column names from the CSV header.

Source code in src/stimulus/data/data_handlers.py
def read_csv_header(self, csv_path: str) -> list:\n    \"\"\"Get the column names from the header of the CSV file.\n\n    Args:\n        csv_path (str): Path to the CSV file to read headers from.\n\n    Returns:\n        list: List of column names from the CSV header.\n    \"\"\"\n    with open(csv_path) as f:\n        return f.readline().strip().split(\",\")\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.save","title":"save","text":"
save(path: str) -> None\n

Saves the data to a csv file.

Source code in src/stimulus/data/data_handlers.py
def save(self, path: str) -> None:\n    \"\"\"Saves the data to a csv file.\"\"\"\n    self.data.write_csv(path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.select_columns","title":"select_columns","text":"
select_columns(columns: list) -> dict\n

Select specific columns from the DataFrame and return as a dictionary.

Parameters:

  • columns (list) \u2013

    List of column names to select.

Returns:

  • dict ( dict ) \u2013

    A dictionary where keys are column names and values are lists containing the column data.

Example

handler = DatasetHandler(...)
data_dict = handler.select_columns([\"col1\", \"col2\"])

Source code in src/stimulus/data/data_handlers.py
def select_columns(self, columns: list) -> dict:\n    \"\"\"Select specific columns from the DataFrame and return as a dictionary.\n\n    Args:\n        columns (list): List of column names to select.\n\n    Returns:\n        dict: A dictionary where keys are column names and values are lists containing the column data.\n\n    Example:\n        >>> handler = DatasetHandler(...)\n        >>> data_dict = handler.select_columns([\"col1\", \"col2\"])\n        >>> # Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}\n    \"\"\"\n    df = self.data.select(columns)\n    return {col: df[col].to_list() for col in columns}\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetLoader.select_columns--returns","title":"Returns","text":""},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetManager","title":"DatasetManager","text":"
DatasetManager(config_path: str)\n

Class for managing the dataset.

This class handles loading and organizing dataset configuration from YAML files. It manages column categorization into input, label and meta types based on the config.

Attributes:

  • config (dict) \u2013

    The loaded configuration dictionary from YAML

  • column_categories (dict) \u2013

    Dictionary mapping column types to lists of column names

Methods:

  • _load_config \u2013

    Loads the config from a YAML file.

  • categorize_columns_by_type \u2013

    Organizes the columns into input, label, meta based on the config.

Methods:

  • categorize_columns_by_type \u2013

    Organizes columns from config into input, label, and meta categories.

  • get_split_columns \u2013

    Get the columns that are used for splitting.

  • get_transform_logic \u2013

    Get the transformation logic.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    config_path: str,\n) -> None:\n    \"\"\"Initialize the DatasetManager.\"\"\"\n    self.config = self._load_config(config_path)\n    self.column_categories = self.categorize_columns_by_type()\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetManager.categorize_columns_by_type","title":"categorize_columns_by_type","text":"
categorize_columns_by_type() -> dict\n

Organizes columns from config into input, label, and meta categories.

Reads the column definitions from the config and sorts them into categories based on their column_type field.

Returns:

  • dict ( dict ) \u2013

    Dictionary containing lists of column names for each category:

        {
            \"input\": [\"col1\", \"col2\"],  # Input columns
            \"label\": [\"target\"],        # Label/output columns
            \"meta\": [\"id\"]              # Metadata columns
        }

Example

manager = DatasetManager(\"config.yaml\")
categories = manager.categorize_columns_by_type()
print(categories)
{
    'input': ['hello', 'bonjour'],
    'label': ['ciao'],
    'meta': [\"id\"]
}

Source code in src/stimulus/data/data_handlers.py
def categorize_columns_by_type(self) -> dict:\n    \"\"\"Organizes columns from config into input, label, and meta categories.\n\n    Reads the column definitions from the config and sorts them into categories\n    based on their column_type field.\n\n    Returns:\n        dict: Dictionary containing lists of column names for each category:\n            {\n                \"input\": [\"col1\", \"col2\"],  # Input columns\n                \"label\": [\"target\"],        # Label/output columns\n                \"meta\": [\"id\"]     # Metadata columns\n            }\n\n    Example:\n        >>> manager = DatasetManager(\"config.yaml\")\n        >>> categories = manager.categorize_columns_by_type()\n        >>> print(categories)\n        {\n            'input': ['hello', 'bonjour'],\n            'label': ['ciao'],\n            'meta': [\"id\"]\n        }\n    \"\"\"\n    input_columns = []\n    label_columns = []\n    meta_columns = []\n    for column in self.config.columns:\n        if column.column_type == \"input\":\n            input_columns.append(column.column_name)\n        elif column.column_type == \"label\":\n            label_columns.append(column.column_name)\n        elif column.column_type == \"meta\":\n            meta_columns.append(column.column_name)\n\n    return {\"input\": input_columns, \"label\": label_columns, \"meta\": meta_columns}\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetManager.get_split_columns","title":"get_split_columns","text":"
get_split_columns() -> list[str]\n

Get the columns that are used for splitting.

Source code in src/stimulus/data/data_handlers.py
def get_split_columns(self) -> list[str]:\n    \"\"\"Get the columns that are used for splitting.\"\"\"\n    return self.config.split.split_input_columns\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetManager.get_transform_logic","title":"get_transform_logic","text":"
get_transform_logic() -> dict\n

Get the transformation logic.

Returns a dictionary with the following structure:
{"transformation_name": str, "transformations": list[tuple[str, str, dict]]}

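For illustration, a config that applies a single GaussianNoise transform to an age column (the names and parameters are hypothetical, mirroring the YAML example further down this page) would yield:

>>> manager.get_transform_logic()
{'transformation_name': 'noise', 'transformations': [('age', 'GaussianNoise', {'std': 0.1})]}
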
Source code in src/stimulus/data/data_handlers.py
def get_transform_logic(self) -> dict:\n    \"\"\"Get the transformation logic.\n\n    Returns a dictionary in the following structure :\n    {\n        \"transformation_name\": str,\n        \"transformations\": list[tuple[str, str, dict]]\n    }\n    \"\"\"\n    transformation_logic = {\n        \"transformation_name\": self.config.transforms.transformation_name,\n        \"transformations\": [],\n    }\n    for column in self.config.transforms.columns:\n        for transformation in column.transformations:\n            transformation_logic[\"transformations\"].append(\n                (column.column_name, transformation.name, transformation.params),\n            )\n    return transformation_logic\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor","title":"DatasetProcessor","text":"
DatasetProcessor(config_path: str, csv_path: str)\n

Bases: DatasetHandler

Class for loading dataset, applying transformations and splitting.

Methods:

  • add_split \u2013

    Add a column specifying the train, validation, test splits of the data.

  • apply_transformation_group \u2013

    Apply the transformation group to the data.

  • load_csv \u2013

    Load the CSV file into a polars DataFrame.

  • read_csv_header \u2013

    Get the column names from the header of the CSV file.

  • save \u2013

    Saves the data to a csv file.

  • select_columns \u2013

    Select specific columns from the DataFrame and return as a dictionary.

  • shuffle_labels \u2013

    Shuffles the labels in the data.

Source code in src/stimulus/data/data_handlers.py
def __init__(self, config_path: str, csv_path: str) -> None:\n    \"\"\"Initialize the DatasetProcessor.\"\"\"\n    super().__init__(config_path, csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.add_split","title":"add_split","text":"
add_split(\n    split_manager: SplitManager, *, force: bool = False\n) -> None\n

Add a column specifying the train, validation, test splits of the data.

An exception is raised if a split column is already present in the CSV file. This behaviour can be overridden by setting force=True.

Parameters:

  • split_manager (SplitManager) \u2013

    Manager for handling dataset splitting

  • force (bool, default: False ) \u2013

    If True, the split column present in the csv file will be overwritten.

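A minimal usage sketch, assuming a configured SplitManager is available (file names are placeholders, not taken from the source):

>>> processor = DatasetProcessor(config_path="config.yaml", csv_path="data.csv")
>>> processor.add_split(split_manager)  # adds a "split" column: train=0, validation=1, test=2
>>> processor.save("data_with_split.csv")
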
Source code in src/stimulus/data/data_handlers.py
def add_split(self, split_manager: SplitManager, *, force: bool = False) -> None:\n    \"\"\"Add a column specifying the train, validation, test splits of the data.\n\n    An error exception is raised if the split column is already present in the csv file. This behaviour can be overriden by setting force=True.\n\n    Args:\n        split_manager (SplitManager): Manager for handling dataset splitting\n        force (bool): If True, the split column present in the csv file will be overwritten.\n    \"\"\"\n    if (\"split\" in self.columns) and (not force):\n        raise ValueError(\n            \"The category split is already present in the csv file. If you want to still use this function, set force=True\",\n        )\n    # get relevant split columns from the dataset_manager\n    split_columns = self.dataset_manager.get_split_columns()\n    split_input_data = self.select_columns(split_columns)\n\n    # get the split indices\n    train, validation, test = split_manager.get_split_indices(split_input_data)\n\n    # add the split column to the data\n    split_column = np.full(len(self.data), -1).astype(int)\n    split_column[train] = 0\n    split_column[validation] = 1\n    split_column[test] = 2\n    self.data = self.data.with_columns(pl.Series(\"split\", split_column))\n\n    if \"split\" not in self.columns:\n        self.columns.append(\"split\")\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.apply_transformation_group","title":"apply_transformation_group","text":"
apply_transformation_group(\n    transform_manager: TransformManager,\n) -> None\n

Apply the transformation group to the data.

Source code in src/stimulus/data/data_handlers.py
def apply_transformation_group(self, transform_manager: TransformManager) -> None:\n    \"\"\"Apply the transformation group to the data.\"\"\"\n    for column_name, transform_name, _params in self.dataset_manager.get_transform_logic()[\"transformations\"]:\n        transformed_data, add_row = transform_manager.transform_column(\n            column_name,\n            transform_name,\n            self.data[column_name],\n        )\n        if add_row:\n            new_rows = self.data.with_columns(pl.Series(column_name, transformed_data))\n            self.data = pl.vstack(self.data, new_rows)\n        else:\n            self.data = self.data.with_columns(pl.Series(column_name, transformed_data))\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.load_csv","title":"load_csv","text":"
load_csv(csv_path: str) -> DataFrame\n

Load the CSV file into a polars DataFrame.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to load.

Returns:

  • DataFrame \u2013

    pl.DataFrame: Polars DataFrame containing the loaded CSV data.

Source code in src/stimulus/data/data_handlers.py
def load_csv(self, csv_path: str) -> pl.DataFrame:\n    \"\"\"Load the CSV file into a polars DataFrame.\n\n    Args:\n        csv_path (str): Path to the CSV file to load.\n\n    Returns:\n        pl.DataFrame: Polars DataFrame containing the loaded CSV data.\n    \"\"\"\n    return pl.read_csv(csv_path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.read_csv_header","title":"read_csv_header","text":"
read_csv_header(csv_path: str) -> list\n

Get the column names from the header of the CSV file.

Parameters:

  • csv_path (str) \u2013

    Path to the CSV file to read headers from.

Returns:

  • list ( list ) \u2013

    List of column names from the CSV header.

Source code in src/stimulus/data/data_handlers.py
def read_csv_header(self, csv_path: str) -> list:\n    \"\"\"Get the column names from the header of the CSV file.\n\n    Args:\n        csv_path (str): Path to the CSV file to read headers from.\n\n    Returns:\n        list: List of column names from the CSV header.\n    \"\"\"\n    with open(csv_path) as f:\n        return f.readline().strip().split(\",\")\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.save","title":"save","text":"
save(path: str) -> None\n

Saves the data to a csv file.

Source code in src/stimulus/data/data_handlers.py
def save(self, path: str) -> None:\n    \"\"\"Saves the data to a csv file.\"\"\"\n    self.data.write_csv(path)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.select_columns","title":"select_columns","text":"
select_columns(columns: list) -> dict\n

Select specific columns from the DataFrame and return as a dictionary.

Parameters:

  • columns (list) \u2013

    List of column names to select.

Returns:

  • dict ( dict ) \u2013

    A dictionary where keys are column names and values are lists containing the column data.

Example

>>> handler = DatasetHandler(...)
>>> data_dict = handler.select_columns(["col1", "col2"])

Source code in src/stimulus/data/data_handlers.py
def select_columns(self, columns: list) -> dict:\n    \"\"\"Select specific columns from the DataFrame and return as a dictionary.\n\n    Args:\n        columns (list): List of column names to select.\n\n    Returns:\n        dict: A dictionary where keys are column names and values are lists containing the column data.\n\n    Example:\n        >>> handler = DatasetHandler(...)\n        >>> data_dict = handler.select_columns([\"col1\", \"col2\"])\n        >>> # Returns {'col1': [1, 2, 3], 'col2': [4, 5, 6]}\n    \"\"\"\n    df = self.data.select(columns)\n    return {col: df[col].to_list() for col in columns}\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.select_columns--returns","title":"Returns","text":""},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.DatasetProcessor.shuffle_labels","title":"shuffle_labels","text":"
shuffle_labels(seed: Optional[float] = None) -> None\n

Shuffles the labels in the data.

Source code in src/stimulus/data/data_handlers.py
def shuffle_labels(self, seed: Optional[float] = None) -> None:\n    \"\"\"Shuffles the labels in the data.\"\"\"\n    # set the np seed\n    np.random.seed(seed)\n\n    label_keys = self.dataset_manager.column_categories[\"label\"]\n    for key in label_keys:\n        self.data = self.data.with_columns(pl.Series(key, np.random.permutation(list(self.data[key]))))\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.EncodeManager","title":"EncodeManager","text":"
EncodeManager(encoder_loader: EncoderLoader)\n

Manages the encoding of data columns using configured encoders.

This class handles encoding of data columns based on the encoders specified in the configuration. It uses an EncoderLoader to get the appropriate encoder for each column and applies the encoding.

Attributes:

  • encoder_loader (EncoderLoader) \u2013

    Loader that provides encoders based on config.

Example

>>> encoder_loader = EncoderLoader(config)
>>> encode_manager = EncodeManager(encoder_loader)
>>> data = ["ACGT", "TGCA", "GCTA"]
>>> encoded = encode_manager.encode_column("dna_seq", data)
>>> print(encoded.shape)
torch.Size([3, 4, 4])  # 3 sequences, length 4, one-hot encoded

Parameters:

  • encoder_loader (EncoderLoader) \u2013

    Loader that provides encoders based on configuration.

Methods:

  • encode_column \u2013

    Encodes a column of data using the configured encoder.

  • encode_columns \u2013

    Encodes multiple columns of data using the configured encoders.

  • encode_dataframe \u2013

    Encode the dataframe using the encoders.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    encoder_loader: loaders.EncoderLoader,\n) -> None:\n    \"\"\"Initialize the EncodeManager.\n\n    Args:\n        encoder_loader: Loader that provides encoders based on configuration.\n    \"\"\"\n    self.encoder_loader = encoder_loader\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.EncodeManager.encode_column","title":"encode_column","text":"
encode_column(\n    column_name: str, column_data: list\n) -> Tensor\n

Encodes a column of data using the configured encoder.

Gets the appropriate encoder for the column from the encoder_loader and uses it to encode all the data in the column.

Parameters:

  • column_name (str) \u2013

    Name of the column to encode.

  • column_data (list) \u2013

    List of data values from the column to encode.

Returns:

  • Tensor \u2013

    Encoded data as a torch.Tensor. The exact shape depends on the encoder used.

Example

>>> data = ["ACGT", "TGCA"]
>>> encoded = encode_manager.encode_column("dna_seq", data)
>>> print(encoded.shape)
torch.Size([2, 4, 4])  # 2 sequences, length 4, one-hot encoded

Source code in src/stimulus/data/data_handlers.py
def encode_column(self, column_name: str, column_data: list) -> torch.Tensor:\n    \"\"\"Encodes a column of data using the configured encoder.\n\n    Gets the appropriate encoder for the column from the encoder_loader and uses it\n    to encode all the data in the column.\n\n    Args:\n        column_name: Name of the column to encode.\n        column_data: List of data values from the column to encode.\n\n    Returns:\n        Encoded data as a torch.Tensor. The exact shape depends on the encoder used.\n\n    Example:\n        >>> data = [\"ACGT\", \"TGCA\"]\n        >>> encoded = encode_manager.encode_column(\"dna_seq\", data)\n        >>> print(encoded.shape)\n        torch.Size([2, 4, 4])  # 2 sequences, length 4, one-hot encoded\n    \"\"\"\n    encode_all_function = self.encoder_loader.get_function_encode_all(column_name)\n    return encode_all_function(column_data)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.EncodeManager.encode_columns","title":"encode_columns","text":"
encode_columns(column_data: dict) -> dict\n

Encodes multiple columns of data using the configured encoders.

Gets the appropriate encoder for each column from the encoder_loader and encodes all data values in those columns.

Parameters:

  • column_data (dict) \u2013

    Dict mapping column names to lists of data values to encode.

Returns:

  • dict –

    Dict mapping column names to their encoded tensors. The exact shape of each tensor depends on the encoder used for that column.

Example

>>> data = {"dna_seq": ["ACGT", "TGCA"], "labels": ["1", "2"]}
>>> encoded = encode_manager.encode_columns(data)
>>> print(encoded["dna_seq"].shape)
torch.Size([2, 4, 4])  # 2 sequences, length 4, one-hot encoded

Source code in src/stimulus/data/data_handlers.py
def encode_columns(self, column_data: dict) -> dict:\n    \"\"\"Encodes multiple columns of data using the configured encoders.\n\n    Gets the appropriate encoder for each column from the encoder_loader and encodes\n    all data values in those columns.\n\n    Args:\n        column_data: Dict mapping column names to lists of data values to encode.\n\n    Returns:\n        Dict mapping column names to their encoded tensors. The exact shape of each\n        tensor depends on the encoder used for that column.\n\n    Example:\n        >>> data = {\"dna_seq\": [\"ACGT\", \"TGCA\"], \"labels\": [\"1\", \"2\"]}\n        >>> encoded = encode_manager.encode_columns(data)\n        >>> print(encoded[\"dna_seq\"].shape)\n        torch.Size([2, 4, 4])  # 2 sequences, length 4, one-hot encoded\n    \"\"\"\n    return {col: self.encode_column(col, values) for col, values in column_data.items()}\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.EncodeManager.encode_dataframe","title":"encode_dataframe","text":"
encode_dataframe(dataframe: DataFrame) -> dict[str, Tensor]\n

Encode the dataframe using the encoders.

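A small sketch, assuming every column of the frame has an encoder configured in the loader (the column name and values are illustrative):

>>> df = pl.DataFrame({"dna_seq": ["ACGT", "TGCA"]})
>>> encoded = encode_manager.encode_dataframe(df)
>>> encoded["dna_seq"].shape
torch.Size([2, 4, 4])
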
Source code in src/stimulus/data/data_handlers.py
def encode_dataframe(self, dataframe: pl.DataFrame) -> dict[str, torch.Tensor]:\n    \"\"\"Encode the dataframe using the encoders.\"\"\"\n    return {col: self.encode_column(col, dataframe[col].to_list()) for col in dataframe.columns}\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.SplitManager","title":"SplitManager","text":"
SplitManager(split_loader: SplitLoader)\n

Class for managing the splitting.

Methods:

  • get_split_indices \u2013

    Get the indices for train, validation, and test splits.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    split_loader: loaders.SplitLoader,\n) -> None:\n    \"\"\"Initialize the SplitManager.\"\"\"\n    self.split_loader = split_loader\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.SplitManager.get_split_indices","title":"get_split_indices","text":"
get_split_indices(\n    data: dict,\n) -> tuple[ndarray, ndarray, ndarray]\n

Get the indices for train, validation, and test splits.

Source code in src/stimulus/data/data_handlers.py
def get_split_indices(self, data: dict) -> tuple[np.ndarray, np.ndarray, np.ndarray]:\n    \"\"\"Get the indices for train, validation, and test splits.\"\"\"\n    return self.split_loader.get_function_split()(data)\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.TransformManager","title":"TransformManager","text":"
TransformManager(transform_loader: TransformLoader)\n

Class for managing the transformations.

Methods:

  • transform_column \u2013

    Transform a column of data using the specified transformation.

Source code in src/stimulus/data/data_handlers.py
def __init__(\n    self,\n    transform_loader: loaders.TransformLoader,\n) -> None:\n    \"\"\"Initialize the TransformManager.\"\"\"\n    self.transform_loader = transform_loader\n
"},{"location":"reference/stimulus/data/data_handlers/#stimulus.data.data_handlers.TransformManager.transform_column","title":"transform_column","text":"
transform_column(\n    column_name: str, transform_name: str, column_data: list\n) -> tuple[list, bool]\n

Transform a column of data using the specified transformation.

Parameters:

  • column_name (str) \u2013

    The name of the column to transform.

  • transform_name (str) \u2013

    The name of the transformation to use.

  • column_data (list) \u2013

    The data to transform.

Returns:

  • list ( list ) \u2013

    The transformed data.

  • bool ( bool ) \u2013

    Whether the transformation added new rows to the data.

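A brief sketch (the column and transform names are illustrative and must match the loader's configuration):

>>> transformed, added_rows = transform_manager.transform_column("age", "GaussianNoise", data["age"])
>>> # added_rows indicates whether the transformed values should be appended as new rows
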
Source code in src/stimulus/data/data_handlers.py
def transform_column(self, column_name: str, transform_name: str, column_data: list) -> tuple[list, bool]:\n    \"\"\"Transform a column of data using the specified transformation.\n\n    Args:\n        column_name (str): The name of the column to transform.\n        transform_name (str): The name of the transformation to use.\n        column_data (list): The data to transform.\n\n    Returns:\n        list: The transformed data.\n        bool: Whether the transformation added new rows to the data.\n    \"\"\"\n    transformer = self.transform_loader.__getattribute__(column_name)[transform_name]\n    return transformer.transform_all(column_data), transformer.add_row\n
"},{"location":"reference/stimulus/data/handlertorch/","title":"stimulus.data.handlertorch","text":""},{"location":"reference/stimulus/data/handlertorch/#stimulus.data.handlertorch","title":"handlertorch","text":"

This file provides the class API for handling the data in PyTorch using the Dataset and DataLoader classes.

Classes:

  • TorchDataset \u2013

    Class for creating a torch dataset.

"},{"location":"reference/stimulus/data/handlertorch/#stimulus.data.handlertorch.TorchDataset","title":"TorchDataset","text":"
TorchDataset(\n    config_path: str,\n    csv_path: str,\n    encoder_loader: EncoderLoader,\n    split: Optional[int] = None,\n)\n

Bases: Dataset

Class for creating a torch dataset.

Parameters:

  • config_path (str) \u2013

    Path to the configuration file

  • csv_path (str) \u2013

    Path to the CSV data file

  • encoder_loader (EncoderLoader) \u2013

    Encoder loader instance

  • split (Optional[int], default: None ) \u2013

    Optional tuple containing split information

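A hedged usage sketch; the paths are placeholders, and it is assumed that the dataset exposes the usual __len__/__getitem__ so it can be wrapped in a torch DataLoader:

>>> from torch.utils.data import DataLoader
>>> dataset = TorchDataset(
...     config_path="config.yaml",
...     csv_path="data.csv",
...     encoder_loader=encoder_loader,  # an initialized loaders.EncoderLoader
...     split=0,  # e.g. keep only one split, assuming the train=0/validation=1/test=2 convention
... )
>>> loader = DataLoader(dataset, batch_size=32, shuffle=True)
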
Source code in src/stimulus/data/handlertorch.py
def __init__(\n    self,\n    config_path: str,\n    csv_path: str,\n    encoder_loader: loaders.EncoderLoader,\n    split: Optional[int] = None,\n) -> None:\n    \"\"\"Initialize the TorchDataset.\n\n    Args:\n        config_path: Path to the configuration file\n        csv_path: Path to the CSV data file\n        encoder_loader: Encoder loader instance\n        split: Optional tuple containing split information\n    \"\"\"\n    self.loader = data_handlers.DatasetLoader(\n        config_path=config_path,\n        csv_path=csv_path,\n        encoder_loader=encoder_loader,\n        split=split,\n    )\n
"},{"location":"reference/stimulus/data/loaders/","title":"stimulus.data.loaders","text":""},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders","title":"loaders","text":"

Loaders serve as interfaces between the CSV master class and custom methods.

Mainly, three types of custom methods are supported:

  • Encoders – methods for encoding data before it is fed into the model
  • Data transformers – methods for transforming data (e.g. augmenting, noising)
  • Splitters – methods for splitting data into train, validation and test sets

Loaders are built from an input config YAML file whose format is described in the documentation; an example can be found at tests/test_data/dna_experiment/dna_experiment_config_template.yaml.
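
As a sketch of how the three loaders are typically wired together (the config attribute names follow the column/transform/split sections used elsewhere on this page and should be treated as assumptions):

>>> encoder_loader = EncoderLoader(seed=42)
>>> encoder_loader.initialize_column_encoders_from_config(config.columns)
>>> transform_loader = TransformLoader(seed=42)
>>> transform_loader.initialize_column_data_transformers_from_config(config.transforms)
>>> split_loader = SplitLoader(seed=42)
>>> split_loader.initialize_splitter_from_config(config.split)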

Classes:

  • EncoderLoader \u2013

    Class for loading encoders from a config file.

  • SplitLoader \u2013

    Class for loading splitters from a config file.

  • TransformLoader \u2013

    Class for loading transformations from a config file.

"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.EncoderLoader","title":"EncoderLoader","text":"
EncoderLoader(seed: Optional[float] = None)\n

Class for loading encoders from a config file.

Parameters:

  • seed (Optional[float], default: None ) \u2013

    Random seed for reproducibility

Methods:

  • get_encoder \u2013

    Gets an encoder object from the encoders module and initializes it with the given parameters.

  • get_function_encode_all \u2013

    Gets the encoding function for a specific field.

  • initialize_column_encoders_from_config \u2013

    Build the loader from a config dictionary.

  • set_encoder_as_attribute \u2013

    Sets the encoder as an attribute of the loader.

Source code in src/stimulus/data/loaders.py
def __init__(self, seed: Optional[float] = None) -> None:\n    \"\"\"Initialize the encoder loader.\n\n    Args:\n        seed: Random seed for reproducibility\n    \"\"\"\n    self.seed = seed\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.EncoderLoader.get_encoder","title":"get_encoder","text":"
get_encoder(\n    encoder_name: str, encoder_params: Optional[dict] = None\n) -> Any\n

Gets an encoder object from the encoders module and initializes it with the given parameters.

Parameters:

  • encoder_name (str) \u2013

    The name of the encoder to get

  • encoder_params (dict, default: None ) \u2013

    The parameters for the encoder

Returns:

  • Any ( Any ) \u2013

    The encoder function for the specified field and parameters

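An encoder can also be wired up without a config file; a brief sketch (the encoder and column names are illustrative):

>>> loader = EncoderLoader()
>>> encoder = loader.get_encoder("TextOneHotEncoder", {"alphabet": "acgt"})
>>> loader.set_encoder_as_attribute("dna_seq", encoder)
>>> loader.get_function_encode_all("dna_seq")(["acgt", "tgca"]).shape
torch.Size([2, 4, 4])
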
Source code in src/stimulus/data/loaders.py
def get_encoder(self, encoder_name: str, encoder_params: Optional[dict] = None) -> Any:\n    \"\"\"Gets an encoder object from the encoders module and initializes it with the given parameters.\n\n    Args:\n        encoder_name (str): The name of the encoder to get\n        encoder_params (dict): The parameters for the encoder\n\n    Returns:\n        Any: The encoder function for the specified field and parameters\n    \"\"\"\n    try:\n        return getattr(encoders, encoder_name)(**encoder_params)\n    except AttributeError:\n        logging.exception(f\"Encoder '{encoder_name}' not found in the encoders module.\")\n        logging.exception(\n            f\"Available encoders: {[name for name, obj in encoders.__dict__.items() if isinstance(obj, type) and name not in ('ABC', 'Any')]}\",\n        )\n        raise\n\n    except TypeError:\n        if encoder_params is None:\n            return getattr(encoders, encoder_name)()\n        logging.exception(f\"Encoder '{encoder_name}' has incorrect parameters: {encoder_params}\")\n        logging.exception(\n            f\"Expected parameters for '{encoder_name}': {inspect.signature(getattr(encoders, encoder_name))}\",\n        )\n        raise\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.EncoderLoader.get_function_encode_all","title":"get_function_encode_all","text":"
get_function_encode_all(field_name: str) -> Any\n

Gets the encoding function for a specific field.

Parameters:

  • field_name (str) \u2013

    The field name to get the encoder for

Returns:

  • Any ( Any ) \u2013

    The encode_all function for the specified field

Source code in src/stimulus/data/loaders.py
def get_function_encode_all(self, field_name: str) -> Any:\n    \"\"\"Gets the encoding function for a specific field.\n\n    Args:\n        field_name (str): The field name to get the encoder for\n\n    Returns:\n        Any: The encode_all function for the specified field\n    \"\"\"\n    return getattr(self, field_name).encode_all\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.EncoderLoader.initialize_column_encoders_from_config","title":"initialize_column_encoders_from_config","text":"
initialize_column_encoders_from_config(\n    column_config: YamlColumns,\n) -> None\n

Build the loader from a config dictionary.

Parameters:

  • column_config (YamlColumns) \u2013

    Configuration dictionary containing field names (column_name) and their encoder specifications.

Source code in src/stimulus/data/loaders.py
def initialize_column_encoders_from_config(self, column_config: yaml_data.YamlColumns) -> None:\n    \"\"\"Build the loader from a config dictionary.\n\n    Args:\n        column_config (yaml_data.YamlColumns): Configuration dictionary containing field names (column_name) and their encoder specifications.\n    \"\"\"\n    for field in column_config:\n        encoder = self.get_encoder(field.encoder[0].name, field.encoder[0].params)\n        self.set_encoder_as_attribute(field.column_name, encoder)\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.EncoderLoader.set_encoder_as_attribute","title":"set_encoder_as_attribute","text":"
set_encoder_as_attribute(\n    field_name: str, encoder: AbstractEncoder\n) -> None\n

Sets the encoder as an attribute of the loader.

Parameters:

  • field_name (str) \u2013

    The name of the field to set the encoder for

  • encoder (AbstractEncoder) \u2013

    The encoder to set

Source code in src/stimulus/data/loaders.py
def set_encoder_as_attribute(self, field_name: str, encoder: encoders.AbstractEncoder) -> None:\n    \"\"\"Sets the encoder as an attribute of the loader.\n\n    Args:\n        field_name (str): The name of the field to set the encoder for\n        encoder (encoders.AbstractEncoder): The encoder to set\n    \"\"\"\n    setattr(self, field_name, encoder)\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.SplitLoader","title":"SplitLoader","text":"
SplitLoader(seed: Optional[float] = None)\n

Class for loading splitters from a config file.

Parameters:

  • seed (Optional[float], default: None ) \u2013

    Random seed for reproducibility

Methods:

  • get_function_split \u2013

    Gets the function for splitting the data.

  • get_splitter \u2013

    Gets a splitter object from the splitters module.

  • initialize_splitter_from_config \u2013

    Build the loader from a config dictionary.

  • set_splitter_as_attribute \u2013

    Sets the splitter as an attribute of the loader.

Source code in src/stimulus/data/loaders.py
def __init__(self, seed: Optional[float] = None) -> None:\n    \"\"\"Initialize the split loader.\n\n    Args:\n        seed: Random seed for reproducibility\n    \"\"\"\n    self.seed = seed\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.SplitLoader.get_function_split","title":"get_function_split","text":"
get_function_split() -> Any\n

Gets the function for splitting the data.

Returns:

  • Any ( Any ) \u2013

    The split function for the specified method

Raises:

  • AttributeError \u2013

    If splitter hasn't been initialized using initialize_splitter_from_config()

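A minimal sketch of retrieving and applying the split function once a splitter has been initialized (the input follows the dict-of-columns layout used by SplitManager.get_split_indices):

>>> split_fn = split_loader.get_function_split()
>>> train_idx, val_idx, test_idx = split_fn(split_input_data)  # dict mapping column names to lists of values
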
Source code in src/stimulus/data/loaders.py
def get_function_split(self) -> Any:\n    \"\"\"Gets the function for splitting the data.\n\n    Returns:\n        Any: The split function for the specified method\n\n    Raises:\n        AttributeError: If splitter hasn't been initialized using initialize_splitter_from_config()\n    \"\"\"\n    if not hasattr(self, \"split\"):\n        # Raise a more specific error and chain it to the original AttributeError\n        raise AttributeError(\n            \"Splitter not initialized. Please call initialize_splitter_from_config() or set_splitter_as_attribute() \"\n            \"before attempting to get split function.\",\n        )\n    return self.split.get_split_indexes\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.SplitLoader.get_splitter","title":"get_splitter","text":"
get_splitter(\n    splitter_name: str,\n    splitter_params: Optional[dict] = None,\n) -> Any\n

Gets a splitter object from the splitters module.

Parameters:

  • splitter_name (str) \u2013

    The name of the splitter to get

  • splitter_params (Optional[dict], default: None ) \u2013

    Parameters for the splitter

Returns:

  • Any ( Any ) \u2013

    The splitter function for the specified splitter

Source code in src/stimulus/data/loaders.py
def get_splitter(self, splitter_name: str, splitter_params: Optional[dict] = None) -> Any:\n    \"\"\"Gets a splitter object from the splitters module.\n\n    Args:\n        splitter_name (str): The name of the splitter to get\n        splitter_params (Optional[dict]): Parameters for the splitter\n\n    Returns:\n        Any: The splitter function for the specified splitter\n    \"\"\"\n    try:\n        return getattr(splitters, splitter_name)(**splitter_params)\n    except TypeError:\n        if splitter_params is None:\n            return getattr(splitters, splitter_name)()\n        logging.exception(f\"Splitter '{splitter_name}' has incorrect parameters: {splitter_params}\")\n        logging.exception(\n            f\"Expected parameters for '{splitter_name}': {inspect.signature(getattr(splitters, splitter_name))}\",\n        )\n        raise\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.SplitLoader.initialize_splitter_from_config","title":"initialize_splitter_from_config","text":"
initialize_splitter_from_config(\n    split_config: YamlSplit,\n) -> None\n

Build the loader from a config dictionary.

Parameters:

  • split_config (YamlSplit) \u2013

    Configuration dictionary containing split configurations.

Source code in src/stimulus/data/loaders.py
def initialize_splitter_from_config(self, split_config: yaml_data.YamlSplit) -> None:\n    \"\"\"Build the loader from a config dictionary.\n\n    Args:\n        split_config (yaml_data.YamlSplit): Configuration dictionary containing split configurations.\n    \"\"\"\n    splitter = self.get_splitter(split_config.split_method, split_config.params)\n    self.set_splitter_as_attribute(splitter)\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.SplitLoader.set_splitter_as_attribute","title":"set_splitter_as_attribute","text":"
set_splitter_as_attribute(splitter: Any) -> None\n

Sets the splitter as an attribute of the loader.

Parameters:

  • splitter (Any) \u2013

    The splitter to set

Source code in src/stimulus/data/loaders.py
def set_splitter_as_attribute(self, splitter: Any) -> None:\n    \"\"\"Sets the splitter as an attribute of the loader.\n\n    Args:\n        splitter (Any): The splitter to set\n    \"\"\"\n    self.split = splitter\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.TransformLoader","title":"TransformLoader","text":"
TransformLoader(seed: Optional[float] = None)\n

Class for loading transformations from a config file.

Parameters:

  • seed (Optional[float], default: None ) \u2013

    Random seed for reproducibility

Methods:

  • get_data_transformer \u2013

    Gets a transformer object from the transformers module.

  • initialize_column_data_transformers_from_config \u2013

    Build the loader from a config dictionary.

  • set_data_transformer_as_attribute \u2013

    Sets the data transformer as an attribute of the loader.

Source code in src/stimulus/data/loaders.py
def __init__(self, seed: Optional[float] = None) -> None:\n    \"\"\"Initialize the transform loader.\n\n    Args:\n        seed: Random seed for reproducibility\n    \"\"\"\n    self.seed = seed\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.TransformLoader.get_data_transformer","title":"get_data_transformer","text":"
get_data_transformer(\n    transformation_name: str,\n    transformation_params: Optional[dict] = None,\n) -> Any\n

Gets a transformer object from the transformers module.

Parameters:

  • transformation_name (str) \u2013

    The name of the transformer to get

  • transformation_params (Optional[dict], default: None ) \u2013

    Parameters for the transformer

Returns:

  • Any ( Any ) \u2013

    The transformer function for the specified transformation

Source code in src/stimulus/data/loaders.py
def get_data_transformer(self, transformation_name: str, transformation_params: Optional[dict] = None) -> Any:\n    \"\"\"Gets a transformer object from the transformers module.\n\n    Args:\n        transformation_name (str): The name of the transformer to get\n        transformation_params (Optional[dict]): Parameters for the transformer\n\n    Returns:\n        Any: The transformer function for the specified transformation\n    \"\"\"\n    try:\n        return getattr(data_transformation_generators, transformation_name)(**transformation_params)\n    except AttributeError:\n        logging.exception(f\"Transformer '{transformation_name}' not found in the transformers module.\")\n        logging.exception(\n            f\"Available transformers: {[name for name, obj in data_transformation_generators.__dict__.items() if isinstance(obj, type) and name not in ('ABC', 'Any')]}\",\n        )\n        raise\n\n    except TypeError:\n        if transformation_params is None:\n            return getattr(data_transformation_generators, transformation_name)()\n        logging.exception(f\"Transformer '{transformation_name}' has incorrect parameters: {transformation_params}\")\n        logging.exception(\n            f\"Expected parameters for '{transformation_name}': {inspect.signature(getattr(data_transformation_generators, transformation_name))}\",\n        )\n        raise\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.TransformLoader.initialize_column_data_transformers_from_config","title":"initialize_column_data_transformers_from_config","text":"
initialize_column_data_transformers_from_config(\n    transform_config: YamlTransform,\n) -> None\n

Build the loader from a config dictionary.

Parameters:

  • transform_config (YamlTransform) \u2013

    Configuration dictionary containing transforms configurations.

Example

Given a YAML config like:

transforms:\n  transformation_name: noise\n  columns:\n    - column_name: age\n      transformations:\n        - name: GaussianNoise\n          params:\n            std: 0.1\n    - column_name: fare\n      transformations:\n        - name: GaussianNoise\n          params:\n            std: 0.1\n

The loader will:

  1. Iterate through each column (age, fare)
  2. For each transformation in the column:
     - Get the transformer (GaussianNoise) with its params (std=0.1)
     - Set it as an attribute on the loader using the column name as key

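After initialization from the YAML above, each transformer is then reachable per column and per transformer class name (a sketch based on set_data_transformer_as_attribute below):

>>> transform_loader = TransformLoader(seed=42)
>>> transform_loader.initialize_column_data_transformers_from_config(config.transforms)
>>> transform_loader.age["GaussianNoise"]   # GaussianNoise instance with std=0.1
>>> transform_loader.fare["GaussianNoise"]  # a separate instance for the fare column
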
Source code in src/stimulus/data/loaders.py
def initialize_column_data_transformers_from_config(self, transform_config: yaml_data.YamlTransform) -> None:\n    \"\"\"Build the loader from a config dictionary.\n\n    Args:\n        transform_config (yaml_data.YamlTransform): Configuration dictionary containing transforms configurations.\n\n    Example:\n        Given a YAML config like:\n        ```yaml\n        transforms:\n          transformation_name: noise\n          columns:\n            - column_name: age\n              transformations:\n                - name: GaussianNoise\n                  params:\n                    std: 0.1\n            - column_name: fare\n              transformations:\n                - name: GaussianNoise\n                  params:\n                    std: 0.1\n        ```\n\n        The loader will:\n        1. Iterate through each column (age, fare)\n        2. For each transformation in the column:\n           - Get the transformer (GaussianNoise) with its params (std=0.1)\n           - Set it as an attribute on the loader using the column name as key\n    \"\"\"\n    for column in transform_config.columns:\n        col_name = column.column_name\n        for transform_spec in column.transformations:\n            transformer = self.get_data_transformer(transform_spec.name, transform_spec.params)\n            self.set_data_transformer_as_attribute(col_name, transformer)\n
"},{"location":"reference/stimulus/data/loaders/#stimulus.data.loaders.TransformLoader.set_data_transformer_as_attribute","title":"set_data_transformer_as_attribute","text":"
set_data_transformer_as_attribute(\n    field_name: str, data_transformer: Any\n) -> None\n

Sets the data transformer as an attribute of the loader.

Parameters:

  • field_name (str) \u2013

    The name of the field to set the data transformer for

  • data_transformer (Any) \u2013

    The data transformer to set

Source code in src/stimulus/data/loaders.py
def set_data_transformer_as_attribute(self, field_name: str, data_transformer: Any) -> None:\n    \"\"\"Sets the data transformer as an attribute of the loader.\n\n    Args:\n        field_name (str): The name of the field to set the data transformer for\n        data_transformer (Any): The data transformer to set\n    \"\"\"\n    # check if the field already exists, if it does not, initialize it to an empty dict\n    if not hasattr(self, field_name):\n        setattr(self, field_name, {data_transformer.__class__.__name__: data_transformer})\n    else:\n        field_value = getattr(self, field_name)\n        field_value[data_transformer.__class__.__name__] = data_transformer\n
"},{"location":"reference/stimulus/data/encoding/","title":"stimulus.data.encoding","text":""},{"location":"reference/stimulus/data/encoding/#stimulus.data.encoding","title":"encoding","text":"

Encoding package for data transformation.

Modules:

  • encoders \u2013

    This file contains encoders classes for encoding various types of data.

"},{"location":"reference/stimulus/data/encoding/encoders/","title":"stimulus.data.encoding.encoders","text":""},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders","title":"encoders","text":"

This file contains encoders classes for encoding various types of data.

Classes:

  • AbstractEncoder \u2013

    Abstract class for encoders.

  • NumericEncoder \u2013

    Encoder for float/int data.

  • NumericRankEncoder \u2013

    Encoder for float/int data that encodes the data based on their rank.

  • StrClassificationEncoder \u2013

    A string classification encoder that converts lists of strings into numeric labels using scikit-learn.

  • TextOneHotEncoder \u2013

    One hot encoder for text data.

"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.AbstractEncoder","title":"AbstractEncoder","text":"

Bases: ABC

Abstract class for encoders.

Encoders are classes that encode raw data into torch.Tensors. Different encoders provide different encoding methods and may accept different types of data as input.

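As a sketch of the expected contract (a toy illustration, not an encoder shipped with stimulus), a concrete subclass only needs to implement encode, encode_all and decode:

import torch

from stimulus.data.encoding.encoders import AbstractEncoder

class LengthEncoder(AbstractEncoder):
    """Toy encoder that represents each string by its length."""

    def encode(self, data: str) -> torch.Tensor:
        return torch.tensor(float(len(data)))

    def encode_all(self, data: list[str]) -> torch.Tensor:
        return torch.tensor([float(len(item)) for item in data])

    def decode(self, data: torch.Tensor) -> list[float]:
        # Lengths cannot be mapped back to the original strings; return the raw values instead.
        return data.tolist()
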
Methods:

  • encode \u2013

    encodes a single data point

  • encode_all \u2013

    encodes a list of data points into a torch.tensor

  • encode_multiprocess \u2013

    encodes a list of data points using multiprocessing

  • decode \u2013

    decodes a single data point

Methods:

  • decode \u2013

    Decode a single data point.

  • encode \u2013

    Encode a single data point.

  • encode_all \u2013

    Encode a list of data points.

"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.AbstractEncoder.decode","title":"decode abstractmethod","text":"
decode(data: Any) -> Any\n

Decode a single data point.

This is an abstract method, child classes should overwrite it.

Parameters:

  • data (Any) \u2013

    a single encoded data point

Returns:

  • decoded_data_point ( Any ) \u2013

    the decoded data point

Source code in src/stimulus/data/encoding/encoders.py
@abstractmethod\ndef decode(self, data: Any) -> Any:\n    \"\"\"Decode a single data point.\n\n    This is an abstract method, child classes should overwrite it.\n\n    Args:\n        data (Any): a single encoded data point\n\n    Returns:\n        decoded_data_point (Any): the decoded data point\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.AbstractEncoder.encode","title":"encode abstractmethod","text":"
encode(data: Any) -> Any\n

Encode a single data point.

This is an abstract method, child classes should overwrite it.

Parameters:

  • data (Any) \u2013

    a single data point

Returns:

  • encoded_data_point ( Any ) \u2013

    the encoded data point

Source code in src/stimulus/data/encoding/encoders.py
@abstractmethod\ndef encode(self, data: Any) -> Any:\n    \"\"\"Encode a single data point.\n\n    This is an abstract method, child classes should overwrite it.\n\n    Args:\n        data (Any): a single data point\n\n    Returns:\n        encoded_data_point (Any): the encoded data point\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.AbstractEncoder.encode_all","title":"encode_all abstractmethod","text":"
encode_all(data: list[Any]) -> Tensor\n

Encode a list of data points.

This is an abstract method, child classes should overwrite it.

Parameters:

  • data (list[Any]) \u2013

    a list of data points

Returns:

  • encoded_data ( Tensor ) \u2013

    encoded data points

Source code in src/stimulus/data/encoding/encoders.py
@abstractmethod\ndef encode_all(self, data: list[Any]) -> torch.Tensor:\n    \"\"\"Encode a list of data points.\n\n    This is an abstract method, child classes should overwrite it.\n\n    Args:\n        data (list[Any]): a list of data points\n\n    Returns:\n        encoded_data (torch.Tensor): encoded data points\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericEncoder","title":"NumericEncoder","text":"
NumericEncoder(dtype: dtype = float32)\n

Bases: AbstractEncoder

Encoder for float/int data.

Attributes:

  • dtype (dtype) \u2013

    The data type of the encoded data. Default = torch.float32 (32-bit floating point)

Parameters:

  • dtype (dtype, default: float32 ) \u2013

    the data type of the encoded data. Default = torch.float (32-bit floating point)

Methods:

  • decode \u2013

    Decodes the data.

  • encode \u2013

    Encodes the data.

  • encode_all \u2013

    Encodes the data.

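A minimal sketch:

>>> encoder = NumericEncoder(dtype=torch.float32)
>>> encoder.encode_all([1.0, 2.5, 3.0])
tensor([1.0000, 2.5000, 3.0000])
>>> encoder.encode(0.5)  # delegates to encode_all with a one-element list
tensor([0.5000])
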
Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, dtype: torch.dtype = torch.float32) -> None:\n    \"\"\"Initialize the NumericEncoder class.\n\n    Args:\n        dtype (torch.dtype): the data type of the encoded data. Default = torch.float (32-bit floating point)\n    \"\"\"\n    self.dtype = dtype\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericEncoder.decode","title":"decode","text":"
decode(data: Tensor) -> list[float]\n

Decodes the data.

Parameters:

  • data (Tensor) \u2013

    the encoded data

Returns:

  • decoded_data ( list[float] ) \u2013

    the decoded data

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: torch.Tensor) -> list[float]:\n    \"\"\"Decodes the data.\n\n    Args:\n        data (torch.Tensor): the encoded data\n\n    Returns:\n        decoded_data (list[float]): the decoded data\n    \"\"\"\n    return data.cpu().numpy().tolist()\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericEncoder.encode","title":"encode","text":"
encode(data: float) -> Tensor\n

Encodes the data.

This method takes as input a single data point, which should be mappable to a single output.

Parameters:

  • data (float) \u2013

    a single data point

Returns:

  • encoded_data_point ( Tensor ) \u2013

    the encoded data point

Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: float) -> torch.Tensor:\n    \"\"\"Encodes the data.\n\n    This method takes as input a single data point, should be mappable to a single output.\n\n    Args:\n        data (float): a single data point\n\n    Returns:\n        encoded_data_point (torch.Tensor): the encoded data point\n    \"\"\"\n    return self.encode_all([data])\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericEncoder.encode_all","title":"encode_all","text":"
encode_all(data: list[float]) -> Tensor\n

Encodes the data.

This method takes as input a list of data points, or a single float, and returns a torch.tensor.

Parameters:

  • data (list[float]) \u2013

    a list of data points or a single data point

Returns:

  • encoded_data ( Tensor ) \u2013

    the encoded data

Source code in src/stimulus/data/encoding/encoders.py
def encode_all(self, data: list[float]) -> torch.Tensor:\n    \"\"\"Encodes the data.\n\n    This method takes as input a list of data points, or a single float, and returns a torch.tensor.\n\n    Args:\n        data (list[float]): a list of data points or a single data point\n\n    Returns:\n        encoded_data (torch.Tensor): the encoded data\n    \"\"\"\n    if not isinstance(data, list):\n        data = [data]\n\n    self._check_input_dtype(data)\n    self._warn_float_is_converted_to_int(data)\n\n    return torch.tensor(data, dtype=self.dtype)\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericRankEncoder","title":"NumericRankEncoder","text":"
NumericRankEncoder(*, scale: bool = False)\n

Bases: AbstractEncoder

Encoder for float/int data that encodes the data based on their rank.

Attributes:

  • scale (bool) \u2013

    whether to scale the ranks to be between 0 and 1. Default = False

Methods:

  • encode \u2013

    encodes a single data point

  • encode_all \u2013

    encodes a list of data points into a torch.tensor

  • decode \u2013

    decodes a single data point

  • _check_input_dtype \u2013

    checks if the input data is int or float data

Parameters:

  • scale (bool, default: False ) \u2013

    whether to scale the ranks to be between 0 and 1. Default = False

Methods:

  • decode \u2013

    Returns an error since decoding does not make sense without encoder information, which is not yet supported.

  • encode \u2013

    Returns an error since encoding a single float does not make sense.

  • encode_all \u2013

    Encodes the data.

Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, *, scale: bool = False) -> None:\n    \"\"\"Initialize the NumericRankEncoder class.\n\n    Args:\n        scale (bool): whether to scale the ranks to be between 0 and 1. Default = False\n    \"\"\"\n    self.scale = scale\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericRankEncoder.decode","title":"decode","text":"
decode(data: Any) -> Any\n

Returns an error since decoding does not make sense without encoder information, which is not yet supported.

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: Any) -> Any:\n    \"\"\"Returns an error since decoding does not make sense without encoder information, which is not yet supported.\"\"\"\n    raise NotImplementedError(\"Decoding is not yet supported for NumericRank.\")\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericRankEncoder.encode","title":"encode","text":"
encode(data: Any) -> Tensor\n

Returns an error since encoding a single float does not make sense.

Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: Any) -> torch.Tensor:\n    \"\"\"Returns an error since encoding a single float does not make sense.\"\"\"\n    raise NotImplementedError(\"Encoding a single float does not make sense. Use encode_all instead.\")\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.NumericRankEncoder.encode_all","title":"encode_all","text":"
encode_all(data: list[Union[int, float]]) -> Tensor\n

Encodes the data.

This method takes as input a list of data points, and returns the ranks of the data points. The ranks are normalized to be between 0 and 1, when scale is set to True.

Parameters:

  • data (list[Union[int, float]]) \u2013

    a list of numeric values

Returns:

  • encoded_data ( Tensor ) \u2013

    the encoded data

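For example, following the argsort-based ranking in the source below:

>>> encoder = NumericRankEncoder(scale=True)
>>> encoder.encode_all([10, 30, 20])  # ranks are [0, 2, 1], then divided by (n - 1) = 2
tensor([0.0000, 1.0000, 0.5000], dtype=torch.float64)
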
Source code in src/stimulus/data/encoding/encoders.py
def encode_all(self, data: list[Union[int, float]]) -> torch.Tensor:\n    \"\"\"Encodes the data.\n\n    This method takes as input a list of data points, and returns the ranks of the data points.\n    The ranks are normalized to be between 0 and 1, when scale is set to True.\n\n    Args:\n        data (list[Union[int, float]]): a list of numeric values\n\n    Returns:\n        encoded_data (torch.Tensor): the encoded data\n    \"\"\"\n    if not isinstance(data, list):\n        data = [data]\n    self._check_input_dtype(data)\n\n    # Get ranks (0 is lowest, n-1 is highest)\n    # and normalize to be between 0 and 1\n    array_data: np.ndarray = np.array(data)\n    ranks: np.ndarray = np.argsort(np.argsort(array_data))\n    if self.scale:\n        ranks = ranks / max(len(ranks) - 1, 1)\n    return torch.tensor(ranks)\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.StrClassificationEncoder","title":"StrClassificationEncoder","text":"
StrClassificationEncoder(*, scale: bool = False)\n

Bases: AbstractEncoder

A string classification encoder that converts lists of strings into numeric labels using scikit-learn.

When scale is set to True, the labels are scaled to be between 0 and 1.

Attributes:

  • scale (bool) \u2013

    Whether to scale the labels to be between 0 and 1. Default = False

Methods:

  • encode –

    Raises a NotImplementedError, as encoding a single string is not meaningful in this context.

  • encode_all –

    Encodes an entire list of strings into a numeric representation using LabelEncoder and returns a torch tensor. Ensures that the provided data items are valid strings prior to encoding.

  • decode –

    Raises a NotImplementedError, as decoding is not supported with the current design.

  • _check_dtype –

    Validates that all items in the data list are strings, raising a ValueError otherwise.

Parameters:

  • scale (bool, default: False ) \u2013

    whether to scale the labels to be between 0 and 1. Default = False

Methods:

  • decode \u2013

    Returns an error since decoding does not make sense without encoder information, which is not yet supported.

  • encode \u2013

    Returns an error since encoding a single string does not make sense.

  • encode_all \u2013

    Encodes the data.

Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, *, scale: bool = False) -> None:\n    \"\"\"Initialize the StrClassificationEncoder class.\n\n    Args:\n        scale (bool): whether to scale the labels to be between 0 and 1. Default = False\n    \"\"\"\n    self.scale = scale\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.StrClassificationEncoder.decode","title":"decode","text":"
decode(data: Any) -> Any\n

Returns an error since decoding does not make sense without encoder information, which is not yet supported.

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: Any) -> Any:\n    \"\"\"Returns an error since decoding does not make sense without encoder information, which is not yet supported.\"\"\"\n    raise NotImplementedError(\"Decoding is not yet supported for StrClassification.\")\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.StrClassificationEncoder.encode","title":"encode","text":"
encode(data: str) -> int\n

Returns an error since encoding a single string does not make sense.

Parameters:

  • data (str) \u2013

    a single string

Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: str) -> int:\n    \"\"\"Returns an error since encoding a single string does not make sense.\n\n    Args:\n        data (str): a single string\n    \"\"\"\n    raise NotImplementedError(\"Encoding a single string does not make sense. Use encode_all instead.\")\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.StrClassificationEncoder.encode_all","title":"encode_all","text":"
encode_all(data: Union[str, list[str]]) -> Tensor\n

Encodes the data.

This method encodes a list of strings (or a single string) into numeric labels using LabelEncoder from scikit-learn and returns a torch tensor. For more info visit: https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html

Parameters:

  • data (Union[str, list[str]]) \u2013

    a list of strings or single string

Returns:

  • encoded_data ( tensor ) \u2013

    the encoded data

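For example (scikit-learn's LabelEncoder assigns labels following the alphabetical order of the unique strings):

>>> encoder = StrClassificationEncoder()
>>> encoder.encode_all(["cat", "dog", "cat", "bird"])  # bird=0, cat=1, dog=2
tensor([1, 2, 1, 0])
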
Source code in src/stimulus/data/encoding/encoders.py
def encode_all(self, data: Union[str, list[str]]) -> torch.Tensor:\n    \"\"\"Encodes the data.\n\n    This method takes as input a list of data points, should be mappable to a single output, using LabelEncoder from scikit learn and returning a numpy array.\n    For more info visit : https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html\n\n    Args:\n        data (Union[str, list[str]]): a list of strings or single string\n\n    Returns:\n        encoded_data (torch.tensor): the encoded data\n    \"\"\"\n    if not isinstance(data, list):\n        data = [data]\n\n    self._check_dtype(data)\n\n    encoder = preprocessing.LabelEncoder()\n    encoded_data = torch.tensor(encoder.fit_transform(data))\n    if self.scale:\n        encoded_data = encoded_data / max(len(encoded_data) - 1, 1)\n\n    return encoded_data\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.TextOneHotEncoder","title":"TextOneHotEncoder","text":"
TextOneHotEncoder(\n    alphabet: str = \"acgt\",\n    *,\n    convert_lowercase: bool = False,\n    padding: bool = False\n)\n

Bases: AbstractEncoder

One hot encoder for text data.

NOTE: encoding is based on the given alphabet. If a character c is not in the alphabet, c will be represented by a vector of zeros.

Attributes:

  • alphabet (str) \u2013

    the alphabet to one hot encode the data with.

  • convert_lowercase (bool) \u2013

    whether to convert the sequence and alphabet to lowercase. Default is False.

  • padding (bool) \u2013

    whether to pad the sequences with zeros. Default is False.

  • encoder (OneHotEncoder) \u2013

    preprocessing.OneHotEncoder object initialized with self.alphabet

Methods:

  • encode \u2013

    encodes a single data point

  • encode_all \u2013

    encodes a list of data points into a numpy array

  • encode_multiprocess \u2013

    encodes a list of data points using multiprocessing

  • decode \u2013

    decodes a single data point

  • _sequence_to_array \u2013

    transforms a sequence into a numpy array

Parameters:

  • alphabet (str, default: 'acgt' ) \u2013

    the alphabet to one hot encode the data with.

Raises:

  • TypeError \u2013

    If the input alphabet is not a string.

Methods:

  • decode \u2013

    Decodes one-hot encoded tensor back to sequences.

  • encode \u2013

    One hot encodes a single sequence.

  • encode_all \u2013

    Encodes a list of sequences.

  • encode_multiprocess \u2013

    Encodes a list of sequences using multiprocessing.

Source code in src/stimulus/data/encoding/encoders.py
def __init__(self, alphabet: str = \"acgt\", *, convert_lowercase: bool = False, padding: bool = False) -> None:\n    \"\"\"Initialize the TextOneHotEncoder class.\n\n    Args:\n        alphabet (str): the alphabet to one hot encode the data with.\n\n    Raises:\n        TypeError: If the input alphabet is not a string.\n    \"\"\"\n    if not isinstance(alphabet, str):\n        error_msg = f\"Expected a string input for alphabet, got {type(alphabet).__name__}\"\n        logger.error(error_msg)\n        raise TypeError(error_msg)\n\n    if convert_lowercase:\n        alphabet = alphabet.lower()\n\n    self.alphabet = alphabet\n    self.convert_lowercase = convert_lowercase\n    self.padding = padding\n\n    self.encoder = preprocessing.OneHotEncoder(\n        categories=[list(alphabet)],\n        handle_unknown=\"ignore\",\n    )  # handle_unknown='ignore' unsures that a vector of zeros is returned for unknown characters, such as 'Ns' in DNA sequences\n    self.encoder.fit(np.array(list(alphabet)).reshape(-1, 1))\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.TextOneHotEncoder.decode","title":"decode","text":"
decode(data: Tensor) -> Union[str, list[str]]\n

Decodes one-hot encoded tensor back to sequences.

Parameters:

  • data (Tensor) \u2013

    2D or 3D tensor of one-hot encoded sequences - 2D shape: (sequence_length, alphabet_size) - 3D shape: (batch_size, sequence_length, alphabet_size)

NOTE: when decoding a 3D tensor, all sequences are assumed to have the same length.

Returns:

  • Union[str, list[str]] \u2013

    Union[str, list[str]]: Single sequence string or list of sequence strings

Raises:

  • TypeError \u2013

    If the input data is not a 2D or 3D tensor

Source code in src/stimulus/data/encoding/encoders.py
def decode(self, data: torch.Tensor) -> Union[str, list[str]]:\n    \"\"\"Decodes one-hot encoded tensor back to sequences.\n\n    Args:\n        data (torch.Tensor): 2D or 3D tensor of one-hot encoded sequences\n            - 2D shape: (sequence_length, alphabet_size)\n            - 3D shape: (batch_size, sequence_length, alphabet_size)\n\n    NOTE that when decoding 3D shape tensor, it assumes all sequences have the same length.\n\n    Returns:\n        Union[str, list[str]]: Single sequence string or list of sequence strings\n\n    Raises:\n        TypeError: If the input data is not a 2D or 3D tensor\n    \"\"\"\n    expected_2d_tensor = 2\n    expected_3d_tensor = 3\n\n    if data.dim() == expected_2d_tensor:\n        # Single sequence\n        data_np = data.numpy().reshape(-1, len(self.alphabet))\n        decoded = self.encoder.inverse_transform(data_np).flatten()\n        return \"\".join([i for i in decoded if i is not None])\n\n    if data.dim() == expected_3d_tensor:\n        # Multiple sequences\n        batch_size, seq_len, _ = data.shape\n        data_np = data.reshape(-1, len(self.alphabet)).numpy()\n        decoded = self.encoder.inverse_transform(data_np)\n        sequences = decoded.reshape(batch_size, seq_len)\n        # Convert to masked array where None values are masked\n        masked_sequences = np.ma.masked_equal(sequences, None)\n        # Fill masked values with \"-\"\n        filled_sequences = masked_sequences.filled(\"-\")\n        return [\"\".join(seq) for seq in filled_sequences]\n\n    raise ValueError(f\"Expected 2D or 3D tensor, got {data.dim()}D\")\n
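As an illustrative round trip (not from the upstream docstring): characters in the alphabet survive an encode/decode cycle, while unknown characters are dropped when decoding a single 2D tensor.

>>> encoder = TextOneHotEncoder(alphabet="acgt")
>>> encoder.decode(encoder.encode("acgt"))
'acgt'
>>> encoder.decode(encoder.encode("acgtn"))  # 'n' is unknown, so it is dropped
'acgt'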
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.TextOneHotEncoder.encode","title":"encode","text":"
encode(data: str) -> Tensor\n

One hot encodes a single sequence.

Takes a single string sequence and returns a torch tensor of shape (sequence_length, alphabet_length). The returned tensor corresponds to the one hot encoding of the sequence. Unknown characters are represented by a vector of zeros.

Parameters:

  • data (str) \u2013

    single sequence

Returns:

  • encoded_data_point ( Tensor ) \u2013

    one hot encoded sequence

Raises:

  • TypeError \u2013

    If the input data is not a string.

Examples:

>>> encoder = TextOneHotEncoder(alphabet=\"acgt\")\n>>> encoder.encode(\"acgt\")\ntensor([[1, 0, 0, 0],\n        [0, 1, 0, 0],\n        [0, 0, 1, 0],\n        [0, 0, 0, 1]])\n>>> encoder.encode(\"acgtn\")\ntensor([[1, 0, 0, 0],\n        [0, 1, 0, 0],\n        [0, 0, 1, 0],\n        [0, 0, 0, 1],\n        [0, 0, 0, 0]])\n
>>> encoder = TextOneHotEncoder(alphabet=\"ACgt\")\n>>> encoder.encode(\"acgt\")\ntensor([[0, 0, 0, 0],\n        [0, 0, 0, 0],\n        [0, 0, 1, 0],\n        [0, 0, 0, 1]])\n>>> encoder.encode(\"ACgt\")\ntensor([[1, 0, 0, 0],\n        [0, 1, 0, 0],\n        [0, 0, 1, 0],\n        [0, 0, 0, 1]])\n
Source code in src/stimulus/data/encoding/encoders.py
def encode(self, data: str) -> torch.Tensor:\n    \"\"\"One hot encodes a single sequence.\n\n    Takes a single string sequence and returns a torch tensor of shape (sequence_length, alphabet_length).\n    The returned tensor corresponds to the one hot encoding of the sequence.\n    Unknown characters are represented by a vector of zeros.\n\n    Args:\n        data (str): single sequence\n\n    Returns:\n        encoded_data_point (torch.Tensor): one hot encoded sequence\n\n    Raises:\n        TypeError: If the input data is not a string.\n\n    Examples:\n        >>> encoder = TextOneHotEncoder(alphabet=\"acgt\")\n        >>> encoder.encode(\"acgt\")\n        tensor([[1, 0, 0, 0],\n                [0, 1, 0, 0],\n                [0, 0, 1, 0],\n                [0, 0, 0, 1]])\n        >>> encoder.encode(\"acgtn\")\n        tensor([[1, 0, 0, 0],\n                [0, 1, 0, 0],\n                [0, 0, 1, 0],\n                [0, 0, 0, 1],\n                [0, 0, 0, 0]])\n\n        >>> encoder = TextOneHotEncoder(alphabet=\"ACgt\")\n        >>> encoder.encode(\"acgt\")\n        tensor([[0, 0, 0, 0],\n                [0, 0, 0, 0],\n                [0, 0, 1, 0],\n                [0, 0, 0, 1]])\n        >>> encoder.encode(\"ACgt\")\n        tensor([[1, 0, 0, 0],\n                [0, 1, 0, 0],\n                [0, 0, 1, 0],\n                [0, 0, 0, 1]])\n    \"\"\"\n    sequence_array = self._sequence_to_array(data)\n    transformed = self.encoder.transform(sequence_array)\n    numpy_array = np.squeeze(np.stack(transformed.toarray()))\n    return torch.from_numpy(numpy_array)\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.TextOneHotEncoder.encode_all","title":"encode_all","text":"
encode_all(data: Union[str, list[str]]) -> Tensor\n

Encodes a list of sequences.

Takes a list of string sequences and returns a torch tensor of shape (number_of_sequences, sequence_length, alphabet_length). The returned tensor corresponds to the one hot encoding of the sequences. Unknown characters are represented by a vector of zeros.

Parameters:

  • data (Union[str, list[str]]) \u2013

    list of sequences or a single sequence

Returns:

  • encoded_data ( Tensor ) \u2013

    one hot encoded sequences

Raises:

  • TypeError \u2013

    If the input data is not a list or a string.

  • ValueError \u2013

    If all sequences do not have the same length when padding is False.

Examples:

>>> encoder = TextOneHotEncoder(alphabet=\"acgt\", padding=True)\n>>> encoder.encode_all([\"acgt\", \"acgtn\"])\ntensor([[[1, 0, 0, 0],\n         [0, 1, 0, 0],\n         [0, 0, 1, 0],\n         [0, 0, 0, 1],\n         [0, 0, 0, 0]],  # this row is padded with zeros\n
    [[1, 0, 0, 0],\n     [0, 1, 0, 0],\n     [0, 0, 1, 0],\n     [0, 0, 0, 1],\n     [0, 0, 0, 0]]])\n
Source code in src/stimulus/data/encoding/encoders.py
def encode_all(self, data: Union[str, list[str]]) -> torch.Tensor:\n    \"\"\"Encodes a list of sequences.\n\n    Takes a list of string sequences and returns a torch tensor of shape (number_of_sequences, sequence_length, alphabet_length).\n    The returned tensor corresponds to the one hot encoding of the sequences.\n    Unknown characters are represented by a vector of zeros.\n\n    Args:\n        data (Union[str, list[str]]): list of sequences or a single sequence\n\n    Returns:\n        encoded_data (torch.Tensor): one hot encoded sequences\n\n    Raises:\n        TypeError: If the input data is not a list or a string.\n        ValueError: If all sequences do not have the same length when padding is False.\n\n    Examples:\n        >>> encoder = TextOneHotEncoder(alphabet=\"acgt\")\n        >>> encoder.encode_all([\"acgt\", \"acgtn\"])\n        tensor([[[1, 0, 0, 0],\n                 [0, 1, 0, 0],\n                 [0, 0, 1, 0],\n                 [0, 0, 0, 1],\n                 [0, 0, 0, 0]], // this is padded with zeros\n\n                [[1, 0, 0, 0],\n                 [0, 1, 0, 0],\n                 [0, 0, 1, 0],\n                 [0, 0, 0, 1],\n                 [0, 0, 0, 0]]])\n    \"\"\"\n    encoded_data = None  # to prevent UnboundLocalError\n    # encode data\n    if isinstance(data, str):\n        encoded_data = self.encode(data)\n        return torch.stack([encoded_data])\n    if isinstance(data, list):\n        # TODO instead maybe we can run encode_multiprocess when data size is larger than a certain threshold.\n        encoded_list = self.encode_multiprocess(data)\n    else:\n        error_msg = f\"Expected list or string input for data, got {type(data).__name__}\"\n        logger.error(error_msg)\n        raise TypeError(error_msg)\n\n    # handle padding\n    if self.padding:\n        max_length = max([len(d) for d in encoded_list])\n        encoded_data = torch.stack([F.pad(d, (0, 0, 0, max_length - len(d))) for d in encoded_list])\n    else:\n        lengths = {len(d) for d in encoded_list}\n        if len(lengths) > 1:\n            error_msg = \"All sequences must have the same length when padding is False.\"\n            logger.error(error_msg)\n            raise ValueError(error_msg)\n        encoded_data = torch.stack(encoded_list)\n\n    if encoded_data is None:\n        raise ValueError(\"Encoded data is None. This should not happen.\")\n\n    return encoded_data\n
"},{"location":"reference/stimulus/data/encoding/encoders/#stimulus.data.encoding.encoders.TextOneHotEncoder.encode_multiprocess","title":"encode_multiprocess","text":"
encode_multiprocess(data: list[str]) -> list[Tensor]\n

Encodes a list of sequences using multiprocessing.

Source code in src/stimulus/data/encoding/encoders.py
def encode_multiprocess(self, data: list[str]) -> list[torch.Tensor]:\n    \"\"\"Encodes a list of sequences using multiprocessing.\"\"\"\n    with mp.Pool() as pool:\n        return pool.map(self.encode, data)\n
"},{"location":"reference/stimulus/data/splitters/","title":"stimulus.data.splitters","text":""},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters","title":"splitters","text":"

This package provides splitter classes for splitting data into train, validation, and test sets.

Modules:

  • splitters \u2013

    This file contains the splitter classes for splitting data accordingly.

Classes:

  • AbstractSplitter \u2013

    Abstract class for splitters.

  • RandomSplit \u2013

    This splitter randomly splits the data.

"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.AbstractSplitter","title":"AbstractSplitter","text":"
AbstractSplitter(seed: float = 42)\n

Bases: ABC

Abstract class for splitters.

A splitter splits the data into train, validation, and test sets.

Methods:

  • get_split_indexes \u2013

    calculates split indices for the data

  • distance \u2013

    calculates the distance between two elements of the data

Parameters:

  • seed (float, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • distance \u2013

    Calculates the distance between two elements of the data.

  • get_split_indexes \u2013

    Splits the data. Always return indices mapping to the original list.

Source code in src/stimulus/data/splitters/splitters.py
def __init__(self, seed: float = 42) -> None:\n    \"\"\"Initialize the splitter.\n\n    Args:\n        seed: Random seed for reproducibility\n    \"\"\"\n    self.seed = seed\n
"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.AbstractSplitter.distance","title":"distance abstractmethod","text":"
distance(data_one: Any, data_two: Any) -> float\n

Calculates the distance between two elements of the data.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data_one (Any) \u2013

    the first data point

  • data_two (Any) \u2013

    the second data point

Returns:

  • distance ( float ) \u2013

    the distance between the two data points

Source code in src/stimulus/data/splitters/splitters.py
@abstractmethod\ndef distance(self, data_one: Any, data_two: Any) -> float:\n    \"\"\"Calculates the distance between two elements of the data.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data_one (Any): the first data point\n        data_two (Any): the second data point\n\n    Returns:\n        distance (float): the distance between the two data points\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.AbstractSplitter.get_split_indexes","title":"get_split_indexes abstractmethod","text":"
get_split_indexes(data: dict) -> tuple[list, list, list]\n

Splits the data. Always return indices mapping to the original list.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (dict) \u2013

    the data to be split

Returns:

  • split_indices ( list ) \u2013

    the indices for train, validation, and test sets

Source code in src/stimulus/data/splitters/splitters.py
@abstractmethod\ndef get_split_indexes(self, data: dict) -> tuple[list, list, list]:\n    \"\"\"Splits the data. Always return indices mapping to the original list.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (pl.DataFrame): the data to be split\n\n    Returns:\n        split_indices (list): the indices for train, validation, and test sets\n    \"\"\"\n    raise NotImplementedError\n
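For readers implementing their own splitter, a hedged sketch of a subclass follows; the class name FirstNSplitter and the fixed 80/10/10 proportions are illustrative, and the import path is assumed from this reference page.

from stimulus.data.splitters import AbstractSplitter  # assumed import path


class FirstNSplitter(AbstractSplitter):
    """Hypothetical splitter: first 80% train, next 10% validation, remaining 10% test."""

    def get_split_indexes(self, data: dict) -> tuple[list, list, list]:
        # Dataset length is taken from the first column, as in RandomSplit below.
        n = len(next(iter(data.values())))
        train_end, val_end = int(0.8 * n), int(0.9 * n)
        indices = list(range(n))
        return indices[:train_end], indices[train_end:val_end], indices[val_end:]

    def distance(self, data_one, data_two) -> float:
        # Not meaningful for a purely positional split.
        raise NotImplementedError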
"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.RandomSplit","title":"RandomSplit","text":"
RandomSplit(split: Optional[list] = None, seed: int = 42)\n

Bases: AbstractSplitter

This splitter randomly splits the data.

Parameters:

  • split (Optional[list], default: None ) \u2013

    List of proportions for train/val/test splits

  • seed (int, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • distance \u2013

    Calculate distance between two data points.

  • get_split_indexes \u2013

    Splits the data indices into train, validation, and test sets.

Source code in src/stimulus/data/splitters/splitters.py
def __init__(self, split: Optional[list] = None, seed: int = 42) -> None:\n    \"\"\"Initialize the random splitter.\n\n    Args:\n        split: List of proportions for train/val/test splits\n        seed: Random seed for reproducibility\n    \"\"\"\n    super().__init__()\n    self.split = [0.7, 0.2, 0.1] if split is None else split\n    self.seed = seed\n    if len(self.split) != SPLIT_SIZE:\n        raise ValueError(\n            \"The split argument should be a list with length 3 that contains the proportions for [train, validation, test] splits.\",\n        )\n
"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.RandomSplit.distance","title":"distance","text":"
distance(data_one: Any, data_two: Any) -> float\n

Calculate distance between two data points.

Parameters:

  • data_one (Any) \u2013

    First data point

  • data_two (Any) \u2013

    Second data point

Returns:

  • float \u2013

    Distance between the points

Source code in src/stimulus/data/splitters/splitters.py
def distance(self, data_one: Any, data_two: Any) -> float:\n    \"\"\"Calculate distance between two data points.\n\n    Args:\n        data_one: First data point\n        data_two: Second data point\n\n    Returns:\n        Distance between the points\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/splitters/#stimulus.data.splitters.RandomSplit.get_split_indexes","title":"get_split_indexes","text":"
get_split_indexes(data: dict) -> tuple[list, list, list]\n

Splits the data indices into train, validation, and test sets.

One can use these lists of indices to parse the data afterwards.

Parameters:

  • data (dict) \u2013

    Dictionary mapping column names to lists of data values.

Returns:

  • train ( list ) \u2013

    The indices for the training set.

  • validation ( list ) \u2013

    The indices for the validation set.

  • test ( list ) \u2013

    The indices for the test set.

Raises:

  • ValueError \u2013

    If the split argument is not a list with length 3.

  • ValueError \u2013

    If the sum of the split proportions is not 1.

Source code in src/stimulus/data/splitters/splitters.py
def get_split_indexes(\n    self,\n    data: dict,\n) -> tuple[list, list, list]:\n    \"\"\"Splits the data indices into train, validation, and test sets.\n\n    One can use these lists of indices to parse the data afterwards.\n\n    Args:\n        data (dict): Dictionary mapping column names to lists of data values.\n\n    Returns:\n        train (list): The indices for the training set.\n        validation (list): The indices for the validation set.\n        test (list): The indices for the test set.\n\n    Raises:\n        ValueError: If the split argument is not a list with length 3.\n        ValueError: If the sum of the split proportions is not 1.\n    \"\"\"\n    # Use round to avoid errors due to floating point imprecisions\n    if round(sum(self.split), 3) < 1.0:\n        raise ValueError(f\"The sum of the split proportions should be 1. Instead, it is {sum(self.split)}.\")\n\n    if not data:\n        raise ValueError(\"No data provided for splitting\")\n    # Get length from first column's data list\n    length_of_data = len(next(iter(data.values())))\n\n    # Generate a list of indices and shuffle it\n    indices = np.arange(length_of_data)\n    np.random.seed(self.seed)\n    np.random.shuffle(indices)\n\n    # Calculate the sizes of the train, validation, and test sets\n    train_size = int(self.split[0] * length_of_data)\n    validation_size = int(self.split[1] * length_of_data)\n\n    # Split the shuffled indices according to the calculated sizes\n    train = indices[:train_size].tolist()\n    validation = indices[train_size : train_size + validation_size].tolist()\n    test = indices[train_size + validation_size :].tolist()\n\n    return train, validation, test\n
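A small usage sketch (not from the upstream docstring), assuming RandomSplit is imported from stimulus.data.splitters. The split sizes are fully determined by the proportions and the data length; the exact indices depend on the seeded shuffle.

>>> splitter = RandomSplit(split=[0.7, 0.2, 0.1], seed=42)
>>> data = {"sequence": ["acgt"] * 10}
>>> train, validation, test = splitter.get_split_indexes(data)
>>> len(train), len(validation), len(test)
(7, 2, 1)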
"},{"location":"reference/stimulus/data/splitters/splitters/","title":"stimulus.data.splitters.splitters","text":""},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters","title":"splitters","text":"

This file contains the splitter classes for splitting data accordingly.

Classes:

  • AbstractSplitter \u2013

    Abstract class for splitters.

  • RandomSplit \u2013

    This splitter randomly splits the data.

"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.AbstractSplitter","title":"AbstractSplitter","text":"
AbstractSplitter(seed: float = 42)\n

Bases: ABC

Abstract class for splitters.

A splitter splits the data into train, validation, and test sets.

Methods:

  • get_split_indexes \u2013

    calculates split indices for the data

  • distance \u2013

    calculates the distance between two elements of the data

Parameters:

  • seed (float, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • distance \u2013

    Calculates the distance between two elements of the data.

  • get_split_indexes \u2013

    Splits the data. Always return indices mapping to the original list.

Source code in src/stimulus/data/splitters/splitters.py
def __init__(self, seed: float = 42) -> None:\n    \"\"\"Initialize the splitter.\n\n    Args:\n        seed: Random seed for reproducibility\n    \"\"\"\n    self.seed = seed\n
"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.AbstractSplitter.distance","title":"distance abstractmethod","text":"
distance(data_one: Any, data_two: Any) -> float\n

Calculates the distance between two elements of the data.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data_one (Any) \u2013

    the first data point

  • data_two (Any) \u2013

    the second data point

Returns:

  • distance ( float ) \u2013

    the distance between the two data points

Source code in src/stimulus/data/splitters/splitters.py
@abstractmethod\ndef distance(self, data_one: Any, data_two: Any) -> float:\n    \"\"\"Calculates the distance between two elements of the data.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data_one (Any): the first data point\n        data_two (Any): the second data point\n\n    Returns:\n        distance (float): the distance between the two data points\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.AbstractSplitter.get_split_indexes","title":"get_split_indexes abstractmethod","text":"
get_split_indexes(data: dict) -> tuple[list, list, list]\n

Splits the data. Always return indices mapping to the original list.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (dict) \u2013

    the data to be split

Returns:

  • split_indices ( list ) \u2013

    the indices for train, validation, and test sets

Source code in src/stimulus/data/splitters/splitters.py
@abstractmethod\ndef get_split_indexes(self, data: dict) -> tuple[list, list, list]:\n    \"\"\"Splits the data. Always return indices mapping to the original list.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (pl.DataFrame): the data to be split\n\n    Returns:\n        split_indices (list): the indices for train, validation, and test sets\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.RandomSplit","title":"RandomSplit","text":"
RandomSplit(split: Optional[list] = None, seed: int = 42)\n

Bases: AbstractSplitter

This splitter randomly splits the data.

Parameters:

  • split (Optional[list], default: None ) \u2013

    List of proportions for train/val/test splits

  • seed (int, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • distance \u2013

    Calculate distance between two data points.

  • get_split_indexes \u2013

    Splits the data indices into train, validation, and test sets.

Source code in src/stimulus/data/splitters/splitters.py
def __init__(self, split: Optional[list] = None, seed: int = 42) -> None:\n    \"\"\"Initialize the random splitter.\n\n    Args:\n        split: List of proportions for train/val/test splits\n        seed: Random seed for reproducibility\n    \"\"\"\n    super().__init__()\n    self.split = [0.7, 0.2, 0.1] if split is None else split\n    self.seed = seed\n    if len(self.split) != SPLIT_SIZE:\n        raise ValueError(\n            \"The split argument should be a list with length 3 that contains the proportions for [train, validation, test] splits.\",\n        )\n
"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.RandomSplit.distance","title":"distance","text":"
distance(data_one: Any, data_two: Any) -> float\n

Calculate distance between two data points.

Parameters:

  • data_one (Any) \u2013

    First data point

  • data_two (Any) \u2013

    Second data point

Returns:

  • float \u2013

    Distance between the points

Source code in src/stimulus/data/splitters/splitters.py
def distance(self, data_one: Any, data_two: Any) -> float:\n    \"\"\"Calculate distance between two data points.\n\n    Args:\n        data_one: First data point\n        data_two: Second data point\n\n    Returns:\n        Distance between the points\n    \"\"\"\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/splitters/splitters/#stimulus.data.splitters.splitters.RandomSplit.get_split_indexes","title":"get_split_indexes","text":"
get_split_indexes(data: dict) -> tuple[list, list, list]\n

Splits the data indices into train, validation, and test sets.

One can use these lists of indices to parse the data afterwards.

Parameters:

  • data (dict) \u2013

    Dictionary mapping column names to lists of data values.

Returns:

  • train ( list ) \u2013

    The indices for the training set.

  • validation ( list ) \u2013

    The indices for the validation set.

  • test ( list ) \u2013

    The indices for the test set.

Raises:

  • ValueError \u2013

    If the split argument is not a list with length 3.

  • ValueError \u2013

    If the sum of the split proportions is not 1.

Source code in src/stimulus/data/splitters/splitters.py
def get_split_indexes(\n    self,\n    data: dict,\n) -> tuple[list, list, list]:\n    \"\"\"Splits the data indices into train, validation, and test sets.\n\n    One can use these lists of indices to parse the data afterwards.\n\n    Args:\n        data (dict): Dictionary mapping column names to lists of data values.\n\n    Returns:\n        train (list): The indices for the training set.\n        validation (list): The indices for the validation set.\n        test (list): The indices for the test set.\n\n    Raises:\n        ValueError: If the split argument is not a list with length 3.\n        ValueError: If the sum of the split proportions is not 1.\n    \"\"\"\n    # Use round to avoid errors due to floating point imprecisions\n    if round(sum(self.split), 3) < 1.0:\n        raise ValueError(f\"The sum of the split proportions should be 1. Instead, it is {sum(self.split)}.\")\n\n    if not data:\n        raise ValueError(\"No data provided for splitting\")\n    # Get length from first column's data list\n    length_of_data = len(next(iter(data.values())))\n\n    # Generate a list of indices and shuffle it\n    indices = np.arange(length_of_data)\n    np.random.seed(self.seed)\n    np.random.shuffle(indices)\n\n    # Calculate the sizes of the train, validation, and test sets\n    train_size = int(self.split[0] * length_of_data)\n    validation_size = int(self.split[1] * length_of_data)\n\n    # Split the shuffled indices according to the calculated sizes\n    train = indices[:train_size].tolist()\n    validation = indices[train_size : train_size + validation_size].tolist()\n    test = indices[train_size + validation_size :].tolist()\n\n    return train, validation, test\n
"},{"location":"reference/stimulus/data/transform/","title":"stimulus.data.transform","text":""},{"location":"reference/stimulus/data/transform/#stimulus.data.transform","title":"transform","text":"

Transform package for data manipulation.

Modules:

  • data_transformation_generators \u2013

    This file contains noise generator classes for generating various types of noise.

"},{"location":"reference/stimulus/data/transform/data_transformation_generators/","title":"stimulus.data.transform.data_transformation_generators","text":""},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators","title":"data_transformation_generators","text":"

This file contains noise generator classes for generating various types of noise.

Classes:

  • AbstractAugmentationGenerator \u2013

    Abstract class for augmentation generators.

  • AbstractDataTransformer \u2013

    Abstract class for data transformers.

  • AbstractNoiseGenerator \u2013

    Abstract class for noise generators.

  • GaussianChunk \u2013

    Subset data around a random midpoint.

  • GaussianNoise \u2013

    Add Gaussian noise to data.

  • ReverseComplement \u2013

    Reverse complement biological sequences.

  • UniformTextMasker \u2013

    Mask characters in text.

"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractAugmentationGenerator","title":"AbstractAugmentationGenerator","text":"
AbstractAugmentationGenerator()\n

Bases: AbstractDataTransformer

Abstract class for augmentation generators.

All augmentation functions should carry their own seed, because running them under multiprocessing could otherwise unset the seed.

Methods:

  • transform \u2013

    Transforms a single data point.

  • transform_all \u2013

    Transforms a list of data points.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self) -> None:\n    \"\"\"Initialize the augmentation generator.\"\"\"\n    super().__init__()\n    self.add_row = True\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractAugmentationGenerator.transform","title":"transform abstractmethod","text":"
transform(data: Any) -> Any\n

Transforms a single data point.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (Any) \u2013

    the data to be transformed

Returns:

  • transformed_data ( Any ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform(self, data: Any) -> Any:\n    \"\"\"Transforms a single data point.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (Any): the data to be transformed\n\n    Returns:\n        transformed_data (Any): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractAugmentationGenerator.transform_all","title":"transform_all abstractmethod","text":"
transform_all(data: list) -> list\n

Transforms a list of data points.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (list) \u2013

    the data to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform_all(self, data: list) -> list:\n    \"\"\"Transforms a list of data points.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (list): the data to be transformed\n\n    Returns:\n        transformed_data (list): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractDataTransformer","title":"AbstractDataTransformer","text":"
AbstractDataTransformer()\n

Bases: ABC

Abstract class for data transformers.

Data transformers implement in_place or augmentation transformations. Whether a transformer is in_place or an augmentation is specified by the \"add_row\" attribute (True or False, set in the child class constructor).

Child classes should override the transform and transform_all methods.

transform_all should always return a list

Both methods should take an optional seed argument set to None by default to be compliant with stimulus' core principle of reproducibility. Seed should be initialized through np.random.seed(seed) in the method implementation.

Attributes:

  • add_row (bool) \u2013

    whether the transformer adds rows to the data

Methods:

  • transform \u2013

    transforms a data point

  • transform_all \u2013

    transforms a list of data points

Methods:

  • transform \u2013

    Transforms a single data point.

  • transform_all \u2013

    Transforms a list of data points.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self) -> None:\n    \"\"\"Initialize the data transformer.\"\"\"\n    self.add_row: bool = False\n    self.seed: int = 42\n
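To make the contract concrete, a hedged sketch of a minimal in-place transformer follows; UpperCaser is a hypothetical name, the import path is taken from this reference page, and the optional seed argument mentioned above is accepted but unused because the transformation is deterministic.

from stimulus.data.transform.data_transformation_generators import AbstractDataTransformer  # assumed import path


class UpperCaser(AbstractDataTransformer):
    """Hypothetical in-place transformer that upper-cases string data."""

    def __init__(self) -> None:
        super().__init__()
        self.add_row = False  # in-place transformation: no extra rows are added

    def transform(self, data: str, seed=None) -> str:
        # Deterministic, so the seed is accepted for interface compatibility but ignored.
        return data.upper()

    def transform_all(self, data: list, seed=None) -> list:
        return [self.transform(d) for d in data]


UpperCaser().transform_all(["acgt", "ttaa"])  # -> ['ACGT', 'TTAA']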
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractDataTransformer.transform","title":"transform abstractmethod","text":"
transform(data: Any) -> Any\n

Transforms a single data point.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (Any) \u2013

    the data to be transformed

Returns:

  • transformed_data ( Any ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform(self, data: Any) -> Any:\n    \"\"\"Transforms a single data point.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (Any): the data to be transformed\n\n    Returns:\n        transformed_data (Any): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractDataTransformer.transform_all","title":"transform_all abstractmethod","text":"
transform_all(data: list) -> list\n

Transforms a list of data points.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (list) \u2013

    the data to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform_all(self, data: list) -> list:\n    \"\"\"Transforms a list of data points.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (list): the data to be transformed\n\n    Returns:\n        transformed_data (list): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractNoiseGenerator","title":"AbstractNoiseGenerator","text":"
AbstractNoiseGenerator()\n

Bases: AbstractDataTransformer

Abstract class for noise generators.

All noise functions should carry their own seed, because running them under multiprocessing could otherwise unset the seed.

Methods:

  • transform \u2013

    Transforms a single data point.

  • transform_all \u2013

    Transforms a list of data points.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self) -> None:\n    \"\"\"Initialize the noise generator.\"\"\"\n    super().__init__()\n    self.add_row = False\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractNoiseGenerator.transform","title":"transform abstractmethod","text":"
transform(data: Any) -> Any\n

Transforms a single data point.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (Any) \u2013

    the data to be transformed

Returns:

  • transformed_data ( Any ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform(self, data: Any) -> Any:\n    \"\"\"Transforms a single data point.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (Any): the data to be transformed\n\n    Returns:\n        transformed_data (Any): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.AbstractNoiseGenerator.transform_all","title":"transform_all abstractmethod","text":"
transform_all(data: list) -> list\n

Transforms a list of data points.

This is an abstract method that should be implemented by the child class.

Parameters:

  • data (list) \u2013

    the data to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed data

Source code in src/stimulus/data/transform/data_transformation_generators.py
@abstractmethod\ndef transform_all(self, data: list) -> list:\n    \"\"\"Transforms a list of data points.\n\n    This is an abstract method that should be implemented by the child class.\n\n    Args:\n        data (list): the data to be transformed\n\n    Returns:\n        transformed_data (list): the transformed data\n    \"\"\"\n    #  np.random.seed(self.seed)\n    raise NotImplementedError\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianChunk","title":"GaussianChunk","text":"
GaussianChunk(\n    chunk_size: int, seed: int = 42, std: float = 1\n)\n

Bases: AbstractAugmentationGenerator

Subset data around a random midpoint.

This augmentation strategy extracts chunks from the input sequences, with chunk midpoints drawn from a Gaussian distribution.

Concretely, it shifts the middle position (i.e. the peak summit) to a new position drawn from a Gaussian distribution, so positions close to the original midpoint are more likely to be chosen than distant ones. A chunk of size chunk_size around the new midpoint is then returned. transform_all repeats this process for each sequence.

Methods:

  • transform \u2013

    chunk a single list

  • transform_all \u2013

    chunks multiple lists

Parameters:

  • chunk_size (int) \u2013

    Size of chunks to extract

  • seed (int, default: 42 ) \u2013

    Random seed for reproducibility

  • std (float, default: 1 ) \u2013

    Standard deviation for the Gaussian distribution

Methods:

  • transform \u2013

    Extracts a chunk of size chunk_size around the middle position shifted by a value drawn from a Gaussian distribution.

  • transform_all \u2013

    Adds chunks to multiple lists using multiprocessing.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self, chunk_size: int, seed: int = 42, std: float = 1) -> None:\n    \"\"\"Initialize the Gaussian chunk generator.\n\n    Args:\n        chunk_size: Size of chunks to extract\n        seed: Random seed for reproducibility\n        std: Standard deviation for the Gaussian distribution\n    \"\"\"\n    super().__init__()\n    self.chunk_size = chunk_size\n    self.seed = seed\n    self.std = std\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianChunk.transform","title":"transform","text":"
transform(data: str) -> str\n

Extracts a chunk of size chunk_size around the middle position shifted by a value drawn from a Gaussian distribution.

Parameters:

  • data (str) \u2013

    the sequence to be transformed

Returns:

  • transformed_data ( str ) \u2013

    the chunk of the sequence

Raises:

  • ValueError \u2013

    if the input data is not longer than the chunk size

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform(self, data: str) -> str:\n    \"\"\"Chunks a sequence of size chunk_size from the middle position +/- a value obtained through a gaussian distribution.\n\n    Args:\n        data (str): the sequence to be transformed\n\n    Returns:\n        transformed_data (str): the chunk of the sequence\n\n    Raises:\n        AssertionError: if the input data is shorter than the chunk size\n    \"\"\"\n    np.random.seed(self.seed)\n\n    # make sure that the data is longer than chunk_size otherwise raise an error\n    if len(data) <= self.chunk_size:\n        raise ValueError(\"The input data is shorter than the chunk size\")\n\n    # Get the middle position of the input sequence\n    middle_position = len(data) // 2\n\n    # Change the middle position by a value obtained through a gaussian distribution\n    new_middle_position = int(middle_position + np.random.normal(0, self.std))\n\n    # Get the start and end position of the chunk\n    start_position = new_middle_position - self.chunk_size // 2\n    end_position = new_middle_position + self.chunk_size // 2\n\n    # if the start position is negative, set it to 0\n    start_position = max(start_position, 0)\n\n    # Get the chunk of size chunk_size from the start position if the end position is smaller than the length of the data\n    if end_position < len(data):\n        return data[start_position : start_position + self.chunk_size]\n    # Otherwise return the chunk of the sequence from the end of the sequence of size chunk_size\n    return data[-self.chunk_size :]\n
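A short usage sketch (not from the upstream docstring): regardless of where the Gaussian draw lands, the returned chunk always has length chunk_size as long as the input is longer than chunk_size.

>>> chunker = GaussianChunk(chunk_size=10, seed=42)
>>> chunk = chunker.transform("abcdefghijklmnopqrst")
>>> len(chunk)
10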
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianChunk.transform_all","title":"transform_all","text":"
transform_all(data: list) -> list\n

Adds chunks to multiple lists using multiprocessing.

Parameters:

  • data (list) \u2013

    the sequences to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed sequences

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform_all(self, data: list) -> list:\n    \"\"\"Adds chunks to multiple lists using multiprocessing.\n\n    Args:\n        data (list): the sequences to be transformed\n\n    Returns:\n        transformed_data (list): the transformed sequences\n    \"\"\"\n    with mp.Pool(mp.cpu_count()) as pool:\n        function_specific_input = list(data)\n        return pool.starmap(self.transform, function_specific_input)\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianNoise","title":"GaussianNoise","text":"
GaussianNoise(\n    mean: float = 0, std: float = 1, seed: int = 42\n)\n

Bases: AbstractNoiseGenerator

Add Gaussian noise to data.

This noise generator adds Gaussian noise to float values.

Methods:

  • transform \u2013

    adds noise to a single data point

  • transform_all \u2013

    adds noise to a list of data points

Parameters:

  • mean (float, default: 0 ) \u2013

    Mean of the Gaussian noise

  • std (float, default: 1 ) \u2013

    Standard deviation of the Gaussian noise

  • seed (int, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • transform \u2013

    Adds Gaussian noise to a single point of data.

  • transform_all \u2013

    Adds Gaussian noise to a list of data points.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self, mean: float = 0, std: float = 1, seed: int = 42) -> None:\n    \"\"\"Initialize the Gaussian noise generator.\n\n    Args:\n        mean: Mean of the Gaussian noise\n        std: Standard deviation of the Gaussian noise\n        seed: Random seed for reproducibility\n    \"\"\"\n    super().__init__()\n    self.mean = mean\n    self.std = std\n    self.seed = seed\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianNoise.transform","title":"transform","text":"
transform(data: float) -> float\n

Adds Gaussian noise to a single point of data.

Parameters:

  • data (float) \u2013

    the data to be transformed

Returns:

  • transformed_data ( float ) \u2013

    the transformed data point

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform(self, data: float) -> float:\n    \"\"\"Adds Gaussian noise to a single point of data.\n\n    Args:\n        data (float): the data to be transformed\n\n    Returns:\n        transformed_data (float): the transformed data point\n    \"\"\"\n    np.random.seed(self.seed)\n    return data + np.random.normal(self.mean, self.std)\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.GaussianNoise.transform_all","title":"transform_all","text":"
transform_all(data: list) -> list\n

Adds Gaussian noise to a list of data points.

Parameters:

  • data (list) \u2013

    the data to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed data points

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform_all(self, data: list) -> list:\n    \"\"\"Adds Gaussian noise to a list of data points.\n\n    Args:\n        data (list): the data to be transformed\n\n    Returns:\n        transformed_data (list): the transformed data points\n    \"\"\"\n    np.random.seed(self.seed)\n    return list(np.array(data) + np.random.normal(self.mean, self.std, len(data)))\n
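A short usage sketch (not from the upstream docstring). With NumPy's legacy global seeding, seed 42 makes the first draw from N(0, 1) roughly 0.4967, so the output below is reproducible.

>>> noiser = GaussianNoise(mean=0.0, std=1.0, seed=42)
>>> round(noiser.transform(0.0), 4)
0.4967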
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.ReverseComplement","title":"ReverseComplement","text":"
ReverseComplement(sequence_type: str = 'DNA')\n

Bases: AbstractAugmentationGenerator

Reverse complement biological sequences.

This augmentation strategy reverse complements the input nucleotide sequences.

Methods:

  • transform \u2013

    reverse complements a single data point

  • transform_all \u2013

    reverse complements a list of data points

Raises:

  • ValueError \u2013

    if the type of the sequence is not DNA or RNA

Parameters:

  • sequence_type (str, default: 'DNA' ) \u2013

    Type of sequence ('DNA' or 'RNA')

Methods:

  • transform \u2013

    Returns the reverse complement of a list of string data using the complement_mapping.

  • transform_all \u2013

    Reverse complement multiple data points using multiprocessing.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self, sequence_type: str = \"DNA\") -> None:\n    \"\"\"Initialize the reverse complement generator.\n\n    Args:\n        sequence_type: Type of sequence ('DNA' or 'RNA')\n    \"\"\"\n    super().__init__()\n    if sequence_type not in (\"DNA\", \"RNA\"):\n        raise ValueError(\n            \"Currently only DNA and RNA sequences are supported. Update the class ReverseComplement to support other types.\",\n        )\n    if sequence_type == \"DNA\":\n        self.complement_mapping = str.maketrans(\"ATCG\", \"TAGC\")\n    elif sequence_type == \"RNA\":\n        self.complement_mapping = str.maketrans(\"AUCG\", \"UAGC\")\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.ReverseComplement.transform","title":"transform","text":"
transform(data: str) -> str\n

Returns the reverse complement of a list of string data using the complement_mapping.

Parameters:

  • data (str) \u2013

    the sequence to be transformed

Returns:

  • transformed_data ( str ) \u2013

    the reverse complement of the sequence

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform(self, data: str) -> str:\n    \"\"\"Returns the reverse complement of a list of string data using the complement_mapping.\n\n    Args:\n        data (str): the sequence to be transformed\n\n    Returns:\n        transformed_data (str): the reverse complement of the sequence\n    \"\"\"\n    return data.translate(self.complement_mapping)[::-1]\n
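A short usage sketch (not from the upstream docstring); note that the translation table shown above covers uppercase bases only.

>>> rc = ReverseComplement(sequence_type="DNA")
>>> rc.transform("ACCGT")
'ACGGT'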
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.ReverseComplement.transform_all","title":"transform_all","text":"
transform_all(data: list) -> list\n

Reverse complement multiple data points using multiprocessing.

Parameters:

  • data (list) \u2013

    the sequences to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the reverse complement of the sequences

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform_all(self, data: list) -> list:\n    \"\"\"Reverse complement multiple data points using multiprocessing.\n\n    Args:\n        data (list): the sequences to be transformed\n\n    Returns:\n        transformed_data (list): the reverse complement of the sequences\n    \"\"\"\n    with mp.Pool(mp.cpu_count()) as pool:\n        function_specific_input = list(data)\n        return pool.map(self.transform, function_specific_input)\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.UniformTextMasker","title":"UniformTextMasker","text":"
UniformTextMasker(\n    probability: float = 0.1,\n    mask: str = \"*\",\n    seed: int = 42,\n)\n

Bases: AbstractNoiseGenerator

Mask characters in text.

This noise generator replaces characters with a masking character with a given probability.

Methods:

  • transform \u2013

    adds character masking to a single data point

  • transform_all \u2013

    adds character masking to a list of data points

Parameters:

  • probability (float, default: 0.1 ) \u2013

    Probability of masking each character

  • mask (str, default: '*' ) \u2013

    Character to use for masking

  • seed (int, default: 42 ) \u2013

    Random seed for reproducibility

Methods:

  • transform \u2013

    Adds character masking to the data.

  • transform_all \u2013

    Adds character masking to multiple data points using multiprocessing.

Source code in src/stimulus/data/transform/data_transformation_generators.py
def __init__(self, probability: float = 0.1, mask: str = \"*\", seed: int = 42) -> None:\n    \"\"\"Initialize the text masker.\n\n    Args:\n        probability: Probability of masking each character\n        mask: Character to use for masking\n        seed: Random seed for reproducibility\n    \"\"\"\n    super().__init__()\n    self.probability = probability\n    self.mask = mask\n    self.seed = seed\n
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.UniformTextMasker.transform","title":"transform","text":"
transform(data: str) -> str\n

Adds character masking to the data.

Parameters:

  • data (str) \u2013

    the data to be transformed

Returns:

  • transformed_data ( str ) \u2013

    the transformed data point

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform(self, data: str) -> str:\n    \"\"\"Adds character masking to the data.\n\n    Args:\n        data (str): the data to be transformed\n\n    Returns:\n        transformed_data (str): the transformed data point\n    \"\"\"\n    np.random.seed(self.seed)\n    return \"\".join([c if np.random.rand() > self.probability else self.mask for c in data])\n
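A short usage sketch (not from the upstream docstring). Which positions are masked depends on the seeded NumPy draws, but the output always has the same length as the input because each character is either kept or replaced by the single mask character.

>>> masker = UniformTextMasker(probability=0.1, mask="*", seed=42)
>>> masked = masker.transform("ACGTACGT")
>>> len(masked) == len("ACGTACGT")
True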
"},{"location":"reference/stimulus/data/transform/data_transformation_generators/#stimulus.data.transform.data_transformation_generators.UniformTextMasker.transform_all","title":"transform_all","text":"
transform_all(data: list) -> list\n

Adds character masking to multiple data points using multiprocessing.

Parameters:

  • data (list) \u2013

    the data to be transformed

Returns:

  • transformed_data ( list ) \u2013

    the transformed data points

Source code in src/stimulus/data/transform/data_transformation_generators.py
def transform_all(self, data: list) -> list:\n    \"\"\"Adds character masking to multiple data points using multiprocessing.\n\n    Args:\n        data (list): the data to be transformed\n\n\n    Returns:\n        transformed_data (list): the transformed data points\n    \"\"\"\n    with mp.Pool(mp.cpu_count()) as pool:\n        function_specific_input = list(data)\n        return pool.starmap(self.transform, function_specific_input)\n
"},{"location":"reference/stimulus/learner/","title":"stimulus.learner","text":""},{"location":"reference/stimulus/learner/#stimulus.learner","title":"learner","text":"

Learner package for model training and evaluation.

Modules:

  • predict \u2013

    A module for making predictions with PyTorch models using DataLoaders.

  • raytune_learner \u2013

    Ray Tune wrapper and trainable model classes for hyperparameter optimization.

  • raytune_parser \u2013

    Ray Tune results parser for extracting and saving best model configurations and weights.

"},{"location":"reference/stimulus/learner/predict/","title":"stimulus.learner.predict","text":""},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict","title":"predict","text":"

A module for making predictions with PyTorch models using DataLoaders.

Classes:

  • PredictWrapper \u2013

    A wrapper to predict the output of a model on a dataset loaded into a torch DataLoader.

"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper","title":"PredictWrapper","text":"
PredictWrapper(\n    model: Module,\n    dataloader: DataLoader,\n    loss_dict: Optional[dict[str, Any]] = None,\n)\n

A wrapper to predict the output of a model on a dataset loaded into a torch DataLoader.

It also provides functionality to measure the performance of the model.

Parameters:

  • model (Module) \u2013

    The PyTorch model to make predictions with

  • dataloader (DataLoader) \u2013

    DataLoader containing the evaluation data

  • loss_dict (Optional[dict[str, Any]], default: None ) \u2013

    Optional dictionary of loss functions

Methods:

  • compute_loss \u2013

    Compute the loss.

  • compute_metric \u2013

    Wrapper to compute the performance metric.

  • compute_metrics \u2013

    Wrapper to compute the performance metrics.

  • compute_other_metric \u2013

    Compute the performance metric.

  • handle_predictions \u2013

    Handle the model outputs from the forward pass, converting them into a dictionary of tensors keyed like y.

  • predict \u2013

    Get the model predictions.

Source code in src/stimulus/learner/predict.py
def __init__(self, model: nn.Module, dataloader: DataLoader, loss_dict: Optional[dict[str, Any]] = None) -> None:\n    \"\"\"Initialize the PredictWrapper.\n\n    Args:\n        model: The PyTorch model to make predictions with\n        dataloader: DataLoader containing the evaluation data\n        loss_dict: Optional dictionary of loss functions\n    \"\"\"\n    self.model = model\n    self.dataloader = dataloader\n    self.loss_dict = loss_dict\n    try:\n        self.model.eval()\n    except RuntimeError as e:\n        # Using logging instead of print\n        import logging\n\n        logging.warning(\"Not able to run model.eval: %s\", str(e))\n
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.compute_loss","title":"compute_loss","text":"
compute_loss() -> float\n

Compute the loss.

The current implementation computes the loss for each batch and then averages them. TODO: we could potentially summarize the loss across batches in a different way, and sometimes we may even have more than one loss.

Source code in src/stimulus/learner/predict.py
def compute_loss(self) -> float:\n    \"\"\"Compute the loss.\n\n    The current implmentation basically computes the loss for each batch and then averages them.\n    TODO we could potentially summarize the los across batches in a different way.\n    Or sometimes we may potentially even have 1+ losses.\n    \"\"\"\n    if self.loss_dict is None:\n        raise ValueError(\"Loss function is not provided.\")\n    loss = 0.0\n    with torch.no_grad():\n        for x, y, _ in self.dataloader:\n            # the loss_dict could be unpacked with ** and the function declaration handle it differently like **kwargs. to be decided, personally find this more clean and understable.\n            current_loss = self.model.batch(x=x, y=y, **self.loss_dict)[0]\n            loss += current_loss.item()\n    return loss / len(self.dataloader)\n
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.compute_metric","title":"compute_metric","text":"
compute_metric(metric: str = 'loss') -> float\n

Wrapper to compute the performance metric.

Source code in src/stimulus/learner/predict.py
def compute_metric(self, metric: str = \"loss\") -> float:\n    \"\"\"Wrapper to compute the performance metric.\"\"\"\n    if metric == \"loss\":\n        return self.compute_loss()\n    return self.compute_other_metric(metric)\n
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.compute_metrics","title":"compute_metrics","text":"
compute_metrics(metrics: list[str]) -> dict[str, float]\n

Wrapper to compute the performance metrics.

Source code in src/stimulus/learner/predict.py
def compute_metrics(self, metrics: list[str]) -> dict[str, float]:\n    \"\"\"Wrapper to compute the performance metrics.\"\"\"\n    return {m: self.compute_metric(m) for m in metrics}\n
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.compute_other_metric","title":"compute_other_metric","text":"
compute_other_metric(metric: str) -> float\n

Compute the performance metric.

"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.compute_other_metric--todo-currently-we-computes-the-average-performance-metric-across-target-y-but-maybe-in-the-future-we-want-something-different","title":"TODO currently we computes the average performance metric across target y, but maybe in the future we want something different","text":"Source code in src/stimulus/learner/predict.py
def compute_other_metric(self, metric: str) -> float:\n    \"\"\"Compute the performance metric.\n\n    # TODO currently we compute the average performance metric across target y, but maybe in the future we want something different\n    \"\"\"\n    if not hasattr(self, \"predictions\") or not hasattr(self, \"labels\"):\n        predictions, labels = self.predict(return_labels=True)\n        self.predictions = predictions\n        self.labels = labels\n\n    # Explicitly type the labels and predictions as dictionaries with str keys\n    labels_dict: dict[str, Tensor] = self.labels if isinstance(self.labels, dict) else {}\n    predictions_dict: dict[str, Tensor] = self.predictions if isinstance(self.predictions, dict) else {}\n\n    return sum(\n        Performance(labels=labels_dict[k], predictions=predictions_dict[k], metric=metric).val for k in labels_dict\n    ) / len(labels_dict)\n
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.handle_predictions","title":"handle_predictions","text":"
handle_predictions(\n    predictions: Any, y: dict[str, Tensor]\n) -> dict[str, Tensor]\n

Handle the model outputs from the forward pass, turning them into a dictionary of tensors keyed like y.

Source code in src/stimulus/learner/predict.py
def handle_predictions(self, predictions: Any, y: dict[str, Tensor]) -> dict[str, Tensor]:\n    \"\"\"Handle the model outputs from the forward pass, turning them into a dictionary of tensors keyed like y.\"\"\"\n    if len(y) == 1:\n        return {next(iter(y.keys())): predictions}\n    return dict(zip(y.keys(), predictions))\n
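
A small sketch of the mapping rule, continuing the hypothetical wrapper built in the constructor example above: with a single target the raw output is wrapped under the lone key, with multiple targets the keys of y are zipped with the output tuple in order.

import torch\n\nsingle = wrapper.handle_predictions(torch.ones(4), {'target': torch.zeros(4)})\n# {'target': tensor([1., 1., 1., 1.])} - the lone key is reused for the raw output\n\nmulti = wrapper.handle_predictions(\n    (torch.ones(4), 2 * torch.ones(4)),\n    {'a': torch.zeros(4), 'b': torch.zeros(4)},\n)\n# {'a': ones, 'b': twos} - keys of y are zipped with the output tuple in order\n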
"},{"location":"reference/stimulus/learner/predict/#stimulus.learner.predict.PredictWrapper.predict","title":"predict","text":"
predict(*, return_labels: bool = False) -> Union[\n    dict[str, Tensor],\n    tuple[dict[str, Tensor], dict[str, Tensor]],\n]\n

Get the model predictions.

It runs a forward pass of the model for each batch, collects the predictions and concatenates them across all batches. Since each returned current_predictions holds the tensors computed for a single batch, the final predictions are obtained by concatenating them.

At the end it returns predictions as a dictionary of tensors with the same keys as y.

If return_labels is True, the labels will be returned as well, also as a dictionary of tensors.

Parameters:

  • return_labels (bool, default: False ) \u2013

    Whether to also return the labels

Returns:

  • Union[dict[str, Tensor], tuple[dict[str, Tensor], dict[str, Tensor]]] \u2013

    Dictionary of predictions, and optionally labels

Source code in src/stimulus/learner/predict.py
def predict(\n    self,\n    *,\n    return_labels: bool = False,\n) -> Union[dict[str, Tensor], tuple[dict[str, Tensor], dict[str, Tensor]]]:\n    \"\"\"Get the model predictions.\n\n    It runs a forward pass of the model for each batch,\n    collects the predictions and concatenates them across all batches.\n    Since the returned `current_predictions` are formed by tensors computed for one batch,\n    the final `predictions` are obtained by concatenating them.\n\n    At the end it returns `predictions` as a dictionary of tensors with the same keys as `y`.\n\n    If return_labels is True, the `labels` will be returned as well, also as a dictionary of tensors.\n\n    Args:\n        return_labels: Whether to also return the labels\n\n    Returns:\n        Dictionary of predictions, and optionally labels\n    \"\"\"\n    # create empty dictionaries with the column names\n    first_batch = next(iter(self.dataloader))\n    keys = first_batch[1].keys()\n    predictions: dict[str, list[Tensor]] = {k: [] for k in keys}\n    labels: dict[str, list[Tensor]] = {k: [] for k in keys}\n\n    # get the predictions (and labels) for each batch\n    with torch.no_grad():\n        for x, y, _ in self.dataloader:\n            current_predictions = self.model(**x)\n            current_predictions = self.handle_predictions(current_predictions, y)\n            for k in keys:\n                # a batch may consist of a single element, in which case torch.cat would fail on zero-dimensional tensors; ensure_at_least_1d is called to prevent this.\n                predictions[k].append(ensure_at_least_1d(current_predictions[k]))\n                if return_labels:\n                    labels[k].append(ensure_at_least_1d(y[k]))\n\n    # return the predictions (and labels) as a dictionary of tensors for the entire dataset.\n    if not return_labels:\n        return {k: torch.cat(v) for k, v in predictions.items()}\n    return {k: torch.cat(v) for k, v in predictions.items()}, {k: torch.cat(v) for k, v in labels.items()}\n
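
Continuing the hypothetical wrapper from the constructor example above, per-batch tensors are concatenated along the first dimension:

predictions = wrapper.predict()                            # {'target': tensor of shape (8, 1)}\npredictions, labels = wrapper.predict(return_labels=True)  # labels share the same keys and shapes\n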
"},{"location":"reference/stimulus/learner/raytune_learner/","title":"stimulus.learner.raytune_learner","text":""},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner","title":"raytune_learner","text":"

Ray Tune wrapper and trainable model classes for hyperparameter optimization.

Classes:

  • CheckpointDict \u2013

    Dictionary type for checkpoint data.

  • TuneModel \u2013

    Trainable model class for Ray Tune.

  • TuneWrapper \u2013

    Wrapper class for Ray Tune hyperparameter optimization.

"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.CheckpointDict","title":"CheckpointDict","text":"

Bases: TypedDict

Dictionary type for checkpoint data.

"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel","title":"TuneModel","text":"

Bases: Trainable

Trainable model class for Ray Tune.

Methods:

  • export_model \u2013

    Export model to safetensors format.

  • load_checkpoint \u2013

    Load model and optimizer state from checkpoint.

  • objective \u2013

    Compute the objective metric(s) for the tuning process.

  • save_checkpoint \u2013

    Save model and optimizer state to checkpoint.

  • setup \u2013

    Get the model, loss function(s), optimizer, train and test data from the config.

  • step \u2013

    For each batch in the training data, calculate the loss and update the model parameters.

"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.export_model","title":"export_model","text":"
export_model(export_dir: str | None = None) -> None\n

Export model to safetensors format.

Source code in src/stimulus/learner/raytune_learner.py
def export_model(self, export_dir: str | None = None) -> None:  # type: ignore[override]\n    \"\"\"Export model to safetensors format.\"\"\"\n    if export_dir is None:\n        return\n    safe_save_model(self.model, os.path.join(export_dir, \"model.safetensors\"))\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.load_checkpoint","title":"load_checkpoint","text":"
load_checkpoint(checkpoint: dict[Any, Any] | None) -> None\n

Load model and optimizer state from checkpoint.

Source code in src/stimulus/learner/raytune_learner.py
def load_checkpoint(self, checkpoint: dict[Any, Any] | None) -> None:\n    \"\"\"Load model and optimizer state from checkpoint.\"\"\"\n    if checkpoint is None:\n        return\n    checkpoint_dir = checkpoint[\"checkpoint_dir\"]\n    self.model = safe_load_model(self.model, os.path.join(checkpoint_dir, \"model.safetensors\"))\n    self.optimizer.load_state_dict(torch.load(os.path.join(checkpoint_dir, \"optimizer.pt\")))\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.objective","title":"objective","text":"
objective() -> dict[str, float]\n

Compute the objective metric(s) for the tuning process.

Source code in src/stimulus/learner/raytune_learner.py
def objective(self) -> dict[str, float]:\n    \"\"\"Compute the objective metric(s) for the tuning process.\"\"\"\n    metrics = [\n        \"loss\",\n        \"rocauc\",\n        \"prauc\",\n        \"mcc\",\n        \"f1score\",\n        \"precision\",\n        \"recall\",\n        \"spearmanr\",\n    ]  # TODO maybe we report only a subset of metrics, given certain criteria (eg. if classification or regression)\n    predict_val = PredictWrapper(self.model, self.validation, loss_dict=self.loss_dict)\n    predict_train = PredictWrapper(self.model, self.training, loss_dict=self.loss_dict)\n    return {\n        **{\"val_\" + metric: value for metric, value in predict_val.compute_metrics(metrics).items()},\n        **{\"train_\" + metric: value for metric, value in predict_train.compute_metrics(metrics).items()},\n    }\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.save_checkpoint","title":"save_checkpoint","text":"
save_checkpoint(checkpoint_dir: str) -> dict[Any, Any]\n

Save model and optimizer state to checkpoint.

Source code in src/stimulus/learner/raytune_learner.py
def save_checkpoint(self, checkpoint_dir: str) -> dict[Any, Any]:\n    \"\"\"Save model and optimizer state to checkpoint.\"\"\"\n    safe_save_model(self.model, os.path.join(checkpoint_dir, \"model.safetensors\"))\n    torch.save(self.optimizer.state_dict(), os.path.join(checkpoint_dir, \"optimizer.pt\"))\n    return {\"checkpoint_dir\": checkpoint_dir}\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.setup","title":"setup","text":"
setup(config: dict[Any, Any]) -> None\n

Get the model, loss function(s), optimizer, train and test data from the config.

Source code in src/stimulus/learner/raytune_learner.py
def setup(self, config: dict[Any, Any]) -> None:\n    \"\"\"Get the model, loss function(s), optimizer, train and test data from the config.\"\"\"\n    # set the seeds the second time, first in TuneWrapper initialization\n    set_general_seeds(self.config[\"ray_worker_seed\"])\n\n    # Initialize model with the config params\n    self.model = config[\"model\"](**config[\"network_params\"])\n\n    # Get the loss function(s) from the config model params\n    self.loss_dict = config[\"loss_params\"]\n    for key, loss_fn in self.loss_dict.items():\n        try:\n            self.loss_dict[key] = getattr(nn, loss_fn)()\n        except AttributeError as err:\n            raise ValueError(\n                f\"Invalid loss function: {loss_fn}, check PyTorch for documentation on available loss functions\",\n            ) from err\n\n    # get the optimizer parameters\n    optimizer_lr = config[\"optimizer_params\"][\"lr\"]\n    self.optimizer = getattr(optim, config[\"optimizer_params\"][\"method\"])(\n        self.model.parameters(),\n        lr=optimizer_lr,\n    )\n\n    # get step size from the config\n    self.step_size = config[\"tune\"][\"step_size\"]\n\n    # Get datasets from Ray's object store\n    training, validation = ray.get(self.config[\"_training_ref\"]), ray.get(self.config[\"_validation_ref\"])\n\n    # use dataloader on training/validation data\n    self.batch_size = config[\"data_params\"][\"batch_size\"]\n    self.training = DataLoader(\n        training,\n        batch_size=self.batch_size,\n        shuffle=True,\n    )\n    self.validation = DataLoader(\n        validation,\n        batch_size=self.batch_size,\n        shuffle=True,\n    )\n\n    # debug section, first create a dedicated directory for each worker inside Ray_results/<tune_model_run_specific_dir> location\n    debug_dir = os.path.join(\n        config[\"tune_run_path\"],\n        \"debug\",\n        (\"worker_with_seed_\" + str(self.config[\"ray_worker_seed\"])),\n    )\n    if config[\"_debug\"]:\n        # creating a special directory for it one that is worker/trial/experiment specific\n        os.makedirs(debug_dir)\n        seed_filename = os.path.join(debug_dir, \"seeds.txt\")\n\n        # save the initialized model weights\n        self.export_model(export_dir=debug_dir)\n\n        # save the seeds\n        with open(seed_filename, \"a\") as seed_f:\n            # you can not retrieve the actual seed once it set, or the current seed neither for python, numpy nor torch. so we select five numbers randomly. If that is the first draw of numbers they are always the same.\n            python_values = random.sample(range(100), 5)\n            numpy_values = list(np.random.randint(0, 100, size=5))\n            torch_values = torch.randint(0, 100, (5,)).tolist()\n            seed_f.write(\n                f\"python drawn numbers : {python_values}\\nnumpy drawn numbers : {numpy_values}\\ntorch drawn numbers : {torch_values}\\n\",\n            )\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneModel.step","title":"step","text":"
step() -> dict\n

For each batch in the training data, calculate the loss and update the model parameters.

This calculation is performed based on the model's batch function. At the end, return the objective metric(s) for the tuning process.

Source code in src/stimulus/learner/raytune_learner.py
def step(self) -> dict:\n    \"\"\"For each batch in the training data, calculate the loss and update the model parameters.\n\n    This calculation is performed based on the model's batch function.\n    At the end, return the objective metric(s) for the tuning process.\n    \"\"\"\n    for _step_size in range(self.step_size):\n        for x, y, _meta in self.training:\n            # the loss dict could be unpacked with ** and the function declaration could handle it differently, e.g. as **kwargs. To be decided; personally I find this cleaner and more understandable.\n            self.model.batch(x=x, y=y, optimizer=self.optimizer, **self.loss_dict)\n    return self.objective()\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneWrapper","title":"TuneWrapper","text":"
TuneWrapper(\n    model_config: RayTuneModel,\n    data_config_path: str,\n    model_class: Module,\n    data_path: str,\n    encoder_loader: EncoderLoader,\n    seed: int,\n    ray_results_dir: Optional[str] = None,\n    tune_run_name: Optional[str] = None,\n    *,\n    debug: bool = False,\n    autoscaler: bool = False\n)\n

Wrapper class for Ray Tune hyperparameter optimization.

Methods:

  • tune \u2013

    Run the tuning process.

  • tuner_initialization \u2013

    Prepare the tuner with the configs.

Source code in src/stimulus/learner/raytune_learner.py
def __init__(\n    self,\n    model_config: RayTuneModel,\n    data_config_path: str,\n    model_class: nn.Module,\n    data_path: str,\n    encoder_loader: EncoderLoader,\n    seed: int,\n    ray_results_dir: Optional[str] = None,\n    tune_run_name: Optional[str] = None,\n    *,\n    debug: bool = False,\n    autoscaler: bool = False,\n) -> None:\n    \"\"\"Initialize the TuneWrapper with the paths to the config, model, and data.\"\"\"\n    self.config = model_config.model_dump()\n\n    # set all general seeds: python, numpy and torch.\n    set_general_seeds(seed)\n\n    # build the tune config:\n    try:\n        scheduler_class = getattr(\n            tune.schedulers,\n            model_config.tune.scheduler.name,\n        )  # todo, do this in RayConfigLoader\n    except AttributeError as err:\n        raise ValueError(\n            f\"Invalid scheduler: {model_config.tune.scheduler.name}, check Ray Tune for documentation on available schedulers\",\n        ) from err\n\n    scheduler = scheduler_class(**model_config.tune.scheduler.params)\n    self.tune_config = tune.TuneConfig(\n        metric=model_config.tune.tune_params.metric,\n        mode=model_config.tune.tune_params.mode,\n        num_samples=model_config.tune.tune_params.num_samples,\n        scheduler=scheduler,\n    )\n\n    # build the run config\n    self.run_config = train.RunConfig(\n        name=tune_run_name\n        if tune_run_name is not None\n        else \"TuneModel_\" + datetime.datetime.now(tz=datetime.timezone.utc).strftime(\"%Y-%m-%d_%H-%M-%S\"),\n        storage_path=ray_results_dir,\n        checkpoint_config=train.CheckpointConfig(checkpoint_at_end=True),\n        stop=model_config.tune.run_params.stop,\n    )\n\n    # add the data path to the config\n    if not os.path.exists(data_path):\n        raise ValueError(\"Data path does not exist. Given path:\" + data_path)\n    self.config[\"data_path\"] = os.path.abspath(data_path)\n\n    # Set up tune_run path\n    if ray_results_dir is None:\n        ray_results_dir = os.environ.get(\"HOME\", \"\")\n    self.config[\"tune_run_path\"] = os.path.join(\n        ray_results_dir,\n        tune_run_name\n        if tune_run_name is not None\n        else \"TuneModel_\" + datetime.datetime.now(tz=datetime.timezone.utc).strftime(\"%Y-%m-%d_%H-%M-%S\"),\n    )\n    self.config[\"_debug\"] = debug\n    self.config[\"model\"] = model_class\n    self.config[\"encoder_loader\"] = encoder_loader\n    self.config[\"ray_worker_seed\"] = tune.randint(0, 1000)\n\n    self.gpu_per_trial = model_config.tune.gpu_per_trial\n    self.cpu_per_trial = model_config.tune.cpu_per_trial\n\n    self.tuner = self.tuner_initialization(\n        data_config_path=data_config_path,\n        data_path=data_path,\n        encoder_loader=encoder_loader,\n        autoscaler=autoscaler,\n    )\n
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneWrapper.tune","title":"tune","text":"
tune() -> ResultGrid\n

Run the tuning process.

Source code in src/stimulus/learner/raytune_learner.py
def tune(self) -> ray.tune.ResultGrid:\n    \"\"\"Run the tuning process.\"\"\"\n    return self.tuner.fit()\n
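
A hedged usage sketch, assuming a RayTuneModel config and an EncoderLoader have already been built elsewhere with the project's config loaders (not shown here); the paths, model class and run name are hypothetical.

from stimulus.learner.raytune_learner import TuneWrapper\n\n# model_config (RayTuneModel) and encoder_loader (EncoderLoader) are assumed to exist already;\n# ModelMyNet is a hypothetical nn.Module subclass, e.g. loaded via import_class_from_file (see launch_utils below).\nwrapper = TuneWrapper(\n    model_config=model_config,\n    data_config_path='configs/data.yaml',\n    model_class=ModelMyNet,\n    data_path='data/train.csv',\n    encoder_loader=encoder_loader,\n    seed=42,\n    tune_run_name='demo_run',\n    debug=False,\n    autoscaler=False,\n)\nresults = wrapper.tune()  # a ray.tune.ResultGrid, consumable by TuneParser below\n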
"},{"location":"reference/stimulus/learner/raytune_learner/#stimulus.learner.raytune_learner.TuneWrapper.tuner_initialization","title":"tuner_initialization","text":"
tuner_initialization(\n    data_config_path: str,\n    data_path: str,\n    encoder_loader: EncoderLoader,\n    *,\n    autoscaler: bool = False\n) -> Tuner\n

Prepare the tuner with the configs.

Source code in src/stimulus/learner/raytune_learner.py
def tuner_initialization(\n    self,\n    data_config_path: str,\n    data_path: str,\n    encoder_loader: EncoderLoader,\n    *,\n    autoscaler: bool = False,\n) -> tune.Tuner:\n    \"\"\"Prepare the tuner with the configs.\"\"\"\n    # Get available resources from Ray cluster\n    cluster_res = cluster_resources()\n    logging.info(f\"CLUSTER resources   ->  {cluster_res}\")\n\n    # Check per-trial resources\n    try:\n        if self.gpu_per_trial > cluster_res[\"GPU\"] and not autoscaler:\n            raise ValueError(\n                \"GPU per trial is more than what is available in the cluster, set autoscaler to True to allow for autoscaler to be used.\",\n            )\n    except KeyError as err:\n        logging.warning(f\"KeyError: {err}, no GPU resources available in the cluster: {cluster_res}\")\n\n    if self.cpu_per_trial > cluster_res[\"CPU\"] and not autoscaler:\n        raise ValueError(\n            \"CPU per trial is more than what is available in the cluster, set autoscaler to True to allow for autoscaler to be used.\",\n        )\n\n    logging.info(f\"PER_TRIAL resources ->  GPU: {self.gpu_per_trial} CPU: {self.cpu_per_trial}\")\n\n    # Pre-load and encode datasets once, then put them in Ray's object store\n\n    training = TorchDataset(\n        config_path=data_config_path,\n        csv_path=data_path,\n        encoder_loader=encoder_loader,\n        split=0,\n    )\n    validation = TorchDataset(\n        config_path=data_config_path,\n        csv_path=data_path,\n        encoder_loader=encoder_loader,\n        split=1,\n    )\n\n    # log to debug the names of the columns and shapes of tensors for a batch of training\n    # Log shapes of encoded tensors for first batch of training data\n    inputs, labels, meta = training[0:10]\n\n    logging.debug(\"Training data tensor shapes:\")\n    for field, tensor in inputs.items():\n        logging.debug(f\"Input field '{field}' shape: {tensor.shape}\")\n\n    for field, tensor in labels.items():\n        logging.debug(f\"Label field '{field}' shape: {tensor.shape}\")\n\n    for field, values in meta.items():\n        logging.debug(f\"Meta field '{field}' length: {len(values)}\")\n\n    training_ref = ray.put(training)\n    validation_ref = ray.put(validation)\n\n    self.config[\"_training_ref\"] = training_ref\n    self.config[\"_validation_ref\"] = validation_ref\n\n    # Configure trainable with resources and dataset parameters\n    trainable = tune.with_resources(\n        tune.with_parameters(\n            TuneModel,\n        ),\n        resources={\"cpu\": self.cpu_per_trial, \"gpu\": self.gpu_per_trial},\n    )\n\n    return tune.Tuner(trainable, tune_config=self.tune_config, param_space=self.config, run_config=self.run_config)\n
"},{"location":"reference/stimulus/learner/raytune_parser/","title":"stimulus.learner.raytune_parser","text":""},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser","title":"raytune_parser","text":"

Ray Tune results parser for extracting and saving best model configurations and weights.

Classes:

  • RayTuneMetrics \u2013

    TypedDict for storing Ray Tune metrics results.

  • RayTuneOptimizer \u2013

    TypedDict for storing Ray Tune optimizer state.

  • RayTuneResult \u2013

    TypedDict for storing Ray Tune optimization results.

  • TuneParser \u2013

    Parser class for Ray Tune results to extract best configurations and model weights.

"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.RayTuneMetrics","title":"RayTuneMetrics","text":"

Bases: TypedDict

TypedDict for storing Ray Tune metrics results.

"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.RayTuneOptimizer","title":"RayTuneOptimizer","text":"

Bases: TypedDict

TypedDict for storing Ray Tune optimizer state.

"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.RayTuneResult","title":"RayTuneResult","text":"

Bases: TypedDict

TypedDict for storing Ray Tune optimization results.

"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser","title":"TuneParser","text":"
TuneParser(result: ResultGrid)\n

Parser class for Ray Tune results to extract best configurations and model weights.

Methods:

  • fix_config_values \u2013

    Correct config values.

  • get_best_config \u2013

    Get the best config from the results.

  • get_best_model \u2013

    Get the best model weights from the results.

  • get_best_optimizer \u2013

    Get the best optimizer state from the results.

  • save_best_config \u2013

    Save the best config to a file.

  • save_best_metrics_dataframe \u2013

    Save the dataframe with the metrics at each iteration of the best sample to a file.

  • save_best_model \u2013

    Save the best model weights to a file.

  • save_best_optimizer \u2013

    Save the best optimizer state to a file.

Source code in src/stimulus/learner/raytune_parser.py
def __init__(self, result: ResultGrid) -> None:\n    \"\"\"Initialize with the given Ray Tune result grid.\"\"\"\n    self.result: ResultGrid = result\n    self.best_result: Result = self._validate_best_result()\n
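
A short sketch, assuming results is the ray.tune.ResultGrid returned by TuneWrapper.tune() above; the output paths are hypothetical.

from stimulus.learner.raytune_parser import TuneParser\n\nparser = TuneParser(results)\nparser.save_best_config('best_config.yaml')\nparser.save_best_metrics_dataframe('best_metrics.csv')\nparser.save_best_model('best_model.safetensors')\nparser.save_best_optimizer('best_optimizer.pt')\n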
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.fix_config_values","title":"fix_config_values","text":"
fix_config_values(config: dict[str, Any]) -> dict[str, Any]\n

Correct config values.

This method modifies the configuration dictionary to remove or convert non-serializable objects (such as Ray ObjectRefs) so that the entire dictionary can be safely dumped to a YAML file.

Parameters:

  • config (dict[str, Any]) \u2013

    Configuration dictionary to fix.

Returns:

  • dict[str, Any] \u2013

    Fixed configuration dictionary.

Source code in src/stimulus/learner/raytune_parser.py
def fix_config_values(self, config: dict[str, Any]) -> dict[str, Any]:\n    \"\"\"Correct config values.\n\n    This method modifies the configuration dictionary to remove or convert\n    non-serializable objects (such as Ray ObjectRefs) so that the entire dictionary\n    can be safely dumped to a YAML file.\n\n    Args:\n        config: Configuration dictionary to fix.\n\n    Returns:\n        Fixed configuration dictionary.\n    \"\"\"\n    # Replace the model class with its name for serialization purposes\n    config[\"model\"] = config[\"model\"].__name__\n\n    # Remove keys that contain non-serializable objects\n    keys_to_remove = [\n        \"_debug\",\n        \"tune_run_path\",\n        \"_training_ref\",\n        \"_validation_ref\",\n        \"encoder_loader\",  # if this key holds a non-serializable object\n    ]\n    for key in keys_to_remove:\n        config.pop(key, None)\n\n    return config\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.get_best_config","title":"get_best_config","text":"
get_best_config() -> dict[str, Any]\n

Get the best config from the results.

Returns:

  • dict[str, Any] \u2013

    The configuration dictionary of the best result.

Raises:

  • ValueError \u2013

    If the config is missing.

Source code in src/stimulus/learner/raytune_parser.py
def get_best_config(self) -> dict[str, Any]:\n    \"\"\"Get the best config from the results.\n\n    Returns:\n        The configuration dictionary of the best result.\n\n    Raises:\n        ValueError: If the config is missing.\n    \"\"\"\n    config: dict[str, Any] | None = self.best_result.config\n    if config is None:\n        raise ValueError(\"Best result does not contain a configuration.\")\n    return config\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.get_best_model","title":"get_best_model","text":"
get_best_model() -> dict[str, Tensor]\n

Get the best model weights from the results.

Returns:

  • dict[str, Tensor] \u2013

    Dictionary of model weights.

Raises:

  • ValueError \u2013

    If the checkpoint is missing.

Source code in src/stimulus/learner/raytune_parser.py
def get_best_model(self) -> dict[str, torch.Tensor]:\n    \"\"\"Get the best model weights from the results.\n\n    Returns:\n        Dictionary of model weights.\n\n    Raises:\n        ValueError: If the checkpoint is missing.\n    \"\"\"\n    if self.best_result.checkpoint is None:\n        raise ValueError(\"Best result does not contain a checkpoint for the model.\")\n    checkpoint_dir: str = self.best_result.checkpoint.to_directory()\n    checkpoint: str = os.path.join(checkpoint_dir, \"model.safetensors\")\n    return safe_load_file(checkpoint)\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.get_best_optimizer","title":"get_best_optimizer","text":"
get_best_optimizer() -> dict[str, Any]\n

Get the best optimizer state from the results.

Returns:

  • dict[str, Any] \u2013

    Optimizer state dictionary.

Raises:

  • ValueError \u2013

    If the checkpoint is missing.

Source code in src/stimulus/learner/raytune_parser.py
def get_best_optimizer(self) -> dict[str, Any]:\n    \"\"\"Get the best optimizer state from the results.\n\n    Returns:\n        Optimizer state dictionary.\n\n    Raises:\n        ValueError: If the checkpoint is missing.\n    \"\"\"\n    if self.best_result.checkpoint is None:\n        raise ValueError(\"Best result does not contain a checkpoint for the optimizer.\")\n    checkpoint_dir: str = self.best_result.checkpoint.to_directory()\n    checkpoint: str = os.path.join(checkpoint_dir, \"optimizer.pt\")\n    return torch.load(checkpoint)\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.save_best_config","title":"save_best_config","text":"
save_best_config(output: str) -> None\n

Save the best config to a file.

Todo

maybe only save the relevant config values.

Parameters:

  • output (str) \u2013

    File path to save the configuration.

Source code in src/stimulus/learner/raytune_parser.py
def save_best_config(self, output: str) -> None:\n    \"\"\"Save the best config to a file.\n\n    TODO: maybe only save the relevant config values.\n\n    Args:\n        output: File path to save the configuration.\n    \"\"\"\n    config: dict[str, Any] = self.get_best_config()\n    config = self.fix_config_values(config)\n    with open(output, \"w\") as f:\n        yaml.safe_dump(config, f)\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.save_best_metrics_dataframe","title":"save_best_metrics_dataframe","text":"
save_best_metrics_dataframe(output: str) -> None\n

Save the dataframe with the metrics at each iteration of the best sample to a file.

Parameters:

  • output (str) \u2013

    CSV file path to save the metrics.

Source code in src/stimulus/learner/raytune_parser.py
def save_best_metrics_dataframe(self, output: str) -> None:\n    \"\"\"Save the dataframe with the metrics at each iteration of the best sample to a file.\n\n    Args:\n        output: CSV file path to save the metrics.\n    \"\"\"\n    metrics_df: pd.DataFrame = pd.DataFrame([self.best_result.metrics])\n    metrics_df.to_csv(output, index=False)\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.save_best_model","title":"save_best_model","text":"
save_best_model(output: str) -> None\n

Save the best model weights to a file.

This method retrieves the best model weights using the get_best_model helper which loads the model data from the checkpoint's directory, then re-saves it using safe_save_file.

Parameters:

  • output (str) \u2013

    Path where the best model weights will be saved.

Source code in src/stimulus/learner/raytune_parser.py
def save_best_model(self, output: str) -> None:\n    \"\"\"Save the best model weights to a file.\n\n    This method retrieves the best model weights using the get_best_model helper\n    which loads the model data from the checkpoint's directory, then re-saves\n    it using safe_save_file.\n\n    Args:\n        output: Path where the best model weights will be saved.\n    \"\"\"\n    model: dict[str, torch.Tensor] = self.get_best_model()\n    safe_save_file(model, output)\n
"},{"location":"reference/stimulus/learner/raytune_parser/#stimulus.learner.raytune_parser.TuneParser.save_best_optimizer","title":"save_best_optimizer","text":"
save_best_optimizer(output: str) -> None\n

Save the best optimizer state to a file.

Parameters:

  • output (str) \u2013

    Path where the best optimizer state will be saved.

Source code in src/stimulus/learner/raytune_parser.py
def save_best_optimizer(self, output: str) -> None:\n    \"\"\"Save the best optimizer state to a file.\n\n    Args:\n        output: Path where the best optimizer state will be saved.\n    \"\"\"\n    optimizer_state: dict[str, Any] = self.get_best_optimizer()\n    torch.save(optimizer_state, output)\n
"},{"location":"reference/stimulus/typing/","title":"stimulus.typing","text":""},{"location":"reference/stimulus/typing/#stimulus.typing","title":"typing","text":"

Typing for Stimulus Python API.

This module contains all Stimulus types, which are intended for variable typing and are typically not instantiated, as well as aliases for other types to be used for typing purposes.

The aliases from this module should be used for typing purposes only.

"},{"location":"reference/stimulus/utils/","title":"stimulus.utils","text":""},{"location":"reference/stimulus/utils/#stimulus.utils","title":"utils","text":"

Utility functions package.

Modules:

  • generic_utils \u2013

    Utility functions for general purpose operations like seed setting and tensor manipulation.

  • launch_utils \u2013

    Utility functions for launching and configuring experiments and ray tuning.

  • performance \u2013

    Utility module for computing various performance metrics for machine learning models.

  • yaml_data \u2013

    Utility module for handling YAML configuration files and their validation.

  • yaml_model_schema \u2013

    Module for handling YAML configuration files and converting them to Ray Tune format.

"},{"location":"reference/stimulus/utils/generic_utils/","title":"stimulus.utils.generic_utils","text":""},{"location":"reference/stimulus/utils/generic_utils/#stimulus.utils.generic_utils","title":"generic_utils","text":"

Utility functions for general purpose operations like seed setting and tensor manipulation.

Functions:

  • ensure_at_least_1d \u2013

Function to make sure the given tensors are not zero-dimensional; if they are, one dimension is added.

  • set_general_seeds \u2013

    Set all relevant random seeds to a given value.

"},{"location":"reference/stimulus/utils/generic_utils/#stimulus.utils.generic_utils.ensure_at_least_1d","title":"ensure_at_least_1d","text":"
ensure_at_least_1d(tensor: Tensor) -> Tensor\n

Function to make sure the given tensors are not zero-dimensional; if they are, one dimension is added.

Source code in src/stimulus/utils/generic_utils.py
def ensure_at_least_1d(tensor: torch.Tensor) -> torch.Tensor:\n    \"\"\"Function to make sure the given tensors are not zero-dimensional; if they are, one dimension is added.\"\"\"\n    if tensor.dim() == 0:\n        tensor = tensor.unsqueeze(0)\n    return tensor\n
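
For example (a minimal sketch):

import torch\n\nfrom stimulus.utils.generic_utils import ensure_at_least_1d\n\nscalar = torch.tensor(3.0)                       # zero-dimensional tensor, torch.cat would reject it\nprint(ensure_at_least_1d(scalar).shape)          # torch.Size([1])\nprint(ensure_at_least_1d(torch.ones(5)).shape)   # already 1d, returned unchanged: torch.Size([5])\n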
"},{"location":"reference/stimulus/utils/generic_utils/#stimulus.utils.generic_utils.set_general_seeds","title":"set_general_seeds","text":"
set_general_seeds(seed_value: Union[int, None]) -> None\n

Set all relevant random seeds to a given value.

Especially useful in the case of ray.tune, since Ray does not provide a \"generic\" seed as of Ray 2.23.

Source code in src/stimulus/utils/generic_utils.py
def set_general_seeds(seed_value: Union[int, None]) -> None:\n    \"\"\"Set all relevant random seeds to a given value.\n\n    Especially useful in the case of ray.tune, since Ray does not provide a \"generic\" seed as of Ray 2.23.\n    \"\"\"\n    # Set python seed\n    random.seed(seed_value)\n\n    # set numpy seed\n    np.random.seed(seed_value)\n\n    # set torch seed; differently from the two above, torch cannot take None as an input value, so it is not called in that case.\n    if seed_value is not None:\n        torch.manual_seed(seed_value)\n
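
For example, re-seeding makes draws from all three libraries reproducible (a minimal sketch):

import random\n\nimport numpy as np\nimport torch\n\nfrom stimulus.utils.generic_utils import set_general_seeds\n\nset_general_seeds(42)\nfirst = (random.random(), np.random.rand(), torch.rand(1).item())\nset_general_seeds(42)\nsecond = (random.random(), np.random.rand(), torch.rand(1).item())\nassert first == second  # python, numpy and torch draws repeat after re-seeding\n\nset_general_seeds(None)  # python and numpy are re-seeded from entropy; torch.manual_seed is skipped\n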
"},{"location":"reference/stimulus/utils/launch_utils/","title":"stimulus.utils.launch_utils","text":""},{"location":"reference/stimulus/utils/launch_utils/#stimulus.utils.launch_utils","title":"launch_utils","text":"

Utility functions for launching and configuring experiments and ray tuning.

Functions:

  • import_class_from_file \u2013

    Import and return the Model class from a specified Python file.

"},{"location":"reference/stimulus/utils/launch_utils/#stimulus.utils.launch_utils.import_class_from_file","title":"import_class_from_file","text":"
import_class_from_file(file_path: str) -> type\n

Import and return the Model class from a specified Python file.

Parameters:

  • file_path (str) \u2013

    Path to the Python file containing the Model class.

Returns:

  • type ( type ) \u2013

    The Model class found in the file.

Raises:

  • ImportError \u2013

    If no class starting with 'Model' is found in the file.

Source code in src/stimulus/utils/launch_utils.py
def import_class_from_file(file_path: str) -> type:\n    \"\"\"Import and return the Model class from a specified Python file.\n\n    Args:\n        file_path (str): Path to the Python file containing the Model class.\n\n    Returns:\n        type: The Model class found in the file.\n\n    Raises:\n        ImportError: If no class starting with 'Model' is found in the file.\n    \"\"\"\n    # Extract directory path and file name\n    directory, file_name = os.path.split(file_path)\n    module_name = os.path.splitext(file_name)[0]  # Remove extension to get module name\n\n    # Create a module from the file path\n    # In summary, these three lines of code are responsible for creating a module specification based on a file location, creating a module object from that specification, and then executing the module's code to populate the module object with the definitions from the Python file.\n    spec = importlib.util.spec_from_file_location(module_name, file_path)\n    if spec is None:\n        raise ImportError(f\"Could not create module spec for {file_path}\")\n    module = importlib.util.module_from_spec(spec)\n    if spec.loader is None:\n        raise ImportError(f\"Module spec has no loader for {file_path}\")\n    spec.loader.exec_module(module)\n\n    # Find the class dynamically\n    for name in dir(module):\n        model_class = getattr(module, name)\n        if isinstance(model_class, type) and name.startswith(\"Model\"):\n            return model_class\n\n    # Class not found\n    raise ImportError(\"No class starting with 'Model' found in the file.\")\n
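
For example, writing a throwaway file whose class name starts with Model (a minimal sketch; the file name and class are hypothetical):

import pathlib\n\nfrom stimulus.utils.launch_utils import import_class_from_file\n\nsource = '''\nfrom torch import nn\n\nclass ModelToy(nn.Module):\n    def __init__(self):\n        super().__init__()\n        self.layer = nn.Linear(4, 1)\n'''\npathlib.Path('model_toy.py').write_text(source)\n\nModelClass = import_class_from_file('model_toy.py')\nprint(ModelClass.__name__)  # ModelToy\n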
"},{"location":"reference/stimulus/utils/performance/","title":"stimulus.utils.performance","text":""},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance","title":"performance","text":"

Utility module for computing various performance metrics for machine learning models.

Classes:

  • Performance \u2013

    Returns the value of a given metric.

"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance","title":"Performance","text":"
Performance(\n    labels: Any, predictions: Any, metric: str = \"rocauc\"\n)\n

Returns the value of a given metric.

"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance--parameters","title":"Parameters","text":"

labels (np.array) : labels

predictions (np.array) : predictions

metric (str) : the metric to compute

"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance--returns","title":"Returns:","text":"

value (float) : the value of the metric

TODO we can add more metrics here

TODO currently, for classification metrics like precision, recall, f1score and mcc, we use a threshold of 0.5 to convert the probabilities to binary predictions. However, for models with imbalanced predictions, where the meaningful threshold is not located at 0.5, one can end up with predictions that are all 0s or all 1s, and thus with meaningless performance metrics.

Parameters:

  • labels (Any) \u2013

    Ground truth labels

  • predictions (Any) \u2013

    Model predictions

  • metric (str, default: 'rocauc' ) \u2013

    Type of metric to compute (default: \"rocauc\")

Methods:

  • data2array \u2013

    Convert input data to numpy array.

  • f1score \u2013

    Compute F1 score.

  • handle_multiclass \u2013

    Handle the case of multiclass classification.

  • mcc \u2013

    Compute Matthews Correlation Coefficient.

  • prauc \u2013

    Compute PR AUC score.

  • precision \u2013

    Compute precision score.

  • recall \u2013

    Compute recall score.

  • rocauc \u2013

    Compute ROC AUC score.

  • spearmanr \u2013

    Compute Spearman correlation coefficient.

Source code in src/stimulus/utils/performance.py
def __init__(self, labels: Any, predictions: Any, metric: str = \"rocauc\") -> None:\n    \"\"\"Initialize Performance class with labels, predictions and metric type.\n\n    Args:\n        labels: Ground truth labels\n        predictions: Model predictions\n        metric: Type of metric to compute (default: \"rocauc\")\n    \"\"\"\n    labels_arr = self.data2array(labels)\n    predictions_arr = self.data2array(predictions)\n    labels_arr, predictions_arr = self.handle_multiclass(labels_arr, predictions_arr)\n    if labels_arr.shape != predictions_arr.shape:\n        raise ValueError(\n            f\"The labels have shape {labels_arr.shape} whereas predictions have shape {predictions_arr.shape}.\",\n        )\n    function = getattr(self, metric)\n    self.val = function(labels_arr, predictions_arr)\n
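
A minimal usage example (values chosen only for illustration):

from stimulus.utils.performance import Performance\n\nlabels = [0, 0, 1, 1]\nscores = [0.1, 0.4, 0.35, 0.8]  # predicted probabilities for the positive class\n\nprint(Performance(labels, scores, metric='rocauc').val)   # 0.75\nprint(Performance(labels, scores, metric='f1score').val)  # binarized at the 0.5 threshold first, see the TODO above\n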
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.data2array","title":"data2array","text":"
data2array(data: Any) -> NDArray[float64]\n

Convert input data to numpy array.

Parameters:

  • data (Any) \u2013

    Input data in various formats

Returns:

  • NDArray[float64] \u2013

    NDArray[np.float64]: Converted numpy array

Raises:

  • ValueError \u2013

    If input data type is not supported

Source code in src/stimulus/utils/performance.py
def data2array(self, data: Any) -> NDArray[np.float64]:\n    \"\"\"Convert input data to numpy array.\n\n    Args:\n        data: Input data in various formats\n\n    Returns:\n        NDArray[np.float64]: Converted numpy array\n\n    Raises:\n        ValueError: If input data type is not supported\n    \"\"\"\n    if isinstance(data, list):\n        return np.array(data, dtype=np.float64)\n    if isinstance(data, np.ndarray):\n        return data.astype(np.float64)\n    if isinstance(data, torch.Tensor):\n        return data.detach().cpu().numpy().astype(np.float64)\n    if isinstance(data, (int, float)):\n        return np.array([data], dtype=np.float64)\n    raise ValueError(f\"The data must be a list, np.array, torch.Tensor, int or float. Instead it is {type(data)}\")\n
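
For example, lists, numpy arrays, tensors and scalars all end up as float64 numpy arrays (a minimal sketch):

import torch\n\nfrom stimulus.utils.performance import Performance\n\nperf = Performance([0, 1], [0.3, 0.9])             # any supported input type works\nprint(perf.data2array([1, 2, 3]).dtype)            # float64, from a python list\nprint(perf.data2array(torch.tensor([1.0])).dtype)  # float64, detached and moved to cpu first\nprint(perf.data2array(5).shape)                    # (1,), scalars become one-element arrays\n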
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.f1score","title":"f1score","text":"
f1score(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute F1 score.

Source code in src/stimulus/utils/performance.py
def f1score(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute F1 score.\"\"\"\n    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])\n    return float(f1_score(labels, predictions_binary))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.handle_multiclass","title":"handle_multiclass","text":"
handle_multiclass(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> tuple[NDArray[float64], NDArray[float64]]\n

Handle the case of multiclass classification.

Parameters:

  • labels (NDArray[float64]) \u2013

    Labels array of shape (N,) or (N, 1)

  • predictions (NDArray[float64]) \u2013

    Predictions array of shape (N,) or (N, C) where C is number of classes

Returns:

  • tuple[NDArray[float64], NDArray[float64]] \u2013

    tuple[NDArray[np.float64], NDArray[np.float64]]: Processed labels and predictions

Raises:

  • ValueError \u2013

    If input shapes are not compatible

Source code in src/stimulus/utils/performance.py
def handle_multiclass(\n    self,\n    labels: NDArray[np.float64],\n    predictions: NDArray[np.float64],\n) -> tuple[NDArray[np.float64], NDArray[np.float64]]:\n    \"\"\"Handle the case of multiclass classification.\n\n    Args:\n        labels: Labels array of shape (N,) or (N, 1)\n        predictions: Predictions array of shape (N,) or (N, C) where C is number of classes\n\n    Returns:\n        tuple[NDArray[np.float64], NDArray[np.float64]]: Processed labels and predictions\n\n    Raises:\n        ValueError: If input shapes are not compatible\n    \"\"\"\n    # Case 1: If labels are 2D with shape (N,1), squeeze to 1D shape (N,)\n    # This handles cases where labels come as column vectors\n    if len(labels.shape) == NON_SQUEEZED_SHAPE_LENGTH and labels.shape[1] == 1:\n        labels = labels.squeeze(-1)\n\n    if len(predictions.shape) == NON_SQUEEZED_SHAPE_LENGTH:\n        # Case 2: Binary classification with shape (N,2)\n        # Take probability of positive class (second column)\n        if predictions.shape[1] == BINARY_CLASS_COUNT:\n            predictions = predictions[:, 1]  # Shape becomes (N,)\n            return labels, predictions\n        # Case 3: Multi-class classification with shape (N,C)\n        # Keep predictions as-is if labels are 1D and batch sizes match\n        if len(labels.shape) == 1 and predictions.shape[0] == labels.shape[0]:\n            return labels, predictions\n\n    # If we get here, the shapes are not compatible\n    raise ValueError(\n        f\"Incompatible shapes: labels {labels.shape}, predictions {predictions.shape}. \"\n        \"Expected labels (N,) or (N, 1) and predictions (N,) or (N, C) where C is number of classes.\",\n    )\n
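
For example, a column vector of labels and an (N, 2) matrix of class probabilities are reduced to matching (N,) arrays before the metric is computed (a minimal sketch):

import numpy as np\n\nfrom stimulus.utils.performance import Performance\n\nlabels = np.array([[0.0], [1.0], [1.0]])                # (N, 1) -> squeezed to (N,)\nprobs = np.array([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]])  # (N, 2) -> positive-class column kept\n\nprint(Performance(labels, probs, metric='rocauc').val)  # scored on [0.1, 0.8, 0.6] vs [0, 1, 1]\n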
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.mcc","title":"mcc","text":"
mcc(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute Matthews Correlation Coefficient.

Source code in src/stimulus/utils/performance.py
def mcc(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute Matthews Correlation Coefficient.\"\"\"\n    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])\n    return float(matthews_corrcoef(labels, predictions_binary))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.prauc","title":"prauc","text":"
prauc(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute PR AUC score.

Source code in src/stimulus/utils/performance.py
def prauc(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute PR AUC score.\"\"\"\n    return float(average_precision_score(labels, predictions))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.precision","title":"precision","text":"
precision(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute precision score.

Source code in src/stimulus/utils/performance.py
def precision(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute precision score.\"\"\"\n    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])\n    return float(precision_score(labels, predictions_binary))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.recall","title":"recall","text":"
recall(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute recall score.

Source code in src/stimulus/utils/performance.py
def recall(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute recall score.\"\"\"\n    predictions_binary = np.array([1 if p > BINARY_THRESHOLD else 0 for p in predictions])\n    return float(recall_score(labels, predictions_binary))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.rocauc","title":"rocauc","text":"
rocauc(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute ROC AUC score.

Source code in src/stimulus/utils/performance.py
def rocauc(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute ROC AUC score.\"\"\"\n    return float(roc_auc_score(labels, predictions))\n
"},{"location":"reference/stimulus/utils/performance/#stimulus.utils.performance.Performance.spearmanr","title":"spearmanr","text":"
spearmanr(\n    labels: NDArray[float64], predictions: NDArray[float64]\n) -> float\n

Compute Spearman correlation coefficient.

Source code in src/stimulus/utils/performance.py
def spearmanr(self, labels: NDArray[np.float64], predictions: NDArray[np.float64]) -> float:\n    \"\"\"Compute Spearman correlation coefficient.\"\"\"\n    return float(spearmanr(labels, predictions)[0])\n
"},{"location":"reference/stimulus/utils/yaml_data/","title":"stimulus.utils.yaml_data","text":""},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data","title":"yaml_data","text":"

Utility module for handling YAML configuration files and their validation.

Classes:

  • YamlColumns \u2013

    Model for column configuration.

  • YamlColumnsEncoder \u2013

    Model for column encoder configuration.

  • YamlConfigDict \u2013

    Model for main YAML configuration.

  • YamlGlobalParams \u2013

    Model for global parameters in YAML configuration.

  • YamlSchema \u2013

    Model for validating YAML schema.

  • YamlSplit \u2013

    Model for split configuration.

  • YamlSubConfigDict \u2013

    Model for sub-configuration generated from main config.

  • YamlTransform \u2013

    Model for transform configuration.

  • YamlTransformColumns \u2013

    Model for transform columns configuration.

  • YamlTransformColumnsTransformation \u2013

    Model for column transformation configuration.

Functions:

  • check_yaml_schema \u2013

    Validate YAML configuration fields have correct types.

  • dump_yaml_list_into_files \u2013

    Dumps a list of YAML configurations into separate files with custom formatting.

  • expand_transform_list_combinations \u2013

    Expands a list of transforms into all possible parameter combinations.

  • expand_transform_parameter_combinations \u2013

    Get all possible transforms by extracting parameters at each valid index.

  • extract_transform_parameters_at_index \u2013

    Get a transform with parameters at the specified index.

  • generate_data_configs \u2013

    Generates all possible data configurations from a YAML config.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlColumns","title":"YamlColumns","text":"

Bases: BaseModel

Model for column configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlColumnsEncoder","title":"YamlColumnsEncoder","text":"

Bases: BaseModel

Model for column encoder configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlConfigDict","title":"YamlConfigDict","text":"

Bases: BaseModel

Model for main YAML configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlGlobalParams","title":"YamlGlobalParams","text":"

Bases: BaseModel

Model for global parameters in YAML configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlSchema","title":"YamlSchema","text":"

Bases: BaseModel

Model for validating YAML schema.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlSplit","title":"YamlSplit","text":"

Bases: BaseModel

Model for split configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlSubConfigDict","title":"YamlSubConfigDict","text":"

Bases: BaseModel

Model for sub-configuration generated from main config.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlTransform","title":"YamlTransform","text":"

Bases: BaseModel

Model for transform configuration.

Methods:

  • validate_param_lists_across_columns \u2013

    Validate that parameter lists across columns have consistent lengths.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlTransform.validate_param_lists_across_columns","title":"validate_param_lists_across_columns classmethod","text":"
validate_param_lists_across_columns(\n    columns: list[YamlTransformColumns],\n) -> list[YamlTransformColumns]\n

Validate that parameter lists across columns have consistent lengths.

Parameters:

  • columns (list[YamlTransformColumns]) \u2013

    List of transform columns to validate

Returns:

  • list[YamlTransformColumns] \u2013

    The validated columns list

Source code in src/stimulus/utils/yaml_data.py
@field_validator(\"columns\")\n@classmethod\ndef validate_param_lists_across_columns(cls, columns: list[YamlTransformColumns]) -> list[YamlTransformColumns]:\n    \"\"\"Validate that parameter lists across columns have consistent lengths.\n\n    Args:\n        columns: List of transform columns to validate\n\n    Returns:\n        The validated columns list\n    \"\"\"\n    # Get all parameter list lengths across all columns and transformations\n    all_list_lengths: set[int] = set()\n\n    for column in columns:\n        for transformation in column.transformations:\n            if transformation.params and any(\n                isinstance(param_value, list) and len(param_value) > 0\n                for param_value in transformation.params.values()\n            ):\n                all_list_lengths.update(\n                    len(param_value)\n                    for param_value in transformation.params.values()\n                    if isinstance(param_value, list) and len(param_value) > 0\n                )\n\n    # Skip validation if no lists found\n    if not all_list_lengths:\n        return columns\n\n    # Check if all lists either have length 1, or all have the same length\n    all_list_lengths.discard(1)  # Remove length 1 as it's always valid\n    if len(all_list_lengths) > 1:  # Multiple different lengths found\n        raise ValueError(\n            \"All parameter lists across columns must either contain one element or have the same length\",\n        )\n\n    return columns\n
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlTransformColumns","title":"YamlTransformColumns","text":"

Bases: BaseModel

Model for transform columns configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.YamlTransformColumnsTransformation","title":"YamlTransformColumnsTransformation","text":"

Bases: BaseModel

Model for column transformation configuration.

"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.check_yaml_schema","title":"check_yaml_schema","text":"
check_yaml_schema(config_yaml: YamlConfigDict) -> str\n

Validate YAML configuration fields have correct types.

If a child field is specific to a parent, the child field's class is hosted in the parent field's class. If any field is not of the right type, the function raises a ValueError explaining the problem.

Parameters:

  • config_yaml (YamlConfigDict) \u2013

    The YamlConfigDict containing the fields of the yaml configuration file

Returns:

  • str ( str ) \u2013

    Empty string if validation succeeds

Raises:

  • ValueError \u2013

    If validation fails

Source code in src/stimulus/utils/yaml_data.py
def check_yaml_schema(config_yaml: YamlConfigDict) -> str:\n    \"\"\"Validate YAML configuration fields have correct types.\n\n    If a child field is specific to a parent, the child field's class is hosted in the parent field's class.\n    If any field is not of the right type, the function raises a ValueError explaining the problem.\n\n    Args:\n        config_yaml: The YamlConfigDict containing the fields of the yaml configuration file\n\n    Returns:\n        str: Empty string if validation succeeds\n\n    Raises:\n        ValueError: If validation fails\n    \"\"\"\n    try:\n        YamlSchema(yaml_conf=config_yaml)\n    except ValidationError as e:\n        # Use logging instead of print for error handling\n        raise ValueError(\"Wrong type on a field, see the pydantic report above\") from e\n    return \"\"\n
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.dump_yaml_list_into_files","title":"dump_yaml_list_into_files","text":"
dump_yaml_list_into_files(\n    yaml_list: list[YamlSubConfigDict],\n    directory_path: str,\n    base_name: str,\n) -> None\n

Dumps a list of YAML configurations into separate files with custom formatting.

Source code in src/stimulus/utils/yaml_data.py
def dump_yaml_list_into_files(\n    yaml_list: list[YamlSubConfigDict],\n    directory_path: str,\n    base_name: str,\n) -> None:\n    \"\"\"Dumps a list of YAML configurations into separate files with custom formatting.\"\"\"\n    # Create a new class attribute rather than assigning to the method\n    # Remove this line since we'll add ignore_aliases to CustomDumper instead\n\n    def represent_none(dumper: yaml.Dumper, _: Any) -> yaml.Node:\n        \"\"\"Custom representer to format None values as empty strings in YAML output.\"\"\"\n        return dumper.represent_scalar(\"tag:yaml.org,2002:null\", \"\")\n\n    def custom_representer(dumper: yaml.Dumper, data: Any) -> yaml.Node:\n        \"\"\"Custom representer to handle different types of lists with appropriate formatting.\"\"\"\n        if isinstance(data, list):\n            if len(data) == 0:\n                return dumper.represent_scalar(\"tag:yaml.org,2002:null\", \"\")\n            if isinstance(data[0], dict):\n                return dumper.represent_sequence(\"tag:yaml.org,2002:seq\", data, flow_style=False)\n            if isinstance(data[0], list):\n                return dumper.represent_sequence(\"tag:yaml.org,2002:seq\", data, flow_style=True)\n        return dumper.represent_sequence(\"tag:yaml.org,2002:seq\", data, flow_style=True)\n\n    class CustomDumper(yaml.Dumper):\n        \"\"\"Custom YAML dumper that adds extra formatting controls.\"\"\"\n\n        def ignore_aliases(self, _data: Any) -> bool:\n            \"\"\"Ignore aliases in the YAML output.\"\"\"\n            return True\n\n        def write_line_break(self, _data: Any = None) -> None:\n            \"\"\"Add extra newline after root-level elements.\"\"\"\n            super().write_line_break(_data)\n            if len(self.indents) <= 1:  # At root level\n                super().write_line_break(_data)\n\n        def increase_indent(self, *, flow: bool = False, indentless: bool = False) -> None:  # type: ignore[override]\n            \"\"\"Ensure consistent indentation by preventing indentless sequences.\"\"\"\n            return super().increase_indent(\n                flow=flow,\n                indentless=indentless,\n            )  # Force indentless to False for better formatting\n\n    # Register the custom representers with our dumper\n    yaml.add_representer(type(None), represent_none, Dumper=CustomDumper)\n    yaml.add_representer(list, custom_representer, Dumper=CustomDumper)\n\n    for i, yaml_dict in enumerate(yaml_list):\n        dict_data = yaml_dict.model_dump(exclude_none=True)\n\n        def fix_params(input_dict: dict[str, Any]) -> dict[str, Any]:\n            \"\"\"Recursively process dictionary to properly handle params fields.\"\"\"\n            if isinstance(input_dict, dict):\n                processed_dict: dict[str, Any] = {}\n                for key, value in input_dict.items():\n                    if key == \"encoder\" and isinstance(value, list):\n                        processed_dict[key] = []\n                        for encoder in value:\n                            processed_encoder = dict(encoder)\n                            if \"params\" not in processed_encoder or not processed_encoder[\"params\"]:\n                                processed_encoder[\"params\"] = {}\n                            processed_dict[key].append(processed_encoder)\n                    elif key == \"transformations\" and isinstance(value, list):\n                        processed_dict[key] = []\n                        for transformation 
in value:\n                            processed_transformation = dict(transformation)\n                            if \"params\" not in processed_transformation or not processed_transformation[\"params\"]:\n                                processed_transformation[\"params\"] = {}\n                            processed_dict[key].append(processed_transformation)\n                    elif isinstance(value, dict):\n                        processed_dict[key] = fix_params(value)\n                    elif isinstance(value, list):\n                        processed_dict[key] = [\n                            fix_params(list_item) if isinstance(list_item, dict) else list_item for list_item in value\n                        ]\n                    else:\n                        processed_dict[key] = value\n                return processed_dict\n            return input_dict\n\n        dict_data = fix_params(dict_data)\n\n        with open(f\"{directory_path}/{base_name}_{i}.yaml\", \"w\") as f:\n            yaml.dump(\n                dict_data,\n                f,\n                Dumper=CustomDumper,\n                sort_keys=False,\n                default_flow_style=False,\n                indent=2,\n                width=float(\"inf\"),  # Prevent line wrapping\n            )\n
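
The example below is a minimal usage sketch, not taken from the package docs: the output directory, base name, and the assumption that the configurations come from generate_data_configs are illustrative.

from stimulus.utils.yaml_data import dump_yaml_list_into_files, generate_data_configs\n\n# yaml_config is assumed to be a YamlConfigDict parsed elsewhere\nsub_configs = generate_data_configs(yaml_config)\n\n# Writes ./configs/experiment_0.yaml, ./configs/experiment_1.yaml, ...\ndump_yaml_list_into_files(sub_configs, \"./configs\", \"experiment\")\n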
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.expand_transform_list_combinations","title":"expand_transform_list_combinations","text":"
expand_transform_list_combinations(\n    transform_list: list[YamlTransform],\n) -> list[YamlTransform]\n

Expands a list of transforms into all possible parameter combinations.

Takes a list of transforms where each transform may contain parameter lists, and expands them into separate transforms with single parameter values. For example, if a transform has parameters [0.1, 0.2] and [1, 2], this will create two transforms: one with 0.1/1 and another with 0.2/2. Parameter lists are matched by index (first values go together, then second values, and so on) rather than combined as a Cartesian product.

Parameters:

  • transform_list (list[YamlTransform]) \u2013

    A list of YamlTransform objects containing parameter lists that need to be expanded into individual transforms.

Returns:

  • list[YamlTransform] \u2013

    list[YamlTransform]: A flattened list of transforms where each transform has single parameter values instead of parameter lists. The length of the returned list will be the sum of the number of parameter combinations for each input transform.

Source code in src/stimulus/utils/yaml_data.py
def expand_transform_list_combinations(transform_list: list[YamlTransform]) -> list[YamlTransform]:\n    \"\"\"Expands a list of transforms into all possible parameter combinations.\n\n    Takes a list of transforms where each transform may contain parameter lists,\n    and expands them into separate transforms with single parameter values.\n    For example, if a transform has parameters [0.1, 0.2] and [1, 2], this will\n    create two transforms: one with 0.1/1 and another with 0.2/2.\n\n    Args:\n        transform_list: A list of YamlTransform objects containing parameter lists\n            that need to be expanded into individual transforms.\n\n    Returns:\n        list[YamlTransform]: A flattened list of transforms where each transform\n            has single parameter values instead of parameter lists. The length of\n            the returned list will be the sum of the number of parameter combinations\n            for each input transform.\n    \"\"\"\n    sub_transforms = []\n    for transform in transform_list:\n        sub_transforms.extend(expand_transform_parameter_combinations(transform))\n    return sub_transforms\n
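
A hedged sketch of the behaviour, assuming transforms is a list[YamlTransform] parsed from a config: by construction, the result is the concatenation of the per-transform expansions.

from stimulus.utils.yaml_data import (\n    expand_transform_list_combinations,\n    expand_transform_parameter_combinations,\n)\n\n# transforms: list[YamlTransform] parsed from a config (assumed)\nexpanded = expand_transform_list_combinations(transforms)\n\n# Equivalent, by construction, to flattening the per-transform expansions\nflattened = [sub for t in transforms for sub in expand_transform_parameter_combinations(t)]\nassert [s.model_dump() for s in expanded] == [f.model_dump() for f in flattened]\n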
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.expand_transform_parameter_combinations","title":"expand_transform_parameter_combinations","text":"
expand_transform_parameter_combinations(\n    transform: YamlTransform,\n) -> list[YamlTransform]\n

Get all possible transforms by extracting parameters at each valid index.

For a transform with parameter lists, this creates multiple new transforms, each containing single parameter values taken from the corresponding indices of the parameter lists.

Parameters:

  • transform (YamlTransform) \u2013

    The original transform containing parameter lists

Returns:

  • list[YamlTransform] \u2013

    A list of transforms, each with single parameter values from sequential indices

Source code in src/stimulus/utils/yaml_data.py
def expand_transform_parameter_combinations(transform: YamlTransform) -> list[YamlTransform]:\n    \"\"\"Get all possible transforms by extracting parameters at each valid index.\n\n    For a transform with parameter lists, creates multiple new transforms, each containing\n    single parameter values from the corresponding indices of the parameter lists.\n\n    Args:\n        transform: The original transform containing parameter lists\n\n    Returns:\n        A list of transforms, each with single parameter values from sequential indices\n    \"\"\"\n    # Find the length of parameter lists - we only need to check the first list we find\n    # since all lists must have the same length (enforced by pydantic validator)\n    max_length = 1\n    for column in transform.columns:\n        for transformation in column.transformations:\n            if transformation.params:\n                list_lengths = [len(v) for v in transformation.params.values() if isinstance(v, list) and len(v) > 1]\n                if list_lengths:\n                    max_length = list_lengths[0]  # All lists have same length due to validator\n                    break\n\n    # Generate a transform for each index\n    transforms = []\n    for i in range(max_length):\n        transforms.append(extract_transform_parameters_at_index(transform, i))\n\n    return transforms\n
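
A small sketch, assuming a hypothetical transform whose transformation params hold two equal-length lists such as scale [0.1, 0.2] and shift [1, 2]:

from stimulus.utils.yaml_data import expand_transform_parameter_combinations\n\n# transform: YamlTransform with params {\"scale\": [0.1, 0.2], \"shift\": [1, 2]} (assumed)\nsub_transforms = expand_transform_parameter_combinations(transform)\n\n# One sub-transform per index: the first pairs 0.1 with 1, the second 0.2 with 2\nassert len(sub_transforms) == 2\n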
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.extract_transform_parameters_at_index","title":"extract_transform_parameters_at_index","text":"
extract_transform_parameters_at_index(\n    transform: YamlTransform, index: int = 0\n) -> YamlTransform\n

Get a transform with parameters at the specified index.

Parameters:

  • transform (YamlTransform) \u2013

    The original transform containing parameter lists

  • index (int, default: 0 ) \u2013

    Index to extract parameters from (default 0)

Returns:

  • YamlTransform \u2013

    A new transform with single parameter values at the specified index

Source code in src/stimulus/utils/yaml_data.py
def extract_transform_parameters_at_index(transform: YamlTransform, index: int = 0) -> YamlTransform:\n    \"\"\"Get a transform with parameters at the specified index.\n\n    Args:\n        transform: The original transform containing parameter lists\n        index: Index to extract parameters from (default 0)\n\n    Returns:\n        A new transform with single parameter values at the specified index\n    \"\"\"\n    # Create a copy of the transform\n    new_transform = YamlTransform(**transform.model_dump())\n\n    # Process each column and transformation\n    for column in new_transform.columns:\n        for transformation in column.transformations:\n            if transformation.params:\n                # Convert each parameter list to single value at index\n                new_params = {}\n                for param_name, param_value in transformation.params.items():\n                    if isinstance(param_value, list):\n                        new_params[param_name] = param_value[index]\n                    else:\n                        new_params[param_name] = param_value\n                transformation.params = new_params\n\n    return new_transform\n
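
A hedged sketch with a hypothetical transform whose params mix a list and a scalar: list values are picked at the given index, scalars are kept as-is.

from stimulus.utils.yaml_data import extract_transform_parameters_at_index\n\n# transform: YamlTransform whose first transformation has params\n# {\"scale\": [0.1, 0.2], \"seed\": 42} (hypothetical)\nsingle = extract_transform_parameters_at_index(transform, index=1)\n\nparams = single.columns[0].transformations[0].params\nassert params == {\"scale\": 0.2, \"seed\": 42}\n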
"},{"location":"reference/stimulus/utils/yaml_data/#stimulus.utils.yaml_data.generate_data_configs","title":"generate_data_configs","text":"
generate_data_configs(\n    yaml_config: YamlConfigDict,\n) -> list[YamlSubConfigDict]\n

Generates all possible data configurations from a YAML config.

Takes a YAML configuration that may contain parameter lists and splits, and generates all possible combinations of parameters and splits into separate data configurations.

For example, if the config has:

  • A transform with parameters [0.1, 0.2]

  • Two splits [0.7/0.3] and [0.8/0.2]

this will generate 4 configs, 2 for each split.

Parameters:

  • yaml_config (YamlConfigDict) \u2013

    The source YAML configuration containing transforms with parameter lists and multiple splits.

Returns:

  • list[YamlSubConfigDict] \u2013

    list[YamlSubConfigDict]: A list of data configurations, where each config has single parameter values and one split configuration. The length will be the product of the number of parameter combinations and the number of splits.

Source code in src/stimulus/utils/yaml_data.py
def generate_data_configs(yaml_config: YamlConfigDict) -> list[YamlSubConfigDict]:\n    \"\"\"Generates all possible data configurations from a YAML config.\n\n    Takes a YAML configuration that may contain parameter lists and splits,\n    and generates all possible combinations of parameters and splits into\n    separate data configurations.\n\n    For example, if the config has:\n    - A transform with parameters [0.1, 0.2]\n    - Two splits [0.7/0.3] and [0.8/0.2]\n    This will generate 4 configs, 2 for each split.\n\n    Args:\n        yaml_config: The source YAML configuration containing transforms with\n            parameter lists and multiple splits.\n\n    Returns:\n        list[YamlSubConfigDict]: A list of data configurations, where each\n            config has single parameter values and one split configuration. The\n            length will be the product of the number of parameter combinations\n            and the number of splits.\n    \"\"\"\n    if isinstance(yaml_config, dict) and not isinstance(yaml_config, YamlConfigDict):\n        raise TypeError(\"Input must be a YamlConfigDict object\")\n\n    sub_transforms = expand_transform_list_combinations(yaml_config.transforms)\n    sub_splits = yaml_config.split\n    sub_configs = []\n    for split in sub_splits:\n        for transform in sub_transforms:\n            sub_configs.append(\n                YamlSubConfigDict(\n                    global_params=yaml_config.global_params,\n                    columns=yaml_config.columns,\n                    transforms=transform,\n                    split=split,\n                ),\n            )\n    return sub_configs\n
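
A minimal end-to-end sketch, assuming a hypothetical data_config.yaml file that parses into a YamlConfigDict:

import yaml\n\nfrom stimulus.utils.yaml_data import YamlConfigDict, generate_data_configs\n\n# Hypothetical config file containing parameter lists and two splits\nwith open(\"data_config.yaml\") as f:\n    yaml_config = YamlConfigDict(**yaml.safe_load(f))\n\nsub_configs = generate_data_configs(yaml_config)\n# len(sub_configs) == number of parameter combinations * number of splits\n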
"},{"location":"reference/stimulus/utils/yaml_model_schema/","title":"stimulus.utils.yaml_model_schema","text":""},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema","title":"yaml_model_schema","text":"

Module for handling YAML configuration files and converting them to Ray Tune format.

Classes:

  • CustomTunableParameter \u2013

    Custom tunable parameter.

  • Data \u2013

    Data parameters.

  • Loss \u2013

    Loss parameters.

  • Model \u2013

    Model configuration.

  • RayTuneModel \u2013

    Ray Tune compatible model configuration.

  • RunParams \u2013

    Run parameters.

  • Scheduler \u2013

    Scheduler parameters.

  • TunableParameter \u2013

    Tunable parameter.

  • Tune \u2013

    Tune parameters.

  • TuneParams \u2013

    Tune parameters.

  • YamlRayConfigLoader \u2013

    Load and convert YAML configurations to Ray Tune format.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.CustomTunableParameter","title":"CustomTunableParameter","text":"

Bases: BaseModel

Custom tunable parameter.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.Data","title":"Data","text":"

Bases: BaseModel

Data parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.Loss","title":"Loss","text":"

Bases: BaseModel

Loss parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.Model","title":"Model","text":"

Bases: BaseModel

Model configuration.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.RayTuneModel","title":"RayTuneModel","text":"

Bases: BaseModel

Ray Tune compatible model configuration.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.RunParams","title":"RunParams","text":"

Bases: BaseModel

Run parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.Scheduler","title":"Scheduler","text":"

Bases: BaseModel

Scheduler parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.TunableParameter","title":"TunableParameter","text":"

Bases: BaseModel

Tunable parameter.

Methods:

  • validate_mode \u2013

    Validate that mode is supported by Ray Tune.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.TunableParameter.validate_mode","title":"validate_mode","text":"
validate_mode() -> TunableParameter\n

Validate that mode is supported by Ray Tune.

Source code in src/stimulus/utils/yaml_model_schema.py
@pydantic.model_validator(mode=\"after\")\ndef validate_mode(self) -> \"TunableParameter\":\n    \"\"\"Validate that mode is supported by Ray Tune.\"\"\"\n    if not hasattr(tune, self.mode):\n        raise AttributeError(\n            f\"Mode {self.mode} not recognized, check the ray.tune documentation at https://docs.ray.io/en/master/tune/api_docs/suggestion.html\",\n        )\n\n    mode = getattr(tune, self.mode)\n    if mode.__name__ not in [\n        \"choice\",\n        \"uniform\",\n        \"loguniform\",\n        \"quniform\",\n        \"qloguniform\",\n        \"qnormal\",\n        \"randint\",\n        \"sample_from\",\n    ]:\n        raise NotImplementedError(f\"Mode {mode.__name__} not implemented yet\")\n\n    return self\n
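
A hedged sketch, assuming mode and space are the relevant fields of TunableParameter: a mode naming a supported ray.tune function passes the validator, anything else is rejected.

from stimulus.utils.yaml_model_schema import TunableParameter\n\n# \"choice\" is one of the supported ray.tune modes, so validation passes\nparam = TunableParameter(space=[16, 32, 64], mode=\"choice\")\n\n# A mode that is not a ray.tune attribute raises during validation:\n# TunableParameter(space=[16, 32, 64], mode=\"not_a_tune_function\")\n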
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.Tune","title":"Tune","text":"

Bases: BaseModel

Tune parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.TuneParams","title":"TuneParams","text":"

Bases: BaseModel

Tune parameters.

"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader","title":"YamlRayConfigLoader","text":"
YamlRayConfigLoader(model: Model)\n

Load and convert YAML configurations to Ray Tune format.

This class handles loading model configurations and converting them into formats compatible with Ray Tune's hyperparameter search spaces.

Parameters:

  • model (Model) \u2013

    Pydantic Model instance containing configuration

Methods:

  • convert_config_to_ray \u2013

    Convert Model configuration to Ray Tune format.

  • convert_raytune \u2013

    Convert parameter configuration to Ray Tune format.

  • get_config \u2013

    Return the current configuration.

  • raytune_sample_from \u2013

    Apply tune.sample_from to a given custom sampling function.

  • raytune_space_selector \u2013

    Convert space parameters to Ray Tune format based on the mode.

  • sampint \u2013

    Return a list of n random samples from the sample_space.

Source code in src/stimulus/utils/yaml_model_schema.py
def __init__(self, model: Model) -> None:\n    \"\"\"Initialize the config loader with a Model instance.\n\n    Args:\n        model: Pydantic Model instance containing configuration\n    \"\"\"\n    self.model = model\n    self.ray_model = self.convert_config_to_ray(model)\n
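
A minimal usage sketch; the file name and the yaml loading step are assumptions, while Model and YamlRayConfigLoader are the classes documented here.

import yaml\n\nfrom stimulus.utils.yaml_model_schema import Model, YamlRayConfigLoader\n\n# Hypothetical model-config file that parses into the Model schema\nwith open(\"model_config.yaml\") as f:\n    model = Model(**yaml.safe_load(f))\n\nloader = YamlRayConfigLoader(model)\nray_model = loader.get_config()  # RayTuneModel with tune.* search spaces\n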
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.convert_config_to_ray","title":"convert_config_to_ray","text":"
convert_config_to_ray(model: Model) -> RayTuneModel\n

Convert Model configuration to Ray Tune format.

Converts parameters in network_params and optimizer_params to Ray Tune search spaces.

Parameters:

  • model (Model) \u2013

    Model configuration

Returns:

  • RayTuneModel \u2013

    Ray Tune compatible model configuration

Source code in src/stimulus/utils/yaml_model_schema.py
def convert_config_to_ray(self, model: Model) -> RayTuneModel:\n    \"\"\"Convert Model configuration to Ray Tune format.\n\n    Converts parameters in network_params and optimizer_params to Ray Tune search spaces.\n\n    Args:\n        model: Model configuration\n\n    Returns:\n        Ray Tune compatible model configuration\n    \"\"\"\n    return RayTuneModel(\n        network_params={k: self.convert_raytune(v) for k, v in model.network_params.items()},\n        optimizer_params={k: self.convert_raytune(v) for k, v in model.optimizer_params.items()},\n        loss_params={k: self.convert_raytune(v) for k, v in model.loss_params},\n        data_params={k: self.convert_raytune(v) for k, v in model.data_params},\n        tune=model.tune,\n    )\n
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.convert_raytune","title":"convert_raytune","text":"
convert_raytune(\n    param: TunableParameter | CustomTunableParameter,\n) -> Any\n

Convert parameter configuration to Ray Tune format.

Parameters:

  • param (TunableParameter | CustomTunableParameter) \u2013

    Parameter configuration

Returns:

  • Any \u2013

    Ray Tune compatible parameter configuration

Source code in src/stimulus/utils/yaml_model_schema.py
def convert_raytune(self, param: TunableParameter | CustomTunableParameter) -> Any:\n    \"\"\"Convert parameter configuration to Ray Tune format.\n\n    Args:\n        param: Parameter configuration\n\n    Returns:\n        Ray Tune compatible parameter configuration\n    \"\"\"\n    mode = getattr(tune, param.mode)\n\n    if isinstance(param, TunableParameter):\n        return self.raytune_space_selector(mode, param.space)\n    return self.raytune_sample_from(mode, param)\n
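
A short sketch, reusing the loader constructed above and assuming space and mode are sufficient to build a TunableParameter: a plain TunableParameter is routed to raytune_space_selector, while a CustomTunableParameter goes through raytune_sample_from.

from stimulus.utils.yaml_model_schema import TunableParameter\n\n# Equivalent to tune.uniform(0.0, 0.1) for a plain TunableParameter\nparam = TunableParameter(space=[0.0, 0.1], mode=\"uniform\")\nsearch_space = loader.convert_raytune(param)\n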
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.get_config","title":"get_config","text":"
get_config() -> RayTuneModel\n

Return the current configuration.

Returns:

  • RayTuneModel \u2013

    Current configuration dictionary

Source code in src/stimulus/utils/yaml_model_schema.py
def get_config(self) -> RayTuneModel:\n    \"\"\"Return the current configuration.\n\n    Returns:\n        Current configuration dictionary\n    \"\"\"\n    return self.ray_model\n
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.raytune_sample_from","title":"raytune_sample_from","text":"
raytune_sample_from(\n    mode: Callable, param: CustomTunableParameter\n) -> Any\n

Apply tune.sample_from to a given custom sampling function.

Parameters:

  • mode (Callable) \u2013

    Ray Tune sampling function

  • param (CustomTunableParameter) \u2013

    TunableParameter containing sampling parameters

Returns:

  • Any \u2013

    Configured sampling function

Raises:

  • NotImplementedError \u2013

    If the sampling function is not supported

Source code in src/stimulus/utils/yaml_model_schema.py
def raytune_sample_from(self, mode: Callable, param: CustomTunableParameter) -> Any:\n    \"\"\"Apply tune.sample_from to a given custom sampling function.\n\n    Args:\n        mode: Ray Tune sampling function\n        param: TunableParameter containing sampling parameters\n\n    Returns:\n        Configured sampling function\n\n    Raises:\n        NotImplementedError: If the sampling function is not supported\n    \"\"\"\n    if param.function == \"sampint\":\n        return mode(lambda _: self.sampint(param.sample_space, param.n_space))\n\n    raise NotImplementedError(f\"Function {param.function} not implemented yet\")\n
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.raytune_space_selector","title":"raytune_space_selector","text":"
raytune_space_selector(mode: Callable, space: list) -> Any\n

Convert space parameters to Ray Tune format based on the mode.

Parameters:

  • mode (Callable) \u2013

    Ray Tune search space function (e.g., tune.choice, tune.uniform)

  • space (list) \u2013

    List of parameters defining the search space

Returns:

  • Any \u2013

    Configured Ray Tune search space

Source code in src/stimulus/utils/yaml_model_schema.py
def raytune_space_selector(self, mode: Callable, space: list) -> Any:\n    \"\"\"Convert space parameters to Ray Tune format based on the mode.\n\n    Args:\n        mode: Ray Tune search space function (e.g., tune.choice, tune.uniform)\n        space: List of parameters defining the search space\n\n    Returns:\n        Configured Ray Tune search space\n    \"\"\"\n    if mode.__name__ == \"choice\":\n        return mode(space)\n\n    return mode(*tuple(space))\n
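
A hedged sketch, reusing the loader constructed above: choice receives the list as-is, every other supported mode receives the list unpacked as positional arguments.

from ray import tune\n\nloader.raytune_space_selector(tune.choice, [16, 32, 64])  # tune.choice([16, 32, 64])\nloader.raytune_space_selector(tune.uniform, [0.0, 0.1])   # tune.uniform(0.0, 0.1)\n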
"},{"location":"reference/stimulus/utils/yaml_model_schema/#stimulus.utils.yaml_model_schema.YamlRayConfigLoader.sampint","title":"sampint staticmethod","text":"
sampint(sample_space: list, n_space: list) -> list[int]\n

Return a list of n random samples from the sample_space.

This function is useful for sampling different numbers of layers, each with different numbers of neurons.

Parameters:

  • sample_space (list) \u2013

    List [min, max] defining range of values to sample from

  • n_space (list) \u2013

    List [min, max] defining range for number of samples

Returns:

  • list[int] \u2013

    List of randomly sampled integers

Note

Uses Python's random module, which is not cryptographically secure. This is acceptable for hyperparameter sampling but should not be used for security-critical purposes (linter rule S311 flags this, hence the noqa suppression in the source).

Source code in src/stimulus/utils/yaml_model_schema.py
@staticmethod\ndef sampint(sample_space: list, n_space: list) -> list[int]:\n    \"\"\"Return a list of n random samples from the sample_space.\n\n    This function is useful for sampling different numbers of layers,\n    each with different numbers of neurons.\n\n    Args:\n        sample_space: List [min, max] defining range of values to sample from\n        n_space: List [min, max] defining range for number of samples\n\n    Returns:\n        List of randomly sampled integers\n\n    Note:\n        Uses Python's random module which is not cryptographically secure.\n        This is acceptable for hyperparameter sampling but should not be\n        used for security-critical purposes (S311 fails when linting).\n    \"\"\"\n    sample_space_list = list(range(sample_space[0], sample_space[1] + 1))\n    n_space_list = list(range(n_space[0], n_space[1] + 1))\n    n = random.choice(n_space_list)  # noqa: S311\n    return random.sample(sample_space_list, n)\n
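
A minimal sketch with hypothetical ranges: between 1 and 3 values drawn without replacement from 8..64, e.g. a list of hidden-layer sizes.

from stimulus.utils.yaml_model_schema import YamlRayConfigLoader\n\nlayer_sizes = YamlRayConfigLoader.sampint(sample_space=[8, 64], n_space=[1, 3])\n# e.g. [23, 57]\n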
"},{"location":"coverage/","title":"Coverage report","text":""}]} \ No newline at end of file